/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* This file is part of the LibreOffice project.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This file incorporates work covered by the following license notice:
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed
* with this work for additional information regarding copyright
* ownership. The ASF licenses this file to you under the Apache
* License, Version 2.0 (the "License"); you may not use this file
* except in compliance with the License. You may obtain a copy of
* the License at http://www.apache.org/licenses/LICENSE-2.0 .
*/
#include <memory>
#include <string_view>
#include <sal/config.h>
#include <unicode/idna.h>
#include <svl/urihelper.hxx>
#include <com/sun/star/ucb/Command.hpp>
#include <com/sun/star/ucb/IllegalIdentifierException.hpp>
#include <com/sun/star/ucb/UniversalContentBroker.hpp>
#include <com/sun/star/ucb/UnsupportedCommandException.hpp>
#include <com/sun/star/ucb/XCommandEnvironment.hpp>
#include <com/sun/star/ucb/XCommandProcessor.hpp>
#include <com/sun/star/ucb/XContent.hpp>
#include <com/sun/star/ucb/XUniversalContentBroker.hpp>
#include <com/sun/star/uno/Any.hxx>
#include <com/sun/star/uno/Exception.hpp>
#include <com/sun/star/uno/Reference.hxx>
#include <com/sun/star/uno/RuntimeException.hpp>
#include <com/sun/star/uno/XComponentContext.hpp>
#include <com/sun/star/uri/UriReferenceFactory.hpp>
#include <com/sun/star/uri/XUriReference.hpp>
#include <com/sun/star/uri/XUriReferenceFactory.hpp>
#include <comphelper/processfactory.hxx>
#include <osl/diagnose.h>
#include <rtl/character.hxx>
#include <rtl/ustrbuf.hxx>
#include <rtl/ustring.hxx>
#include <sal/types.h>
#include <sal/log.hxx>
#include <tools/inetmime.hxx>
#include <unotools/charclass.hxx>
using namespace com::sun::star;
// Resolves rTheRelURIRef against rTheBaseURIRef and returns the main URL of
// the result, decoded with eDecodeMechanism / eCharset.
//
// - A reference that starts with "#" is returned unchanged.
// - If the base URL is in error, the reference is parsed on its own via
//   SetSmartURL().
// - Otherwise the base's smartRel2Abs() is used.  When bCheckFileExists is
//   set, the input was not already absolute and the result is a file URL, the
//   reference is additionally parsed stand-alone; if that yields a valid
//   non-file URL, rMaybeFileHdl (when set) is asked whether the text may be a
//   file after all, and only if it says no (or is unset) the non-file
//   interpretation replaces the file one.
OUString URIHelper::SmartRel2Abs(INetURLObject const & rTheBaseURIRef,
                                 OUString const & rTheRelURIRef,
                                 Link<OUString *, bool> const & rMaybeFileHdl,
                                 bool bCheckFileExists,
                                 bool bIgnoreFragment,
                                 INetURLObject::EncodeMechanism eEncodeMechanism,
                                 INetURLObject::DecodeMechanism eDecodeMechanism,
                                 rtl_TextEncoding eCharset,
                                 FSysStyle eStyle)
{
    // Backwards compatibility: pure fragment references pass through as-is.
    if (rTheRelURIRef.startsWith("#"))
    {
        return rTheRelURIRef;
    }

    INetURLObject aResolved;

    // Without a usable base there is nothing to resolve against.
    if (rTheBaseURIRef.HasError())
    {
        aResolved.SetSmartURL(rTheRelURIRef, eEncodeMechanism, eCharset, eStyle);
        return aResolved.GetMainURL(eDecodeMechanism, eCharset);
    }

    bool bInputWasAbsolute;
    aResolved = rTheBaseURIRef.smartRel2Abs(rTheRelURIRef,
                                            bInputWasAbsolute,
                                            bIgnoreFragment,
                                            eEncodeMechanism,
                                            eCharset,
                                            false/*bRelativeNonURIs*/,
                                            eStyle);

    bool const bReconsiderFileResult
        = bCheckFileExists
          && !bInputWasAbsolute
          && (aResolved.GetProtocol() == INetProtocol::File);
    if (bReconsiderFileResult)
    {
        INetURLObject aStandalone;
        aStandalone.SetSmartURL(rTheRelURIRef, eEncodeMechanism, eCharset, eStyle);

        bool const bValidNonFile
            = !aStandalone.HasError()
              && aStandalone.GetProtocol() != INetProtocol::File;
        if (bValidNonFile)
        {
            // Give the handler (if any) the chance to claim the text as a file.
            bool bClaimedAsFile = false;
            if (rMaybeFileHdl.IsSet())
            {
                OUString aCandidatePath(rTheRelURIRef);
                bClaimedAsFile = rMaybeFileHdl.Call(&aCandidatePath);
            }
            if (!bClaimedAsFile)
            {
                aResolved = std::move(aStandalone);
            }
        }
    }

    return aResolved.GetMainURL(eDecodeMechanism, eCharset);
}
namespace
{
// File-local storage for the handler installed through
// URIHelper::SetMaybeFileHdl() and handed out by URIHelper::GetMaybeFileHdl().
Link<OUString *, bool> gMaybeFileHdl;
}

// Replaces the stored "maybe file" handler with a copy of rTheMaybeFileHdl.
void URIHelper::SetMaybeFileHdl(Link<OUString *, bool> const & rTheMaybeFileHdl)
{
    gMaybeFileHdl = rTheMaybeFileHdl;
}

// Returns a reference to the currently stored "maybe file" handler.
Link<OUString *, bool> const & URIHelper::GetMaybeFileHdl()
{
    return gMaybeFileHdl;
}
namespace {
// Returns true only for a non-null reference that reports isAbsolute() and
// does not report hasRelativePath().
bool isAbsoluteHierarchicalUriReference(
    css::uno::Reference< css::uri::XUriReference > const & uriReference)
{
    if (!uriReference.is())
    {
        return false;
    }
    if (!uriReference->isAbsolute())
    {
        return false;
    }
    return !uriReference->hasRelativePath();
}
// Outcome of one normalizePrefix() attempt.
//
// Performance heuristic behind the General/Specific split: if for any prefix
// URL of a given hierarchical URL either no UCB content can be created, or the
// UCB content does not support the getCasePreservingURL command, the same is
// assumed to hold for every other prefix URL of that URL, so callers need not
// retry with shorter prefixes.
enum Result
{
    // The command was executed and the normalized URL was produced.
    Success,
    // No content could be created, or the command is unsupported; retrying
    // with another prefix of the same URL is assumed to be pointless.
    GeneralFailure,
    // The command failed for this particular URL only; a shorter prefix may
    // still succeed.
    SpecificFailure
};
// Asks the UCB content behind uri for its case-preserving URL.
//
// broker      must be non-null.
// uri         the URL to look up.
// normalized  must be non-null; receives the command's result on Success
//             (it may alias uri, as the callers in this file do).
//
// Returns GeneralFailure when no content is obtained (an
// IllegalIdentifierException is treated as "no content") or the content
// reports UnsupportedCommandException; SpecificFailure for any other
// css::uno::Exception raised by the command; Success otherwise.
// css::uno::RuntimeException is never swallowed and propagates to the caller.
Result normalizePrefix( css::uno::Reference< css::ucb::XUniversalContentBroker > const & broker,
                        OUString const & uri, OUString * normalized)
{
    assert(broker.is() && normalized != nullptr);

    css::uno::Reference< css::ucb::XContent > xContent;
    try
    {
        auto const xIdentifier = broker->createContentIdentifier(uri);
        xContent = broker->queryContent(xIdentifier);
    }
    catch (css::ucb::IllegalIdentifierException &)
    {
        // Leaves xContent empty; handled right below.
    }
    if (!xContent.is())
    {
        return GeneralFailure;
    }

    try
    {
        css::uno::Reference< css::ucb::XCommandProcessor > const xProcessor(
            xContent, css::uno::UNO_QUERY_THROW);
        css::ucb::Command const aCommand(
            u"getCasePreservingURL"_ustr, -1, css::uno::Any());
        css::uno::Any const aAnswer(
            xProcessor->execute(
                aCommand, 0,
                css::uno::Reference< css::ucb::XCommandEnvironment >()));
        bool ok = (aAnswer >>= *normalized);
        OSL_ASSERT(ok);
    }
    catch (css::uno::RuntimeException &)
    {
        throw;
    }
    catch (css::ucb::UnsupportedCommandException &)
    {
        return GeneralFailure;
    }
    catch (css::uno::Exception &)
    {
        return SpecificFailure;
    }
    return Success;
}
// Returns uriReference with as much of it as possible replaced by the
// case-preserving form reported by normalizePrefix().
//
// normalizePrefix can fail for the full URL (a typical example being a file
// URL that denotes a non-existing resource).  In that case progressively
// shorter path prefixes are tried, longest first, so that all existing leading
// directories get normalized; the remaining segments, query and fragment are
// then re-attached unchanged.  Whenever nothing useful can be done the input is
// returned as is.
OUString normalize(
    css::uno::Reference< css::ucb::XUniversalContentBroker > const & broker,
    css::uno::Reference< css::uri::XUriReferenceFactory > const & uriFactory,
    OUString const & uriReference)
{
    // First try the complete reference, minus any fragment.
    sal_Int32 const fragmentPos = uriReference.indexOf('#');
    OUString normalized(
        fragmentPos == -1 ? uriReference : uriReference.copy(0, fragmentPos));
    Result const wholeResult = normalizePrefix(broker, normalized, &normalized);
    if (wholeResult == Success) {
        if (fragmentPos == -1) {
            return normalized;
        }
        return normalized + uriReference.subView(fragmentPos);
    }
    if (wholeResult == GeneralFailure) {
        return uriReference;
    }

    // SpecificFailure: fall back to prefix-wise normalization, which only
    // makes sense for absolute hierarchical references with >= 2 segments.
    css::uno::Reference< css::uri::XUriReference > ref(
        uriFactory->parse(uriReference));
    if (!isAbsoluteHierarchicalUriReference(ref)) {
        return uriReference;
    }
    sal_Int32 const segmentCount = ref->getPathSegmentCount();
    if (segmentCount < 2) {
        return uriReference;
    }

    // "scheme:" plus "//authority" when present; shared by every prefix.
    OUStringBuffer head(ref->getScheme());
    head.append(':');
    if (ref->hasAuthority()) {
        head.append("//" + ref->getAuthority());
    }

    for (sal_Int32 prefixLen = segmentCount - 1; prefixLen > 0; --prefixLen) {
        // Build the candidate prefix consisting of the first prefixLen segments.
        OUStringBuffer rebuilt(head);
        for (sal_Int32 seg = 0; seg < prefixLen; ++seg) {
            rebuilt.append('/');
            rebuilt.append(ref->getPathSegment(seg));
        }
        normalized = rebuilt.makeStringAndClear();
        if (normalizePrefix(broker, normalized, &normalized) == SpecificFailure)
        {
            continue; // try the next shorter prefix
        }

        // rebuilt is empty again here; start the result with the prefix.
        rebuilt.append(normalized);
        css::uno::Reference< css::uri::XUriReference > preRef(
            uriFactory->parse(normalized));
        if (!isAbsoluteHierarchicalUriReference(preRef)) {
            // This could only happen if something is inconsistent:
            break;
        }

        // normalizePrefix may have added or removed a final slash; undo that
        // so the remaining segments can be appended uniformly.
        sal_Int32 const preCount = preRef->getPathSegmentCount();
        if (preCount == prefixLen - 1) {
            rebuilt.append('/');
        } else if (preCount != prefixLen) {
            bool const hasExtraTrailingSlash
                = preCount - 1 == prefixLen && !rebuilt.isEmpty()
                  && rebuilt[rebuilt.getLength() - 1] == '/';
            if (!hasExtraTrailingSlash) {
                // This could only happen if something is inconsistent:
                break;
            }
            rebuilt.setLength(rebuilt.getLength() - 1);
        }

        // Re-attach everything that was not part of the normalized prefix.
        for (sal_Int32 seg = prefixLen; seg < segmentCount; ++seg) {
            rebuilt.append('/');
            rebuilt.append(ref->getPathSegment(seg));
        }
        if (ref->hasQuery()) {
            rebuilt.append('?');
            rebuilt.append(ref->getQuery());
        }
        if (ref->hasFragment()) {
            rebuilt.append('#');
            rebuilt.append(ref->getFragment());
        }
        return rebuilt.makeStringAndClear();
    }
    return uriReference;
}
}
// Normalizes both baseUriReference and uriReference (see normalize()), parses
// them, and returns what the URI reference factory's makeRelative() produces
// for the pair (called with the flags true, true, false).
//
// context must be non-null; it is used to create the universal content broker
// and the URI reference factory.
css::uno::Reference< css::uri::XUriReference >
URIHelper::normalizedMakeRelative(
    css::uno::Reference< css::uno::XComponentContext > const & context,
    OUString const & baseUriReference, OUString const & uriReference)
{
    OSL_ASSERT(context.is());

    css::uno::Reference< css::ucb::XUniversalContentBroker > xBroker(
        css::ucb::UniversalContentBroker::create(context));
    css::uno::Reference< css::uri::XUriReferenceFactory > xFactory(
        css::uri::UriReferenceFactory::create(context));

    css::uno::Reference< css::uri::XUriReference > xBase(
        xFactory->parse(normalize(xBroker, xFactory, baseUriReference)));
    css::uno::Reference< css::uri::XUriReference > xTarget(
        xFactory->parse(normalize(xBroker, xFactory, uriReference)));

    return xFactory->makeRelative(xBase, xTarget, true, true, false);
}
// Convenience wrapper around normalizedMakeRelative() that uses the process
// component context and works on strings: returns the relative reference's
// text, or uriReference unchanged when no relative reference was produced.
OUString URIHelper::simpleNormalizedMakeRelative(
    OUString const & baseUriReference, OUString const & uriReference)
{
    css::uno::Reference< css::uri::XUriReference > xRelative(
        URIHelper::normalizedMakeRelative(
            comphelper::getProcessComponentContext(), baseUriReference,
            uriReference));
    if (!xRelative.is())
    {
        return uriReference;
    }
    return xRelative->getUriReference();
}
// FindFirstURLInText
namespace {
// Returns the index just past the code point that starts at nPos: nPos + 2
// when rStr holds a high surrogate at nPos directly followed by a low
// surrogate, nPos + 1 otherwise.  nPos must be a valid index into rStr.
sal_Int32 nextChar(std::u16string_view rStr, sal_Int32 nPos)
{
    bool const bSurrogatePair
        = rtl::isHighSurrogate(rStr[nPos])
          && rStr.size() - nPos >= 2
          && rtl::isLowSurrogate(rStr[nPos + 1]);
    return bSurrogatePair ? nPos + 2 : nPos + 1;
}
// "\B1" test: true if nPos is the end of the range (nPos == nEnd) or the
// character at nPos is neither a letter/digit according to rCharClass nor one
// of "$", "%", "&", "-", "/", "@", "\".
bool isBoundary1(CharClass const & rCharClass, OUString const & rStr,
                 sal_Int32 nPos, sal_Int32 nEnd)
{
    // Punctuation that, like letters and digits, does not form a boundary.
    static constexpr std::u16string_view aNonBoundaryPunct = u"$%&-/@\\";

    if (nPos == nEnd)
    {
        return true;
    }
    if (rCharClass.isLetterNumeric(rStr, nPos))
    {
        return false;
    }
    return aNonBoundaryPunct.find(rStr[nPos]) == std::u16string_view::npos;
}
// "\B2" test: true if nPos is the end of the range (nPos == nEnd) or the
// character at nPos is neither a letter/digit according to rCharClass nor one
// of "!", "#", "$", "%", "&", "'", "*", "+", "-", "/", "=", "?", "@", "^",
// "_", "`", "{", "|", "}", "~".
bool isBoundary2(CharClass const & rCharClass, OUString const & rStr,
                 sal_Int32 nPos, sal_Int32 nEnd)
{
    // Punctuation that, like letters and digits, does not form a boundary.
    static constexpr std::u16string_view aNonBoundaryPunct
        = u"!#$%&'*+-/=?@^_`{|}~";

    if (nPos == nEnd)
    {
        return true;
    }
    if (rCharClass.isLetterNumeric(rStr, nPos))
    {
        return false;
    }
    return aNonBoundaryPunct.find(rStr[nPos]) == std::u16string_view::npos;
}
// Consumes one <wchar> at *pPos, if there is one.
//
// On success returns true and advances *pPos past the character; *pEnd is
// additionally moved to the new *pPos when the character may legitimately be
// the last character of a URI (letters, digits and the punctuation of
// isBoundary1, plus "\" / "|" when enabled, plus a ")" that closes a counted
// "(").  Returns false, leaving *pPos and *pEnd untouched, when the character
// is not a <wchar>.
//
// pMatchingBracketDepth  optional counter of currently open "(" (tdf#145381);
//                        lets a matching ")" be detected as part of the URI.
// bBackslash / bPipe     whether "\" respectively "|" are accepted.
bool checkWChar(CharClass const & rCharClass, OUString const & rStr,
                sal_Int32 * pPos, sal_Int32 * pEnd,
                sal_Int32 * pMatchingBracketDepth = nullptr,
                bool bBackslash = false, bool bPipe = false)
{
    sal_Unicode const c = rStr[*pPos];

    // Non-ASCII: accepted (and URI-final) iff rCharClass calls it a letter or
    // digit; may span a surrogate pair.
    if (!rtl::isAscii(c))
    {
        if (!rCharClass.isLetterNumeric(rStr, *pPos))
        {
            return false;
        }
        *pEnd = *pPos = nextChar(rStr, *pPos);
        return true;
    }

    // Character classes used in the table below.
    enum : sal_uInt8
    {
        ClassUric = 1,         // uric, but cannot end a URI
        ClassBackslash = 2,    // "\"
        ClassPipe = 3,         // "|"
        ClassFinal = 4,        // alpha, digit, "$", "%", "&", "-", "/", "@"
                               // (see isBoundary1)
        ClassOpenBracket = 5,  // "("
        ClassCloseBracket = 6  // ")"
        // 0: not uric
    };
    static sal_uInt8 const aMap[128]
        = { 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0,
            0, 1, 0, 0, 4, 4, 4, 1,   //  !"#$%&'
            5, 6, 1, 1, 1, 4, 1, 4,   // ()*+,-./
            4, 4, 4, 4, 4, 4, 4, 4,   // 01234567
            4, 4, 1, 1, 0, 1, 0, 1,   // 89:;<=>?
            4, 4, 4, 4, 4, 4, 4, 4,   // @ABCDEFG
            4, 4, 4, 4, 4, 4, 4, 4,   // HIJKLMNO
            4, 4, 4, 4, 4, 4, 4, 4,   // PQRSTUVW
            4, 4, 4, 1, 2, 1, 0, 1,   // XYZ[\]^_
            0, 4, 4, 4, 4, 4, 4, 4,   // `abcdefg
            4, 4, 4, 4, 4, 4, 4, 4,   // hijklmno
            4, 4, 4, 4, 4, 4, 4, 4,   // pqrstuvw
            4, 4, 4, 0, 3, 0, 1, 0 }; // xyz{|}~

    switch (aMap[c])
    {
        case ClassUric:
            ++(*pPos);
            return true;

        case ClassBackslash:
            if (!bBackslash)
            {
                return false;
            }
            *pEnd = ++(*pPos);
            return true;

        case ClassPipe:
            if (!bPipe)
            {
                return false;
            }
            *pEnd = ++(*pPos);
            return true;

        case ClassFinal:
            *pEnd = ++(*pPos);
            return true;

        case ClassOpenBracket:
            ++(*pPos);
            if (pMatchingBracketDepth != nullptr)
            {
                ++(*pMatchingBracketDepth);
            }
            return true;

        case ClassCloseBracket:
            ++(*pPos);
            if (pMatchingBracketDepth != nullptr && *pMatchingBracketDepth > 0)
            {
                --(*pMatchingBracketDepth);
                // tdf#145381 When there was an opening bracket, detect this
                // closing bracket as part of the uri
                *pEnd = *pPos;
            }
            return true;

        default: // not uric
            return false;
    }
}
// Index-based wrapper around INetURLObject::scanDomain(): scans rStr from
// *pPos up to nEnd, stores the index where scanning stopped back into *pPos
// and returns the label count reported by INetURLObject::scanDomain().
sal_uInt32 scanDomain(OUString const & rStr, sal_Int32 * pPos,
                      sal_Int32 nEnd)
{
    sal_Unicode const * const pStart = rStr.getStr();
    sal_Unicode const * pCursor = pStart + *pPos;
    sal_uInt32 const nLabelCount
        = INetURLObject::scanDomain(pCursor, pStart + nEnd, false);
    *pPos = sal::static_int_cast< sal_Int32 >(pCursor - pStart);
    return nLabelCount;
}
}
// Searches rText[rBegin..rEnd[ for the first substring that matches one of the
// URL productions listed inside the function body.
//
// rText       the text to scan.
// rBegin      in: start of the range to scan; out: start of the match, or the
//             value of rEnd if nothing matched.
// rEnd        in: end (exclusive) of the range to scan; out: end (exclusive)
//             of the match, unchanged if nothing matched.
// rCharClass  decides what counts as a letter or digit.
// eMechanism, eCharset
//             passed to INetURLObject for productions 1--4.
//
// Returns the matched URL as produced by
// INetURLObject::GetMainURL(DecodeMechanism::ToIUri), or an empty string if
// nothing matched.  If rBegin > rEnd or rEnd > rText.getLength(), an empty
// string is returned and rBegin/rEnd are left untouched.
OUString URIHelper::FindFirstURLInText(OUString const & rText,
sal_Int32 & rBegin,
sal_Int32 & rEnd,
CharClass const & rCharClass,
INetURLObject::EncodeMechanism eMechanism,
rtl_TextEncoding eCharset)
{
if (rBegin > rEnd || rEnd > rText.getLength())
return OUString();
// Search for the first substring of [rBegin..rEnd[ that matches any of the
// following productions.
// 1st Production (known scheme):
// \B1 <one of the known schemes, except file> ":" 1*wchar ["#" 1*wchar]
// \B1
// 2nd Production (file):
// \B1 "FILE:" 1*(wchar / "\" / "|") ["#" 1*wchar] \B1
// 3rd Production (ftp):
// \B1 "FTP" 2*("." label) ["/" *wchar] ["#" 1*wchar] \B1
// 4th Production (http):
// \B1 "WWW" 2*("." label) ["/" *wchar] ["#" 1*wchar] \B1
// 5th Production (mailto):
// \B2 local-part "@" domain \B1
// 6th Production (UNC file):
// \B1 "\\" domain "\" *(wchar / "\") \B1
// 7th Production (DOS file):
// \B1 ALPHA ":\" *(wchar / "\") \B1
// 8th Production (Unix-like DOS file):
// \B1 ALPHA ":/" *(wchar / "\") \B1
// The productions use the following auxiliary rules.
// local-part = atom *("." atom)
// atom = 1*(alphanum / "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+"
// / "-" / "/" / "=" / "?" / "^" / "_" / "`" / "{" / "|" / "}"
// / "~")
// domain = label *("." label)
// label = alphanum [*(alphanum / "-") alphanum]
// alphanum = ALPHA / DIGIT
// wchar = <any uric character (ignoring the escaped rule), or "%", or
// a letter or digit (according to rCharClass)>
// "\B1" (boundary 1) stands for the beginning or end of the block of text,
// or a character that is neither (a) a letter or digit (according to
// rCharClass), nor (b) any of "$", "%", "&", "-", "/", "@", or "\".
// (FIXME: What was the rationale for this set of punctuation characters?)
// "\B2" (boundary 2) stands for the beginning or end of the block of text,
// or a character that is neither (a) a letter or digit (according to
// rCharClass), nor (b) any of "!", "#", "$", "%", "&", "'", "*", "+", "-",
// "/", "=", "?", "@", "^", "_", "`", "{", "|", "}", or "~" (i.e., an RFC
// 822 <atom> character, or "@" from \B1's set above).
// Productions 1--4, and 6--8 try to find a maximum-length match, but they
// stop at the first <wchar> character that is a "\B1" character which is
// only followed by "\B1" characters (taking "\" and "|" characters into
// account appropriately). Production 5 simply tries to find a maximum-
// length match.
// Productions 1--4 use the given eMechanism and eCharset. Productions 5--8
// use EncodeMechanism::All.
// Productions 6--8 are always attempted (this function has no style
// parameter); their matches are parsed with FSysStyle::Dos.
// tdf#145381: In addition to the productions there is a mechanism to detect
// matching brackets (used by the 1st production). The task presents the case
// of a url that ends on a closing bracket. This needs to be detected as part
// of the uri in the case that a matching opening bracket exists.
// Whether the character before nPos was a \B1 respectively \B2 boundary; the
// beginning of the range counts as a boundary.
bool bBoundary1 = true;
bool bBoundary2 = true;
// NOTE(review): nextChar() can advance by two code units, and the loop stops
// only on nPos == rEnd; this presumably relies on rEnd never falling between
// the two halves of a surrogate pair -- confirm against callers.
for (sal_Int32 nPos = rBegin; nPos != rEnd; nPos = nextChar(rText, nPos))
{
sal_Unicode c = rText[nPos];
if (bBoundary1)
{
if (rtl::isAsciiAlpha(c))
{
sal_Int32 i = nPos;
INetProtocol eScheme = INetURLObject::CompareProtocolScheme(rText.subView(i, rEnd - i));
if (eScheme == INetProtocol::File) // 2nd
{
// Advance i to just behind the scheme's ":".
while (rText[i++] != ':') ;
sal_Int32 nPrefixEnd = i;
sal_Int32 nUriEnd = i;
// Consume the body; "\" and "|" are accepted here, brackets are not
// counted.
while (i != rEnd
&& checkWChar(rCharClass, rText, &i, &nUriEnd, nullptr, true,
true)) ;
// Optional fragment, only if the body was non-empty.
if (i != nPrefixEnd && i != rEnd && rText[i] == '#')
{
++i;
while (i != rEnd
&& checkWChar(rCharClass, rText, &i, &nUriEnd)) ;
}
if (nUriEnd != nPrefixEnd
&& isBoundary1(rCharClass, rText, nUriEnd, rEnd))
{
INetURLObject aUri(rText.subView(nPos, nUriEnd - nPos),
INetProtocol::File, eMechanism, eCharset,
FSysStyle::Detect);
if (!aUri.HasError())
{
rBegin = nPos;
rEnd = nUriEnd;
return
aUri.GetMainURL(INetURLObject::DecodeMechanism::ToIUri);
}
}
}
else if (eScheme != INetProtocol::NotValid) // 1st
{
// Advance i to just behind the scheme's ":".
while (rText[i++] != ':') ;
sal_Int32 nPrefixEnd = i;
sal_Int32 nUriEnd = i;
// Counts open "(" so that a matching ")" can end the URI (tdf#145381).
sal_Int32 nMatchingBracketDepth = 0;
while (i != rEnd
&& checkWChar(rCharClass, rText, &i, &nUriEnd,
&nMatchingBracketDepth)) ;
// Optional fragment, only if the body was non-empty.
if (i != nPrefixEnd && i != rEnd && rText[i] == '#')
{
++i;
while (i != rEnd
&& checkWChar(rCharClass, rText, &i, &nUriEnd)) ;
}
// Unlike the 2nd production, a "\" directly after the match is also
// accepted as terminator here.
if (nUriEnd != nPrefixEnd
&& (isBoundary1(rCharClass, rText, nUriEnd, rEnd)
|| rText[nUriEnd] == '\\'))
{
INetURLObject aUri(rText.subView(nPos, nUriEnd - nPos),
INetProtocol::Http, eMechanism,
eCharset);
if (!aUri.HasError())
{
rBegin = nPos;
rEnd = nUriEnd;
return
aUri.GetMainURL(INetURLObject::DecodeMechanism::ToIUri);
}
}
}
// Reached whenever no scheme-based match was returned above; retry from
// nPos with the scheme-less productions.
// 3rd, 4th:
i = nPos;
sal_uInt32 nLabels = scanDomain(rText, &i, rEnd);
if (nLabels >= 3
&& rText[nPos + 3] == '.'
&& (((rText[nPos] == 'w'
|| rText[nPos] == 'W')
&& (rText[nPos + 1] == 'w'
|| rText[nPos + 1] == 'W')
&& (rText[nPos + 2] == 'w'
|| rText[nPos + 2] == 'W'))
|| ((rText[nPos] == 'f'
|| rText[nPos] == 'F')
&& (rText[nPos + 1] == 't'
|| rText[nPos + 1] == 'T')
&& (rText[nPos + 2] == 'p'
|| rText[nPos + 2] == 'P'))))
// (note that rText[nPos + 3] is guaranteed to be valid, as at least
// three labels were scanned)
{
sal_Int32 nUriEnd = i;
// Optional path.
if (i != rEnd && rText[i] == '/')
{
nUriEnd = ++i;
while (i != rEnd
&& checkWChar(rCharClass, rText, &i, &nUriEnd)) ;
}
// Optional fragment.
if (i != rEnd && rText[i] == '#')
{
++i;
while (i != rEnd
&& checkWChar(rCharClass, rText, &i, &nUriEnd)) ;
}
if (isBoundary1(rCharClass, rText, nUriEnd, rEnd)
|| rText[nUriEnd] == '\\')
{
INetURLObject aUri(rText.subView(nPos, nUriEnd - nPos),
INetProtocol::Http, eMechanism,
eCharset);
if (!aUri.HasError())
{
rBegin = nPos;
rEnd = nUriEnd;
return
aUri.GetMainURL(INetURLObject::DecodeMechanism::ToIUri);
}
}
}
// ASCII letter followed by ":/" or ":\".
if (rEnd - nPos >= 3
&& rText[nPos + 1] == ':'
&& (rText[nPos + 2] == '/'
|| rText[nPos + 2] == '\\')) // 7th, 8th
{
i = nPos + 3;
sal_Int32 nUriEnd = i;
while (i != rEnd
&& checkWChar(rCharClass, rText, &i, &nUriEnd)) ;
if (isBoundary1(rCharClass, rText, nUriEnd, rEnd))
{
INetURLObject aUri(rText.subView(nPos, nUriEnd - nPos),
INetProtocol::File,
INetURLObject::EncodeMechanism::All,
RTL_TEXTENCODING_UTF8,
FSysStyle::Dos);
if (!aUri.HasError())
{
rBegin = nPos;
rEnd = nUriEnd;
return
aUri.GetMainURL(INetURLObject::DecodeMechanism::ToIUri);
}
}
}
}
else if (rEnd - nPos >= 2
&& rText[nPos] == '\\'
&& rText[nPos + 1] == '\\') // 6th
{
sal_Int32 i = nPos + 2;
sal_uInt32 nLabels = scanDomain(rText, &i, rEnd);
// The domain must be followed by a "\" that starts the path.
if (nLabels >= 1 && i != rEnd && rText[i] == '\\')
{
sal_Int32 nUriEnd = ++i;
// "\" is accepted within the path, "|" is not.
while (i != rEnd
&& checkWChar(rCharClass, rText, &i, &nUriEnd,
nullptr, true)) ;
if (isBoundary1(rCharClass, rText, nUriEnd, rEnd))
{
INetURLObject aUri(rText.subView(nPos, nUriEnd - nPos),
INetProtocol::File,
INetURLObject::EncodeMechanism::All,
RTL_TEXTENCODING_UTF8,
FSysStyle::Dos);
if (!aUri.HasError())
{
rBegin = nPos;
rEnd = nUriEnd;
return
aUri.GetMainURL(INetURLObject::DecodeMechanism::ToIUri);
}
}
}
}
}
if (bBoundary2 && INetMIME::isAtomChar(c)) // 5th
{
// bDot is true while the previous character was a "." separating two
// atoms of the local-part; two dots in a row end the attempt.
bool bDot = false;
for (sal_Int32 i = nPos + 1; i != rEnd; ++i)
{
sal_Unicode c2 = rText[i];
if (INetMIME::isAtomChar(c2))
bDot = false;
else if (bDot)
break;
else if (c2 == '.')
bDot = true;
else
{
// First character that is neither an atom character nor a ".": only
// an "@" followed by a domain and a \B1 boundary yields a match.
if (c2 == '@')
{
++i;
sal_uInt32 nLabels = scanDomain(rText, &i, rEnd);
if (nLabels >= 1
&& isBoundary1(rCharClass, rText, i, rEnd))
{
INetURLObject aUri(rText.subView(nPos, i - nPos),
INetProtocol::Mailto,
INetURLObject::EncodeMechanism::All);
if (!aUri.HasError())
{
rBegin = nPos;
rEnd = i;
return aUri.GetMainURL(
INetURLObject::DecodeMechanism::ToIUri);
}
}
}
break;
}
}
}
// Record whether the current character is a boundary, for the next
// iteration's start-of-match checks.
bBoundary1 = isBoundary1(rCharClass, rText, nPos, rEnd);
bBoundary2 = isBoundary2(rCharClass, rText, nPos, rEnd);
}
// Nothing matched: report an empty range at the end.
rBegin = rEnd;
return OUString();
}
// Checks whether rText[rBegin..rEnd[ as a whole is a DOI of the form
//     "doi:10." <4 to 9 digits> "/" <suffix>
// (the "doi:" prefix is matched ASCII-case-insensitively) and, if so, returns
// the corresponding "https://doi.org/..." URL, leaving rBegin and rEnd
// unchanged.
//
// The suffix must be non-empty, must not end in "/", and may consist of
// letters and digits (as classified by rCharClass) and the characters
// "." "-" "_" ";" "(" ")" "\" "/" ":".  Digits of the prefix are likewise
// classified by rCharClass.
//
// If the range is not such a DOI, rBegin is set to rEnd and an empty string is
// returned.  If rBegin > rEnd or rEnd exceeds the text length, an empty string
// is returned without touching rBegin/rEnd.
OUString URIHelper::FindFirstDOIInText(std::u16string_view rText,
                                       sal_Int32 & rBegin,
                                       sal_Int32 & rEnd,
                                       CharClass const & rCharClass)
{
    if (rBegin > rEnd || rEnd > static_cast<sal_Int32>(rText.size()))
    {
        return OUString();
    }

    // Punctuation allowed in the suffix besides the specially handled "/".
    static constexpr std::u16string_view aSuffixPunct = u".-_;()\\:";
    // Length of the fixed "doi:10." lead-in.
    sal_Int32 const nLeadInLen = 7;

    sal_Int32 const nLength = rEnd - rBegin;
    OUString aCandidate(rText.substr(rBegin, nLength));

    if (aCandidate.startsWithIgnoreAsciiCase("doi:10."))
    {
        bool bValid = true;
        bool bInSuffix = false;        // set once the separating "/" was seen
        sal_Int32 nPrefixDigits = 0;   // digits seen between "10." and "/"

        for (sal_Int32 nIdx = nLeadInLen; bValid && nIdx < nLength; ++nIdx)
        {
            sal_Unicode const cCur = aCandidate[nIdx];
            if (bInSuffix)
            {
                bValid = rCharClass.isAlphaNumeric(aCandidate, nIdx)
                         || aSuffixPunct.find(cCur) != std::u16string_view::npos
                         || (cCur == '/' && nIdx < nLength - 1);
            }
            else if (nPrefixDigits > 9)
            {
                // More than nine digits before the slash.
                bValid = false;
            }
            else if (rCharClass.isDigit(aCandidate, nIdx))
            {
                ++nPrefixDigits;
            }
            else if (cCur == '/' && nPrefixDigits >= 4 && nIdx < nLength - 1)
            {
                // At least four digits, and something follows the slash.
                bInSuffix = true;
            }
            else
            {
                bValid = false;
            }
        }

        if (bValid && bInSuffix)
        {
            // Drop the "doi:" scheme, keep "10.<digits>/<suffix>".
            return OUString::Concat("https://doi.org/") + aCandidate.subView(4);
        }
    }

    rBegin = rEnd;
    return OUString();
}
// Parses rURI (with eEncodeMechanism / eCharset) and returns it without its
// password part, decoded with eDecodeMechanism / eCharset.  A URI that cannot
// be parsed is returned unchanged.
OUString URIHelper::removePassword(OUString const & rURI,
                                   INetURLObject::EncodeMechanism eEncodeMechanism,
                                   INetURLObject::DecodeMechanism eDecodeMechanism,
                                   rtl_TextEncoding eCharset)
{
    INetURLObject aParsed(rURI, eEncodeMechanism, eCharset);
    if (aParsed.HasError())
    {
        return rURI;
    }
    return aParsed.GetURLNoPass(eDecodeMechanism, eCharset);
}
// Returns url with a non-ASCII host name converted to its ASCII form via
// icu::IDNA (UTS #46, nameToASCII).
//
// The input is returned unchanged when it cannot be parsed, has no or an empty
// authority, has a host consisting of ASCII characters only, or when the IDNA
// instance cannot be created or the conversion reports an error.  User info
// and port of the authority, as well as path, query and fragment, are carried
// over as they are.
OUString URIHelper::resolveIdnaHost(OUString const & url) {
    css::uno::Reference<css::uri::XUriReference> uri(
        css::uri::UriReferenceFactory::create(
            comphelper::getProcessComponentContext())
        ->parse(url));
    if (!(uri.is() && uri->hasAuthority())) {
        return url;
    }
    auto auth(uri->getAuthority());
    if (auth.isEmpty())
        return url;

    // The host starts behind the "@" that ends the user info; without an "@"
    // indexOf yields -1, so the host starts at 0.
    sal_Int32 hostStart = auth.indexOf('@') + 1;
    // A trailing ":" <digits> is the port and does not belong to the host; if
    // the trailing digits are not preceded by ":", they are part of the host.
    sal_Int32 hostEnd = auth.getLength();
    while (hostEnd > hostStart && rtl::isAsciiDigit(auth[hostEnd - 1])) {
        --hostEnd;
    }
    if (hostEnd > hostStart && auth[hostEnd - 1] == ':') {
        --hostEnd;
    } else {
        hostEnd = auth.getLength();
    }

    auto asciiOnly = true;
    for (auto i = hostStart; i != hostEnd; ++i) {
        if (!rtl::isAscii(auth[i])) {
            asciiOnly = false;
            break;
        }
    }
    if (asciiOnly) {
        // Avoid icu::IDNA case normalization in purely non-IDNA domain names:
        return url;
    }

    UErrorCode e = U_ZERO_ERROR;
    std::unique_ptr<icu::IDNA> idna(
        icu::IDNA::createUTS46Instance(
            (UIDNA_USE_STD3_RULES | UIDNA_CHECK_BIDI | UIDNA_CHECK_CONTEXTJ | UIDNA_CHECK_CONTEXTO),
            e));
    if (U_FAILURE(e)) {
        // NOTE(review): "vcl.gdi" looks like a log area borrowed from another
        // module; left unchanged here -- confirm which registered area this
        // file should use.
        SAL_WARN("vcl.gdi", "icu::IDNA::createUTS46Instance " << e);
        return url;
    }
    icu::UnicodeString ascii;
    icu::IDNAInfo info;
    idna->nameToASCII(
        icu::UnicodeString(
            reinterpret_cast<UChar const *>(auth.getStr() + hostStart),
            hostEnd - hostStart),
        ascii, info, e);
    if (U_FAILURE(e) || info.hasErrors()) {
        return url;
    }

    // Reassemble the reference around the converted host.  A reference may
    // have an authority without a scheme ("//host/path"); in that case no
    // ":" must be emitted, or the result would start with a bogus "://".
    OUString const scheme(uri->getScheme());
    OUStringBuffer buf(scheme);
    if (!scheme.isEmpty()) {
        buf.append(':');
    }
    buf.append(OUString::Concat("//") + auth.subView(0, hostStart));
    buf.append(
        reinterpret_cast<sal_Unicode const *>(ascii.getBuffer()),
        ascii.length());
    buf.append(auth.subView(hostEnd) + uri->getPath());
    if (uri->hasQuery()) {
        buf.append("?" + uri->getQuery());
    }
    if (uri->hasFragment()) {
        buf.append("#" + uri->getFragment());
    }
    return buf.makeStringAndClear();
}
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
// V530 The return value of function 'append' is required to be utilized.
// V530 The return value of function 'append' is required to be utilized.
// V530 The return value of function 'append' is required to be utilized.
// V530 The return value of function 'append' is required to be utilized.
// V530 The return value of function 'append' is required to be utilized.
// V530 The return value of function 'append' is required to be utilized.
// V530 The return value of function 'append' is required to be utilized.
// V530 The return value of function 'append' is required to be utilized.