/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
 
#include <memory>
#include <string_view>
 
#include <sal/config.h>
 
#include <unicode/idna.h>
 
#include <svl/urihelper.hxx>
#include <com/sun/star/ucb/Command.hpp>
#include <com/sun/star/ucb/IllegalIdentifierException.hpp>
#include <com/sun/star/ucb/UniversalContentBroker.hpp>
#include <com/sun/star/ucb/UnsupportedCommandException.hpp>
#include <com/sun/star/ucb/XCommandEnvironment.hpp>
#include <com/sun/star/ucb/XCommandProcessor.hpp>
#include <com/sun/star/ucb/XContent.hpp>
#include <com/sun/star/ucb/XUniversalContentBroker.hpp>
#include <com/sun/star/uno/Any.hxx>
#include <com/sun/star/uno/Exception.hpp>
#include <com/sun/star/uno/Reference.hxx>
#include <com/sun/star/uno/RuntimeException.hpp>
#include <com/sun/star/uno/XComponentContext.hpp>
#include <com/sun/star/uri/UriReferenceFactory.hpp>
#include <com/sun/star/uri/XUriReference.hpp>
#include <com/sun/star/uri/XUriReferenceFactory.hpp>
#include <comphelper/processfactory.hxx>
#include <osl/diagnose.h>
#include <rtl/character.hxx>
#include <rtl/ustrbuf.hxx>
#include <rtl/ustring.hxx>
#include <sal/types.h>
#include <sal/log.hxx>
#include <tools/inetmime.hxx>
#include <unotools/charclass.hxx>
 
using namespace com::sun::star;
 
OUString URIHelper::SmartRel2Abs(INetURLObject const & rTheBaseURIRef,
                                 OUString const & rTheRelURIRef,
                                 Link<OUString *, bool> const & rMaybeFileHdl,
                                 bool bCheckFileExists,
                                 bool bIgnoreFragment,
                                 INetURLObject::EncodeMechanism eEncodeMechanism,
                                 INetURLObject::DecodeMechanism eDecodeMechanism,
                                 rtl_TextEncoding eCharset,
                                 FSysStyle eStyle)
{
    // Backwards compatibility:
    if( rTheRelURIRef.startsWith("#") )
        return rTheRelURIRef;
 
    INetURLObject aAbsURIRef;
    if (rTheBaseURIRef.HasError())
        aAbsURIRef. SetSmartURL(rTheRelURIRef, eEncodeMechanism, eCharset, eStyle);
    else
    {
        bool bWasAbsolute;
        aAbsURIRef = rTheBaseURIRef.smartRel2Abs(rTheRelURIRef,
                                                 bWasAbsolute,
                                                 bIgnoreFragment,
                                                 eEncodeMechanism,
                                                 eCharset,
                                                 false/*bRelativeNonURIs*/,
                                                 eStyle);
        if (bCheckFileExists
            && !bWasAbsolute
            && (aAbsURIRef.GetProtocol() == INetProtocol::File))
        {
            INetURLObject aNonFileURIRef;
            aNonFileURIRef.SetSmartURL(rTheRelURIRef,
                                       eEncodeMechanism,
                                       eCharset,
                                       eStyle);
            if (!aNonFileURIRef.HasError()
                && aNonFileURIRef.GetProtocol() != INetProtocol::File)
            {
                bool bMaybeFile = false;
                if (rMaybeFileHdl.IsSet())
                {
                    OUString aFilePath(rTheRelURIRef);
                    bMaybeFile = rMaybeFileHdl.Call(&aFilePath);
                }
                if (!bMaybeFile)
                    aAbsURIRef = std::move(aNonFileURIRef);
            }
        }
    }
    return aAbsURIRef.GetMainURL(eDecodeMechanism, eCharset);
}
 
namespace { Link<OUString *, bool> gMaybeFileHdl; }
 
void URIHelper::SetMaybeFileHdl(Link<OUString *, bool> const & rTheMaybeFileHdl)
{
    gMaybeFileHdl = rTheMaybeFileHdl;
}
 
Link<OUString *, bool> const & URIHelper::GetMaybeFileHdl()
{
    return gMaybeFileHdl;
}
 
namespace {
 
bool isAbsoluteHierarchicalUriReference(
    css::uno::Reference< css::uri::XUriReference > const & uriReference)
{
    return uriReference.is() && uriReference->isAbsolute()
        && !uriReference->hasRelativePath();
}
 
// To improve performance, assume that if for any prefix URL of a given
// hierarchical URL either a UCB content cannot be created, or the UCB content
// does not support the getCasePreservingURL command, then this will hold for
// any other prefix URL of the given URL, too:
enum Result { Success, GeneralFailure, SpecificFailure };
 
Result normalizePrefix( css::uno::Reference< css::ucb::XUniversalContentBroker > const & broker,
                        OUString const & uri, OUString * normalized)
{
    assert(broker.is() && normalized != nullptr);
    css::uno::Reference< css::ucb::XContent > content;
    try {
        content = broker->queryContent(broker->createContentIdentifier(uri));
    } catch (css::ucb::IllegalIdentifierException &) {}
    if (!content.is()) {
        return GeneralFailure;
    }
    try {
        bool ok =
            (css::uno::Reference< css::ucb::XCommandProcessor >(
                   content, css::uno::UNO_QUERY_THROW)->execute(
                       css::ucb::Command(u"getCasePreservingURL"_ustr,
                           -1, css::uno::Any()),
                       0,
                       css::uno::Reference< css::ucb::XCommandEnvironment >())
               >>= *normalized);
        OSL_ASSERT(ok);
    } catch (css::uno::RuntimeException &) {
        throw;
    } catch (css::ucb::UnsupportedCommandException &) {
        return GeneralFailure;
    } catch (css::uno::Exception &) {
        return SpecificFailure;
    }
    return Success;
}
 
OUString normalize(
    css::uno::Reference< css::ucb::XUniversalContentBroker > const & broker,
    css::uno::Reference< css::uri::XUriReferenceFactory > const & uriFactory,
    OUString const & uriReference)
{
    // normalizePrefix can potentially fail (a typically example being a file
    // URL that denotes a non-existing resource); in such a case, try to
    // normalize as long a prefix of the given URL as possible (i.e., normalize
    // all the existing directories within the path):
    OUString normalized;
    sal_Int32 n = uriReference.indexOf('#');
    normalized = n == -1 ? uriReference : uriReference.copy(0, n);
    switch (normalizePrefix(broker, normalized, &normalized)) {
    case Success:
        return n == -1 ? normalized : normalized + uriReference.subView(n);
    case GeneralFailure:
        return uriReference;
    case SpecificFailure:
    default:
        break;
    }
    css::uno::Reference< css::uri::XUriReference > ref(
        uriFactory->parse(uriReference));
    if (!isAbsoluteHierarchicalUriReference(ref)) {
        return uriReference;
    }
    sal_Int32 count = ref->getPathSegmentCount();
    if (count < 2) {
        return uriReference;
    }
    OUStringBuffer head(ref->getScheme());
    head.append(':');
    if (ref->hasAuthority()) {
        head.append("//" + ref->getAuthority());
    }
    for (sal_Int32 i = count - 1; i > 0; --i) {
        OUStringBuffer buf(head);
        for (sal_Int32 j = 0; j < i; ++j) {
            buf.append('/');
            buf.append(ref->getPathSegment(j));
        }
        normalized = buf.makeStringAndClear();
        if (normalizePrefix(broker, normalized, &normalized) != SpecificFailure)
        {
            buf.append(normalized);
            css::uno::Reference< css::uri::XUriReference > preRef(
                uriFactory->parse(normalized));
            if (!isAbsoluteHierarchicalUriReference(preRef)) {
                // This could only happen if something is inconsistent:
                break;
            }
            sal_Int32 preCount = preRef->getPathSegmentCount();
            // normalizePrefix may have added or removed a final slash:
            if (preCount != i) {
                if (preCount == i - 1) {
                    buf.append('/');
                } else if (preCount - 1 == i && !buf.isEmpty()
                           && buf[buf.getLength() - 1] == '/')
                {
                    buf.setLength(buf.getLength() - 1);
                } else {
                    // This could only happen if something is inconsistent:
                    break;
                }
            }
            for (sal_Int32 j = i; j < count; ++j) {
                buf.append('/');
                buf.append(ref->getPathSegment(j));
            }
            if (ref->hasQuery()) {
                buf.append('?');
                buf.append(ref->getQuery());
            }
            if (ref->hasFragment()) {
                buf.append('#');
                buf.append(ref->getFragment());
            }
            return buf.makeStringAndClear();
        }
    }
    return uriReference;
}
 
}
 
css::uno::Reference< css::uri::XUriReference >
URIHelper::normalizedMakeRelative(
    css::uno::Reference< css::uno::XComponentContext > const & context,
    OUString const & baseUriReference, OUString const & uriReference)
{
    OSL_ASSERT(context.is());
    css::uno::Reference< css::ucb::XUniversalContentBroker > broker(
        css::ucb::UniversalContentBroker::create(context));
    css::uno::Reference< css::uri::XUriReferenceFactory > uriFactory(
        css::uri::UriReferenceFactory::create(context));
    return uriFactory->makeRelative(
        uriFactory->parse(normalize(broker, uriFactory, baseUriReference)),
        uriFactory->parse(normalize(broker, uriFactory, uriReference)), true,
        true, false);
}
 
OUString URIHelper::simpleNormalizedMakeRelative(
    OUString const & baseUriReference, OUString const & uriReference)
{
    css::uno::Reference< css::uri::XUriReference > rel(
        URIHelper::normalizedMakeRelative(
            comphelper::getProcessComponentContext(), baseUriReference,
            uriReference));
    return rel.is() ? rel->getUriReference() : uriReference;
}
 
 
//  FindFirstURLInText
 
 
namespace {
 
sal_Int32 nextChar(std::u16string_view rStr, sal_Int32 nPos)
{
    return rtl::isHighSurrogate(rStr[nPos])
           && rStr.size() - nPos >= 2
           && rtl::isLowSurrogate(rStr[nPos + 1]) ?
        nPos + 2 : nPos + 1;
}
 
bool isBoundary1(CharClass const & rCharClass, OUString const & rStr,
                 sal_Int32 nPos, sal_Int32 nEnd)
{
    if (nPos == nEnd)
        return true;
    if (rCharClass.isLetterNumeric(rStr, nPos))
        return false;
    switch (rStr[nPos])
    {
    case '$':
    case '%':
    case '&':
    case '-':
    case '/':
    case '@':
    case '\\':
        return false;
    default:
        return true;
    }
}
 
bool isBoundary2(CharClass const & rCharClass, OUString const & rStr,
                 sal_Int32 nPos, sal_Int32 nEnd)
{
    if (nPos == nEnd)
        return true;
    if (rCharClass.isLetterNumeric(rStr, nPos))
        return false;
    switch (rStr[nPos])
    {
    case '!':
    case '#':
    case '$':
    case '%':
    case '&':
    case '\'':
    case '*':
    case '+':
    case '-':
    case '/':
    case '=':
    case '?':
    case '@':
    case '^':
    case '_':
    case '`':
    case '{':
    case '|':
    case '}':
    case '~':
        return false;
    default:
        return true;
    }
}
 
// tdf#145381 Added MatchingBracketDepth counter to detect matching closing
// brackets that are part of the uri
bool checkWChar(CharClass const & rCharClass, OUString const & rStr,
                sal_Int32 * pPos, sal_Int32 * pEnd,
                sal_Int32 * pMatchingBracketDepth = nullptr,
                bool bBackslash = false, bool bPipe = false)
{
    sal_Unicode c = rStr[*pPos];
    if (rtl::isAscii(c))
    {
        static sal_uInt8 const aMap[128]
            = { 0, 0, 0, 0, 0, 0, 0, 0,
                0, 0, 0, 0, 0, 0, 0, 0,
                0, 0, 0, 0, 0, 0, 0, 0,
                0, 0, 0, 0, 0, 0, 0, 0,
                0, 1, 0, 0, 4, 4, 4, 1,   //  !"#$%&'
                5, 6, 1, 1, 1, 4, 1, 4,   // ()*+,-./
                4, 4, 4, 4, 4, 4, 4, 4,   // 01234567
                4, 4, 1, 1, 0, 1, 0, 1,   // 89:;<=>?
                4, 4, 4, 4, 4, 4, 4, 4,   // @ABCDEFG
                4, 4, 4, 4, 4, 4, 4, 4,   // HIJKLMNO
                4, 4, 4, 4, 4, 4, 4, 4,   // PQRSTUVW
                4, 4, 4, 1, 2, 1, 0, 1,   // XYZ[\]^_
                0, 4, 4, 4, 4, 4, 4, 4,   // `abcdefg
                4, 4, 4, 4, 4, 4, 4, 4,   // hijklmno
                4, 4, 4, 4, 4, 4, 4, 4,   // pqrstuvw
                4, 4, 4, 0, 3, 0, 1, 0 }; // xyz{|}~
        switch (aMap[c])
        {
            default: // not uric
                return false;
 
            case 1: // uric
                ++(*pPos);
                return true;
 
            case 2: // "\"
                if (bBackslash)
                {
                    *pEnd = ++(*pPos);
                    return true;
                }
                else
                    return false;
 
            case 3: // "|"
                if (bPipe)
                {
                    *pEnd = ++(*pPos);
                    return true;
                }
                else
                    return false;
 
            case 4: // alpha, digit, "$", "%", "&", "-", "/", "@" (see
                    // isBoundary1)
                *pEnd = ++(*pPos);
                return true;
 
            case 5: // opening bracket
                ++(*pPos);
                if(nullptr != pMatchingBracketDepth)
                    ++(*pMatchingBracketDepth);
                return true;
 
            case 6: // closing bracket
                ++(*pPos);
                if(nullptr != pMatchingBracketDepth && *pMatchingBracketDepth > 0)
                {
                    --(*pMatchingBracketDepth);
                    // tdf#145381 When there was an opening bracket, detect this closing bracket
                    // as part of the uri
                    *pEnd = *pPos;
                }
                return true;
 
        }
    }
    else if (rCharClass.isLetterNumeric(rStr, *pPos))
    {
        *pEnd = *pPos = nextChar(rStr, *pPos);
        return true;
    }
    else
        return false;
}
 
sal_uInt32 scanDomain(OUString const & rStr, sal_Int32 * pPos,
                      sal_Int32 nEnd)
{
    sal_Unicode const * pBuffer = rStr.getStr();
    sal_Unicode const * p = pBuffer + *pPos;
    sal_uInt32 nLabels = INetURLObject::scanDomain(p, pBuffer + nEnd, false);
    *pPos = sal::static_int_cast< sal_Int32 >(p - pBuffer);
    return nLabels;
}
 
}
 
OUString URIHelper::FindFirstURLInText(OUString const & rText,
                                       sal_Int32 & rBegin,
                                       sal_Int32 & rEnd,
                                       CharClass const & rCharClass,
                                       INetURLObject::EncodeMechanism eMechanism,
                                       rtl_TextEncoding eCharset)
{
    if (rBegin > rEnd || rEnd > rText.getLength())
        return OUString();
 
    // Search for the first substring of [rBegin..rEnd[ that matches any of the
    // following productions (for which the appropriate style bit is set in
    // eStyle, if applicable).
 
    // 1st Production (known scheme):
    //    \B1 <one of the known schemes, except file> ":" 1*wchar ["#" 1*wchar]
    //        \B1
 
    // 2nd Production (file):
    //    \B1 "FILE:" 1*(wchar / "\" / "|") ["#" 1*wchar] \B1
 
    // 3rd Production (ftp):
    //    \B1 "FTP" 2*("." label) ["/" *wchar] ["#" 1*wchar] \B1
 
    // 4th Production (http):
    //    \B1 "WWW" 2*("." label) ["/" *wchar] ["#" 1*wchar] \B1
 
    // 5th Production (mailto):
    //    \B2 local-part "@" domain \B1
 
    // 6th Production (UNC file):
    //    \B1 "\\" domain "\" *(wchar / "\") \B1
 
    // 7th Production (DOS file):
    //    \B1 ALPHA ":\" *(wchar / "\") \B1
 
    // 8th Production (Unix-like DOS file):
    //    \B1 ALPHA ":/" *(wchar / "\") \B1
 
    // The productions use the following auxiliary rules.
 
    //    local-part = atom *("." atom)
    //    atom = 1*(alphanum / "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+"
    //              / "-" / "/" / "=" / "?" / "^" / "_" / "`" / "{" / "|" / "}"
    //              / "~")
    //    domain = label *("." label)
    //    label = alphanum [*(alphanum / "-") alphanum]
    //    alphanum = ALPHA / DIGIT
    //    wchar = <any uric character (ignoring the escaped rule), or "%", or
    //             a letter or digit (according to rCharClass)>
 
    // "\B1" (boundary 1) stands for the beginning or end of the block of text,
    // or a character that is neither (a) a letter or digit (according to
    // rCharClass), nor (b) any of "$", "%", "&", "-", "/", "@", or "\".
    // (FIXME:  What was the rationale for this set of punctuation characters?)
 
    // "\B2" (boundary 2) stands for the beginning or end of the block of text,
    // or a character that is neither (a) a letter or digit (according to
    // rCharClass), nor (b) any of "!", "#", "$", "%", "&", "'", "*", "+", "-",
    // "/", "=", "?", "@", "^", "_", "`", "{", "|", "}", or "~" (i.e., an RFC
    // 822 <atom> character, or "@" from \B1's set above).
 
    // Productions 1--4, and 6--8 try to find a maximum-length match, but they
    // stop at the first <wchar> character that is a "\B1" character which is
    // only followed by "\B1" characters (taking "\" and "|" characters into
    // account appropriately).  Production 5 simply tries to find a maximum-
    // length match.
 
    // Productions 1--4 use the given eMechanism and eCharset.  Productions 5--9
    // use EncodeMechanism::All.
 
    // Productions 6--9 are only applicable if the FSysStyle::Dos bit is set in
    // eStyle.
 
    // tdf#145381: In addition to the productions I added a mechanism to detect
    // matching brackets. The task presents the case of an url that ends on a
    // closing bracket. This needs to be detected as part of the uri in the case
    // that a matching opening bracket exists.
 
    bool bBoundary1 = true;
    bool bBoundary2 = true;
    for (sal_Int32 nPos = rBegin; nPos != rEnd; nPos = nextChar(rText, nPos))
    {
        sal_Unicode c = rText[nPos];
        if (bBoundary1)
        {
            if (rtl::isAsciiAlpha(c))
            {
                sal_Int32 i = nPos;
                INetProtocol eScheme = INetURLObject::CompareProtocolScheme(rText.subView(i, rEnd - i));
                if (eScheme == INetProtocol::File) // 2nd
                {
                    while (rText[i++] != ':') ;
                    sal_Int32 nPrefixEnd = i;
                    sal_Int32 nUriEnd = i;
                    while (i != rEnd
                           && checkWChar(rCharClass, rText, &i, &nUriEnd, nullptr, true,
                                         true)) ;
                    if (i != nPrefixEnd && i != rEnd && rText[i] == '#')
                    {
                        ++i;
                        while (i != rEnd
                               && checkWChar(rCharClass, rText, &i, &nUriEnd)) ;
                    }
                    if (nUriEnd != nPrefixEnd
                        && isBoundary1(rCharClass, rText, nUriEnd, rEnd))
                    {
                        INetURLObject aUri(rText.subView(nPos, nUriEnd - nPos),
                                           INetProtocol::File, eMechanism, eCharset,
                                           FSysStyle::Detect);
                        if (!aUri.HasError())
                        {
                            rBegin = nPos;
                            rEnd = nUriEnd;
                            return
                                aUri.GetMainURL(INetURLObject::DecodeMechanism::ToIUri);
                        }
                    }
                }
                else if (eScheme != INetProtocol::NotValid) // 1st
                {
                    while (rText[i++] != ':') ;
                    sal_Int32 nPrefixEnd = i;
                    sal_Int32 nUriEnd = i;
                    sal_Int32 nMatchingBracketDepth = 0;
                    while (i != rEnd
                           && checkWChar(rCharClass, rText, &i, &nUriEnd,
                                         &nMatchingBracketDepth)) ;
                    if (i != nPrefixEnd && i != rEnd && rText[i] == '#')
                    {
                        ++i;
                        while (i != rEnd
                               && checkWChar(rCharClass, rText, &i, &nUriEnd)) ;
                    }
                    if (nUriEnd != nPrefixEnd
                        && (isBoundary1(rCharClass, rText, nUriEnd, rEnd)
                            || rText[nUriEnd] == '\\'))
                    {
                        INetURLObject aUri(rText.subView(nPos, nUriEnd - nPos),
                                           INetProtocol::Http, eMechanism,
                                           eCharset);
                        if (!aUri.HasError())
                        {
                            rBegin = nPos;
                            rEnd = nUriEnd;
                            return
                                aUri.GetMainURL(INetURLObject::DecodeMechanism::ToIUri);
                        }
                    }
                }
 
                // 3rd, 4th:
                i = nPos;
                sal_uInt32 nLabels = scanDomain(rText, &i, rEnd);
                if (nLabels >= 3
                    && rText[nPos + 3] == '.'
                    && (((rText[nPos] == 'w'
                          || rText[nPos] == 'W')
                         && (rText[nPos + 1] == 'w'
                             || rText[nPos + 1] == 'W')
                         && (rText[nPos + 2] == 'w'
                             || rText[nPos + 2] == 'W'))
                        || ((rText[nPos] == 'f'
                             || rText[nPos] == 'F')
                            && (rText[nPos + 1] == 't'
                                || rText[nPos + 1] == 'T')
                            && (rText[nPos + 2] == 'p'
                                || rText[nPos + 2] == 'P'))))
                    // (note that rText.GetChar(nPos + 3) is guaranteed to be
                    // valid)
                {
                    sal_Int32 nUriEnd = i;
                    if (i != rEnd && rText[i] == '/')
                    {
                        nUriEnd = ++i;
                        while (i != rEnd
                               && checkWChar(rCharClass, rText, &i, &nUriEnd)) ;
                    }
                    if (i != rEnd && rText[i] == '#')
                    {
                        ++i;
                        while (i != rEnd
                               && checkWChar(rCharClass, rText, &i, &nUriEnd)) ;
                    }
                    if (isBoundary1(rCharClass, rText, nUriEnd, rEnd)
                        || rText[nUriEnd] == '\\')
                    {
                        INetURLObject aUri(rText.subView(nPos, nUriEnd - nPos),
                                           INetProtocol::Http, eMechanism,
                                           eCharset);
                        if (!aUri.HasError())
                        {
                            rBegin = nPos;
                            rEnd = nUriEnd;
                            return
                                aUri.GetMainURL(INetURLObject::DecodeMechanism::ToIUri);
                        }
                    }
                }
 
                if (rEnd - nPos >= 3
                    && rText[nPos + 1] == ':'
                    && (rText[nPos + 2] == '/'
                        || rText[nPos + 2] == '\\')) // 7th, 8th
                {
                    i = nPos + 3;
                    sal_Int32 nUriEnd = i;
                    while (i != rEnd
                           && checkWChar(rCharClass, rText, &i, &nUriEnd)) ;
                    if (isBoundary1(rCharClass, rText, nUriEnd, rEnd))
                    {
                        INetURLObject aUri(rText.subView(nPos, nUriEnd - nPos),
                                           INetProtocol::File,
                                           INetURLObject::EncodeMechanism::All,
                                           RTL_TEXTENCODING_UTF8,
                                           FSysStyle::Dos);
                        if (!aUri.HasError())
                        {
                            rBegin = nPos;
                            rEnd = nUriEnd;
                            return
                                aUri.GetMainURL(INetURLObject::DecodeMechanism::ToIUri);
                        }
                    }
                }
            }
            else if (rEnd - nPos >= 2
                     && rText[nPos] == '\\'
                     && rText[nPos + 1] == '\\') // 6th
            {
                sal_Int32 i = nPos + 2;
                sal_uInt32 nLabels = scanDomain(rText, &i, rEnd);
                if (nLabels >= 1 && i != rEnd && rText[i] == '\\')
                {
                    sal_Int32 nUriEnd = ++i;
                    while (i != rEnd
                           && checkWChar(rCharClass, rText, &i, &nUriEnd,
                                         nullptr, true)) ;
                    if (isBoundary1(rCharClass, rText, nUriEnd, rEnd))
                    {
                        INetURLObject aUri(rText.subView(nPos, nUriEnd - nPos),
                                           INetProtocol::File,
                                           INetURLObject::EncodeMechanism::All,
                                           RTL_TEXTENCODING_UTF8,
                                           FSysStyle::Dos);
                        if (!aUri.HasError())
                        {
                            rBegin = nPos;
                            rEnd = nUriEnd;
                            return
                                aUri.GetMainURL(INetURLObject::DecodeMechanism::ToIUri);
                        }
                    }
                }
            }
        }
        if (bBoundary2 && INetMIME::isAtomChar(c)) // 5th
        {
            bool bDot = false;
            for (sal_Int32 i = nPos + 1; i != rEnd; ++i)
            {
                sal_Unicode c2 = rText[i];
                if (INetMIME::isAtomChar(c2))
                    bDot = false;
                else if (bDot)
                    break;
                else if (c2 == '.')
                    bDot = true;
                else
                {
                    if (c2 == '@')
                    {
                        ++i;
                        sal_uInt32 nLabels = scanDomain(rText, &i, rEnd);
                        if (nLabels >= 1
                            && isBoundary1(rCharClass, rText, i, rEnd))
                        {
                            INetURLObject aUri(rText.subView(nPos, i - nPos),
                                               INetProtocol::Mailto,
                                               INetURLObject::EncodeMechanism::All);
                            if (!aUri.HasError())
                            {
                                rBegin = nPos;
                                rEnd = i;
                                return aUri.GetMainURL(
                                           INetURLObject::DecodeMechanism::ToIUri);
                            }
                        }
                    }
                    break;
                }
            }
        }
        bBoundary1 = isBoundary1(rCharClass, rText, nPos, rEnd);
        bBoundary2 = isBoundary2(rCharClass, rText, nPos, rEnd);
    }
    rBegin = rEnd;
    return OUString();
}
 
OUString URIHelper::FindFirstDOIInText(std::u16string_view rText,
                                       sal_Int32 & rBegin,
                                       sal_Int32 & rEnd,
                                       CharClass const & rCharClass)
{
    if (rBegin > rEnd || rEnd > static_cast<sal_Int32>(rText.size()))
        return OUString();
 
    sal_Int32 start = 7;
    sal_Int32 count = rEnd-rBegin;
    OUString candidate(rText.substr(rBegin, count));
    // Match with regex "doi:10\.\d{4,9}\/[-._;()\/:a-zA-Z0-9]+"
    if (candidate.startsWithIgnoreAsciiCase("doi:10."))
    {
        bool flag = true;
        sal_Int32 digit = 0;
        for (sal_Int32 i=start; i<count; i++)
        {
            sal_Unicode c = candidate[i];
            // Match 4 to 9 digits before slash
            if (digit >= 0)
            {
                if (digit>9)
                {
                    flag = false;
                    break;
                }
 
                if ( rCharClass.isDigit(candidate,i) )
                {
                    digit++;
                }
                else if (c=='/' && digit>=4 && i<count-1)
                {
                    digit=-1;
                }
                else
                {
                    flag = false;
                    break;
                }
            }
            // Match [-._;()\/:a-zA-Z0-9] after slash
            else if (!( rCharClass.isAlphaNumeric(candidate, i) || c == '.' || c == '-' || c=='_' ||
                        c==';' || c=='(' || c==')' || c=='\\' || (c=='/' && i<count-1) || c==':'))
            {
                flag = false;
                break;
            }
        }
        if (flag && digit==-1)
        {
            return OUString::Concat("https://doi.org/")+candidate.subView(4);
        }
    }
    rBegin = rEnd;
    return OUString();
}
 
OUString URIHelper::removePassword(OUString const & rURI,
                                   INetURLObject::EncodeMechanism eEncodeMechanism,
                                   INetURLObject::DecodeMechanism eDecodeMechanism,
                                   rtl_TextEncoding eCharset)
{
    INetURLObject aObj(rURI, eEncodeMechanism, eCharset);
    return aObj.HasError() ?
               rURI :
               aObj.GetURLNoPass(eDecodeMechanism, eCharset);
}
 
OUString URIHelper::resolveIdnaHost(OUString const & url) {
    css::uno::Reference<css::uri::XUriReference> uri(
        css::uri::UriReferenceFactory::create(
            comphelper::getProcessComponentContext())
        ->parse(url));
    if (!(uri.is() && uri->hasAuthority())) {
        return url;
    }
    auto auth(uri->getAuthority());
    if (auth.isEmpty())
        return url;
    sal_Int32 hostStart = auth.indexOf('@') + 1;
    sal_Int32 hostEnd = auth.getLength();
    while (hostEnd > hostStart && rtl::isAsciiDigit(auth[hostEnd - 1])) {
        --hostEnd;
    }
    if (hostEnd > hostStart && auth[hostEnd - 1] == ':') {
        --hostEnd;
    } else {
        hostEnd = auth.getLength();
    }
    auto asciiOnly = true;
    for (auto i = hostStart; i != hostEnd; ++i) {
        if (!rtl::isAscii(auth[i])) {
            asciiOnly = false;
            break;
        }
    }
    if (asciiOnly) {
        // Avoid icu::IDNA case normalization in purely non-IDNA domain names:
        return url;
    }
    UErrorCode e = U_ZERO_ERROR;
    std::unique_ptr<icu::IDNA> idna(
        icu::IDNA::createUTS46Instance(
            (UIDNA_USE_STD3_RULES | UIDNA_CHECK_BIDI | UIDNA_CHECK_CONTEXTJ | UIDNA_CHECK_CONTEXTO),
            e));
    if (U_FAILURE(e)) {
        SAL_WARN("vcl.gdi", "icu::IDNA::createUTS46Instance " << e);
        return url;
    }
    icu::UnicodeString ascii;
    icu::IDNAInfo info;
    idna->nameToASCII(
        icu::UnicodeString(
            reinterpret_cast<UChar const *>(auth.getStr() + hostStart),
            hostEnd - hostStart),
        ascii, info, e);
    if (U_FAILURE(e) || info.hasErrors()) {
        return url;
    }
    OUStringBuffer buf(uri->getScheme());
    buf.append(OUString::Concat("://") + auth.subView(0, hostStart));
    buf.append(
        reinterpret_cast<sal_Unicode const *>(ascii.getBuffer()),
        ascii.length());
    buf.append(auth.subView(hostEnd) + uri->getPath());
    if (uri->hasQuery()) {
        buf.append("?" + uri->getQuery());
    }
    if (uri->hasFragment()) {
        buf.append("#" + uri->getFragment());
    }
    return buf.makeStringAndClear();
}
 
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */

V530 The return value of function 'append' is required to be utilized.

V530 The return value of function 'append' is required to be utilized.

V530 The return value of function 'append' is required to be utilized.

V530 The return value of function 'append' is required to be utilized.

V530 The return value of function 'append' is required to be utilized.

V530 The return value of function 'append' is required to be utilized.

V530 The return value of function 'append' is required to be utilized.

V530 The return value of function 'append' is required to be utilized.