/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
 
#include <com/sun/star/uno/Reference.h>
 
#include <comphelper/lok.hxx>
#include <comphelper/sequence.hxx>
#include <comphelper/processfactory.hxx>
#include <cppuhelper/factory.hxx>
#include <cppuhelper/supportsservice.hxx>
#include <cppuhelper/weak.hxx>
#include <com/sun/star/linguistic2/XLinguProperties.hpp>
#include <com/sun/star/linguistic2/LinguServiceManager.hpp>
#include <com/sun/star/linguistic2/XSpellChecker1.hpp>
#include <i18nlangtag/languagetag.hxx>
#include <tools/debug.hxx>
#include <osl/mutex.hxx>
#include <osl/thread.h>
 
#include <hyphen.h>
#include "hyphenimp.hxx"
 
#include <linguistic/hyphdta.hxx>
#include <rtl/ustring.hxx>
#include <rtl/ustrbuf.hxx>
#include <rtl/textenc.h>
#include <sal/log.hxx>
 
#include <linguistic/misc.hxx>
#include <svtools/strings.hrc>
#include <unotools/charclass.hxx>
#include <unotools/lingucfg.hxx>
#include <unotools/resmgr.hxx>
#include <osl/file.hxx>
 
#include <stdio.h>
#include <string.h>
 
#include <cassert>
#include <numeric>
#include <vector>
#include <set>
#include <memory>
#include <o3tl/string_view.hxx>
 
// XML-header to query SPELLML support
constexpr OUStringLiteral SPELLML_SUPPORT = u"<?xml?>";
 
using namespace osl;
using namespace com::sun::star;
using namespace com::sun::star::beans;
using namespace com::sun::star::lang;
using namespace com::sun::star::uno;
using namespace com::sun::star::linguistic2;
using namespace linguistic;
 
static uno::Reference< XLinguServiceManager2 > GetLngSvcMgr_Impl()
{
    const uno::Reference< XComponentContext >& xContext( comphelper::getProcessComponentContext() );
    uno::Reference< XLinguServiceManager2 > xRes = LinguServiceManager::create( xContext ) ;
    return xRes;
}
 
Hyphenator::Hyphenator() :
    aEvtListeners   ( GetLinguMutex() )
{
    bDisposing = false;
}
 
Hyphenator::~Hyphenator()
{
    for (auto & rInfo : mvDicts)
    {
        if (rInfo.aPtr)
            hnj_hyphen_free(rInfo.aPtr);
    }
 
    if (pPropHelper)
    {
        pPropHelper->RemoveAsPropListener();
    }
}
 
PropertyHelper_Hyphenation& Hyphenator::GetPropHelper_Impl()
{
    if (!pPropHelper)
    {
        Reference< XLinguProperties >   xPropSet = GetLinguProperties();
 
        pPropHelper.reset( new PropertyHelper_Hyphenation (static_cast<XHyphenator *>(this), xPropSet ) );
        pPropHelper->AddAsPropListener();   //! after a reference is established
    }
    return *pPropHelper;
}
 
// Requires GetLinguMutex locked
void Hyphenator::ensureLocales()
{
    // this routine should return the locales supported by the installed
    // dictionaries.
    if (mvDicts.empty())
    {
        SvtLinguConfig aLinguCfg;
 
        // get list of dictionaries-to-use
        // (more precisely: the list of dictionaries using the
        // new configuration entries).
        std::vector< SvtLinguConfigDictionaryEntry > aDics;
        uno::Sequence< OUString > aFormatList;
        aLinguCfg.GetSupportedDictionaryFormatsFor( u"Hyphenators"_ustr,
                u"org.openoffice.lingu.LibHnjHyphenator"_ustr, aFormatList );
        for (const auto& rFormat : aFormatList)
        {
            std::vector< SvtLinguConfigDictionaryEntry > aTmpDic(
                    aLinguCfg.GetActiveDictionariesByFormat( rFormat ) );
            aDics.insert( aDics.end(), aTmpDic.begin(), aTmpDic.end() );
        }
 
        //!! for compatibility with old dictionaries (the ones not using extensions
        //!! or new configuration entries, but still using the dictionary.lst file)
        //!! get the list of old style hyphenation dictionaries to use...
        std::vector< SvtLinguConfigDictionaryEntry > aOldStyleDics(
                GetOldStyleDics( "HYPH" ) );
 
        // to prefer dictionaries with configuration entries we will only
        // use those old style dictionaries that add a language that
        // is not yet supported by the list of new style dictionaries
        MergeNewStyleDicsAndOldStyleDics( aDics, aOldStyleDics );
 
        if (!aDics.empty())
        {
            // get supported locales from the dictionaries-to-use...
            std::set<OUString> aLocaleNamesSet;
            for (auto const& dict : aDics)
            {
                for (const auto& rLocaleName : dict.aLocaleNames)
                {
                    if (!comphelper::LibreOfficeKit::isAllowlistedLanguage(rLocaleName))
                        continue;
                    aLocaleNamesSet.insert( rLocaleName );
                }
            }
            // ... and add them to the resulting sequence
            std::vector<Locale> aLocalesVec;
            aLocalesVec.reserve(aLocaleNamesSet.size());
 
            std::transform(aLocaleNamesSet.begin(), aLocaleNamesSet.end(), std::back_inserter(aLocalesVec),
                [](const OUString& localeName) { return LanguageTag::convertToLocale(localeName); });
 
            aSuppLocales = comphelper::containerToSequence(aLocalesVec);
 
            //! For each dictionary and each locale we need a separate entry.
            //! If this results in more than one dictionary per locale then (for now)
            //! it is undefined which dictionary gets used.
            //! In the future the implementation should support using several dictionaries
            //! for one locale.
            sal_Int32 numdict = std::accumulate(aDics.begin(), aDics.end(), 0,
                [](const sal_Int32 nSum, const SvtLinguConfigDictionaryEntry& dict) {
                    return nSum + dict.aLocaleNames.getLength(); });
 
            // add dictionary information
            mvDicts.resize(numdict);
 
            sal_Int32 k = 0;
            for (auto const& dict :  aDics)
            {
                if (dict.aLocaleNames.hasElements() &&
                    dict.aLocations.hasElements())
                {
                    // currently the implementation supports only one language per dictionary entry,
                    // thus we work around this by adding the same dictionary several times,
                    // once for each of its supported locales.
                    for (const auto& rLocaleName : dict.aLocaleNames)
                    {
                        LanguageTag aLanguageTag(rLocaleName);
                        mvDicts[k].aPtr = nullptr;
                        mvDicts[k].eEnc = RTL_TEXTENCODING_DONTKNOW;
                        mvDicts[k].aLoc = aLanguageTag.getLocale();
                        mvDicts[k].apCC.reset( new CharClass( std::move(aLanguageTag) ) );
                        // the location is stored without its file extension; LoadDictionary
                        // appends ".dic" again when the pattern file gets loaded.
                        // Thus we use the first location only and strip the extension part.
                        OUString aLocation = dict.aLocations[0];
                        sal_Int32 nPos = aLocation.lastIndexOf( '.' );
                        aLocation = aLocation.copy( 0, nPos );
                        mvDicts[k].aName = aLocation;
 
                        ++k;
                    }
                }
            }
            DBG_ASSERT( k == numdict, "index mismatch?" );
        }
        else
        {
            // no dictionary found so register no dictionaries
            mvDicts.clear();
            aSuppLocales.realloc(0);
        }
    }
}
 
Sequence< Locale > SAL_CALL Hyphenator::getLocales()
{
    MutexGuard aGuard(GetLinguMutex());
    ensureLocales();
    return aSuppLocales;
}
 
sal_Bool SAL_CALL Hyphenator::hasLocale(const Locale& rLocale)
{
    MutexGuard  aGuard( GetLinguMutex() );
    ensureLocales();
    return comphelper::findValue(aSuppLocales, rLocale) != -1;
}
 
namespace {
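// Load the libhyphen pattern file (<dictionary name> + ".dic") for the given
// entry and remember the text encoding declared in its header; returns false
// if the file could not be loaded.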
bool LoadDictionary(HDInfo& rDict)
{
    OUString DictFN = rDict.aName + ".dic";
    OUString dictpath;
 
    osl::FileBase::getSystemPathFromFileURL(DictFN, dictpath);
 
#if defined(_WIN32)
    // hnj_hyphen_load expects UTF-8 encoded paths with \\?\ long path prefix.
    OString sTmp = Win_AddLongPathPrefix(OUStringToOString(dictpath, RTL_TEXTENCODING_UTF8));
#else
    OString sTmp(OU2ENC(dictpath, osl_getThreadTextEncoding()));
#endif
    HyphenDict* dict = hnj_hyphen_load(sTmp.getStr());
    if (!dict)
    {
        SAL_WARN(
            "lingucomponent",
            "Couldn't find file " << dictpath);
        return false;
    }
    rDict.aPtr = dict;
    rDict.eEnc = getTextEncodingFromCharset(dict->cset);
    return true;
}
 
OUString makeLowerCase(const OUString& aTerm, CharClass const* pCC)
{
    if (pCC)
        return pCC->lowercase(aTerm);
    return aTerm;
}
 
OUString makeUpperCase(const OUString& aTerm, CharClass const* pCC)
{
    if (pCC)
        return pCC->uppercase(aTerm);
    return aTerm;
}
 
OUString makeInitCap(const OUString& aTerm, CharClass const* pCC)
{
    sal_Int32 tlen = aTerm.getLength();
    if (pCC && tlen)
    {
        OUString bTemp = aTerm.copy(0, 1);
        if (tlen > 1)
            return (pCC->uppercase(bTemp, 0, 1) + pCC->lowercase(aTerm, 1, (tlen - 1)));
 
        return pCC->uppercase(bTemp, 0, 1);
    }
    return aTerm;
}
 
struct hyphenation_result
{
    int n = 0;
    bool failed = true;
    char** rep = nullptr; // replacements of discretionary hyphenation
    int* pos = nullptr; // array of [hyphenation point] minus [deletion position]
    int* cut = nullptr; // length of deletions in original word
    std::unique_ptr<char[]> hyphens;
 
    ~hyphenation_result()
    {
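        // rep/pos/cut are allocated with malloc() by libhyphen, so release
        // them with free()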
        if (rep)
        {
            for (int i = 0; i < n; i++)
            {
                if (rep[i])
                    free(rep[i]);
            }
            free(rep);
        }
        if (pos)
            free(pos);
        if (cut)
            free(cut);
    }
};
 
hyphenation_result getHyphens(std::u16string_view word, const HDInfo& hdInfo, sal_Int16 minLead,
                              sal_Int16 minTrail)
{
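    // Runs libhyphen on the word. On success the returned hyphens buffer holds
    // an odd value at index i when a break is allowed after character i of the
    // encoded word; rep/pos/cut describe discretionary (spelling-changing)
    // hyphenations at those positions.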
    // first convert any smart quotes or apostrophes to normal ones
    OUStringBuffer aBuf(word);
    for (sal_Int32 ix = 0; ix < aBuf.getLength(); ix++)
    {
        sal_Unicode ch = aBuf[ix];
        if ((ch == 0x201C) || (ch == 0x201D))
            aBuf[ix] = u'"';
        if ((ch == 0x2018) || (ch == 0x2019))
            aBuf[ix] = u'\'';
    }
 
    // now convert word to all lowercase for pattern recognition
    OUString nTerm(makeLowerCase(OUString::unacquired(aBuf), hdInfo.apCC.get()));
 
    // now convert word to needed encoding
    OString encWord(OU2ENC(nTerm, hdInfo.eEnc));
 
    // now strip off any ending periods
    auto lastValidPos = std::string_view(encWord).find_last_not_of('.');
    if (lastValidPos == std::string_view::npos)
        return {};
 
    int n = lastValidPos + 1;
    std::unique_ptr<char[]> hyphens(new char[n + 5]);
    char** rep = nullptr; // replacements of discretionary hyphenation
    int* pos = nullptr; // array of [hyphenation point] minus [deletion position]
    int* cut = nullptr; // length of deletions in original word
 
    HyphenDict* dict = hdInfo.aPtr;
    const bool failed = 0 != hnj_hyphen_hyphenate3( dict, encWord.getStr(), n, hyphens.get(), nullptr,
                &rep, &pos, &cut, minLead, minTrail,
                // TODO add "no-limit"/0 clh/crh support to libhyphen, also add minCompoundLead/Trail support
                std::max<sal_Int16>(dict->clhmin, 2) + std::max(0, minLead  - std::max<sal_Int16>(dict->lhmin, 2)),
                std::max<sal_Int16>(dict->crhmin, 2) + std::max(0, minTrail - std::max<sal_Int16>(dict->rhmin, 2)) );
    return { n, failed, rep, pos, cut, std::move(hyphens) }; // buffers are freed in the dtor
}
}
 
const HDInfo* Hyphenator::getMatchingDict(const css::lang::Locale& aLocale)
{
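    // search from the back, so the dictionary added last for a matching locale
    // is used; its pattern file is loaded lazily on first use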
    MutexGuard aGuard(GetLinguMutex());
    ensureLocales();
    auto it = std::find_if(mvDicts.rbegin(), mvDicts.rend(),
                           [&aLocale](auto& el) { return el.aLoc == aLocale; });
    if (it == mvDicts.rend())
        return nullptr;
 
    // if this dictionary has not been loaded yet do that
    if (!it->aPtr)
    {
        if (!LoadDictionary(*it))
            return nullptr;
    }
 
    // we don't want to work with a default text encoding, since the resulting
    // errors may occur only for specific text and thus may be hard to notice.
    // Better to always make a clean exit here if the text encoding is in question.
    // Hopefully something not working at all will raise proper attention quickly. ;-)
    DBG_ASSERT(it->eEnc != RTL_TEXTENCODING_DONTKNOW,
               "failed to get text encoding! (maybe incorrect encoding string in file)");
    if (it->eEnc == RTL_TEXTENCODING_DONTKNOW)
        return nullptr;
 
    return &*it;
}
 
Reference< XHyphenatedWord > SAL_CALL Hyphenator::hyphenate( const OUString& aWord,
       const css::lang::Locale& aLocale,
       sal_Int16 nMaxLeading,
       const css::uno::Sequence< css::beans::PropertyValue >& aProperties )
{
    PropertyHelper_Hyphenation& rHelper = GetPropHelper();
    rHelper.SetTmpPropVals(aProperties);
    sal_Int16 minTrail = rHelper.GetMinTrailing();
    sal_Int16 minLead = rHelper.GetMinLeading();
    sal_Int16 minCompoundLead = rHelper.GetCompoundMinLeading();
    sal_Int16 minCompoundTrail = rHelper.GetCompoundMinTrailing();
    sal_Int16 minLen = rHelper.GetMinWordLength();
    bool bNoHyphenateCaps = rHelper.IsNoHyphenateCaps();
 
    // if we have a hyphenation dictionary matching this locale
    if (auto pHDInfo = getMatchingDict(aLocale))
    {
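        // Break-point bookkeeping: position of the best break found so far,
        // its alternative-spelling variants, and the previously accepted
        // values so a break can be rolled back near compound constituent
        // boundaries.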
        int nHyphenationPos = -1;
        int nHyphenationPosAlt = -1;
        int nHyphenationPosAltHyph = -1;
        int nPrevHyphenationPos = -1;
        int nPrevHyphenationPosAlt = -1;
        int nPrevHyphenationPosAltHyph = -1;
        int nCompoundHyphenationPos = -1;
 
        // hyphenate the word with that dictionary
        rtl_TextEncoding eEnc = pHDInfo->eEnc;
        CharClass* pCC = pHDInfo->apCC.get();
 
        // Don't hyphenate uppercase words if requested
        if (bNoHyphenateCaps && aWord == makeUpperCase(aWord, pCC))
        {
            return nullptr;
        }
 
        CapType ct = capitalType(aWord, pCC);
 
        auto result = getHyphens(aWord, *pHDInfo, minLead, minTrail);
        if (result.failed)
            return nullptr;
 
        sal_Int32 Leading =  GetPosInWordToCheck( aWord, nMaxLeading );
 
        // use morphological analysis of Hunspell to get better hyphenation of compound words
        // optionally when hyphenation zone is enabled
        // pa: fields contain stems resulting from the compound analysis of non-dictionary words
        // hy: fields contain hyphenation data of dictionary (compound) words
        Reference< XSpellAlternatives > xTmpRes;
        bool bAnalyzed = false; // it is enough to analyse the word once
        bool bCompoundHyphenation = true; // try to hyphenate compound words better
        OUString sStems; // processed result of the compound word analysis, e.g. com|pound|word
        sal_Int32 nSuffixLen = 0; // do not remove break points in suffixes
 
        for (sal_Int32 i = 0; i < result.n; i++)
        {
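            // A set low bit in hyphens[i] allows a break after position i;
            // a non-null rep[i] marks a discretionary hyphenation that changes
            // the spelling, with pos[i]/cut[i] describing the replaced span.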
            int leftrep = 0;
            bool hit = (result.n >= minLen);
            bool compoundhit = (result.hyphens[i] & 1) != 0;
            bool bPrevCompound = false;
            if (!result.rep || !result.rep[i])
            {
                hit = hit && (result.hyphens[i] & 1) && (i < Leading);
                hit = hit && (i >= (minLead-1) );
                hit = hit && ((result.n - i - 1) >= minTrail);
            }
            else
            {
                // calculate change character length before hyphenation point signed with '='
                for (char * c = result.rep[i]; *c && (*c != '='); c++)
                {
                    if (eEnc == RTL_TEXTENCODING_UTF8)
                    {
                        if (static_cast<unsigned char>(*c) >> 6 != 2)
                            leftrep++;
                    }
                    else
                        leftrep++;
                }
                hit = hit && (result.hyphens[i] & 1) && ((i + leftrep - result.pos[i]) < Leading);
                hit = hit && ((i + leftrep - result.pos[i]) >= (minLead-1) );
                hit = hit && ((result.n - i - 1 + sal::static_int_cast< sal_sSize >(strlen(result.rep[i])) - leftrep - 1) >= minTrail);
            }
            if (hit || compoundhit)
            {
                // skip hyphenation inside compound constituents right after constituent boundaries in compound words
                // if minCompoundLead > 2 (default value: less than n=minCompoundLead character distance)
                // or always skip hyphenation inside compound constituents, if minCompoundLead == 0,
                // also skip hyphenation inside compound constituents right before constituent boundaries in compound words
                // or always skip hyphenation inside compound constituents, if minCompoundTrail == 0
                // if both minCompoundLead and minCompoundTrail == 0, don't hyphenate inside suffixes, too
                if ( bCompoundHyphenation && (nCompoundHyphenationPos > -1 || minCompoundLead == 0 || minCompoundTrail > 2 || minCompoundTrail == 0 ) &&
                        // check minimal distance from the left constituent boundary
                        ( ( minCompoundLead > 2 && i - nCompoundHyphenationPos < minCompoundLead ) ||
                        // check minimal distance from the right constituent boundary
                        ( minCompoundTrail > 2 && i - nCompoundHyphenationPos < minCompoundTrail ) ||
                        // special value: zero compound limit means forbidden hyphenation
                        // inside (stems of) compound constituents
                        minCompoundLead == 0 || minCompoundTrail == 0 ) )
                {
                    uno::Reference< XLinguServiceManager2 > xLngSvcMgr( GetLngSvcMgr_Impl() );
                    uno::Reference< XSpellChecker1 > xSpell;
 
                    LanguageType nLanguage = LinguLocaleToLanguage( aLocale );
 
                    xSpell.set( xLngSvcMgr->getSpellChecker(), UNO_QUERY );
 
                    // get morphological analysis of the word
                    if ( ( bAnalyzed && xTmpRes.is() ) || ( xSpell.is() && xSpell->isValid(
                            SPELLML_SUPPORT, static_cast<sal_uInt16>(nLanguage),
                            uno::Sequence< beans::PropertyValue >() ) ) )
                    {
                        if ( !bAnalyzed )
                        {
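                            // ask the spell checker, via its SpellML support,
                            // for a morphological analysis of the word; the
                            // analysis is returned in the spelling alternatives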
                            xTmpRes = xSpell->spell( "<?xml?><query type='analyze'><word>" +
                                                       aWord + "</word></query>",
                                               static_cast<sal_uInt16>(nLanguage),
                                               uno::Sequence< beans::PropertyValue >() );
                            bAnalyzed = true;
 
                            if (xTmpRes.is())
                            {
                                Sequence<OUString>seq = xTmpRes->getAlternatives();
                                if (seq.hasElements())
                                {
                                    sal_Int32 nEndOfFirstAnalysis = seq[0].indexOf("</a>");
                                    // FIXME use only the first analysis
                                    OUString morph(
                                            seq[0].copy(0, nEndOfFirstAnalysis));
 
                                    // concatenate pa: fields, i.e. stems in the analysis:
                                    // pa:stem1 pa:stem2 pa:stem3 -> stem1||stem2||stem3
                                    sal_Int32 nPa = -1;
                                    while ( (nPa = morph.indexOf(u" pa:", nPa + 1)) > -1 )
                                    {
                                        // use hy: field of the actual stem, if it exists
                                        // pa:stem1 hy:st|em1 pa:stem2 -> st|em1||stem2
                                        sal_Int32 nHy = morph.indexOf(u" hy:", nPa + 3);
                                        sal_Int32 nPa2 = morph.indexOf(u" pa:", nPa + 3);
 
                                        if ( nHy > -1 && ( nPa2 == -1 || nHy < nPa2 ) )
                                        {
                                            OUString sStems2(morph.getToken(1, ' ', nHy).copy(3));
                                            if ( sStems2.indexOf('|') > -1 )
                                                sStems += sStems2+ u"||";
                                            else if ( sal_Int32 nBreak = o3tl::toInt32(sStems2) )
                                            {
                                                OUString sPa(morph.getToken(1, ' ', nPa).copy(3));
                                                if ( nBreak < sPa.getLength() )
                                                    sStems += OUString::Concat(sPa.subView(0, nBreak)) + u"|" +
                                                           sPa.subView(nBreak);
                                            }
                                        }
                                        else
                                        {
                                            OUString sPa(morph.getToken(1, ' ', nPa).copy(3));
 
                                            // handle special case: missing pa: in morphological analysis
                                            // before in-word suffixes (German, Swedish etc. dictionaries)
                                            // (recognized by the single last pa:)
                                            if (sStems.isEmpty() && nPa2 == -1 && aWord.endsWith(sPa))
                                            {
                                                sStems = OUString::Concat(aWord.subView(0, aWord.getLength() -
                                                             sPa.getLength())) + u"||" +
                                                         aWord.subView(aWord.getLength() -
                                                             sPa.getLength());
                                                break;
                                            }
 
                                            sStems += sPa + "||";
 
                                            // count suffix length
                                            sal_Int32 nSt = morph.lastIndexOf(" st:");
                                            if ( nSt > -1 )
                                            {
                                                sal_Int32 nStemLen =
                                                    o3tl::getToken(morph, 1, ' ', nSt).length() - 3;
                                                if ( nStemLen < sPa.getLength() )
                                                    nSuffixLen = sPa.getLength() - nStemLen;
                                            }
                                        }
 
                                        if ( nPa == -1 ) // getToken() can modify nPa
                                            break;
                                    }
 
                                    // only hy:, but not pa:
                                    if ( sStems.isEmpty() )
                                    {
                                        // check hy: (pre-defined hyphenation)
                                        sal_Int32 nHy = morph.indexOf(" hy:");
                                        if (nHy > -1)
                                        {
                                            sStems = morph.getToken(1, ' ', nHy).copy(3);
                                            if ( sStems.indexOf('|') == -1 && sStems.indexOf('-') == -1 )
                                            {
                                                if ( sal_Int32 nBreak = o3tl::toInt32(sStems) )
                                                {
                                                    if ( nBreak < aWord.getLength() )
                                                        sStems += OUString::Concat(aWord.subView(0, nBreak)) + u"|" +
                                                               aWord.subView(nBreak);
                                                }
                                            }
                                        }
                                    }
                                }
                            }
                        }
 
                        // handle the stem string separated by '|', e.g. "program hy:pro|gram"
                        if ( sStems.indexOf('|') > -1 )
                        {
                            sal_Int32 nLetters = 0; // count of non-separator characters
                            sal_Int32 nSepPos = -1; // position of the last '|' separator used for stem boundaries
                            sal_Int32 nSepLetterPos = -1; // letter position corresponding to nSepPos
                            bool bWeightedSep = false; // double separator || = weighted stem boundary
                            sal_Int32 j = 0;
                            for (; j < sStems.getLength() && nLetters <= i; j++)
                            {
                                if ( sStems[j] == '|' )
                                {
                                    bWeightedSep = nSepPos > -1 && (j - 1 == nSepPos);
                                    nSepPos = j;
                                    nSepLetterPos = nLetters;
                                }
                                else if ( sStems[j] != '-' && sStems[j] != '=' && sStems[j] != '*' )
                                    ++nLetters;
                            }
                            // skip break points shortly after the stem boundaries
                            if ( hit &&
                                // too close to the left break point or special case?
                                ( i - nCompoundHyphenationPos < minCompoundLead || minCompoundLead == 0 ) &&
                                // there is a stem boundary before the actual break point
                                nSepPos > -1 &&
                                // and the break point is within a stem, i.e. not in the
                                // suffix of the last stem or both minCompoundLead and
                                // minCompoundTrail == 0, i.e. hyphenation is allowed only
                                // at compound constituent boundaries
                                ( i < aWord.getLength() - nSuffixLen - 1 ||
                                    ( minCompoundLead == 0 && minCompoundTrail == 0) ) &&
                                // and it is not another stem boundary
                                j + 1 < sStems.getLength() &&
                                ( sStems[j] != u'|' ||
                                // except when only the previous separator was a weighted one
                                    ( bWeightedSep && ( j + 1 == sStems.getLength() ||
                                                        sStems[j + 1] != u'|' ) ) ) )
                            {
                                continue;
                            }
                            // skip break points shortly before the stem boundaries
                            // (hit is not mandatory, only compoundhit, because the
                            // compound boundary can be after the lead limit)
                            if (!hit &&
                                // too close to the right break point?
                                ( i - nCompoundHyphenationPos < minCompoundTrail || minCompoundTrail == 0 ) &&
                                // and the break point is within a stem, i.e. not in the
                                // suffix of the last stem or both minCompoundLead and
                                // minCompoundTrail == 0, i.e. hyphenation is allowed only
                                // at compound constituent boundaries
                                ( i < aWord.getLength() - nSuffixLen - 1 ||
                                    ( minCompoundLead == 0 && minCompoundTrail == 0) ) &&
                                // and it is a stem boundary
                                j + 1 < sStems.getLength() && sStems[j+1] == u'|' &&
                                // the previous break point is not a boundary
                                (nSepPos == -1 || nCompoundHyphenationPos != nSepLetterPos - 1 ) )
                            {
                                nHyphenationPos = nPrevHyphenationPos;
                                nHyphenationPosAlt = nPrevHyphenationPosAlt;
                                nHyphenationPosAltHyph = nPrevHyphenationPosAltHyph;
                                continue;
                            }
                            bPrevCompound = nSepPos == j || nSepPos == j - 1;
                        }
                        else
                            // not a compound word
                            bCompoundHyphenation = false;
                    }
                    else
                        // no SPELLML support, no morphological analysis
                        bCompoundHyphenation = false;
                }
 
                nCompoundHyphenationPos = i;
                if (hit)
                {
                    if ( bPrevCompound )
                        nPrevHyphenationPos = nHyphenationPos;
                    nHyphenationPos = i;
                }
                if (result.rep && result.rep[i])
                {
                    if ( bPrevCompound )
                    {
                        nPrevHyphenationPosAlt = nHyphenationPosAlt;
                        nPrevHyphenationPosAltHyph = nHyphenationPosAltHyph;
                    }
                    nHyphenationPosAlt = i - result.pos[i];
                    nHyphenationPosAltHyph = i + leftrep - result.pos[i];
                }
            }
        }
 
        Reference<XHyphenatedWord> xRes;
        if (nHyphenationPos != -1)
        {
            if (result.rep && result.rep[nHyphenationPos])
            {
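                // discretionary hyphenation changes the spelling: build the
                // replacement segment from rep[] and return it as an
                // alternative spelling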
                // remove equal sign
                char * s = result.rep[nHyphenationPos];
                int eq = 0;
                for (; *s; s++)
                {
                    if (*s == '=') eq = 1;
                    if (eq) *s = *(s + 1);
                }
                OUString repHyphlow(result.rep[nHyphenationPos], strlen(result.rep[nHyphenationPos]), eEnc);
                OUString repHyph;
                switch (ct)
                {
                    case CapType::ALLCAP:
                    {
                        repHyph = makeUpperCase(repHyphlow, pCC);
                        break;
                    }
                    case CapType::INITCAP:
                    {
                        if (nHyphenationPosAlt == -1)
                            repHyph = makeInitCap(repHyphlow, pCC);
                        else
                             repHyph = repHyphlow;
                        break;
                    }
                    default:
                    {
                        repHyph = repHyphlow;
                        break;
                    }
                }
 
                // handle shortening
                sal_Int16 nPos = static_cast<sal_Int16>((nHyphenationPosAltHyph < nHyphenationPos) ?
                nHyphenationPosAltHyph : nHyphenationPos);
                // discretionary hyphenation
                xRes = HyphenatedWord::CreateHyphenatedWord( aWord, LinguLocaleToLanguage( aLocale ), nPos,
                    aWord.replaceAt(nHyphenationPosAlt + 1, result.cut[nHyphenationPos], repHyph),
                    static_cast<sal_Int16>(nHyphenationPosAltHyph));
            }
            else
            {
                xRes = HyphenatedWord::CreateHyphenatedWord( aWord, LinguLocaleToLanguage( aLocale ),
                    static_cast<sal_Int16>(nHyphenationPos), aWord, static_cast<sal_Int16>(nHyphenationPos));
            }
        }
        return xRes;
    }
    return nullptr;
}
 
Reference < XHyphenatedWord > SAL_CALL Hyphenator::queryAlternativeSpelling(
        const OUString& aWord,
        const css::lang::Locale& aLocale,
        sal_Int16 nIndex,
        const css::uno::Sequence< css::beans::PropertyValue >& aProperties )
{
    // First allow only one extra character before the hyphen, so as not to miss the right break point:
    for (int extrachar = 1; extrachar <= 2; extrachar++)
    {
        Reference< XHyphenatedWord > xRes = hyphenate(aWord, aLocale, nIndex + 1 + extrachar, aProperties);
        if (xRes.is() && xRes->isAlternativeSpelling() && xRes->getHyphenationPos() == nIndex)
            return xRes;
    }
    return nullptr;
}
 
Reference< XPossibleHyphens > SAL_CALL Hyphenator::createPossibleHyphens( const OUString& aWord,
        const css::lang::Locale& aLocale,
        const css::uno::Sequence< css::beans::PropertyValue >& aProperties )
{
    PropertyHelper_Hyphenation& rHelper = GetPropHelper();
    rHelper.SetTmpPropVals(aProperties);
    sal_Int16 minTrail = rHelper.GetMinTrailing();
    sal_Int16 minLead = rHelper.GetMinLeading();
    sal_Int16 minLen = rHelper.GetMinWordLength();
 
    // Resolves: fdo#41083 honour MinWordLength in "createPossibleHyphens" as
    // well as "hyphenate"
    if (aWord.getLength() < minLen)
        return nullptr;
 
    // if we have a hyphenation dictionary matching this locale
    if (auto pHDInfo = getMatchingDict(aLocale))
    {
        // hyphenate the word with that dictionary
        auto result = getHyphens(aWord, *pHDInfo, minLead, minTrail);
        if (result.failed)
            return nullptr;
 
        sal_Int32 nHyphCount = 0;
 
        // FIXME: shouldn't we iterate code points instead?
        for (sal_Int32 i = 0; i < aWord.getLength(); i++)
        {
            if (result.hyphens[i] & 1)
                nHyphCount++;
        }
 
        Sequence< sal_Int16 > aHyphPos(nHyphCount);
        sal_Int16 *pPos = aHyphPos.getArray();
        OUStringBuffer hyphenatedWordBuffer;
        nHyphCount = 0;
 
        for (sal_Int32 i = 0; i < aWord.getLength(); i++)
        {
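            // copy each character and, where the hyphens mask allows a break,
            // record its position and append '=' so the returned word marks
            // every possible break point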
            hyphenatedWordBuffer.append(aWord[i]);
            // hyphenation position
            if (result.hyphens[i] & 1)
            {
                // linguistic::PossibleHyphens is stuck with
                // css::uno::Sequence<sal_Int16> because of
                // css.linguistic2.XPossibleHyphens.getHyphenationPositions, so
                // any further positions need to be ignored:
                assert(i >= SAL_MIN_INT16);
                if (i > SAL_MAX_INT16)
                {
                    SAL_WARN(
                        "lingucomponent",
                        "hyphen pos " << i << " > SAL_MAX_INT16 in \"" << aWord
                            << "\"");
                    continue;
                }
                pPos[nHyphCount] = i;
                hyphenatedWordBuffer.append('=');
                nHyphCount++;
            }
        }
 
        OUString hyphenatedWord = hyphenatedWordBuffer.makeStringAndClear();
 
        return PossibleHyphens::CreatePossibleHyphens(
            aWord, LinguLocaleToLanguage( aLocale ), hyphenatedWord, aHyphPos);
    }
 
    return nullptr;
}
 
sal_Bool SAL_CALL Hyphenator::addLinguServiceEventListener(
        const Reference< XLinguServiceEventListener >& rxLstnr )
{
    MutexGuard  aGuard( GetLinguMutex() );
 
    bool bRes = false;
    if (!bDisposing && rxLstnr.is())
    {
        bRes = GetPropHelper().addLinguServiceEventListener( rxLstnr );
    }
    return bRes;
}
 
sal_Bool SAL_CALL Hyphenator::removeLinguServiceEventListener(
        const Reference< XLinguServiceEventListener >& rxLstnr )
{
    MutexGuard  aGuard( GetLinguMutex() );
 
    bool bRes = false;
    if (!bDisposing && rxLstnr.is())
    {
        bRes = GetPropHelper().removeLinguServiceEventListener( rxLstnr );
    }
    return bRes;
}
 
OUString SAL_CALL Hyphenator::getServiceDisplayName(const Locale& rLocale)
{
    std::locale loc(Translate::Create("svt", LanguageTag(rLocale)));
    return Translate::get(STR_DESCRIPTION_LIBHYPHEN, loc);
}
 
void SAL_CALL Hyphenator::initialize( const Sequence< Any >& rArguments )
{
    MutexGuard  aGuard( GetLinguMutex() );
 
    if (pPropHelper)
        return;
 
    sal_Int32 nLen = rArguments.getLength();
    if (2 == nLen)
    {
        Reference< XLinguProperties >   xPropSet;
        rArguments[0] >>= xPropSet;
        // rArguments[1] >>= xDicList;
 
        //! The pointer allows access to the non-UNO functions, while the
        //! reference to the UNO interface increases the ref-count and will
        //! implicitly free the memory when the object is no longer used.
        pPropHelper.reset( new PropertyHelper_Hyphenation( static_cast<XHyphenator *>(this), xPropSet ) );
        pPropHelper->AddAsPropListener();   //! after a reference is established
    }
    else {
        OSL_FAIL( "wrong number of arguments in sequence" );
    }
}
 
void SAL_CALL Hyphenator::dispose()
{
    MutexGuard  aGuard( GetLinguMutex() );
 
    if (!bDisposing)
    {
        bDisposing = true;
        EventObject aEvtObj( static_cast<XHyphenator *>(this) );
        aEvtListeners.disposeAndClear( aEvtObj );
        if (pPropHelper)
        {
            pPropHelper->RemoveAsPropListener();
            pPropHelper.reset();
        }
    }
}
 
void SAL_CALL Hyphenator::addEventListener( const Reference< XEventListener >& rxListener )
{
    MutexGuard  aGuard( GetLinguMutex() );
 
    if (!bDisposing && rxListener.is())
        aEvtListeners.addInterface( rxListener );
}
 
void SAL_CALL Hyphenator::removeEventListener( const Reference< XEventListener >& rxListener )
{
    MutexGuard  aGuard( GetLinguMutex() );
 
    if (!bDisposing && rxListener.is())
        aEvtListeners.removeInterface( rxListener );
}
 
// Service specific part
OUString SAL_CALL Hyphenator::getImplementationName()
{
    return u"org.openoffice.lingu.LibHnjHyphenator"_ustr;
}
 
sal_Bool SAL_CALL Hyphenator::supportsService( const OUString& ServiceName )
{
    return cppu::supportsService(this, ServiceName);
}
 
Sequence< OUString > SAL_CALL Hyphenator::getSupportedServiceNames()
{
    return { SN_HYPHENATOR };
}
 
extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface*
lingucomponent_Hyphenator_get_implementation(
    css::uno::XComponentContext* , css::uno::Sequence<css::uno::Any> const&)
{
    return cppu::acquire(new Hyphenator());
}
 
 
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
