/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* This file is part of the LibreOffice project.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This file incorporates work covered by the following license notice:
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed
* with this work for additional information regarding copyright
* ownership. The ASF licenses this file to you under the Apache
* License, Version 2.0 (the "License"); you may not use this file
* except in compliance with the License. You may obtain a copy of
* the License at http://www.apache.org/licenses/LICENSE-2.0 .
*/
#include <com/sun/star/uno/Reference.h>
#include <com/sun/star/linguistic2/SpellFailure.hpp>
#include <com/sun/star/linguistic2/XLinguProperties.hpp>
#include <comphelper/lok.hxx>
#include <comphelper/processfactory.hxx>
#include <cppuhelper/supportsservice.hxx>
#include <cppuhelper/weak.hxx>
#include <com/sun/star/lang/XMultiServiceFactory.hpp>
#include <tools/debug.hxx>
#include <osl/mutex.hxx>
#include <osl/thread.h>
#include <com/sun/star/ucb/XSimpleFileAccess.hpp>
#include <lingutil.hxx>
#include <hunspell.hxx>
#include "sspellimp.hxx"
#include <linguistic/misc.hxx>
#include <linguistic/spelldta.hxx>
#include <i18nlangtag/languagetag.hxx>
#include <svtools/strings.hrc>
#include <unotools/lingucfg.hxx>
#include <unotools/resmgr.hxx>
#include <osl/diagnose.h>
#include <osl/file.hxx>
#include <rtl/ustrbuf.hxx>
#include <rtl/textenc.h>
#include <sal/log.hxx>
#include <numeric>
#include <utility>
#include <vector>
#include <set>
#include <string.h>
using namespace osl;
using namespace com::sun::star;
using namespace com::sun::star::beans;
using namespace com::sun::star::lang;
using namespace com::sun::star::uno;
using namespace com::sun::star::linguistic2;
using namespace linguistic;
// XML-header of SPELLML queries
#if !defined SPELL_XML
constexpr OUStringLiteral SPELL_XML = u"<?xml?>";
#endif
// only available in hunspell >= 1.5
#if !defined MAXWORDLEN
#define MAXWORDLEN 176
#endif
SpellChecker::SpellChecker() :
m_aEvtListeners(GetLinguMutex()),
m_bDisposing(false)
{
}
SpellChecker::DictItem::DictItem(OUString i_DName, Locale i_DLoc, rtl_TextEncoding i_DEnc)
: m_aDName(std::move(i_DName))
, m_aDLoc(std::move(i_DLoc))
, m_aDEnc(i_DEnc)
{
}
SpellChecker::~SpellChecker()
{
if (m_pPropHelper)
{
m_pPropHelper->RemoveAsPropListener();
}
}
PropertyHelper_Spelling & SpellChecker::GetPropHelper_Impl()
{
if (!m_pPropHelper)
{
Reference< XLinguProperties > xPropSet = GetLinguProperties();
m_pPropHelper.reset( new PropertyHelper_Spelling( static_cast<XSpellChecker *>(this), xPropSet ) );
m_pPropHelper->AddAsPropListener(); //! after a reference is established
}
return *m_pPropHelper;
}
Sequence< Locale > SAL_CALL SpellChecker::getLocales()
{
MutexGuard aGuard( GetLinguMutex() );
// this routine should return the locales supported by the installed
// dictionaries.
if (m_DictItems.empty())
{
SvtLinguConfig aLinguCfg;
// get list of extension dictionaries-to-use
// (or better speaking: the list of dictionaries using the
// new configuration entries).
std::vector< SvtLinguConfigDictionaryEntry > aDics;
uno::Sequence< OUString > aFormatList;
aLinguCfg.GetSupportedDictionaryFormatsFor( u"SpellCheckers"_ustr,
u"org.openoffice.lingu.MySpellSpellChecker"_ustr, aFormatList );
for (auto const& format : aFormatList)
{
std::vector< SvtLinguConfigDictionaryEntry > aTmpDic(
aLinguCfg.GetActiveDictionariesByFormat(format) );
aDics.insert( aDics.end(), aTmpDic.begin(), aTmpDic.end() );
}
//!! for compatibility with old dictionaries (the ones not using extensions
//!! or new configuration entries, but still using the dictionary.lst file)
//!! Get the list of old style spell checking dictionaries to use...
std::vector< SvtLinguConfigDictionaryEntry > aOldStyleDics(
GetOldStyleDics( "DICT" ) );
// to prefer dictionaries with configuration entries we will only
// use those old style dictionaries that add a language that
// is not yet supported by the list of new style dictionaries
MergeNewStyleDicsAndOldStyleDics( aDics, aOldStyleDics );
if (!aDics.empty())
{
uno::Reference< lang::XMultiServiceFactory > xServiceFactory(comphelper::getProcessServiceFactory());
uno::Reference< ucb::XSimpleFileAccess > xAccess(xServiceFactory->createInstance(u"com.sun.star.ucb.SimpleFileAccess"_ustr), uno::UNO_QUERY);
// get supported locales from the dictionaries-to-use...
std::set<OUString> aLocaleNamesSet;
for (auto const& dict : aDics)
{
const uno::Sequence< OUString > aLocaleNames( dict.aLocaleNames );
uno::Sequence< OUString > aLocations( dict.aLocations );
SAL_WARN_IF(
aLocaleNames.hasElements() && !aLocations.hasElements(),
"lingucomponent", "no locations");
if (aLocations.hasElements())
{
if (xAccess.is() && xAccess->exists(aLocations[0]))
{
for (auto const& locale : aLocaleNames)
{
if (!comphelper::LibreOfficeKit::isAllowlistedLanguage(locale))
continue;
aLocaleNamesSet.insert(locale);
}
}
else
{
SAL_WARN(
"lingucomponent",
"missing <" << aLocations[0] << ">");
}
}
}
// ... and add them to the resulting sequence
m_aSuppLocales.realloc( aLocaleNamesSet.size() );
std::transform(
aLocaleNamesSet.begin(), aLocaleNamesSet.end(), m_aSuppLocales.getArray(),
[](auto const& localeName) { return LanguageTag::convertToLocale(localeName); });
//! For each dictionary and each locale we need a separate entry.
//! If this results in more than one dictionary per locale than (for now)
//! it is undefined which dictionary gets used.
//! In the future the implementation should support using several dictionaries
//! for one locale.
sal_uInt32 nDictSize = std::accumulate(aDics.begin(), aDics.end(), sal_uInt32(0),
[](const sal_uInt32 nSum, const SvtLinguConfigDictionaryEntry& dict) {
return nSum + dict.aLocaleNames.getLength(); });
// add dictionary information
m_DictItems.reserve(nDictSize);
for (auto const& dict : aDics)
{
if (dict.aLocaleNames.hasElements() &&
dict.aLocations.hasElements())
{
const uno::Sequence< OUString > aLocaleNames( dict.aLocaleNames );
// currently only one language per dictionary is supported in the actual implementation...
// Thus here we work-around this by adding the same dictionary several times.
// Once for each of its supported locales.
for (auto const& localeName : aLocaleNames)
{
// also both files have to be in the same directory and the
// file names must only differ in the extension (.aff/.dic).
// Thus we use the first location only and strip the extension part.
OUString aLocation = dict.aLocations[0];
sal_Int32 nPos = aLocation.lastIndexOf( '.' );
aLocation = aLocation.copy( 0, nPos );
m_DictItems.emplace_back(aLocation, LanguageTag::convertToLocale(localeName), RTL_TEXTENCODING_DONTKNOW);
}
}
}
DBG_ASSERT( nDictSize == m_DictItems.size(), "index mismatch?" );
}
else
{
// no dictionary found so register no dictionaries
m_aSuppLocales.realloc(0);
}
}
return m_aSuppLocales;
}
sal_Bool SAL_CALL SpellChecker::hasLocale(const Locale& rLocale)
{
MutexGuard aGuard( GetLinguMutex() );
bool bRes = false;
if (!m_aSuppLocales.hasElements())
getLocales();
for (auto const& suppLocale : m_aSuppLocales)
{
if (rLocale == suppLocale)
{
bRes = true;
break;
}
}
return bRes;
}
sal_Int16 SpellChecker::GetSpellFailure(const OUString &rWord, const Locale &rLocale, int& rInfo)
{
if (rWord.getLength() > MAXWORDLEN)
return -1;
Hunspell * pMS = nullptr;
rtl_TextEncoding eEnc = RTL_TEXTENCODING_DONTKNOW;
// initialize a myspell object for each dictionary once
// (note: mutex is held higher up in isValid)
sal_Int16 nRes = -1;
// first handle smart quotes both single and double
OUStringBuffer rBuf(rWord);
sal_Int32 n = rBuf.getLength();
sal_Unicode c;
sal_Int32 extrachar = 0;
for (sal_Int32 ix=0; ix < n; ix++)
{
c = rBuf[ix];
if ((c == 0x201C) || (c == 0x201D))
rBuf[ix] = u'"';
else if ((c == 0x2018) || (c == 0x2019))
rBuf[ix] = u'\'';
// recognize words with Unicode ligatures and ZWNJ/ZWJ characters (only
// with 8-bit encoded dictionaries. For UTF-8 encoded dictionaries
// set ICONV and IGNORE aff file options, if needed.)
else if ((c == 0x200C) || (c == 0x200D) ||
((c >= 0xFB00) && (c <= 0xFB04)))
extrachar = 1;
}
OUString nWord(rBuf.makeStringAndClear());
if (n)
{
for (auto& currDict : m_DictItems)
{
pMS = nullptr;
eEnc = RTL_TEXTENCODING_DONTKNOW;
if (rLocale == currDict.m_aDLoc)
{
if (!currDict.m_pDict)
{
OUString dicpath = currDict.m_aDName + ".dic";
OUString affpath = currDict.m_aDName + ".aff";
OUString dict;
OUString aff;
osl::FileBase::getSystemPathFromFileURL(dicpath,dict);
osl::FileBase::getSystemPathFromFileURL(affpath,aff);
#if defined(_WIN32)
// workaround for Windows specific problem that the
// path length in calls to 'fopen' is limited to somewhat
// about 120+ characters which will usually be exceed when
// using dictionaries as extensions. (Hunspell waits UTF-8 encoded
// path with \\?\ long path prefix.)
OString aTmpaff = Win_AddLongPathPrefix(OUStringToOString(aff, RTL_TEXTENCODING_UTF8));
OString aTmpdict = Win_AddLongPathPrefix(OUStringToOString(dict, RTL_TEXTENCODING_UTF8));
#else
OString aTmpaff(OU2ENC(aff,osl_getThreadTextEncoding()));
OString aTmpdict(OU2ENC(dict,osl_getThreadTextEncoding()));
#endif
currDict.m_pDict = std::make_unique<Hunspell>(aTmpaff.getStr(),aTmpdict.getStr());
#if defined(H_DEPRECATED)
currDict.m_aDEnc = getTextEncodingFromCharset(currDict.m_pDict->get_dict_encoding().c_str());
#else
currDict.m_aDEnc = getTextEncodingFromCharset(currDict.m_pDict->get_dic_encoding());
#endif
}
pMS = currDict.m_pDict.get();
eEnc = currDict.m_aDEnc;
}
if (pMS)
{
// we don't want to work with a default text encoding since following incorrect
// results may occur only for specific text and thus may be hard to notice.
// Thus better always make a clean exit here if the text encoding is in question.
// Hopefully something not working at all will raise proper attention quickly. ;-)
DBG_ASSERT( eEnc != RTL_TEXTENCODING_DONTKNOW, "failed to get text encoding! (maybe incorrect encoding string in file)" );
if (eEnc == RTL_TEXTENCODING_DONTKNOW)
return -1;
OString aWrd(OU2ENC(nWord,eEnc));
#if defined(H_DEPRECATED)
bool bVal = pMS->spell(std::string(aWrd), &rInfo);
#else
bool bVal = pMS->spell(aWrd.getStr(), &rInfo) != 0;
#endif
if (!bVal) {
if (extrachar && (eEnc != RTL_TEXTENCODING_UTF8)) {
OUStringBuffer aBuf(nWord);
n = aBuf.getLength();
for (sal_Int32 ix=n-1; ix >= 0; ix--)
{
switch (aBuf[ix]) {
case 0xFB00: aBuf.remove(ix, 1); aBuf.insert(ix, "ff"); break;
case 0xFB01: aBuf.remove(ix, 1); aBuf.insert(ix, "fi"); break;
case 0xFB02: aBuf.remove(ix, 1); aBuf.insert(ix, "fl"); break;
case 0xFB03: aBuf.remove(ix, 1); aBuf.insert(ix, "ffi"); break;
case 0xFB04: aBuf.remove(ix, 1); aBuf.insert(ix, "ffl"); break;
case 0x200C:
case 0x200D: aBuf.remove(ix, 1); break;
}
}
OUString aWord(aBuf.makeStringAndClear());
OString bWrd(OU2ENC(aWord, eEnc));
#if defined(H_DEPRECATED)
bVal = pMS->spell(std::string(bWrd), &rInfo);
#else
bVal = pMS->spell(bWrd.getStr(), &rInfo) != 0;
#endif
if (bVal) return -1;
}
nRes = SpellFailure::SPELLING_ERROR;
} else {
return -1;
}
pMS = nullptr;
}
}
}
return nRes;
}
sal_Bool SAL_CALL SpellChecker::isValid( const OUString& rWord, const Locale& rLocale,
const css::uno::Sequence< css::beans::PropertyValue >& rProperties )
{
MutexGuard aGuard( GetLinguMutex() );
if (rLocale == Locale() || rWord.isEmpty())
return true;
if (!hasLocale( rLocale ))
return true;
// return sal_False to process SPELLML requests (they are longer than the header)
if (rWord.match(SPELL_XML, 0) && (rWord.getLength() > 10)) return false;
// Get property values to be used.
// These are be the default values set in the SN_LINGU_PROPERTIES
// PropertySet which are overridden by the supplied ones from the
// last argument.
// You'll probably like to use a simpler solution than the provided
// one using the PropertyHelper_Spell.
PropertyHelper_Spelling& rHelper = GetPropHelper();
rHelper.SetTmpPropVals( rProperties );
int nInfo = 0;
sal_Int16 nFailure = GetSpellFailure( rWord, rLocale, nInfo );
if (nFailure != -1 && !rWord.match(SPELL_XML, 0))
{
LanguageType nLang = LinguLocaleToLanguage( rLocale );
// postprocess result for errors that should be ignored
const bool bIgnoreError =
(!rHelper.IsSpellUpperCase() && IsUpper( rWord, nLang )) ||
(!rHelper.IsSpellWithDigits() && HasDigits( rWord ));
if (bIgnoreError)
nFailure = -1;
}
//#define SPELL_COMPOUND 1 << 0
// valid word, but it's a rule-based compound word
if ( nFailure == -1 && (nInfo & SPELL_COMPOUND) )
{
bool bHasHyphen = rWord.indexOf('-') > -1;
if ( (bHasHyphen && !rHelper.IsSpellHyphenatedCompound()) ||
(!bHasHyphen && !rHelper.IsSpellClosedCompound()) )
{
return false;
}
}
return (nFailure == -1);
}
Reference< XSpellAlternatives >
SpellChecker::GetProposals( const OUString &rWord, const Locale &rLocale )
{
// Retrieves the return values for the 'spell' function call in case
// of a misspelled word.
// Especially it may give a list of suggested (correct) words:
Reference< XSpellAlternatives > xRes;
// note: mutex is held by higher up by spell which covers both
Hunspell* pMS = nullptr;
rtl_TextEncoding eEnc = RTL_TEXTENCODING_DONTKNOW;
// first handle smart quotes (single and double)
OUStringBuffer rBuf(rWord);
sal_Int32 n = rBuf.getLength();
sal_Unicode c;
for (sal_Int32 ix=0; ix < n; ix++)
{
c = rBuf[ix];
if ((c == 0x201C) || (c == 0x201D))
rBuf[ix] = u'"';
if ((c == 0x2018) || (c == 0x2019))
rBuf[ix] = u'\'';
}
OUString nWord(rBuf.makeStringAndClear());
if (n)
{
LanguageType nLang = LinguLocaleToLanguage( rLocale );
int numsug = 0;
Sequence< OUString > aStr( 0 );
for (const auto& currDict : m_DictItems)
{
pMS = nullptr;
eEnc = RTL_TEXTENCODING_DONTKNOW;
if (rLocale == currDict.m_aDLoc)
{
pMS = currDict.m_pDict.get();
eEnc = currDict.m_aDEnc;
}
if (pMS)
{
OString aWrd(OU2ENC(nWord,eEnc));
#if defined(H_DEPRECATED)
std::vector<std::string> suglst = pMS->suggest(std::string(aWrd));
if (!suglst.empty())
{
aStr.realloc(numsug + suglst.size());
OUString *pStr = aStr.getArray();
for (size_t ii = 0; ii < suglst.size(); ++ii)
{
pStr[numsug + ii] = OUString(suglst[ii].c_str(), suglst[ii].size(), eEnc);
}
numsug += suglst.size();
}
#else
char ** suglst = nullptr;
int count = pMS->suggest(&suglst, aWrd.getStr());
if (count)
{
aStr.realloc( numsug + count );
OUString *pStr = aStr.getArray();
for (int ii=0; ii < count; ++ii)
{
OUString cvtwrd(suglst[ii],strlen(suglst[ii]),eEnc);
pStr[numsug + ii] = cvtwrd;
}
numsug += count;
}
pMS->free_list(&suglst, count);
#endif
}
}
// now return an empty alternative for no suggestions or the list of alternatives if some found
xRes = SpellAlternatives::CreateSpellAlternatives( rWord, nLang, SpellFailure::SPELLING_ERROR, aStr );
return xRes;
}
return xRes;
}
Reference< XSpellAlternatives > SAL_CALL SpellChecker::spell(
const OUString& rWord, const Locale& rLocale,
const css::uno::Sequence< css::beans::PropertyValue >& rProperties )
{
MutexGuard aGuard( GetLinguMutex() );
if (rLocale == Locale() || rWord.isEmpty())
return nullptr;
if (!hasLocale( rLocale ))
return nullptr;
Reference< XSpellAlternatives > xAlt;
if (!isValid( rWord, rLocale, rProperties ))
{
xAlt = GetProposals( rWord, rLocale );
}
return xAlt;
}
sal_Bool SAL_CALL SpellChecker::addLinguServiceEventListener(
const Reference< XLinguServiceEventListener >& rxLstnr )
{
MutexGuard aGuard( GetLinguMutex() );
bool bRes = false;
if (!m_bDisposing && rxLstnr.is())
{
bRes = GetPropHelper().addLinguServiceEventListener( rxLstnr );
}
return bRes;
}
sal_Bool SAL_CALL SpellChecker::removeLinguServiceEventListener(
const Reference< XLinguServiceEventListener >& rxLstnr )
{
MutexGuard aGuard( GetLinguMutex() );
bool bRes = false;
if (!m_bDisposing && rxLstnr.is())
{
bRes = GetPropHelper().removeLinguServiceEventListener( rxLstnr );
}
return bRes;
}
OUString SAL_CALL SpellChecker::getServiceDisplayName(const Locale& rLocale)
{
std::locale loc(Translate::Create("svt", LanguageTag(rLocale)));
return Translate::get(STR_DESCRIPTION_HUNSPELL, loc);
}
void SAL_CALL SpellChecker::initialize( const Sequence< Any >& rArguments )
{
MutexGuard aGuard( GetLinguMutex() );
if (m_pPropHelper)
return;
sal_Int32 nLen = rArguments.getLength();
if (2 == nLen)
{
Reference< XLinguProperties > xPropSet;
rArguments.getConstArray()[0] >>= xPropSet;
// rArguments.getConstArray()[1] >>= xDicList;
//! Pointer allows for access of the non-UNO functions.
//! And the reference to the UNO-functions while increasing
//! the ref-count and will implicitly free the memory
//! when the object is no longer used.
m_pPropHelper.reset( new PropertyHelper_Spelling( static_cast<XSpellChecker *>(this), xPropSet ) );
m_pPropHelper->AddAsPropListener(); //! after a reference is established
}
else {
OSL_FAIL( "wrong number of arguments in sequence" );
}
}
void SAL_CALL SpellChecker::dispose()
{
MutexGuard aGuard( GetLinguMutex() );
if (!m_bDisposing)
{
m_bDisposing = true;
EventObject aEvtObj( static_cast<XSpellChecker *>(this) );
m_aEvtListeners.disposeAndClear( aEvtObj );
if (m_pPropHelper)
{
m_pPropHelper->RemoveAsPropListener();
m_pPropHelper.reset();
}
}
}
void SAL_CALL SpellChecker::addEventListener( const Reference< XEventListener >& rxListener )
{
MutexGuard aGuard( GetLinguMutex() );
if (!m_bDisposing && rxListener.is())
m_aEvtListeners.addInterface( rxListener );
}
void SAL_CALL SpellChecker::removeEventListener( const Reference< XEventListener >& rxListener )
{
MutexGuard aGuard( GetLinguMutex() );
if (!m_bDisposing && rxListener.is())
m_aEvtListeners.removeInterface( rxListener );
}
// Service specific part
OUString SAL_CALL SpellChecker::getImplementationName()
{
return u"org.openoffice.lingu.MySpellSpellChecker"_ustr;
}
sal_Bool SAL_CALL SpellChecker::supportsService( const OUString& ServiceName )
{
return cppu::supportsService(this, ServiceName);
}
Sequence< OUString > SAL_CALL SpellChecker::getSupportedServiceNames()
{
return { SN_SPELLCHECKER };
}
extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface*
lingucomponent_SpellChecker_get_implementation(
css::uno::XComponentContext* , css::uno::Sequence<css::uno::Any> const&)
{
return cppu::acquire(new SpellChecker());
}
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
↑ V530 The return value of function 'remove' is required to be utilized.
↑ V530 The return value of function 'remove' is required to be utilized.
↑ V530 The return value of function 'remove' is required to be utilized.
↑ V530 The return value of function 'remove' is required to be utilized.
↑ V530 The return value of function 'remove' is required to be utilized.
↑ V530 The return value of function 'remove' is required to be utilized.