/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* This file is part of the LibreOffice project.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This file incorporates work covered by the following license notice:
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed
* with this work for additional information regarding copyright
* ownership. The ASF licenses this file to you under the Apache
* License, Version 2.0 (the "License"); you may not use this file
* except in compliance with the License. You may obtain a copy of
* the License at http://www.apache.org/licenses/LICENSE-2.0 .
*/
#include <i18nlangtag/languagetag.hxx>
#include <i18nlangtag/mslangid.hxx>
#include <rtl/ustrbuf.hxx>
#include <sal/macros.h>
#include <nativenumbersupplier.hxx>
#include <localedata.hxx>
#include "data/numberchar.h"
#include <comphelper/processfactory.hxx>
#include <cppuhelper/supportsservice.hxx>
#include <o3tl/string_view.hxx>
#include <cstddef>
#include <map>
#include <mutex>
#include <memory>
#include <string_view>
#include <unordered_map>
#include <com/sun/star/i18n/CharacterClassification.hpp>
#include <com/sun/star/i18n/NativeNumberMode.hpp>
#include <com/sun/star/linguistic2/NumberText.hpp>
using namespace ::com::sun::star::uno;
using namespace ::com::sun::star::i18n;
using namespace ::com::sun::star::lang;
namespace {
struct Number {
sal_Int16 number;
const sal_Unicode *multiplierChar;
sal_Int16 numberFlag;
sal_Int16 exponentCount;
const sal_Int16 *multiplierExponent;
};
}
#define NUMBER_OMIT_ZERO (1 << 0)
#define NUMBER_OMIT_ONLY_ZERO (1 << 1)
#define NUMBER_OMIT_ONE_1 (1 << 2)
#define NUMBER_OMIT_ONE_2 (1 << 3)
#define NUMBER_OMIT_ONE_3 (1 << 4)
#define NUMBER_OMIT_ONE_4 (1 << 5)
#define NUMBER_OMIT_ONE_5 (1 << 6)
#define NUMBER_OMIT_ONE_6 (1 << 7)
#define NUMBER_OMIT_ONE_7 (1 << 8)
#define NUMBER_OMIT_ONE (NUMBER_OMIT_ONE_1|NUMBER_OMIT_ONE_2|NUMBER_OMIT_ONE_3|NUMBER_OMIT_ONE_4|NUMBER_OMIT_ONE_5|NUMBER_OMIT_ONE_6|NUMBER_OMIT_ONE_7)
#define NUMBER_OMIT_ONE_CHECK(bit) (1 << (2 + bit))
#define NUMBER_OMIT_ALL ( NUMBER_OMIT_ZERO|NUMBER_OMIT_ONE|NUMBER_OMIT_ONLY_ZERO )
#define NUMBER_OMIT_ZERO_ONE ( NUMBER_OMIT_ZERO|NUMBER_OMIT_ONE )
#define NUMBER_OMIT_ONE_67 (NUMBER_OMIT_ONE_6|NUMBER_OMIT_ONE_7)
#define NUMBER_OMIT_ZERO_ONE_67 ( NUMBER_OMIT_ZERO|NUMBER_OMIT_ONE_67 )
namespace i18npool {
namespace {
std::mutex theNatNumMutex;
}
static OUString getHebrewNativeNumberString(const OUString& aNumberString, bool useGeresh);
static OUString getCyrillicNativeNumberString(const OUString& aNumberString);
/// @throws RuntimeException
static OUString AsciiToNativeChar( const OUString& inStr, sal_Int32 nCount,
Sequence< sal_Int32 >* pOffset, sal_Int16 number )
{
const sal_Unicode *src = inStr.getStr();
rtl_uString *newStr = rtl_uString_alloc(nCount);
if (pOffset)
pOffset->realloc(nCount);
auto ppOffset = pOffset ? pOffset->getArray() : nullptr;
for (sal_Int32 i = 0; i < nCount; i++)
{
sal_Unicode ch = src[i];
if (isNumber(ch))
newStr->buffer[i] = NumberChar[number][ ch - NUMBER_ZERO ];
else if (i+1 < nCount && isNumber(src[i+1])) {
if (i > 0 && isNumber(src[i-1]) && isSeparator(ch))
newStr->buffer[i] = SeparatorChar[number] ? SeparatorChar[number] : ch;
else
newStr->buffer[i] = isDecimal(ch) ? (DecimalChar[number] ? DecimalChar[number] : ch) :
isMinus(ch) ? (MinusChar[number] ? MinusChar[number] : ch) : ch;
}
else
newStr->buffer[i] = ch;
if (ppOffset)
ppOffset[i] = i;
}
return OUString(newStr, SAL_NO_ACQUIRE); // take ownership
}
static bool AsciiToNative_numberMaker(const sal_Unicode *str, sal_Int32 begin, sal_Int32 len,
sal_Unicode *dst, sal_Int32& count, sal_Int16 multiChar_index, Sequence< sal_Int32 >* pOffset, sal_Int32 startPos,
const Number *number, const sal_Unicode* numberChar)
{
sal_Unicode multiChar = (multiChar_index == -1 ? 0 : number->multiplierChar[multiChar_index]);
auto ppOffset = pOffset ? pOffset->getArray() : nullptr;
if ( len <= number->multiplierExponent[number->exponentCount-1] ) {
if (number->multiplierExponent[number->exponentCount-1] > 1) {
bool bNotZero = false;
for (const sal_Int32 end = begin+len; begin < end; begin++) {
if (bNotZero || str[begin] != NUMBER_ZERO) {
dst[count] = numberChar[str[begin] - NUMBER_ZERO];
if (ppOffset)
ppOffset[count] = begin + startPos;
count++;
bNotZero = true;
}
}
if (bNotZero && multiChar > 0) {
dst[count] = multiChar;
if (ppOffset)
ppOffset[count] = begin + startPos;
count++;
}
return bNotZero;
} else if (str[begin] != NUMBER_ZERO) {
if (!(number->numberFlag & (multiChar_index < 0 ? 0 : NUMBER_OMIT_ONE_CHECK(multiChar_index))) || str[begin] != NUMBER_ONE) {
dst[count] = numberChar[str[begin] - NUMBER_ZERO];
if (ppOffset)
ppOffset[count] = begin + startPos;
count++;
}
if (multiChar > 0) {
dst[count] = multiChar;
if (ppOffset)
ppOffset[count] = begin + startPos;
count++;
}
} else if (!(number->numberFlag & NUMBER_OMIT_ZERO) && count > 0 && dst[count-1] != numberChar[0]) {
dst[count] = numberChar[0];
if (ppOffset)
ppOffset[count] = begin + startPos;
count++;
}
return str[begin] != NUMBER_ZERO;
} else {
bool bPrintPower = false;
// sal_Int16 last = 0;
for (sal_Int16 i = 1; i <= number->exponentCount; i++) {
sal_Int32 tmp = len - (i == number->exponentCount ? 0 : number->multiplierExponent[i]);
if (tmp > 0) {
bPrintPower |= AsciiToNative_numberMaker(str, begin, tmp, dst, count,
(i == number->exponentCount ? -1 : i), pOffset, startPos, number, numberChar);
begin += tmp;
len -= tmp;
}
}
if (bPrintPower) {
if (count > 0 && number->multiplierExponent[number->exponentCount-1] == 1 &&
dst[count-1] == numberChar[0])
count--;
if (multiChar > 0) {
dst[count] = multiChar;
if (ppOffset)
ppOffset[count] = begin + startPos;
count++;
}
}
return bPrintPower;
}
}
/// @throws RuntimeException
static OUString AsciiToNative( const OUString& inStr, sal_Int32 nCount,
Sequence< sal_Int32 >* pOffset, const Number* number )
{
OUString aRet;
sal_Int32 strLen = inStr.getLength();
const sal_Unicode *numberChar = NumberChar[number->number];
if (nCount > strLen)
nCount = strLen;
if (nCount > 0)
{
const sal_Unicode *str = inStr.getStr();
std::unique_ptr<sal_Unicode[]> newStr(new sal_Unicode[nCount * 2 + 1]);
std::unique_ptr<sal_Unicode[]> srcStr(new sal_Unicode[nCount + 1]); // for keeping number without comma
sal_Int32 i, len = 0, count = 0;
if (pOffset)
pOffset->realloc( nCount * 2 );
auto ppOffset = pOffset ? pOffset->getArray() : nullptr;
bool bDoDecimal = false;
for (i = 0; i <= nCount; i++)
{
if (i < nCount && isNumber(str[i])) {
if (bDoDecimal) {
newStr[count] = numberChar[str[i] - NUMBER_ZERO];
if (ppOffset)
ppOffset[count] = i;
count++;
}
else
srcStr[len++] = str[i];
} else {
if (len > 0) {
if (i < nCount-1 && isSeparator(str[i]) && isNumber(str[i+1]))
continue; // skip comma inside number string
bool bNotZero = false;
for (sal_Int32 begin = 0, end = len % number->multiplierExponent[0];
end <= len; begin = end, end += number->multiplierExponent[0]) {
if (end == 0) continue;
sal_Int32 _count = count;
bNotZero |= AsciiToNative_numberMaker(srcStr.get(), begin, end - begin, newStr.get(), count,
end == len ? -1 : 0, pOffset, i - len, number, numberChar);
if (count > 0 && number->multiplierExponent[number->exponentCount-1] == 1 &&
newStr[count-1] == numberChar[0])
count--;
if (bNotZero && _count == count && end != len) {
newStr[count] = number->multiplierChar[0];
if (ppOffset)
ppOffset[count] = i - len;
count++;
}
}
if (! bNotZero && ! (number->numberFlag & NUMBER_OMIT_ONLY_ZERO)) {
newStr[count] = numberChar[0];
if (ppOffset)
ppOffset[count] = i - len;
count++;
}
len = 0;
}
if (i < nCount) {
bDoDecimal = (!bDoDecimal && i < nCount-1 && isDecimal(str[i]) && isNumber(str[i+1]));
if (bDoDecimal)
newStr[count] = (DecimalChar[number->number] ? DecimalChar[number->number] : str[i]);
else if (i < nCount-1 && isMinus(str[i]) && isNumber(str[i+1]))
newStr[count] = (MinusChar[number->number] ? MinusChar[number->number] : str[i]);
else if (i < nCount-1 && isSeparator(str[i]) && isNumber(str[i+1]))
newStr[count] = (SeparatorChar[number->number] ? SeparatorChar[number->number] : str[i]);
else
newStr[count] = str[i];
if (ppOffset)
ppOffset[count] = i;
count++;
}
}
}
if (pOffset)
pOffset->realloc(count);
aRet = OUString(newStr.get(), count);
}
return aRet;
}
namespace
{
void NativeToAscii_numberMaker(sal_Int16 max, sal_Int16 prev, const sal_Unicode *str,
sal_Int32& i, sal_Int32 nCount, sal_Unicode *dst, sal_Int32& count, Sequence< sal_Int32 >* pOffset,
OUString& numberChar, OUString& multiplierChar)
{
auto ppOffset = pOffset ? pOffset->getArray() : nullptr;
sal_Int16 curr = 0, num = 0, end = 0, shift = 0;
while (++i < nCount) {
if ((curr = sal::static_int_cast<sal_Int16>( numberChar.indexOf(str[i]) )) >= 0) {
if (num > 0)
break;
num = curr % 10;
} else if ((curr = sal::static_int_cast<sal_Int16>( multiplierChar.indexOf(str[i]) )) >= 0) {
curr = MultiplierExponent_7_CJK[curr % ExponentCount_7_CJK];
if (prev > curr && num == 0) num = 1; // One may be omitted in informal format
shift = end = 0;
if (curr >= max)
max = curr;
else if (curr > prev)
shift = max - curr;
else
end = curr;
while (end++ < prev) {
dst[count] = NUMBER_ZERO + (end == prev ? num : 0);
if (ppOffset)
ppOffset[count] = i;
count++;
}
if (shift) {
count -= max;
for (const sal_Int32 countEnd = count+shift; count < countEnd; count++) {
dst[count] = dst[count + curr];
if (ppOffset)
ppOffset[count] = ppOffset[count + curr];
}
max = curr;
}
NativeToAscii_numberMaker(max, curr, str, i, nCount, dst,
count, pOffset, numberChar, multiplierChar);
return;
} else
break;
}
while (end++ < prev) {
dst[count] = NUMBER_ZERO + (end == prev ? num : 0);
if (ppOffset)
ppOffset[count] = i - 1;
count++;
}
}
/// @throws RuntimeException
OUString NativeToAscii(const OUString& inStr,
sal_Int32 nCount, Sequence< sal_Int32 >* pOffset )
{
OUString aRet;
sal_Int32 strLen = inStr.getLength();
if (nCount > strLen)
nCount = strLen;
if (nCount > 0) {
const sal_Unicode *str = inStr.getStr();
std::unique_ptr<sal_Unicode[]> newStr(new sal_Unicode[nCount * MultiplierExponent_7_CJK[0] + 2]);
if (pOffset)
pOffset->realloc( nCount * MultiplierExponent_7_CJK[0] + 1 );
auto ppOffset = pOffset ? pOffset->getArray() : nullptr;
sal_Int32 count = 0, index;
sal_Int32 i;
OUString numberChar, multiplierChar, decimalChar, separatorChar;
numberChar = OUString(NumberChar[0], 10*NumberChar_Count);
multiplierChar = OUString(MultiplierChar_7_CJK[0], ExponentCount_7_CJK*Multiplier_Count);
decimalChar = OUString(DecimalChar, NumberChar_Count);
std::u16string_view const minusChar(MinusChar, NumberChar_Count);
separatorChar = OUString(
reinterpret_cast<sal_Unicode *>(SeparatorChar), NumberChar_Count);
for ( i = 0; i < nCount; i++) {
if ((index = multiplierChar.indexOf(str[i])) >= 0) {
if (count == 0 || !isNumber(newStr[count-1])) { // add 1 in front of multiplier
newStr[count] = NUMBER_ONE;
if (ppOffset)
ppOffset[count] = i;
count++;
}
index = MultiplierExponent_7_CJK[index % ExponentCount_7_CJK];
NativeToAscii_numberMaker(
sal::static_int_cast<sal_Int16>( index ), sal::static_int_cast<sal_Int16>( index ),
str, i, nCount, newStr.get(), count, pOffset,
numberChar, multiplierChar);
} else {
if ((index = numberChar.indexOf(str[i])) >= 0)
newStr[count] = sal::static_int_cast<sal_Unicode>( (index % 10) + NUMBER_ZERO );
else if (separatorChar.indexOf(str[i]) >= 0 &&
(i < nCount-1 && (numberChar.indexOf(str[i+1]) >= 0 ||
multiplierChar.indexOf(str[i+1]) >= 0)))
newStr[count] = SeparatorChar[NumberChar_HalfWidth];
else if (decimalChar.indexOf(str[i]) >= 0 &&
(i < nCount-1 && (numberChar.indexOf(str[i+1]) >= 0 ||
multiplierChar.indexOf(str[i+1]) >= 0)))
// Only when decimal point is followed by numbers,
// it will be convert to ASCII decimal point
newStr[count] = DecimalChar[NumberChar_HalfWidth];
else if (minusChar.find(str[i]) != std::u16string_view::npos &&
(i < nCount-1 && (numberChar.indexOf(str[i+1]) >= 0 ||
multiplierChar.indexOf(str[i+1]) >= 0)))
// Only when minus is followed by numbers,
// it will be convert to ASCII minus sign
newStr[count] = MinusChar[NumberChar_HalfWidth];
else
newStr[count] = str[i];
if (ppOffset)
ppOffset[count] = i;
count++;
}
}
if (pOffset) {
pOffset->realloc(count);
}
aRet = OUString(newStr.get(), count);
}
return aRet;
}
const Number natnum4[4] = {
{ NumberChar_Lower_zh, MultiplierChar_6_CJK[Multiplier_Lower_zh], 0,
ExponentCount_6_CJK, MultiplierExponent_6_CJK },
{ NumberChar_Lower_zh, MultiplierChar_6_CJK[Multiplier_Lower_zh_TW], 0,
ExponentCount_6_CJK, MultiplierExponent_6_CJK },
{ NumberChar_Modern_ja, MultiplierChar_7_CJK[Multiplier_Modern_ja], NUMBER_OMIT_ZERO_ONE_67,
ExponentCount_7_CJK, MultiplierExponent_7_CJK },
{ NumberChar_Lower_ko, MultiplierChar_6_CJK[Multiplier_Lower_ko], NUMBER_OMIT_ZERO,
ExponentCount_6_CJK, MultiplierExponent_6_CJK },
};
const Number natnum5[4] = {
{ NumberChar_Upper_zh, MultiplierChar_6_CJK[Multiplier_Upper_zh], 0,
ExponentCount_6_CJK, MultiplierExponent_6_CJK },
{ NumberChar_Upper_zh_TW, MultiplierChar_6_CJK[Multiplier_Upper_zh_TW], 0,
ExponentCount_6_CJK, MultiplierExponent_6_CJK },
{ NumberChar_Traditional_ja, MultiplierChar_7_CJK[Multiplier_Traditional_ja], NUMBER_OMIT_ZERO_ONE_67,
ExponentCount_7_CJK, MultiplierExponent_7_CJK },
{ NumberChar_Upper_ko, MultiplierChar_6_CJK[Multiplier_Upper_ko], 0,
ExponentCount_6_CJK, MultiplierExponent_6_CJK },
};
const Number natnum6[4] = {
{ NumberChar_FullWidth, MultiplierChar_6_CJK[Multiplier_Lower_zh], 0,
ExponentCount_6_CJK, MultiplierExponent_6_CJK },
{ NumberChar_FullWidth, MultiplierChar_6_CJK[Multiplier_Lower_zh_TW], 0,
ExponentCount_6_CJK, MultiplierExponent_6_CJK },
{ NumberChar_FullWidth, MultiplierChar_7_CJK[Multiplier_Modern_ja], NUMBER_OMIT_ZERO_ONE_67,
ExponentCount_7_CJK, MultiplierExponent_7_CJK },
{ NumberChar_FullWidth, MultiplierChar_6_CJK[Multiplier_Hangul_ko], NUMBER_OMIT_ZERO,
ExponentCount_6_CJK, MultiplierExponent_6_CJK },
};
const Number natnum7[4] = {
{ NumberChar_Lower_zh, MultiplierChar_6_CJK[Multiplier_Lower_zh], NUMBER_OMIT_ALL,
ExponentCount_6_CJK, MultiplierExponent_6_CJK },
{ NumberChar_Lower_zh, MultiplierChar_6_CJK[Multiplier_Lower_zh_TW], NUMBER_OMIT_ALL,
ExponentCount_6_CJK, MultiplierExponent_6_CJK },
{ NumberChar_Modern_ja, MultiplierChar_2_CJK[Multiplier_Modern_ja], NUMBER_OMIT_ZERO_ONE,
ExponentCount_2_CJK, MultiplierExponent_2_CJK },
{ NumberChar_Lower_ko, MultiplierChar_6_CJK[Multiplier_Lower_ko], NUMBER_OMIT_ALL,
ExponentCount_6_CJK, MultiplierExponent_6_CJK },
};
const Number natnum8[4] = {
{ NumberChar_Upper_zh, MultiplierChar_6_CJK[Multiplier_Upper_zh], NUMBER_OMIT_ALL,
ExponentCount_6_CJK, MultiplierExponent_6_CJK },
{ NumberChar_Upper_zh_TW, MultiplierChar_6_CJK[Multiplier_Upper_zh_TW], NUMBER_OMIT_ALL,
ExponentCount_6_CJK, MultiplierExponent_6_CJK },
{ NumberChar_Traditional_ja, MultiplierChar_2_CJK[Multiplier_Traditional_ja], NUMBER_OMIT_ZERO_ONE,
ExponentCount_2_CJK, MultiplierExponent_2_CJK },
{ NumberChar_Upper_ko, MultiplierChar_6_CJK[Multiplier_Upper_ko], NUMBER_OMIT_ALL,
ExponentCount_6_CJK, MultiplierExponent_6_CJK },
};
const Number natnum10 = { NumberChar_Hangul_ko, MultiplierChar_6_CJK[Multiplier_Hangul_ko], NUMBER_OMIT_ZERO,
ExponentCount_6_CJK, MultiplierExponent_6_CJK };
const Number natnum11 = { NumberChar_Hangul_ko, MultiplierChar_6_CJK[Multiplier_Hangul_ko], NUMBER_OMIT_ALL,
ExponentCount_6_CJK, MultiplierExponent_6_CJK };
//! ATTENTION: Do not change order of elements!
//! Append new languages to the end of the list!
const char *natnum1Locales[] = {
"zh_CN",
"zh_TW",
"ja",
"ko",
"he",
"ar",
"th",
"hi",
"or",
"mr",
"bn",
"pa",
"gu",
"ta",
"te",
"kn",
"ml",
"lo",
"bo",
"my",
"km",
"mn",
"ne",
"dz",
"fa",
"cu"
};
const sal_Int16 nbOfLocale = SAL_N_ELEMENTS(natnum1Locales);
//! ATTENTION: Do not change order of elements!
//! Number and order must match elements of natnum1Locales!
const sal_Int16 natnum1[] = {
NumberChar_Lower_zh,
NumberChar_Lower_zh,
NumberChar_Modern_ja,
NumberChar_Lower_ko,
NumberChar_he,
NumberChar_Indic_ar,
NumberChar_th,
NumberChar_hi,
NumberChar_or,
NumberChar_mr,
NumberChar_bn,
NumberChar_pa,
NumberChar_gu,
NumberChar_ta,
NumberChar_te,
NumberChar_kn,
NumberChar_ml,
NumberChar_lo,
NumberChar_bo,
NumberChar_my,
NumberChar_km,
NumberChar_mn,
NumberChar_ne,
NumberChar_dz,
NumberChar_EastIndic_ar,
NumberChar_cu
};
const sal_Int16 sizeof_natnum1 = SAL_N_ELEMENTS(natnum1);
//! ATTENTION: Do not change order of elements!
//! Order must match first elements of natnum1Locales!
const sal_Int16 natnum2[] = {
NumberChar_Upper_zh,
NumberChar_Upper_zh_TW,
NumberChar_Traditional_ja,
NumberChar_Upper_ko,
NumberChar_he
};
const sal_Int16 sizeof_natnum2 = SAL_N_ELEMENTS(natnum2);
sal_Int16 getLanguageNumber( const Locale& rLocale)
{
// return zh_TW for TW, HK and MO, return zh_CN for other zh locales.
if (rLocale.Language == "zh") return MsLangId::isTraditionalChinese(rLocale) ? 1 : 0;
for (sal_Int16 i = 2; i < nbOfLocale; i++)
if (rLocale.Language.equalsAsciiL(natnum1Locales[i], 2))
return i;
return -1;
}
struct Separators
{
sal_Unicode DecimalSeparator;
sal_Unicode ThousandSeparator;
Separators(const Locale& rLocale)
{
LocaleDataItem aLocaleItem = LocaleDataImpl::get()->getLocaleItem(rLocale);
DecimalSeparator = aLocaleItem.decimalSeparator.toChar();
ThousandSeparator = aLocaleItem.thousandSeparator.toChar();
}
};
Separators getLocaleSeparators(const Locale& rLocale, const OUString& rLocStr)
{
// Guard the static variable below.
std::scoped_lock aGuard(theNatNumMutex);
// Maximum a couple hundred of pairs with 4-byte structs - so no need for smart managing
static std::unordered_map<OUString, Separators> aLocaleSeparatorsBuf;
auto it = aLocaleSeparatorsBuf.find(rLocStr);
if (it == aLocaleSeparatorsBuf.end())
{
it = aLocaleSeparatorsBuf.emplace(rLocStr, Separators(rLocale)).first;
}
return it->second;
}
OUString getNumberText(const Locale& rLocale, const OUString& rNumberString,
std::u16string_view sNumberTextParams)
{
sal_Int32 i, count = 0;
const sal_Int32 len = rNumberString.getLength();
const sal_Unicode* src = rNumberString.getStr();
OUString aLoc = LanguageTag::convertToBcp47(rLocale);
Separators aSeparators = getLocaleSeparators(rLocale, aLoc);
OUStringBuffer sBuf(len);
for (i = 0; i < len; i++)
{
sal_Unicode ch = src[i];
if (isNumber(ch) || ch == aSeparators.DecimalSeparator)
{
++count;
sBuf.append(ch);
}
else if (ch == aSeparators.ThousandSeparator && count > 0)
continue;
else if (isMinus(ch) && count == 0)
sBuf.append(ch);
else
break;
}
// Handle also month and day names for NatNum12 date formatting
const OUString aNumberStr = (count == 0) ? rNumberString : sBuf.makeStringAndClear();
static auto xNumberText
= css::linguistic2::NumberText::create(comphelper::getProcessComponentContext());
// Guard the static variables below.
std::scoped_lock aGuard( theNatNumMutex );
OUString numbertext_prefix;
// default "cardinal" gets empty prefix
if (!sNumberTextParams.empty() && sNumberTextParams != u"cardinal")
numbertext_prefix = OUString::Concat(sNumberTextParams) + " ";
// Several hundreds of headings could result typing lags because
// of the continuous update of the multiple number names during typing.
// We fix this by buffering the result of the conversion.
static std::unordered_map<OUString, std::map<OUString, OUString>> aBuff;
auto& rItems = aBuff[aNumberStr];
auto& rItem = rItems[numbertext_prefix + aLoc];
if (rItem.isEmpty())
{
rItem = xNumberText->getNumberText(numbertext_prefix + aNumberStr, rLocale);
// use number at missing number to text conversion
if (rItem.isEmpty())
rItem = aNumberStr;
}
OUString sResult = rItem;
if (i != 0 && i < len)
sResult += rNumberString.subView(i);
return sResult;
}
}
OUString NativeNumberSupplierService::getNativeNumberString(const OUString& aNumberString, const Locale& rLocale,
sal_Int16 nNativeNumberMode,
Sequence<sal_Int32>* pOffset,
std::u16string_view rNativeNumberParams)
{
if (!isValidNatNumImpl(rLocale, nNativeNumberMode))
return aNumberString;
if (nNativeNumberMode == NativeNumberMode::NATNUM12)
{
// handle capitalization prefixes "capitalize", "upper", "lower" and "title"
enum WhichCasing
{
CAPITALIZE,
UPPER,
LOWER,
TITLE
};
struct CasingEntry
{
std::u16string_view aLiteral;
WhichCasing eCasing;
};
static const CasingEntry Casings[] =
{
{ std::u16string_view(u"capitalize"), CAPITALIZE },
{ std::u16string_view(u"upper"), UPPER },
{ std::u16string_view(u"lower"), LOWER },
{ std::u16string_view(u"title"), TITLE }
};
std::size_t nStripCase = 0;
size_t nCasing;
for (nCasing = 0; nCasing < std::size(Casings); ++nCasing)
{
if (o3tl::starts_with(rNativeNumberParams, Casings[nCasing].aLiteral))
{
nStripCase = Casings[nCasing].aLiteral.size();
break;
}
}
if (nStripCase > 0 && (rNativeNumberParams.size() == nStripCase ||
rNativeNumberParams[nStripCase++] == ' '))
{
OUString aStr = getNumberText(rLocale, aNumberString, rNativeNumberParams.substr(nStripCase));
if (!xCharClass.is())
xCharClass = CharacterClassification::create(comphelper::getProcessComponentContext());
switch (Casings[nCasing].eCasing)
{
case CAPITALIZE:
return xCharClass->toTitle(aStr, 0, 1, aLocale) +
(aStr.getLength() > 1 ? aStr.subView(1) : u"");
case UPPER:
return xCharClass->toUpper(aStr, 0, aStr.getLength(), aLocale);
case LOWER:
return xCharClass->toLower(aStr, 0, aStr.getLength(), aLocale);
case TITLE:
{
if ( rLocale.Language == "en" )
{
// title case is common in English, so fix bugs of toTitle():
// not "One Dollar *And* *Twenty-two* Cents", but
// "One Dollar *and* *Twenty-Two* Cents".
// Add spaces after hyphens to separate the elements of the
// hyphenated compound words temporarily, allowing their
// capitalization by toTitle()
aStr = aStr.replaceAll("-", "- ");
aStr = xCharClass->toTitle(aStr, 0, aStr.getLength(), aLocale);
return aStr.replaceAll("- ", "-").replaceAll(" And ", " and ");
}
else
return xCharClass->toTitle(aStr, 0, aStr.getLength(), aLocale);
}
}
}
else
{
return getNumberText(rLocale, aNumberString, rNativeNumberParams);
}
}
sal_Int16 langnum = getLanguageNumber(rLocale);
if (langnum == -1)
return aNumberString;
const Number *number = nullptr;
sal_Int16 num = -1;
switch (nNativeNumberMode)
{
case NativeNumberMode::NATNUM0: // Ascii
return NativeToAscii(aNumberString, aNumberString.getLength(), pOffset);
case NativeNumberMode::NATNUM1: // Char, Lower
num = natnum1[langnum];
break;
case NativeNumberMode::NATNUM2: // Char, Upper
num = natnum2[langnum];
break;
case NativeNumberMode::NATNUM3: // Char, FullWidth
num = NumberChar_FullWidth;
break;
case NativeNumberMode::NATNUM4: // Text, Lower, Long
number = &natnum4[langnum];
break;
case NativeNumberMode::NATNUM5: // Text, Upper, Long
number = &natnum5[langnum];
break;
case NativeNumberMode::NATNUM6: // Text, FullWidth
number = &natnum6[langnum];
break;
case NativeNumberMode::NATNUM7: // Text. Lower, Short
number = &natnum7[langnum];
break;
case NativeNumberMode::NATNUM8: // Text, Upper, Short
number = &natnum8[langnum];
break;
case NativeNumberMode::NATNUM9: // Char, Hangul
num = NumberChar_Hangul_ko;
break;
case NativeNumberMode::NATNUM10: // Text, Hangul, Long
number = &natnum10;
break;
case NativeNumberMode::NATNUM11: // Text, Hangul, Short
number = &natnum11;
break;
default:
break;
}
if (number || num >= 0) {
if (aLocale.Language != rLocale.Language ||
aLocale.Country != rLocale.Country ||
aLocale.Variant != rLocale.Variant) {
LocaleDataItem item = LocaleDataImpl::get()->getLocaleItem( rLocale );
aLocale = rLocale;
DecimalChar[NumberChar_HalfWidth]=item.decimalSeparator.toChar();
if (DecimalChar[NumberChar_HalfWidth] > 0x7E || DecimalChar[NumberChar_HalfWidth] < 0x21)
DecimalChar[NumberChar_FullWidth]=0xFF0E;
else
DecimalChar[NumberChar_FullWidth]=DecimalChar[NumberChar_HalfWidth]+0xFEE0;
SeparatorChar[NumberChar_HalfWidth]=item.thousandSeparator.toChar();
if (SeparatorChar[NumberChar_HalfWidth] > 0x7E || SeparatorChar[NumberChar_HalfWidth] < 0x21)
SeparatorChar[NumberChar_FullWidth]=0xFF0C;
else
SeparatorChar[NumberChar_FullWidth]=SeparatorChar[NumberChar_HalfWidth]+0xFEE0;
}
if (number)
return AsciiToNative( aNumberString, aNumberString.getLength(), pOffset, number );
else if (num == NumberChar_he)
return getHebrewNativeNumberString(aNumberString,
nNativeNumberMode == NativeNumberMode::NATNUM2);
else if (num == NumberChar_cu)
return getCyrillicNativeNumberString(aNumberString);
else
return AsciiToNativeChar(aNumberString, aNumberString.getLength(), pOffset, num);
}
else
return aNumberString;
}
OUString SAL_CALL NativeNumberSupplierService::getNativeNumberString(const OUString& aNumberString, const Locale& rLocale,
sal_Int16 nNativeNumberMode)
{
return getNativeNumberString(aNumberString, rLocale, nNativeNumberMode, nullptr);
}
OUString SAL_CALL NativeNumberSupplierService::getNativeNumberStringParams(
const OUString& rNumberString, const css::lang::Locale& rLocale, sal_Int16 nNativeNumberMode,
const OUString& rNativeNumberParams)
{
return getNativeNumberString(rNumberString, rLocale, nNativeNumberMode, nullptr, rNativeNumberParams);
}
sal_Unicode NativeNumberSupplierService::getNativeNumberChar( const sal_Unicode inChar, const Locale& rLocale, sal_Int16 nNativeNumberMode )
{
if (nNativeNumberMode == NativeNumberMode::NATNUM0) { // Ascii
for (const auto & i : NumberChar)
for (sal_Int16 j = 0; j < 10; j++)
if (inChar == i[j])
return j;
return inChar;
}
if (!isNumber(inChar))
return inChar;
if (!isValidNatNumImpl(rLocale, nNativeNumberMode))
return inChar;
sal_Int16 langnum = getLanguageNumber(rLocale);
if (langnum == -1)
return inChar;
switch (nNativeNumberMode)
{
case NativeNumberMode::NATNUM1: // Char, Lower
case NativeNumberMode::NATNUM4: // Text, Lower, Long
case NativeNumberMode::NATNUM7: // Text. Lower, Short
return NumberChar[natnum1[langnum]][inChar - NUMBER_ZERO];
case NativeNumberMode::NATNUM2: // Char, Upper
case NativeNumberMode::NATNUM5: // Text, Upper, Long
case NativeNumberMode::NATNUM8: // Text, Upper, Short
return NumberChar[natnum2[langnum]][inChar - NUMBER_ZERO];
case NativeNumberMode::NATNUM3: // Char, FullWidth
case NativeNumberMode::NATNUM6: // Text, FullWidth
return NumberChar[NumberChar_FullWidth][inChar - NUMBER_ZERO];
case NativeNumberMode::NATNUM9: // Char, Hangul
case NativeNumberMode::NATNUM10: // Text, Hangul, Long
case NativeNumberMode::NATNUM11: // Text, Hangul, Short
return NumberChar[NumberChar_Hangul_ko][inChar - NUMBER_ZERO];
default:
break;
}
return inChar;
}
bool NativeNumberSupplierService::isValidNatNumImpl( const Locale& rLocale, sal_Int16 nNativeNumberMode )
{
sal_Int16 langnum = getLanguageNumber(rLocale);
switch (nNativeNumberMode) {
case NativeNumberMode::NATNUM0: // Ascii
case NativeNumberMode::NATNUM3: // Char, FullWidth
case NativeNumberMode::NATNUM12: // spell out numbers, dates and money amounts
return true;
case NativeNumberMode::NATNUM1: // Char, Lower
return (langnum >= 0);
case NativeNumberMode::NATNUM2: // Char, Upper
if (langnum == 4) // Hebrew numbering
return true;
[[fallthrough]];
case NativeNumberMode::NATNUM4: // Text, Lower, Long
case NativeNumberMode::NATNUM5: // Text, Upper, Long
case NativeNumberMode::NATNUM6: // Text, FullWidth
case NativeNumberMode::NATNUM7: // Text. Lower, Short
case NativeNumberMode::NATNUM8: // Text, Upper, Short
return (langnum >= 0 && langnum < 4); // CJK numbering
case NativeNumberMode::NATNUM9: // Char, Hangul
case NativeNumberMode::NATNUM10: // Text, Hangul, Long
case NativeNumberMode::NATNUM11: // Text, Hangul, Short
return (langnum == 3); // Korean numbering
}
return false;
}
NativeNumberXmlAttributes SAL_CALL NativeNumberSupplierService::convertToXmlAttributes( const Locale& rLocale, sal_Int16 nNativeNumberMode )
{
static const sal_Int16 attShort = 0;
static const sal_Int16 attMedium = 1;
static const sal_Int16 attLong = 2;
static const char *attType[] = { "short", "medium", "long" };
sal_Int16 number = NumberChar_HalfWidth, type = attShort;
sal_Int16 langnum = -1;
if (isValidNatNum(rLocale, nNativeNumberMode)) {
langnum = getLanguageNumber(rLocale);
}
if (langnum != -1) {
switch (nNativeNumberMode) {
case NativeNumberMode::NATNUM0: // Ascii
number = NumberChar_HalfWidth;
type = attShort;
break;
case NativeNumberMode::NATNUM1: // Char, Lower
number = natnum1[langnum];
type = attShort;
break;
case NativeNumberMode::NATNUM2: // Char, Upper
number = natnum2[langnum];
type = number == NumberChar_he ? attMedium : attShort;
break;
case NativeNumberMode::NATNUM3: // Char, FullWidth
number = NumberChar_FullWidth;
type = attShort;
break;
case NativeNumberMode::NATNUM4: // Text, Lower, Long
number = natnum1[langnum];
type = attLong;
break;
case NativeNumberMode::NATNUM5: // Text, Upper, Long
number = natnum2[langnum];
type = attLong;
break;
case NativeNumberMode::NATNUM6: // Text, FullWidth
number = NumberChar_FullWidth;
type = attLong;
break;
case NativeNumberMode::NATNUM7: // Text. Lower, Short
number = natnum1[langnum];
type = attMedium;
break;
case NativeNumberMode::NATNUM8: // Text, Upper, Short
number = natnum2[langnum];
type = attMedium;
break;
case NativeNumberMode::NATNUM9: // Char, Hangul
number = NumberChar_Hangul_ko;
type = attShort;
break;
case NativeNumberMode::NATNUM10: // Text, Hangul, Long
number = NumberChar_Hangul_ko;
type = attLong;
break;
case NativeNumberMode::NATNUM11: // Text, Hangul, Short
number = NumberChar_Hangul_ko;
type = attMedium;
break;
default:
break;
}
}
return NativeNumberXmlAttributes(rLocale, OUString(&NumberChar[number][1], 1),
OUString::createFromAscii(attType[type]));
}
static bool natNumIn(sal_Int16 num, const sal_Int16 natnum[], sal_Int16 len)
{
for (sal_Int16 i = 0; i < len; i++)
if (natnum[i] == num)
return true;
return false;
}
sal_Int16 SAL_CALL NativeNumberSupplierService::convertFromXmlAttributes( const NativeNumberXmlAttributes& aAttr )
{
sal_Unicode numberChar[NumberChar_Count];
for (sal_Int16 i = 0; i < NumberChar_Count; i++)
numberChar[i] = NumberChar[i][1];
OUString number(numberChar, NumberChar_Count);
sal_Int16 num = sal::static_int_cast<sal_Int16>( number.indexOf(aAttr.Format) );
if ( aAttr.Style == "short" ) {
if (num == NumberChar_FullWidth)
return NativeNumberMode::NATNUM3;
else if (num == NumberChar_Hangul_ko)
return NativeNumberMode::NATNUM9;
else if (natNumIn(num, natnum1, sizeof_natnum1))
return NativeNumberMode::NATNUM1;
else if (natNumIn(num, natnum2, sizeof_natnum2))
return NativeNumberMode::NATNUM2;
} else if ( aAttr.Style == "medium" ) {
if (num == NumberChar_Hangul_ko)
return NativeNumberMode::NATNUM11;
else if (num == NumberChar_he)
return NativeNumberMode::NATNUM2;
else if (natNumIn(num, natnum1, sizeof_natnum1))
return NativeNumberMode::NATNUM7;
else if (natNumIn(num, natnum2, sizeof_natnum2))
return NativeNumberMode::NATNUM8;
} else if ( aAttr.Style == "long" ) {
if (num == NumberChar_FullWidth)
return NativeNumberMode::NATNUM6;
else if (num == NumberChar_Hangul_ko)
return NativeNumberMode::NATNUM10;
else if (natNumIn(num, natnum1, sizeof_natnum1))
return NativeNumberMode::NATNUM4;
else if (natNumIn(num, natnum2, sizeof_natnum2))
return NativeNumberMode::NATNUM5;
} else {
throw RuntimeException();
}
return NativeNumberMode::NATNUM0;
}
// Following code generates Hebrew Number,
// see numerical system in the Hebrew Numbering System in following link for details,
// http://smontagu.org/writings/HebrewNumbers.html
namespace {
struct HebrewNumberChar {
sal_Unicode code;
sal_Int16 value;
};
}
HebrewNumberChar const HebrewNumberCharArray[] = {
{ 0x05ea, 400 },
{ 0x05ea, 400 },
{ 0x05e9, 300 },
{ 0x05e8, 200 },
{ 0x05e7, 100 },
{ 0x05e6, 90 },
{ 0x05e4, 80 },
{ 0x05e2, 70 },
{ 0x05e1, 60 },
{ 0x05e0, 50 },
{ 0x05de, 40 },
{ 0x05dc, 30 },
{ 0x05db, 20 },
{ 0x05d9, 10 },
{ 0x05d8, 9 },
{ 0x05d7, 8 },
{ 0x05d6, 7 },
{ 0x05d5, 6 },
{ 0x05d4, 5 },
{ 0x05d3, 4 },
{ 0x05d2, 3 },
{ 0x05d1, 2 },
{ 0x05d0, 1 }
};
const sal_Unicode thousand[] = {0x05d0, 0x05dc, 0x05e3, 0x0};
const sal_Unicode thousands[] = {0x05d0, 0x05dc, 0x05e4, 0x05d9, 0x0};
const sal_Unicode thousands_last[] = {0x05d0, 0x05dc, 0x05e4, 0x05d9, 0x05dd, 0x0};
const sal_Unicode geresh = 0x05f3;
const sal_Unicode gershayim = 0x05f4;
static void makeHebrewNumber(sal_Int64 value, OUStringBuffer& output, bool isLast, bool useGeresh)
{
sal_Int16 num = sal::static_int_cast<sal_Int16>(value % 1000);
if (value > 1000) {
makeHebrewNumber(value / 1000, output, num != 0, useGeresh);
output.append(" ");
}
if (num == 0) {
output.append(value == 1000 ? thousand : isLast ? thousands_last : thousands);
} else {
sal_Int16 nbOfChar = 0;
for (sal_Int32 j = 0; num > 0 && j < sal_Int32(std::size(HebrewNumberCharArray)); j++) {
if (num - HebrewNumberCharArray[j].value >= 0) {
nbOfChar++;
// https://en.wikipedia.org/wiki/Hebrew_numerals#Key_exceptions
// By convention, the numbers 15 and 16 are represented as 9 + 6 and 9 + 7
if (num == 15 || num == 16) // substitution for 15 and 16
j++;
assert(j < sal_Int32(SAL_N_ELEMENTS(HebrewNumberCharArray)));
num = sal::static_int_cast<sal_Int16>( num - HebrewNumberCharArray[j].value );
output.append(HebrewNumberCharArray[j].code);
}
}
if (useGeresh) {
if (nbOfChar > 1) // a number is written as more than one character
output.insert(output.getLength() - 1, gershayim);
else if (nbOfChar == 1) // a number is written as a single character
output.append(geresh);
}
}
}
OUString getHebrewNativeNumberString(const OUString& aNumberString, bool useGeresh)
{
sal_Int64 value = 0;
sal_Int32 i, count = 0, len = aNumberString.getLength();
const sal_Unicode *src = aNumberString.getStr();
for (i = 0; i < len; i++) {
sal_Unicode ch = src[i];
if (isNumber(ch)) {
if (++count >= 20) // Number is too long, could not be handled.
return aNumberString;
value = value * 10 + (ch - NUMBER_ZERO);
}
else if (isSeparator(ch) && count > 0) continue;
else if (isMinus(ch) && count == 0) continue;
else break;
}
if (value > 0) {
OUStringBuffer output(count*2 + 2 + len - i);
makeHebrewNumber(value, output, true, useGeresh);
if (i < len)
output.append(aNumberString.subView(i));
return output.makeStringAndClear();
}
else
return aNumberString;
}
// Support for Cyrillic Numerals
// See UTN 41 for implementation information
// http://www.unicode.org/notes/tn41/
const sal_Unicode cyrillicThousandsMark = 0x0482;
const sal_Unicode cyrillicTitlo = 0x0483;
const sal_Unicode cyrillicTen = 0x0456;
namespace {
struct CyrillicNumberChar {
sal_Unicode code;
sal_Int16 value;
};
}
CyrillicNumberChar const CyrillicNumberCharArray[] = {
{ 0x0446, 900 },
{ 0x047f, 800 },
{ 0x0471, 700 },
{ 0x0445, 600 },
{ 0x0444, 500 },
{ 0x0443, 400 },
{ 0x0442, 300 },
{ 0x0441, 200 },
{ 0x0440, 100 },
{ 0x0447, 90 },
{ 0x043f, 80 },
{ 0x047b, 70 },
{ 0x046f, 60 },
{ 0x043d, 50 },
{ 0x043c, 40 },
{ 0x043b, 30 },
{ 0x043a, 20 },
{ 0x0456, 10 },
{ 0x0473, 9 },
{ 0x0438, 8 },
{ 0x0437, 7 },
{ 0x0455, 6 },
{ 0x0454, 5 },
{ 0x0434, 4 },
{ 0x0433, 3 },
{ 0x0432, 2 },
{ 0x0430, 1 }
};
static void makeCyrillicNumber(sal_Int64 value, OUStringBuffer& output, bool addTitlo)
{
sal_Int16 num = sal::static_int_cast<sal_Int16>(value % 1000);
if (value >= 1000) {
output.append(cyrillicThousandsMark);
makeCyrillicNumber(value / 1000, output, false);
if (value >= 10000 && (value - 10000) % 1000 != 0) {
output.append(" ");
}
if (value % 1000 == 0)
addTitlo = false;
}
for (sal_Int32 j = 0; num > 0 && j < sal_Int32(std::size(CyrillicNumberCharArray)); j++) {
if (num < 20 && num > 10) {
num -= 10;
makeCyrillicNumber(num, output, false);
output.append(cyrillicTen);
break;
}
if (CyrillicNumberCharArray[j].value <= num) {
output.append(CyrillicNumberCharArray[j].code);
num = sal::static_int_cast<sal_Int16>( num - CyrillicNumberCharArray[j].value );
}
}
if (!addTitlo)
return;
if (output.getLength() == 1) {
output.append(cyrillicTitlo);
} else if (output.getLength() == 2) {
if (value > 800 && value < 900) {
output.append(cyrillicTitlo);
} else {
output.insert(1, cyrillicTitlo);
}
} else if (output.getLength() > 2) {
if (output.indexOf(" ") == output.getLength() - 2) {
output.append(cyrillicTitlo);
} else {
output.insert(output.getLength() - 1, cyrillicTitlo);
}
}
}
OUString getCyrillicNativeNumberString(const OUString& aNumberString)
{
sal_Int64 value = 0;
sal_Int32 i, count = 0, len = aNumberString.getLength();
const sal_Unicode *src = aNumberString.getStr();
for (i = 0; i < len; i++) {
sal_Unicode ch = src[i];
if (isNumber(ch)) {
if (++count >= 8) // Number is too long, could not be handled.
return aNumberString;
value = value * 10 + (ch - NUMBER_ZERO);
}
else if (isSeparator(ch) && count > 0) continue;
else if (isMinus(ch) && count == 0) continue;
else break;
}
if (value > 0) {
OUStringBuffer output(count*2 + 2 + len - i);
makeCyrillicNumber(value, output, true);
if (i < len)
output.append(aNumberString.subView(i));
return output.makeStringAndClear();
}
else
return aNumberString;
}
constexpr OUString implementationName = u"com.sun.star.i18n.NativeNumberSupplier"_ustr;
OUString SAL_CALL NativeNumberSupplierService::getImplementationName()
{
return implementationName;
}
sal_Bool SAL_CALL
NativeNumberSupplierService::supportsService(const OUString& rServiceName)
{
return cppu::supportsService(this, rServiceName);
}
Sequence< OUString > SAL_CALL
NativeNumberSupplierService::getSupportedServiceNames()
{
return {implementationName, u"com.sun.star.i18n.NativeNumberSupplier2"_ustr};
}
}
extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface *
com_sun_star_i18n_NativeNumberSupplier_get_implementation(
css::uno::XComponentContext *,
css::uno::Sequence<css::uno::Any> const &)
{
return cppu::acquire(new i18npool::NativeNumberSupplierService());
}
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
↑ V530 The return value of function 'append' is required to be utilized.
↑ V530 The return value of function 'append' is required to be utilized.
↑ V530 The return value of function 'append' is required to be utilized.
↑ V530 The return value of function 'insert' is required to be utilized.
↑ V530 The return value of function 'append' is required to be utilized.
↑ V530 The return value of function 'append' is required to be utilized.
↑ V530 The return value of function 'append' is required to be utilized.
↑ V530 The return value of function 'append' is required to be utilized.
↑ V530 The return value of function 'append' is required to be utilized.
↑ V530 The return value of function 'append' is required to be utilized.
↑ V530 The return value of function 'append' is required to be utilized.
↑ V530 The return value of function 'insert' is required to be utilized.
↑ V530 The return value of function 'append' is required to be utilized.
↑ V530 The return value of function 'insert' is required to be utilized.
↑ V530 The return value of function 'append' is required to be utilized.