/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* This file is part of the LibreOffice project.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This file incorporates work covered by the following license notice:
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed
* with this work for additional information regarding copyright
* ownership. The ASF licenses this file to you under the Apache
* License, Version 2.0 (the "License"); you may not use this file
* except in compliance with the License. You may obtain a copy of
* the License at http://www.apache.org/licenses/LICENSE-2.0 .
*/
#include <sax/fastparser.hxx>
#include <sax/fastattribs.hxx>
#include <utility>
#include <xml2utf.hxx>
#include <com/sun/star/io/XSeekable.hpp>
#include <com/sun/star/lang/DisposedException.hpp>
#include <com/sun/star/lang/IllegalArgumentException.hpp>
#include <com/sun/star/uno/XComponentContext.hpp>
#include <com/sun/star/xml/sax/FastToken.hpp>
#include <com/sun/star/xml/sax/SAXParseException.hpp>
#include <com/sun/star/xml/sax/XFastContextHandler.hpp>
#include <cppuhelper/implbase.hxx>
#include <cppuhelper/supportsservice.hxx>
#include <cppuhelper/exc_hlp.hxx>
#include <osl/conditn.hxx>
#include <rtl/ref.hxx>
#include <sal/log.hxx>
#include <salhelper/thread.hxx>
#include <comphelper/diagnose_ex.hxx>
#include <o3tl/string_view.hxx>
#include <queue>
#include <memory>
#include <mutex>
#include <optional>
#include <stack>
#include <string_view>
#include <unordered_map>
#include <vector>
#include <cassert>
#include <cstring>
#include <libxml/parser.h>
// Inverse of libxml's BAD_CAST.
#define XML_CAST( str ) reinterpret_cast< const char* >( str )
using namespace ::osl;
using namespace ::cppu;
using namespace ::com::sun::star::uno;
using namespace ::com::sun::star::lang;
using namespace ::com::sun::star::xml::sax;
using namespace ::com::sun::star::io;
using namespace com::sun::star;
using namespace sax_fastparser;
static void NormalizeURI( OUString& rName );
namespace {
struct Event;
class FastLocatorImpl;
struct NamespaceDefine;
struct Entity;
// Maps a namespace URL to the namespace token registered for it
// (filled by FastSaxParserImpl::registerNamespace()).
typedef std::unordered_map< OUString, sal_Int32 > NamespaceMap;
// A batch of events handed from the producing (parser) side to the
// consuming side via Entity::maPendingEvents / Entity::maUsedEvents.
struct EventList
{
// The events of this batch; Entity::getEventList() sizes a fresh batch to
// Entity::mnEventListSize entries.
std::vector<Event> maEvents;
// Set by parseStream() once it has cleared the attribute lists of the
// events in this batch; callbackStartElement() then skips clearing them again.
bool mbIsAttributesEmpty;
};
// Kind of callback recorded in an Event. consume() stops at DONE and
// raises the pending parse error when it meets EXCEPTION.
enum class CallbackType { START_ELEMENT, END_ELEMENT, CHARACTERS, PROCESSING_INSTRUCTION, DONE, EXCEPTION };
/// One recorded parser callback.
///
/// Which members are meaningful depends on maType; see consume():
/// START_ELEMENT uses the token, namespace, element name and attribute
/// lists, CHARACTERS uses msChars, and PROCESSING_INSTRUCTION re-uses
/// msNamespace / msElementName as ( target, data ).
struct Event
{
    /// Kind of callback. Value-initialised so that a default-initialised
    /// Event (e.g. Entity::maSharedEvent) never holds an indeterminate value.
    CallbackType maType{};
    /// Element token. Value-initialised for the same reason as maType.
    sal_Int32 mnElementToken{};
    OUString msNamespace;
    OUString msElementName;
    /// Attributes of the element; created lazily by callbackStartElement().
    rtl::Reference< FastAttributeList > mxAttributes;
    /// Namespace declarations of the element; only created when a
    /// namespace handler is set (see callbackStartElement()).
    rtl::Reference< FastAttributeList > mxDeclAttributes;
    OUString msChars;
};
// A name together with its token. Entity::maNamespaceStack stores these;
// callbackStartElement() reads the top entry as the current namespace
// name and namespace token.
struct NameWithToken
{
OUString msName;
sal_Int32 mnToken;
// sName is taken by value and moved into msName.
NameWithToken(OUString sName, sal_Int32 nToken) :
msName(std::move(sName)), mnToken(nToken) {}
};
/// Per-element state kept on Entity::maContextStack between the start and
/// the end of an element.
struct SaxContext
{
    /// Handler responsible for this element; may be empty.
    Reference< XFastContextHandler > mxContext;
    sal_Int32 mnElementToken;
    /// Namespace and element name are only stored for elements without a
    /// known token, because only endUnknownElement() needs them.
    std::optional<OUString> moNamespace;
    std::optional<OUString> moElementName;

    SaxContext( sal_Int32 nElementToken, const OUString& aNamespace, const OUString& aElementName )
        : mnElementToken(nElementToken)
    {
        // Known tokens do not need the textual names.
        if (nElementToken != FastToken::DONTKNOW)
            return;

        moNamespace = aNamespace;
        moElementName = aElementName;
    }
};
// The handlers configured on the parser. FastSaxParserImpl keeps one
// instance (maData) that is copied into each Entity by pushEntity().
struct ParserData
{
css::uno::Reference< css::xml::sax::XFastDocumentHandler > mxDocumentHandler;
rtl::Reference<FastTokenHandlerBase> mxTokenHandler;
css::uno::Reference< css::xml::sax::XErrorHandler > mxErrorHandler;
css::uno::Reference< css::xml::sax::XFastNamespaceHandler >mxNamespaceHandler;
ParserData();
};
// One namespace declaration in scope: prefix, the token registered for the
// namespace URL (FastToken::DONTKNOW if none, see GetNamespaceToken()) and
// the URL itself. Stored in Entity::maNamespaceDefines.
struct NamespaceDefine
{
OString maPrefix;
sal_Int32 mnToken;
OUString maNamespaceURL;
// aPrefix and aNamespaceURL are taken by value and moved into the members.
NamespaceDefine( OString aPrefix, sal_Int32 nToken, OUString aNamespaceURL )
: maPrefix(std::move( aPrefix )), mnToken( nToken ), maNamespaceURL(std::move( aNamespaceURL )) {}
// Default state used when maNamespaceDefines is grown by resize().
NamespaceDefine() : mnToken(-1) {}
};
// Entity binds all information needed for a single file | single call of parseStream
// One Entity is pushed by pushEntity() at the start of parseStream() and
// popped again by ParserCleanup when that call ends.
struct Entity : public ParserData
{
// Amount of work producer sends to consumer in one iteration:
static const size_t mnEventListSize = 1000;
// unique for each Entity instance:
// Number of valid events in mxProducedEvents:
size_t mnProducedEventsSize;
// Batch currently being filled; empty until getEventList() provides one.
std::optional<EventList> mxProducedEvents;
// Batches queued by produce() and not yet taken by parseStream().
std::queue<EventList> maPendingEvents;
// Consumed batches kept for re-use by getEventList().
std::queue<EventList> maUsedEvents;
// Guards maPendingEvents and maUsedEvents.
std::mutex maEventProtector;
// parseStream() wakes the producer when at most mnEventLowWater batches
// are pending; produce() waits while mnEventHighWater or more are pending.
static const size_t mnEventLowWater = 4;
static const size_t mnEventHighWater = 8;
// Set by produce() after queueing a batch; parseStream() waits on it.
osl::Condition maConsumeResume;
// Set by parseStream() to let a waiting produce() continue.
osl::Condition maProduceResume;
// Event we use to store data if threading is disabled:
Event maSharedEvent;
// copied in copy constructor:
// Allow to disable threading for small documents:
bool mbEnableThreads;
css::xml::sax::InputSource maStructSource;
// libxml2 push-parser context; created lazily by FastSaxParserImpl::parse()
// and freed by ParserCleanup.
xmlParserCtxtPtr mpParser;
::sax_expatwrap::XMLFile2UTFConverter maConverter;
// Exceptions cannot be thrown through the C-XmlParser (possible
// resource leaks), therefore any exception thrown by a UNO callback
// must be saved somewhere until the C-XmlParser is stopped.
css::uno::Any maSavedException;
// Guards maSavedException.
std::mutex maSavedExceptionMutex;
// Stores e in maSavedException unless an exception is already saved.
void saveException( const Any & e );
// Thread-safe check if maSavedException has value
bool hasException();
// Builds a SAXParseException from the parser state and the saved
// exception, optionally reports it to mxErrorHandler, and throws it.
void throwException( const ::rtl::Reference< FastLocatorImpl > &xDocumentLocator,
bool mbDuringParse );
std::stack< NameWithToken, std::vector<NameWithToken> > maNamespaceStack;
/* Context for main thread consuming events.
* startElement() stores the data, which characters() and endElement() uses
*/
std::stack< SaxContext, std::vector<SaxContext> > maContextStack;
// Determines which elements of maNamespaceDefines are valid in current context
std::stack< sal_uInt32, std::vector<sal_uInt32> > maNamespaceCount;
std::vector< NamespaceDefine > maNamespaceDefines;
explicit Entity( const ParserData& rData );
Entity( const Entity& rEntity ) = delete;
Entity& operator=( const Entity& rEntity ) = delete;
// Handlers that forward an event to the current context / document handler.
void startElement( Event const *pEvent );
void characters( const OUString& sChars );
void endElement();
void processingInstruction( const OUString& rTarget, const OUString& rData );
// Returns the batch being filled, fetching or allocating one if needed.
EventList& getEventList();
// Returns the Event to fill for the next callback of type aType.
Event& getEvent( CallbackType aType );
};
// Stuff for custom entity names
// One custom entity: its name and the replacement text, as passed to
// FastSaxParserImpl::setCustomEntityNames().
struct ReplacementPair
{
OUString name;
OUString replacement;
};
/// Orders two replacement pairs by entity name; this is the ordering used
/// when setCustomEntityNames() sorts the replacement table.
inline bool operator<(const ReplacementPair& lhs, const ReplacementPair& rhs)
{
    const sal_Int32 nOrder = lhs.name.compareTo(rhs.name);
    return nOrder < 0;
}
/// Orders a replacement pair against a plain ASCII entity name, so a
/// C-string key can be compared with table entries directly.
inline bool operator<(const ReplacementPair& lhs, const char* rhs)
{
    const sal_Int32 nOrder = lhs.name.compareToAscii(rhs);
    return nOrder < 0;
}
} // namespace
namespace sax_fastparser {
// Implementation of the fast SAX parser: holds the configured handlers and
// registered namespaces, and drives libxml2 for each parseStream() call.
class FastSaxParserImpl
{
public:
explicit FastSaxParserImpl();
~FastSaxParserImpl();
private:
// Custom entity replacements, kept sorted by name (see setCustomEntityNames()).
std::vector<ReplacementPair> m_Replacements;
// libxml2 entities unlinked and freed in the destructor.
std::vector<xmlEntityPtr> m_TemporalEntities;
public:
// XFastParser
/// @throws css::xml::sax::SAXException
/// @throws css::io::IOException
/// @throws css::uno::RuntimeException
void parseStream( const css::xml::sax::InputSource& aInputSource );
/// @throws css::uno::RuntimeException
void setFastDocumentHandler( const css::uno::Reference< css::xml::sax::XFastDocumentHandler >& Handler );
/// @throws css::uno::RuntimeException
void setTokenHandler( const css::uno::Reference< css::xml::sax::XFastTokenHandler >& Handler );
/// @throws css::lang::IllegalArgumentException
/// @throws css::uno::RuntimeException
void registerNamespace( const OUString& NamespaceURL, sal_Int32 NamespaceToken );
/// @throws css::lang::IllegalArgumentException
/// @throws css::uno::RuntimeException
OUString const & getNamespaceURL( std::u16string_view rPrefix );
/// @throws css::uno::RuntimeException
void setErrorHandler( const css::uno::Reference< css::xml::sax::XErrorHandler >& Handler );
/// @throws css::uno::RuntimeException
void setNamespaceHandler( const css::uno::Reference< css::xml::sax::XFastNamespaceHandler >& Handler);
// Fake DTD file
void setCustomEntityNames(
const ::css::uno::Sequence<::css::beans::Pair<::rtl::OUString, ::rtl::OUString>>& replacements);
// called by the C callbacks of the libxml2 parser
void callbackStartElement( const xmlChar *localName , const xmlChar* prefix, const xmlChar* URI,
int numNamespaces, const xmlChar** namespaces, int numAttributes, const xmlChar **attributes );
void callbackEndElement();
void callbackCharacters( const xmlChar* s, int nLen );
void callbackProcessingInstruction( const xmlChar *target, const xmlChar *data );
xmlEntityPtr callbackGetEntity( const xmlChar *name );
// Entity stack handling; mpTop always tracks the top of maEntities.
void pushEntity(const ParserData&, xml::sax::InputSource const&);
void popEntity();
// Must only be called while an entity is pushed (mpTop is dereferenced unchecked).
Entity& getEntity() { return *mpTop; }
// Feeds the input stream to libxml2; runs on the parser thread when threading is enabled.
void parse();
// Queues the current event batch for the consumer when it is full or bForceFlush is set.
void produce( bool bForceFlush = false );
// When set, GetTokenWithPrefix() returns DONTKNOW for an undeclared prefix instead of throwing.
bool m_bIgnoreMissingNSDecl;
// When set, parseStream() never uses the parser thread.
bool m_bDisableThreadedParser;
private:
// Dispatches the events of one batch; returns false once parsing is done.
bool consume(EventList&);
void deleteUsedEvents();
void sendPendingCharacters();
void addUnknownElementWithPrefix(const xmlChar **attributes, int i, rtl::Reference< FastAttributeList > const & xAttributes);
sal_Int32 GetToken( const xmlChar* pName );
/// @throws css::xml::sax::SAXException
sal_Int32 GetTokenWithPrefix( const xmlChar* pPrefix, const xmlChar* pName );
/// @throws css::xml::sax::SAXException
OUString const & GetNamespaceURL( std::string_view rPrefix );
sal_Int32 GetNamespaceToken( const OUString& rNamespaceURL );
sal_Int32 GetTokenWithContextNamespace( sal_Int32 nNamespaceToken, const xmlChar* pName );
void DefineNamespace( const OString& rPrefix, const OUString& namespaceURL );
private:
std::mutex maMutex; ///< Protecting whole parseStream() execution
::rtl::Reference< FastLocatorImpl > mxDocumentLocator;
NamespaceMap maNamespaceMap;
ParserData maData; /// Cached parser configuration for next call of parseStream().
Entity *mpTop; /// std::stack::top() is amazingly slow => cache this.
std::stack< Entity > maEntities; /// Entity stack for each call of parseStream().
std::vector<char> pendingCharacters; /// Data from characters() callback that needs to be sent.
};
} // namespace sax_fastparser
namespace {
/// Thread that runs FastSaxParserImpl::parse() so that parsing (producing
/// events) can overlap with consuming them in parseStream().
class ParserThread: public salhelper::Thread
{
    FastSaxParserImpl *mpParser;

public:
    explicit ParserThread(FastSaxParserImpl *pParser)
        : Thread("Parser")
        , mpParser(pParser)
    {
    }

private:
    virtual void execute() override
    {
        try
        {
            mpParser->parse();
        }
        catch (...)
        {
            // Nothing may escape the thread: queue an EXCEPTION event and
            // flush it, so the consumer raises the error on its side.
            mpParser->getEntity().getEvent( CallbackType::EXCEPTION );
            mpParser->produce( true );
        }
    }
};
extern "C" {
static void call_callbackStartElement(void *userData, const xmlChar *localName , const xmlChar* prefix, const xmlChar* URI,
int numNamespaces, const xmlChar** namespaces, int numAttributes, int /*defaultedAttributes*/, const xmlChar **attributes)
{
FastSaxParserImpl* pFastParser = static_cast<FastSaxParserImpl*>( userData );
pFastParser->callbackStartElement( localName, prefix, URI, numNamespaces, namespaces, numAttributes, attributes );
}
static void call_callbackEndElement(void *userData, const xmlChar* /*localName*/, const xmlChar* /*prefix*/, const xmlChar* /*URI*/)
{
FastSaxParserImpl* pFastParser = static_cast<FastSaxParserImpl*>( userData );
pFastParser->callbackEndElement();
}
static void call_callbackCharacters( void *userData , const xmlChar *s , int nLen )
{
FastSaxParserImpl* pFastParser = static_cast<FastSaxParserImpl*>( userData );
pFastParser->callbackCharacters( s, nLen );
}
static void call_callbackProcessingInstruction( void *userData, const xmlChar *target, const xmlChar *data )
{
FastSaxParserImpl* pFastParser = static_cast<FastSaxParserImpl*>( userData );
pFastParser->callbackProcessingInstruction( target, data );
}
static xmlEntityPtr call_callbackGetEntity( void *userData, const xmlChar *name)
{
FastSaxParserImpl* pFastParser = static_cast<FastSaxParserImpl*>( userData );
return pFastParser->callbackGetEntity( name );
}
}
// XLocator implementation that reports the position and ids of the entity
// currently on top of the parser's entity stack.
class FastLocatorImpl : public WeakImplHelper< XLocator >
{
public:
explicit FastLocatorImpl(FastSaxParserImpl *p) : mpParser(p) {}
// Detaches the locator from its parser (called from the parser's
// destructor); every later query throws DisposedException.
void dispose() { mpParser = nullptr; }
/// @throws RuntimeException
void checkDispose() const { if( !mpParser ) throw DisposedException(); }
//XLocator
virtual sal_Int32 SAL_CALL getColumnNumber() override;
virtual sal_Int32 SAL_CALL getLineNumber() override;
virtual OUString SAL_CALL getPublicId() override;
virtual OUString SAL_CALL getSystemId() override;
private:
// Not owned; nullptr after dispose().
FastSaxParserImpl *mpParser;
};
/// Returns the column libxml2 reports for the current entity's parser context.
/// @throws DisposedException if the locator has been disposed
sal_Int32 SAL_CALL FastLocatorImpl::getColumnNumber()
{
    checkDispose();
    xmlParserCtxtPtr pContext = mpParser->getEntity().mpParser;
    return xmlSAX2GetColumnNumber( pContext );
}
/// Returns the line libxml2 reports for the current entity's parser context.
/// @throws DisposedException if the locator has been disposed
sal_Int32 SAL_CALL FastLocatorImpl::getLineNumber()
{
    checkDispose();
    xmlParserCtxtPtr pContext = mpParser->getEntity().mpParser;
    return xmlSAX2GetLineNumber( pContext );
}
/// Returns the public id of the input source of the current entity.
/// @throws DisposedException if the locator has been disposed
OUString SAL_CALL FastLocatorImpl::getPublicId()
{
    checkDispose();
    const Entity& rCurrent = mpParser->getEntity();
    return rCurrent.maStructSource.sPublicId;
}
/// Returns the system id of the input source of the current entity.
/// @throws DisposedException if the locator has been disposed
OUString SAL_CALL FastLocatorImpl::getSystemId()
{
    checkDispose();
    const Entity& rCurrent = mpParser->getEntity();
    return rCurrent.maStructSource.sSystemId;
}
/// All handler references start out empty.
ParserData::ParserData() = default;
Entity::Entity(const ParserData& rData)
: ParserData(rData)
, mnProducedEventsSize(0)
, mbEnableThreads(false)
, mpParser(nullptr)
{
}
/// Handles a START_ELEMENT event: pushes a context entry for the element,
/// reports its namespace declarations, asks the parent context (or the
/// document handler for the root element) for a child context and starts
/// the element on it.
///
/// If the parent element has no context handler, the element is only
/// recorded on the stack and no handler is called. Exceptions thrown by
/// handlers are stored via saveException() instead of propagating.
void Entity::startElement( Event const *pEvent )
{
    const sal_Int32 nToken = pEvent->mnElementToken;
    const OUString& rNamespace = pEvent->msNamespace;
    const OUString& rName = pEvent->msElementName;

    // Use an un-wrapped pointer to avoid significant acquire/release overhead.
    const bool bIsRoot = maContextStack.empty();
    XFastContextHandler* const pParent
        = bIsRoot ? nullptr : maContextStack.top().mxContext.get();

    maContextStack.push( SaxContext( nToken, rNamespace, rName ) );

    // The enclosing element is not handled, so neither is this one.
    if( !bIsRoot && !pParent )
        return;

    try
    {
        const Reference< XFastAttributeList > xAttributes( pEvent->mxAttributes );
        Reference< XFastContextHandler > xChild;

        if( mxNamespaceHandler.is() )
        {
            const Sequence< xml::Attribute > aDeclarations
                = pEvent->mxDeclAttributes->getUnknownAttributes();
            for( const xml::Attribute& rDeclaration : aDeclarations )
                mxNamespaceHandler->registerNamespace( rDeclaration.Name, rDeclaration.Value );
        }

        if( nToken != FastToken::DONTKNOW )
        {
            if( pParent )
                xChild = pParent->createFastChildContext( nToken, xAttributes );
            else if( mxDocumentHandler.is() )
                xChild = mxDocumentHandler->createFastChildContext( nToken, xAttributes );

            if( xChild.is() )
                xChild->startFastElement( nToken, xAttributes );
        }
        else
        {
            if( pParent )
                xChild = pParent->createUnknownChildContext( rNamespace, rName, xAttributes );
            else if( mxDocumentHandler.is() )
                xChild = mxDocumentHandler->createUnknownChildContext( rNamespace, rName, xAttributes );

            if( xChild.is() )
                xChild->startUnknownElement( rNamespace, rName, xAttributes );
        }

        // Move the reference we own into the stack entry to avoid refcount thrash.
        maContextStack.top().mxContext = std::move( xChild );
    }
    catch (...)
    {
        saveException( ::cppu::getCaughtException() );
    }
}
void Entity::characters( const OUString& sChars )
{
if (maContextStack.empty())
{
// Malformed XML stream !?
return;
}
XFastContextHandler * pContext( maContextStack.top().mxContext.get() );
if( pContext ) try
{
pContext->characters( sChars );
}
catch (...)
{
saveException( ::cppu::getCaughtException() );
}
}
void Entity::endElement()
{
if (maContextStack.empty())
{
// Malformed XML stream !?
return;
}
const SaxContext& aContext = maContextStack.top();
XFastContextHandler* pContext( aContext.mxContext.get() );
if( pContext )
try
{
sal_Int32 nElementToken = aContext.mnElementToken;
if( nElementToken != FastToken::DONTKNOW )
pContext->endFastElement( nElementToken );
else
pContext->endUnknownElement( *aContext.moNamespace, *aContext.moElementName );
}
catch (...)
{
saveException( ::cppu::getCaughtException() );
}
maContextStack.pop();
}
/// Forwards a processing instruction to the document handler, if one is
/// set. Handler exceptions are stored via saveException().
void Entity::processingInstruction( const OUString& rTarget, const OUString& rData )
{
    if( !mxDocumentHandler.is() )
        return;

    try
    {
        mxDocumentHandler->processingInstruction( rTarget, rData );
    }
    catch (...)
    {
        saveException( ::cppu::getCaughtException() );
    }
}
/// Returns the batch that is currently being filled.
///
/// If there is none, a consumed batch is recycled from maUsedEvents when
/// available; otherwise a fresh batch of mnEventListSize events is
/// allocated. In both cases the fill counter restarts at zero.
EventList& Entity::getEventList()
{
    if (mxProducedEvents)
        return *mxProducedEvents;

    std::unique_lock aLock(maEventProtector);
    if (maUsedEvents.empty())
    {
        mxProducedEvents.emplace();
        mxProducedEvents->maEvents.resize(mnEventListSize);
        mxProducedEvents->mbIsAttributesEmpty = false;
    }
    else
    {
        mxProducedEvents = std::move(maUsedEvents.front());
        maUsedEvents.pop();
        aLock.unlock(); // the queue is no longer touched
    }
    mnProducedEventsSize = 0;
    return *mxProducedEvents;
}
/// Returns the Event to be filled for the next parser callback.
///
/// Without threading the single shared event is returned as is (its maType
/// is not touched). With threading the next free slot of the current batch
/// is returned with maType set to aType; if the batch is already full it is
/// grown by one slot.
///
/// @param aType kind of callback the event will describe
/// @return reference to the event; for batched events it stays valid until
///         the batch is resized or handed over by produce()
Event& Entity::getEvent( CallbackType aType )
{
    if (!mbEnableThreads)
        return maSharedEvent;

    EventList& rEventList = getEventList();
    if (mnProducedEventsSize == rEventList.maEvents.size())
    {
        // This runs on the producer thread while the consumer may be inside
        // saveException(), so go through the locked accessor instead of
        // reading maSavedException directly.
        SAL_WARN_IF(!hasException(), "sax",
            "Event vector should only exceed " << mnEventListSize <<
            " temporarily while an exception is pending");
        rEventList.maEvents.resize(mnProducedEventsSize + 1);
    }
    Event& rEvent = rEventList.maEvents[mnProducedEventsSize++];
    rEvent.maType = aType;
    return rEvent;
}
/// Builds the text of a parse error: "[<system id> line <n>]: <message>".
/// The message is libxml2's last error for ctxt, or "unknown error" when
/// libxml2 has none.
OUString lclGetErrorMessage( xmlParserCtxtPtr ctxt, std::u16string_view sSystemId, sal_Int32 nLine )
{
    const xmlError* const pLastError = xmlCtxtGetLastError( ctxt );
    const char* const pText
        = (pLastError && pLastError->message) ? pLastError->message : "unknown error";

    return OUString::Concat("[") + sSystemId + " line " + OUString::number(nLine) + "]: " +
           OUString(pText, strlen(pText), RTL_TEXTENCODING_ASCII_US);
}
/// Throws a SAXParseException describing the current parse error.
///
/// The exception carries the saved handler exception (if any) and the
/// position reported by xDocumentLocator. The error handler is notified
/// first, unless this is called during a threaded parse (mbDuringParse with
/// threading enabled), where that callback must be avoided.
///
/// @throws SAXParseException always, unless the error handler throws first
void Entity::throwException( const ::rtl::Reference< FastLocatorImpl > &xDocumentLocator,
                             bool mbDuringParse )
{
    // Wrap whatever a handler callback left behind.
    Any aWrapped;
    {
        std::scoped_lock aGuard(maSavedExceptionMutex);
        if (maSavedException.hasValue())
            aWrapped.setValue(&maSavedException, cppu::UnoType<decltype(maSavedException)>::get());
    }

    const OUString sSystemId = xDocumentLocator->getSystemId();
    const sal_Int32 nLine = xDocumentLocator->getLineNumber();

    SAXParseException aExcept(
        lclGetErrorMessage( mpParser, sSystemId, nLine ),
        Reference< XInterface >(),
        aWrapped,
        xDocumentLocator->getPublicId(),
        sSystemId,
        nLine,
        xDocumentLocator->getColumnNumber()
    );

    // If an error handler is set, it may throw the exception itself.
    const bool bInThreadedParse = mbDuringParse && mbEnableThreads;
    if( !bInThreadedParse && mxErrorHandler.is() )
        mxErrorHandler->fatalError( Any( aExcept ) );

    // The error handler has not thrown, but parsing must stop => throw ourselves.
    throw aExcept;
}
// In the single threaded case we emit events via our C
// callbacks, so any exception caught must be queued up until
// we can safely re-throw it from our C++ parent of parse()
// If multi-threaded, we need to push an EXCEPTION event, at
// which point we transfer ownership of maSavedException to
// the consuming thread.
/// Remembers e as the pending exception. Only the first exception is kept;
/// later ones are logged and dropped.
void Entity::saveException( const Any & e )
{
    // fdo#81214 - allow the parser to run on after an exception,
    // unexpectedly some 'startElements' produce a UNO_QUERY_THROW
    // for XComponent; and yet expect to continue parsing.
    SAL_WARN("sax", "Unexpected exception from XML parser " << exceptionToString(e));

    std::scoped_lock aGuard(maSavedExceptionMutex);
    if (!maSavedException.hasValue())
    {
        maSavedException = e;
        return;
    }
    SAL_INFO("sax.fastparser", "discarding exception, already have one");
}
/// Reports, under maSavedExceptionMutex, whether an exception is pending.
bool Entity::hasException()
{
    std::lock_guard<std::mutex> aGuard(maSavedExceptionMutex);
    const bool bPending = maSavedException.hasValue();
    return bPending;
}
} // namespace
namespace sax_fastparser {
/// Creates a parser with no entity pushed, missing namespace declarations
/// treated as errors, threaded parsing allowed, and a locator bound to
/// this parser.
FastSaxParserImpl::FastSaxParserImpl()
    : m_bIgnoreMissingNSDecl(false)
    , m_bDisableThreadedParser(false)
    , mxDocumentLocator(new FastLocatorImpl( this ))
    , mpTop(nullptr)
{
}
/// Detaches the locator (which others may still reference) from this
/// parser and frees the temporary libxml2 entities.
FastSaxParserImpl::~FastSaxParserImpl()
{
    if( mxDocumentLocator.is() )
        mxDocumentLocator->dispose();

    for (xmlEntityPtr pEntity : m_TemporalEntities)
    {
        if (pEntity)
        {
            // libxml2 frees entities through the generic node interface.
            xmlNodePtr pNode = reinterpret_cast<xmlNodePtr>(pEntity);
            xmlUnlinkNode(pNode);
            xmlFreeNode(pNode);
        }
    }
}
void FastSaxParserImpl::DefineNamespace( const OString& rPrefix, const OUString& namespaceURL )
{
Entity& rEntity = getEntity();
assert(!rEntity.maNamespaceCount.empty()); // need a context!
sal_uInt32 nOffset = rEntity.maNamespaceCount.top()++;
if( rEntity.maNamespaceDefines.size() <= nOffset )
rEntity.maNamespaceDefines.resize( rEntity.maNamespaceDefines.size() + 64 );
rEntity.maNamespaceDefines[nOffset] = NamespaceDefine( rPrefix, GetNamespaceToken( namespaceURL ), namespaceURL );
}
/// Looks up the token for a UTF-8 name via the current entity's token handler.
sal_Int32 FastSaxParserImpl::GetToken(const xmlChar* pName)
{
    FastTokenHandlerBase* const pTokenHandler = getEntity().mxTokenHandler.get();
    return FastTokenHandlerBase::getTokenFromChars( pTokenHandler, XML_CAST( pName ) ); // uses utf-8
}
/// Resolves a prefixed name to a combined namespace|name token.
///
/// The namespace declarations in scope are searched from the innermost
/// outwards for pPrefix. Returns FastToken::DONTKNOW when there is no
/// namespace context, or when the prefix is unknown and
/// m_bIgnoreMissingNSDecl is set.
///
/// @throws css::xml::sax::SAXException if the prefix is not declared and
///         m_bIgnoreMissingNSDecl is not set
sal_Int32 FastSaxParserImpl::GetTokenWithPrefix( const xmlChar* pPrefix, const xmlChar* pName )
{
    Entity& rEntity = getEntity();
    if (rEntity.maNamespaceCount.empty())
        return FastToken::DONTKNOW;

    const std::string_view sPrefix(XML_CAST(pPrefix));
    for (sal_uInt32 nRemaining = rEntity.maNamespaceCount.top(); nRemaining > 0; --nRemaining)
    {
        const NamespaceDefine& rDefine = rEntity.maNamespaceDefines[nRemaining - 1];
        if( rDefine.maPrefix == sPrefix )
            return GetTokenWithContextNamespace(rDefine.mnToken, pName);
    }

    if (m_bIgnoreMissingNSDecl)
        return FastToken::DONTKNOW;

    throw SAXException("No namespace defined for " + OStringToOUString(sPrefix,
                RTL_TEXTENCODING_UTF8), {}, {});
}
/// Returns the token registered for rNamespaceURL, or FastToken::DONTKNOW
/// if the URL has not been registered.
sal_Int32 FastSaxParserImpl::GetNamespaceToken( const OUString& rNamespaceURL )
{
    const auto aFound = maNamespaceMap.find( rNamespaceURL );
    if( aFound == maNamespaceMap.end() )
        return FastToken::DONTKNOW;
    return aFound->second;
}
/// Returns the URL of the namespace currently bound to rPrefix, searching
/// the declarations in scope from the innermost outwards.
///
/// @throws css::xml::sax::SAXException if no declaration for the prefix is
///         in scope
OUString const & FastSaxParserImpl::GetNamespaceURL( std::string_view rPrefix )
{
    Entity& rEntity = getEntity();
    if( !rEntity.maNamespaceCount.empty() )
    {
        for (sal_uInt32 nRemaining = rEntity.maNamespaceCount.top(); nRemaining > 0; --nRemaining)
        {
            const NamespaceDefine& rDefine = rEntity.maNamespaceDefines[nRemaining - 1];
            if( rDefine.maPrefix == rPrefix )
                return rDefine.maNamespaceURL;
        }
    }

    throw SAXException("No namespace defined for " + OUString::fromUtf8(rPrefix),
            Reference< XInterface >(), Any());
}
/// Combines a namespace token with the token of pName.
/// Returns FastToken::DONTKNOW if either the namespace or the name is unknown.
sal_Int32 FastSaxParserImpl::GetTokenWithContextNamespace( sal_Int32 nNamespaceToken, const xmlChar* pName )
{
    if( nNamespaceToken == FastToken::DONTKNOW )
        return FastToken::DONTKNOW;

    const sal_Int32 nLocalToken = GetToken( pName );
    if( nLocalToken == FastToken::DONTKNOW )
        return FastToken::DONTKNOW;

    return nNamespaceToken | nLocalToken;
}
namespace
{
class ParserCleanup
{
private:
FastSaxParserImpl& m_rParser;
Entity& m_rEntity;
rtl::Reference<ParserThread> m_xParser;
public:
ParserCleanup(FastSaxParserImpl& rParser, Entity& rEntity)
: m_rParser(rParser)
, m_rEntity(rEntity)
{
}
~ParserCleanup()
{
if (m_rEntity.mpParser)
{
if (m_rEntity.mpParser->myDoc)
xmlFreeDoc(m_rEntity.mpParser->myDoc);
xmlFreeParserCtxt(m_rEntity.mpParser);
}
joinThread();
m_rParser.popEntity();
}
void setThread(const rtl::Reference<ParserThread> &xParser)
{
m_xParser = xParser;
}
void joinThread()
{
if (m_xParser.is())
{
rtl::Reference<ParserThread> xToJoin = m_xParser;
m_xParser.clear();
xToJoin->join();
}
}
};
}
/***************
*
* parseStream does Parser-startup initializations. The FastSaxParserImpl::parse() method does
* the file-specific initialization work. (During a parser run, external files may be opened)
*
* With threading enabled, parse() runs on a ParserThread that queues event
* batches, and this function consumes them; otherwise parse() is called
* directly on the calling thread.
*
****************/
void FastSaxParserImpl::parseStream(const InputSource& rStructSource)
{
xmlInitParser();
// Only one parse at a time per parser instance
std::unique_lock guard( maMutex );
pushEntity(maData, rStructSource);
Entity& rEntity = getEntity();
// Frees the libxml2 context, joins the thread and pops the entity on every exit path.
ParserCleanup aEnsureFree(*this, rEntity);
// start the document
if( rEntity.mxDocumentHandler.is() )
{
rEntity.mxDocumentHandler->setDocumentLocator( mxDocumentLocator );
rEntity.mxDocumentHandler->startDocument();
}
#ifdef EMSCRIPTEN
rEntity.mbEnableThreads = false;
#else
// Threading is only used for inputs larger than 10000 bytes, and can be
// switched off via the SAX_DISABLE_THREADS environment variable or
// m_bDisableThreadedParser.
if (!getenv("SAX_DISABLE_THREADS") && !m_bDisableThreadedParser)
{
Reference<css::io::XSeekable> xSeekable(rEntity.maStructSource.aInputStream, UNO_QUERY);
// available() is not __really__ relevant here, but leave it in as a heuristic for non-seekable streams
rEntity.mbEnableThreads = (xSeekable.is() && xSeekable->getLength() > 10000)
|| (rEntity.maStructSource.aInputStream->available() > 10000);
}
#endif
if (rEntity.mbEnableThreads)
{
rtl::Reference<ParserThread> xParser = new ParserThread(this);
xParser->launch();
aEnsureFree.setThread(xParser);
bool done = false;
do {
// Wait until produce() has queued at least one batch.
rEntity.maConsumeResume.wait();
rEntity.maConsumeResume.reset();
std::unique_lock aGuard(rEntity.maEventProtector);
while (!rEntity.maPendingEvents.empty())
{
if (rEntity.maPendingEvents.size() <= Entity::mnEventLowWater)
rEntity.maProduceResume.set(); // start producer again
EventList aEventList = std::move(rEntity.maPendingEvents.front());
rEntity.maPendingEvents.pop();
aGuard.unlock(); // unlock
// consume() returns false once it has seen the DONE event.
if (!consume(aEventList))
done = true;
aGuard.lock(); // lock
// When few batches are pending, clear the attribute lists here (with the
// mutex released) and mark the batch, so callbackStartElement() can skip
// clearing them when the batch is re-used.
if ( rEntity.maPendingEvents.size() <= Entity::mnEventLowWater )
{
aGuard.unlock();
for (auto& rEvent : aEventList.maEvents)
{
if (rEvent.mxAttributes.is())
{
rEvent.mxAttributes->clear();
if( rEntity.mxNamespaceHandler.is() )
rEvent.mxDeclAttributes->clear();
}
aEventList.mbIsAttributesEmpty = true;
}
aGuard.lock();
}
// Hand the batch back for re-use by Entity::getEventList().
rEntity.maUsedEvents.push(std::move(aEventList));
}
} while (!done);
aEnsureFree.joinThread();
deleteUsedEvents();
// handler callbacks invoked by consume() may have saved an exception
// No need to lock maSavedExceptionMutex here because parser
// thread is joined.
if( rEntity.maSavedException.hasValue() )
rEntity.throwException( mxDocumentLocator, true );
}
else
{
parse();
}
// finish document
if( rEntity.mxDocumentHandler.is() )
{
rEntity.mxDocumentHandler->endDocument();
}
}
/// Stores the document handler in the cached configuration; it takes
/// effect for the next parseStream() call.
void FastSaxParserImpl::setFastDocumentHandler( const Reference< XFastDocumentHandler >& Handler )
{
    maData.mxDocumentHandler.set( Handler );
}
void FastSaxParserImpl::setTokenHandler( const Reference< XFastTokenHandler >& xHandler )
{
assert( dynamic_cast< FastTokenHandlerBase *>( xHandler.get() ) && "we expect this handler to be a subclass of FastTokenHandlerBase" );
maData.mxTokenHandler = dynamic_cast< FastTokenHandlerBase *>( xHandler.get() );
}
/// Registers NamespaceToken as the token for NamespaceURL.
///
/// @throws css::lang::IllegalArgumentException if the token is below
///         FastToken::NAMESPACE or the URL is already registered
void FastSaxParserImpl::registerNamespace( const OUString& NamespaceURL, sal_Int32 NamespaceToken )
{
    if( NamespaceToken < FastToken::NAMESPACE )
        throw IllegalArgumentException("Invalid namespace token " + OUString::number(NamespaceToken), css::uno::Reference<css::uno::XInterface >(), 0);

    const bool bAlreadyKnown = GetNamespaceToken( NamespaceURL ) != FastToken::DONTKNOW;
    if( bAlreadyKnown )
        throw IllegalArgumentException("namespace URL is already registered: " + NamespaceURL, css::uno::Reference<css::uno::XInterface >(), 0);

    maNamespaceMap[ NamespaceURL ] = NamespaceToken;
}
/// Public lookup of the namespace URL bound to a UTF-16 prefix.
///
/// Delegates to GetNamespaceURL() and turns any UNO exception from it into
/// an IllegalArgumentException.
///
/// @throws css::lang::IllegalArgumentException if the prefix is not bound
OUString const & FastSaxParserImpl::getNamespaceURL( std::u16string_view rPrefix )
{
    const OString aUtf8Prefix = OUStringToOString( rPrefix, RTL_TEXTENCODING_UTF8 );
    try
    {
        return GetNamespaceURL( aUtf8Prefix );
    }
    catch (const Exception&)
    {
        throw IllegalArgumentException();
    }
}
/// Stores the error handler in the cached configuration; it takes effect
/// for the next parseStream() call.
void FastSaxParserImpl::setErrorHandler(const Reference< XErrorHandler > & Handler)
{
    maData.mxErrorHandler.set( Handler );
}
/// Stores the namespace handler in the cached configuration; it takes
/// effect for the next parseStream() call.
void FastSaxParserImpl::setNamespaceHandler( const Reference< XFastNamespaceHandler >& Handler )
{
    maData.mxNamespaceHandler.set( Handler );
}
/// Replaces the table of custom entities with the given (name, replacement)
/// pairs and keeps it sorted by name.
void FastSaxParserImpl::setCustomEntityNames(
    const ::css::uno::Sequence<::css::beans::Pair<::rtl::OUString, ::rtl::OUString>>& replacements)
{
    m_Replacements.clear();
    m_Replacements.reserve(replacements.size());
    for (const auto& rPair : replacements)
        m_Replacements.push_back(ReplacementPair{ rPair.First, rPair.Second });

    // Nothing to order with fewer than two entries.
    if (m_Replacements.size() > 1)
        std::sort(m_Replacements.begin(), m_Replacements.end());
}
void FastSaxParserImpl::deleteUsedEvents()
{
Entity& rEntity = getEntity();
std::unique_lock aGuard(rEntity.maEventProtector);
while (!rEntity.maUsedEvents.empty())
{
{ // the block makes sure that aEventList is destructed outside the lock
EventList aEventList = std::move(rEntity.maUsedEvents.front());
rEntity.maUsedEvents.pop();
aGuard.unlock(); // unlock
}
aGuard.lock(); // lock
}
}
/// Hands the batch being filled over to the consumer.
///
/// Does nothing unless the batch is full or bForceFlush is set. While
/// Entity::mnEventHighWater or more batches are pending, the call waits for
/// the consumer to signal maProduceResume. After queueing the batch the
/// consumer is woken via maConsumeResume.
///
/// A current batch must exist (mxProducedEvents engaged) when the flush
/// actually happens; callers obtain one through Entity::getEvent() first.
void FastSaxParserImpl::produce( bool bForceFlush )
{
    Entity& rEntity = getEntity();
    const bool bBatchFull = rEntity.mnProducedEventsSize >= Entity::mnEventListSize;
    if (!bForceFlush && !bBatchFull)
        return;

    std::unique_lock aLock(rEntity.maEventProtector);

    while (rEntity.maPendingEvents.size() >= Entity::mnEventHighWater)
    {
        // Pause parsing for a bit: wait, without the mutex, for the consumer.
        aLock.unlock();
        rEntity.maProduceResume.wait();
        rEntity.maProduceResume.reset();
        aLock.lock();
    }

    rEntity.maPendingEvents.push(std::move(*rEntity.mxProducedEvents));
    rEntity.mxProducedEvents.reset();
    assert(!rEntity.mxProducedEvents);

    aLock.unlock();

    rEntity.maConsumeResume.set();
}
/// Dispatches the events of one batch to the current entity, in order.
///
/// @return true if the whole batch was dispatched and more batches are
///         expected; false once the DONE event has been seen
/// @throws css::xml::sax::SAXParseException (via Entity::throwException())
///         when the batch contains an EXCEPTION event
bool FastSaxParserImpl::consume(EventList& rEventList)
{
    Entity& rEntity = getEntity();

    // The attribute lists are about to be handed to handlers again.
    rEventList.mbIsAttributesEmpty = false;

    for (Event& rEvent : rEventList.maEvents)
    {
        switch (rEvent.maType)
        {
            case CallbackType::START_ELEMENT:
                rEntity.startElement( &rEvent );
                break;

            case CallbackType::END_ELEMENT:
                rEntity.endElement();
                break;

            case CallbackType::CHARACTERS:
                rEntity.characters( rEvent.msChars );
                break;

            case CallbackType::PROCESSING_INSTRUCTION:
                // The namespace / element name fields carry ( target, data ).
                rEntity.processingInstruction( rEvent.msNamespace, rEvent.msElementName );
                break;

            case CallbackType::DONE:
                return false;

            case CallbackType::EXCEPTION:
                rEntity.throwException( mxDocumentLocator, false );
                [[fallthrough]]; // avoid unreachable code warning with some compilers
            default:
                assert(false);
                return false;
        }
    }
    return true;
}
void FastSaxParserImpl::pushEntity(const ParserData& rEntityData,
xml::sax::InputSource const& rSource)
{
if (!rSource.aInputStream.is())
throw SAXException(u"No input source"_ustr, Reference<XInterface>(), Any());
maEntities.emplace(rEntityData);
mpTop = &maEntities.top();
mpTop->maStructSource = rSource;
mpTop->maConverter.setInputStream(mpTop->maStructSource.aInputStream);
if (!mpTop->maStructSource.sEncoding.isEmpty())
{
mpTop->maConverter.setEncoding(OUStringToOString(mpTop->maStructSource.sEncoding, RTL_TEXTENCODING_ASCII_US));
}
}
/// Pops the current Entity and re-points mpTop at the new top of the
/// stack, or at nullptr when the stack became empty.
void FastSaxParserImpl::popEntity()
{
    maEntities.pop();
    if (maEntities.empty())
        mpTop = nullptr;
    else
        mpTop = &maEntities.top();
}
// starts parsing with actual parser !
// Reads the converted input in chunks of BUFFER_SIZE bytes and pushes them
// into libxml2. The parser context is created from the first chunk; an
// empty read terminates the document. Parse errors and exceptions saved by
// callbacks are raised via Entity::throwException(). On success a DONE
// event is recorded and, with threading enabled, flushed to the consumer.
void FastSaxParserImpl::parse()
{
const int BUFFER_SIZE = 16 * 1024;
Sequence< sal_Int8 > seqOut( BUFFER_SIZE );
Entity& rEntity = getEntity();
// set all necessary C-Callbacks
// (static storage: zero-initialised, so callbacks not set below stay null)
static xmlSAXHandler callbacks;
callbacks.startElementNs = call_callbackStartElement;
callbacks.endElementNs = call_callbackEndElement;
callbacks.characters = call_callbackCharacters;
callbacks.processingInstruction = call_callbackProcessingInstruction;
callbacks.getEntity = call_callbackGetEntity;
callbacks.initialized = XML_SAX2_MAGIC;
int nRead = 0;
do
{
nRead = rEntity.maConverter.readAndConvert( seqOut, BUFFER_SIZE );
if( nRead <= 0 )
{
// End of input: if a parser exists, tell it so with a terminating,
// zero-length chunk.
if( rEntity.mpParser != nullptr )
{
if( xmlParseChunk( rEntity.mpParser, reinterpret_cast<const char*>(seqOut.getConstArray()), 0, 1 ) != XML_ERR_OK )
rEntity.throwException( mxDocumentLocator, true );
if (rEntity.hasException())
rEntity.throwException(mxDocumentLocator, true);
}
break;
}
bool bContinue = true;
if( rEntity.mpParser == nullptr )
{
// create parser with proper encoding (needs the first chunk of data)
rEntity.mpParser = xmlCreatePushParserCtxt( &callbacks, this,
reinterpret_cast<const char*>(seqOut.getConstArray()), nRead, nullptr );
if( !rEntity.mpParser )
throw SAXException(u"Couldn't create parser"_ustr, Reference< XInterface >(), Any() );
// Tell libxml2 parser to decode entities in attribute values.
// Also allow XML attribute values which are larger than 10MB, because this used to work
// with expat.
// coverity[unsafe_xml_parse_config] - entity support is required
xmlCtxtUseOptions(rEntity.mpParser, XML_PARSE_NOENT | XML_PARSE_HUGE);
}
else
{
bContinue = xmlParseChunk( rEntity.mpParser, reinterpret_cast<const char*>(seqOut.getConstArray()), nRead, 0 )
== XML_ERR_OK;
}
// callbacks used inside xmlParseChunk may have caught an exception
if (!bContinue)
{
rEntity.throwException( mxDocumentLocator, true );
}
if (rEntity.hasException())
{
rEntity.throwException( mxDocumentLocator, true );
}
} while( nRead > 0 );
// Record the end of the event stream; consume() stops at this event.
rEntity.getEvent( CallbackType::DONE );
if( rEntity.mbEnableThreads )
produce( true );
}
// The C-Callbacks
// Handles a start-element notification from libxml2.
//
// localName / prefix / URI describe the element; prefix and URI may be null.
// namespaces holds numNamespaces (prefix, URI) pairs declared on this element,
// where a null prefix denotes the default namespace.
// attributes holds numAttributes 5-tuples
// (localname, prefix, nsURI, valueBegin, valueEnd).
//
// The function fills a START_ELEMENT event (attribute lists, element token or
// element name + namespace), pushes the namespace bookkeeping for the element,
// and then either queues the event (threaded mode) or dispatches it directly.
// Any exception raised while doing so is caught and stored on the entity
// instead of being propagated to the caller.
void FastSaxParserImpl::callbackStartElement(const xmlChar *localName , const xmlChar* prefix, const xmlChar* URI,
int numNamespaces, const xmlChar** namespaces, int numAttributes, const xmlChar **attributes)
{
// Deliver text collected so far before the new element starts.
if (!pendingCharacters.empty())
sendPendingCharacters();
Entity& rEntity = getEntity();
// maNamespaceCount gets one entry per open element; the very first element
// additionally registers the built-in "xml" prefix.
if( rEntity.maNamespaceCount.empty() )
{
rEntity.maNamespaceCount.push(0);
DefineNamespace( "xml"_ostr, u"http://www.w3.org/XML/1998/namespace"_ustr);
}
else
{
rEntity.maNamespaceCount.push( rEntity.maNamespaceCount.top() );
}
// create attribute map and process namespace instructions
Event& rEvent = rEntity.getEvent( CallbackType::START_ELEMENT );
// In threaded mode the event list carries a flag saying the attribute lists
// are already empty, in which case the clear() calls below are skipped.
bool bIsAttributesEmpty = false;
if ( rEntity.mbEnableThreads )
bIsAttributesEmpty = rEntity.getEventList().mbIsAttributesEmpty;
// Reuse the event's attribute list when it already has one, otherwise create it.
if (rEvent.mxAttributes.is())
{
if( !bIsAttributesEmpty )
rEvent.mxAttributes->clear();
}
else
rEvent.mxAttributes.set(
new FastAttributeList( rEntity.mxTokenHandler.get() ) );
// The namespace-declaration list is only maintained when a namespace handler is set.
if( rEntity.mxNamespaceHandler.is() )
{
if (rEvent.mxDeclAttributes.is())
{
if( !bIsAttributesEmpty )
rEvent.mxDeclAttributes->clear();
}
else
rEvent.mxDeclAttributes.set(
new FastAttributeList( rEntity.mxTokenHandler.get() ) );
}
// Start from the namespace of the enclosing element, if there is one.
OUString sNamespace;
sal_Int32 nNamespaceToken = FastToken::DONTKNOW;
if (!rEntity.maNamespaceStack.empty())
{
sNamespace = rEntity.maNamespaceStack.top().msName;
nNamespaceToken = rEntity.maNamespaceStack.top().mnToken;
}
try
{
/* #158414# Each element may define new namespaces, also for attributes.
First, process all namespaces, second, process the attributes after namespaces
have been initialized. */
// #158414# first: get namespaces
for (int i = 0; i < numNamespaces * 2; i += 2)
{
// namespaces[] is (prefix/URI)
if( namespaces[ i ] != nullptr )
{
// prefixed declaration: normalize the URI and register the prefix
OString aPrefix( XML_CAST( namespaces[ i ] ));
OUString namespaceURL( XML_CAST( namespaces[ i + 1 ] ), strlen( XML_CAST( namespaces[ i + 1 ] )), RTL_TEXTENCODING_UTF8 );
NormalizeURI( namespaceURL );
DefineNamespace(aPrefix, namespaceURL);
// the declaration list receives the URI as written, not the normalized one
if( rEntity.mxNamespaceHandler.is() )
rEvent.mxDeclAttributes->addUnknown( OString( XML_CAST( namespaces[ i ] ) ), OString( XML_CAST( namespaces[ i + 1 ] ) ) );
}
else
{
// default namespace
sNamespace = OUString( XML_CAST( namespaces[ i + 1 ] ), strlen( XML_CAST( namespaces[ i + 1 ] )), RTL_TEXTENCODING_UTF8 );
NormalizeURI( sNamespace );
nNamespaceToken = GetNamespaceToken( sNamespace );
if( rEntity.mxNamespaceHandler.is() )
rEvent.mxDeclAttributes->addUnknown( ""_ostr, OString( XML_CAST( namespaces[ i + 1 ] ) ) );
}
}
if ( rEntity.mxTokenHandler.is() )
{
// #158414# second: fill attribute list with other attributes
rEvent.mxAttributes->reserve( numAttributes );
for (int i = 0; i < numAttributes * 5; i += 5)
{
// attributes[] is ( localname / prefix / nsURI / valueBegin / valueEnd )
if( attributes[ i + 1 ] != nullptr )
{
// prefixed attribute: add by token when one is found, otherwise as unknown
sal_Int32 nAttributeToken = GetTokenWithPrefix(attributes[ i + 1 ], attributes[ i ]);
if( nAttributeToken != FastToken::DONTKNOW )
rEvent.mxAttributes->add( nAttributeToken, std::string_view(XML_CAST( attributes[ i + 3 ] ), attributes[ i + 4 ] - attributes[ i + 3 ]) );
else
addUnknownElementWithPrefix(attributes, i, rEvent.mxAttributes);
}
else
{
// unprefixed attribute
sal_Int32 nAttributeToken = GetToken(attributes[ i ]);
if( nAttributeToken != FastToken::DONTKNOW )
rEvent.mxAttributes->add( nAttributeToken, std::string_view(XML_CAST( attributes[ i + 3 ] ), attributes[ i + 4 ] - attributes[ i + 3 ]) );
else
{
SAL_WARN("xmloff", "unknown attribute " << XML_CAST( attributes[ i ] ) << "=" <<
OString( XML_CAST( attributes[ i + 3 ] ), attributes[ i + 4 ] - attributes[ i + 3 ] ));
rEvent.mxAttributes->addUnknown( XML_CAST( attributes[ i ] ),
OString( XML_CAST( attributes[ i + 3 ] ), attributes[ i + 4 ] - attributes[ i + 3 ] ));
}
}
}
// Element token: explicit prefix first, then the current default
// namespace, then the bare local name.
if( prefix != nullptr )
rEvent.mnElementToken = GetTokenWithPrefix(prefix, localName);
else if( !sNamespace.isEmpty() )
rEvent.mnElementToken = GetTokenWithContextNamespace(nNamespaceToken, localName);
else
rEvent.mnElementToken = GetToken(localName);
}
else
{
// No token handler: every attribute is stored as unknown and the
// element itself gets no token.
for (int i = 0; i < numAttributes * 5; i += 5)
{
if( attributes[ i + 1 ] != nullptr )
addUnknownElementWithPrefix(attributes, i, rEvent.mxAttributes);
else
rEvent.mxAttributes->addUnknown( XML_CAST( attributes[ i ] ),
OString( XML_CAST( attributes[ i + 3 ] ), attributes[ i + 4 ] - attributes[ i + 3 ] ));
}
rEvent.mnElementToken = FastToken::DONTKNOW;
}
if( rEvent.mnElementToken == FastToken::DONTKNOW )
{
// Without a token the event carries the namespace and the
// (possibly prefixed) element name as strings.
OUString aElementPrefix;
if( prefix != nullptr )
{
aElementPrefix = OUString( XML_CAST( prefix ), strlen( XML_CAST( prefix )), RTL_TEXTENCODING_UTF8 );
// A prefixed element without a namespace URI is an error unless
// missing namespace declarations are being ignored.
if ( URI != nullptr )
sNamespace = OUString( XML_CAST( URI ), strlen( XML_CAST( URI )), RTL_TEXTENCODING_UTF8 );
else if ( m_bIgnoreMissingNSDecl )
sNamespace.clear();
else
throw SAXException("No namespace defined for " + aElementPrefix, {}, {});
nNamespaceToken = GetNamespaceToken( sNamespace );
}
OUString aElementLocalName( XML_CAST( localName ), strlen( XML_CAST( localName )), RTL_TEXTENCODING_UTF8 );
rEvent.msNamespace = sNamespace;
if( aElementPrefix.isEmpty() )
rEvent.msElementName = std::move(aElementLocalName);
else
rEvent.msElementName = aElementPrefix + ":" + aElementLocalName;
}
else // token is always preferred.
rEvent.msElementName.clear();
// Remember this element's namespace; callbackEndElement pops it again.
rEntity.maNamespaceStack.push( NameWithToken(sNamespace, nNamespaceToken) );
// Threaded mode queues the event, otherwise it is dispatched right away.
if (rEntity.mbEnableThreads)
produce();
else
{
SAL_INFO("sax.fastparser", " startElement line " << mxDocumentLocator->getLineNumber() << " column " << mxDocumentLocator->getColumnNumber() << " " << ( prefix ? XML_CAST(prefix) : "(null)" ) << ":" << localName);
rEntity.startElement( &rEvent );
}
}
catch (...)
{
// Store the exception on the entity rather than letting it leave this callback.
rEntity.saveException( ::cppu::getCaughtException() );
}
}
void FastSaxParserImpl::addUnknownElementWithPrefix(const xmlChar **attributes, int i, rtl::Reference< FastAttributeList > const & xAttributes)
{
OUString aNamespaceURI;
if ( !m_bIgnoreMissingNSDecl || attributes[i + 2] != nullptr )
aNamespaceURI = OUString( XML_CAST( attributes[ i + 2 ] ), strlen( XML_CAST( attributes[ i + 2 ] )), RTL_TEXTENCODING_UTF8 );
const OString aPrefix( XML_CAST( attributes[ i + 1 ] ));
const OString aLocalName( XML_CAST( attributes[ i ] ));
OString aQualifiedName = (aPrefix.isEmpty())? aLocalName : aPrefix + ":" + aLocalName;
xAttributes->addUnknown( aNamespaceURI, aQualifiedName,
OString( XML_CAST( attributes[ i + 3 ] ), attributes[ i + 4 ] - attributes[ i + 3 ] ));
SAL_INFO("xmloff", "unknown element " << aQualifiedName << " " << aNamespaceURI);
}
void FastSaxParserImpl::callbackEndElement()
{
if (!pendingCharacters.empty())
sendPendingCharacters();
Entity& rEntity = getEntity();
SAL_WARN_IF(rEntity.maNamespaceCount.empty(), "sax", "Empty NamespaceCount");
if( !rEntity.maNamespaceCount.empty() )
rEntity.maNamespaceCount.pop();
SAL_WARN_IF(rEntity.maNamespaceStack.empty(), "sax", "Empty NamespaceStack");
if( !rEntity.maNamespaceStack.empty() )
rEntity.maNamespaceStack.pop();
rEntity.getEvent( CallbackType::END_ELEMENT );
if (rEntity.mbEnableThreads)
produce();
else
rEntity.endElement();
}
void FastSaxParserImpl::callbackCharacters( const xmlChar* s, int nLen )
{
// SAX interface allows that the characters callback splits content of one XML node
// (e.g. because there's an entity that needs decoding), however for consumers it's
// simpler FastSaxParser's character callback provides the whole string at once,
// so merge data from possible multiple calls and send them at once (before the element
// ends or another one starts).
//
// We use a std::vector<char> to avoid calling into the OUString constructor more than once when
// we have multiple callbackCharacters() calls that we have to merge, which happens surprisingly
// often in writer documents.
int nOriginalLen = pendingCharacters.size();
pendingCharacters.resize(nOriginalLen + nLen);
memcpy(pendingCharacters.data() + nOriginalLen, s, nLen);
}
void FastSaxParserImpl::sendPendingCharacters()
{
Entity& rEntity = getEntity();
OUString sChars( pendingCharacters.data(), pendingCharacters.size(), RTL_TEXTENCODING_UTF8 );
if (rEntity.mbEnableThreads)
{
Event& rEvent = rEntity.getEvent( CallbackType::CHARACTERS );
rEvent.msChars = std::move(sChars);
produce();
}
else
rEntity.characters( sChars );
pendingCharacters.resize(0);
}
void FastSaxParserImpl::callbackProcessingInstruction( const xmlChar *target, const xmlChar *data )
{
if (!pendingCharacters.empty())
sendPendingCharacters();
Entity& rEntity = getEntity();
Event& rEvent = rEntity.getEvent( CallbackType::PROCESSING_INSTRUCTION );
// This event is very rare, so no need to waste extra space for this
// Using namespace and element strings to be target and data in that order.
rEvent.msNamespace = OUString( XML_CAST( target ), strlen( XML_CAST( target ) ), RTL_TEXTENCODING_UTF8 );
if ( data != nullptr )
rEvent.msElementName = OUString( XML_CAST( data ), strlen( XML_CAST( data ) ), RTL_TEXTENCODING_UTF8 );
else
rEvent.msElementName.clear();
if (rEntity.mbEnableThreads)
produce();
else
rEntity.processingInstruction( rEvent.msNamespace, rEvent.msElementName );
}
// Resolves an entity name for libxml2.
//
// Lookup order:
//  1. custom replacements in m_Replacements (searched with lower_bound),
//  2. numeric names of the form "#<decimal>" or "#x<hex>" / "#X<hex>",
//  3. libxml2's predefined entities.
//
// Entities created here are also recorded in m_TemporalEntities.
// Null or empty names and unparsable, zero or out-of-range numeric values
// fall through to xmlGetPredefinedEntity().
xmlEntityPtr FastSaxParserImpl::callbackGetEntity( const xmlChar *name )
{
    if( !name )
        return xmlGetPredefinedEntity(name);
    const char* dname = XML_CAST(name);
    const size_t lname = strlen(dname);
    if( lname == 0 )
        return xmlGetPredefinedEntity(name);

    // Creates an internal general entity with the given replacement text and
    // remembers it in m_TemporalEntities.
    const auto makeEntity = [this, name](const OUString& rContent) -> xmlEntityPtr
    {
        xmlEntityPtr entpt = xmlNewEntity(
            nullptr, name, XML_INTERNAL_GENERAL_ENTITY, nullptr, nullptr,
            BAD_CAST(OUStringToOString(rContent, RTL_TEXTENCODING_UTF8).getStr()));
        m_TemporalEntities.push_back(entpt);
        return entpt;
    };

    if (!m_Replacements.empty())
    {
        auto it = std::lower_bound(m_Replacements.begin(), m_Replacements.end(), dname);
        if (it != m_Replacements.end() && it->name.compareToAscii(dname) == 0)
            return makeEntity(it->replacement);
    }

    // Only "#..." names of at least two characters are treated as numeric.
    if( lname < 2 || dname[0] != '#' )
        return xmlGetPredefinedEntity(name);

    const bool bHex = ( dname[1] == 'x' || dname[1] == 'X' );
    if( bHex && lname < 3 )
        return xmlGetPredefinedEntity(name);

    // Hex digits start after "#x"; decimal digits start right after '#'.
    // (The decimal case used to skip the first digit as well.)
    const char* pDigits = dname + ( bHex ? 2 : 1 );
    const unsigned long nValue = strtoul( pDigits, nullptr, bHex ? 16 : 10 );

    // 0 means "no usable number"; anything above 0x10FFFF is not a Unicode
    // code point and must not be handed to the OUString constructor.
    if( nValue == 0 || nValue > 0x10FFFF )
        return xmlGetPredefinedEntity(name);

    const sal_uInt32 cval = static_cast<sal_uInt32>( nValue );
    return makeEntity( OUString( &cval, 1 ) );
}
// Creates the parser together with its implementation object.
FastSaxParser::FastSaxParser()
    : mpImpl(new FastSaxParserImpl)
{
}
// Nothing to do beyond destroying the members.
FastSaxParser::~FastSaxParser() = default;
// Applies an initialization option.
//
// Only the first argument is looked at; it must be a string and one of
// "IgnoreMissingNSDecl", "DoSmeplease" or "DisableThreadedParser".
// An empty argument list is accepted and does nothing.
// Throws IllegalArgumentException for a non-string or unrecognized argument.
void SAL_CALL
FastSaxParser::initialize(css::uno::Sequence< css::uno::Any > const& rArguments)
{
    if (rArguments.hasElements())
    {
        OUString sOption;
        const bool bIsString = ( rArguments[0] >>= sOption );
        if ( !bIsString )
            throw IllegalArgumentException();

        if ( sOption == "IgnoreMissingNSDecl" )
        {
            mpImpl->m_bIgnoreMissingNSDecl = true;
        }
        else if ( sOption == "DisableThreadedParser" )
        {
            mpImpl->m_bDisableThreadedParser = true;
        }
        else if ( !( sOption == "DoSmeplease" ) )
        {
            // "DoSmeplease" is accepted but ignored, as this is already
            // immune to billion laughs; anything else is rejected.
            throw IllegalArgumentException();
        }
    }
}
// Forwards to the implementation object's parseStream().
void FastSaxParser::parseStream( const xml::sax::InputSource& aInputSource )
{
    FastSaxParserImpl& rImpl = *mpImpl;
    rImpl.parseStream(aInputSource);
}
// Forwards the document handler to the implementation object.
void FastSaxParser::setFastDocumentHandler(
    const uno::Reference<xml::sax::XFastDocumentHandler>& Handler )
{
    FastSaxParserImpl& rImpl = *mpImpl;
    rImpl.setFastDocumentHandler(Handler);
}
// Forwards the token handler to the implementation object.
void FastSaxParser::setTokenHandler(
    const uno::Reference<xml::sax::XFastTokenHandler>& Handler )
{
    FastSaxParserImpl& rImpl = *mpImpl;
    rImpl.setTokenHandler(Handler);
}
void FastSaxParser::registerNamespace( const OUString& NamespaceURL, sal_Int32 NamespaceToken )
{
mpImpl->registerNamespace(NamespaceURL, NamespaceToken);
}
// Returns whatever the implementation object reports for the given prefix.
OUString FastSaxParser::getNamespaceURL( const OUString& rPrefix )
{
    FastSaxParserImpl& rImpl = *mpImpl;
    return rImpl.getNamespaceURL(rPrefix);
}
// Forwards the error handler to the implementation object.
void FastSaxParser::setErrorHandler(
    const uno::Reference< xml::sax::XErrorHandler >& Handler )
{
    FastSaxParserImpl& rImpl = *mpImpl;
    rImpl.setErrorHandler(Handler);
}
// Accepts an entity resolver but ignores it: the argument is unnamed and the
// body is empty.
void FastSaxParser::setEntityResolver( const uno::Reference< xml::sax::XEntityResolver >& )
{
// not implemented
}
// Accepts a locale but ignores it: the argument is unnamed and the body is
// empty.
void FastSaxParser::setLocale( const lang::Locale& )
{
// not implemented
}
// Forwards the namespace handler to the implementation object.
void FastSaxParser::setNamespaceHandler(
    const uno::Reference< css::xml::sax::XFastNamespaceHandler >& Handler)
{
    FastSaxParserImpl& rImpl = *mpImpl;
    rImpl.setNamespaceHandler(Handler);
}
// Returns the implementation name of this component.
OUString FastSaxParser::getImplementationName()
{
    OUString sImplementationName( u"com.sun.star.comp.extensions.xml.sax.FastParser"_ustr );
    return sImplementationName;
}
// Forwards the list of (name, replacement) pairs to the implementation object.
void FastSaxParser::setCustomEntityNames(
    const ::css::uno::Sequence<::css::beans::Pair<::rtl::OUString, ::rtl::OUString>>& replacements)
{
    FastSaxParserImpl& rImpl = *mpImpl;
    rImpl.setCustomEntityNames(replacements);
}
// Delegates the service-name check to cppu::supportsService().
sal_Bool FastSaxParser::supportsService( const OUString& ServiceName )
{
    const bool bSupported = cppu::supportsService(this, ServiceName);
    return bSupported;
}
// Returns the single service name this component supports.
uno::Sequence<OUString> FastSaxParser::getSupportedServiceNames()
{
    uno::Sequence<OUString> aServiceNames{ u"com.sun.star.xml.sax.FastParser"_ustr };
    return aServiceNames;
}
} // namespace sax_fastparser
// Component factory entry point: creates a FastSaxParser and returns it
// acquired. Both arguments are unused.
extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface *
com_sun_star_comp_extensions_xml_sax_FastParser_get_implementation(
    css::uno::XComponentContext * /*pContext*/,
    css::uno::Sequence<css::uno::Any> const & /*rArguments*/)
{
    FastSaxParser* const pParser = new FastSaxParser;
    return cppu::acquire(pParser);
}
// ----------------------------------------------------------
// copy of the code in xmloff/source/core/namespace.cxx, which adds namespace aliases
// for various dodgy namespace decls in the wild.
static bool NormalizeW3URI( OUString& rName );
static bool NormalizeOasisURN( OUString& rName );
// Rewrites rName in place to its normalized namespace URI, if it is one of
// the known variants: OASIS URN normalization is tried first and W3 URI
// normalization only when that did not apply.
static void NormalizeURI( OUString& rName )
{
    if( NormalizeOasisURN( rName ) )
        return;
    NormalizeW3URI( rName );
}
// Namespace URIs and URN fragments used by NormalizeW3URI() and
// NormalizeOasisURN() below.

// W3 URI prefix / xforms suffix matched by NormalizeW3URI(), and its result.
constexpr OUStringLiteral XML_URI_W3_PREFIX(u"http://www.w3.org/");
constexpr OUStringLiteral XML_URI_XFORMS_SUFFIX(u"/xforms");
constexpr OUStringLiteral XML_N_XFORMS_1_0(u"http://www.w3.org/2002/xforms");
// W3 namespaces that NormalizeOasisURN() replaces by their *_COMPAT URN.
constexpr OUStringLiteral XML_N_SVG(u"http://www.w3.org/2000/svg");
constexpr OUStringLiteral XML_N_SVG_COMPAT(u"urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0");
constexpr OUStringLiteral XML_N_FO(u"http://www.w3.org/1999/XSL/Format");
constexpr OUStringLiteral XML_N_FO_COMPAT(u"urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0");
constexpr OUStringLiteral XML_N_SMIL(u"http://www.w3.org/2001/SMIL20/");
constexpr OUStringLiteral XML_N_SMIL_OLD(u"http://www.w3.org/2001/SMIL20");
constexpr OUStringLiteral XML_N_SMIL_COMPAT(u"urn:oasis:names:tc:opendocument:xmlns:smil-compatible:1.0");
// Pieces of the OASIS URN pattern checked and rebuilt by NormalizeOasisURN().
constexpr OUStringLiteral XML_URN_OASIS_NAMES_TC(u"urn:oasis:names:tc");
constexpr OUStringLiteral XML_XMLNS(u"xmlns");
constexpr OUStringLiteral XML_OPENDOCUMENT(u"opendocument");
constexpr OUStringLiteral XML_1_0(u"1.0");
// Normalizes W3 xforms namespace URIs.
//
// A name that starts with "http://www.w3.org/" and ends in "/xforms"
// (intended shape: http://www.w3.org/(year)/(WG name), currently only for
// the xforms WG) is replaced by the xforms 1.0 namespace.
// Returns true when rName was replaced, false when it was left untouched.
static bool NormalizeW3URI( OUString& rName )
{
    const OUString sW3Prefix = XML_URI_W3_PREFIX;
    if( !rName.startsWith( sW3Prefix ) )
        return false;

    // compare the tail of the name against the xforms suffix
    const OUString sXFormsSuffix = XML_URI_XFORMS_SUFFIX;
    const sal_Int32 nSuffixStart = rName.getLength() - sXFormsSuffix.getLength();
    const bool bHasSuffix = ( rName.subView( nSuffixStart ) == sXFormsSuffix );
    if( !bHasSuffix )
        return false;

    // found W3 prefix, and xforms suffix
    rName = XML_N_XFORMS_1_0;
    return true;
}
// Normalizes OASIS-style namespace names.
//
// Two kinds of input are rewritten in place:
//  - the W3 svg, XSL-FO and SMIL namespaces, which are mapped to their
//    "...-compatible:1.0" OASIS URNs;
//  - URNs of the form
//        urn:oasis:names:tc:<TC-Id>:xmlns:<Sub-Id>:1.<x>
//    whose TC-Id is replaced by "opendocument" and whose version is
//    replaced by "1.0".
// Returns true when rName was rewritten, false when it was left untouched.
static bool NormalizeOasisURN( OUString& rName )
{
    // #i38644#
    // wrong namespaces were exported for older documents (e.g. smil), so they
    // are corrected here on load
    if( rName == XML_N_SVG )
    {
        rName = XML_N_SVG_COMPAT;
        return true;
    }
    if( rName == XML_N_FO )
    {
        rName = XML_N_FO_COMPAT;
        return true;
    }
    if( rName == XML_N_SMIL || rName == XML_N_SMIL_OLD )
    {
        rName = XML_N_SMIL_COMPAT;
        return true;
    }

    const sal_Int32 nLength = rName.getLength();

    // urn:oasis:names:tc ...
    const OUString sUrnHead = XML_URN_OASIS_NAMES_TC;
    if( !rName.startsWith( sUrnHead ) )
        return false;

    // ... followed by ':'
    sal_Int32 nCursor = sUrnHead.getLength();
    if( nCursor >= nLength || rName[nCursor] != ':' )
        return false;

    // TC-Id: everything up to the next ':'
    const sal_Int32 nTCIdStart = nCursor + 1;
    const sal_Int32 nTCIdEnd = rName.indexOf( ':', nTCIdStart );
    if( nTCIdEnd == -1 )
        return false;

    // ... then the literal "xmlns"
    nCursor = nTCIdEnd + 1;
    std::u16string_view sRemainder( rName.subView( nCursor ) );
    const OUString sXmlns = XML_XMLNS;
    if( !o3tl::starts_with( sRemainder, sXmlns ) )
        return false;

    // ... followed by ':'
    nCursor += sXmlns.getLength();
    if( nCursor >= nLength || rName[nCursor] != ':' )
        return false;

    // Sub-Id: everything up to the next ':'
    nCursor = rName.indexOf( ':', nCursor + 1 );
    if( nCursor == -1 )
        return false;

    // Version: at least three characters, no further ':' ...
    const sal_Int32 nVersionStart = nCursor + 1;
    if( nVersionStart + 2 >= nLength )
        return false;
    if( rName.indexOf( ':', nVersionStart ) != -1 )
        return false;

    // ... and it has to start with "1."
    if( rName[nVersionStart] != '1' || rName[nVersionStart+1] != '.' )
        return false;

    // replace [tcid] with current TCID and version with current version.
    rName = rName.subView( 0, nTCIdStart ) +
            XML_OPENDOCUMENT +
            rName.subView( nTCIdEnd, nVersionStart-nTCIdEnd ) +
            XML_1_0;
    return true;
}
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
// Static-analysis notes (PVS-Studio):
// V654 The condition 'nRead > 0' of loop is always true.
// V730 Not all members of a class are initialized inside the constructor. Consider inspecting: maSharedEvent.