pdfdocument.cxx


/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 */
 
#include <vcl/filter/pdfdocument.hxx>
#include <pdf/pdfcompat.hxx>
 
#include <map>
#include <memory>
#include <vector>
 
#include <com/sun/star/uno/Sequence.hxx>
#include <com/sun/star/security/XCertificate.hpp>
 
#include <comphelper/scopeguard.hxx>
#include <comphelper/string.hxx>
#include <o3tl/string_view.hxx>
#include <rtl/character.hxx>
#include <rtl/strbuf.hxx>
#include <rtl/string.hxx>
#include <sal/log.hxx>
#include <sal/types.h>
#include <svl/cryptosign.hxx>
#include <tools/zcodec.hxx>
#include <vcl/pdfwriter.hxx>
#include <o3tl/safeint.hxx>
 
#include <pdf/objectcopier.hxx>
 
using namespace com::sun::star;
 
namespace vcl::filter
{
// Explicitly defaulted outside the class definition: the compiler-generated default
// constructor is emitted in this translation unit.
XRefEntry::XRefEntry() = default;
 
// Default constructor of the document, likewise defaulted out of line.
PDFDocument::PDFDocument() = default;
 
// Destructor, defaulted out of line.
// NOTE(review): presumably defined here so that member types only need to be complete in this
// file - confirm against the header.
PDFDocument::~PDFDocument() = default;
 
bool PDFDocument::RemoveSignature(size_t nPosition)
{
    std::vector<PDFObjectElement*> aSignatures = GetSignatureWidgets();
    if (nPosition >= aSignatures.size())
    {
        SAL_WARN("vcl.filter", "PDFDocument::RemoveSignature: invalid nPosition");
        return false;
    }
 
    if (aSignatures.size() != m_aEOFs.size() - 1)
    {
        SAL_WARN("vcl.filter", "PDFDocument::RemoveSignature: no 1:1 mapping between signatures "
                               "and incremental updates");
        return false;
    }
 
    // The EOF offset is the end of the original file, without the signature at
    // nPosition.
    m_aEditBuffer.Seek(m_aEOFs[nPosition]);
    // Drop all bytes after the current position.
    m_aEditBuffer.SetStreamSize(m_aEditBuffer.Tell() + 1);
 
    return m_aEditBuffer.good();
}
 
// Allocates a new object ID and registers a default-constructed xref entry for it.
// Returns the new ID, which is the number of xref entries that existed before the call.
sal_Int32 PDFDocument::createObject()
{
    const sal_Int32 nNewId = m_aXRef.size();
    XRefEntry aBlankEntry;
    m_aXRef[nNewId] = aBlankEntry;
    return nNewId;
}
 
// Replaces the xref entry of nObject with a fresh, dirty entry that points at the current write
// position of the edit buffer. Returns false (after logging a warning) if nObject is not smaller
// than the number of xref entries.
bool PDFDocument::updateObject(sal_Int32 nObject)
{
    if (o3tl::make_unsigned(nObject) < m_aXRef.size())
    {
        // A new entry is built from scratch, so every other field of the old entry is reset.
        XRefEntry aUpdated;
        aUpdated.SetOffset(m_aEditBuffer.Tell());
        aUpdated.SetDirty(true);
        m_aXRef[nObject] = aUpdated;
        return true;
    }

    SAL_WARN("vcl.filter", "PDFDocument::updateObject: invalid nObject");
    return false;
}
 
// Writes nBytes bytes starting at pBuffer to the edit buffer.
// Returns true only if the stream reports that all nBytes bytes were written.
bool PDFDocument::writeBufferBytes(const void* pBuffer, sal_uInt64 nBytes)
{
    return m_aEditBuffer.WriteBytes(pBuffer, nBytes) == nBytes;
}
 
// Takes ownership of the bytes in rSignatureLine and stores them as the pending signature line.
// WriteAppearanceObject() parses these bytes as a PDF document when they are not empty.
void PDFDocument::SetSignatureLine(std::vector<sal_Int8>&& rSignatureLine)
{
    // Move rather than copy: the caller hands the buffer over.
    m_aSignatureLine = std::move(rSignatureLine);
}
 
// Remembers the index of the page that should receive the signature widget.
void PDFDocument::SetSignaturePage(size_t nPage)
{
    m_nSignaturePage = nPage;
}
 
// Determines the number to use for the next signature field name.
//
// Looks at the "T" entry of every signature widget; for values that start with "Signature" the
// remainder is converted to a number. Returns the largest such number plus one (1 if there is
// none).
sal_uInt32 PDFDocument::GetNextSignature()
{
    sal_uInt32 nHighest = 0;
    for (const auto& pWidget : GetSignatureWidgets())
    {
        auto pName = dynamic_cast<PDFLiteralStringElement*>(pWidget->Lookup("T"_ostr));
        if (pName)
        {
            // Only names of the form "Signature<suffix>" take part in the numbering.
            std::string_view aSuffix;
            if (pName->GetValue().startsWith("Signature", &aSuffix))
            {
                const sal_uInt32 nNumber = o3tl::toUInt32(aSuffix);
                if (nHighest < nNumber)
                    nHighest = nNumber;
            }
        }
    }

    return nHighest + 1;
}
 
// Appends a new signature dictionary object (/Type/Sig) to the edit buffer and registers a dirty
// xref entry for it.
//
// rSigningContext      passed on to vcl::PDFWriter::GetDateTime() for the /M (signing time) entry.
// rDescription         if not empty, written as the /Reason entry.
// bAdES                selects the /SubFilter: ETSI.CAdES.detached if true, adbe.pkcs7.detached
//                      otherwise.
// rLastByteRangeOffset out: absolute offset at which the last /ByteRange value has to be written
//                      later (100 spaces are reserved there).
// rContentOffset       out: absolute offset of the first hex digit of the /Contents placeholder.
//
// Returns the ID of the new object.
sal_Int32 PDFDocument::WriteSignatureObject(svl::crypto::SigningContext& rSigningContext,
                                            const OUString& rDescription, bool bAdES,
                                            sal_uInt64& rLastByteRangeOffset,
                                            sal_Int64& rContentOffset)
{
    // Allocate the object ID and remember where the object is going to start.
    const sal_Int32 nSigObjectId = m_aXRef.size();
    XRefEntry aSigEntry;
    aSigEntry.SetOffset(m_aEditBuffer.Tell());
    aSigEntry.SetDirty(true);
    m_aXRef[nSigObjectId] = aSigEntry;

    // The object is assembled in memory first; absolute offsets are derived from the object
    // start plus the length assembled so far.
    OStringBuffer aObject(OString::number(nSigObjectId)
                          + " 0 obj\n"
                            "<</Contents <");
    rContentOffset = aSigEntry.GetOffset() + aObject.getLength();

    // Placeholder of '0' characters where the PKCS#7 object will go.
    OStringBuffer aContentPlaceholder(MAX_SIGNATURE_CONTENT_LENGTH);
    comphelper::string::padToLength(aContentPlaceholder, MAX_SIGNATURE_CONTENT_LENGTH, '0');
    aObject.append(aContentPlaceholder + ">\n/Type/Sig/SubFilter");
    if (!bAdES)
        aObject.append("/adbe.pkcs7.detached");
    else
        aObject.append("/ETSI.CAdES.detached");

    // Signing time, followed by the part of the byte range that is already known: the first
    // range starts at 0 and ends right before the "<" of the hex string (hence -1), the second
    // one starts right after the ">" (hence +1). Its length is filled in later.
    aObject.append(" /M (" + vcl::PDFWriter::GetDateTime(&rSigningContext)
                   + ")"
                     " /ByteRange [ 0 "
                   + OString::number(rContentOffset - 1) + " "
                   + OString::number(rContentOffset + MAX_SIGNATURE_CONTENT_LENGTH + 1) + " ");
    rLastByteRangeOffset = aSigEntry.GetOffset() + aObject.getLength();

    // The width of the last ByteRange value is unknown at this point, reserve 100 characters.
    OStringBuffer aByteRangePlaceholder;
    comphelper::string::padToLength(aByteRangePlaceholder, 100, ' ');
    aObject.append(aByteRangePlaceholder
                   // Rest of the Sig dictionary.
                   + " /Filter/Adobe.PPKMS");

    if (!rDescription.isEmpty())
    {
        aObject.append("/Reason<");
        vcl::PDFWriter::AppendUnicodeTextString(rDescription, aObject);
        aObject.append(">");
    }

    aObject.append(" >>\nendobj\n\n");
    m_aEditBuffer.WriteOString(aObject);

    return nSigObjectId;
}
 
// Appends the form XObject that serves as the appearance of the signature widget and registers
// a dirty xref entry for it.
//
// If a signature line was set, it is parsed as a PDF document: the third and fourth MediaBox
// values of its first page become the width and height of rSignatureRectangle, and the page's
// resources and content stream are copied into the new object. Without a signature line the
// object gets an empty stream and rSignatureRectangle is left untouched.
//
// Returns the ID of the new object, or -1 (after logging a warning) if the signature line could
// not be used.
sal_Int32 PDFDocument::WriteAppearanceObject(tools::Rectangle& rSignatureRectangle)
{
    PDFDocument aLineDocument;
    filter::PDFObjectElement* pPage = nullptr;
    std::vector<filter::PDFObjectElement*> aContentStreams;

    if (!m_aSignatureLine.empty())
    {
        // The signature line is a PDF of its own: load it from memory.
        SvMemoryStream aLineData;
        aLineData.WriteBytes(m_aSignatureLine.data(), m_aSignatureLine.size());
        aLineData.Seek(0);
        if (!aLineDocument.Read(aLineData))
        {
            SAL_WARN("vcl.filter",
                     "PDFDocument::WriteAppearanceObject: failed to read the PDF document");
            return -1;
        }

        std::vector<filter::PDFObjectElement*> aLinePages = aLineDocument.GetPages();
        if (aLinePages.empty())
        {
            SAL_WARN("vcl.filter", "PDFDocument::WriteAppearanceObject: no pages");
            return -1;
        }

        pPage = aLinePages[0];
        if (!pPage)
        {
            SAL_WARN("vcl.filter", "PDFDocument::WriteAppearanceObject: no page");
            return -1;
        }

        // The size of the signature rectangle is taken from the page's MediaBox.
        auto pMediaBoxArray = dynamic_cast<PDFArrayElement*>(pPage->Lookup("MediaBox"_ostr));
        if (!pMediaBoxArray || pMediaBoxArray->GetElements().size() < 4)
        {
            SAL_WARN("vcl.filter",
                     "PDFDocument::WriteAppearanceObject: MediaBox is not an array of 4");
            return -1;
        }
        const std::vector<PDFElement*>& rBox = pMediaBoxArray->GetElements();

        auto pBoxWidth = dynamic_cast<PDFNumberElement*>(rBox[2]);
        if (!pBoxWidth)
        {
            SAL_WARN("vcl.filter", "PDFDocument::WriteAppearanceObject: MediaBox has no width");
            return -1;
        }
        rSignatureRectangle.setWidth(pBoxWidth->GetValue());

        auto pBoxHeight = dynamic_cast<PDFNumberElement*>(rBox[3]);
        if (!pBoxHeight)
        {
            SAL_WARN("vcl.filter", "PDFDocument::WriteAppearanceObject: MediaBox has no height");
            return -1;
        }
        rSignatureRectangle.setHeight(pBoxHeight->GetValue());

        // Exactly one content stream object is expected.
        PDFObjectElement* pContents = pPage->LookupObject("Contents"_ostr);
        if (!pContents)
        {
            SAL_WARN("vcl.filter", "PDFDocument::WriteAppearanceObject: no content stream");
            return -1;
        }
        aContentStreams.push_back(pContents);
    }
    m_aSignatureLine.clear();

    const bool bHaveContent = !aContentStreams.empty();

    // Reserve the ID right away with a placeholder entry; the real entry is stored at the end.
    const sal_Int32 nAppearanceId = m_aXRef.size();
    m_aXRef[nAppearanceId] = XRefEntry();

    // The object is assembled in a stream of its own and appended to the document-level edit
    // buffer only once it is complete.
    SvMemoryStream aObject;
    aObject.WriteNumberAsString(nAppearanceId).WriteOString(" 0 obj\n");
    aObject.WriteOString("<</Type/XObject\n/Subtype/Form\n");

    PDFObjectCopier aCopier(*this);
    if (bHaveContent)
    {
        assert(pPage && "aContentStreams is only filled if there was a pPage");
        OStringBuffer aResources;
        aCopier.copyPageResources(pPage, aResources);
        aObject.WriteOString(aResources);
    }

    aObject.WriteOString("/BBox[0 0 ").WriteNumberAsString(rSignatureRectangle.getOpenWidth());
    aObject.WriteOString(" ").WriteNumberAsString(rSignatureRectangle.getOpenHeight());
    aObject.WriteOString("]\n/Length ");

    // Collect the stream data first, its length has to be written before the data itself.
    SvMemoryStream aStreamData;
    bool bCompressed = false;
    const sal_Int32 nLength
        = bHaveContent ? PDFObjectCopier::copyPageStreams(aContentStreams, aStreamData, bCompressed)
                       : 0;
    aObject.WriteNumberAsString(nLength);
    if (bCompressed)
    {
        aObject.WriteOString(" /Filter/FlateDecode");
    }

    aObject.WriteOString("\n>>\n");

    aObject.WriteOString("stream\n");

    // The copied page streams become the stream of the form XObject.
    aStreamData.Seek(0);
    aObject.WriteStream(aStreamData);

    aObject.WriteOString("\nendstream\nendobj\n\n");

    // Only now is the final position in the document known: record it, then append the object.
    aObject.Seek(0);
    XRefEntry aAppearanceEntry;
    aAppearanceEntry.SetOffset(m_aEditBuffer.Tell());
    aAppearanceEntry.SetDirty(true);
    m_aXRef[nAppearanceId] = aAppearanceEntry;
    m_aEditBuffer.WriteStream(aObject);

    return nAppearanceId;
}
 
// Appends the signature widget annotation to the edit buffer and registers a dirty xref entry
// for it.
//
// rFirstPage          page object that is referenced by the /P entry.
// nSignatureId        ID of the signature object, referenced by /V and /DV.
// nAppearanceId       ID of the appearance object, referenced by /AP /N.
// rSignatureRectangle its open width and height form the upper right corner of /Rect.
//
// Returns the ID of the new annotation object.
sal_Int32 PDFDocument::WriteAnnotObject(PDFObjectElement const& rFirstPage, sal_Int32 nSignatureId,
                                        sal_Int32 nAppearanceId,
                                        const tools::Rectangle& rSignatureRectangle)
{
    // Number used in the field name ("Signature<N>").
    const sal_uInt32 nFieldNumber = GetNextSignature();

    // Allocate the object ID and register where the object starts.
    const sal_Int32 nWidgetId = m_aXRef.size();
    XRefEntry aWidgetEntry;
    aWidgetEntry.SetOffset(m_aEditBuffer.Tell());
    aWidgetEntry.SetDirty(true);
    m_aXRef[nWidgetId] = aWidgetEntry;

    // Object header and fixed part of the dictionary.
    m_aEditBuffer.WriteNumberAsString(nWidgetId).WriteOString(" 0 obj\n");
    m_aEditBuffer.WriteOString("<</Type/Annot/Subtype/Widget/F 132\n");

    // Rectangle of the widget, anchored at the origin.
    m_aEditBuffer.WriteOString("/Rect[0 0 ")
        .WriteNumberAsString(rSignatureRectangle.getOpenWidth());
    m_aEditBuffer.WriteOString(" ").WriteNumberAsString(rSignatureRectangle.getOpenHeight());
    m_aEditBuffer.WriteOString("]\n");

    m_aEditBuffer.WriteOString("/FT/Sig\n");

    // Owning page.
    m_aEditBuffer.WriteOString("/P ").WriteNumberAsString(rFirstPage.GetObjectValue());
    m_aEditBuffer.WriteOString(" 0 R\n");

    // Field name.
    m_aEditBuffer.WriteOString("/T(Signature").WriteNumberAsString(nFieldNumber);
    m_aEditBuffer.WriteOString(")\n");

    // Value and default value both point at the signature object.
    m_aEditBuffer.WriteOString("/V ").WriteNumberAsString(nSignatureId);
    m_aEditBuffer.WriteOString(" 0 R\n");
    m_aEditBuffer.WriteOString("/DV ").WriteNumberAsString(nSignatureId);
    m_aEditBuffer.WriteOString(" 0 R\n");

    // Normal appearance.
    m_aEditBuffer.WriteOString("/AP<<\n/N ").WriteNumberAsString(nAppearanceId);
    m_aEditBuffer.WriteOString(" 0 R\n>>\n");

    m_aEditBuffer.WriteOString(">>\nendobj\n\n");

    return nWidgetId;
}
 
// Makes the page reference the new widget annotation nAnnotId by appending an updated object to
// the edit buffer and marking the matching xref entry dirty.
//
// Depending on the Annots key of rFirstPage:
// - indirect reference: the referenced array object is rewritten, containing the references of
//   the old array followed by ours (elements of the old array that are not references are not
//   carried over);
// - inline array: the page dictionary is copied, with our reference inserted right before the
//   last byte of the array value;
// - anything else (including a missing key): the page dictionary is copied and an "/Annots[...]"
//   key with our reference is appended.
//
// rFirstPage is the page object that receives the annotation.
// Returns false (after logging a warning) if the Annots reference cannot be resolved, if it does
// not resolve to an array, or if the page's object ID is not smaller than the number of xref
// entries.
bool PDFDocument::WritePageObject(PDFObjectElement& rFirstPage, sal_Int32 nAnnotId)
{
    // Appends nLength bytes that are already stored in the edit buffer at nOffset.
    // The bytes are copied out before writing: handing WriteBytes() a pointer into the stream's
    // own storage is only safe as long as the stream never reallocates while growing.
    // NOTE(review): assumes SvMemoryStream may reallocate on growth - confirm.
    auto appendFromEditBuffer = [this](sal_uInt64 nOffset, sal_uInt64 nLength) {
        if (nLength == 0)
            return;
        const char* pBegin = static_cast<const char*>(m_aEditBuffer.GetData()) + nOffset;
        const std::vector<char> aBytes(pBegin, pBegin + nLength);
        m_aEditBuffer.WriteBytes(aBytes.data(), aBytes.size());
    };

    PDFElement* pAnnots = rFirstPage.Lookup("Annots"_ostr);
    auto pAnnotsReference = dynamic_cast<PDFReferenceElement*>(pAnnots);
    if (pAnnotsReference)
    {
        // Write the updated Annots key of the Page object.
        PDFObjectElement* pAnnotsObject = pAnnotsReference->LookupObject();
        if (!pAnnotsObject)
        {
            SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid Annots reference");
            return false;
        }

        sal_uInt32 nAnnotsId = pAnnotsObject->GetObjectValue();
        // The rewritten object is always stored directly in the file body.
        m_aXRef[nAnnotsId].SetType(XRefEntryType::NOT_COMPRESSED);
        m_aXRef[nAnnotsId].SetOffset(m_aEditBuffer.Tell());
        m_aXRef[nAnnotsId].SetDirty(true);
        m_aEditBuffer.WriteNumberAsString(nAnnotsId);
        m_aEditBuffer.WriteOString(" 0 obj\n[");

        // Write existing references.
        PDFArrayElement* pArray = pAnnotsObject->GetArray();
        if (!pArray)
        {
            SAL_WARN("vcl.filter", "PDFDocument::Sign: Page Annots is a reference to a non-array");
            return false;
        }

        for (size_t i = 0; i < pArray->GetElements().size(); ++i)
        {
            auto pReference = dynamic_cast<PDFReferenceElement*>(pArray->GetElements()[i]);
            if (!pReference)
                continue;

            if (i)
                m_aEditBuffer.WriteOString(" ");
            m_aEditBuffer.WriteNumberAsString(pReference->GetObjectValue());
            m_aEditBuffer.WriteOString(" 0 R");
        }
        // Write our reference.
        m_aEditBuffer.WriteOString(" ");
        m_aEditBuffer.WriteNumberAsString(nAnnotId);
        m_aEditBuffer.WriteOString(" 0 R");

        m_aEditBuffer.WriteOString("]\nendobj\n\n");
    }
    else
    {
        // Write the updated first page object, references nAnnotId.
        sal_uInt32 nFirstPageId = rFirstPage.GetObjectValue();
        if (nFirstPageId >= m_aXRef.size())
        {
            SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid first page obj id");
            return false;
        }
        m_aXRef[nFirstPageId].SetOffset(m_aEditBuffer.Tell());
        m_aXRef[nFirstPageId].SetDirty(true);
        m_aEditBuffer.WriteNumberAsString(nFirstPageId);
        m_aEditBuffer.WriteOString(" 0 obj\n");
        m_aEditBuffer.WriteOString("<<");
        auto pAnnotsArray = dynamic_cast<PDFArrayElement*>(pAnnots);
        if (!pAnnotsArray)
        {
            // No Annots key, just write the key with a single reference.
            appendFromEditBuffer(rFirstPage.GetDictionaryOffset(),
                                 rFirstPage.GetDictionaryLength());
            m_aEditBuffer.WriteOString("/Annots[");
            m_aEditBuffer.WriteNumberAsString(nAnnotId);
            m_aEditBuffer.WriteOString(" 0 R]");
        }
        else
        {
            // Annots key is already there, insert our reference at the end.
            PDFDictionaryElement* pDictionary = rFirstPage.GetDictionary();

            // Offset right before the end of the Annots array.
            sal_uInt64 nAnnotsEndOffset = pDictionary->GetKeyOffset("Annots"_ostr)
                                          + pDictionary->GetKeyValueLength("Annots"_ostr) - 1;
            // Length of beginning of the dictionary -> Annots end.
            sal_uInt64 nAnnotsBeforeEndLength = nAnnotsEndOffset - rFirstPage.GetDictionaryOffset();
            appendFromEditBuffer(rFirstPage.GetDictionaryOffset(), nAnnotsBeforeEndLength);
            m_aEditBuffer.WriteOString(" ");
            m_aEditBuffer.WriteNumberAsString(nAnnotId);
            m_aEditBuffer.WriteOString(" 0 R");
            // Length of Annots end -> end of the dictionary.
            sal_uInt64 nAnnotsAfterEndLength = rFirstPage.GetDictionaryOffset()
                                               + rFirstPage.GetDictionaryLength()
                                               - nAnnotsEndOffset;
            appendFromEditBuffer(nAnnotsEndOffset, nAnnotsAfterEndLength);
        }
        m_aEditBuffer.WriteOString(">>");
        m_aEditBuffer.WriteOString("\nendobj\n\n");
    }

    return true;
}
 
// Registers the widget annotation nAnnotId as a form field by appending an updated object to the
// edit buffer and marking the matching xref entry dirty.
//
// The catalog is found through the Root entry of the xref stream dictionary if there is one,
// otherwise through the trailer; the reference found is handed back in pRoot. Depending on the
// AcroForm key of the catalog:
// - indirect reference: the referenced AcroForm object is rewritten, with our reference inserted
//   right before the closing "]" of its Fields array. The source bytes come from the object's
//   stream buffer if it has one, otherwise from the edit buffer;
// - inline dictionary: the catalog is rewritten with our reference inserted at the end of the
//   Fields array;
// - anything else (including a missing key): the catalog is rewritten with a new
//   "/AcroForm<</Fields[...]/SigFlags 3>>" entry appended.
//
// nAnnotId  ID of the signature widget annotation.
// pRoot     out: the Root reference; set before any of the later checks can fail.
//
// Returns false (after logging a warning) if trailer, Root, catalog or AcroForm cannot be
// resolved, if the catalog's ID is not smaller than the number of xref entries, or if the
// AcroForm has no Fields array.
bool PDFDocument::WriteCatalogObject(sal_Int32 nAnnotId, PDFReferenceElement*& pRoot)
{
    // Appends nLength bytes that are already stored in the edit buffer at nOffset.
    // The bytes are copied out before writing: handing WriteBytes() a pointer into the stream's
    // own storage is only safe as long as the stream never reallocates while growing.
    // NOTE(review): assumes SvMemoryStream may reallocate on growth - confirm.
    auto appendFromEditBuffer = [this](sal_uInt64 nOffset, sal_uInt64 nLength) {
        if (nLength == 0)
            return;
        const char* pBegin = static_cast<const char*>(m_aEditBuffer.GetData()) + nOffset;
        const std::vector<char> aBytes(pBegin, pBegin + nLength);
        m_aEditBuffer.WriteBytes(aBytes.data(), aBytes.size());
    };

    if (m_pXRefStream)
        pRoot = dynamic_cast<PDFReferenceElement*>(m_pXRefStream->Lookup("Root"_ostr));
    else
    {
        if (!m_pTrailer)
        {
            SAL_WARN("vcl.filter", "PDFDocument::Sign: found no trailer");
            return false;
        }
        pRoot = dynamic_cast<PDFReferenceElement*>(m_pTrailer->Lookup("Root"_ostr));
    }
    if (!pRoot)
    {
        SAL_WARN("vcl.filter", "PDFDocument::Sign: trailer has no root reference");
        return false;
    }
    PDFObjectElement* pCatalog = pRoot->LookupObject();
    if (!pCatalog)
    {
        SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid catalog reference");
        return false;
    }
    sal_uInt32 nCatalogId = pCatalog->GetObjectValue();
    if (nCatalogId >= m_aXRef.size())
    {
        SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid catalog obj id");
        return false;
    }
    PDFElement* pAcroForm = pCatalog->Lookup("AcroForm"_ostr);
    auto pAcroFormReference = dynamic_cast<PDFReferenceElement*>(pAcroForm);
    if (pAcroFormReference)
    {
        // Write the updated AcroForm key of the Catalog object.
        PDFObjectElement* pAcroFormObject = pAcroFormReference->LookupObject();
        if (!pAcroFormObject)
        {
            SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid AcroForm reference");
            return false;
        }

        sal_uInt32 nAcroFormId = pAcroFormObject->GetObjectValue();
        // The rewritten object is always stored directly in the file body.
        m_aXRef[nAcroFormId].SetType(XRefEntryType::NOT_COMPRESSED);
        m_aXRef[nAcroFormId].SetOffset(m_aEditBuffer.Tell());
        m_aXRef[nAcroFormId].SetDirty(true);
        m_aEditBuffer.WriteNumberAsString(nAcroFormId);
        m_aEditBuffer.WriteOString(" 0 obj\n");

        // If this is nullptr, then the AcroForm object is not in an object stream.
        SvMemoryStream* pStreamBuffer = pAcroFormObject->GetStreamBuffer();

        PDFElement* pFieldsElement = pAcroFormObject->Lookup("Fields"_ostr);
        if (!pFieldsElement)
        {
            SAL_WARN("vcl.filter",
                     "PDFDocument::Sign: AcroForm object without required Fields key");
            return false;
        }

        // The splice below inserts right before the last byte of the Fields value, which is only
        // correct if that value is an inline array ending in "]" (same check as for an inline
        // AcroForm dictionary).
        if (!dynamic_cast<PDFArrayElement*>(pFieldsElement))
        {
            SAL_WARN("vcl.filter", "PDFDocument::Sign: AcroForm Fields is not an array");
            return false;
        }

        PDFDictionaryElement* pAcroFormDictionary = pAcroFormObject->GetDictionary();
        if (!pAcroFormDictionary)
        {
            SAL_WARN("vcl.filter", "PDFDocument::Sign: AcroForm object has no dictionary");
            return false;
        }

        // Offset right before the end of the Fields array.
        sal_uInt64 nFieldsEndOffset = pAcroFormDictionary->GetKeyOffset("Fields"_ostr)
                                      + pAcroFormDictionary->GetKeyValueLength("Fields"_ostr)
                                      - strlen("]");

        // Length of beginning of the object dictionary -> Fields end.
        sal_uInt64 nFieldsBeforeEndLength = nFieldsEndOffset;
        if (pStreamBuffer)
            // The stream buffer is copied from its very beginning, so the offset is the length.
            m_aEditBuffer.WriteBytes(pStreamBuffer->GetData(), nFieldsBeforeEndLength);
        else
        {
            nFieldsBeforeEndLength -= pAcroFormObject->GetDictionaryOffset();
            m_aEditBuffer.WriteOString("<<");
            appendFromEditBuffer(pAcroFormObject->GetDictionaryOffset(), nFieldsBeforeEndLength);
        }

        // Append our reference at the end of the Fields array.
        m_aEditBuffer.WriteOString(" ");
        m_aEditBuffer.WriteNumberAsString(nAnnotId);
        m_aEditBuffer.WriteOString(" 0 R");

        // Length of Fields end -> end of the object dictionary.
        if (pStreamBuffer)
        {
            sal_uInt64 nFieldsAfterEndLength = pStreamBuffer->GetSize() - nFieldsEndOffset;
            m_aEditBuffer.WriteBytes(static_cast<const char*>(pStreamBuffer->GetData())
                                         + nFieldsEndOffset,
                                     nFieldsAfterEndLength);
        }
        else
        {
            sal_uInt64 nFieldsAfterEndLength = pAcroFormObject->GetDictionaryOffset()
                                               + pAcroFormObject->GetDictionaryLength()
                                               - nFieldsEndOffset;
            appendFromEditBuffer(nFieldsEndOffset, nFieldsAfterEndLength);
            m_aEditBuffer.WriteOString(">>");
        }

        m_aEditBuffer.WriteOString("\nendobj\n\n");
    }
    else
    {
        // Write the updated Catalog object, references nAnnotId.
        auto pAcroFormDictionary = dynamic_cast<PDFDictionaryElement*>(pAcroForm);
        m_aXRef[nCatalogId].SetOffset(m_aEditBuffer.Tell());
        m_aXRef[nCatalogId].SetDirty(true);
        m_aEditBuffer.WriteNumberAsString(nCatalogId);
        m_aEditBuffer.WriteOString(" 0 obj\n");
        m_aEditBuffer.WriteOString("<<");
        if (!pAcroFormDictionary)
        {
            // No AcroForm key, assume no signatures.
            appendFromEditBuffer(pCatalog->GetDictionaryOffset(), pCatalog->GetDictionaryLength());
            m_aEditBuffer.WriteOString("/AcroForm<</Fields[\n");
            m_aEditBuffer.WriteNumberAsString(nAnnotId);
            m_aEditBuffer.WriteOString(" 0 R\n]/SigFlags 3>>\n");
        }
        else
        {
            // AcroForm key is already there, insert our reference at the Fields end.
            auto it = pAcroFormDictionary->GetItems().find("Fields"_ostr);
            if (it == pAcroFormDictionary->GetItems().end())
            {
                SAL_WARN("vcl.filter", "PDFDocument::Sign: AcroForm without required Fields key");
                return false;
            }

            auto pFields = dynamic_cast<PDFArrayElement*>(it->second);
            if (!pFields)
            {
                SAL_WARN("vcl.filter", "PDFDocument::Sign: AcroForm Fields is not an array");
                return false;
            }

            // Offset right before the end of the Fields array.
            sal_uInt64 nFieldsEndOffset = pAcroFormDictionary->GetKeyOffset("Fields"_ostr)
                                          + pAcroFormDictionary->GetKeyValueLength("Fields"_ostr)
                                          - 1;
            // Length of beginning of the Catalog dictionary -> Fields end.
            sal_uInt64 nFieldsBeforeEndLength = nFieldsEndOffset - pCatalog->GetDictionaryOffset();
            appendFromEditBuffer(pCatalog->GetDictionaryOffset(), nFieldsBeforeEndLength);
            m_aEditBuffer.WriteOString(" ");
            m_aEditBuffer.WriteNumberAsString(nAnnotId);
            m_aEditBuffer.WriteOString(" 0 R");
            // Length of Fields end -> end of the Catalog dictionary.
            sal_uInt64 nFieldsAfterEndLength = pCatalog->GetDictionaryOffset()
                                               + pCatalog->GetDictionaryLength() - nFieldsEndOffset;
            appendFromEditBuffer(nFieldsEndOffset, nFieldsAfterEndLength);
        }
        m_aEditBuffer.WriteOString(">>\nendobj\n\n");
    }

    return true;
}
 
// Appends the cross-reference information for all dirty xref entries to the edit buffer.
//
// If the document uses a cross-reference stream, a new xref stream object is written (and gets
// an xref entry of its own at nXRefOffset); otherwise a classic "xref" table followed by a
// "trailer" dictionary is written. In both cases ID and Info are carried over from the existing
// xref stream dictionary / trailer when present, and /Prev points at the last known startxref
// offset if there is one.
//
// nXRefOffset  offset at which the cross-reference section starts; the caller is expected to
//              pass the current write position of the edit buffer.
// pRoot        reference to the catalog; must not be null, it is dereferenced unconditionally.
//
// The classic branch dereferences m_pTrailer unconditionally.
void PDFDocument::WriteXRef(sal_uInt64 nXRefOffset, PDFReferenceElement const* pRoot)
{
    if (m_pXRefStream)
    {
        // Write the xref stream.
        // This is a bit meta: the xref stream stores its own offset.
        sal_Int32 nXRefStreamId = m_aXRef.size();
        XRefEntry aXRefStreamEntry;
        aXRefStreamEntry.SetOffset(nXRefOffset);
        aXRefStreamEntry.SetDirty(true);
        m_aXRef[nXRefStreamId] = aXRefStreamEntry;

        // Width of the offset field in bytes. 3 is the minimum (keeps the output for files
        // below 16 MiB unchanged); it is widened as far as the largest dirty offset requires,
        // so that larger offsets are not truncated.
        size_t nOffsetLen = 3;
        for (const auto& rXRef : m_aXRef)
        {
            if (!rXRef.second.GetDirty())
                continue;

            const sal_uInt64 nOffset = rXRef.second.GetOffset();
            // The shift stays below 64 bits because of the first condition.
            while (nOffsetLen < sizeof(sal_uInt64) && (nOffset >> (nOffsetLen * 8)) != 0)
                ++nOffsetLen;
        }

        // Write stream data.
        SvMemoryStream aXRefStream;
        // 3 additional bytes: predictor, the first and the third field.
        const size_t nLineLength = nOffsetLen + 3;
        // This is the line as it appears before tweaking according to the predictor.
        std::vector<unsigned char> aOrigLine(nLineLength);
        // This is the previous line.
        std::vector<unsigned char> aPrevLine(nLineLength);
        // This is the line as written to the stream.
        std::vector<unsigned char> aFilteredLine(nLineLength);
        for (const auto& rXRef : m_aXRef)
        {
            const XRefEntry& rEntry = rXRef.second;

            if (!rEntry.GetDirty())
                continue;

            // Predictor.
            size_t nPos = 0;
            // PNG prediction: up (on all rows).
            aOrigLine[nPos++] = 2;

            // First field.
            unsigned char nType = 0;
            switch (rEntry.GetType())
            {
                case XRefEntryType::FREE:
                    nType = 0;
                    break;
                case XRefEntryType::NOT_COMPRESSED:
                    nType = 1;
                    break;
                case XRefEntryType::COMPRESSED:
                    nType = 2;
                    break;
            }
            aOrigLine[nPos++] = nType;

            // Second field.
            const sal_uInt64 nEntryOffset = rEntry.GetOffset();
            for (size_t i = 0; i < nOffsetLen; ++i)
            {
                size_t nByte = nOffsetLen - i - 1;
                // Fields requiring more than one byte are stored with the
                // high-order byte first. Shift first, then mask, so that the computation is
                // done on the 64-bit offset for every byte position.
                unsigned char nCh
                    = static_cast<unsigned char>((nEntryOffset >> (nByte * 8)) & 0xff);
                aOrigLine[nPos++] = nCh;
            }

            // Third field.
            aOrigLine[nPos++] = 0;

            // Now apply the predictor.
            aFilteredLine[0] = aOrigLine[0];
            for (size_t i = 1; i < nLineLength; ++i)
            {
                // Count the delta vs the previous line.
                aFilteredLine[i] = aOrigLine[i] - aPrevLine[i];
                // Remember the new reference.
                aPrevLine[i] = aOrigLine[i];
            }

            aXRefStream.WriteBytes(aFilteredLine.data(), aFilteredLine.size());
        }

        // Columns is the row length without the predictor byte: the three fields together.
        m_aEditBuffer.WriteNumberAsString(nXRefStreamId);
        m_aEditBuffer.WriteOString(" 0 obj\n<</DecodeParms<</Columns ");
        m_aEditBuffer.WriteNumberAsString(nOffsetLen + 2);
        m_aEditBuffer.WriteOString("/Predictor 12>>/Filter/FlateDecode");

        // ID.
        auto pID = dynamic_cast<PDFArrayElement*>(m_pXRefStream->Lookup("ID"_ostr));
        if (pID)
        {
            const std::vector<PDFElement*>& rElements = pID->GetElements();
            m_aEditBuffer.WriteOString("/ID [ <");
            for (size_t i = 0; i < rElements.size(); ++i)
            {
                auto pIDString = dynamic_cast<PDFHexStringElement*>(rElements[i]);
                if (!pIDString)
                    continue;

                m_aEditBuffer.WriteOString(pIDString->GetValue());
                if ((i + 1) < rElements.size())
                    m_aEditBuffer.WriteOString("> <");
            }
            m_aEditBuffer.WriteOString("> ] ");
        }

        // Index: one subsection of length 1 per dirty entry, in the same order as the rows.
        m_aEditBuffer.WriteOString("/Index [ ");
        for (const auto& rXRef : m_aXRef)
        {
            if (!rXRef.second.GetDirty())
                continue;

            m_aEditBuffer.WriteNumberAsString(rXRef.first);
            m_aEditBuffer.WriteOString(" 1 ");
        }
        m_aEditBuffer.WriteOString("] ");

        // Info.
        auto pInfo = dynamic_cast<PDFReferenceElement*>(m_pXRefStream->Lookup("Info"_ostr));
        if (pInfo)
        {
            m_aEditBuffer.WriteOString("/Info ");
            m_aEditBuffer.WriteNumberAsString(pInfo->GetObjectValue());
            m_aEditBuffer.WriteOString(" ");
            m_aEditBuffer.WriteNumberAsString(pInfo->GetGenerationValue());
            m_aEditBuffer.WriteOString(" R ");
        }

        // Length: compress the rows in place, then write the compressed size.
        m_aEditBuffer.WriteOString("/Length ");
        {
            ZCodec aZCodec;
            aZCodec.BeginCompression();
            aXRefStream.Seek(0);
            SvMemoryStream aStream;
            aZCodec.Compress(aXRefStream, aStream);
            aZCodec.EndCompression();
            aXRefStream.Seek(0);
            aXRefStream.SetStreamSize(0);
            aStream.Seek(0);
            aXRefStream.WriteStream(aStream);
        }
        m_aEditBuffer.WriteNumberAsString(aXRefStream.GetSize());

        if (!m_aStartXRefs.empty())
        {
            // Write location of the previous cross-reference section.
            m_aEditBuffer.WriteOString("/Prev ");
            m_aEditBuffer.WriteNumberAsString(m_aStartXRefs.back());
        }

        // Root.
        m_aEditBuffer.WriteOString("/Root ");
        m_aEditBuffer.WriteNumberAsString(pRoot->GetObjectValue());
        m_aEditBuffer.WriteOString(" ");
        m_aEditBuffer.WriteNumberAsString(pRoot->GetGenerationValue());
        m_aEditBuffer.WriteOString(" R ");

        // Size.
        m_aEditBuffer.WriteOString("/Size ");
        m_aEditBuffer.WriteNumberAsString(m_aXRef.size());

        // W: widths of the three fields, matching the rows written above.
        m_aEditBuffer.WriteOString("/Type/XRef/W[1 ");
        m_aEditBuffer.WriteNumberAsString(nOffsetLen);
        m_aEditBuffer.WriteOString(" 1]>>\nstream\n");
        aXRefStream.Seek(0);
        m_aEditBuffer.WriteStream(aXRefStream);
        m_aEditBuffer.WriteOString("\nendstream\nendobj\n\n");
    }
    else
    {
        // Write the xref table.
        m_aEditBuffer.WriteOString("xref\n");
        for (const auto& rXRef : m_aXRef)
        {
            if (!rXRef.second.GetDirty())
                continue;

            size_t nObject = rXRef.first;
            // Keep the full 64-bit offset: narrowing it to 32 bits would corrupt entries that
            // point beyond 2 GiB.
            const sal_uInt64 nOffset = rXRef.second.GetOffset();

            // Each dirty entry gets a subsection of its own.
            m_aEditBuffer.WriteNumberAsString(nObject);
            m_aEditBuffer.WriteOString(" 1\n");
            OStringBuffer aBuffer = OString::number(static_cast<sal_Int64>(nOffset));
            // The offset is zero-padded to 10 digits.
            while (aBuffer.getLength() < 10)
                aBuffer.insert(0, "0");
            if (nObject == 0)
                aBuffer.append(" 65535 f \n");
            else
                aBuffer.append(" 00000 n \n");
            m_aEditBuffer.WriteOString(aBuffer);
        }

        // Write the trailer.
        m_aEditBuffer.WriteOString("trailer\n<</Size ");
        m_aEditBuffer.WriteNumberAsString(m_aXRef.size());
        m_aEditBuffer.WriteOString("/Root ");
        m_aEditBuffer.WriteNumberAsString(pRoot->GetObjectValue());
        m_aEditBuffer.WriteOString(" ");
        m_aEditBuffer.WriteNumberAsString(pRoot->GetGenerationValue());
        m_aEditBuffer.WriteOString(" R\n");
        auto pInfo = dynamic_cast<PDFReferenceElement*>(m_pTrailer->Lookup("Info"_ostr));
        if (pInfo)
        {
            m_aEditBuffer.WriteOString("/Info ");
            m_aEditBuffer.WriteNumberAsString(pInfo->GetObjectValue());
            m_aEditBuffer.WriteOString(" ");
            m_aEditBuffer.WriteNumberAsString(pInfo->GetGenerationValue());
            m_aEditBuffer.WriteOString(" R\n");
        }
        auto pID = dynamic_cast<PDFArrayElement*>(m_pTrailer->Lookup("ID"_ostr));
        if (pID)
        {
            const std::vector<PDFElement*>& rElements = pID->GetElements();
            m_aEditBuffer.WriteOString("/ID [ <");
            for (size_t i = 0; i < rElements.size(); ++i)
            {
                auto pIDString = dynamic_cast<PDFHexStringElement*>(rElements[i]);
                if (!pIDString)
                    continue;

                m_aEditBuffer.WriteOString(pIDString->GetValue());
                if ((i + 1) < rElements.size())
                    m_aEditBuffer.WriteOString(">\n<");
            }
            m_aEditBuffer.WriteOString("> ]\n");
        }

        if (!m_aStartXRefs.empty())
        {
            // Write location of the previous cross-reference section.
            m_aEditBuffer.WriteOString("/Prev ");
            m_aEditBuffer.WriteNumberAsString(m_aStartXRefs.back());
        }

        m_aEditBuffer.WriteOString(">>\n");
    }
}
 
/// Appends a signature to the in-memory edit buffer as an incremental update.
///
/// The new signature, appearance and annotation objects plus the updated page and catalog are
/// written first, followed by the xref section and the startxref / %%EOF footer. Only then is the
/// length of the last byte range known, so it is patched in afterwards and the CMS blob is
/// computed over everything except the reserved signature placeholder.
///
/// @param rSigningContext Signing parameters; its m_xCertificate may be unset.
/// @param rDescription    Forwarded to WriteSignatureObject().
/// @param bAdES           Forwarded to WriteSignatureObject().
/// @return false if there is no usable page, the page or catalog can't be rewritten, the
///         certificate has no encoded form, or producing the signature fails.
bool PDFDocument::Sign(svl::crypto::SigningContext& rSigningContext, const OUString& rDescription,
                       bool bAdES)
{
    // The update is appended after the existing content, starting on a new line.
    m_aEditBuffer.Seek(STREAM_SEEK_TO_END);
    m_aEditBuffer.WriteOString("\n");

    // Both offsets are out-parameters of WriteSignatureObject().
    sal_uInt64 nSignatureLastByteRangeOffset = 0;
    sal_Int64 nSignatureContentOffset = 0;
    const sal_Int32 nSignatureObjectId
        = WriteSignatureObject(rSigningContext, rDescription, bAdES, nSignatureLastByteRangeOffset,
                               nSignatureContentOffset);
    assert(nSignatureContentOffset > 0
           && "WriteSignatureObject guarantees a length for nSignatureContentOffset");

    tools::Rectangle aWidgetRectangle;
    const sal_Int32 nAppearanceObjectId = WriteAppearanceObject(aWidgetRectangle);

    const std::vector<PDFObjectElement*> aPages = GetPages();
    if (aPages.empty())
    {
        SAL_WARN("vcl.filter", "PDFDocument::Sign: found no pages");
        return false;
    }

    // Use the requested page when it exists, the first page otherwise.
    const size_t nPage = m_nSignaturePage < aPages.size() ? m_nSignaturePage : 0;
    if (!aPages[nPage])
    {
        SAL_WARN("vcl.filter", "PDFDocument::Sign: failed to find page #" << nPage);
        return false;
    }

    PDFObjectElement& rPage = *aPages[nPage];
    const sal_Int32 nAnnotObjectId
        = WriteAnnotObject(rPage, nSignatureObjectId, nAppearanceObjectId, aWidgetRectangle);

    if (!WritePageObject(rPage, nAnnotObjectId))
    {
        SAL_WARN("vcl.filter", "PDFDocument::Sign: failed to write the updated Page object");
        return false;
    }

    PDFReferenceElement* pRoot = nullptr;
    if (!WriteCatalogObject(nAnnotObjectId, pRoot))
    {
        SAL_WARN("vcl.filter", "PDFDocument::Sign: failed to write the updated Catalog object");
        return false;
    }

    // Cross-reference section, then the startxref / %%EOF footer pointing at it.
    const sal_uInt64 nXRefOffset = m_aEditBuffer.Tell();
    WriteXRef(nXRefOffset, pRoot);
    m_aEditBuffer.WriteOString("startxref\n");
    m_aEditBuffer.WriteNumberAsString(nXRefOffset);
    m_aEditBuffer.WriteOString("\n%%EOF\n");

    // The total size is known now, so the last byte range can be finalized: it runs from right
    // after the reserved signature content to the end of the buffer.
    const sal_uInt64 nFileEnd = m_aEditBuffer.Tell();
    const auto nSecondRangeStart = nSignatureContentOffset + MAX_SIGNATURE_CONTENT_LENGTH + 1;
    const sal_Int64 nLastByteRangeLength = nFileEnd - nSecondRangeStart;
    m_aEditBuffer.Seek(nSignatureLastByteRangeOffset);
    m_aEditBuffer.WriteOString(OString::number(nLastByteRangeLength) + " ]");

    // A certificate, when provided, has to have an encoded form.
    if (rSigningContext.m_xCertificate.is()
        && !rSigningContext.m_xCertificate->getEncoded().hasElements())
    {
        SAL_WARN("vcl.filter", "PDFDocument::Sign: empty certificate");
        return false;
    }

    // First signed range: start of the buffer up to just before the signature content.
    const sal_uInt64 nFirstRangeLength = nSignatureContentOffset - 1;
    auto pFirstRange = std::make_unique<char[]>(nFirstRangeLength);
    m_aEditBuffer.Seek(0);
    m_aEditBuffer.ReadBytes(pFirstRange.get(), nFirstRangeLength);

    // Second signed range: everything after the reserved signature content.
    const sal_uInt64 nSecondRangeLength = nLastByteRangeLength;
    auto pSecondRange = std::make_unique<char[]>(nSecondRangeLength);
    m_aEditBuffer.Seek(nSecondRangeStart);
    m_aEditBuffer.ReadBytes(pSecondRange.get(), nSecondRangeLength);

    svl::crypto::Signing aSigning(rSigningContext);
    aSigning.AddDataRange(pFirstRange.get(), nFirstRangeLength);
    aSigning.AddDataRange(pSecondRange.get(), nSecondRangeLength);

    OStringBuffer aCMSHexBuffer;
    if (!aSigning.Sign(aCMSHexBuffer))
    {
        // Only warn when a certificate was actually supplied.
        if (rSigningContext.m_xCertificate.is())
        {
            SAL_WARN("vcl.filter", "PDFDocument::Sign: PDFWriter::Sign() failed");
        }
        return false;
    }

    assert(aCMSHexBuffer.getLength() <= MAX_SIGNATURE_CONTENT_LENGTH);

    // Finally drop the hex-encoded CMS blob into the space reserved for it.
    m_aEditBuffer.Seek(nSignatureContentOffset);
    m_aEditBuffer.WriteOString(aCMSHexBuffer);

    return true;
}
 
/// Copies the in-memory edit buffer into rStream.
///
/// @return the state of rStream after the copy, as reported by good().
bool PDFDocument::Write(SvStream& rStream)
{
    // The edit buffer's position is wherever the last edit left it, so rewind before copying.
    m_aEditBuffer.Seek(0);
    rStream.WriteStream(m_aEditBuffer);

    const bool bTargetOk = rStream.good();
    return bTargetOk;
}
 
/// Tokenizes rStream, starting at its current position, and appends the created elements to
/// rElements.
///
/// The first character of each token selects the element type; the character is then un-read
/// (SeekRel) so that the element's own Read() sees the complete token.
///
/// @param rStream        Stream to read from.
/// @param eMode          Decides when to stop early: with EOF_TOKEN parsing ends once a comment
///                       has been read and the stream position equals the last entry of m_aEOFs;
///                       with END_OF_OBJECT it ends after the first "endobj" keyword. Otherwise
///                       the stream is read until it reports eof.
/// @param rElements      Receives (and owns) every element created here.
/// @param pObjectElement Object to treat as the current one from the start, may be nullptr.
/// @return false as soon as an element fails to read, or an unexpected keyword or character is
///         found; true otherwise.
bool PDFDocument::Tokenize(SvStream& rStream, TokenizeMode eMode,
                           std::vector<std::unique_ptr<PDFElement>>& rElements,
                           PDFObjectElement* pObjectElement)
{
    // Last seen object token.
    PDFObjectElement* pObject = pObjectElement;
    // Last seen name token, used to spot the "/Type /ObjStm" pair.
    PDFNameElement* pObjectKey = nullptr;
    // Set when the current object turns out to be an object stream.
    PDFObjectElement* pObjectStream = nullptr;
    // Set once the "xref" keyword was seen.
    bool bInXRef = false;
    // The next number will be an xref offset.
    bool bInStartXRef = false;
    // Dictionary depth, so we know when we're outside any dictionaries.
    // Note that arrays are counted here as well, see the '[' and ']' cases.
    int nDepth = 0;
    // Last seen array token that's outside any dictionaries.
    PDFArrayElement* pArray = nullptr;
    // If we're inside an obj/endobj pair.
    bool bInObject = false;

    while (true)
    {
        char ch;
        rStream.ReadChar(ch);
        if (rStream.eof())
            break;

        switch (ch)
        {
            case '%':
            {
                // Comment token.
                auto pComment = new PDFCommentElement(*this);
                rElements.push_back(std::unique_ptr<PDFElement>(pComment));
                rStream.SeekRel(-1);
                if (!rElements.back()->Read(rStream))
                {
                    SAL_WARN("vcl.filter",
                             "PDFDocument::Tokenize: PDFCommentElement::Read() failed");
                    return false;
                }
                // NOTE(review): m_aEOFs is not written in this function; presumably
                // PDFCommentElement::Read() records the end offset of %%EOF comments there.
                if (eMode == TokenizeMode::EOF_TOKEN && !m_aEOFs.empty()
                    && m_aEOFs.back() == rStream.Tell())
                {
                    // Found EOF and partial parsing requested, we're done.
                    return true;
                }
                break;
            }
            case '<':
            {
                // Dictionary or hex string.
                // Peek at the next character, then rewind past both characters.
                rStream.ReadChar(ch);
                rStream.SeekRel(-2);
                if (ch == '<')
                {
                    rElements.push_back(std::unique_ptr<PDFElement>(new PDFDictionaryElement()));
                    ++nDepth;
                }
                else
                    rElements.push_back(std::unique_ptr<PDFElement>(new PDFHexStringElement));
                if (!rElements.back()->Read(rStream))
                {
                    // NOTE(review): this message is also emitted when a hex string fails to read.
                    SAL_WARN("vcl.filter",
                             "PDFDocument::Tokenize: PDFDictionaryElement::Read() failed");
                    return false;
                }
                break;
            }
            case '>':
            {
                // End of a dictionary.
                rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndDictionaryElement()));
                --nDepth;
                rStream.SeekRel(-1);
                if (!rElements.back()->Read(rStream))
                {
                    SAL_WARN("vcl.filter",
                             "PDFDocument::Tokenize: PDFEndDictionaryElement::Read() failed");
                    return false;
                }
                break;
            }
            case '[':
            {
                auto pArr = new PDFArrayElement(pObject);
                rElements.push_back(std::unique_ptr<PDFElement>(pArr));
                if (nDepth == 0)
                {
                    // The array is attached directly, inform the object.
                    pArray = pArr;
                    if (pObject)
                    {
                        pObject->SetArray(pArray);
                        // The stream is positioned right after the '[' at this point.
                        pObject->SetArrayOffset(rStream.Tell());
                    }
                }
                ++nDepth;
                rStream.SeekRel(-1);
                if (!rElements.back()->Read(rStream))
                {
                    SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFArrayElement::Read() failed");
                    return false;
                }
                break;
            }
            case ']':
            {
                rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndArrayElement()));
                --nDepth;
                rStream.SeekRel(-1);
                if (nDepth == 0)
                {
                    if (pObject)
                    {
                        // The stream is positioned at the ']' here, so the length covers what is
                        // between the brackets.
                        pObject->SetArrayLength(rStream.Tell() - pObject->GetArrayOffset());
                    }
                }
                if (!rElements.back()->Read(rStream))
                {
                    SAL_WARN("vcl.filter",
                             "PDFDocument::Tokenize: PDFEndArrayElement::Read() failed");
                    return false;
                }
                break;
            }
            case '/':
            {
                auto pNameElement = new PDFNameElement();
                rElements.push_back(std::unique_ptr<PDFElement>(pNameElement));
                rStream.SeekRel(-1);
                if (!pNameElement->Read(rStream))
                {
                    SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFNameElement::Read() failed");
                    return false;
                }

                // A "Type" name followed by an "ObjStm" name marks the current object as an
                // object stream; any other name just becomes the last seen key.
                if (pObject && pObjectKey && pObjectKey->GetValue() == "Type"
                    && pNameElement->GetValue() == "ObjStm")
                    pObjectStream = pObject;
                else
                    pObjectKey = pNameElement;

                if (bInObject && !nDepth && pObject)
                {
                    // Name element inside an object, but outside a
                    // dictionary / array: remember it.
                    pObject->SetNameElement(pNameElement);
                }

                break;
            }
            case '(':
            {
                rElements.push_back(std::unique_ptr<PDFElement>(new PDFLiteralStringElement));
                rStream.SeekRel(-1);
                if (!rElements.back()->Read(rStream))
                {
                    SAL_WARN("vcl.filter",
                             "PDFDocument::Tokenize: PDFLiteralStringElement::Read() failed");
                    return false;
                }
                break;
            }
            default:
            {
                if (rtl::isAsciiDigit(static_cast<unsigned char>(ch)) || ch == '-' || ch == '+'
                    || ch == '.')
                {
                    // Numbering object: an integer or a real.
                    auto pNumberElement = new PDFNumberElement();
                    rElements.push_back(std::unique_ptr<PDFElement>(pNumberElement));
                    rStream.SeekRel(-1);
                    if (!pNumberElement->Read(rStream))
                    {
                        SAL_WARN("vcl.filter",
                                 "PDFDocument::Tokenize: PDFNumberElement::Read() failed");
                        return false;
                    }
                    if (bInStartXRef)
                    {
                        // This is the offset following a "startxref" keyword: remember it, and
                        // if an object starts at that offset, treat it as the xref stream.
                        bInStartXRef = false;
                        m_aStartXRefs.push_back(pNumberElement->GetValue());

                        auto it = m_aOffsetObjects.find(pNumberElement->GetValue());
                        if (it != m_aOffsetObjects.end())
                            m_pXRefStream = it->second;
                    }
                    else if (bInObject && !nDepth && pObject)
                        // Number element inside an object, but outside a
                        // dictionary / array: remember it.
                        pObject->SetNumberElement(pNumberElement);
                }
                else if (rtl::isAsciiAlpha(static_cast<unsigned char>(ch)))
                {
                    // Possible keyword, like "obj".
                    rStream.SeekRel(-1);
                    OString aKeyword = ReadKeyword(rStream);

                    bool bObj = aKeyword == "obj";
                    if (bObj || aKeyword == "R")
                    {
                        // Both keywords are preceded by an object number and a generation
                        // number, which have already been tokenized as the last two elements.
                        size_t nElements = rElements.size();
                        if (nElements < 2)
                        {
                            SAL_WARN("vcl.filter", "PDFDocument::Tokenize: expected at least two "
                                                   "tokens before 'obj' or 'R' keyword");
                            return false;
                        }

                        auto pObjectNumber
                            = dynamic_cast<PDFNumberElement*>(rElements[nElements - 2].get());
                        auto pGenerationNumber
                            = dynamic_cast<PDFNumberElement*>(rElements[nElements - 1].get());
                        if (!pObjectNumber || !pGenerationNumber)
                        {
                            SAL_WARN("vcl.filter", "PDFDocument::Tokenize: missing object or "
                                                   "generation number before 'obj' or 'R' keyword");
                            return false;
                        }

                        if (bObj)
                        {
                            // New object: make it findable both by the location of its object
                            // number token and by its object number.
                            pObject = new PDFObjectElement(*this, pObjectNumber->GetValue(),
                                                           pGenerationNumber->GetValue());
                            rElements.push_back(std::unique_ptr<PDFElement>(pObject));
                            m_aOffsetObjects[pObjectNumber->GetLocation()] = pObject;
                            m_aIDObjects[pObjectNumber->GetValue()] = pObject;
                            bInObject = true;
                        }
                        else
                        {
                            auto pReference = new PDFReferenceElement(*this, *pObjectNumber,
                                                                      *pGenerationNumber);
                            rElements.push_back(std::unique_ptr<PDFElement>(pReference));
                            if (bInObject && nDepth > 0 && pObject)
                                // Inform the object about a new in-dictionary reference.
                                pObject->AddDictionaryReference(pReference);
                        }
                        if (!rElements.back()->Read(rStream))
                        {
                            SAL_WARN("vcl.filter",
                                     "PDFDocument::Tokenize: PDFElement::Read() failed");
                            return false;
                        }
                    }
                    else if (aKeyword == "stream")
                    {
                        // Look up the length of the stream from the parent object's dictionary.
                        // nLength stays 0 if rElements contains no object element at all.
                        size_t nLength = 0;
                        for (size_t nElement = 0; nElement < rElements.size(); ++nElement)
                        {
                            // Iterate in reverse order.
                            size_t nIndex = rElements.size() - nElement - 1;
                            PDFElement* pElement = rElements[nIndex].get();
                            auto pObj = dynamic_cast<PDFObjectElement*>(pElement);
                            if (!pObj)
                                continue;

                            // Only the most recent object element is consulted: every path
                            // below leaves the loop.
                            PDFElement* pLookup = pObj->Lookup("Length"_ostr);
                            auto pReference = dynamic_cast<PDFReferenceElement*>(pLookup);
                            if (pReference)
                            {
                                // Length is provided as a reference.
                                nLength = pReference->LookupNumber(rStream);
                                break;
                            }

                            auto pNumber = dynamic_cast<PDFNumberElement*>(pLookup);
                            if (pNumber)
                            {
                                // Length is provided directly.
                                nLength = pNumber->GetValue();
                                break;
                            }

                            SAL_WARN(
                                "vcl.filter",
                                "PDFDocument::Tokenize: found no Length key for stream keyword");
                            return false;
                        }

                        PDFDocument::SkipLineBreaks(rStream);
                        auto pStreamElement = new PDFStreamElement(nLength);
                        if (pObject)
                            pObject->SetStream(pStreamElement);
                        rElements.push_back(std::unique_ptr<PDFElement>(pStreamElement));
                        if (!rElements.back()->Read(rStream))
                        {
                            SAL_WARN("vcl.filter",
                                     "PDFDocument::Tokenize: PDFStreamElement::Read() failed");
                            return false;
                        }
                    }
                    else if (aKeyword == "endstream")
                    {
                        rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndStreamElement));
                        if (!rElements.back()->Read(rStream))
                        {
                            SAL_WARN("vcl.filter",
                                     "PDFDocument::Tokenize: PDFEndStreamElement::Read() failed");
                            return false;
                        }
                    }
                    else if (aKeyword == "endobj")
                    {
                        rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndObjectElement));
                        if (!rElements.back()->Read(rStream))
                        {
                            SAL_WARN("vcl.filter",
                                     "PDFDocument::Tokenize: PDFEndObjectElement::Read() failed");
                            return false;
                        }
                        if (eMode == TokenizeMode::END_OF_OBJECT)
                        {
                            // Found endobj and only object parsing was requested, we're done.
                            return true;
                        }

                        if (pObjectStream)
                        {
                            // We're at the end of an object stream, parse the stored objects.
                            pObjectStream->ParseStoredObjects();
                            pObjectStream = nullptr;
                            pObjectKey = nullptr;
                        }
                        bInObject = false;
                    }
                    else if (aKeyword == "true" || aKeyword == "false")
                        rElements.push_back(std::unique_ptr<PDFElement>(
                            new PDFBooleanElement(aKeyword.toBoolean())));
                    else if (aKeyword == "null")
                        rElements.push_back(std::unique_ptr<PDFElement>(new PDFNullElement));
                    else if (aKeyword == "xref")
                        // Allow 'f' and 'n' keywords.
                        bInXRef = true;
                    else if (bInXRef && (aKeyword == "f" || aKeyword == "n"))
                    {
                        // Entry type markers of an xref table: nothing to do for them here.
                    }
                    else if (aKeyword == "trailer")
                    {
                        auto pTrailer = new PDFTrailerElement(*this);

                        // Make it possible to find this trailer later by offset.
                        // Note that the result of Read() is not checked here.
                        pTrailer->Read(rStream);
                        m_aOffsetTrailers[pTrailer->GetLocation()] = pTrailer;

                        // When reading till the first EOF token only, remember
                        // just the first trailer token.
                        if (eMode != TokenizeMode::EOF_TOKEN || !m_pTrailer)
                            m_pTrailer = pTrailer;
                        rElements.push_back(std::unique_ptr<PDFElement>(pTrailer));
                    }
                    else if (aKeyword == "startxref")
                    {
                        // The number that follows is handled in the number branch above.
                        bInStartXRef = true;
                    }
                    else
                    {
                        SAL_WARN("vcl.filter", "PDFDocument::Tokenize: unexpected '"
                                                   << aKeyword << "' keyword at byte position "
                                                   << rStream.Tell());
                        return false;
                    }
                }
                else
                {
                    // Neither a number nor a keyword: only whitespace (and, leniently, a null
                    // character) is acceptable between tokens.
                    auto uChar = static_cast<unsigned char>(ch);
                    // Be more lenient and allow unexpected null char
                    if (!rtl::isAsciiWhiteSpace(uChar) && uChar != 0)
                    {
                        SAL_WARN("vcl.filter",
                                 "PDFDocument::Tokenize: unexpected character with code "
                                     << sal_Int32(ch) << " at byte position " << rStream.Tell());
                        return false;
                    }
                    SAL_WARN_IF(uChar == 0, "vcl.filter",
                                "PDFDocument::Tokenize: unexpected null character at "
                                    << rStream.Tell() << " - ignoring");
                }
                break;
            }
        }
    }

    return true;
}
 
/// Registers pObject under the object number nID in m_aIDObjects, replacing any object that was
/// previously stored for that number.
void PDFDocument::SetIDObject(size_t nID, PDFObjectElement* pObject)
{
    m_aIDObjects[nID] = pObject;
}
 
/// Reads the document from rStream; if that fails, converts the input with
/// vcl::pdf::convertToHighestSupported() and tries once more on the converted copy.
///
/// @return true if either attempt succeeded.
bool PDFDocument::ReadWithPossibleFixup(SvStream& rStream)
{
    const bool bReadDirectly = Read(rStream);
    if (!bReadDirectly)
    {
        // Read failed, try a roundtrip through pdfium and then retry.
        SvMemoryStream aStandardizedStream;
        rStream.Seek(0);
        vcl::pdf::convertToHighestSupported(rStream, aStandardizedStream);
        return Read(aStandardizedStream);
    }

    return true;
}
 
/// Parses rStream as a PDF document.
///
/// The stream is copied into the edit buffer, then the cross-reference sections are read
/// starting from the last startxref offset and following the /Prev entries, and finally the
/// whole stream is tokenized into m_aElements.
///
/// Read() may be called again on the same instance after a failed attempt (this is what
/// ReadWithPossibleFixup() does), so everything a previous attempt may have filled in is
/// dropped first.
///
/// @param rStream Stream holding the complete document.
/// @return false if the header is not "%PDF-", no xref start offset is found, the xref section
///         does not start with the expected keyword, or tokenizing fails.
bool PDFDocument::Read(SvStream& rStream)
{
    // Check file magic.
    std::vector<sal_Int8> aHeader(5);
    rStream.Seek(0);
    rStream.ReadBytes(aHeader.data(), aHeader.size());
    if (aHeader[0] != '%' || aHeader[1] != 'P' || aHeader[2] != 'D' || aHeader[3] != 'F'
        || aHeader[4] != '-')
    {
        SAL_WARN("vcl.filter", "PDFDocument::Read: header mismatch");
        return false;
    }

    // Clear out everything that may have been filled with info from any previous read attempt.
    // A failed attempt can leave elements, EOF offsets and xref entries behind; xref entries in
    // particular are never overwritten once present, so stale ones would shadow the new ones.
    m_aOffsetTrailers.clear(); // contents are lifecycle managed by m_aElements
    m_aTrailerOffsets.clear();
    m_aElements.clear();
    m_aOffsetObjects.clear();
    m_aIDObjects.clear();
    m_aStartXRefs.clear();
    m_aEOFs.clear();
    m_aXRef.clear();
    m_pTrailer = nullptr;
    m_pXRefStream = nullptr;

    // Allow later editing of the contents in-memory. Empty the buffer first, otherwise a second
    // read attempt would append its input after the bytes of the first one.
    m_aEditBuffer.Seek(0);
    m_aEditBuffer.SetStreamSize(0);
    rStream.Seek(0);
    m_aEditBuffer.WriteStream(rStream);

    // Look up the offset of the xref table.
    size_t nStartXRef = FindStartXRef(rStream);
    SAL_INFO("vcl.filter", "PDFDocument::Read: nStartXRef is " << nStartXRef);
    if (nStartXRef == 0)
    {
        SAL_WARN("vcl.filter", "PDFDocument::Read: found no xref start offset");
        return false;
    }

    // Offsets of the cross-reference sections processed so far. A malformed document can have
    // a /Prev chain that points back to an earlier section; without this guard the loop below
    // would never terminate.
    std::vector<size_t> aVisitedXRefs;
    while (true)
    {
        aVisitedXRefs.push_back(nStartXRef);

        rStream.Seek(nStartXRef);
        OString aKeyword = ReadKeyword(rStream);
        if (aKeyword.isEmpty())
            // No keyword at the offset: expect a cross-reference stream object instead.
            ReadXRefStream(rStream);

        else
        {
            if (aKeyword != "xref")
            {
                SAL_WARN("vcl.filter", "PDFDocument::Read: xref is not the first keyword");
                return false;
            }
            ReadXRef(rStream);
            if (!Tokenize(rStream, TokenizeMode::EOF_TOKEN, m_aElements, nullptr))
            {
                SAL_WARN("vcl.filter", "PDFDocument::Read: failed to tokenizer trailer after xref");
                return false;
            }
        }

        PDFNumberElement* pPrev = nullptr;
        if (m_pTrailer)
        {
            pPrev = dynamic_cast<PDFNumberElement*>(m_pTrailer->Lookup("Prev"_ostr));

            // Remember the offset of this trailer in the correct order. It's
            // possible that newer trailers don't have a larger offset.
            m_aTrailerOffsets.push_back(m_pTrailer->GetLocation());
        }
        else if (m_pXRefStream)
            pPrev = dynamic_cast<PDFNumberElement*>(m_pXRefStream->Lookup("Prev"_ostr));

        // pPrev points into m_aElements, which is cleared below: take what is needed now.
        const bool bHasPrev = pPrev != nullptr;
        if (bHasPrev)
            nStartXRef = pPrev->GetValue();

        // Reset state, except the edit buffer.
        m_aOffsetTrailers.clear(); // contents are lifecycle managed by m_aElements
        m_aElements.clear();
        m_aOffsetObjects.clear();
        m_aIDObjects.clear();
        m_aStartXRefs.clear();
        m_aEOFs.clear();
        m_pTrailer = nullptr;
        m_pXRefStream = nullptr;
        if (!bHasPrev)
            break;

        bool bAlreadyVisited = false;
        for (const size_t nVisited : aVisitedXRefs)
        {
            if (nVisited == nStartXRef)
            {
                bAlreadyVisited = true;
                break;
            }
        }
        if (bAlreadyVisited)
        {
            // Keep what was read so far and stop following the chain.
            SAL_WARN("vcl.filter",
                     "PDFDocument::Read: Prev chain loops back to offset " << nStartXRef);
            break;
        }
    }

    // Then we can tokenize the stream.
    rStream.Seek(0);
    return Tokenize(rStream, TokenizeMode::END_OF_STREAM, m_aElements, nullptr);
}
 
/// Reads a run of ASCII letters from rStream, starting at its current position.
///
/// On return the stream is positioned at the first character that is not part of the keyword,
/// unless the end of the stream was reached while reading.
///
/// @return the letters read; empty if the first character is not a letter or the stream is
///         already at its end.
OString PDFDocument::ReadKeyword(SvStream& rStream)
{
    OStringBuffer aKeyword;
    for (;;)
    {
        char cNext;
        rStream.ReadChar(cNext);
        if (rStream.eof())
            // Out of input: whatever was collected so far is the result, nothing to un-read.
            return aKeyword.toString();

        if (!rtl::isAsciiAlpha(static_cast<unsigned char>(cNext)))
            break;

        aKeyword.append(cNext);
    }

    // Put back the character that terminated the keyword.
    rStream.SeekRel(-1);
    return aKeyword.toString();
}
 
/// Finds the offset announced by the last "startxref" keyword of the document.
///
/// Only the final 1024 bytes of rStream (or the whole stream, if it is shorter) are searched.
///
/// @return the number following the last "startxref", or 0 if the keyword is not found or no
///         number can be read after it.
size_t PDFDocument::FindStartXRef(SvStream& rStream)
{
    // Load the tail of the document, that's where the keyword is expected.
    std::vector<char> aTail(1024);
    rStream.Seek(STREAM_SEEK_TO_END);
    if (rStream.Tell() > aTail.size())
        rStream.SeekRel(static_cast<sal_Int64>(-1) * aTail.size());
    else
        // Short document: the "tail" is the whole stream.
        rStream.Seek(0);

    const size_t nTailStart = rStream.Tell();
    const size_t nBytesRead = rStream.ReadBytes(aTail.data(), aTail.size());
    // Go back to where the tail starts, the relative seek below counts from there.
    rStream.Seek(nTailStart);
    aTail.resize(nBytesRead);

    // With incremental updates there can be several startxref keywords, the last one wins.
    const OString aToken("startxref"_ostr);
    const char* const pTokenBegin = aToken.getStr();
    const char* const pTokenEnd = pTokenBegin + aToken.getLength();
    const auto itLastMatch = std::find_end(aTail.begin(), aTail.end(), pTokenBegin, pTokenEnd);
    if (itLastMatch == aTail.end())
    {
        SAL_WARN("vcl.filter", "PDFDocument::FindStartXRef: found no startxref");
        return 0;
    }

    // Position the stream right behind the keyword.
    rStream.SeekRel(itLastMatch - aTail.begin() + aToken.getLength());
    if (rStream.eof())
    {
        SAL_WARN("vcl.filter",
                 "PDFDocument::FindStartXRef: unexpected end of stream after startxref");
        return 0;
    }

    PDFDocument::SkipWhitespace(rStream);
    PDFNumberElement aOffset;
    if (aOffset.Read(rStream))
        return aOffset.GetValue();

    return 0;
}
 
/// Reads a cross-reference stream object that starts at the current position of rStream and
/// adds its entries to m_aXRef.
///
/// The object is tokenized into m_aElements, its stream is inflated (only FlateDecode is
/// supported) and the rows are decoded according to /W, /Index (or /Size) and
/// /DecodeParms. Supported predictors are 1 (none) and 12 (PNG "up" on every row). Entries for
/// object numbers that already exist in m_aXRef are left untouched.
///
/// All problems are reported via SAL_WARN and end the function early; entries added before the
/// problem was found are kept. m_pXRefStream is set as soon as the object itself is found, so
/// that the caller can look up /Prev.
void PDFDocument::ReadXRefStream(SvStream& rStream)
{
    // Look up the stream length in the object dictionary.
    if (!Tokenize(rStream, TokenizeMode::END_OF_OBJECT, m_aElements, nullptr))
    {
        SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: failed to read object");
        return;
    }

    if (m_aElements.empty())
    {
        SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no tokens found");
        return;
    }

    PDFObjectElement* pObject = nullptr;
    for (const auto& pElement : m_aElements)
    {
        if (auto pObj = dynamic_cast<PDFObjectElement*>(pElement.get()))
        {
            pObject = pObj;
            break;
        }
    }
    if (!pObject)
    {
        SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no object token found");
        return;
    }

    // So that the Prev key can be looked up later.
    m_pXRefStream = pObject;

    PDFElement* pLookup = pObject->Lookup("Length"_ostr);
    auto pNumber = dynamic_cast<PDFNumberElement*>(pLookup);
    if (!pNumber)
    {
        SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: stream length is not provided");
        return;
    }
    if (pNumber->GetValue() < 0)
    {
        SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: stream length is negative");
        return;
    }
    sal_uInt64 nLength = pNumber->GetValue();

    // Look up the stream offset.
    PDFStreamElement* pStream = nullptr;
    for (const auto& pElement : m_aElements)
    {
        if (auto pS = dynamic_cast<PDFStreamElement*>(pElement.get()))
        {
            pStream = pS;
            break;
        }
    }
    if (!pStream)
    {
        SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no stream token found");
        return;
    }

    // Read and decompress it.
    rStream.Seek(pStream->GetOffset());
    // The length comes from the document: don't allocate more than the input can provide.
    if (nLength > rStream.remainingSize())
    {
        SAL_WARN("vcl.filter",
                 "PDFDocument::ReadXRefStream: stream length is larger than the remaining input");
        return;
    }
    std::vector<char> aBuf(nLength);
    rStream.ReadBytes(aBuf.data(), aBuf.size());

    auto pFilter = dynamic_cast<PDFNameElement*>(pObject->Lookup("Filter"_ostr));
    if (!pFilter)
    {
        SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no Filter found");
        return;
    }

    if (pFilter->GetValue() != "FlateDecode")
    {
        SAL_WARN("vcl.filter",
                 "PDFDocument::ReadXRefStream: unexpected filter: " << pFilter->GetValue());
        return;
    }

    // Both default to 1 when /DecodeParms doesn't say otherwise.
    int nColumns = 1;
    int nPredictor = 1;
    if (auto pDecodeParams
        = dynamic_cast<PDFDictionaryElement*>(pObject->Lookup("DecodeParms"_ostr)))
    {
        const std::map<OString, PDFElement*>& rItems = pDecodeParams->GetItems();
        auto it = rItems.find("Columns"_ostr);
        if (it != rItems.end())
            if (auto pColumns = dynamic_cast<PDFNumberElement*>(it->second))
                nColumns = pColumns->GetValue();
        it = rItems.find("Predictor"_ostr);
        if (it != rItems.end())
            if (auto pPredictor = dynamic_cast<PDFNumberElement*>(it->second))
                nPredictor = pPredictor->GetValue();
    }

    SvMemoryStream aSource(aBuf.data(), aBuf.size(), StreamMode::READ);
    SvMemoryStream aStream;
    ZCodec aZCodec;
    aZCodec.BeginCompression();
    aZCodec.Decompress(aSource, aStream);
    if (!aZCodec.EndCompression())
    {
        SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: decompression failed");
        return;
    }

    // Look up the first and the last entry we need to read.
    auto pIndex = dynamic_cast<PDFArrayElement*>(pObject->Lookup("Index"_ostr));
    std::vector<size_t> aFirstObjects;
    std::vector<size_t> aNumberOfObjects;
    if (!pIndex)
    {
        // Without /Index there is a single subsection, covering objects 0 .. Size - 1.
        auto pSize = dynamic_cast<PDFNumberElement*>(pObject->Lookup("Size"_ostr));
        if (pSize)
        {
            aFirstObjects.push_back(0);
            aNumberOfObjects.push_back(pSize->GetValue());
        }
        else
        {
            SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: Index and Size not found");
            return;
        }
    }
    else
    {
        // /Index is a list of (first object number, number of objects) pairs.
        const std::vector<PDFElement*>& rIndexElements = pIndex->GetElements();
        size_t nFirstObject = 0;
        for (size_t i = 0; i < rIndexElements.size(); ++i)
        {
            if (i % 2 == 0)
            {
                auto pFirstObject = dynamic_cast<PDFNumberElement*>(rIndexElements[i]);
                if (!pFirstObject)
                {
                    SAL_WARN("vcl.filter",
                             "PDFDocument::ReadXRefStream: Index has no first object");
                    return;
                }
                nFirstObject = pFirstObject->GetValue();
                continue;
            }

            auto pNumberOfObjects = dynamic_cast<PDFNumberElement*>(rIndexElements[i]);
            if (!pNumberOfObjects)
            {
                SAL_WARN("vcl.filter",
                         "PDFDocument::ReadXRefStream: Index has no number of objects");
                return;
            }
            aFirstObjects.push_back(nFirstObject);
            aNumberOfObjects.push_back(pNumberOfObjects->GetValue());
        }
    }

    // Look up the format of a single entry.
    constexpr size_t nWSize = 3;
    auto pW = dynamic_cast<PDFArrayElement*>(pObject->Lookup("W"_ostr));
    if (!pW || pW->GetElements().size() < nWSize)
    {
        SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: W not found or has < 3 elements");
        return;
    }
    int aW[nWSize];
    // The line buffers reserve slot 0 for the per-row predictor tag (only present in the data
    // when a PNG predictor is in use), the fields follow from slot 1 on.
    int nLineLength = 1;
    for (size_t i = 0; i < nWSize; ++i)
    {
        auto pI = dynamic_cast<PDFNumberElement*>(pW->GetElements()[i]);
        if (!pI)
        {
            SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: W contains non-number");
            return;
        }
        if (pI->GetValue() < 0)
        {
            // A negative width would result in a negative line length below.
            SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: W contains negative number");
            return;
        }
        aW[i] = pI->GetValue();
        nLineLength += aW[i];
    }

    if (nPredictor > 1 && nLineLength - 1 != nColumns)
    {
        SAL_WARN("vcl.filter",
                 "PDFDocument::ReadXRefStream: /DecodeParms/Columns is inconsistent with /W");
        return;
    }

    // Number of bytes the three fields of one entry take up together.
    const size_t nFieldBytes = static_cast<size_t>(nLineLength) - 1;

    aStream.Seek(0);
    for (size_t nSubSection = 0; nSubSection < aFirstObjects.size(); ++nSubSection)
    {
        size_t nFirstObject = aFirstObjects[nSubSection];
        size_t nNumberOfObjects = aNumberOfObjects[nSubSection];

        // This is the line as read from the stream.
        std::vector<unsigned char> aOrigLine(nLineLength);
        // This is the line as it appears after tweaking according to nPredictor.
        std::vector<unsigned char> aFilteredLine(nLineLength);
        for (size_t nEntry = 0; nEntry < nNumberOfObjects; ++nEntry)
        {
            size_t nIndex = nFirstObject + nEntry;

            if (nPredictor > 1)
            {
                // Each row is prefixed with a tag byte that names the filter of that row.
                if (aStream.ReadBytes(aOrigLine.data(), aOrigLine.size()) != aOrigLine.size())
                {
                    SAL_WARN("vcl.filter",
                             "PDFDocument::ReadXRefStream: stream is too short for object #"
                                 << nIndex);
                    return;
                }
                if (aOrigLine[0] + 10 != nPredictor)
                {
                    SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: in-stream predictor is "
                                           "inconsistent with /DecodeParms/Predictor for object #"
                                               << nIndex);
                    return;
                }
            }
            else if (aStream.ReadBytes(aOrigLine.data() + 1, nFieldBytes) != nFieldBytes)
            {
                // Without prediction the rows consist of the fields only, there is no tag byte
                // to consume.
                SAL_WARN("vcl.filter",
                         "PDFDocument::ReadXRefStream: stream is too short for object #"
                             << nIndex);
                return;
            }

            switch (nPredictor)
            {
                case 1:
                    // No prediction: the bytes are the field values as they are.
                    aFilteredLine = aOrigLine;
                    break;
                case 12:
                    // PNG prediction: up (on all rows). Each byte is a delta to the byte at the
                    // same position of the previous decoded row.
                    for (int i = 0; i < nLineLength; ++i)
                        aFilteredLine[i]
                            = static_cast<unsigned char>(aFilteredLine[i] + aOrigLine[i]);
                    break;
                default:
                    SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: unexpected predictor: "
                                               << nPredictor);
                    return;
            }

            // Slot 0 is the predictor tag, the fields start after it.
            int nPos = 1;
            size_t nType = 0;
            // Start of the current field in the stream data.
            int nOffset = nPos;
            for (; nPos < nOffset + aW[0]; ++nPos)
            {
                unsigned char nCh = aFilteredLine[nPos];
                nType = (nType << 8) + nCh;
            }
            if (aW[0] == 0)
                // The type field is not present, in which case it defaults to type 1.
                nType = 1;

            // Start of the object in the file stream.
            size_t nStreamOffset = 0;
            nOffset = nPos;
            for (; nPos < nOffset + aW[1]; ++nPos)
            {
                unsigned char nCh = aFilteredLine[nPos];
                nStreamOffset = (nStreamOffset << 8) + nCh;
            }

            // Generation number of the object.
            size_t nGenerationNumber = 0;
            nOffset = nPos;
            for (; nPos < nOffset + aW[2]; ++nPos)
            {
                unsigned char nCh = aFilteredLine[nPos];
                nGenerationNumber = (nGenerationNumber << 8) + nCh;
            }

            // Ignore invalid nType.
            if (nType <= 2)
            {
                // Never replace an entry that is already known.
                if (m_aXRef.find(nIndex) == m_aXRef.end())
                {
                    XRefEntry aEntry;
                    switch (nType)
                    {
                        case 0:
                            aEntry.SetType(XRefEntryType::FREE);
                            break;
                        case 1:
                            aEntry.SetType(XRefEntryType::NOT_COMPRESSED);
                            break;
                        case 2:
                            aEntry.SetType(XRefEntryType::COMPRESSED);
                            break;
                    }
                    aEntry.SetOffset(nStreamOffset);
                    m_aXRef[nIndex] = aEntry;
                }
            }
        }
    }
}
 
/// Reads the subsections of a classic (non-stream) cross-reference table from rStream.
///
/// Each subsection is "<first object number> <entry count>" followed by one
/// "<offset> <generation> <f|n>" triple per entry. Entries are stored in m_aXRef unless an
/// entry for the same object number already exists. Parsing stops silently at the first token
/// that is not a number and stops with a warning on malformed or negative input.
void PDFDocument::ReadXRef(SvStream& rStream)
{
    PDFDocument::SkipWhitespace(rStream);

    while (true)
    {
        PDFNumberElement aFirstObject;
        if (!aFirstObject.Read(rStream))
        {
            // Next token is not a number, it'll be the trailer.
            return;
        }

        if (aFirstObject.GetValue() < 0)
        {
            SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: expected first object number >= 0");
            return;
        }

        PDFDocument::SkipWhitespace(rStream);
        PDFNumberElement aNumberOfEntries;
        if (!aNumberOfEntries.Read(rStream))
        {
            SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: failed to read number of entries");
            return;
        }

        if (aNumberOfEntries.GetValue() < 0)
        {
            SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: expected zero or more entries");
            return;
        }

        size_t nSize = aNumberOfEntries.GetValue();
        for (size_t nEntry = 0; nEntry < nSize; ++nEntry)
        {
            size_t nIndex = aFirstObject.GetValue() + nEntry;
            PDFDocument::SkipWhitespace(rStream);
            PDFNumberElement aOffset;
            if (!aOffset.Read(rStream))
            {
                SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: failed to read offset");
                return;
            }

            // The number reader accepts a leading '-'; a negative value must never be
            // converted to the unsigned offset stored in the entry.
            if (aOffset.GetValue() < 0)
            {
                SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: expected offset >= 0");
                return;
            }

            PDFDocument::SkipWhitespace(rStream);
            PDFNumberElement aGenerationNumber;
            if (!aGenerationNumber.Read(rStream))
            {
                SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: failed to read generation number");
                return;
            }

            PDFDocument::SkipWhitespace(rStream);
            OString aKeyword = ReadKeyword(rStream);
            if (aKeyword != "f" && aKeyword != "n")
            {
                SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: unexpected keyword");
                return;
            }
            // xrefs are read in reverse order, so never update an existing
            // offset with an older one.
            if (m_aXRef.find(nIndex) == m_aXRef.end())
            {
                XRefEntry aEntry;
                aEntry.SetOffset(aOffset.GetValue());
                // Initially only the first entry is dirty.
                if (nIndex == 0)
                    aEntry.SetDirty(true);
                m_aXRef[nIndex] = aEntry;
            }
            PDFDocument::SkipWhitespace(rStream);
        }
    }
}
 
/// Consumes ASCII white-space from rStream. On return the stream is positioned on the first
/// non-white-space character, unless the end of the stream was hit first.
void PDFDocument::SkipWhitespace(SvStream& rStream)
{
    char cCurrent = 0;
    for (;;)
    {
        rStream.ReadChar(cCurrent);
        if (rStream.eof())
            return;

        if (rtl::isAsciiWhiteSpace(static_cast<unsigned char>(cCurrent)))
            continue;

        // Not white-space: un-read it so that the caller sees it next.
        rStream.SeekRel(-1);
        return;
    }
}
 
/// Consumes '\n' and '\r' characters from rStream. On return the stream is positioned on the
/// first other character, unless the end of the stream was hit first.
void PDFDocument::SkipLineBreaks(SvStream& rStream)
{
    char cCurrent = 0;
    for (;;)
    {
        rStream.ReadChar(cCurrent);
        if (rStream.eof())
            return;

        const bool bLineBreak = cCurrent == '\n' || cCurrent == '\r';
        if (bLineBreak)
            continue;

        // First character that is not a line break: un-read it and stop.
        rStream.SeekRel(-1);
        return;
    }
}
 
/// Returns the offset recorded in m_aXRef for object nIndex. Returns 0 (with a warning) when
/// there is no entry for nIndex or when the entry is of the COMPRESSED type.
size_t PDFDocument::GetObjectOffset(size_t nIndex) const
{
    const auto itEntry = m_aXRef.find(nIndex);
    const bool bHasOffset
        = itEntry != m_aXRef.end() && itEntry->second.GetType() != XRefEntryType::COMPRESSED;
    if (bHasOffset)
        return itEntry->second.GetOffset();

    SAL_WARN("vcl.filter", "PDFDocument::GetObjectOffset: wanted to look up index #"
                               << nIndex << ", but failed");
    return 0;
}
 
/// Read-only access to the document's element list (m_aElements).
const std::vector<std::unique_ptr<PDFElement>>& PDFDocument::GetElements() const
{
    return m_aElements;
}
 
/// Walks the page tree below pPages and appends every kid that is not itself a /Type /Pages
/// node to rRet. A node is flagged as "visiting" while its kids are processed, so a kid that
/// points back to a node currently being walked is skipped instead of recursed into.
static void visitPages(PDFObjectElement* pPages, std::vector<PDFObjectElement*>& rRet)
{
    auto pKidsValue = pPages->Lookup("Kids"_ostr);
    auto pKidsArray = dynamic_cast<PDFArrayElement*>(pKidsValue);
    if (!pKidsArray)
    {
        // Kids is not a direct array: it may be a reference to an object holding the array.
        auto pKidsReference = dynamic_cast<PDFReferenceElement*>(pKidsValue);
        PDFObjectElement* pKidsHolder = pKidsReference ? pKidsReference->LookupObject() : nullptr;
        if (!pKidsHolder)
        {
            SAL_WARN("vcl.filter", "visitPages: pages has no kids");
            return;
        }

        pKidsArray = pKidsHolder->GetArray();
        if (!pKidsArray)
        {
            SAL_WARN("vcl.filter", "visitPages: pages has no kids");
            return;
        }
    }

    pPages->setVisiting(true);

    for (PDFElement* pEntry : pKidsArray->GetElements())
    {
        // Only indirect references that resolve to an object are considered.
        auto pEntryReference = dynamic_cast<PDFReferenceElement*>(pEntry);
        PDFObjectElement* pChild = pEntryReference ? pEntryReference->LookupObject() : nullptr;
        if (!pChild)
            continue;

        // detect if visiting reenters itself
        if (pChild->alreadyVisiting())
        {
            SAL_WARN("vcl.filter", "visitPages: loop in hierarchy");
            continue;
        }

        auto pTypeName = dynamic_cast<PDFNameElement*>(pChild->Lookup("Type"_ostr));
        const bool bIntermediateNode = pTypeName && pTypeName->GetValue() == "Pages";
        if (!bIntermediateNode)
        {
            // Found an actual page.
            rRet.push_back(pChild);
            continue;
        }

        // Pages inside pages: recurse.
        visitPages(pChild, rRet);
    }

    pPages->setVisiting(false);
}
 
/// Resolves the /Root reference to the catalog object. The reference is taken from the trailer
/// registered for the first trailer offset when there is one, otherwise from the
/// cross-reference stream object. Returns nullptr if no Root reference can be found.
PDFObjectElement* PDFDocument::GetCatalog()
{
    // Get access to the latest trailer, and work with the keys of that one.
    PDFTrailerElement* pLatestTrailer = nullptr;
    if (!m_aTrailerOffsets.empty())
    {
        const auto itTrailer = m_aOffsetTrailers.find(m_aTrailerOffsets[0]);
        if (itTrailer != m_aOffsetTrailers.end())
            pLatestTrailer = itTrailer->second;
    }

    PDFReferenceElement* pRootReference = nullptr;
    if (pLatestTrailer)
    {
        pRootReference = dynamic_cast<PDFReferenceElement*>(pLatestTrailer->Lookup("Root"_ostr));
    }
    else if (m_pXRefStream)
    {
        pRootReference = dynamic_cast<PDFReferenceElement*>(m_pXRefStream->Lookup("Root"_ostr));
    }

    if (pRootReference)
        return pRootReference->LookupObject();

    SAL_WARN("vcl.filter", "PDFDocument::GetCatalog: trailer has no Root key");
    return nullptr;
}
 
std::vector<PDFObjectElement*> PDFDocument::GetPages()
{
    std::vector<PDFObjectElement*> aRet;
 
    PDFObjectElement* pCatalog = GetCatalog();
    if (!pCatalog)
    {
        SAL_WARN("vcl.filter", "PDFDocument::GetPages: trailer has no catalog");
        return aRet;
    }
 
    PDFObjectElement* pPages = pCatalog->LookupObject("Pages"_ostr);
    if (!pPages)
    {
        SAL_WARN("vcl.filter", "PDFDocument::GetPages: catalog (obj " << pCatalog->GetObjectValue()
                                                                      << ") has no pages");
        return aRet;
    }
 
    visitPages(pPages, aRet);
 
    return aRet;
}
 
/// Appends nOffset to the list of recorded end-of-file offsets (m_aEOFs).
void PDFDocument::PushBackEOF(size_t nOffset) { m_aEOFs.push_back(nOffset); }
 
std::vector<PDFObjectElement*> PDFDocument::GetSignatureWidgets()
{
    std::vector<PDFObjectElement*> aRet;
 
    std::vector<PDFObjectElement*> aPages = GetPages();
 
    for (const auto& pPage : aPages)
    {
        if (!pPage)
            continue;
 
        PDFElement* pAnnotsElement = pPage->Lookup("Annots"_ostr);
        auto pAnnots = dynamic_cast<PDFArrayElement*>(pAnnotsElement);
        if (!pAnnots)
        {
            // Annots is not an array, see if it's a reference to an object
            // with a direct array.
            auto pAnnotsRef = dynamic_cast<PDFReferenceElement*>(pAnnotsElement);
            if (pAnnotsRef)
            {
                if (PDFObjectElement* pAnnotsObject = pAnnotsRef->LookupObject())
                {
                    pAnnots = pAnnotsObject->GetArray();
                }
            }
        }
 
        if (!pAnnots)
            continue;
 
        for (const auto& pAnnot : pAnnots->GetElements())
        {
            auto pReference = dynamic_cast<PDFReferenceElement*>(pAnnot);
            if (!pReference)
                continue;
 
            PDFObjectElement* pAnnotObject = pReference->LookupObject();
            if (!pAnnotObject)
                continue;
 
            auto pFT = dynamic_cast<PDFNameElement*>(pAnnotObject->Lookup("FT"_ostr));
            if (!pFT || pFT->GetValue() != "Sig")
                continue;
 
            aRet.push_back(pAnnotObject);
        }
    }
 
    return aRet;
}
 
/// Decodes the hex text held by pElement into raw bytes via svl::crypto::DecodeHexString().
/// pElement is dereferenced unconditionally, so it must not be null.
std::vector<unsigned char> PDFDocument::DecodeHexString(PDFHexStringElement const* pElement)
{
    return svl::crypto::DecodeHexString(pElement->GetValue());
}
 
/// Decodes a hex string element that holds BOM-prefixed UTF-16BE text. Returns an empty string
/// when the decoded bytes do not start with FE FF or when their count is odd.
OUString PDFDocument::DecodeHexStringUTF16BE(PDFHexStringElement const& rElement)
{
    std::vector<unsigned char> const aBytes(DecodeHexString(&rElement));
    // Text strings can be PDFDocEncoding or UTF-16BE with a mandatory BOM;
    // only the latter is supported here.
    const bool bHasBOM = aBytes.size() >= 2 && aBytes[0] == 0xFE && aBytes[1] == 0xFF;
    const bool bWholeUnits = (aBytes.size() & 1) == 0;
    if (!bHasBOM || !bWholeUnits)
    {
        return {};
    }

    OUStringBuffer aText(aBytes.size() - 2);
    // Combine each big-endian byte pair after the BOM into one UTF-16 code unit.
    for (size_t nByte = 2; nByte < aBytes.size(); nByte += 2)
    {
        const sal_uInt16 nHigh = aBytes[nByte];
        const unsigned char nLow = aBytes[nByte + 1];
        aText.append(sal_Unicode((nHigh << 8) | nLow));
    }
    return aText.makeStringAndClear();
}
 
/// Creates a comment element bound to rDoc; Read() reports "%%EOF" comments to that document.
PDFCommentElement::PDFCommentElement(PDFDocument& rDoc)
    : m_rDoc(rDoc)
{
}
 
/// Reads a comment up to (excluding) the end of the line or the end of the stream and stores
/// it in m_aComment. For a comment starting with "%%EOF" the end offset is additionally
/// reported to the document via PushBackEOF().
///
/// @return always true; the loop only ends once a line break or the end of the stream is seen.
bool PDFCommentElement::Read(SvStream& rStream)
{
    // Read from (including) the % char till (excluding) the end of the line/stream.
    OStringBuffer aBuf;
    // Initialized so that a read failing at the end of the stream can never leave an
    // indeterminate value to be compared below.
    char ch = 0;
    rStream.ReadChar(ch);
    while (true)
    {
        if (ch == '\n' || ch == '\r' || rStream.eof())
        {
            m_aComment = aBuf.makeStringAndClear();

            if (m_aComment.startsWith("%%EOF"))
            {
                sal_uInt64 nPos = rStream.Tell();
                if (ch == '\r')
                {
                    char chNext = 0;
                    rStream.ReadChar(chNext);
                    // Only step back if the look-ahead actually consumed a character.
                    if (!rStream.eof())
                    {
                        rStream.SeekRel(-1);
                        // If the comment ends with a \r\n, count the \n as well to match Adobe
                        // Acrobat behavior.
                        if (chNext == '\n')
                        {
                            nPos += 1;
                        }
                    }
                }
                m_rDoc.PushBackEOF(nPos);
            }

            SAL_INFO("vcl.filter", "PDFCommentElement::Read: m_aComment is '" << m_aComment << "'");
            return true;
        }
        aBuf.append(ch);
        rStream.ReadChar(ch);
    }

    return false;
}
 
/// Defaulted constructor, defined out of line.
PDFNumberElement::PDFNumberElement() = default;
 
/// Reads a run of digits, '+', '-' and '.' characters and stores its numeric value, start
/// offset and length. The character that terminates the run is left unread.
///
/// @return false if the stream is at its end, if the first character cannot start a number
///         (that character is un-read), or if the stream ends before a terminating
///         character is seen.
bool PDFNumberElement::Read(SvStream& rStream)
{
    // Characters accepted as part of a number token.
    const auto isNumberChar = [](char c) {
        return rtl::isAsciiDigit(static_cast<unsigned char>(c)) || c == '-' || c == '+'
               || c == '.';
    };

    m_nOffset = rStream.Tell();
    char cCurrent = 0;
    rStream.ReadChar(cCurrent);
    if (rStream.eof())
        return false;

    if (!isNumberChar(cCurrent))
    {
        rStream.SeekRel(-1);
        return false;
    }

    OStringBuffer aToken;
    for (; !rStream.eof(); rStream.ReadChar(cCurrent))
    {
        if (isNumberChar(cCurrent))
        {
            aToken.append(cCurrent);
            continue;
        }

        // First character past the number: un-read it and convert what was collected.
        rStream.SeekRel(-1);
        m_nLength = rStream.Tell() - m_nOffset;
        m_fValue = o3tl::toDouble(aToken);
        SAL_INFO("vcl.filter", "PDFNumberElement::Read: m_fValue is '" << m_fValue << "'");
        return true;
    }

    return false;
}
 
/// Returns m_nOffset, the stream position recorded at the start of Read().
sal_uInt64 PDFNumberElement::GetLocation() const { return m_nOffset; }
 
/// Returns m_nLength, the number of stream bytes the number token occupied in Read().
sal_uInt64 PDFNumberElement::GetLength() const { return m_nLength; }
 
/// No-op: consumes nothing from the stream and always reports success.
bool PDFBooleanElement::Read(SvStream& /*rStream*/) { return true; }
 
/// No-op: consumes nothing from the stream and always reports success.
bool PDFNullElement::Read(SvStream& /*rStream*/) { return true; }
 
/// Reads a hex string of the form "<...>" and stores the raw text between the angle brackets
/// (not decoded) in m_aValue.
///
/// @return false if the first character is not '<' or the stream ends before '>' is found.
bool PDFHexStringElement::Read(SvStream& rStream)
{
    // Initialized so that a read failing at the end of the stream can never leave an
    // indeterminate value to be compared below.
    char ch = 0;
    rStream.ReadChar(ch);
    if (ch != '<')
    {
        SAL_INFO("vcl.filter", "PDFHexStringElement::Read: expected '<' as first character");
        return false;
    }
    rStream.ReadChar(ch);

    OStringBuffer aBuf;
    while (!rStream.eof())
    {
        if (ch == '>')
        {
            m_aValue = aBuf.makeStringAndClear();
            SAL_INFO("vcl.filter",
                     "PDFHexStringElement::Read: m_aValue length is " << m_aValue.getLength());
            return true;
        }
        aBuf.append(ch);
        rStream.ReadChar(ch);
    }

    return false;
}
 
/// Returns the raw (still hex-encoded) text stored by Read().
const OString& PDFHexStringElement::GetValue() const { return m_aValue; }
 
/// Reads a literal string of the form "(...)" and stores the raw text between the outermost
/// parentheses in m_aValue. Escape sequences are kept verbatim, not decoded.
///
/// Unescaped parentheses nest. A backslash escapes exactly the one character that follows it,
/// so "\\)" is an escaped backslash followed by a real closing parenthesis, while "\)" is an
/// escaped parenthesis that does not affect nesting.
///
/// @return false if the first character is not '(' or the stream ends before the matching ')'.
bool PDFLiteralStringElement::Read(SvStream& rStream)
{
    char ch = 0;
    rStream.ReadChar(ch);
    if (ch != '(')
    {
        SAL_INFO("vcl.filter", "PDFLiteralStringElement::Read: expected '(' as first character");
        return false;
    }
    rStream.ReadChar(ch);

    // Start with 1 nesting level as we read a '(' above already.
    int nDepth = 1;
    // True while the current character is the target of a preceding backslash.
    bool bEscaped = false;
    OStringBuffer aBuf;
    while (!rStream.eof())
    {
        if (bEscaped)
        {
            // This character was escaped: it neither nests nor starts a new escape.
            bEscaped = false;
        }
        else if (ch == '\\')
            bEscaped = true;
        else if (ch == '(')
            ++nDepth;
        else if (ch == ')')
            --nDepth;

        if (nDepth == 0)
        {
            // ')' of the outermost '(' is reached.
            m_aValue = aBuf.makeStringAndClear();
            SAL_INFO("vcl.filter",
                     "PDFLiteralStringElement::Read: m_aValue is '" << m_aValue << "'");
            return true;
        }
        aBuf.append(ch);
        rStream.ReadChar(ch);
    }

    return false;
}
 
/// Returns the raw text stored by Read() (escape sequences are not decoded).
const OString& PDFLiteralStringElement::GetValue() const { return m_aValue; }
 
/// Creates a trailer element bound to rDoc. The dictionary pointer starts out null and is
/// filled in lazily by Lookup().
PDFTrailerElement::PDFTrailerElement(PDFDocument& rDoc)
    : m_rDoc(rDoc)
    , m_pDictionaryElement(nullptr)
{
}
 
/// Consumes nothing; only remembers the current stream position in m_nOffset.
bool PDFTrailerElement::Read(SvStream& rStream)
{
    m_nOffset = rStream.Tell();
    return true;
}
 
/// Looks up rDictionaryKey in the trailer dictionary. While no dictionary is attached yet, the
/// document's elements are run through PDFObjectParser for this trailer first.
///
/// @return the value element, or nullptr if there is no dictionary or no such key.
PDFElement* PDFTrailerElement::Lookup(const OString& rDictionaryKey)
{
    if (m_pDictionaryElement == nullptr)
    {
        // Not parsed yet (or parsing found no dictionary so far): try to parse now.
        PDFObjectParser aTrailerParser(m_rDoc.GetElements());
        aTrailerParser.parse(this);
    }

    return m_pDictionaryElement ? m_pDictionaryElement->LookupElement(rDictionaryKey) : nullptr;
}
 
/// Returns m_nOffset, the stream position recorded by Read().
sal_uInt64 PDFTrailerElement::GetLocation() const { return m_nOffset; }
 
/// Returns the numeric value parsed by Read().
double PDFNumberElement::GetValue() const { return m_fValue; }
 
/// Creates an object element for the given object and generation numbers, bound to rDoc.
/// All element pointers start out null, all offsets and lengths are 0, and the object is
/// marked as not yet parsed.
PDFObjectElement::PDFObjectElement(PDFDocument& rDoc, double fObjectValue, double fGenerationValue)
    : m_rDoc(rDoc)
    , m_fObjectValue(fObjectValue)
    , m_fGenerationValue(fGenerationValue)
    , m_pNumberElement(nullptr)
    , m_pNameElement(nullptr)
    , m_nDictionaryOffset(0)
    , m_nDictionaryLength(0)
    , m_pDictionaryElement(nullptr)
    , m_nArrayOffset(0)
    , m_nArrayLength(0)
    , m_pArrayElement(nullptr)
    , m_pStreamElement(nullptr)
    , m_bParsed(false)
{
}
 
/// Consumes nothing from the stream; only logs the object and generation numbers.
bool PDFObjectElement::Read(SvStream& /*rStream*/)
{
    SAL_INFO("vcl.filter",
             "PDFObjectElement::Read: " << m_fObjectValue << " " << m_fGenerationValue << " obj");
    return true;
}
 
/// Defaulted constructor, defined out of line.
PDFDictionaryElement::PDFDictionaryElement() = default;
 
/// Returns the element stored under rKey in rDictionary, or nullptr if the key is absent.
PDFElement* PDFDictionaryElement::Lookup(const std::map<OString, PDFElement*>& rDictionary,
                                         const OString& rKey)
{
    const auto itItem = rDictionary.find(rKey);
    return itItem != rDictionary.end() ? itItem->second : nullptr;
}
 
/// Resolves the indirect reference stored under rDictionaryKey to its object.
///
/// @return nullptr (with a warning) if the key is missing or its value is not a reference;
///         otherwise whatever the reference's LookupObject() returns.
PDFObjectElement* PDFDictionaryElement::LookupObject(const OString& rDictionaryKey)
{
    PDFElement* pValue = PDFDictionaryElement::Lookup(m_aItems, rDictionaryKey);
    if (auto pReference = dynamic_cast<PDFReferenceElement*>(pValue))
        return pReference->LookupObject();

    SAL_WARN("vcl.filter",
             "PDFDictionaryElement::LookupObject: no such key with reference value: "
                 << rDictionaryKey);
    return nullptr;
}
 
/// Returns the element stored under rDictionaryKey in this dictionary's items, or nullptr.
PDFElement* PDFDictionaryElement::LookupElement(const OString& rDictionaryKey)
{
    return PDFDictionaryElement::Lookup(m_aItems, rDictionaryKey);
}
 
void PDFObjectElement::parseIfNecessary()
{
    if (m_bParsed)
        return;
 
    if (!m_aElements.empty())
    {
        // This is a stored object in an object stream.
        PDFObjectParser aParser(m_aElements);
        aParser.parse(this);
    }
    else
    {
        // Normal object: elements are stored as members of the document itself.
        PDFObjectParser aParser(m_rDoc.GetElements());
        aParser.parse(this);
    }
    m_bParsed = true;
}
 
/// Parses the object on demand and returns the element stored under rDictionaryKey in its
/// dictionary, or nullptr if the object has no dictionary or no such key.
PDFElement* PDFObjectElement::Lookup(const OString& rDictionaryKey)
{
    parseIfNecessary();
    return m_pDictionaryElement
               ? PDFDictionaryElement::Lookup(GetDictionaryItems(), rDictionaryKey)
               : nullptr;
}
 
/// Resolves the indirect reference stored under rDictionaryKey in this object's dictionary.
///
/// @return nullptr (with a warning) if the key is missing or its value is not a reference;
///         otherwise whatever the reference's LookupObject() returns.
PDFObjectElement* PDFObjectElement::LookupObject(const OString& rDictionaryKey)
{
    if (auto pReference = dynamic_cast<PDFReferenceElement*>(Lookup(rDictionaryKey)))
        return pReference->LookupObject();

    SAL_WARN("vcl.filter", "PDFObjectElement::LookupObject: no such key with reference value: "
                               << rDictionaryKey);
    return nullptr;
}
 
/// Returns the object number given at construction.
double PDFObjectElement::GetObjectValue() const { return m_fObjectValue; }
 
/// Stores nDictionaryOffset in m_nDictionaryOffset.
void PDFObjectElement::SetDictionaryOffset(sal_uInt64 nDictionaryOffset)
{
    m_nDictionaryOffset = nDictionaryOffset;
}
 
/// Parses the object on demand, then returns m_nDictionaryOffset.
sal_uInt64 PDFObjectElement::GetDictionaryOffset()
{
    parseIfNecessary();
    return m_nDictionaryOffset;
}
 
/// Stores nArrayOffset in m_nArrayOffset.
void PDFObjectElement::SetArrayOffset(sal_uInt64 nArrayOffset) { m_nArrayOffset = nArrayOffset; }
 
/// Returns m_nArrayOffset as currently stored; unlike GetDictionaryOffset() this does not
/// trigger parsing.
sal_uInt64 PDFObjectElement::GetArrayOffset() const { return m_nArrayOffset; }
 
/// Records nOffset for rKey, replacing any value recorded earlier for the same key.
void PDFDictionaryElement::SetKeyOffset(const OString& rKey, sal_uInt64 nOffset)
{
    m_aDictionaryKeyOffset[rKey] = nOffset;
}
 
/// Records nLength for rKey, replacing any value recorded earlier for the same key.
void PDFDictionaryElement::SetKeyValueLength(const OString& rKey, sal_uInt64 nLength)
{
    m_aDictionaryKeyValueLength[rKey] = nLength;
}
 
/// Returns the offset recorded for rKey via SetKeyOffset(), or 0 if none was recorded.
sal_uInt64 PDFDictionaryElement::GetKeyOffset(const OString& rKey) const
{
    const auto itOffset = m_aDictionaryKeyOffset.find(rKey);
    if (itOffset != m_aDictionaryKeyOffset.end())
        return itOffset->second;

    return 0;
}
 
/// Returns the length recorded for rKey via SetKeyValueLength(), or 0 if none was recorded.
sal_uInt64 PDFDictionaryElement::GetKeyValueLength(const OString& rKey) const
{
    const auto itLength = m_aDictionaryKeyValueLength.find(rKey);
    if (itLength != m_aDictionaryKeyValueLength.end())
        return itLength->second;

    return 0;
}
 
/// Read-only access to the key -> value element map of this dictionary.
const std::map<OString, PDFElement*>& PDFDictionaryElement::GetItems() const { return m_aItems; }
 
/// Stores nDictionaryLength in m_nDictionaryLength.
void PDFObjectElement::SetDictionaryLength(sal_uInt64 nDictionaryLength)
{
    m_nDictionaryLength = nDictionaryLength;
}
 
/// Parses the object on demand, then returns m_nDictionaryLength.
sal_uInt64 PDFObjectElement::GetDictionaryLength()
{
    parseIfNecessary();
    return m_nDictionaryLength;
}
 
/// Stores nArrayLength in m_nArrayLength.
void PDFObjectElement::SetArrayLength(sal_uInt64 nArrayLength) { m_nArrayLength = nArrayLength; }
 
/// Returns m_nArrayLength as currently stored; unlike GetDictionaryLength() this does not
/// trigger parsing.
sal_uInt64 PDFObjectElement::GetArrayLength() const { return m_nArrayLength; }
 
/// Parses the object on demand, then returns its dictionary element (may be nullptr).
PDFDictionaryElement* PDFObjectElement::GetDictionary()
{
    parseIfNecessary();
    return m_pDictionaryElement;
}
 
/// Stores the (non-owning) pointer to this object's dictionary element.
void PDFObjectElement::SetDictionary(PDFDictionaryElement* pDictionaryElement)
{
    m_pDictionaryElement = pDictionaryElement;
}
 
/// Stores the pointer to this object's number element.
void PDFObjectElement::SetNumberElement(PDFNumberElement* pNumberElement)
{
    m_pNumberElement = pNumberElement;
}
 
/// Returns the pointer stored by SetNumberElement() (nullptr if never set).
PDFNumberElement* PDFObjectElement::GetNumberElement() const { return m_pNumberElement; }
 
/// Stores the pointer to this object's name element.
void PDFObjectElement::SetNameElement(PDFNameElement* pNameElement)
{
    m_pNameElement = pNameElement;
}
 
/// Returns the pointer stored by SetNameElement() (nullptr if never set).
PDFNameElement* PDFObjectElement::GetNameElement() const { return m_pNameElement; }
 
/// Read-only access to the references collected via AddDictionaryReference().
const std::vector<PDFReferenceElement*>& PDFObjectElement::GetDictionaryReferences() const
{
    return m_aDictionaryReferences;
}
 
/// Appends pReference to the list returned by GetDictionaryReferences().
void PDFObjectElement::AddDictionaryReference(PDFReferenceElement* pReference)
{
    m_aDictionaryReferences.push_back(pReference);
}
 
const std::map<OString, PDFElement*>& PDFObjectElement::GetDictionaryItems()
{
    parseIfNecessary();
    return m_pDictionaryElement->GetItems();
}
 
/// Stores the (non-owning) pointer to this object's array element.
void PDFObjectElement::SetArray(PDFArrayElement* pArrayElement) { m_pArrayElement = pArrayElement; }
 
/// Stores the (non-owning) pointer to this object's stream element.
void PDFObjectElement::SetStream(PDFStreamElement* pStreamElement)
{
    m_pStreamElement = pStreamElement;
}
 
/// Returns the pointer stored by SetStream() (nullptr if the object has no stream).
PDFStreamElement* PDFObjectElement::GetStream() const { return m_pStreamElement; }
 
/// Parses the object on demand, then returns its array element (may be nullptr).
PDFArrayElement* PDFObjectElement::GetArray()
{
    parseIfNecessary();
    return m_pArrayElement;
}
 
/// Decompresses this object stream (/Type /ObjStm, /Filter /FlateDecode) and tokenizes every
/// object stored in it.
///
/// The decoded data starts with N pairs of "<object number> <byte offset>"; offsets are
/// relative to /First. Each stored object is tokenized into its own PDFObjectElement, which is
/// appended to m_aStoredElements and registered with the document via SetIDObject().
///
/// All numbers come from the file and are validated before use: negative values, an object
/// count larger than the decoded data, offsets past the end of the decoded data and decreasing
/// offsets abort with a warning before any stored object is created. That way an object length
/// can never wrap around.
void PDFObjectElement::ParseStoredObjects()
{
    if (!m_pStreamElement)
    {
        SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no stream");
        return;
    }

    auto pType = dynamic_cast<PDFNameElement*>(Lookup("Type"_ostr));
    if (!pType || pType->GetValue() != "ObjStm")
    {
        if (!pType)
            SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: missing type");
        else
            SAL_WARN("vcl.filter",
                     "PDFObjectElement::ParseStoredObjects: unexpected type: "
                         << pType->GetValue());
        return;
    }

    auto pFilter = dynamic_cast<PDFNameElement*>(Lookup("Filter"_ostr));
    if (!pFilter || pFilter->GetValue() != "FlateDecode")
    {
        if (!pFilter)
            SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: missing filter");
        else
            SAL_WARN("vcl.filter",
                     "PDFObjectElement::ParseStoredObjects: unexpected filter: "
                         << pFilter->GetValue());
        return;
    }

    auto pFirst = dynamic_cast<PDFNumberElement*>(Lookup("First"_ostr));
    if (!pFirst)
    {
        SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no First");
        return;
    }

    auto pN = dynamic_cast<PDFNumberElement*>(Lookup("N"_ostr));
    if (!pN)
    {
        SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no N");
        return;
    }

    auto pLength = dynamic_cast<PDFNumberElement*>(Lookup("Length"_ostr));
    if (!pLength)
    {
        SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no length");
        return;
    }

    const double fFirst = pFirst->GetValue();
    const double fN = pN->GetValue();
    const double fLength = pLength->GetValue();
    // Written as negated ">=" so that a NaN would be rejected as well.
    if (!(fFirst >= 0) || !(fN >= 0) || !(fLength >= 0))
    {
        SAL_WARN("vcl.filter",
                 "PDFObjectElement::ParseStoredObjects: negative First, N or Length");
        return;
    }

    // Read and decompress it. Never allocate more than the edit buffer can actually provide.
    SvMemoryStream& rEditBuffer = m_rDoc.GetEditBuffer();
    const sal_uInt64 nStreamOffset = m_pStreamElement->GetOffset();
    const sal_uInt64 nBufferEnd = rEditBuffer.TellEnd();
    if (nStreamOffset > nBufferEnd)
    {
        SAL_WARN("vcl.filter",
                 "PDFObjectElement::ParseStoredObjects: stream offset is past the end");
        return;
    }
    const sal_uInt64 nAvailable = nBufferEnd - nStreamOffset;
    size_t nLength = 0;
    if (fLength > static_cast<double>(nAvailable))
    {
        SAL_WARN("vcl.filter",
                 "PDFObjectElement::ParseStoredObjects: length exceeds the available data");
        nLength = static_cast<size_t>(nAvailable);
    }
    else
        nLength = static_cast<size_t>(fLength);

    rEditBuffer.Seek(nStreamOffset);
    std::vector<char> aBuf(nLength);
    rEditBuffer.ReadBytes(aBuf.data(), aBuf.size());
    SvMemoryStream aSource(aBuf.data(), aBuf.size(), StreamMode::READ);
    SvMemoryStream aStream;
    ZCodec aZCodec;
    aZCodec.BeginCompression();
    aZCodec.Decompress(aSource, aStream);
    if (!aZCodec.EndCompression())
    {
        SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: decompression failed");
        return;
    }

    const size_t nDecodedLength = aStream.TellEnd();
    aStream.Seek(0);

    // Every stored object needs at least a few header bytes, so a count larger than the
    // decoded data cannot be valid; this also keeps the conversion to size_t in range.
    if (fN > static_cast<double>(nDecodedLength))
    {
        SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: N is too large");
        return;
    }
    const size_t nN = static_cast<size_t>(fN);

    std::vector<size_t> aObjNums;
    std::vector<size_t> aOffsets;
    aObjNums.reserve(nN);
    aOffsets.reserve(nN);
    // First read and validate the whole header table.
    for (size_t nObject = 0; nObject < nN; ++nObject)
    {
        PDFNumberElement aObjNum;
        if (!aObjNum.Read(aStream))
        {
            SAL_WARN("vcl.filter",
                     "PDFObjectElement::ParseStoredObjects: failed to read object number");
            return;
        }
        if (!(aObjNum.GetValue() >= 0))
        {
            SAL_WARN("vcl.filter",
                     "PDFObjectElement::ParseStoredObjects: negative object number");
            return;
        }
        aObjNums.push_back(aObjNum.GetValue());

        PDFDocument::SkipWhitespace(aStream);

        PDFNumberElement aByteOffset;
        if (!aByteOffset.Read(aStream))
        {
            SAL_WARN("vcl.filter",
                     "PDFObjectElement::ParseStoredObjects: failed to read byte offset");
            return;
        }
        const double fOffset = fFirst + aByteOffset.GetValue();
        if (!(aByteOffset.GetValue() >= 0) || fOffset > static_cast<double>(nDecodedLength))
        {
            SAL_WARN("vcl.filter",
                     "PDFObjectElement::ParseStoredObjects: byte offset is out of range");
            return;
        }
        const size_t nOffset = static_cast<size_t>(fOffset);
        // Each object ends where the next one starts, so offsets must not decrease.
        if (!aOffsets.empty() && nOffset < aOffsets.back())
        {
            SAL_WARN("vcl.filter",
                     "PDFObjectElement::ParseStoredObjects: byte offsets are not increasing");
            return;
        }
        aOffsets.push_back(nOffset);

        PDFDocument::SkipWhitespace(aStream);
    }

    // Now create streams with the proper length and tokenize the data.
    for (size_t nObject = 0; nObject < nN; ++nObject)
    {
        const size_t nObjNum = aObjNums[nObject];
        const size_t nOffset = aOffsets[nObject];
        // An object extends up to the start of the next one; the last one up to the end of
        // the decoded data. The validation above guarantees nEnd >= nOffset.
        const size_t nEnd = nObject + 1 < nN ? aOffsets[nObject + 1] : nDecodedLength;
        const size_t nLen = nEnd - nOffset;

        aStream.Seek(nOffset);
        m_aStoredElements.push_back(std::make_unique<PDFObjectElement>(m_rDoc, nObjNum, 0));
        PDFObjectElement* pStored = m_aStoredElements.back().get();

        aBuf.clear();
        aBuf.resize(nLen);
        aStream.ReadBytes(aBuf.data(), aBuf.size());
        SvMemoryStream aStoredStream(aBuf.data(), aBuf.size(), StreamMode::READ);

        m_rDoc.Tokenize(aStoredStream, TokenizeMode::STORED_OBJECT, pStored->GetStoredElements(),
                        pStored);
        // This is how references know the object is stored inside this object stream.
        m_rDoc.SetIDObject(nObjNum, pStored);

        // Store the stream of the object in the object stream for later use.
        auto pStreamBuffer = std::make_unique<SvMemoryStream>();
        aStoredStream.Seek(0);
        pStreamBuffer->WriteStream(aStoredStream);
        pStored->SetStreamBuffer(pStreamBuffer);
    }
}
 
/// Mutable access to this object's own element list (m_aElements); ParseStoredObjects()
/// passes it to the tokenizer as the output for an object stored in an object stream.
std::vector<std::unique_ptr<PDFElement>>& PDFObjectElement::GetStoredElements()
{
    return m_aElements;
}
 
/// Returns the buffer handed over via SetStreamBuffer(), or nullptr if there is none.
SvMemoryStream* PDFObjectElement::GetStreamBuffer() const { return m_pStreamBuffer.get(); }
 
/// Takes ownership of pStreamBuffer; the caller's pointer is moved from.
void PDFObjectElement::SetStreamBuffer(std::unique_ptr<SvMemoryStream>& pStreamBuffer)
{
    m_pStreamBuffer = std::move(pStreamBuffer);
}
 
/// Returns the document this object was created for.
PDFDocument& PDFObjectElement::GetDocument() { return m_rDoc; }
 
/// Creates a reference bound to rDoc. The object and generation numbers are copied from the
/// given number elements; a reference to rObject itself is kept as well.
PDFReferenceElement::PDFReferenceElement(PDFDocument& rDoc, PDFNumberElement& rObject,
                                         PDFNumberElement const& rGeneration)
    : m_rDoc(rDoc)
    , m_fObjectValue(rObject.GetValue())
    , m_fGenerationValue(rGeneration.GetValue())
    , m_rObject(rObject)
{
}
 
/// Returns the number element passed as rObject at construction.
PDFNumberElement& PDFReferenceElement::GetObjectElement() const { return m_rObject; }
 
/// Consumes nothing; logs the reference and remembers the current stream position in m_nOffset.
bool PDFReferenceElement::Read(SvStream& rStream)
{
    SAL_INFO("vcl.filter",
             "PDFReferenceElement::Read: " << m_fObjectValue << " " << m_fGenerationValue << " R");
    m_nOffset = rStream.Tell();
    return true;
}
 
/// Returns m_nOffset, the stream position recorded by Read().
sal_uInt64 PDFReferenceElement::GetOffset() const { return m_nOffset; }
 
/// Resolves this reference to a number by reading the referenced object directly from
/// rStream: "<object> <generation> obj <number>".
///
/// The stream position is restored to where it was on entry, whatever the outcome.
///
/// @return the referenced number, or 0 (with a warning) if the object's offset is unknown,
///         the object header does not match this reference, or no number follows it.
double PDFReferenceElement::LookupNumber(SvStream& rStream) const
{
    const size_t nObjectOffset = m_rDoc.GetObjectOffset(m_fObjectValue);
    if (nObjectOffset == 0)
    {
        SAL_WARN("vcl.filter", "PDFReferenceElement::LookupNumber: found no offset for object #"
                                   << m_fObjectValue);
        return 0;
    }

    // Put the stream back where the caller left it on every exit path.
    const sal_uInt64 nSavedPos = rStream.Tell();
    comphelper::ScopeGuard aRestorePosition([&rStream, nSavedPos]() { rStream.Seek(nSavedPos); });

    rStream.Seek(nObjectOffset);

    // Object number of the header.
    PDFDocument::SkipWhitespace(rStream);
    PDFNumberElement aHeaderObject;
    if (!aHeaderObject.Read(rStream) || aHeaderObject.GetValue() != m_fObjectValue)
    {
        SAL_WARN("vcl.filter",
                 "PDFReferenceElement::LookupNumber: offset points to not matching object");
        return 0;
    }

    // Generation number of the header.
    PDFDocument::SkipWhitespace(rStream);
    PDFNumberElement aHeaderGeneration;
    if (!aHeaderGeneration.Read(rStream) || aHeaderGeneration.GetValue() != m_fGenerationValue)
    {
        SAL_WARN("vcl.filter",
                 "PDFReferenceElement::LookupNumber: offset points to not matching generation");
        return 0;
    }

    // The "obj" keyword closes the header.
    PDFDocument::SkipWhitespace(rStream);
    const OString aHeaderKeyword = PDFDocument::ReadKeyword(rStream);
    if (aHeaderKeyword != "obj")
    {
        SAL_WARN("vcl.filter",
                 "PDFReferenceElement::LookupNumber: offset doesn't point to an obj keyword");
        return 0;
    }

    // The object's content is expected to be a plain number.
    PDFDocument::SkipWhitespace(rStream);
    PDFNumberElement aReferencedNumber;
    if (aReferencedNumber.Read(rStream))
        return aReferencedNumber.GetValue();

    SAL_WARN("vcl.filter",
             "PDFReferenceElement::LookupNumber: failed to read referenced number");
    return 0;
}
 
/// Asks the document for the object with this reference's object number; the generation
/// number is not used for the lookup.
PDFObjectElement* PDFReferenceElement::LookupObject()
{
    return m_rDoc.LookupObject(m_fObjectValue);
}
 
/// Returns the object registered in m_aIDObjects under nObjectNumber, or nullptr (with a
/// warning) if there is none.
PDFObjectElement* PDFDocument::LookupObject(size_t nObjectNumber)
{
    const auto itObject = m_aIDObjects.find(nObjectNumber);
    if (itObject == m_aIDObjects.end())
    {
        SAL_WARN("vcl.filter", "PDFDocument::LookupObject: can't find obj " << nObjectNumber);
        return nullptr;
    }

    return itObject->second;
}
 
/// Mutable access to the document's edit buffer (m_aEditBuffer).
SvMemoryStream& PDFDocument::GetEditBuffer() { return m_aEditBuffer; }
 
/// Returns the referenced object number (m_fObjectValue) as an int.
int PDFReferenceElement::GetObjectValue() const { return m_fObjectValue; }
 
/// Returns the referenced generation number (m_fGenerationValue) as an int.
int PDFReferenceElement::GetGenerationValue() const { return m_fGenerationValue; }
 
/// Consumes the "<<" that opens a dictionary and records the stream position right after it
/// in m_nLocation.
///
/// The end of the stream is checked after each of the two reads, so a single '<' at the very
/// end of the stream is reported as an error instead of the stale first character being
/// compared a second time.
///
/// @return false if the stream ends early or either character is not '<'.
bool PDFDictionaryElement::Read(SvStream& rStream)
{
    for (int nBracket = 0; nBracket < 2; ++nBracket)
    {
        char ch = 0;
        rStream.ReadChar(ch);
        if (rStream.eof())
        {
            SAL_WARN("vcl.filter", "PDFDictionaryElement::Read: unexpected end of file");
            return false;
        }

        if (ch != '<')
        {
            SAL_WARN("vcl.filter", "PDFDictionaryElement::Read: unexpected character: " << ch);
            return false;
        }
    }

    m_nLocation = rStream.Tell();

    SAL_INFO("vcl.filter", "PDFDictionaryElement::Read: '<<'");

    return true;
}
 
/// Defaulted constructor, defined out of line.
PDFEndDictionaryElement::PDFEndDictionaryElement() = default;
 
/// Returns m_nLocation, the stream position recorded by Read() before the ">>" token.
sal_uInt64 PDFEndDictionaryElement::GetLocation() const { return m_nLocation; }
 
/// Records the current stream position in m_nLocation and consumes the ">>" that closes a
/// dictionary.
///
/// The end of the stream is checked after each of the two reads, so a single '>' at the very
/// end of the stream is reported as an error instead of the stale first character being
/// compared a second time.
///
/// @return false if the stream ends early or either character is not '>'.
bool PDFEndDictionaryElement::Read(SvStream& rStream)
{
    m_nLocation = rStream.Tell();

    for (int nBracket = 0; nBracket < 2; ++nBracket)
    {
        char ch = 0;
        rStream.ReadChar(ch);
        if (rStream.eof())
        {
            SAL_WARN("vcl.filter", "PDFEndDictionaryElement::Read: unexpected end of file");
            return false;
        }

        if (ch != '>')
        {
            SAL_WARN("vcl.filter", "PDFEndDictionaryElement::Read: unexpected character: " << ch);
            return false;
        }
    }

    SAL_INFO("vcl.filter", "PDFEndDictionaryElement::Read: '>>'");

    return true;
}
 
/// Defaulted constructor, defined out of line.
PDFNameElement::PDFNameElement() = default;
 
/// Reads a name token: a '/' followed by characters up to (excluding) the next white-space
/// or one of '/', '[', ']', '<', '>', '('. The name without the slash is stored in m_aValue
/// and the stream position right after the slash in m_nLocation.
///
/// @return false if the stream is already at its end, the first character is not '/', or the
///         stream ends before a terminating character is seen.
bool PDFNameElement::Read(SvStream& rStream)
{
    char ch = 0;
    rStream.ReadChar(ch);
    // Check for the end of the stream before looking at ch: a failed read stores nothing.
    if (rStream.eof())
    {
        SAL_WARN("vcl.filter", "PDFNameElement::Read: unexpected end of file");
        return false;
    }

    if (ch != '/')
    {
        SAL_WARN("vcl.filter", "PDFNameElement::Read: unexpected character: " << ch);
        return false;
    }
    m_nLocation = rStream.Tell();

    // Read till the first white-space or delimiter.
    OStringBuffer aBuf;
    rStream.ReadChar(ch);
    while (!rStream.eof())
    {
        if (rtl::isAsciiWhiteSpace(static_cast<unsigned char>(ch)) || ch == '/' || ch == '['
            || ch == ']' || ch == '<' || ch == '>' || ch == '(')
        {
            rStream.SeekRel(-1);
            m_aValue = aBuf.makeStringAndClear();
            SAL_INFO("vcl.filter", "PDFNameElement::Read: m_aValue is '" << m_aValue << "'");
            return true;
        }
        aBuf.append(ch);
        rStream.ReadChar(ch);
    }

    return false;
}
 
/// Returns the name (without the leading slash) stored by Read().
const OString& PDFNameElement::GetValue() const { return m_aValue; }
 
/// Returns m_nLocation, the stream position right after the leading slash.
sal_uInt64 PDFNameElement::GetLocation() const { return m_nLocation; }
 
/// Creates a stream element that will read nLength bytes in Read(); the offset starts at 0.
PDFStreamElement::PDFStreamElement(size_t nLength)
    : m_nLength(nLength)
    , m_nOffset(0)
{
}
 
/// Records the current stream position in m_nOffset, then copies m_nLength bytes from rStream
/// into m_aMemory.
///
/// @return rStream.good() after the read.
bool PDFStreamElement::Read(SvStream& rStream)
{
    SAL_INFO("vcl.filter", "PDFStreamElement::Read: length is " << m_nLength);
    m_nOffset = rStream.Tell();
    // NOTE(review): the whole m_nLength is allocated up front, before it is known whether the
    // stream holds that many bytes -- confirm that callers bound the length.
    std::vector<unsigned char> aBytes(m_nLength);
    rStream.ReadBytes(aBytes.data(), aBytes.size());
    m_aMemory.WriteBytes(aBytes.data(), aBytes.size());

    return rStream.good();
}
 
/// Mutable access to the in-memory copy of the stream bytes filled by Read().
SvMemoryStream& PDFStreamElement::GetMemory() { return m_aMemory; }
 
/// Returns m_nOffset, the stream position at which Read() started reading the bytes.
sal_uInt64 PDFStreamElement::GetOffset() const { return m_nOffset; }
 
/// No-op: consumes nothing from the stream and always reports success.
bool PDFEndStreamElement::Read(SvStream& /*rStream*/) { return true; }
 
/// No-op: consumes nothing from the stream and always reports success.
bool PDFEndObjectElement::Read(SvStream& /*rStream*/) { return true; }
 
/// Creates an array element; pObject may be null and is only used for logging in PushBack().
PDFArrayElement::PDFArrayElement(PDFObjectElement* pObject)
    : m_pObject(pObject)
{
}
 
// Consumes one character from rStream and reports whether it is the array opener '['.
bool PDFArrayElement::Read(SvStream& rStream)
{
    char cOpener;
    rStream.ReadChar(cOpener);

    if (cOpener == '[')
    {
        SAL_INFO("vcl.filter", "PDFArrayElement::Read: '['");
        return true;
    }

    SAL_WARN("vcl.filter", "PDFArrayElement::Read: unexpected character: " << cOpener);
    return false;
}
 
// Appends pElement to the array; the pointer is stored as given.
void PDFArrayElement::PushBack(PDFElement* pElement)
{
    // Only log the owning object when there is one.
    if (m_pObject != nullptr)
    {
        SAL_INFO("vcl.filter",
                 "PDFArrayElement::PushBack: object is " << m_pObject->GetObjectValue());
    }

    m_aElements.push_back(pElement);
}
 
// Read-only access to the elements added via PushBack(), in insertion order.
const std::vector<PDFElement*>& PDFArrayElement::GetElements() const { return m_aElements; }
 
// Nothing to set up explicitly: the compiler-generated default constructor is used.
PDFEndArrayElement::PDFEndArrayElement() = default;
 
// Records the current stream position, then consumes one character and reports whether it is
// the array terminator ']'.
bool PDFEndArrayElement::Read(SvStream& rStream)
{
    // Remember where the ']' is expected before consuming it.
    m_nOffset = rStream.Tell();

    char cCloser;
    rStream.ReadChar(cCloser);

    if (cCloser == ']')
    {
        SAL_INFO("vcl.filter", "PDFEndArrayElement::Read: ']'");
        return true;
    }

    SAL_WARN("vcl.filter", "PDFEndArrayElement::Read: unexpected character: " << cCloser);
    return false;
}
 
// Returns the stream position recorded by Read() just before the ']' was read.
sal_uInt64 PDFEndArrayElement::GetOffset() const { return m_nOffset; }
 
// PDFObjectParser
 
/**
 * Builds the dictionary / array tree of pParsingElement from the flat token list mrElements.
 *
 * Scanning starts at pParsingElement itself (it is searched for from nStartIndex on; if it is
 * not found, scanning starts at nStartIndex). It ends at the token that closes the outermost
 * dictionary or array, at an end-of-object token, or at the end of the list. Nested
 * dictionaries and arrays are populated by recursive calls.
 *
 * @param pParsingElement object, trailer, dictionary or array to populate; it is flagged as
 *                        being parsed for the duration of the call.
 * @param nStartIndex     index in mrElements from which pParsingElement is searched.
 * @param nCurrentDepth   recursion depth; only forwarded (incremented by one) to nested calls.
 * @return index of the token that closed the outermost dictionary or array, or 0 if no such
 *         token was reached.
 */
size_t PDFObjectParser::parse(PDFElement* pParsingElement, size_t nStartIndex, int nCurrentDepth)
{
    // The index of last parsed element
    size_t nReturnIndex = 0;

    pParsingElement->setParsing(true);

    // Clear the "parsing" flag again on every exit path.
    comphelper::ScopeGuard aGuard([pParsingElement]() { pParsingElement->setParsing(false); });

    // Current object, if root is an object, else nullptr
    auto pParsingObject = dynamic_cast<PDFObjectElement*>(pParsingElement);
    auto pParsingTrailer = dynamic_cast<PDFTrailerElement*>(pParsingElement);

    // Current dictionary, if root is an dictionary, else nullptr
    auto pParsingDictionary = dynamic_cast<PDFDictionaryElement*>(pParsingElement);

    // Current parsing array, if root is an array, else nullptr
    auto pParsingArray = dynamic_cast<PDFArrayElement*>(pParsingElement);

    // Find out where the dictionary for this object starts.
    size_t nIndex = nStartIndex;
    for (size_t i = nStartIndex; i < mrElements.size(); ++i)
    {
        if (mrElements[i].get() == pParsingElement)
        {
            nIndex = i;
            break;
        }
    }

    // Pending dictionary key and the offset recorded for it.
    OString aName;
    sal_uInt64 nNameOffset = 0;
    // Numbers seen but not stored yet: they may still turn out to be reference parameters.
    std::vector<PDFNumberElement*> aNumbers;

    sal_uInt64 nDictionaryOffset = 0;

    // Nesting level of the dictionary / array tokens seen so far; 1 is the outermost one.
    int nDepth = 0;

    // Stores the pending numbers: in a dictionary the last one becomes the value of aName, in
    // an array all of them are appended. The pending name and numbers are reset afterwards.
    auto commitPendingNumbers = [&]() {
        if (aNumbers.empty())
            return;

        if (pParsingDictionary)
        {
            PDFNumberElement* pNumber = aNumbers.back();
            sal_uInt64 nLength = pNumber->GetLocation() + pNumber->GetLength() - nNameOffset;

            pParsingDictionary->insert(aName, pNumber);
            pParsingDictionary->SetKeyOffset(aName, nNameOffset);
            pParsingDictionary->SetKeyValueLength(aName, nLength);
        }
        else if (pParsingArray)
        {
            for (auto& pNumber : aNumbers)
                pParsingArray->PushBack(pNumber);
        }
        else
        {
            SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
        }
        aName.clear();
        aNumbers.clear();
    };

    // Variant used before string / boolean values: pending numbers only go to an array, and
    // the pending name is kept because it is the key of the value that follows.
    auto flushPendingNumbersToArray = [&]() {
        if (pParsingArray)
        {
            for (auto& pNumber : aNumbers)
                pParsingArray->PushBack(pNumber);
        }
        aNumbers.clear();
    };

    for (size_t i = nIndex; i < mrElements.size(); ++i)
    {
        auto* pCurrentElement = mrElements[i].get();

        // Dictionary tokens can be nested, track enter/leave.
        if (auto pCurrentDictionary = dynamic_cast<PDFDictionaryElement*>(pCurrentElement))
        {
            // Handle previously stored number
            commitPendingNumbers();

            nDepth++;

            if (nDepth == 1) // pParsingDictionary is the current one
            {
                // First dictionary start, track start offset.
                nDictionaryOffset = pCurrentDictionary->GetLocation();

                if (pParsingObject)
                {
                    // Then the toplevel dictionary of the object.
                    pParsingObject->SetDictionary(pCurrentDictionary);
                    pParsingObject->SetDictionaryOffset(nDictionaryOffset);
                    pParsingDictionary = pCurrentDictionary;
                }
                else if (pParsingTrailer)
                {
                    pParsingTrailer->SetDictionary(pCurrentDictionary);
                    pParsingDictionary = pCurrentDictionary;
                }
            }
            else if (!pCurrentDictionary->alreadyParsing())
            {
                if (pParsingArray)
                {
                    pParsingArray->PushBack(pCurrentDictionary);
                }
                else if (pParsingDictionary)
                {
                    // Dictionary toplevel value.
                    pParsingDictionary->insert(aName, pCurrentDictionary);
                }
                else
                {
                    SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
                }
                // Nested dictionary.
                const size_t nNextElementIndex = parse(pCurrentDictionary, i, nCurrentDepth + 1);

                // Continue at the nested closing token. A result of 0 means no closing token
                // was found; "0 - 1" would wrap around and restart the scan at index 0, so in
                // that case simply keep moving forwards from the current position.
                if (nNextElementIndex > 0)
                    i = std::max(i, nNextElementIndex - 1);
            }
        }
        else if (auto pCurrentEndDictionary
                 = dynamic_cast<PDFEndDictionaryElement*>(pCurrentElement))
        {
            // Handle previously stored number
            commitPendingNumbers();

            if (pParsingDictionary)
            {
                pParsingDictionary->SetKeyOffset(aName, nNameOffset);
                sal_uInt64 nLength = pCurrentEndDictionary->GetLocation() - nNameOffset + 2;
                pParsingDictionary->SetKeyValueLength(aName, nLength);
                aName.clear();
            }

            if (nDepth == 1) // did the parsing ended
            {
                // Last dictionary end, track length and stop parsing.
                if (pParsingObject)
                {
                    sal_uInt64 nDictionaryLength
                        = pCurrentEndDictionary->GetLocation() - nDictionaryOffset;
                    pParsingObject->SetDictionaryLength(nDictionaryLength);
                }
                nReturnIndex = i;
                break;
            }

            nDepth--;
        }
        else if (auto pCurrentArray = dynamic_cast<PDFArrayElement*>(pCurrentElement))
        {
            // Handle previously stored number
            commitPendingNumbers();

            nDepth++;
            if (nDepth == 1) // pParsingDictionary is the current one
            {
                if (pParsingObject)
                {
                    pParsingObject->SetArray(pCurrentArray);
                    pParsingArray = pCurrentArray;
                }
            }
            else if (!pCurrentArray->alreadyParsing())
            {
                if (pParsingArray)
                {
                    // Array is toplevel
                    pParsingArray->PushBack(pCurrentArray);
                }
                else if (pParsingDictionary)
                {
                    // Dictionary toplevel value.
                    pParsingDictionary->insert(aName, pCurrentArray);
                }

                const size_t nNextElementIndex = parse(pCurrentArray, i, nCurrentDepth + 1);

                // Ensure we go forwards and not endlessly loop: a result of 0 means no closing
                // token was found, and "0 - 1" would wrap around and restart the scan at 0.
                if (nNextElementIndex > 0)
                    i = std::max(i, nNextElementIndex - 1);
            }
        }
        else if (auto pCurrentEndArray = dynamic_cast<PDFEndArrayElement*>(pCurrentElement))
        {
            // Handle previously stored number
            commitPendingNumbers();

            if (nDepth == 1) // did the pParsing ended
            {
                // Last array end, track length and stop parsing.
                nReturnIndex = i;
                break;
            }

            if (pParsingDictionary)
            {
                pParsingDictionary->SetKeyOffset(aName, nNameOffset);
                // Include the ending ']' in the length of the key - (array)value pair length.
                sal_uInt64 nLength = pCurrentEndArray->GetOffset() - nNameOffset + 1;
                pParsingDictionary->SetKeyValueLength(aName, nLength);
                aName.clear();
            }
            nDepth--;
        }
        else if (auto pCurrentName = dynamic_cast<PDFNameElement*>(pCurrentElement))
        {
            // Handle previously stored number
            commitPendingNumbers();

            // Now handle name
            if (pParsingArray)
            {
                // if we are in an array, just push the name to array
                pParsingArray->PushBack(pCurrentName);
            }
            else if (pParsingDictionary)
            {
                // if we are in a dictionary, we need to store the name as a possible key
                if (aName.isEmpty())
                {
                    aName = pCurrentName->GetValue();
                    nNameOffset = pCurrentName->GetLocation();
                }
                else
                {
                    // A key is already pending, so this name is its value.
                    sal_uInt64 nKeyLength
                        = pCurrentName->GetLocation() + pCurrentName->GetLength() - nNameOffset;
                    pParsingDictionary->insert(aName, pCurrentName);
                    pParsingDictionary->SetKeyOffset(aName, nNameOffset);
                    pParsingDictionary->SetKeyValueLength(aName, nKeyLength);
                    aName.clear();
                }
            }
        }
        else if (auto pReference = dynamic_cast<PDFReferenceElement*>(pCurrentElement))
        {
            // Handle previously stored number: the last two pending numbers are dropped here
            // (they are not stored separately), any earlier ones are plain array members.
            if (aNumbers.size() > 2)
            {
                aNumbers.resize(aNumbers.size() - 2);
                if (pParsingArray)
                {
                    for (auto& pNumber : aNumbers)
                        pParsingArray->PushBack(pNumber);
                }
                aNumbers.clear();
            }

            if (pParsingArray)
            {
                pParsingArray->PushBack(pReference);
            }
            else if (pParsingDictionary)
            {
                sal_uInt64 nLength = pReference->GetOffset() - nNameOffset;
                pParsingDictionary->insert(aName, pReference);
                pParsingDictionary->SetKeyOffset(aName, nNameOffset);
                pParsingDictionary->SetKeyValueLength(aName, nLength);
                aName.clear();
            }
            else
            {
                SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
            }
            aNumbers.clear();
        }
        else if (auto pLiteralString = dynamic_cast<PDFLiteralStringElement*>(pCurrentElement))
        {
            // Handle previously stored number
            flushPendingNumbersToArray();

            if (pParsingArray)
            {
                pParsingArray->PushBack(pLiteralString);
            }
            else if (pParsingDictionary)
            {
                pParsingDictionary->insert(aName, pLiteralString);
                pParsingDictionary->SetKeyOffset(aName, nNameOffset);
                aName.clear();
            }
            else
            {
                SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
            }
        }
        else if (auto pBoolean = dynamic_cast<PDFBooleanElement*>(pCurrentElement))
        {
            // Handle previously stored number
            flushPendingNumbersToArray();

            if (pParsingArray)
            {
                pParsingArray->PushBack(pBoolean);
            }
            else if (pParsingDictionary)
            {
                pParsingDictionary->insert(aName, pBoolean);
                pParsingDictionary->SetKeyOffset(aName, nNameOffset);
                aName.clear();
            }
            else
            {
                SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
            }
        }
        else if (auto pHexString = dynamic_cast<PDFHexStringElement*>(pCurrentElement))
        {
            // Handle previously stored number
            flushPendingNumbersToArray();

            if (pParsingArray)
            {
                pParsingArray->PushBack(pHexString);
            }
            else if (pParsingDictionary)
            {
                pParsingDictionary->insert(aName, pHexString);
                pParsingDictionary->SetKeyOffset(aName, nNameOffset);
                aName.clear();
            }
        }
        else if (auto pNumberElement = dynamic_cast<PDFNumberElement*>(pCurrentElement))
        {
            // Just remember this, so that in case it's not a reference parameter,
            // we can handle it later.
            aNumbers.push_back(pNumberElement);
        }
        else if (dynamic_cast<PDFEndObjectElement*>(pCurrentElement))
        {
            // parsing of the object is finished
            break;
        }
        else if (dynamic_cast<PDFObjectElement*>(pCurrentElement)
                 || dynamic_cast<PDFTrailerElement*>(pCurrentElement))
        {
            continue;
        }
        else
        {
            SAL_INFO("vcl.filter", "Unhandled element while parsing.");
        }
    }

    return nReturnIndex;
}
 
} // namespace vcl::filter
 
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
// Stray static-analyzer notes that were attached to this listing (not part of the code):
//   V530: The return value of function 'padToLength' is required to be utilized.
//   V530: The return value of function 'append' is required to be utilized.
//   V1085: Negative value is implicitly converted to unsigned integer type in arithmetic
//          expression. This may lead to unexpected results.