Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-12-10 10:23:49

0001 //========================================================================
0002 //
0003 // PDFDoc.h
0004 //
0005 // Copyright 1996-2003 Glyph & Cog, LLC
0006 //
0007 //========================================================================
0008 
0009 //========================================================================
0010 //
0011 // Modified under the Poppler project - http://poppler.freedesktop.org
0012 //
0013 // All changes made under the Poppler project to this file are licensed
0014 // under GPL version 2 or later
0015 //
0016 // Copyright (C) 2005, 2006, 2008 Brad Hards <bradh@frogmouth.net>
0017 // Copyright (C) 2005, 2009, 2014, 2015, 2017-2022 Albert Astals Cid <aacid@kde.org>
0018 // Copyright (C) 2008 Julien Rebetez <julienr@svn.gnome.org>
0019 // Copyright (C) 2008 Pino Toscano <pino@kde.org>
0020 // Copyright (C) 2008 Carlos Garcia Campos <carlosgc@gnome.org>
0021 // Copyright (C) 2009 Eric Toombs <ewtoombs@uwaterloo.ca>
0022 // Copyright (C) 2009 Kovid Goyal <kovid@kovidgoyal.net>
0023 // Copyright (C) 2010, 2014 Hib Eris <hib@hiberis.nl>
0024 // Copyright (C) 2010 Srinivas Adicherla <srinivas.adicherla@geodesic.com>
0025 // Copyright (C) 2011, 2013, 2014, 2016 Thomas Freitag <Thomas.Freitag@alfa.de>
0026 // Copyright (C) 2012 Fabio D'Urso <fabiodurso@hotmail.it>
0027 // Copyright (C) 2013, 2017 Adrian Johnson <ajohnson@redneon.com>
0028 // Copyright (C) 2013, 2018 Adam Reichold <adamreichold@myopera.com>
0029 // Copyright (C) 2013 Adrian Perez de Castro <aperez@igalia.com>
0030 // Copyright (C) 2015 André Guerreiro <aguerreiro1985@gmail.com>
0031 // Copyright (C) 2015 André Esser <bepandre@hotmail.com>
0032 // Copyright (C) 2016 Jakub Alba <jakubalba@gmail.com>
0033 // Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>. Work sponsored by the LiMux project of the city of Munich
0034 // Copyright (C) 2018 Evangelos Rigas <erigas@rnd2.org>
0035 // Copyright (C) 2020-2022 Oliver Sander <oliver.sander@tu-dresden.de>
0036 // Copyright (C) 2020 Nelson Benítez León <nbenitezl@gmail.com>
0037 // Copyright (C) 2021 Mahmoud Khalil <mahmoudkhalil11@gmail.com>
0038 // Copyright (C) 2021 Georgiy Sgibnev <georgiy@sgibnev.com>. Work sponsored by lab50.net.
0039 // Copyright (C) 2021 Marek Kasik <mkasik@redhat.com>
0040 // Copyright (C) 2022 Felix Jung <fxjung@posteo.de>
0041 // Copyright (C) 2022 crt <chluo@cse.cuhk.edu.hk>
0042 // Copyright 2023 g10 Code GmbH, Author: Sune Stolborg Vuorela <sune@vuorela.dk>
0043 //
0044 // To see a description of the changes please see the Changelog file that
0045 // came with your tarball or type make ChangeLog if you are building from git
0046 //
0047 //========================================================================
0048 
0049 #ifndef PDFDOC_H
0050 #define PDFDOC_H
0051 
0052 #include <algorithm>
0053 #include <cstdio>
0054 #include <mutex>
0055 
0056 #include "poppler-config.h"
0057 
0058 #include "poppler_private_export.h"
0059 
0060 #include "XRef.h"
0061 #include "Catalog.h"
0062 #include "Page.h"
0063 #include "Annot.h"
0064 #include "ErrorCodes.h"
0065 #include "Form.h"
0066 #include "OptionalContent.h"
0067 #include "Stream.h"
0068 
// Forward-declared classes. Their full definitions are provided by other
// headers that are not visible from this file.
class GooString;
class GooFile;
class BaseStream;
class OutputDev;
class Links;
class LinkAction;
class LinkDest;
class Outline;
class Linearization;
class SecurityHandler;
class Hints;
class StructTreeRoot;
0081 
// Selects how PDFDoc::saveAs() writes the document. saveAs() uses
// writeStandard when the caller does not pass a mode.
enum PDFWriteMode
{
    writeStandard, // default mode of saveAs()
    writeForceRewrite, // NOTE(review): presumably always writes a complete new file -- confirm in PDFDoc.cc
    writeForceIncremental // NOTE(review): presumably always appends an incremental update -- confirm in PDFDoc.cc
};
0088 
// PDF subtype of a document, as returned by PDFDoc::getPDFSubtype().
// NOTE(review): the enumerator names suggest the ISO PDF/A, PDF/E, PDF/UA,
// PDF/VT and PDF/X families -- confirm against the code that fills this in.
enum PDFSubtype
{
    subtypeNull, // first enumerator (value 0); NOTE(review): presumably "not determined" -- confirm
    subtypePDFA,
    subtypePDFE,
    subtypePDFUA,
    subtypePDFVT,
    subtypePDFX,
    subtypeNone // NOTE(review): presumably "document declares no subtype" -- confirm
};
0099 
// Part number of the PDF subtype, as returned by
// PDFDoc::getPDFSubtypePart(). With the implicit numbering used here,
// subtypePartN has the integer value N.
enum PDFSubtypePart
{
    subtypePartNull, // value 0; NOTE(review): presumably "not determined" -- confirm
    subtypePart1,
    subtypePart2,
    subtypePart3,
    subtypePart4,
    subtypePart5,
    subtypePart6,
    subtypePart7,
    subtypePart8,
    subtypePartNone // NOTE(review): presumably "no part declared" -- confirm
};
0113 
// Conformance level of the PDF subtype, as returned by
// PDFDoc::getPDFSubtypeConformance().
// NOTE(review): the suffixes (A, B, G, N, P, PG, U) look like the
// conformance letters used by the PDF subtype standards -- confirm.
enum PDFSubtypeConformance
{
    subtypeConfNull, // first enumerator (value 0); NOTE(review): presumably "not determined" -- confirm
    subtypeConfA,
    subtypeConfB,
    subtypeConfG,
    subtypeConfN,
    subtypeConfP,
    subtypeConfPG,
    subtypeConfU,
    subtypeConfNone // NOTE(review): presumably "no conformance declared" -- confirm
};
0126 
0127 //------------------------------------------------------------------------
0128 // PDFDoc
0129 //------------------------------------------------------------------------
0130 
0131 class POPPLER_PRIVATE_EXPORT PDFDoc
0132 {
0133 public:
0134     explicit PDFDoc(std::unique_ptr<GooString> &&fileNameA, const std::optional<GooString> &ownerPassword = {}, const std::optional<GooString> &userPassword = {}, void *guiDataA = nullptr,
0135                     const std::function<void()> &xrefReconstructedCallback = {});
0136 
0137 #ifdef _WIN32
0138     PDFDoc(wchar_t *fileNameA, int fileNameLen, const std::optional<GooString> &ownerPassword = {}, const std::optional<GooString> &userPassword = {}, void *guiDataA = nullptr, const std::function<void()> &xrefReconstructedCallback = {});
0139 #endif
0140 
0141     explicit PDFDoc(BaseStream *strA, const std::optional<GooString> &ownerPassword = {}, const std::optional<GooString> &userPassword = {}, void *guiDataA = nullptr, const std::function<void()> &xrefReconstructedCallback = {});
0142     ~PDFDoc();
0143 
0144     PDFDoc(const PDFDoc &) = delete;
0145     PDFDoc &operator=(const PDFDoc &) = delete;
0146 
0147     static std::unique_ptr<PDFDoc> ErrorPDFDoc(int errorCode, std::unique_ptr<GooString> &&fileNameA);
0148 
0149     // Was PDF document successfully opened?
0150     bool isOk() const { return ok; }
0151 
0152     // Get the error code (if isOk() returns false).
0153     int getErrorCode() const { return errCode; }
0154 
0155     // Get the error code returned by fopen() (if getErrorCode() ==
0156     // errOpenFile).
0157     int getFopenErrno() const { return fopenErrno; }
0158 
0159     // Get file name.
0160     const GooString *getFileName() const { return fileName.get(); }
0161 #ifdef _WIN32
0162     wchar_t *getFileNameU() { return fileNameU; }
0163 #endif
0164 
0165     // Get the linearization table.
0166     Linearization *getLinearization();
0167     bool checkLinearization();
0168 
0169     // Get the xref table.
0170     XRef *getXRef() const { return xref; }
0171 
0172     // Get catalog.
0173     Catalog *getCatalog() const { return catalog; }
0174 
0175     // Get optional content configuration
0176     OCGs *getOptContentConfig() const { return catalog->getOptContentConfig(); }
0177 
0178     // Get base stream.
0179     BaseStream *getBaseStream() const { return str; }
0180 
0181     // Get page parameters.
0182     double getPageMediaWidth(int page) { return getPage(page) ? getPage(page)->getMediaWidth() : 0.0; }
0183     double getPageMediaHeight(int page) { return getPage(page) ? getPage(page)->getMediaHeight() : 0.0; }
0184     double getPageCropWidth(int page) { return getPage(page) ? getPage(page)->getCropWidth() : 0.0; }
0185     double getPageCropHeight(int page) { return getPage(page) ? getPage(page)->getCropHeight() : 0.0; }
0186     int getPageRotate(int page) { return getPage(page) ? getPage(page)->getRotate() : 0; }
0187 
0188     // Get number of pages.
0189     int getNumPages();
0190 
0191     // Return the contents of the metadata stream, or nullptr if there is
0192     // no metadata.
0193     std::unique_ptr<GooString> readMetadata() const { return catalog->readMetadata(); }
0194 
0195     // Return the structure tree root object.
0196     const StructTreeRoot *getStructTreeRoot() const { return catalog->getStructTreeRoot(); }
0197 
0198     // Get page.
0199     Page *getPage(int page);
0200 
0201     // Display a page.
0202     void displayPage(OutputDev *out, int page, double hDPI, double vDPI, int rotate, bool useMediaBox, bool crop, bool printing, bool (*abortCheckCbk)(void *data) = nullptr, void *abortCheckCbkData = nullptr,
0203                      bool (*annotDisplayDecideCbk)(Annot *annot, void *user_data) = nullptr, void *annotDisplayDecideCbkData = nullptr, bool copyXRef = false);
0204 
0205     // Display a range of pages.
0206     void displayPages(OutputDev *out, int firstPage, int lastPage, double hDPI, double vDPI, int rotate, bool useMediaBox, bool crop, bool printing, bool (*abortCheckCbk)(void *data) = nullptr, void *abortCheckCbkData = nullptr,
0207                       bool (*annotDisplayDecideCbk)(Annot *annot, void *user_data) = nullptr, void *annotDisplayDecideCbkData = nullptr);
0208 
0209     // Display part of a page.
0210     void displayPageSlice(OutputDev *out, int page, double hDPI, double vDPI, int rotate, bool useMediaBox, bool crop, bool printing, int sliceX, int sliceY, int sliceW, int sliceH, bool (*abortCheckCbk)(void *data) = nullptr,
0211                           void *abortCheckCbkData = nullptr, bool (*annotDisplayDecideCbk)(Annot *annot, void *user_data) = nullptr, void *annotDisplayDecideCbkData = nullptr, bool copyXRef = false);
0212 
0213     // Find a page, given its object ID.  Returns page number, or 0 if
0214     // not found.
0215     int findPage(const Ref ref) { return catalog->findPage(ref); }
0216 
0217     // Returns the links for the current page, transferring ownership to
0218     // the caller.
0219     std::unique_ptr<Links> getLinks(int page);
0220 
0221     // Find a named destination.  Returns the link destination, or
0222     // nullptr if <name> is not a destination.
0223     std::unique_ptr<LinkDest> findDest(const GooString *name) { return catalog->findDest(name); }
0224 
0225     // Process the links for a page.
0226     void processLinks(OutputDev *out, int page);
0227 
0228     // Return the outline object.
0229     Outline *getOutline();
0230 
0231     // Is the file encrypted?
0232     bool isEncrypted() { return xref->isEncrypted(); }
0233 
0234     std::vector<FormFieldSignature *> getSignatureFields();
0235 
0236     // Check various permissions.
0237     bool okToPrint(bool ignoreOwnerPW = false) { return xref->okToPrint(ignoreOwnerPW); }
0238     bool okToPrintHighRes(bool ignoreOwnerPW = false) { return xref->okToPrintHighRes(ignoreOwnerPW); }
0239     bool okToChange(bool ignoreOwnerPW = false) { return xref->okToChange(ignoreOwnerPW); }
0240     bool okToCopy(bool ignoreOwnerPW = false) { return xref->okToCopy(ignoreOwnerPW); }
0241     bool okToAddNotes(bool ignoreOwnerPW = false) { return xref->okToAddNotes(ignoreOwnerPW); }
0242     bool okToFillForm(bool ignoreOwnerPW = false) { return xref->okToFillForm(ignoreOwnerPW); }
0243     bool okToAccessibility(bool ignoreOwnerPW = false) { return xref->okToAccessibility(ignoreOwnerPW); }
0244     bool okToAssemble(bool ignoreOwnerPW = false) { return xref->okToAssemble(ignoreOwnerPW); }
0245 
0246     // Is this document linearized?
0247     bool isLinearized(bool tryingToReconstruct = false);
0248 
0249     // Return the document's Info dictionary (if any).
0250     Object getDocInfo() { return xref->getDocInfo(); }
0251     Object getDocInfoNF() { return xref->getDocInfoNF(); }
0252 
0253     // Remove the document's Info dictionary and update the trailer dictionary.
0254     void removeDocInfo() { xref->removeDocInfo(); }
0255 
0256     // Set doc info string entry. nullptr or empty value will cause a removal.
0257     // Takes ownership of value.
0258     void setDocInfoStringEntry(const char *key, GooString *value);
0259 
0260     // Set document's properties in document's Info dictionary.
0261     // nullptr or empty value will cause a removal.
0262     // Takes ownership of value.
0263     void setDocInfoTitle(GooString *title) { setDocInfoStringEntry("Title", title); }
0264     void setDocInfoAuthor(GooString *author) { setDocInfoStringEntry("Author", author); }
0265     void setDocInfoSubject(GooString *subject) { setDocInfoStringEntry("Subject", subject); }
0266     void setDocInfoKeywords(GooString *keywords) { setDocInfoStringEntry("Keywords", keywords); }
0267     void setDocInfoCreator(GooString *creator) { setDocInfoStringEntry("Creator", creator); }
0268     void setDocInfoProducer(GooString *producer) { setDocInfoStringEntry("Producer", producer); }
0269     void setDocInfoCreatDate(GooString *creatDate) { setDocInfoStringEntry("CreationDate", creatDate); }
0270     void setDocInfoModDate(GooString *modDate) { setDocInfoStringEntry("ModDate", modDate); }
0271 
0272     // Get document's properties from document's Info dictionary.
0273     // Returns nullptr on fail.
0274     std::unique_ptr<GooString> getDocInfoStringEntry(const char *key);
0275 
0276     std::unique_ptr<GooString> getDocInfoTitle() { return getDocInfoStringEntry("Title"); }
0277     std::unique_ptr<GooString> getDocInfoAuthor() { return getDocInfoStringEntry("Author"); }
0278     std::unique_ptr<GooString> getDocInfoSubject() { return getDocInfoStringEntry("Subject"); }
0279     std::unique_ptr<GooString> getDocInfoKeywords() { return getDocInfoStringEntry("Keywords"); }
0280     std::unique_ptr<GooString> getDocInfoCreator() { return getDocInfoStringEntry("Creator"); }
0281     std::unique_ptr<GooString> getDocInfoProducer() { return getDocInfoStringEntry("Producer"); }
0282     std::unique_ptr<GooString> getDocInfoCreatDate() { return getDocInfoStringEntry("CreationDate"); }
0283     std::unique_ptr<GooString> getDocInfoModDate() { return getDocInfoStringEntry("ModDate"); }
0284 
0285     // Return the PDF subtype, part, and conformance
0286     PDFSubtype getPDFSubtype() const { return pdfSubtype; }
0287     PDFSubtypePart getPDFSubtypePart() const { return pdfPart; }
0288     PDFSubtypeConformance getPDFSubtypeConformance() const { return pdfConformance; }
0289 
0290     // Return the PDF version specified by the file (either header or catalog).
0291     int getPDFMajorVersion() const { return std::max(headerPdfMajorVersion, catalog->getPDFMajorVersion()); }
0292     int getPDFMinorVersion() const
0293     {
0294         const int catalogMajorVersion = catalog->getPDFMajorVersion();
0295         if (catalogMajorVersion > headerPdfMajorVersion) {
0296             return catalog->getPDFMinorVersion();
0297         } else if (headerPdfMajorVersion > catalogMajorVersion) {
0298             return headerPdfMinorVersion;
0299         } else {
0300             return std::max(headerPdfMinorVersion, catalog->getPDFMinorVersion());
0301         }
0302     }
0303 
0304     // Return the PDF ID in the trailer dictionary (if any).
0305     bool getID(GooString *permanent_id, GooString *update_id) const;
0306 
0307     // Save one page with another name.
0308     int savePageAs(const GooString &name, int pageNo);
0309     // Save this file with another name.
0310     int saveAs(const GooString &name, PDFWriteMode mode = writeStandard);
0311     // Save this file in the given output stream.
0312     int saveAs(OutStream *outStr, PDFWriteMode mode = writeStandard);
0313     // Save this file with another name without saving changes
0314     int saveWithoutChangesAs(const GooString &name);
0315     // Save this file in the given output stream without saving changes
0316     int saveWithoutChangesAs(OutStream *outStr);
0317 
0318     // Return a pointer to the GUI (XPDFCore or WinPDFCore object).
0319     void *getGUIData() { return guiData; }
0320 
0321     // rewrite pageDict with MediaBox, CropBox and new page CTM
0322     bool replacePageDict(int pageNo, int rotate, const PDFRectangle *mediaBox, const PDFRectangle *cropBox);
0323     bool markPageObjects(Dict *pageDict, XRef *xRef, XRef *countRef, unsigned int numOffset, int oldRefNum, int newRefNum, std::set<Dict *> *alreadyMarkedDicts = nullptr);
0324     bool markAnnotations(Object *annots, XRef *xRef, XRef *countRef, unsigned int numOffset, int oldPageNum, int newPageNum, std::set<Dict *> *alreadyMarkedDicts = nullptr);
0325     void markAcroForm(Object *afObj, XRef *xRef, XRef *countRef, unsigned int numOffset, int oldRefNum, int newRefNum);
0326     // write all objects used by pageDict to outStr
0327     unsigned int writePageObjects(OutStream *outStr, XRef *xRef, unsigned int numOffset, bool combine = false);
0328     static void writeObject(Object *obj, OutStream *outStr, XRef *xref, unsigned int numOffset, unsigned char *fileKey, CryptAlgorithm encAlgorithm, int keyLength, int objNum, int objGen, std::set<Dict *> *alreadyWrittenDicts = nullptr);
0329     static void writeObject(Object *obj, OutStream *outStr, XRef *xref, unsigned int numOffset, unsigned char *fileKey, CryptAlgorithm encAlgorithm, int keyLength, Ref ref, std::set<Dict *> *alreadyWrittenDicts = nullptr);
0330     static void writeHeader(OutStream *outStr, int major, int minor);
0331 
0332     static Object createTrailerDict(int uxrefSize, bool incrUpdate, Goffset startxRef, Ref *root, XRef *xRef, const char *fileName, Goffset fileSize);
0333     static void writeXRefTableTrailer(Object &&trailerDict, XRef *uxref, bool writeAllEntries, Goffset uxrefOffset, OutStream *outStr, XRef *xRef);
0334     static void writeXRefStreamTrailer(Object &&trailerDict, XRef *uxref, Ref *uxrefStreamRef, Goffset uxrefOffset, OutStream *outStr, XRef *xRef);
0335     // scans the PDF and returns whether it contains any javascript
0336     bool hasJavascript();
0337 
0338     // Arguments signatureText and signatureTextLeft are UTF-16 big endian strings with BOM.
0339     // Arguments reason and location are UTF-16 big endian strings with BOM. An empty string and nullptr are acceptable too.
0340     // Argument imagePath is a background image (a path to a file).
0341     // sign() takes ownership of partialFieldName.
0342     bool sign(const std::string &saveFilename, const std::string &certNickname, const std::string &password, GooString *partialFieldName, int page, const PDFRectangle &rect, const GooString &signatureText,
0343               const GooString &signatureTextLeft, double fontSize, double leftFontSize, std::unique_ptr<AnnotColor> &&fontColor, double borderWidth, std::unique_ptr<AnnotColor> &&borderColor, std::unique_ptr<AnnotColor> &&backgroundColor,
0344               const GooString *reason = nullptr, const GooString *location = nullptr, const std::string &imagePath = "", const std::optional<GooString> &ownerPassword = {}, const std::optional<GooString> &userPassword = {});
0345 
0346 private:
0347     // insert referenced objects in XRef
0348     bool markDictionary(Dict *dict, XRef *xRef, XRef *countRef, unsigned int numOffset, int oldRefNum, int newRefNum, std::set<Dict *> *alreadyMarkedDicts);
0349     bool markObject(Object *obj, XRef *xRef, XRef *countRef, unsigned int numOffset, int oldRefNum, int newRefNum, std::set<Dict *> *alreadyMarkedDicts = nullptr);
0350     static void writeDictionary(Dict *dict, OutStream *outStr, XRef *xRef, unsigned int numOffset, unsigned char *fileKey, CryptAlgorithm encAlgorithm, int keyLength, Ref ref, std::set<Dict *> *alreadyWrittenDicts);
0351 
0352     // Write object header to current file stream and return its offset
0353     static Goffset writeObjectHeader(Ref *ref, OutStream *outStr);
0354     static void writeObjectFooter(OutStream *outStr);
0355 
0356     inline void writeObject(Object *obj, OutStream *outStr, unsigned char *fileKey, CryptAlgorithm encAlgorithm, int keyLength, int objNum, int objGen)
0357     {
0358         writeObject(obj, outStr, getXRef(), 0, fileKey, encAlgorithm, keyLength, { objNum, objGen });
0359     }
0360     inline void writeObject(Object *obj, OutStream *outStr, unsigned char *fileKey, CryptAlgorithm encAlgorithm, int keyLength, Ref ref) { writeObject(obj, outStr, getXRef(), 0, fileKey, encAlgorithm, keyLength, ref); }
0361     static void writeStream(Stream *str, OutStream *outStr);
0362     static void writeRawStream(Stream *str, OutStream *outStr);
0363     void writeXRefTableTrailer(Goffset uxrefOffset, XRef *uxref, bool writeAllEntries, int uxrefSize, OutStream *outStr, bool incrUpdate);
0364     static void writeString(const GooString *s, OutStream *outStr, const unsigned char *fileKey, CryptAlgorithm encAlgorithm, int keyLength, Ref ref);
0365     void saveIncrementalUpdate(OutStream *outStr);
0366     void saveCompleteRewrite(OutStream *outStr);
0367 
0368     Page *parsePage(int page);
0369 
0370     // Get hints.
0371     Hints *getHints();
0372 
0373     PDFDoc();
0374     bool setup(const std::optional<GooString> &ownerPassword, const std::optional<GooString> &userPassword, const std::function<void()> &xrefReconstructedCallback);
0375     bool checkFooter();
0376     void checkHeader();
0377     bool checkEncryption(const std::optional<GooString> &ownerPassword, const std::optional<GooString> &userPassword);
0378     void extractPDFSubtype();
0379 
0380     // Get the offset of the start xref table.
0381     Goffset getStartXRef(bool tryingToReconstruct = false);
0382     // Get the offset of the entries in the main XRef table of a
0383     // linearized document (0 for non linearized documents).
0384     Goffset getMainXRefEntriesOffset(bool tryingToReconstruct = false);
0385     long long strToLongLong(const char *s);
0386 
0387     std::unique_ptr<GooString> fileName;
0388 #ifdef _WIN32
0389     wchar_t *fileNameU = nullptr;
0390 #endif
0391     std::unique_ptr<GooFile> file;
0392     BaseStream *str = nullptr;
0393     void *guiData = nullptr;
0394     int headerPdfMajorVersion;
0395     int headerPdfMinorVersion;
0396     PDFSubtype pdfSubtype;
0397     PDFSubtypePart pdfPart;
0398     PDFSubtypeConformance pdfConformance;
0399     Linearization *linearization = nullptr;
0400     // linearizationState = 0: unchecked
0401     // linearizationState = 1: checked and valid
0402     // linearizationState = 2: checked and invalid
0403     int linearizationState;
0404     XRef *xref = nullptr;
0405     SecurityHandler *secHdlr = nullptr;
0406     Catalog *catalog = nullptr;
0407     Hints *hints = nullptr;
0408     Outline *outline = nullptr;
0409     Page **pageCache = nullptr;
0410 
0411     bool ok = false;
0412     int errCode = errNone;
0413     // If there is an error opening the PDF file with fopen() in the constructor,
0414     // then the POSIX errno will be here.
0415     int fopenErrno;
0416 
0417     Goffset startXRefPos = -1; // offset of last xref table
0418     mutable std::recursive_mutex mutex;
0419 };
0420 
0421 #endif