Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-12-10 10:23:51

0001 //========================================================================
0002 //
0003 // StructElement.h
0004 //
0005 // This file is licensed under the GPLv2 or later
0006 //
0007 // Copyright 2013, 2014 Igalia S.L.
0008 // Copyright 2014 Luigi Scarso <luigi.scarso@gmail.com>
0009 // Copyright 2014, 2018, 2019, 2021 Albert Astals Cid <aacid@kde.org>
0010 // Copyright 2018 Adam Reichold <adam.reichold@t-online.de>
0011 // Copyright 2021 Adrian Johnson <ajohnson@redneon.com>
0012 //
0013 //========================================================================
0014 
0015 #ifndef STRUCTELEMENT_H
0016 #define STRUCTELEMENT_H
0017 
0018 #include "goo/GooString.h"
0019 #include "MarkedContentOutputDev.h"
0020 #include "Object.h"
0021 #include "poppler_private_export.h"
0022 #include <vector>
0023 #include <set>
0024 
0025 class GooString;
0026 class Dict;
0027 class StructElement;
0028 class StructTreeRoot;
0029 
0030 class POPPLER_PRIVATE_EXPORT Attribute
0031 {
0032 public:
0033     enum Type
0034     {
0035         Unknown = 0, // Uninitialized, parsing error, etc.
0036         UserProperty, // User defined attribute (i.e. non-standard)
0037 
0038         // Common standard attributes
0039         Placement,
0040         WritingMode,
0041         BackgroundColor,
0042         BorderColor,
0043         BorderStyle,
0044         BorderThickness,
0045         Color,
0046         Padding,
0047 
0048         // Block element standard attributes
0049         SpaceBefore,
0050         SpaceAfter,
0051         StartIndent,
0052         EndIndent,
0053         TextIndent,
0054         TextAlign,
0055         BBox,
0056         Width,
0057         Height,
0058         BlockAlign,
0059         InlineAlign,
0060         TBorderStyle,
0061         TPadding,
0062 
0063         // Inline element standard attributes
0064         BaselineShift,
0065         LineHeight,
0066         TextDecorationColor,
0067         TextDecorationThickness,
0068         TextDecorationType,
0069         RubyAlign,
0070         RubyPosition,
0071         GlyphOrientationVertical,
0072 
0073         // Column-only standard attributes
0074         ColumnCount,
0075         ColumnGap,
0076         ColumnWidths,
0077 
0078         // List-only standard attributes
0079         ListNumbering,
0080 
0081         // PrintField-only standard attributes
0082         Role,
0083         checked,
0084         Desc,
0085 
0086         // Table-only standard attributes
0087         RowSpan,
0088         ColSpan,
0089         Headers,
0090         Scope,
0091         Summary,
0092     };
0093 
0094     enum Owner
0095     {
0096         UnknownOwner = 0,
0097         // User-defined attributes
0098         UserProperties,
0099         // Standard attributes
0100         Layout,
0101         List,
0102         PrintField,
0103         Table,
0104         // Translation to other formats
0105         XML_1_00,
0106         HTML_3_20,
0107         HTML_4_01,
0108         OEB_1_00,
0109         RTF_1_05,
0110         CSS_1_00,
0111         CSS_2_00,
0112     };
0113 
0114     // Creates a standard attribute. The name is predefined, and the
0115     // value is type-checked to conform to the PDF specification.
0116     Attribute(Type type, Object *value);
0117 
0118     // Creates an UserProperty attribute, with an arbitrary name and value.
0119     Attribute(GooString &&name, Object *value);
0120 
0121     bool isOk() const { return type != Unknown; }
0122 
0123     // Name, type and value can be set only on construction.
0124     Type getType() const { return type; }
0125     Owner getOwner() const { return owner; }
0126     const char *getTypeName() const;
0127     const char *getOwnerName() const;
0128     const Object *getValue() const { return &value; }
0129     static Object *getDefaultValue(Type type);
0130 
0131     // The caller gets the ownership of the return GooString and is responsible of deleting it
0132     std::unique_ptr<GooString> getName() const { return std::make_unique<GooString>(type == UserProperty ? name.c_str() : getTypeName()); }
0133 
0134     // The revision is optional, and defaults to zero.
0135     unsigned int getRevision() const { return revision; }
0136     void setRevision(unsigned int revisionA) { revision = revisionA; }
0137 
0138     // Hidden elements should not be displayed by the user agent
0139     bool isHidden() const { return hidden; }
0140     void setHidden(bool hiddenA) { hidden = hiddenA; }
0141 
0142     // The formatted value may be in the PDF, or be left undefined (nullptr).
0143     // In the later case the user agent should provide a default representation.
0144     const char *getFormattedValue() const { return formatted ? formatted->c_str() : nullptr; }
0145     void setFormattedValue(const char *formattedA);
0146 
0147     ~Attribute();
0148 
0149 private:
0150     Type type;
0151     Owner owner;
0152     unsigned int revision;
0153     GooString name;
0154     Object value;
0155     bool hidden;
0156     GooString *formatted;
0157 
0158     bool checkType(StructElement *element = nullptr);
0159     static Type getTypeForName(const char *name, StructElement *element = nullptr);
0160     static Attribute *parseUserProperty(Dict *property);
0161 
0162     friend class StructElement;
0163 };
0164 
0165 class POPPLER_PRIVATE_EXPORT StructElement
0166 {
0167 public:
0168     enum Type
0169     {
0170         Unknown = 0,
0171         MCID, // MCID reference, used internally
0172         OBJR, // Object reference, used internally
0173 
0174         Document,
0175         Part,
0176         Art,
0177         Sect,
0178         Div, // Structural elements
0179 
0180         Span,
0181         Quote,
0182         Note,
0183         Reference,
0184         BibEntry, // Inline elements
0185         Code,
0186         Link,
0187         Annot,
0188         BlockQuote,
0189         Caption,
0190         NonStruct,
0191         TOC,
0192         TOCI,
0193         Index,
0194         Private,
0195 
0196         P,
0197         H,
0198         H1,
0199         H2,
0200         H3,
0201         H4,
0202         H5,
0203         H6, // Paragraph-like
0204 
0205         L,
0206         LI,
0207         Lbl,
0208         LBody, // List elements
0209 
0210         Table,
0211         TR,
0212         TH,
0213         TD,
0214         THead,
0215         TFoot,
0216         TBody, // Table elements
0217 
0218         Ruby,
0219         RB,
0220         RT,
0221         RP, // Ruby text elements
0222         Warichu,
0223         WT,
0224         WP,
0225 
0226         Figure,
0227         Formula,
0228         Form, // Illustration-like elements
0229     };
0230 
0231     static const Ref InvalidRef;
0232 
0233     const char *getTypeName() const;
0234     Type getType() const { return type; }
0235     bool isOk() const { return type != Unknown; }
0236     bool isBlock() const;
0237     bool isInline() const;
0238     bool isGrouping() const;
0239 
0240     inline bool isContent() const { return (type == MCID) || isObjectRef(); }
0241     inline bool isObjectRef() const { return (type == OBJR && c->ref != Ref::INVALID()); }
0242 
0243     int getMCID() const { return c->mcid; }
0244     Ref getObjectRef() const { return c->ref; }
0245     Ref getParentRef() { return isContent() ? parent->getParentRef() : s->parentRef; }
0246     bool hasPageRef() const;
0247     bool getPageRef(Ref &ref) const;
0248     StructTreeRoot *getStructTreeRoot() { return treeRoot; }
0249 
0250     // Optional element identifier.
0251     const GooString *getID() const { return isContent() ? nullptr : s->id; }
0252     GooString *getID() { return isContent() ? nullptr : s->id; }
0253 
0254     // Optional ISO language name, e.g. en_US
0255     GooString *getLanguage()
0256     {
0257         if (!isContent() && s->language) {
0258             return s->language;
0259         }
0260         return parent ? parent->getLanguage() : nullptr;
0261     }
0262     const GooString *getLanguage() const
0263     {
0264         if (!isContent() && s->language) {
0265             return s->language;
0266         }
0267         return parent ? parent->getLanguage() : nullptr;
0268     }
0269 
0270     // Optional revision number, defaults to zero.
0271     unsigned int getRevision() const { return isContent() ? 0 : s->revision; }
0272     void setRevision(unsigned int revision)
0273     {
0274         if (isContent()) {
0275             s->revision = revision;
0276         }
0277     }
0278 
0279     // Optional element title, in human-readable form.
0280     const GooString *getTitle() const { return isContent() ? nullptr : s->title; }
0281     GooString *getTitle() { return isContent() ? nullptr : s->title; }
0282 
0283     // Optional element expanded abbreviation text.
0284     const GooString *getExpandedAbbr() const { return isContent() ? nullptr : s->expandedAbbr; }
0285     GooString *getExpandedAbbr() { return isContent() ? nullptr : s->expandedAbbr; }
0286 
0287     unsigned getNumChildren() const { return isContent() ? 0 : s->elements.size(); }
0288     const StructElement *getChild(int i) const { return isContent() ? nullptr : s->elements.at(i); }
0289     StructElement *getChild(int i) { return isContent() ? nullptr : s->elements.at(i); }
0290 
0291     void appendChild(StructElement *element)
0292     {
0293         if (!isContent() && element && element->isOk()) {
0294             s->elements.push_back(element);
0295         }
0296     }
0297 
0298     unsigned getNumAttributes() const { return isContent() ? 0 : s->attributes.size(); }
0299     const Attribute *getAttribute(int i) const { return isContent() ? nullptr : s->attributes.at(i); }
0300     Attribute *getAttribute(int i) { return isContent() ? nullptr : s->attributes.at(i); }
0301 
0302     void appendAttribute(Attribute *attribute)
0303     {
0304         if (!isContent() && attribute) {
0305             s->attributes.push_back(attribute);
0306         }
0307     }
0308 
0309     const Attribute *findAttribute(Attribute::Type attributeType, bool inherit = false, Attribute::Owner owner = Attribute::UnknownOwner) const;
0310 
0311     const GooString *getAltText() const { return isContent() ? nullptr : s->altText; }
0312     GooString *getAltText() { return isContent() ? nullptr : s->altText; }
0313 
0314     const GooString *getActualText() const { return isContent() ? nullptr : s->actualText; }
0315     GooString *getActualText() { return isContent() ? nullptr : s->actualText; }
0316 
0317     // Content text referenced by the element:
0318     //
0319     // - For MCID reference elements, this is just the text of the
0320     //   corresponding marked content object in the page stream, regardless
0321     //   of the setting of the "recursive" flag.
0322     // - For other elements, if the "recursive" flag is set, the text
0323     //   enclosed by *all* the child MCID reference elements of the subtree
0324     //   is returned. The text is assembled by traversing the leaf MCID
0325     //   reference elements in logical order.
0326     // - In any other case, the function returns nullptr.
0327     //
0328     // A new string is returned, and the ownership passed to the caller.
0329     //
0330     GooString *getText(bool recursive = true) const { return appendSubTreeText(nullptr, recursive); }
0331 
0332     const TextSpanArray getTextSpans() const
0333     {
0334         if (!isContent()) {
0335             return TextSpanArray();
0336         }
0337         MarkedContentOutputDev mcdev(getMCID(), stmRef);
0338         return getTextSpansInternal(mcdev);
0339     }
0340 
0341     ~StructElement();
0342 
0343 private:
0344     GooString *appendSubTreeText(GooString *string, bool recursive) const;
0345     const TextSpanArray &getTextSpansInternal(MarkedContentOutputDev &mcdev) const;
0346 
0347     typedef std::vector<Attribute *> AttrPtrArray;
0348     typedef std::vector<StructElement *> ElemPtrArray;
0349 
0350     struct StructData
0351     {
0352         Ref parentRef;
0353         GooString *altText;
0354         GooString *actualText;
0355         GooString *id;
0356         GooString *title;
0357         GooString *expandedAbbr;
0358         GooString *language;
0359         unsigned int revision;
0360         ElemPtrArray elements;
0361         AttrPtrArray attributes;
0362 
0363         StructData();
0364         ~StructData();
0365 
0366         StructData(const StructData &) = delete;
0367         StructData &operator=(const StructData &) = delete;
0368     };
0369 
0370     // Data in content elements (MCID, MCR)
0371     struct ContentData
0372     {
0373         union {
0374             int mcid;
0375             Ref ref;
0376         };
0377 
0378         explicit ContentData(int mcidA) : mcid(mcidA) { }
0379         explicit ContentData(const Ref r) { ref = r; }
0380     };
0381 
0382     // Common data
0383     Type type;
0384     StructTreeRoot *treeRoot;
0385     StructElement *parent;
0386     mutable Object pageRef;
0387     Object stmRef;
0388 
0389     union {
0390         StructData *s;
0391         ContentData *c;
0392     };
0393 
0394     StructElement(Dict *elementDict, StructTreeRoot *treeRootA, StructElement *parentA, std::set<int> &seen);
0395     StructElement(int mcid, StructTreeRoot *treeRootA, StructElement *parentA);
0396     StructElement(const Ref ref, StructTreeRoot *treeRootA, StructElement *parentA);
0397 
0398     void parse(Dict *elementDict);
0399     StructElement *parseChild(const Object *ref, Object *childObj, std::set<int> &seen);
0400     void parseChildren(Dict *element, std::set<int> &seen);
0401     void parseAttributes(Dict *attributes, bool keepExisting = false);
0402 
0403     friend class StructTreeRoot;
0404 };
0405 
0406 #endif