File indexing completed on 2025-12-10 10:23:51
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015 #ifndef STRUCTELEMENT_H
0016 #define STRUCTELEMENT_H
0017
0018 #include "goo/GooString.h"
0019 #include "MarkedContentOutputDev.h"
0020 #include "Object.h"
0021 #include "poppler_private_export.h"
0022 #include <vector>
0023 #include <set>
0024
0025 class GooString;
0026 class Dict;
0027 class StructElement;
0028 class StructTreeRoot;
0029
0030 class POPPLER_PRIVATE_EXPORT Attribute
0031 {
0032 public:
0033 enum Type
0034 {
0035 Unknown = 0,
0036 UserProperty,
0037
0038
0039 Placement,
0040 WritingMode,
0041 BackgroundColor,
0042 BorderColor,
0043 BorderStyle,
0044 BorderThickness,
0045 Color,
0046 Padding,
0047
0048
0049 SpaceBefore,
0050 SpaceAfter,
0051 StartIndent,
0052 EndIndent,
0053 TextIndent,
0054 TextAlign,
0055 BBox,
0056 Width,
0057 Height,
0058 BlockAlign,
0059 InlineAlign,
0060 TBorderStyle,
0061 TPadding,
0062
0063
0064 BaselineShift,
0065 LineHeight,
0066 TextDecorationColor,
0067 TextDecorationThickness,
0068 TextDecorationType,
0069 RubyAlign,
0070 RubyPosition,
0071 GlyphOrientationVertical,
0072
0073
0074 ColumnCount,
0075 ColumnGap,
0076 ColumnWidths,
0077
0078
0079 ListNumbering,
0080
0081
0082 Role,
0083 checked,
0084 Desc,
0085
0086
0087 RowSpan,
0088 ColSpan,
0089 Headers,
0090 Scope,
0091 Summary,
0092 };
0093
0094 enum Owner
0095 {
0096 UnknownOwner = 0,
0097
0098 UserProperties,
0099
0100 Layout,
0101 List,
0102 PrintField,
0103 Table,
0104
0105 XML_1_00,
0106 HTML_3_20,
0107 HTML_4_01,
0108 OEB_1_00,
0109 RTF_1_05,
0110 CSS_1_00,
0111 CSS_2_00,
0112 };
0113
0114
0115
0116 Attribute(Type type, Object *value);
0117
0118
0119 Attribute(GooString &&name, Object *value);
0120
0121 bool isOk() const { return type != Unknown; }
0122
0123
0124 Type getType() const { return type; }
0125 Owner getOwner() const { return owner; }
0126 const char *getTypeName() const;
0127 const char *getOwnerName() const;
0128 const Object *getValue() const { return &value; }
0129 static Object *getDefaultValue(Type type);
0130
0131
0132 std::unique_ptr<GooString> getName() const { return std::make_unique<GooString>(type == UserProperty ? name.c_str() : getTypeName()); }
0133
0134
0135 unsigned int getRevision() const { return revision; }
0136 void setRevision(unsigned int revisionA) { revision = revisionA; }
0137
0138
0139 bool isHidden() const { return hidden; }
0140 void setHidden(bool hiddenA) { hidden = hiddenA; }
0141
0142
0143
0144 const char *getFormattedValue() const { return formatted ? formatted->c_str() : nullptr; }
0145 void setFormattedValue(const char *formattedA);
0146
0147 ~Attribute();
0148
0149 private:
0150 Type type;
0151 Owner owner;
0152 unsigned int revision;
0153 GooString name;
0154 Object value;
0155 bool hidden;
0156 GooString *formatted;
0157
0158 bool checkType(StructElement *element = nullptr);
0159 static Type getTypeForName(const char *name, StructElement *element = nullptr);
0160 static Attribute *parseUserProperty(Dict *property);
0161
0162 friend class StructElement;
0163 };
0164
0165 class POPPLER_PRIVATE_EXPORT StructElement
0166 {
0167 public:
0168 enum Type
0169 {
0170 Unknown = 0,
0171 MCID,
0172 OBJR,
0173
0174 Document,
0175 Part,
0176 Art,
0177 Sect,
0178 Div,
0179
0180 Span,
0181 Quote,
0182 Note,
0183 Reference,
0184 BibEntry,
0185 Code,
0186 Link,
0187 Annot,
0188 BlockQuote,
0189 Caption,
0190 NonStruct,
0191 TOC,
0192 TOCI,
0193 Index,
0194 Private,
0195
0196 P,
0197 H,
0198 H1,
0199 H2,
0200 H3,
0201 H4,
0202 H5,
0203 H6,
0204
0205 L,
0206 LI,
0207 Lbl,
0208 LBody,
0209
0210 Table,
0211 TR,
0212 TH,
0213 TD,
0214 THead,
0215 TFoot,
0216 TBody,
0217
0218 Ruby,
0219 RB,
0220 RT,
0221 RP,
0222 Warichu,
0223 WT,
0224 WP,
0225
0226 Figure,
0227 Formula,
0228 Form,
0229 };
0230
0231 static const Ref InvalidRef;
0232
0233 const char *getTypeName() const;
0234 Type getType() const { return type; }
0235 bool isOk() const { return type != Unknown; }
0236 bool isBlock() const;
0237 bool isInline() const;
0238 bool isGrouping() const;
0239
0240 inline bool isContent() const { return (type == MCID) || isObjectRef(); }
0241 inline bool isObjectRef() const { return (type == OBJR && c->ref != Ref::INVALID()); }
0242
0243 int getMCID() const { return c->mcid; }
0244 Ref getObjectRef() const { return c->ref; }
0245 Ref getParentRef() { return isContent() ? parent->getParentRef() : s->parentRef; }
0246 bool hasPageRef() const;
0247 bool getPageRef(Ref &ref) const;
0248 StructTreeRoot *getStructTreeRoot() { return treeRoot; }
0249
0250
0251 const GooString *getID() const { return isContent() ? nullptr : s->id; }
0252 GooString *getID() { return isContent() ? nullptr : s->id; }
0253
0254
0255 GooString *getLanguage()
0256 {
0257 if (!isContent() && s->language) {
0258 return s->language;
0259 }
0260 return parent ? parent->getLanguage() : nullptr;
0261 }
0262 const GooString *getLanguage() const
0263 {
0264 if (!isContent() && s->language) {
0265 return s->language;
0266 }
0267 return parent ? parent->getLanguage() : nullptr;
0268 }
0269
0270
0271 unsigned int getRevision() const { return isContent() ? 0 : s->revision; }
0272 void setRevision(unsigned int revision)
0273 {
0274 if (isContent()) {
0275 s->revision = revision;
0276 }
0277 }
0278
0279
0280 const GooString *getTitle() const { return isContent() ? nullptr : s->title; }
0281 GooString *getTitle() { return isContent() ? nullptr : s->title; }
0282
0283
0284 const GooString *getExpandedAbbr() const { return isContent() ? nullptr : s->expandedAbbr; }
0285 GooString *getExpandedAbbr() { return isContent() ? nullptr : s->expandedAbbr; }
0286
0287 unsigned getNumChildren() const { return isContent() ? 0 : s->elements.size(); }
0288 const StructElement *getChild(int i) const { return isContent() ? nullptr : s->elements.at(i); }
0289 StructElement *getChild(int i) { return isContent() ? nullptr : s->elements.at(i); }
0290
0291 void appendChild(StructElement *element)
0292 {
0293 if (!isContent() && element && element->isOk()) {
0294 s->elements.push_back(element);
0295 }
0296 }
0297
0298 unsigned getNumAttributes() const { return isContent() ? 0 : s->attributes.size(); }
0299 const Attribute *getAttribute(int i) const { return isContent() ? nullptr : s->attributes.at(i); }
0300 Attribute *getAttribute(int i) { return isContent() ? nullptr : s->attributes.at(i); }
0301
0302 void appendAttribute(Attribute *attribute)
0303 {
0304 if (!isContent() && attribute) {
0305 s->attributes.push_back(attribute);
0306 }
0307 }
0308
0309 const Attribute *findAttribute(Attribute::Type attributeType, bool inherit = false, Attribute::Owner owner = Attribute::UnknownOwner) const;
0310
0311 const GooString *getAltText() const { return isContent() ? nullptr : s->altText; }
0312 GooString *getAltText() { return isContent() ? nullptr : s->altText; }
0313
0314 const GooString *getActualText() const { return isContent() ? nullptr : s->actualText; }
0315 GooString *getActualText() { return isContent() ? nullptr : s->actualText; }
0316
0317
0318
0319
0320
0321
0322
0323
0324
0325
0326
0327
0328
0329
0330 GooString *getText(bool recursive = true) const { return appendSubTreeText(nullptr, recursive); }
0331
0332 const TextSpanArray getTextSpans() const
0333 {
0334 if (!isContent()) {
0335 return TextSpanArray();
0336 }
0337 MarkedContentOutputDev mcdev(getMCID(), stmRef);
0338 return getTextSpansInternal(mcdev);
0339 }
0340
0341 ~StructElement();
0342
0343 private:
0344 GooString *appendSubTreeText(GooString *string, bool recursive) const;
0345 const TextSpanArray &getTextSpansInternal(MarkedContentOutputDev &mcdev) const;
0346
0347 typedef std::vector<Attribute *> AttrPtrArray;
0348 typedef std::vector<StructElement *> ElemPtrArray;
0349
0350 struct StructData
0351 {
0352 Ref parentRef;
0353 GooString *altText;
0354 GooString *actualText;
0355 GooString *id;
0356 GooString *title;
0357 GooString *expandedAbbr;
0358 GooString *language;
0359 unsigned int revision;
0360 ElemPtrArray elements;
0361 AttrPtrArray attributes;
0362
0363 StructData();
0364 ~StructData();
0365
0366 StructData(const StructData &) = delete;
0367 StructData &operator=(const StructData &) = delete;
0368 };
0369
0370
0371 struct ContentData
0372 {
0373 union {
0374 int mcid;
0375 Ref ref;
0376 };
0377
0378 explicit ContentData(int mcidA) : mcid(mcidA) { }
0379 explicit ContentData(const Ref r) { ref = r; }
0380 };
0381
0382
0383 Type type;
0384 StructTreeRoot *treeRoot;
0385 StructElement *parent;
0386 mutable Object pageRef;
0387 Object stmRef;
0388
0389 union {
0390 StructData *s;
0391 ContentData *c;
0392 };
0393
0394 StructElement(Dict *elementDict, StructTreeRoot *treeRootA, StructElement *parentA, std::set<int> &seen);
0395 StructElement(int mcid, StructTreeRoot *treeRootA, StructElement *parentA);
0396 StructElement(const Ref ref, StructTreeRoot *treeRootA, StructElement *parentA);
0397
0398 void parse(Dict *elementDict);
0399 StructElement *parseChild(const Object *ref, Object *childObj, std::set<int> &seen);
0400 void parseChildren(Dict *element, std::set<int> &seen);
0401 void parseAttributes(Dict *attributes, bool keepExisting = false);
0402
0403 friend class StructTreeRoot;
0404 };
0405
0406 #endif