File indexing completed on 2025-02-22 10:41:49
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013 #ifndef __HTML_PARSER_H__
0014 #define __HTML_PARSER_H__
0015 #include <libxml/xmlversion.h>
0016 #include <libxml/parser.h>
0017
0018 #ifdef LIBXML_HTML_ENABLED
0019
0020 #ifdef __cplusplus
0021 extern "C" {
0022 #endif
0023
0024
0025
0026
0027 typedef xmlParserCtxt htmlParserCtxt;
0028 typedef xmlParserCtxtPtr htmlParserCtxtPtr;
0029 typedef xmlParserNodeInfo htmlParserNodeInfo;
0030 typedef xmlSAXHandler htmlSAXHandler;
0031 typedef xmlSAXHandlerPtr htmlSAXHandlerPtr;
0032 typedef xmlParserInput htmlParserInput;
0033 typedef xmlParserInputPtr htmlParserInputPtr;
0034 typedef xmlDocPtr htmlDocPtr;
0035 typedef xmlNodePtr htmlNodePtr;
0036
0037
0038
0039
0040
/**
 * Descriptor for one HTML element.
 *
 * NOTE(review): the per-field notes below are summarised from the upstream
 * libxml2 API documentation, not derived from this header; confirm them
 * against the library version in use. Field order and types are part of
 * the public layout and must not change.
 */
typedef struct _htmlElemDesc htmlElemDesc;
typedef htmlElemDesc *htmlElemDescPtr;

struct _htmlElemDesc {
    const char *name;           /* tag name */
    char        startTag;       /* start tag may be implied */
    char        endTag;         /* end tag may be implied */
    char        saveEndTag;     /* end tag should be saved */
    char        empty;          /* element is an empty element */
    char        depr;           /* element is deprecated */
    char        dtd;            /* 1: Loose DTD only, 2: Frameset DTD only */
    char        isinline;       /* 0: block element, 1: inline element */
    const char *desc;           /* human-readable description */

    /* Structural information about the element's content. */
    const char **subelts;       /* allowed sub-elements */
    const char  *defaultsubelt; /* sub-element suggested for auto-repair,
                                   or NULL */

    /* Attribute name lists. */
    const char **attrs_opt;     /* optional attributes */
    const char **attrs_depr;    /* additional deprecated attributes */
    const char **attrs_req;     /* required attributes */
};
0071
0072
0073
0074
/**
 * Descriptor for one HTML entity.
 *
 * NOTE(review): field notes are summarised from the upstream libxml2 API
 * documentation; confirm against the library version in use.
 */
typedef struct _htmlEntityDesc htmlEntityDesc;
typedef htmlEntityDesc *htmlEntityDescPtr;

struct _htmlEntityDesc {
    unsigned int value;     /* Unicode value of the character */
    const char  *name;      /* entity name */
    const char  *desc;      /* human-readable description */
};
0082
#if defined(LIBXML_SAX1_ENABLED)

/*
 * Deprecated SAX1 handler table for the HTML parser. Only declared when
 * SAX1 support is compiled in.
 */
XML_DEPRECATED
XMLPUBVAR const xmlSAXHandlerV1 htmlDefaultSAXHandler;

#if defined(LIBXML_THREAD_ENABLED)
/*
 * Deprecated accessor returning a pointer to a SAX1 handler table. Only
 * declared when thread support is compiled in.
 */
XML_DEPRECATED
XMLPUBFUN const xmlSAXHandlerV1 *__htmlDefaultSAXHandler(void);
#endif /* LIBXML_THREAD_ENABLED */

#endif /* LIBXML_SAX1_ENABLED */
0094
0095
0096
0097
0098 XML_DEPRECATED
0099 XMLPUBFUN void
0100 htmlInitAutoClose (void);
0101 XMLPUBFUN const htmlElemDesc *
0102 htmlTagLookup (const xmlChar *tag);
0103 XMLPUBFUN const htmlEntityDesc *
0104 htmlEntityLookup(const xmlChar *name);
0105 XMLPUBFUN const htmlEntityDesc *
0106 htmlEntityValueLookup(unsigned int value);
0107
0108 XMLPUBFUN int
0109 htmlIsAutoClosed(htmlDocPtr doc,
0110 htmlNodePtr elem);
0111 XMLPUBFUN int
0112 htmlAutoCloseTag(htmlDocPtr doc,
0113 const xmlChar *name,
0114 htmlNodePtr elem);
0115 XML_DEPRECATED
0116 XMLPUBFUN const htmlEntityDesc *
0117 htmlParseEntityRef(htmlParserCtxtPtr ctxt,
0118 const xmlChar **str);
0119 XML_DEPRECATED
0120 XMLPUBFUN int
0121 htmlParseCharRef(htmlParserCtxtPtr ctxt);
0122 XML_DEPRECATED
0123 XMLPUBFUN void
0124 htmlParseElement(htmlParserCtxtPtr ctxt);
0125
0126 XMLPUBFUN htmlParserCtxtPtr
0127 htmlNewParserCtxt(void);
0128 XMLPUBFUN htmlParserCtxtPtr
0129 htmlNewSAXParserCtxt(const htmlSAXHandler *sax,
0130 void *userData);
0131
0132 XMLPUBFUN htmlParserCtxtPtr
0133 htmlCreateMemoryParserCtxt(const char *buffer,
0134 int size);
0135
0136 XMLPUBFUN int
0137 htmlParseDocument(htmlParserCtxtPtr ctxt);
0138 XML_DEPRECATED
0139 XMLPUBFUN htmlDocPtr
0140 htmlSAXParseDoc (const xmlChar *cur,
0141 const char *encoding,
0142 htmlSAXHandlerPtr sax,
0143 void *userData);
0144 XMLPUBFUN htmlDocPtr
0145 htmlParseDoc (const xmlChar *cur,
0146 const char *encoding);
0147 XMLPUBFUN htmlParserCtxtPtr
0148 htmlCreateFileParserCtxt(const char *filename,
0149 const char *encoding);
0150 XML_DEPRECATED
0151 XMLPUBFUN htmlDocPtr
0152 htmlSAXParseFile(const char *filename,
0153 const char *encoding,
0154 htmlSAXHandlerPtr sax,
0155 void *userData);
0156 XMLPUBFUN htmlDocPtr
0157 htmlParseFile (const char *filename,
0158 const char *encoding);
0159 XMLPUBFUN int
0160 UTF8ToHtml (unsigned char *out,
0161 int *outlen,
0162 const unsigned char *in,
0163 int *inlen);
0164 XMLPUBFUN int
0165 htmlEncodeEntities(unsigned char *out,
0166 int *outlen,
0167 const unsigned char *in,
0168 int *inlen, int quoteChar);
0169 XMLPUBFUN int
0170 htmlIsScriptAttribute(const xmlChar *name);
0171 XML_DEPRECATED
0172 XMLPUBFUN int
0173 htmlHandleOmittedElem(int val);
0174
#if defined(LIBXML_PUSH_ENABLED)

/*
 * Push (incremental) parsing interface; only declared when push support
 * is compiled in.
 */

/*
 * Create a push parser context. `chunk`/`size` describe an initial block
 * of input, `filename` names the source and `enc` selects its encoding.
 */
XMLPUBFUN htmlParserCtxtPtr htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax,
                                                     void *user_data,
                                                     const char *chunk,
                                                     int size,
                                                     const char *filename,
                                                     xmlCharEncoding enc);

/*
 * Feed `size` bytes at `chunk` to the context. `terminate` is an int flag;
 * presumably non-zero marks the final chunk (confirm with upstream docs).
 */
XMLPUBFUN int htmlParseChunk(htmlParserCtxtPtr ctxt,
                             const char *chunk,
                             int size,
                             int terminate);

#endif /* LIBXML_PUSH_ENABLED */
0192
0193 XMLPUBFUN void
0194 htmlFreeParserCtxt (htmlParserCtxtPtr ctxt);
0195
0196
0197
0198
0199
0200
0201
0202
0203
0204
/**
 * Parser option flags.
 *
 * Each constant is a distinct single bit, so values can be combined with
 * bitwise OR. The bit positions are not contiguous.
 *
 * NOTE(review): the one-line meanings are summarised from the upstream
 * libxml2 API documentation; confirm against the library version in use.
 */
typedef enum {
    HTML_PARSE_RECOVER    = 1 << 0,     /* relaxed parsing */
    HTML_PARSE_NODEFDTD   = 1 << 2,     /* do not default a doctype */
    HTML_PARSE_NOERROR    = 1 << 5,     /* suppress error reports */
    HTML_PARSE_NOWARNING  = 1 << 6,     /* suppress warning reports */
    HTML_PARSE_PEDANTIC   = 1 << 7,     /* pedantic error reporting */
    HTML_PARSE_NOBLANKS   = 1 << 8,     /* remove blank nodes */
    HTML_PARSE_NONET      = 1 << 11,    /* forbid network access */
    HTML_PARSE_NOIMPLIED  = 1 << 13,    /* do not add implied elements */
    HTML_PARSE_COMPACT    = 1 << 16,    /* compact small text nodes */
    HTML_PARSE_IGNORE_ENC = 1 << 21     /* ignore in-document encoding hint */
} htmlParserOption;
0217
0218 XMLPUBFUN void
0219 htmlCtxtReset (htmlParserCtxtPtr ctxt);
0220 XMLPUBFUN int
0221 htmlCtxtUseOptions (htmlParserCtxtPtr ctxt,
0222 int options);
0223 XMLPUBFUN htmlDocPtr
0224 htmlReadDoc (const xmlChar *cur,
0225 const char *URL,
0226 const char *encoding,
0227 int options);
0228 XMLPUBFUN htmlDocPtr
0229 htmlReadFile (const char *URL,
0230 const char *encoding,
0231 int options);
0232 XMLPUBFUN htmlDocPtr
0233 htmlReadMemory (const char *buffer,
0234 int size,
0235 const char *URL,
0236 const char *encoding,
0237 int options);
0238 XMLPUBFUN htmlDocPtr
0239 htmlReadFd (int fd,
0240 const char *URL,
0241 const char *encoding,
0242 int options);
0243 XMLPUBFUN htmlDocPtr
0244 htmlReadIO (xmlInputReadCallback ioread,
0245 xmlInputCloseCallback ioclose,
0246 void *ioctx,
0247 const char *URL,
0248 const char *encoding,
0249 int options);
0250 XMLPUBFUN htmlDocPtr
0251 htmlCtxtParseDocument (htmlParserCtxtPtr ctxt,
0252 xmlParserInputPtr input);
0253 XMLPUBFUN htmlDocPtr
0254 htmlCtxtReadDoc (xmlParserCtxtPtr ctxt,
0255 const xmlChar *cur,
0256 const char *URL,
0257 const char *encoding,
0258 int options);
0259 XMLPUBFUN htmlDocPtr
0260 htmlCtxtReadFile (xmlParserCtxtPtr ctxt,
0261 const char *filename,
0262 const char *encoding,
0263 int options);
0264 XMLPUBFUN htmlDocPtr
0265 htmlCtxtReadMemory (xmlParserCtxtPtr ctxt,
0266 const char *buffer,
0267 int size,
0268 const char *URL,
0269 const char *encoding,
0270 int options);
0271 XMLPUBFUN htmlDocPtr
0272 htmlCtxtReadFd (xmlParserCtxtPtr ctxt,
0273 int fd,
0274 const char *URL,
0275 const char *encoding,
0276 int options);
0277 XMLPUBFUN htmlDocPtr
0278 htmlCtxtReadIO (xmlParserCtxtPtr ctxt,
0279 xmlInputReadCallback ioread,
0280 xmlInputCloseCallback ioclose,
0281 void *ioctx,
0282 const char *URL,
0283 const char *encoding,
0284 int options);
0285
0286
0287
/**
 * Validity classification returned by the status query functions.
 *
 * The values are bit patterns rather than a sequence: HTML_REQUIRED (0xc)
 * is 0x8 combined with the HTML_VALID bit (0x4), so a test against
 * HTML_VALID also matches required items.
 */
typedef enum {
    HTML_NA         = 0,    /* no bits set */
    HTML_INVALID    = 0x1,
    HTML_DEPRECATED = 0x2,
    HTML_VALID      = 0x4,
    HTML_REQUIRED   = 0xc   /* 0x8 | HTML_VALID */
} htmlStatus;
0295
0296
0297
0298
0299 XMLPUBFUN htmlStatus htmlAttrAllowed(const htmlElemDesc*, const xmlChar*, int) ;
0300 XMLPUBFUN int htmlElementAllowedHere(const htmlElemDesc*, const xmlChar*) ;
0301 XMLPUBFUN htmlStatus htmlElementStatusHere(const htmlElemDesc*, const htmlElemDesc*) ;
0302 XMLPUBFUN htmlStatus htmlNodeStatus(htmlNodePtr, int) ;
0303
0304
0305
0306
0307
0308
/**
 * Yield the `defaultsubelt` member of the element descriptor that `elt`
 * points to.
 *
 * The argument and the whole expansion are parenthesized so that
 * expression arguments such as `table + 1` bind correctly; `elt` is
 * evaluated exactly once.
 */
#define htmlDefaultSubelement(elt) ((elt)->defaultsubelt)
0310
0311
0312
0313
0314
0315
0316
0317
0318
0319
/**
 * Descriptor-based form of htmlElementAllowedHere(): forwards `parent`
 * unchanged and passes the `name` member of the descriptor `elt` points to
 * as the element name. Each argument is evaluated once.
 */
#define htmlElementAllowedHereDesc(parent, elt)                 \
    htmlElementAllowedHere((parent), (elt)->name)
0322
0323
0324
0325
0326
0327
/**
 * Yield the `attrs_req` member of the element descriptor that `elt`
 * points to.
 *
 * The expansion is wrapped in parentheses so it behaves as a single
 * primary expression wherever it is used; `elt` is evaluated exactly once.
 */
#define htmlRequiredAttrs(elt) ((elt)->attrs_req)
0329
0330
0331 #ifdef __cplusplus
0332 }
0333 #endif
0334
0335 #endif
0336 #endif