// Listing captured from an LXR source index. File indexing completed on 2025-01-30 10:27:28.
0001 /* 0002 * Licensed to the Apache Software Foundation (ASF) under one or more 0003 * contributor license agreements. See the NOTICE file distributed with 0004 * this work for additional information regarding copyright ownership. 0005 * The ASF licenses this file to You under the Apache License, Version 2.0 0006 * (the "License"); you may not use this file except in compliance with 0007 * the License. You may obtain a copy of the License at 0008 * 0009 * http://www.apache.org/licenses/LICENSE-2.0 0010 * 0011 * Unless required by applicable law or agreed to in writing, software 0012 * distributed under the License is distributed on an "AS IS" BASIS, 0013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 0014 * See the License for the specific language governing permissions and 0015 * limitations under the License. 0016 */ 0017 0018 /* 0019 * $Id$ 0020 */ 0021 0022 #if !defined(XERCESC_INCLUDE_GUARD_XMLURI_HPP) 0023 #define XERCESC_INCLUDE_GUARD_XMLURI_HPP 0024 0025 #include <xercesc/util/XMemory.hpp> 0026 #include <xercesc/util/XMLString.hpp> 0027 0028 #include <xercesc/internal/XSerializable.hpp> 0029 #include <xercesc/framework/XMLBuffer.hpp> 0030 0031 XERCES_CPP_NAMESPACE_BEGIN 0032 0033 /* 0034 * This class is a direct port of Java's URI class, to distinguish 0035 * itself from the XMLURL, we use the name XMLUri instead of 0036 * XMLURI. 0037 * 0038 * TODO: how to relate XMLUri and XMLURL since URL is part of URI. 0039 * 0040 */ 0041 0042 class XMLUTIL_EXPORT XMLUri : public XSerializable, public XMemory 0043 { 0044 public: 0045 0046 // ----------------------------------------------------------------------- 0047 // Constructors and Destructor 0048 // ----------------------------------------------------------------------- 0049 0050 /** 0051 * Construct a new URI from a URI specification string. 
0052 * 0053 * If the specification follows the "generic URI" syntax, (two slashes 0054 * following the first colon), the specification will be parsed 0055 * accordingly - setting the 0056 * scheme, 0057 * userinfo, 0058 * host, 0059 * port, 0060 * path, 0061 * querystring and 0062 * fragment 0063 * fields as necessary. 0064 * 0065 * If the specification does not follow the "generic URI" syntax, 0066 * the specification is parsed into a 0067 * scheme and 0068 * scheme-specific part (stored as the path) only. 0069 * 0070 * @param uriSpec the URI specification string (cannot be null or empty) 0071 * 0072 * @param manager Pointer to the memory manager to be used to 0073 * allocate objects. 0074 * 0075 * ctor# 2 0076 * 0077 */ 0078 XMLUri(const XMLCh* const uriSpec, 0079 MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager); 0080 0081 /** 0082 * Construct a new URI from a base URI and a URI specification string. 0083 * The URI specification string may be a relative URI. 0084 * 0085 * @param baseURI the base URI (cannot be null if uriSpec is null or 0086 * empty) 0087 * 0088 * @param uriSpec the URI specification string (cannot be null or 0089 * empty if base is null) 0090 * 0091 * @param manager Pointer to the memory manager to be used to 0092 * allocate objects. 0093 * 0094 * ctor# 7 relative ctor 0095 * 0096 */ 0097 XMLUri(const XMLUri* const baseURI 0098 , const XMLCh* const uriSpec 0099 , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager); 0100 0101 /** 0102 * Copy constructor 0103 */ 0104 XMLUri(const XMLUri& toCopy); 0105 XMLUri& operator=(const XMLUri& toAssign); 0106 0107 virtual ~XMLUri(); 0108 0109 // ----------------------------------------------------------------------- 0110 // Getter methods 0111 // ----------------------------------------------------------------------- 0112 /** 0113 * Get the URI as a string specification. See RFC 2396 Section 5.2. 
0114 * 0115 * @return the URI string specification 0116 */ 0117 const XMLCh* getUriText() const; 0118 0119 /** 0120 * Get the scheme for this URI. 0121 * 0122 * @return the scheme for this URI 0123 */ 0124 const XMLCh* getScheme() const; 0125 0126 /** 0127 * Get the userinfo for this URI. 0128 * 0129 * @return the userinfo for this URI (null if not specified). 0130 */ 0131 const XMLCh* getUserInfo() const; 0132 0133 0134 /** 0135 * Get the host for this URI. 0136 * 0137 * @return the host for this URI (null if not specified). 0138 */ 0139 const XMLCh* getHost() const; 0140 0141 /** 0142 * Get the port for this URI. 0143 * 0144 * @return the port for this URI (-1 if not specified). 0145 */ 0146 int getPort() const; 0147 0148 /** 0149 * Get the registry based authority for this URI. 0150 * 0151 * @return the registry based authority (null if not specified). 0152 */ 0153 const XMLCh* getRegBasedAuthority() const; 0154 0155 /** 0156 * Get the path for this URI. Note that the value returned is the path 0157 * only and does not include the query string or fragment. 0158 * 0159 * @return the path for this URI. 0160 */ 0161 const XMLCh* getPath() const; 0162 0163 /** 0164 * Get the query string for this URI. 0165 * 0166 * @return the query string for this URI. Null is returned if there 0167 * was no "?" in the URI spec, empty string if there was a 0168 * "?" but no query string following it. 0169 */ 0170 const XMLCh* getQueryString() const; 0171 0172 /** 0173 * Get the fragment for this URI. 0174 * 0175 * @return the fragment for this URI. Null is returned if there 0176 * was no "#" in the URI spec, empty string if there was a 0177 * "#" but no fragment following it. 0178 */ 0179 const XMLCh* getFragment() const; 0180 0181 // ----------------------------------------------------------------------- 0182 // Setter methods 0183 // ----------------------------------------------------------------------- 0184 0185 /** 0186 * Set the scheme for this URI. 
The scheme is converted to lowercase 0187 * before it is set. 0188 * 0189 * @param newScheme the scheme for this URI (cannot be null) 0190 * 0191 */ 0192 void setScheme(const XMLCh* const newScheme); 0193 0194 /** 0195 * Set the userinfo for this URI. If a non-null value is passed in and 0196 * the host value is null, then an exception is thrown. 0197 * 0198 * @param newUserInfo the userinfo for this URI 0199 * 0200 */ 0201 void setUserInfo(const XMLCh* const newUserInfo); 0202 0203 /** 0204 * Set the host for this URI. If null is passed in, the userinfo 0205 * field is also set to null and the port is set to -1. 0206 * 0207 * Note: This method overwrites registry based authority if it 0208 * previously existed in this URI. 0209 * 0210 * @param newHost the host for this URI 0211 * 0212 */ 0213 void setHost(const XMLCh* const newHost); 0214 0215 /** 0216 * Set the port for this URI. -1 is used to indicate that the port is 0217 * not specified, otherwise valid port numbers are between 0 and 65535. 0218 * If a valid port number is passed in and the host field is null, 0219 * an exception is thrown. 0220 * 0221 * @param newPort the port number for this URI 0222 * 0223 */ 0224 void setPort(int newPort); 0225 0226 /** 0227 * Sets the registry based authority for this URI. 0228 * 0229 * Note: This method overwrites server based authority 0230 * if it previously existed in this URI. 0231 * 0232 * @param newRegAuth the registry based authority for this URI 0233 */ 0234 void setRegBasedAuthority(const XMLCh* const newRegAuth); 0235 0236 /** 0237 * Set the path for this URI. 0238 * 0239 * If the supplied path is null, then the 0240 * query string and fragment are set to null as well. 0241 * 0242 * If the supplied path includes a query string and/or fragment, 0243 * these fields will be parsed and set as well. 0244 * 0245 * Note: 0246 * 0247 * For URIs following the "generic URI" syntax, the path 0248 * specified should start with a slash. 
0249 * 0250 * For URIs that do not follow the generic URI syntax, this method 0251 * sets the scheme-specific part. 0252 * 0253 * @param newPath the path for this URI (may be null) 0254 * 0255 */ 0256 void setPath(const XMLCh* const newPath); 0257 0258 /** 0259 * Set the query string for this URI. A non-null value is valid only 0260 * if this is an URI conforming to the generic URI syntax and 0261 * the path value is not null. 0262 * 0263 * @param newQueryString the query string for this URI 0264 * 0265 */ 0266 void setQueryString(const XMLCh* const newQueryString); 0267 0268 /** 0269 * Set the fragment for this URI. A non-null value is valid only 0270 * if this is a URI conforming to the generic URI syntax and 0271 * the path value is not null. 0272 * 0273 * @param newFragment the fragment for this URI 0274 * 0275 */ 0276 void setFragment(const XMLCh* const newFragment); 0277 0278 // ----------------------------------------------------------------------- 0279 // Miscellaneous methods 0280 // ----------------------------------------------------------------------- 0281 0282 /** 0283 * Determine whether a given string contains only URI characters (also 0284 * called "uric" in RFC 2396). uric consist of all reserved 0285 * characters, unreserved characters and escaped characters. 
0286 * 0287 * @return true if the string is comprised of uric, false otherwise 0288 */ 0289 static bool isURIString(const XMLCh* const uric); 0290 0291 /** 0292 * Determine whether a given string is a valid URI 0293 */ 0294 static bool isValidURI( const XMLUri* const baseURI 0295 , const XMLCh* const uriStr 0296 , bool bAllowSpaces=false); 0297 /** 0298 * Determine whether a given string is a valid URI 0299 */ 0300 static bool isValidURI( bool haveBaseURI 0301 , const XMLCh* const uriStr 0302 , bool bAllowSpaces=false); 0303 0304 0305 static void normalizeURI(const XMLCh* const systemURI, 0306 XMLBuffer& normalizedURI); 0307 0308 /*** 0309 * Support for Serialization/De-serialization 0310 ***/ 0311 DECL_XSERIALIZABLE(XMLUri) 0312 0313 XMLUri(MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager); 0314 0315 private: 0316 0317 static const XMLCh MARK_OR_RESERVED_CHARACTERS[]; 0318 static const XMLCh RESERVED_CHARACTERS[]; 0319 static const XMLCh MARK_CHARACTERS[]; 0320 static const XMLCh SCHEME_CHARACTERS[]; 0321 static const XMLCh USERINFO_CHARACTERS[]; 0322 static const XMLCh REG_NAME_CHARACTERS[]; 0323 static const XMLCh PATH_CHARACTERS[]; 0324 0325 //helper method for getUriText 0326 void buildFullText(); 0327 0328 // ----------------------------------------------------------------------- 0329 // Private helper methods 0330 // ----------------------------------------------------------------------- 0331 0332 /** 0333 * Determine whether a character is a reserved character: 0334 * 0335 * @return true if the string contains any reserved characters 0336 */ 0337 static bool isReservedCharacter(const XMLCh theChar); 0338 0339 /** 0340 * Determine whether a character is a path character: 0341 * 0342 * @return true if the character is path character 0343 */ 0344 static bool isPathCharacter(const XMLCh theChar); 0345 0346 /** 0347 * Determine whether a char is an unreserved character. 
0348 * 0349 * @return true if the char is unreserved, false otherwise 0350 */ 0351 static bool isUnreservedCharacter(const XMLCh theChar); 0352 0353 /** 0354 * Determine whether a char is an reserved or unreserved character. 0355 * 0356 * @return true if the char is reserved or unreserved, false otherwise 0357 */ 0358 static bool isReservedOrUnreservedCharacter(const XMLCh theChar); 0359 0360 /** 0361 * Determine whether a scheme conforms to the rules for a scheme name. 0362 * A scheme is conformant if it starts with an alphanumeric, and 0363 * contains only alphanumerics, '+','-' and '.'. 0364 * 0365 * @return true if the scheme is conformant, false otherwise 0366 */ 0367 static bool isConformantSchemeName(const XMLCh* const scheme); 0368 0369 /** 0370 * Determine whether a userInfo conforms to the rules for a userinfo. 0371 * 0372 * @return true if the scheme is conformant, false otherwise 0373 */ 0374 static void isConformantUserInfo(const XMLCh* const userInfo 0375 , MemoryManager* const manager); 0376 0377 /** 0378 * Determines whether the components host, port, and user info 0379 * are valid as a server authority. 0380 * 0381 * @return true if the given host, port, and userinfo compose 0382 * a valid server authority 0383 */ 0384 static bool isValidServerBasedAuthority(const XMLCh* const host 0385 , const XMLSize_t hostLen 0386 , const int port 0387 , const XMLCh* const userinfo 0388 , const XMLSize_t userLen); 0389 0390 /** 0391 * Determines whether the components host, port, and user info 0392 * are valid as a server authority. 0393 * 0394 * @return true if the given host, port, and userinfo compose 0395 * a valid server authority 0396 */ 0397 static bool isValidServerBasedAuthority(const XMLCh* const host 0398 , const int port 0399 , const XMLCh* const userinfo 0400 , MemoryManager* const manager); 0401 0402 /** 0403 * Determines whether the given string is a registry based authority. 
0404 * 0405 * @param authority the authority component of a URI 0406 * 0407 * @return true if the given string is a registry based authority 0408 */ 0409 static bool isValidRegistryBasedAuthority(const XMLCh* const authority 0410 , const XMLSize_t authLen); 0411 0412 /** 0413 * Determines whether the given string is a registry based authority. 0414 * 0415 * @param authority the authority component of a URI 0416 * 0417 * @return true if the given string is a registry based authority 0418 */ 0419 static bool isValidRegistryBasedAuthority(const XMLCh* const authority); 0420 0421 /** 0422 * Determine whether a string is syntactically capable of representing 0423 * a valid IPv4 address, IPv6 reference or the domain name of a network host. 0424 * 0425 * A valid IPv4 address consists of four decimal digit groups 0426 * separated by a '.'. 0427 * 0428 * See RFC 2732 Section 3, and RFC 2373 Section 2.2, for the 0429 * definition of IPv6 references. 0430 * 0431 * A hostname consists of domain labels (each of which must begin and 0432 * end with an alphanumeric but may contain '-') separated by a '.'. 0433 * See RFC 2396 Section 3.2.2. 0434 * 0435 * @return true if the string is a syntactically valid IPv4 address 0436 * or hostname 0437 */ 0438 static bool isWellFormedAddress(const XMLCh* const addr 0439 , MemoryManager* const manager); 0440 0441 /** 0442 * Determines whether a string is an IPv4 address as defined by 0443 * RFC 2373, and under the further constraint that it must be a 32-bit 0444 * address. Though not expressed in the grammar, in order to satisfy 0445 * the 32-bit address constraint, each segment of the address cannot 0446 * be greater than 255 (8 bits of information). 
0447 * 0448 * @return true if the string is a syntactically valid IPv4 address 0449 */ 0450 static bool isWellFormedIPv4Address(const XMLCh* const addr, const XMLSize_t length); 0451 0452 /** 0453 * Determines whether a string is an IPv6 reference as defined 0454 * by RFC 2732, where IPv6address is defined in RFC 2373. The 0455 * IPv6 address is parsed according to Section 2.2 of RFC 2373, 0456 * with the additional constraint that the address be composed of 0457 * 128 bits of information. 0458 * 0459 * Note: The BNF expressed in RFC 2373 Appendix B does not 0460 * accurately describe section 2.2, and was in fact removed from 0461 * RFC 3513, the successor of RFC 2373. 0462 * 0463 * @return true if the string is a syntactically valid IPv6 reference 0464 */ 0465 static bool isWellFormedIPv6Reference(const XMLCh* const addr, const XMLSize_t length); 0466 0467 /** 0468 * Helper function for isWellFormedIPv6Reference which scans the 0469 * hex sequences of an IPv6 address. It returns the index of the 0470 * next character to scan in the address, or -1 if the string 0471 * cannot match a valid IPv6 address. 0472 * 0473 * @param address the string to be scanned 0474 * @param index the beginning index (inclusive) 0475 * @param end the ending index (exclusive) 0476 * @param counter a counter for the number of 16-bit sections read 0477 * in the address 0478 * 0479 * @return the index of the next character to scan, or -1 if the 0480 * string cannot match a valid IPv6 address 0481 */ 0482 static int scanHexSequence (const XMLCh* const addr, XMLSize_t index, XMLSize_t end, int& counter); 0483 0484 /** 0485 * Get the indicator as to whether this URI uses the "generic URI" 0486 * syntax. 
0487 * 0488 * @return true if this URI uses the "generic URI" syntax, false 0489 * otherwise 0490 */ 0491 bool isGenericURI(); 0492 0493 // ----------------------------------------------------------------------- 0494 // Miscellaneous methods 0495 // ----------------------------------------------------------------------- 0496 0497 /** 0498 * Initialize all fields of this URI from another URI. 0499 * 0500 * @param toCopy the URI to copy (cannot be null) 0501 */ 0502 void initialize(const XMLUri& toCopy); 0503 0504 /** 0505 * Initializes this URI from a base URI and a URI specification string. 0506 * See RFC 2396 Section 4 and Appendix B for specifications on parsing 0507 * the URI and Section 5 for specifications on resolving relative URIs 0508 * and relative paths. 0509 * 0510 * @param baseURI the base URI (may be null if uriSpec is an absolute 0511 * URI) 0512 * 0513 * @param uriSpec the URI spec string which may be an absolute or 0514 * relative URI (can only be null/empty if base 0515 * is not null) 0516 * 0517 */ 0518 void initialize(const XMLUri* const baseURI 0519 , const XMLCh* const uriSpec); 0520 0521 /** 0522 * Initialize the scheme for this URI from a URI string spec. 0523 * 0524 * @param uriSpec the URI specification (cannot be null) 0525 * 0526 */ 0527 void initializeScheme(const XMLCh* const uriSpec); 0528 0529 /** 0530 * Initialize the authority (userinfo, host and port) for this 0531 * URI from a URI string spec. 0532 * 0533 * @param uriSpec the URI specification (cannot be null) 0534 * 0535 */ 0536 void initializeAuthority(const XMLCh* const uriSpec); 0537 0538 /** 0539 * Initialize the path for this URI from a URI string spec. 
0540 * 0541 * @param uriSpec the URI specification (cannot be null) 0542 * 0543 */ 0544 void initializePath(const XMLCh* const uriSpec); 0545 0546 /** 0547 * cleanup the data variables 0548 * 0549 */ 0550 void cleanUp(); 0551 0552 static bool isConformantSchemeName(const XMLCh* const scheme, 0553 const XMLSize_t schemeLen); 0554 static bool processScheme(const XMLCh* const uriStr, XMLSize_t& index); 0555 static bool processAuthority(const XMLCh* const uriStr, const XMLSize_t authLen); 0556 static bool isWellFormedAddress(const XMLCh* const addr, const XMLSize_t addrLen); 0557 static bool processPath(const XMLCh* const pathStr, const XMLSize_t pathStrLen, 0558 const bool isSchemePresent, const bool bAllowSpaces=false); 0559 0560 // ----------------------------------------------------------------------- 0561 // Data members 0562 // 0563 // for all the data member, we own it, 0564 // responsible for the creation and/or deletion for 0565 // the memory allocated. 0566 // 0567 // ----------------------------------------------------------------------- 0568 int fPort; 0569 XMLCh* fScheme; 0570 XMLCh* fUserInfo; 0571 XMLCh* fHost; 0572 XMLCh* fRegAuth; 0573 XMLCh* fPath; 0574 XMLCh* fQueryString; 0575 XMLCh* fFragment; 0576 XMLCh* fURIText; 0577 MemoryManager* fMemoryManager; 0578 }; 0579 0580 // --------------------------------------------------------------------------- 0581 // XMLUri: Getter methods 0582 // --------------------------------------------------------------------------- 0583 inline const XMLCh* XMLUri::getScheme() const 0584 { 0585 return fScheme; 0586 } 0587 0588 inline const XMLCh* XMLUri::getUserInfo() const 0589 { 0590 return fUserInfo; 0591 } 0592 0593 inline const XMLCh* XMLUri::getHost() const 0594 { 0595 return fHost; 0596 } 0597 0598 inline int XMLUri::getPort() const 0599 { 0600 return fPort; 0601 } 0602 0603 inline const XMLCh* XMLUri::getRegBasedAuthority() const 0604 { 0605 return fRegAuth; 0606 } 0607 0608 inline const XMLCh* XMLUri::getPath() 
const 0609 { 0610 return fPath; 0611 } 0612 0613 inline const XMLCh* XMLUri::getQueryString() const 0614 { 0615 return fQueryString; 0616 } 0617 0618 inline const XMLCh* XMLUri::getFragment() const 0619 { 0620 return fFragment; 0621 } 0622 0623 inline const XMLCh* XMLUri::getUriText() const 0624 { 0625 // 0626 // Fault it in if not already. Since this is a const method and we 0627 // can't use mutable members due the compilers we have to support, 0628 // we have to cast off the constness. 0629 // 0630 if (!fURIText) 0631 (const_cast<XMLUri *>(this))->buildFullText(); 0632 0633 return fURIText; 0634 } 0635 0636 // --------------------------------------------------------------------------- 0637 // XMLUri: Helper methods 0638 // --------------------------------------------------------------------------- 0639 inline bool XMLUri::isReservedOrUnreservedCharacter(const XMLCh theChar) 0640 { 0641 return (XMLString::isAlphaNum(theChar) || 0642 XMLString::indexOf(MARK_OR_RESERVED_CHARACTERS, theChar) != -1); 0643 } 0644 0645 inline bool XMLUri::isReservedCharacter(const XMLCh theChar) 0646 { 0647 return (XMLString::indexOf(RESERVED_CHARACTERS, theChar) != -1); 0648 } 0649 0650 inline bool XMLUri::isPathCharacter(const XMLCh theChar) 0651 { 0652 return (XMLString::indexOf(PATH_CHARACTERS, theChar) != -1); 0653 } 0654 0655 inline bool XMLUri::isUnreservedCharacter(const XMLCh theChar) 0656 { 0657 return (XMLString::isAlphaNum(theChar) || 0658 XMLString::indexOf(MARK_CHARACTERS, theChar) != -1); 0659 } 0660 0661 XERCES_CPP_NAMESPACE_END 0662 0663 #endif
// [ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ]
// This page was automatically generated by the 2.3.7 LXR engine (the LXR team).