Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-30 10:27:28

0001 /*
0002  * Licensed to the Apache Software Foundation (ASF) under one or more
0003  * contributor license agreements.  See the NOTICE file distributed with
0004  * this work for additional information regarding copyright ownership.
0005  * The ASF licenses this file to You under the Apache License, Version 2.0
0006  * (the "License"); you may not use this file except in compliance with
0007  * the License.  You may obtain a copy of the License at
0008  * 
0009  *      http://www.apache.org/licenses/LICENSE-2.0
0010  * 
0011  * Unless required by applicable law or agreed to in writing, software
0012  * distributed under the License is distributed on an "AS IS" BASIS,
0013  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0014  * See the License for the specific language governing permissions and
0015  * limitations under the License.
0016  */
0017 
0018 /*
0019  * $Id$
0020  */
0021 
0022 #if !defined(XERCESC_INCLUDE_GUARD_XMLURI_HPP)
0023 #define XERCESC_INCLUDE_GUARD_XMLURI_HPP
0024 
0025 #include <xercesc/util/XMemory.hpp>
0026 #include <xercesc/util/XMLString.hpp>
0027 
0028 #include <xercesc/internal/XSerializable.hpp>
0029 #include <xercesc/framework/XMLBuffer.hpp>
0030 
0031 XERCES_CPP_NAMESPACE_BEGIN
0032 
0033 /*
0034  * This class is a direct port of Java's URI class, to distinguish
0035  * itself from the XMLURL, we use the name XMLUri instead of
0036  * XMLURI.
0037  *
0038  * TODO: how to relate XMLUri and XMLURL since URL is part of URI.
0039  *
0040  */
0041 
0042 class XMLUTIL_EXPORT XMLUri : public XSerializable, public XMemory
0043 {
0044 public:
0045 
0046     // -----------------------------------------------------------------------
0047     //  Constructors and Destructor
0048     // -----------------------------------------------------------------------
0049 
0050     /**
0051      * Construct a new URI from a URI specification string.
0052      *
0053      * If the specification follows the "generic URI" syntax, (two slashes
0054      * following the first colon), the specification will be parsed
0055      * accordingly - setting the
0056      *                           scheme,
0057      *                           userinfo,
0058      *                           host,
0059      *                           port,
0060      *                           path,
0061      *                           querystring and
0062      *                           fragment
0063      * fields as necessary.
0064      *
0065      * If the specification does not follow the "generic URI" syntax,
0066      * the specification is parsed into a
0067      *                           scheme and
0068      *                           scheme-specific part (stored as the path) only.
0069      *
0070      * @param uriSpec the URI specification string (cannot be null or empty)
0071      *
0072      * @param manager Pointer to the memory manager to be used to
0073      *                allocate objects.
0074      *
0075      * ctor# 2
0076      *
0077      */
0078     XMLUri(const XMLCh* const    uriSpec,
0079            MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);
0080 
0081     /**
0082      * Construct a new URI from a base URI and a URI specification string.
0083      * The URI specification string may be a relative URI.
0084      *
0085      * @param baseURI the base URI (cannot be null if uriSpec is null or
0086      *                empty)
0087      *
0088      * @param uriSpec the URI specification string (cannot be null or
0089      *                empty if base is null)
0090      *
0091      * @param manager Pointer to the memory manager to be used to
0092      *                allocate objects.
0093      *
0094      * ctor# 7 relative ctor
0095      *
0096      */
0097     XMLUri(const XMLUri* const  baseURI
0098          , const XMLCh* const   uriSpec
0099          , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);
0100 
0101     /**
0102      * Copy constructor
0103      */
0104     XMLUri(const XMLUri& toCopy);
0105     XMLUri& operator=(const XMLUri& toAssign);
0106 
0107     virtual ~XMLUri();
0108 
0109     // -----------------------------------------------------------------------
0110     //  Getter methods
0111     // -----------------------------------------------------------------------
0112     /**
0113      * Get the URI as a string specification. See RFC 2396 Section 5.2.
0114      *
0115      * @return the URI string specification
0116      */
0117     const XMLCh* getUriText() const;
0118 
0119     /**
0120      * Get the scheme for this URI.
0121      *
0122      * @return the scheme for this URI
0123      */
0124      const XMLCh* getScheme() const;
0125 
0126     /**
0127      * Get the userinfo for this URI.
0128      *
0129      * @return the userinfo for this URI (null if not specified).
0130      */
0131      const XMLCh* getUserInfo() const;
0132 
0133 
0134     /**
0135      * Get the host for this URI.
0136      *
0137      * @return the host for this URI (null if not specified).
0138      */
0139      const XMLCh* getHost() const;
0140 
0141     /**
0142      * Get the port for this URI.
0143      *
0144      * @return the port for this URI (-1 if not specified).
0145      */
0146      int getPort() const;
0147      
0148     /**
0149      * Get the registry based authority for this URI.
0150      * 
0151      * @return the registry based authority (null if not specified).
0152      */
0153      const XMLCh* getRegBasedAuthority() const;
0154 
0155     /**
0156      * Get the path for this URI. Note that the value returned is the path
0157      * only and does not include the query string or fragment.
0158      *
0159      * @return the path for this URI.
0160      */
0161      const XMLCh* getPath() const;
0162 
0163     /**
0164      * Get the query string for this URI.
0165      *
0166      * @return the query string for this URI. Null is returned if there
0167      *         was no "?" in the URI spec, empty string if there was a
0168      *         "?" but no query string following it.
0169      */
0170      const XMLCh* getQueryString() const;
0171 
0172     /**
0173      * Get the fragment for this URI.
0174      *
0175      * @return the fragment for this URI. Null is returned if there
0176      *         was no "#" in the URI spec, empty string if there was a
0177      *         "#" but no fragment following it.
0178      */
0179      const XMLCh* getFragment() const;
0180 
0181     // -----------------------------------------------------------------------
0182     //  Setter methods
0183     // -----------------------------------------------------------------------
0184 
0185     /**
0186      * Set the scheme for this URI. The scheme is converted to lowercase
0187      * before it is set.
0188      *
0189      * @param newScheme the scheme for this URI (cannot be null)
0190      *
0191      */
0192      void setScheme(const XMLCh* const newScheme);
0193 
0194     /**
0195      * Set the userinfo for this URI. If a non-null value is passed in and
0196      * the host value is null, then an exception is thrown.
0197      *
0198      * @param newUserInfo the userinfo for this URI
0199      *
0200      */
0201      void setUserInfo(const XMLCh* const newUserInfo);
0202 
0203     /**
0204      * Set the host for this URI. If null is passed in, the userinfo
0205      * field is also set to null and the port is set to -1.
0206      *
0207      * Note: This method overwrites registry based authority if it
0208      * previously existed in this URI.
0209      *
0210      * @param newHost the host for this URI
0211      *
0212      */
0213      void setHost(const XMLCh* const newHost);
0214 
0215     /**
0216      * Set the port for this URI. -1 is used to indicate that the port is
0217      * not specified, otherwise valid port numbers are  between 0 and 65535.
0218      * If a valid port number is passed in and the host field is null,
0219      * an exception is thrown.
0220      *
0221      * @param newPort the port number for this URI
0222      *
0223      */
0224      void setPort(int newPort);
0225      
0226     /**
0227      * Sets the registry based authority for this URI.
0228      * 
0229      * Note: This method overwrites server based authority
0230      * if it previously existed in this URI.
0231      * 
0232      * @param newRegAuth the registry based authority for this URI
0233      */
0234      void setRegBasedAuthority(const XMLCh* const newRegAuth);
0235 
0236     /**
0237      * Set the path for this URI.
0238      *
0239      * If the supplied path is null, then the
0240      * query string and fragment are set to null as well.
0241      *
0242      * If the supplied path includes a query string and/or fragment,
0243      * these fields will be parsed and set as well.
0244      *
0245      * Note:
0246      *
0247      * For URIs following the "generic URI" syntax, the path
0248      * specified should start with a slash.
0249      *
0250      * For URIs that do not follow the generic URI syntax, this method
0251      * sets the scheme-specific part.
0252      *
0253      * @param newPath the path for this URI (may be null)
0254      *
0255      */
0256      void setPath(const XMLCh* const newPath);
0257 
0258     /**
0259      * Set the query string for this URI. A non-null value is valid only
0260      * if this is an URI conforming to the generic URI syntax and
0261      * the path value is not null.
0262      *
0263      * @param newQueryString the query string for this URI
0264      *
0265      */
0266      void setQueryString(const XMLCh* const newQueryString);
0267 
0268     /**
0269      * Set the fragment for this URI. A non-null value is valid only
0270      * if this is a URI conforming to the generic URI syntax and
0271      * the path value is not null.
0272      *
0273      * @param newFragment the fragment for this URI
0274      *
0275      */
0276      void setFragment(const XMLCh* const newFragment);
0277 
0278      // -----------------------------------------------------------------------
0279     //  Miscellaneous methods
0280     // -----------------------------------------------------------------------
0281 
0282     /**
0283      * Determine whether a given string contains only URI characters (also
0284      * called "uric" in RFC 2396). uric consist of all reserved
0285      * characters, unreserved characters and escaped characters.
0286      *
0287      * @return true if the string is comprised of uric, false otherwise
0288      */
0289     static bool isURIString(const XMLCh* const uric);
0290 
0291     /**
0292      * Determine whether a given string is a valid URI
0293      */
0294     static bool isValidURI( const XMLUri* const baseURI
0295                           , const XMLCh* const uriStr
0296                           , bool bAllowSpaces=false);
0297     /**
0298      * Determine whether a given string is a valid URI
0299      */
0300     static bool isValidURI( bool haveBaseURI
0301                           , const XMLCh* const uriStr
0302                           , bool bAllowSpaces=false);
0303 
0304 
0305     static void normalizeURI(const XMLCh*     const systemURI,
0306                                    XMLBuffer&       normalizedURI);
0307 
0308     /***
0309      * Support for Serialization/De-serialization
0310      ***/
0311     DECL_XSERIALIZABLE(XMLUri)
0312 
0313     XMLUri(MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);
0314 
0315 private:
0316 
0317     static const XMLCh MARK_OR_RESERVED_CHARACTERS[];
0318     static const XMLCh RESERVED_CHARACTERS[];
0319     static const XMLCh MARK_CHARACTERS[];
0320     static const XMLCh SCHEME_CHARACTERS[];
0321     static const XMLCh USERINFO_CHARACTERS[];
0322     static const XMLCh REG_NAME_CHARACTERS[];
0323     static const XMLCh PATH_CHARACTERS[];
0324 
0325     //helper method for getUriText
0326     void buildFullText();
0327 
0328     // -----------------------------------------------------------------------
0329     //  Private helper methods
0330     // -----------------------------------------------------------------------
0331 
0332     /**
0333      * Determine whether a character is a reserved character:
0334      *
0335      * @return true if the string contains any reserved characters
0336      */
0337     static bool isReservedCharacter(const XMLCh theChar);
0338     
0339     /**
0340      * Determine whether a character is a path character:
0341      *
0342      * @return true if the character is path character
0343      */
0344     static bool isPathCharacter(const XMLCh theChar);
0345 
0346     /**
0347      * Determine whether a char is an unreserved character.
0348      *
0349      * @return true if the char is unreserved, false otherwise
0350      */
0351     static bool isUnreservedCharacter(const XMLCh theChar);
0352 
0353     /**
0354      * Determine whether a char is an reserved or unreserved character.
0355      *
0356      * @return true if the char is reserved or unreserved, false otherwise
0357      */                
0358     static bool isReservedOrUnreservedCharacter(const XMLCh theChar);
0359 
0360     /**
0361      * Determine whether a scheme conforms to the rules for a scheme name.
0362      * A scheme is conformant if it starts with an alphanumeric, and
0363      * contains only alphanumerics, '+','-' and '.'.
0364      *
0365      * @return true if the scheme is conformant, false otherwise
0366      */
0367     static bool isConformantSchemeName(const XMLCh* const scheme);
0368 
0369     /**
0370      * Determine whether a userInfo conforms to the rules for a userinfo.
0371      *
0372      * @return true if the scheme is conformant, false otherwise
0373      */
0374     static void isConformantUserInfo(const XMLCh* const userInfo
0375         , MemoryManager* const manager);
0376     
0377     /**
0378      * Determines whether the components host, port, and user info
0379      * are valid as a server authority.
0380      *
0381      * @return true if the given host, port, and userinfo compose
0382      * a valid server authority
0383      */
0384     static bool isValidServerBasedAuthority(const XMLCh* const host
0385                                            , const XMLSize_t hostLen
0386                                            , const int port
0387                                            , const XMLCh* const userinfo
0388                                            , const XMLSize_t userLen);
0389                                            
0390     /**
0391      * Determines whether the components host, port, and user info
0392      * are valid as a server authority.
0393      *
0394      * @return true if the given host, port, and userinfo compose
0395      * a valid server authority
0396      */
0397     static bool isValidServerBasedAuthority(const XMLCh* const host
0398                                            , const int port
0399                                            , const XMLCh* const userinfo
0400                                            , MemoryManager* const manager);
0401       
0402    /**
0403     * Determines whether the given string is a registry based authority.
0404     * 
0405     * @param authority the authority component of a URI
0406     * 
0407     * @return true if the given string is a registry based authority
0408     */
0409     static bool isValidRegistryBasedAuthority(const XMLCh* const authority
0410                                              , const XMLSize_t authLen);
0411 
0412    /**
0413     * Determines whether the given string is a registry based authority.
0414     * 
0415     * @param authority the authority component of a URI
0416     * 
0417     * @return true if the given string is a registry based authority
0418     */
0419     static bool isValidRegistryBasedAuthority(const XMLCh* const authority);
0420 
0421     /**
0422      * Determine whether a string is syntactically capable of representing
0423      * a valid IPv4 address, IPv6 reference or the domain name of a network host.
0424      *
0425      * A valid IPv4 address consists of four decimal digit groups
0426      * separated by a '.'.
0427      *
0428      * See RFC 2732 Section 3, and RFC 2373 Section 2.2, for the 
0429      * definition of IPv6 references.
0430      *
0431      * A hostname consists of domain labels (each of which must begin and
0432      * end with an alphanumeric but may contain '-') separated by a '.'.
0433      * See RFC 2396 Section 3.2.2.
0434      *
0435      * @return true if the string is a syntactically valid IPv4 address
0436      *              or hostname
0437      */
0438      static bool isWellFormedAddress(const XMLCh* const addr
0439          , MemoryManager* const manager);
0440      
0441     /**
0442      * Determines whether a string is an IPv4 address as defined by 
0443      * RFC 2373, and under the further constraint that it must be a 32-bit
0444      * address. Though not expressed in the grammar, in order to satisfy 
0445      * the 32-bit address constraint, each segment of the address cannot 
0446      * be greater than 255 (8 bits of information).
0447      *
0448      * @return true if the string is a syntactically valid IPv4 address
0449      */
0450      static bool isWellFormedIPv4Address(const XMLCh* const addr, const XMLSize_t length);
0451      
0452     /**
0453      * Determines whether a string is an IPv6 reference as defined
0454      * by RFC 2732, where IPv6address is defined in RFC 2373. The 
0455      * IPv6 address is parsed according to Section 2.2 of RFC 2373,
0456      * with the additional constraint that the address be composed of
0457      * 128 bits of information.
0458      *
0459      * Note: The BNF expressed in RFC 2373 Appendix B does not 
0460      * accurately describe section 2.2, and was in fact removed from
0461      * RFC 3513, the successor of RFC 2373.
0462      *
0463      * @return true if the string is a syntactically valid IPv6 reference
0464      */
0465      static bool isWellFormedIPv6Reference(const XMLCh* const addr, const XMLSize_t length);
0466      
0467     /**
0468      * Helper function for isWellFormedIPv6Reference which scans the 
0469      * hex sequences of an IPv6 address. It returns the index of the 
0470      * next character to scan in the address, or -1 if the string 
0471      * cannot match a valid IPv6 address. 
0472      *
0473      * @param address the string to be scanned
0474      * @param index the beginning index (inclusive)
0475      * @param end the ending index (exclusive)
0476      * @param counter a counter for the number of 16-bit sections read
0477      * in the address
0478      *
0479      * @return the index of the next character to scan, or -1 if the
0480      * string cannot match a valid IPv6 address
0481      */
0482      static int scanHexSequence (const XMLCh* const addr, XMLSize_t index, XMLSize_t end, int& counter);
0483 
0484     /**
0485      * Get the indicator as to whether this URI uses the "generic URI"
0486      * syntax.
0487      *
0488      * @return true if this URI uses the "generic URI" syntax, false
0489      *         otherwise
0490      */
0491      bool isGenericURI();
0492 
0493     // -----------------------------------------------------------------------
0494     //  Miscellaneous methods
0495     // -----------------------------------------------------------------------
0496 
0497     /**
0498      * Initialize all fields of this URI from another URI.
0499      *
0500      * @param toCopy the URI to copy (cannot be null)
0501      */
0502      void initialize(const XMLUri& toCopy);
0503 
0504     /**
0505      * Initializes this URI from a base URI and a URI specification string.
0506      * See RFC 2396 Section 4 and Appendix B for specifications on parsing
0507      * the URI and Section 5 for specifications on resolving relative URIs
0508      * and relative paths.
0509      *
0510      * @param baseURI the base URI (may be null if uriSpec is an absolute
0511      *               URI)
0512      *
0513      * @param uriSpec the URI spec string which may be an absolute or
0514      *                  relative URI (can only be null/empty if base
0515      *                  is not null)
0516      *
0517      */
0518      void initialize(const XMLUri* const baseURI
0519                    , const XMLCh*  const uriSpec);
0520 
0521     /**
0522      * Initialize the scheme for this URI from a URI string spec.
0523      *
0524      * @param uriSpec the URI specification (cannot be null)
0525      *
0526      */
0527      void initializeScheme(const XMLCh* const uriSpec);
0528 
0529     /**
0530      * Initialize the authority (userinfo, host and port) for this
0531      * URI from a URI string spec.
0532      *
0533      * @param uriSpec the URI specification (cannot be null)
0534      *
0535      */
0536      void initializeAuthority(const XMLCh* const uriSpec);
0537 
0538     /**
0539      * Initialize the path for this URI from a URI string spec.
0540      *
0541      * @param uriSpec the URI specification (cannot be null)
0542      *
0543      */
0544      void initializePath(const XMLCh* const uriSpec);
0545 
0546      /**
0547       * cleanup the data variables
0548       *
0549       */
0550      void cleanUp();
0551 
0552     static bool isConformantSchemeName(const XMLCh* const scheme,
0553                                        const XMLSize_t schemeLen);
0554     static bool processScheme(const XMLCh* const uriStr, XMLSize_t& index);
0555     static bool processAuthority(const XMLCh* const uriStr, const XMLSize_t authLen);
0556     static bool isWellFormedAddress(const XMLCh* const addr, const XMLSize_t addrLen);
0557     static bool processPath(const XMLCh* const pathStr, const XMLSize_t pathStrLen,
0558                             const bool isSchemePresent, const bool bAllowSpaces=false);
0559 
0560     // -----------------------------------------------------------------------
0561     //  Data members
0562     //
0563     //  for all the data member, we own it,
0564     //  responsible for the creation and/or deletion for
0565     //  the memory allocated.
0566     //
0567     // -----------------------------------------------------------------------
0568     int             fPort;
0569     XMLCh*          fScheme;
0570     XMLCh*          fUserInfo;
0571     XMLCh*          fHost;
0572     XMLCh*          fRegAuth;
0573     XMLCh*          fPath;
0574     XMLCh*          fQueryString;
0575     XMLCh*          fFragment;
0576     XMLCh*          fURIText;
0577     MemoryManager*  fMemoryManager;
0578 };
0579 
0580 // ---------------------------------------------------------------------------
0581 //  XMLUri: Getter methods
0582 // ---------------------------------------------------------------------------
0583 inline const XMLCh* XMLUri::getScheme() const
0584 {
0585     return fScheme;
0586 }
0587 
0588 inline const XMLCh* XMLUri::getUserInfo() const
0589 {
0590     return fUserInfo;
0591 }
0592 
0593 inline const XMLCh* XMLUri::getHost() const
0594 {
0595     return fHost;
0596 }
0597 
0598 inline int XMLUri::getPort() const
0599 {
0600     return fPort;
0601 }
0602 
0603 inline const XMLCh* XMLUri::getRegBasedAuthority() const
0604 {
0605     return fRegAuth;
0606 }
0607 
0608 inline const XMLCh* XMLUri::getPath() const
0609 {
0610     return fPath;
0611 }
0612 
0613 inline const XMLCh* XMLUri::getQueryString() const
0614 {
0615     return fQueryString;
0616 }
0617 
0618 inline const XMLCh* XMLUri::getFragment() const
0619 {
0620     return fFragment;
0621 }
0622 
0623 inline const XMLCh* XMLUri::getUriText() const
0624 {
0625     //
0626     //  Fault it in if not already. Since this is a const method and we
0627     //  can't use mutable members due the compilers we have to support,
0628     //  we have to cast off the constness.
0629     //
0630     if (!fURIText)
0631         (const_cast<XMLUri *>(this))->buildFullText();
0632 
0633     return fURIText;
0634 }
0635 
0636 // ---------------------------------------------------------------------------
0637 //  XMLUri: Helper methods
0638 // ---------------------------------------------------------------------------
0639 inline bool XMLUri::isReservedOrUnreservedCharacter(const XMLCh theChar)
0640 {
0641    return (XMLString::isAlphaNum(theChar) ||
0642            XMLString::indexOf(MARK_OR_RESERVED_CHARACTERS, theChar) != -1);
0643 }
0644 
0645 inline bool XMLUri::isReservedCharacter(const XMLCh theChar)
0646 {
0647     return (XMLString::indexOf(RESERVED_CHARACTERS, theChar) != -1);
0648 }
0649 
0650 inline bool XMLUri::isPathCharacter(const XMLCh theChar)
0651 {
0652     return (XMLString::indexOf(PATH_CHARACTERS, theChar) != -1);
0653 }
0654 
0655 inline bool XMLUri::isUnreservedCharacter(const XMLCh theChar)
0656 {
0657     return (XMLString::isAlphaNum(theChar) ||
0658             XMLString::indexOf(MARK_CHARACTERS, theChar) != -1);
0659 }
0660 
0661 XERCES_CPP_NAMESPACE_END
0662 
0663 #endif