XMLChar.hpp

Go to the documentation of this file.
00001 /*
00002  * Licensed to the Apache Software Foundation (ASF) under one or more
00003  * contributor license agreements.  See the NOTICE file distributed with
00004  * this work for additional information regarding copyright ownership.
00005  * The ASF licenses this file to You under the Apache License, Version 2.0
00006  * (the "License"); you may not use this file except in compliance with
00007  * the License.  You may obtain a copy of the License at
00008  * 
00009  *      http://www.apache.org/licenses/LICENSE-2.0
00010  * 
00011  * Unless required by applicable law or agreed to in writing, software
00012  * distributed under the License is distributed on an "AS IS" BASIS,
00013  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00014  * See the License for the specific language governing permissions and
00015  * limitations under the License.
00016  */
00017 
00018 /*
00019  * $Id: XMLChar.hpp 568078 2007-08-21 11:43:25Z amassari $
00020  */
00021 
00022 #if !defined(XMLCHAR_HPP)
00023 #define XMLCHAR_HPP
00024 
00025 #include <xercesc/util/XMLUniDefs.hpp>
00026 
00027 XERCES_CPP_NAMESPACE_BEGIN
00028 
00029 // ---------------------------------------------------------------------------
00030 //  This file defines character classification utilities that conform to XML 1.0 and XML 1.1
00031 // ---------------------------------------------------------------------------
00032 // Masks for the fgCharCharsTable1_0 array
00033 const XMLByte   gNCNameCharMask             = 0x1;
00034 const XMLByte   gFirstNameCharMask          = 0x2;
00035 const XMLByte   gNameCharMask               = 0x4;
00036 const XMLByte   gPlainContentCharMask       = 0x8;
00037 const XMLByte   gSpecialStartTagCharMask    = 0x10;
00038 const XMLByte   gControlCharMask            = 0x20;
00039 const XMLByte   gXMLCharMask                = 0x40;
00040 const XMLByte   gWhitespaceCharMask         = 0x80;
00041 
00042 // ---------------------------------------------------------------------------
00043 //  This class is for XML 1.0
00044 // ---------------------------------------------------------------------------
00045 class XMLUTIL_EXPORT XMLChar1_0
00046 {
00047 public:
00048     // -----------------------------------------------------------------------
00049     //  Public, static methods, check the string
00050     // -----------------------------------------------------------------------
00051     static bool isAllSpaces
00052     (
00053         const   XMLCh* const    toCheck
00054         , const unsigned int    count
00055     );
00056 
00057     static bool containsWhiteSpace
00058     (
00059         const   XMLCh* const    toCheck
00060         , const unsigned int    count
00061     );
00062 
00063     static bool isValidNmtoken
00064     (
00065         const   XMLCh*        const    toCheck
00066       , const   unsigned int           count
00067     );
00068 
00069     static bool isValidName
00070     (
00071         const   XMLCh* const    toCheck
00072         , const unsigned int    count
00073     );
00074 
00075     static bool isValidName
00076     (
00077         const   XMLCh* const    toCheck
00078     );
00079 
00080     static bool isValidNCName
00081     (
00082         const   XMLCh* const    toCheck
00083         , const unsigned int    count
00084     );
00085 
00086     static bool isValidQName
00087     (
00088         const   XMLCh* const    toCheck
00089         , const unsigned int    count
00090     );
00091 
00092     // -----------------------------------------------------------------------
00093     //  Public, static methods, check the XMLCh
00094     //  surrogate pair is assumed if second parameter is not null
00095     // -----------------------------------------------------------------------
00096     static bool isXMLLetter(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00097     static bool isFirstNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00098     static bool isNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00099     static bool isPlainContentChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00100     static bool isSpecialStartTagChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00101     static bool isXMLChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00102     static bool isWhitespace(const XMLCh toCheck);
00103     static bool isWhitespace(const XMLCh toCheck, const XMLCh toCheck2);
00104     static bool isControlChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00105 
00106     static bool isPublicIdChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00107     static bool isFirstNCNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00108     static bool isNCNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00109 
00110     // -----------------------------------------------------------------------
00111     //  Special Non-conformant Public, static methods
00112     // -----------------------------------------------------------------------
00116     static bool isNELRecognized();
00117 
00121     static void enableNELWS();
00122 
00123 private:
00124     // -----------------------------------------------------------------------
00125     //  Unimplemented constructors and operators
00126     // -----------------------------------------------------------------------
00127     XMLChar1_0();
00128 
00129     // -----------------------------------------------------------------------
00130     //  Static data members
00131     //
00132     //  fgCharCharsTable1_0
00133     //      The character characteristics table. Bits in each byte, represent
00134     //      the characteristics of each character. It is generated via some
00135     //      code and then hard coded into the cpp file for speed.
00136     //
00137     //  fNEL
00138     //      Flag to respresents whether NEL and LSEP newline recognition is enabled
00139     //      or disabled
00140     // -----------------------------------------------------------------------
00141     static XMLByte  fgCharCharsTable1_0[0x10000];
00142     static bool     enableNEL;
00143 
00144     friend class XMLReader;
00145 };
00146 
00147 
00148 // ---------------------------------------------------------------------------
00149 //  XMLChar1_0: Public, static methods
00150 // ---------------------------------------------------------------------------
00151 inline bool XMLChar1_0::isXMLLetter(const XMLCh toCheck, const XMLCh toCheck2)
00152 {
00153     // An XML letter is a FirstNameChar minus ':' and '_'.
00154     if (!toCheck2) {
00155         return (((fgCharCharsTable1_0[toCheck] & gFirstNameCharMask) != 0)
00156                 && (toCheck != chColon) && (toCheck != chUnderscore));
00157     }
00158     return false;
00159 }
00160 
00161 inline bool XMLChar1_0::isFirstNameChar(const XMLCh toCheck, const XMLCh toCheck2)
00162 {
00163     if (!toCheck2)
00164         return ((fgCharCharsTable1_0[toCheck] & gFirstNameCharMask) != 0);
00165     return false;
00166 }
00167 
00168 inline bool XMLChar1_0::isFirstNCNameChar(const XMLCh toCheck, const XMLCh toCheck2)
00169 {
00170     if (!toCheck2) {
00171         return (((fgCharCharsTable1_0[toCheck] & gFirstNameCharMask) != 0) && (toCheck != chColon));
00172     }
00173 
00174     return false;
00175 }
00176 
00177 inline bool XMLChar1_0::isNameChar(const XMLCh toCheck, const XMLCh toCheck2)
00178 {
00179     if (!toCheck2)
00180         return ((fgCharCharsTable1_0[toCheck] & gNameCharMask) != 0);
00181     return false;
00182 }
00183 
00184 inline bool XMLChar1_0::isNCNameChar(const XMLCh toCheck, const XMLCh toCheck2)
00185 {
00186     if (!toCheck2)
00187         return ((fgCharCharsTable1_0[toCheck] & gNCNameCharMask) != 0);
00188     return false;
00189 }
00190 
00191 inline bool XMLChar1_0::isPlainContentChar(const XMLCh toCheck, const XMLCh toCheck2)
00192 {
00193     if (!toCheck2)
00194         return ((fgCharCharsTable1_0[toCheck] & gPlainContentCharMask) != 0);
00195     else {
00196         if ((toCheck >= 0xD800) && (toCheck <= 0xDBFF))
00197            if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
00198                return true;
00199     }
00200     return false;
00201 }
00202 
00203 
00204 inline bool XMLChar1_0::isSpecialStartTagChar(const XMLCh toCheck, const XMLCh toCheck2)
00205 {
00206     if (!toCheck2)
00207         return ((fgCharCharsTable1_0[toCheck] & gSpecialStartTagCharMask) != 0);
00208     return false;
00209 }
00210 
00211 inline bool XMLChar1_0::isXMLChar(const XMLCh toCheck, const XMLCh toCheck2)
00212 {
00213     if (!toCheck2)
00214         return ((fgCharCharsTable1_0[toCheck] & gXMLCharMask) != 0);
00215     else {
00216         if ((toCheck >= 0xD800) && (toCheck <= 0xDBFF))
00217            if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
00218                return true;
00219     }
00220     return false;
00221 }
00222 
00223 inline bool XMLChar1_0::isWhitespace(const XMLCh toCheck)
00224 {
00225     return ((fgCharCharsTable1_0[toCheck] & gWhitespaceCharMask) != 0);
00226 }
00227 
00228 inline bool XMLChar1_0::isWhitespace(const XMLCh toCheck, const XMLCh toCheck2)
00229 {
00230     if (!toCheck2)
00231         return ((fgCharCharsTable1_0[toCheck] & gWhitespaceCharMask) != 0);
00232     return false;
00233 }
00234 
00235 inline bool XMLChar1_0::isControlChar(const XMLCh toCheck, const XMLCh toCheck2)
00236 {
00237     if (!toCheck2)
00238         return ((fgCharCharsTable1_0[toCheck] & gControlCharMask) != 0);
00239     return false;
00240 }
00241 
00242 inline bool XMLChar1_0::isNELRecognized() {
00243 
00244     return enableNEL;
00245 }
00246 
00247 
00248 // ---------------------------------------------------------------------------
00249 //  This class is for XML 1.1
00250 // ---------------------------------------------------------------------------
00251 class XMLUTIL_EXPORT XMLChar1_1
00252 {
00253 public:
00254     // -----------------------------------------------------------------------
00255     //  Public, static methods, check the string
00256     // -----------------------------------------------------------------------
00257     static bool isAllSpaces
00258     (
00259         const   XMLCh* const    toCheck
00260         , const unsigned int    count
00261     );
00262 
00263     static bool containsWhiteSpace
00264     (
00265         const   XMLCh* const    toCheck
00266         , const unsigned int    count
00267     );
00268 
00269     static bool isValidNmtoken
00270     (
00271         const   XMLCh*        const    toCheck
00272       , const   unsigned int           count
00273     );
00274 
00275     static bool isValidName
00276     (
00277         const   XMLCh* const    toCheck
00278         , const unsigned int    count
00279     );
00280 
00281     static bool isValidName
00282     (
00283         const   XMLCh* const    toCheck
00284     );
00285 
00286     static bool isValidNCName
00287     (
00288         const   XMLCh* const    toCheck
00289         , const unsigned int    count
00290     );
00291 
00292     static bool isValidQName
00293     (
00294         const   XMLCh* const    toCheck
00295         , const unsigned int    count
00296     );
00297 
00298     // -----------------------------------------------------------------------
00299     //  Public, static methods, check the XMLCh
00300     // -----------------------------------------------------------------------
00301     static bool isXMLLetter(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00302     static bool isFirstNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00303     static bool isNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00304     static bool isPlainContentChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00305     static bool isSpecialStartTagChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00306     static bool isXMLChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00307     static bool isWhitespace(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00308     static bool isControlChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00309 
00310     static bool isPublicIdChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00311     static bool isFirstNCNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00312     static bool isNCNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00313 
00314 private:
00315     // -----------------------------------------------------------------------
00316     //  Unimplemented constructors and operators
00317     // -----------------------------------------------------------------------
00318     XMLChar1_1();
00319 
00320     // -----------------------------------------------------------------------
00321     //  Static data members
00322     //
00323     //  fgCharCharsTable1_1
00324     //      The character characteristics table. Bits in each byte, represent
00325     //      the characteristics of each character. It is generated via some
00326     //      code and then hard coded into the cpp file for speed.
00327     //
00328     // -----------------------------------------------------------------------
00329     static XMLByte  fgCharCharsTable1_1[0x10000];
00330 
00331     friend class XMLReader;
00332 };
00333 
00334 
00335 // ---------------------------------------------------------------------------
00336 //  XMLChar1_1: Public, static methods
00337 // ---------------------------------------------------------------------------
00338 inline bool XMLChar1_1::isXMLLetter(const XMLCh toCheck, const XMLCh toCheck2)
00339 {
00341     return XMLChar1_0::isXMLLetter(toCheck, toCheck2);
00342 }
00343 
00344 inline bool XMLChar1_1::isFirstNameChar(const XMLCh toCheck, const XMLCh toCheck2)
00345 {
00346     if (!toCheck2)
00347         return ((fgCharCharsTable1_1[toCheck] & gFirstNameCharMask) != 0);
00348     else {
00349         if ((toCheck >= 0xD800) && (toCheck <= 0xDB7F))
00350            if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
00351                return true;
00352     }
00353     return false;
00354 }
00355 
00356 inline bool XMLChar1_1::isFirstNCNameChar(const XMLCh toCheck, const XMLCh toCheck2)
00357 {
00358     if (!toCheck2) {
00359         return (((fgCharCharsTable1_1[toCheck] & gFirstNameCharMask) != 0) && (toCheck != chColon));
00360     }
00361     else {
00362         if ((toCheck >= 0xD800) && (toCheck <= 0xDB7F))
00363            if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
00364                return true;
00365     }
00366     return false;
00367 }
00368 
00369 inline bool XMLChar1_1::isNameChar(const XMLCh toCheck, const XMLCh toCheck2)
00370 {
00371     if (!toCheck2)
00372         return ((fgCharCharsTable1_1[toCheck] & gNameCharMask) != 0);
00373     else {
00374         if ((toCheck >= 0xD800) && (toCheck <= 0xDB7F))
00375            if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
00376                return true;
00377     }
00378     return false;
00379 }
00380 
00381 inline bool XMLChar1_1::isNCNameChar(const XMLCh toCheck, const XMLCh toCheck2)
00382 {
00383     if (!toCheck2)
00384         return ((fgCharCharsTable1_1[toCheck] & gNCNameCharMask) != 0);
00385     else {
00386         if ((toCheck >= 0xD800) && (toCheck <= 0xDB7F))
00387            if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
00388                return true;
00389     }
00390     return false;
00391 }
00392 
00393 inline bool XMLChar1_1::isPlainContentChar(const XMLCh toCheck, const XMLCh toCheck2)
00394 {
00395     if (!toCheck2)
00396         return ((fgCharCharsTable1_1[toCheck] & gPlainContentCharMask) != 0);
00397     else {
00398         if ((toCheck >= 0xD800) && (toCheck <= 0xDBFF))
00399            if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
00400                return true;
00401     }
00402     return false;
00403 }
00404 
00405 
00406 inline bool XMLChar1_1::isSpecialStartTagChar(const XMLCh toCheck, const XMLCh toCheck2)
00407 {
00408     if (!toCheck2)
00409         return ((fgCharCharsTable1_1[toCheck] & gSpecialStartTagCharMask) != 0);
00410     return false;
00411 }
00412 
00413 inline bool XMLChar1_1::isXMLChar(const XMLCh toCheck, const XMLCh toCheck2)
00414 {
00415     if (!toCheck2)
00416         return ((fgCharCharsTable1_1[toCheck] & gXMLCharMask) != 0);
00417     else {
00418         if ((toCheck >= 0xD800) && (toCheck <= 0xDBFF))
00419            if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
00420                return true;
00421     }
00422     return false;
00423 }
00424 
00425 inline bool XMLChar1_1::isWhitespace(const XMLCh toCheck, const XMLCh toCheck2)
00426 {
00427     if (!toCheck2)
00428         return ((fgCharCharsTable1_1[toCheck] & gWhitespaceCharMask) != 0);
00429     return false;
00430 }
00431 
00432 inline bool XMLChar1_1::isControlChar(const XMLCh toCheck, const XMLCh toCheck2)
00433 {
00434     if (!toCheck2)
00435         return ((fgCharCharsTable1_1[toCheck] & gControlCharMask) != 0);
00436     return false;
00437 }
00438 
00439 
00440 XERCES_CPP_NAMESPACE_END
00441 
00442 #endif

Generated on Thu Feb 19 11:35:44 2009 for Xerces-C++ by  doxygen 1.5.4