Xerces-C++  3.2.3
XMLChar.hpp
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements. See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 /*
19  * $Id$
20  */
21 
22 #if !defined(XERCESC_INCLUDE_GUARD_XMLCHAR_HPP)
23 #define XERCESC_INCLUDE_GUARD_XMLCHAR_HPP
24 
26 
28 
29 // ---------------------------------------------------------------------------
30 // This file defines character-classification utilities that conform to XML 1.0 and XML 1.1
31 // ---------------------------------------------------------------------------
32 // Masks for the fgCharCharsTable1_0 and fgCharCharsTable1_1 arrays
// Each mask selects one characteristic bit in a table entry.
// Bit tested by the isNameChar() checks.
35 const XMLByte gNameCharMask = 0x4;
// Bit tested by the isXMLChar() checks.
39 const XMLByte gXMLCharMask = 0x40;
41 
42 // ---------------------------------------------------------------------------
43 // This class is for XML 1.0
44 // ---------------------------------------------------------------------------
46 {
47 public:
48  // -----------------------------------------------------------------------
49  // Public, static methods, check the string
  // Each check takes a pointer to XMLCh data; all but the single-argument
  // isValidName() also take a count. The definitions are not visible in
  // this header.
50  // -----------------------------------------------------------------------
51  static bool isAllSpaces
52  (
53  const XMLCh* const toCheck
54  , const XMLSize_t count
55  );
56 
57  static bool containsWhiteSpace
58  (
59  const XMLCh* const toCheck
60  , const XMLSize_t count
61  );
62 
63  static bool isValidNmtoken
64  (
65  const XMLCh* const toCheck
66  , const XMLSize_t count
67  );
68 
69  static bool isValidName
70  (
71  const XMLCh* const toCheck
72  , const XMLSize_t count
73  );
74 
75  static bool isValidName
76  (
77  const XMLCh* const toCheck
78  );
79 
80  static bool isValidNCName
81  (
82  const XMLCh* const toCheck
83  , const XMLSize_t count
84  );
85 
86  static bool isValidQName
87  (
88  const XMLCh* const toCheck
89  , const XMLSize_t count
90  );
91 
92  // -----------------------------------------------------------------------
93  // Public, static methods, check the XMLCh
94  // A surrogate pair is assumed when the second parameter is nonzero
95  // -----------------------------------------------------------------------
96  static bool isXMLLetter(const XMLCh toCheck, const XMLCh toCheck2 = 0);
97  static bool isFirstNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
98  static bool isNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
99  static bool isPlainContentChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
100  static bool isSpecialStartTagChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
101  static bool isXMLChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
102  static bool isWhitespace(const XMLCh toCheck);
103  static bool isWhitespace(const XMLCh toCheck, const XMLCh toCheck2);
104  static bool isControlChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
105 
106  static bool isPublicIdChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
107  static bool isFirstNCNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
108  static bool isNCNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
109 
110  // -----------------------------------------------------------------------
111  // Special Non-conformant Public, static methods
112  // -----------------------------------------------------------------------
  // Return true if NEL (0x85) and LSEP (0x2028) are to be treated as
  // white space characters.
116  static bool isNELRecognized();
117 
  // NOTE(review): presumably switches on NEL/LSEP white space recognition
  // (the enableNEL flag); the definition is not visible here - confirm
  // against the .cpp file.
121  static void enableNELWS();
122 
123 private:
124  // -----------------------------------------------------------------------
125  // Unimplemented constructors and operators
  // The constructor is private; every public member of this class is static.
126  // -----------------------------------------------------------------------
127  XMLChar1_0();
128 
129  // -----------------------------------------------------------------------
130  // Static data members
131  //
132  // fgCharCharsTable1_0
133  // The character characteristics table. Bits in each byte represent
134  // the characteristics of each character. It is generated via some
135  // code and then hard coded into the cpp file for speed.
136  //
137  // enableNEL
138  // Flag that represents whether NEL and LSEP newline recognition is
139  // enabled or disabled
140  // -----------------------------------------------------------------------
141  static XMLByte fgCharCharsTable1_0[0x10000];
142  static bool enableNEL;
143 
  // XMLReader is granted access to the private members above.
144  friend class XMLReader;
145 };
146 
147 
148 // ---------------------------------------------------------------------------
149 // XMLChar1_0: Public, static methods
150 // ---------------------------------------------------------------------------
151 inline bool XMLChar1_0::isXMLLetter(const XMLCh toCheck, const XMLCh toCheck2)
152 {
153  // An XML letter is a FirstNameChar minus ':' and '_'.
154  if (!toCheck2) {
155  return (((fgCharCharsTable1_0[toCheck] & gFirstNameCharMask) != 0)
156  && (toCheck != chColon) && (toCheck != chUnderscore));
157  }
158  return false;
159 }
160 
161 inline bool XMLChar1_0::isFirstNameChar(const XMLCh toCheck, const XMLCh toCheck2)
162 {
163  if (!toCheck2)
164  return ((fgCharCharsTable1_0[toCheck] & gFirstNameCharMask) != 0);
165  else {
166  if ((toCheck >= 0xD800) && (toCheck <= 0xDB7F))
167  if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
168  return true;
169  }
170  return false;
171 }
172 
173 inline bool XMLChar1_0::isFirstNCNameChar(const XMLCh toCheck, const XMLCh toCheck2)
174 {
175  if (!toCheck2) {
176  return (((fgCharCharsTable1_0[toCheck] & gFirstNameCharMask) != 0) && (toCheck != chColon));
177  }
178  else {
179  if ((toCheck >= 0xD800) && (toCheck <= 0xDB7F))
180  if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
181  return true;
182  }
183  return false;
184 }
185 
186 inline bool XMLChar1_0::isNameChar(const XMLCh toCheck, const XMLCh toCheck2)
187 {
188  if (!toCheck2)
189  return ((fgCharCharsTable1_0[toCheck] & gNameCharMask) != 0);
190  else {
191  if ((toCheck >= 0xD800) && (toCheck <= 0xDB7F))
192  if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
193  return true;
194  }
195  return false;
196 }
197 
198 inline bool XMLChar1_0::isNCNameChar(const XMLCh toCheck, const XMLCh toCheck2)
199 {
200  if (!toCheck2)
201  return ((fgCharCharsTable1_0[toCheck] & gNCNameCharMask) != 0);
202  else {
203  if ((toCheck >= 0xD800) && (toCheck <= 0xDB7F))
204  if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
205  return true;
206  }
207  return false;
208 }
209 
210 inline bool XMLChar1_0::isPlainContentChar(const XMLCh toCheck, const XMLCh toCheck2)
211 {
212  if (!toCheck2)
213  return ((fgCharCharsTable1_0[toCheck] & gPlainContentCharMask) != 0);
214  else {
215  if ((toCheck >= 0xD800) && (toCheck <= 0xDBFF))
216  if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
217  return true;
218  }
219  return false;
220 }
221 
222 
223 inline bool XMLChar1_0::isSpecialStartTagChar(const XMLCh toCheck, const XMLCh toCheck2)
224 {
225  if (!toCheck2)
226  return ((fgCharCharsTable1_0[toCheck] & gSpecialStartTagCharMask) != 0);
227  return false;
228 }
229 
230 inline bool XMLChar1_0::isXMLChar(const XMLCh toCheck, const XMLCh toCheck2)
231 {
232  if (!toCheck2)
233  return ((fgCharCharsTable1_0[toCheck] & gXMLCharMask) != 0);
234  else {
235  if ((toCheck >= 0xD800) && (toCheck <= 0xDBFF))
236  if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
237  return true;
238  }
239  return false;
240 }
241 
242 inline bool XMLChar1_0::isWhitespace(const XMLCh toCheck)
243 {
244  return ((fgCharCharsTable1_0[toCheck] & gWhitespaceCharMask) != 0);
245 }
246 
247 inline bool XMLChar1_0::isWhitespace(const XMLCh toCheck, const XMLCh toCheck2)
248 {
249  if (!toCheck2)
250  return ((fgCharCharsTable1_0[toCheck] & gWhitespaceCharMask) != 0);
251  return false;
252 }
253 
254 inline bool XMLChar1_0::isControlChar(const XMLCh toCheck, const XMLCh toCheck2)
255 {
256  if (!toCheck2)
257  return ((fgCharCharsTable1_0[toCheck] & gControlCharMask) != 0);
258  return false;
259 }
260 
262 
263  return enableNEL;
264 }
265 
266 
267 // ---------------------------------------------------------------------------
268 // This class is for XML 1.1
269 // ---------------------------------------------------------------------------
271 {
272 public:
273  // -----------------------------------------------------------------------
274  // Public, static methods, check the string
  // Each check takes a pointer to XMLCh data; all but the single-argument
  // isValidName() also take a count. The definitions are not visible in
  // this header.
275  // -----------------------------------------------------------------------
276  static bool isAllSpaces
277  (
278  const XMLCh* const toCheck
279  , const XMLSize_t count
280  );
281 
282  static bool containsWhiteSpace
283  (
284  const XMLCh* const toCheck
285  , const XMLSize_t count
286  );
287 
288  static bool isValidNmtoken
289  (
290  const XMLCh* const toCheck
291  , const XMLSize_t count
292  );
293 
294  static bool isValidName
295  (
296  const XMLCh* const toCheck
297  , const XMLSize_t count
298  );
299 
300  static bool isValidName
301  (
302  const XMLCh* const toCheck
303  );
304 
305  static bool isValidNCName
306  (
307  const XMLCh* const toCheck
308  , const XMLSize_t count
309  );
310 
311  static bool isValidQName
312  (
313  const XMLCh* const toCheck
314  , const XMLSize_t count
315  );
316 
317  // -----------------------------------------------------------------------
318  // Public, static methods, check the XMLCh
  // A nonzero second parameter makes the inline checks treat the two
  // values as a surrogate pair.
319  // -----------------------------------------------------------------------
320  static bool isXMLLetter(const XMLCh toCheck, const XMLCh toCheck2 = 0);
321  static bool isFirstNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
322  static bool isNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
323  static bool isPlainContentChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
324  static bool isSpecialStartTagChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
325  static bool isXMLChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
326  static bool isWhitespace(const XMLCh toCheck, const XMLCh toCheck2 = 0);
327  static bool isControlChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
328 
329  static bool isPublicIdChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
330  static bool isFirstNCNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
331  static bool isNCNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
332 
333 private:
334  // -----------------------------------------------------------------------
335  // Unimplemented constructors and operators
  // The constructor is private; every public member of this class is static.
336  // -----------------------------------------------------------------------
337  XMLChar1_1();
338 
339  // -----------------------------------------------------------------------
340  // Static data members
341  //
342  // fgCharCharsTable1_1
343  // The character characteristics table. Bits in each byte represent
344  // the characteristics of each character. It is generated via some
345  // code and then hard coded into the cpp file for speed.
346  //
347  // -----------------------------------------------------------------------
348  static XMLByte fgCharCharsTable1_1[0x10000];
349 
  // XMLReader is granted access to the private members above.
350  friend class XMLReader;
351 };
352 
353 
354 // ---------------------------------------------------------------------------
355 // XMLChar1_1: Public, static methods
356 // ---------------------------------------------------------------------------
357 inline bool XMLChar1_1::isXMLLetter(const XMLCh toCheck, const XMLCh toCheck2)
358 {
360  return XMLChar1_0::isXMLLetter(toCheck, toCheck2);
361 }
362 
363 inline bool XMLChar1_1::isFirstNameChar(const XMLCh toCheck, const XMLCh toCheck2)
364 {
365  if (!toCheck2)
366  return ((fgCharCharsTable1_1[toCheck] & gFirstNameCharMask) != 0);
367  else {
368  if ((toCheck >= 0xD800) && (toCheck <= 0xDB7F))
369  if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
370  return true;
371  }
372  return false;
373 }
374 
375 inline bool XMLChar1_1::isFirstNCNameChar(const XMLCh toCheck, const XMLCh toCheck2)
376 {
377  if (!toCheck2) {
378  return (((fgCharCharsTable1_1[toCheck] & gFirstNameCharMask) != 0) && (toCheck != chColon));
379  }
380  else {
381  if ((toCheck >= 0xD800) && (toCheck <= 0xDB7F))
382  if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
383  return true;
384  }
385  return false;
386 }
387 
388 inline bool XMLChar1_1::isNameChar(const XMLCh toCheck, const XMLCh toCheck2)
389 {
390  if (!toCheck2)
391  return ((fgCharCharsTable1_1[toCheck] & gNameCharMask) != 0);
392  else {
393  if ((toCheck >= 0xD800) && (toCheck <= 0xDB7F))
394  if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
395  return true;
396  }
397  return false;
398 }
399 
400 inline bool XMLChar1_1::isNCNameChar(const XMLCh toCheck, const XMLCh toCheck2)
401 {
402  if (!toCheck2)
403  return ((fgCharCharsTable1_1[toCheck] & gNCNameCharMask) != 0);
404  else {
405  if ((toCheck >= 0xD800) && (toCheck <= 0xDB7F))
406  if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
407  return true;
408  }
409  return false;
410 }
411 
412 inline bool XMLChar1_1::isPlainContentChar(const XMLCh toCheck, const XMLCh toCheck2)
413 {
414  if (!toCheck2)
415  return ((fgCharCharsTable1_1[toCheck] & gPlainContentCharMask) != 0);
416  else {
417  if ((toCheck >= 0xD800) && (toCheck <= 0xDBFF))
418  if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
419  return true;
420  }
421  return false;
422 }
423 
424 
425 inline bool XMLChar1_1::isSpecialStartTagChar(const XMLCh toCheck, const XMLCh toCheck2)
426 {
427  if (!toCheck2)
428  return ((fgCharCharsTable1_1[toCheck] & gSpecialStartTagCharMask) != 0);
429  return false;
430 }
431 
432 inline bool XMLChar1_1::isXMLChar(const XMLCh toCheck, const XMLCh toCheck2)
433 {
434  if (!toCheck2)
435  return ((fgCharCharsTable1_1[toCheck] & gXMLCharMask) != 0);
436  else {
437  if ((toCheck >= 0xD800) && (toCheck <= 0xDBFF))
438  if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
439  return true;
440  }
441  return false;
442 }
443 
444 inline bool XMLChar1_1::isWhitespace(const XMLCh toCheck, const XMLCh toCheck2)
445 {
446  if (!toCheck2)
447  return ((fgCharCharsTable1_1[toCheck] & gWhitespaceCharMask) != 0);
448  return false;
449 }
450 
451 inline bool XMLChar1_1::isControlChar(const XMLCh toCheck, const XMLCh toCheck2)
452 {
453  if (!toCheck2)
454  return ((fgCharCharsTable1_1[toCheck] & gControlCharMask) != 0);
455  return false;
456 }
457 
458 
460 
461 #endif
XMLChar1_1::isFirstNCNameChar
static bool isFirstNCNameChar(const XMLCh toCheck, const XMLCh toCheck2=0)
Definition: XMLChar.hpp:375
XMLChar1_1::isXMLLetter
static bool isXMLLetter(const XMLCh toCheck, const XMLCh toCheck2=0)
Definition: XMLChar.hpp:357
XERCES_CPP_NAMESPACE_BEGIN
#define XERCES_CPP_NAMESPACE_BEGIN
Definition: XercesDefs.hpp:112
gWhitespaceCharMask
const XMLByte gWhitespaceCharMask
Definition: XMLChar.hpp:40
gFirstNameCharMask
const XMLByte gFirstNameCharMask
Definition: XMLChar.hpp:34
XMLUniDefs.hpp
XMLChar1_1::isNameChar
static bool isNameChar(const XMLCh toCheck, const XMLCh toCheck2=0)
Definition: XMLChar.hpp:388
XMLChar1_1::isNCNameChar
static bool isNCNameChar(const XMLCh toCheck, const XMLCh toCheck2=0)
Definition: XMLChar.hpp:400
XMLChar1_0::isXMLLetter
static bool isXMLLetter(const XMLCh toCheck, const XMLCh toCheck2=0)
Definition: XMLChar.hpp:151
XMLChar1_1
Definition: XMLChar.hpp:270
chColon
const XMLCh chColon
Definition: XMLUniDefs.hpp:53
XERCES_CPP_NAMESPACE_END
#define XERCES_CPP_NAMESPACE_END
Definition: XercesDefs.hpp:113
XMLChar1_0::isXMLChar
static bool isXMLChar(const XMLCh toCheck, const XMLCh toCheck2=0)
Definition: XMLChar.hpp:230
XMLChar1_0::isNELRecognized
static bool isNELRecognized()
Return true if NEL (0x85) and LSEP (0x2028) to be treated as white space char.
Definition: XMLChar.hpp:261
gXMLCharMask
const XMLByte gXMLCharMask
Definition: XMLChar.hpp:39
XMLByte
unsigned char XMLByte
Definition: XercesDefs.hpp:65
XMLUTIL_EXPORT
#define XMLUTIL_EXPORT
Definition: XercesDefs.hpp:162
XMLChar1_0::isNameChar
static bool isNameChar(const XMLCh toCheck, const XMLCh toCheck2=0)
Definition: XMLChar.hpp:186
XMLChar1_0::isFirstNCNameChar
static bool isFirstNCNameChar(const XMLCh toCheck, const XMLCh toCheck2=0)
Definition: XMLChar.hpp:173
XMLChar1_0
Definition: XMLChar.hpp:45
XMLCh
uint16_t XMLCh
Definition: Xerces_autoconf_config.hpp:120
XMLChar1_0::isWhitespace
static bool isWhitespace(const XMLCh toCheck)
Definition: XMLChar.hpp:242
XMLChar1_0::isFirstNameChar
static bool isFirstNameChar(const XMLCh toCheck, const XMLCh toCheck2=0)
Definition: XMLChar.hpp:161
XMLChar1_0::isSpecialStartTagChar
static bool isSpecialStartTagChar(const XMLCh toCheck, const XMLCh toCheck2=0)
Definition: XMLChar.hpp:223
XMLChar1_0::isControlChar
static bool isControlChar(const XMLCh toCheck, const XMLCh toCheck2=0)
Definition: XMLChar.hpp:254
gNameCharMask
const XMLByte gNameCharMask
Definition: XMLChar.hpp:35
XMLSize_t
size_t XMLSize_t
Definition: Xerces_autoconf_config.hpp:112
chUnderscore
const XMLCh chUnderscore
Definition: XMLUniDefs.hpp:76
XMLChar1_1::isFirstNameChar
static bool isFirstNameChar(const XMLCh toCheck, const XMLCh toCheck2=0)
Definition: XMLChar.hpp:363
gControlCharMask
const XMLByte gControlCharMask
Definition: XMLChar.hpp:38
XMLChar1_1::isControlChar
static bool isControlChar(const XMLCh toCheck, const XMLCh toCheck2=0)
Definition: XMLChar.hpp:451
XMLChar1_1::isPlainContentChar
static bool isPlainContentChar(const XMLCh toCheck, const XMLCh toCheck2=0)
Definition: XMLChar.hpp:412
gNCNameCharMask
const XERCES_CPP_NAMESPACE_BEGIN XMLByte gNCNameCharMask
Definition: XMLChar.hpp:33
XMLChar1_1::isSpecialStartTagChar
static bool isSpecialStartTagChar(const XMLCh toCheck, const XMLCh toCheck2=0)
Definition: XMLChar.hpp:425
XMLChar1_1::isXMLChar
static bool isXMLChar(const XMLCh toCheck, const XMLCh toCheck2=0)
Definition: XMLChar.hpp:432
gSpecialStartTagCharMask
const XMLByte gSpecialStartTagCharMask
Definition: XMLChar.hpp:37
gPlainContentCharMask
const XMLByte gPlainContentCharMask
Definition: XMLChar.hpp:36
XMLChar1_1::isWhitespace
static bool isWhitespace(const XMLCh toCheck, const XMLCh toCheck2=0)
Definition: XMLChar.hpp:444
XMLChar1_0::isPlainContentChar
static bool isPlainContentChar(const XMLCh toCheck, const XMLCh toCheck2=0)
Definition: XMLChar.hpp:210
XMLChar1_0::isNCNameChar
static bool isNCNameChar(const XMLCh toCheck, const XMLCh toCheck2=0)
Definition: XMLChar.hpp:198