SAXParser.hpp

Go to the documentation of this file.
00001 /*
00002  * Licensed to the Apache Software Foundation (ASF) under one or more
00003  * contributor license agreements.  See the NOTICE file distributed with
00004  * this work for additional information regarding copyright ownership.
00005  * The ASF licenses this file to You under the Apache License, Version 2.0
00006  * (the "License"); you may not use this file except in compliance with
00007  * the License.  You may obtain a copy of the License at
00008  * 
00009  *      http://www.apache.org/licenses/LICENSE-2.0
00010  * 
00011  * Unless required by applicable law or agreed to in writing, software
00012  * distributed under the License is distributed on an "AS IS" BASIS,
00013  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00014  * See the License for the specific language governing permissions and
00015  * limitations under the License.
00016  */
00017 
00018 /*
00019  * $Id: SAXParser.hpp 568078 2007-08-21 11:43:25Z amassari $
00020  */
00021 
00022 #if !defined(SAXPARSER_HPP)
00023 #define SAXPARSER_HPP
00024 
00025 #include <xercesc/sax/Parser.hpp>
00026 #include <xercesc/internal/VecAttrListImpl.hpp>
00027 #include <xercesc/framework/XMLDocumentHandler.hpp>
00028 #include <xercesc/framework/XMLElementDecl.hpp>
00029 #include <xercesc/framework/XMLEntityHandler.hpp>
00030 #include <xercesc/framework/XMLErrorReporter.hpp>
00031 #include <xercesc/framework/XMLBuffer.hpp>
00032 #include <xercesc/util/SecurityManager.hpp>
00033 #include <xercesc/validators/DTD/DocTypeHandler.hpp>
00034 
00035 XERCES_CPP_NAMESPACE_BEGIN
00036 
00037 
// Forward declarations. Within this header each of these types appears only
// behind a pointer or a reference (parameters, return types, data members),
// so a declaration without the full definition is sufficient here.
00038 class DocumentHandler;
00039 class EntityResolver;
00040 class XMLPScanToken;
00041 class XMLScanner;
00042 class XMLValidator;
00043 class Grammar;
00044 class GrammarResolver;
00045 class XMLGrammarPool;
00046 class XMLEntityResolver;
00047 class XMLResourceIdentifier;
00048 class PSVIHandler;
00049 
// SAXParser: parser class exported through PARSERS_EXPORT. It derives from
// XMemory and the SAX Parser interface, and also from the XMLDocumentHandler,
// XMLErrorReporter, XMLEntityHandler and DocTypeHandler callback interfaces,
// whose virtual methods it declares in the sections below. Copying is
// disabled: the copy constructor and assignment operator are private and
// left unimplemented.
// NOTE(review): this text is a Doxygen source listing. The 5-digit number at
// the start of each line is a listing line number, and the gaps in that
// numbering are presumably where the original API doc comments were removed.
00064 class PARSERS_EXPORT SAXParser :
00065
00066     public XMemory
00067     , public Parser
00068     , public XMLDocumentHandler
00069     , public XMLErrorReporter
00070     , public XMLEntityHandler
00071     , public DocTypeHandler    
00072 {
00073 public :
00074     // -----------------------------------------------------------------------
00075     //  Class types
00076     // -----------------------------------------------------------------------
          // Validation scheme selector: the type taken by setValidationScheme()
          // and returned by getValidationScheme(). No explicit values are
          // given, so the enumerators are 0, 1 and 2 in declaration order.
00085     enum ValSchemes
00086     {
00087         Val_Never
00088         , Val_Always
00089         , Val_Auto
00090     };
00091 
00092 
00093     // -----------------------------------------------------------------------
00094     //  Constructors and Destructor
00095     // -----------------------------------------------------------------------
          // Constructor. All three arguments are optional: valToAdopt and
          // gramPool default to 0, manager defaults to
          // XMLPlatformUtils::fgMemoryManager. Per the data-member notes further
          // down, the grammar pool may be 0 and is not owned by the parser.
          // The constructor is not marked explicit, so it can also be used as an
          // implicit conversion from XMLValidator*.
          // NOTE(review): the name valToAdopt suggests the parser takes
          // ownership of the validator - confirm against the implementation.
00106     SAXParser
00107     (
00108           XMLValidator*   const valToAdopt = 0
00109         , MemoryManager*  const manager = XMLPlatformUtils::fgMemoryManager
00110         , XMLGrammarPool* const gramPool = 0 
00111     );
00112 
          // Destructor; defined outside this header.
00116     ~SAXParser();
00118 
00119 
00120     // -----------------------------------------------------------------------
00121     //  Getter Methods
00122     // -----------------------------------------------------------------------
          // Handler and resolver accessors. Each comes as a non-const/const
          // pair; their inline definitions follow the class and simply return
          // the matching data member (fDocHandler, fEntityResolver,
          // fXMLEntityResolver, fErrorHandler, fPSVIHandler).
00131     DocumentHandler* getDocumentHandler();
00132 
00139     const DocumentHandler* getDocumentHandler() const;
00140 
00147     EntityResolver* getEntityResolver();
00148 
00155     const EntityResolver* getEntityResolver() const;
00156 
00163     XMLEntityResolver* getXMLEntityResolver();
00164 
00171     const XMLEntityResolver* getXMLEntityResolver() const;
00172 
00179     ErrorHandler* getErrorHandler();
00180 
00187     const ErrorHandler* getErrorHandler() const;
00188 
00195     PSVIHandler* getPSVIHandler();
00196 
00203     const PSVIHandler* getPSVIHandler() const;
00204 
          // The getters from here to the end of this section are only declared
          // in this header; their definitions are not shown in this listing.
00211     const XMLValidator& getValidator() const;
00212 
00220     ValSchemes getValidationScheme() const;
00221 
00232     bool getDoSchema() const;
00233 
00244     bool getValidationSchemaFullChecking() const;
00245 
00256     bool getIdentityConstraintChecking() const;
00257 
00268     int getErrorCount() const;
00269 
00279     bool getDoNamespaces() const;
00280 
00290     bool getExitOnFirstFatalError() const;
00291 
00302     bool getValidationConstraintFatal() const;
00303 
          // NOTE(review): both schema-location getters are const yet return a
          // non-const XMLCh*. Who owns the returned buffer is not visible
          // here - confirm before modifying or releasing it.
00323     XMLCh* getExternalSchemaLocation() const;
00324 
00344     XMLCh* getExternalNoNamespaceSchemaLocation() const;
00345 
00361     SecurityManager* getSecurityManager() const;
00362 
00374     bool getLoadExternalDTD() const;
00375 
          // NOTE(review): these two presumably report the flags set through
          // cacheGrammarFromParse() and useCachedGrammarInParse() - confirm.
00386     bool isCachingGrammarFromParse() const;
00387 
00398     bool isUsingCachedGrammarInParse() const;
00399 
00411     bool getCalculateSrcOfs() const;
00412 
00423     bool getStandardUriConformant() const;
00424 
          // Grammar lookups. Unlike most getters in this section these two are
          // non-const member functions and return non-const Grammar pointers.
00431     Grammar* getGrammar(const XMLCh* const nameSpaceKey);
00432 
00438     Grammar* getRootGrammar();
00439 
00446     const XMLCh* getURIText(unsigned int uriId) const;
00447 
00454     unsigned int getSrcOffset() const;
00455 
00467     bool getGenerateSyntheticAnnotations() const;
00468 
00476     bool getValidateAnnotations() const;
00477 
00485     bool getIgnoreCachedDTD() const;
00486 
00494     bool getIgnoreAnnotations() const;
00495 
00503     bool getDisableDefaultEntityResolution() const;
00504 
00512     bool getSkipDTDValidation() const;
00513 
00515 
00516 
00517     // -----------------------------------------------------------------------
00518     //  Setter methods
00519     // -----------------------------------------------------------------------
00520 
          // Setters are declared only; none of them is defined in this header.
00533     void setGenerateSyntheticAnnotations(const bool newValue);
00534 
00542     void setValidateAnnotations(const bool newValue);
00543 
00557     void setDoNamespaces(const bool newState);
00558 
00575     void setValidationScheme(const ValSchemes newScheme);
00576 
00592     void setDoSchema(const bool newState);
00593 
00610     void setValidationSchemaFullChecking(const bool schemaFullChecking);
00611 
00623     void setIdentityConstraintChecking(const bool identityConstraintChecking);
00624 
00640     void setExitOnFirstFatalError(const bool newState);
00641 
00661     void setValidationConstraintFatal(const bool newState);
00662 
          // Each schema-location setter is overloaded for an XMLCh string and
          // for a char string.
00683     void setExternalSchemaLocation(const XMLCh* const schemaLocation);
00684 
00693     void setExternalSchemaLocation(const char* const schemaLocation);
00694 
00709     void setExternalNoNamespaceSchemaLocation(const XMLCh* const noNamespaceSchemaLocation);
00710 
00719     void setExternalNoNamespaceSchemaLocation(const char* const noNamespaceSchemaLocation);
00720 
          // NOTE(review): whether the parser takes ownership of the security
          // manager is not visible from this declaration - confirm.
00736     void setSecurityManager(SecurityManager* const securityManager);
00737 
00754     void setLoadExternalDTD(const bool newState);
00755 
00773     void cacheGrammarFromParse(const bool newState);
00774 
00793     void useCachedGrammarInParse(const bool newState);
00794 
00807     void setCalculateSrcOfs(const bool newState);
00808 
00819     void setStandardUriConformant(const bool newState);
00820 
          // Selects a scanner by name. NOTE(review): the set of accepted names
          // is not visible here - confirm against the scanner implementation.
00828     void useScanner(const XMLCh* const scannerName);
00829 
          // This class declares no matching getter for the input buffer size.
00840     void setInputBufferSize(const size_t bufferSize);
00841 
00856     void setIgnoreCachedDTD(const bool newValue);
00857 
00867     void setIgnoreAnnotations(const bool newValue);
00868 
00883     void setDisableDefaultEntityResolution(const bool newValue);
00884 
00897     void setSkipDTDValidation(const bool newValue);
00899 
00900 
00901     // -----------------------------------------------------------------------
00902     //  Advanced document handler list maintenance methods
00903     // -----------------------------------------------------------------------
00904 
          // Add or remove an XMLDocumentHandler; the data-member notes below
          // describe the fAdvDHList array that holds the installed handlers.
          // NOTE(review): removeAdvDocHandler() presumably returns whether the
          // handler was found in the list - confirm.
00920     void installAdvDocHandler(XMLDocumentHandler* const toInstall);
00921 
00931     bool removeAdvDocHandler(XMLDocumentHandler* const toRemove);
00933 
00934 
00935     // -----------------------------------------------------------------------
00936     //  Progressive scan methods
00937     // -----------------------------------------------------------------------
00938 
00941 
          // parseFirst() has three overloads that differ only in how the input
          // is named (XMLCh system id, char system id, InputSource); each takes
          // an XMLPScanToken by non-const reference (toFill). parseNext() and
          // parseReset() take such a token.
          // NOTE(review): the meaning of the bool results is not visible
          // here - confirm against the implementation.
00969     bool parseFirst
00970     (
00971         const   XMLCh* const    systemId
00972         ,       XMLPScanToken&  toFill
00973     );
00974 
01002     bool parseFirst
01003     (
01004         const   char* const     systemId
01005         ,       XMLPScanToken&  toFill
01006     );
01007 
01035     bool parseFirst
01036     (
01037         const   InputSource&    source
01038         ,       XMLPScanToken&  toFill
01039     );
01040 
01065     bool parseNext(XMLPScanToken& token);
01066 
01088     void parseReset(XMLPScanToken& token);
01089 
01091 
01092     // -----------------------------------------------------------------------
01093     //  Grammar preparsing interface
01094     // -----------------------------------------------------------------------
01095 
          // Three loadGrammar() overloads, again differing only in how the
          // input is named. toCache defaults to false in all of them.
          // NOTE(review): grammarType is a plain short; presumably it carries a
          // Grammar type enumerator - confirm the accepted values.
01125     Grammar* loadGrammar(const InputSource& source,
01126                          const short grammarType,
01127                          const bool toCache = false);
01128 
01154     Grammar* loadGrammar(const XMLCh* const systemId,
01155                          const short grammarType,
01156                          const bool toCache = false);
01157 
01182     Grammar* loadGrammar(const char* const systemId,
01183                          const short grammarType,
01184                          const bool toCache = false);
01185 
01189     void resetCachedGrammarPool();
01190 
01192 
01193 
01194     // -----------------------------------------------------------------------
01195     //  Implementation of the SAX Parser interface
01196     // -----------------------------------------------------------------------
01197 
          // parse() is overloaded for an InputSource, an XMLCh system id and a
          // char system id. The set...() methods install the handler/resolver
          // pointers that the getters above hand back.
01209     virtual void parse(const InputSource& source);
01210 
01220     virtual void parse(const XMLCh* const systemId);
01221 
01229     virtual void parse(const char* const systemId);
01230 
01241     virtual void setDocumentHandler(DocumentHandler* const handler);
01242 
01252     virtual void setDTDHandler(DTDHandler* const handler);
01253 
01264     virtual void setErrorHandler(ErrorHandler* const handler);
01265 
01276     virtual void setPSVIHandler(PSVIHandler* const handler);
01277 
01293     virtual void setEntityResolver(EntityResolver* const resolver);
01294 
01310     virtual void setXMLEntityResolver(XMLEntityResolver* const resolver);
01311 
01313 
01314 
01315     // -----------------------------------------------------------------------
01316     //  Implementation of the XMLDocumentHandler interface
01317     // -----------------------------------------------------------------------
01318 
01336     virtual void docCharacters
01337     (
01338         const   XMLCh* const    chars
01339         , const unsigned int    length
01340         , const bool            cdataSection
01341     );
01342 
01352     virtual void docComment
01353     (
01354         const   XMLCh* const    comment
01355     );
01356 
01376     virtual void docPI
01377     (
01378         const   XMLCh* const    target
01379         , const XMLCh* const    data
01380     );
01381 
01393     virtual void endDocument();
01394 
01414     virtual void endElement
01415     (
01416         const   XMLElementDecl& elemDecl
01417         , const unsigned int    urlId
01418         , const bool            isRoot
01419         , const XMLCh* const    elemPrefix
01420     );
01421 
01432     virtual void endEntityReference
01433     (
01434         const   XMLEntityDecl&  entDecl
01435     );
01436 
01456     virtual void ignorableWhitespace
01457     (
01458         const   XMLCh* const    chars
01459         , const unsigned int    length
01460         , const bool            cdataSection
01461     );
01462 
01467     virtual void resetDocument();
01468 
01479     virtual void startDocument();
01480 
01507     virtual void startElement
01508     (
01509         const   XMLElementDecl&         elemDecl
01510         , const unsigned int            urlId
01511         , const XMLCh* const            elemPrefix
01512         , const RefVectorOf<XMLAttr>&   attrList
01513         , const unsigned int            attrCount
01514         , const bool                    isEmpty
01515         , const bool                    isRoot
01516     );
01517 
01527     virtual void startEntityReference
01528     (
01529         const   XMLEntityDecl&  entDecl
01530     );
01531 
01549     virtual void XMLDecl
01550     (
01551         const   XMLCh* const    versionStr
01552         , const XMLCh* const    encodingStr
01553         , const XMLCh* const    standaloneStr
01554         , const XMLCh* const    actualEncodingStr
01555     );
01557 
01558 
01559     // -----------------------------------------------------------------------
01560     //  Implementation of the XMLErrorReporter interface
01561     // -----------------------------------------------------------------------
01562 
01588     virtual void error
01589     (
01590         const   unsigned int                errCode
01591         , const XMLCh* const                msgDomain
01592         , const XMLErrorReporter::ErrTypes  errType
01593         , const XMLCh* const                errorText
01594         , const XMLCh* const                systemId
01595         , const XMLCh* const                publicId
01596         , const XMLSSize_t                  lineNum
01597         , const XMLSSize_t                  colNum
01598     );
01599 
01608     virtual void resetErrors();
01610 
01611 
01612     // -----------------------------------------------------------------------
01613     //  Implementation of the XMLEntityHandler interface
01614     // -----------------------------------------------------------------------
01615 
01629     virtual void endInputSource(const InputSource& inputSource);
01630 
01645     virtual bool expandSystemId
01646     (
01647         const   XMLCh* const    systemId
01648         ,       XMLBuffer&      toFill
01649     );
01650 
01658     virtual void resetEntities();
01659 
          // Two resolveEntity() overloads: one keyed by public/system id (with
          // baseURI defaulting to 0) and one taking an XMLResourceIdentifier.
          // Because the function is virtual, the default for baseURI is picked
          // from the static type used at the call site.
          // NOTE(review): ownership of the returned InputSource is not visible
          // here - confirm who must delete it.
01682     virtual InputSource* resolveEntity
01683     (
01684         const   XMLCh* const    publicId
01685         , const XMLCh* const    systemId
01686         , const XMLCh* const    baseURI = 0
01687     );
01688 
01705     virtual InputSource* resolveEntity
01706     (
01707         XMLResourceIdentifier* resourceIdentifier
01708     );
01709 
01721     virtual void startInputSource(const InputSource& inputSource);
01723 
01724 
01725     // -----------------------------------------------------------------------
01726     //  Implementation of the Deprecated DocTypeHandler Interface
01727     // -----------------------------------------------------------------------
01744     virtual void attDef
01745     (
01746         const   DTDElementDecl& elemDecl
01747         , const DTDAttDef&      attDef
01748         , const bool            ignore
01749     );
01750 
01760     virtual void doctypeComment
01761     (
01762         const   XMLCh* const    comment
01763     );
01764 
          // hasExtSubset defaults to false; as with any default argument on a
          // virtual function, the default is chosen by the static type at the
          // call site.
01783     virtual void doctypeDecl
01784     (
01785         const   DTDElementDecl& elemDecl
01786         , const XMLCh* const    publicId
01787         , const XMLCh* const    systemId
01788         , const bool            hasIntSubset
01789         , const bool            hasExtSubset = false
01790     );
01791 
01805     virtual void doctypePI
01806     (
01807         const   XMLCh* const    target
01808         , const XMLCh* const    data
01809     );
01810 
01822     virtual void doctypeWhitespace
01823     (
01824         const   XMLCh* const    chars
01825         , const unsigned int    length
01826     );
01827 
01840     virtual void elementDecl
01841     (
01842         const   DTDElementDecl& decl
01843         , const bool            isIgnored
01844     );
01845 
01856     virtual void endAttList
01857     (
01858         const   DTDElementDecl& elemDecl
01859     );
01860 
01867     virtual void endIntSubset();
01868 
01875     virtual void endExtSubset();
01876 
01891     virtual void entityDecl
01892     (
01893         const   DTDEntityDecl&  entityDecl
01894         , const bool            isPEDecl
01895         , const bool            isIgnored
01896     );
01897 
01902     virtual void resetDocType();
01903 
01916     virtual void notationDecl
01917     (
01918         const   XMLNotationDecl&    notDecl
01919         , const bool                isIgnored
01920     );
01921 
01932     virtual void startAttList
01933     (
01934         const   DTDElementDecl& elemDecl
01935     );
01936 
01943     virtual void startIntSubset();
01944 
01951     virtual void startExtSubset();
01952 
01965     virtual void TextDecl
01966     (
01967         const   XMLCh* const    versionStr
01968         , const XMLCh* const    encodingStr
01969     );
01971 
01972 
01973     // -----------------------------------------------------------------------
01974     //  Deprecated Methods
01975     // -----------------------------------------------------------------------
          // NOTE(review): presumably superseded by getValidationScheme() and
          // setValidationScheme() - confirm before recommending a replacement.
01988     bool getDoValidation() const;
01989 
02003     void setDoValidation(const bool newState);
02005 
02006 
02007 protected :
02008     // -----------------------------------------------------------------------
02009     //  Protected Methods
02010     // -----------------------------------------------------------------------
          // Accessors for derived classes; both are defined inline after the
          // class. getScanner() returns *fScanner and getGrammarResolver()
          // returns fGrammarResolver.
02017     const XMLScanner& getScanner() const;
02018 
02023     GrammarResolver* getGrammarResolver() const;
02024 
02025 
02026 private:
02027     // -----------------------------------------------------------------------
02028     //  Unimplemented constructors and operators
02029     // -----------------------------------------------------------------------
          // Declared private and never defined, which makes SAXParser
          // non-copyable and non-assignable.
02030     SAXParser(const SAXParser&);
02031     SAXParser& operator=(const SAXParser&);
02032 
02033     // -----------------------------------------------------------------------
02034     //  Initialize/Cleanup methods
02035     // -----------------------------------------------------------------------
          // NOTE(review): presumably called from the constructor/destructor
          // (initialize, cleanUp) and to clear fParseInProgress
          // (resetInProgress) - confirm against the implementation file.
02036     void initialize();
02037     void cleanUp();
02038     void resetInProgress();
02039 
02040     // -----------------------------------------------------------------------
02041     //  Private data members
02042     //
02043     //  fAttrList
02044     //      A temporary implementation of the basic SAX attribute list
02045     //      interface. We use this one over and over on each startElement
02046     //      event to allow SAX-like access to the element attributes.
02047     //
02048     //  fDocHandler
02049     //      The installed SAX doc handler, if any. Null if none.
02050     //
02051     //  fDTDHandler
02052     //      The installed SAX DTD handler, if any. Null if none.
02053     //
02054     //  fElemDepth
02055     //      This is used to track the element nesting depth, so that we can
02056     //      know when we are inside content. This is so we can ignore char
02057     //      data outside of content.
02058     //
02059     //  fEntityResolver
02060     //      The installed SAX entity handler, if any. Null if none.
02061     //
02062     //  fErrorHandler
02063     //      The installed SAX error handler, if any. Null if none.
02064     //
02065     //  fPSVIHandler
02066     //      The installed PSVI handler, if any. Null if none.
02067     //
02068     //  fAdvDHCount
02069     //  fAdvDHList
02070     //  fAdvDHListSize
02071     //      This is an array of pointers to XMLDocumentHandlers, which is
02072     //      how we see installed advanced document handlers. There will
02073     //      usually not be very many at all, so a simple array is used
02074     //      instead of a collection, for performance. It will grow if needed,
02075     //      but that is unlikely.
02076     //
02077     //      The count is how many handlers are currently installed. The size
02078     //      is how big the array itself is (for expansion purposes.) When
02079     //      count == size, is time to expand.
02080     //
02081     //  fParseInProgress
02082     //      This flag is set once a parse starts. It is used to prevent
02083     //      multiple entrance or reentrance of the parser.
02084     //
02085     //  fScanner
02086     //      The scanner being used by this parser. It is created internally
02087     //      during construction.
02088     //
02089     //   fGrammarPool
02090     //      The grammar pool passed from external application (through derivatives).
02091     //      which could be 0, not owned.
02092     //
          //  fXMLEntityResolver
          //      The pointer handed back by getXMLEntityResolver().
          //
          //  fGrammarResolver
          //      The pointer handed back by getGrammarResolver().
          //
          //  fURIStringPool
          //  fValidator
          //  fMemoryManager
          //  fElemQNameBuf
          //      Not described in the original notes. NOTE(review): presumably
          //      the URI string pool, the validator and memory manager given to
          //      the constructor, and a scratch buffer for element qualified
          //      names - confirm against the implementation file.
          //
02093     // -----------------------------------------------------------------------
02094     bool                 fParseInProgress;
02095     unsigned int         fElemDepth;
02096     unsigned int         fAdvDHCount;
02097     unsigned int         fAdvDHListSize;
02098     VecAttrListImpl      fAttrList;
02099     DocumentHandler*     fDocHandler;
02100     DTDHandler*          fDTDHandler;
02101     EntityResolver*      fEntityResolver;
02102     XMLEntityResolver*   fXMLEntityResolver;
02103     ErrorHandler*        fErrorHandler;
02104     PSVIHandler*         fPSVIHandler;
02105     XMLDocumentHandler** fAdvDHList;
02106     XMLScanner*          fScanner;
02107     GrammarResolver*     fGrammarResolver;
02108     XMLStringPool*       fURIStringPool;
02109     XMLValidator*        fValidator;
02110     MemoryManager*       fMemoryManager;
02111     XMLGrammarPool*      fGrammarPool;
02112     XMLBuffer            fElemQNameBuf;
02113 };
02114 
02115 
02116 // ---------------------------------------------------------------------------
02117 //  SAXParser: Getter methods
02118 // ---------------------------------------------------------------------------
// Non-const accessor: returns fDocHandler, the installed SAX document
// handler. Per the data-member notes the pointer is null when no handler
// is installed, so callers must check it before use.
02119 inline DocumentHandler* SAXParser::getDocumentHandler()
02120 {
02121     return fDocHandler;
02122 }
02123 
// Const overload: returns the same fDocHandler pointer as the non-const
// version, but as a pointer-to-const. Null when no document handler is
// installed (see the data-member notes in the class).
02124 inline const DocumentHandler* SAXParser::getDocumentHandler() const
02125 {
02126     return fDocHandler;
02127 }
02128 
// Non-const accessor: returns fEntityResolver, the installed SAX entity
// resolver. Per the data-member notes the pointer is null when none is
// installed. The const overload is defined a little further down, after
// the getXMLEntityResolver() pair.
02129 inline EntityResolver* SAXParser::getEntityResolver()
02130 {
02131     return fEntityResolver;
02132 }
02133 
// Non-const accessor: returns the fXMLEntityResolver pointer as stored.
// NOTE(review): the class notes do not describe this member; presumably it
// is null when no XMLEntityResolver is installed, like the other handler
// pointers - confirm.
02134 inline XMLEntityResolver* SAXParser::getXMLEntityResolver()
02135 {
02136     return fXMLEntityResolver;
02137 }
02138 
// Const overload: returns the same fXMLEntityResolver pointer, typed as
// pointer-to-const.
// NOTE(review): presumably null when no XMLEntityResolver is installed -
// confirm.
02139 inline const XMLEntityResolver* SAXParser::getXMLEntityResolver() const
02140 {
02141     return fXMLEntityResolver;
02142 }
02143 
// Const overload of getEntityResolver(): returns fEntityResolver as a
// pointer-to-const. Null when no SAX entity resolver is installed (see the
// data-member notes in the class).
02144 inline const EntityResolver* SAXParser::getEntityResolver() const
02145 {
02146     return fEntityResolver;
02147 }
02148 
// Non-const accessor: returns fErrorHandler, the installed SAX error
// handler. Per the data-member notes the pointer is null when no handler
// is installed.
02149 inline ErrorHandler* SAXParser::getErrorHandler()
02150 {
02151     return fErrorHandler;
02152 }
02153 
// Const overload: returns the same fErrorHandler pointer, typed as
// pointer-to-const. Null when no error handler is installed.
02154 inline const ErrorHandler* SAXParser::getErrorHandler() const
02155 {
02156     return fErrorHandler;
02157 }
02158 
// Non-const accessor: returns fPSVIHandler, the installed PSVI handler.
// Per the data-member notes the pointer is null when no handler is
// installed.
02159 inline PSVIHandler* SAXParser::getPSVIHandler()
02160 {
02161     return fPSVIHandler;
02162 }
02163 
// Const overload: returns the same fPSVIHandler pointer, typed as
// pointer-to-const. Null when no PSVI handler is installed.
02164 inline const PSVIHandler* SAXParser::getPSVIHandler() const
02165 {
02166     return fPSVIHandler;
02167 }
02168 
// Protected accessor: returns a const reference to the scanner by
// dereferencing fScanner. There is no null check here, so this relies on
// fScanner pointing at a scanner; the data-member notes say it is created
// internally during construction.
02169 inline const XMLScanner& SAXParser::getScanner() const
02170 {
02171     return *fScanner;
02172 }
02173 
// Protected accessor: returns the fGrammarResolver pointer as stored. The
// method is const but the returned pointer is not pointer-to-const, so a
// caller can modify the resolver through it.
// NOTE(review): whether this pointer can be null is not visible in this
// header - confirm.
02174 inline GrammarResolver* SAXParser::getGrammarResolver() const
02175 {
02176     return fGrammarResolver;
02177 }
02178 
02179 XERCES_CPP_NAMESPACE_END
02180 
02181 #endif

Generated on Thu Feb 19 11:35:43 2009 for Xerces-C++ by doxygen 1.5.4