ACEXML
6.1.2
|
A SAX based parser. More...
#include <ACEXML/parser/parser/Parser.h>
Public Member Functions | |
ACEXML_Parser (void) | |
Default constructor. | |
virtual | ~ACEXML_Parser (void) |
Destructor. | |
int | initialize (ACEXML_InputSource *input) |
virtual ACEXML_ContentHandler * | getContentHandler (void) const |
virtual ACEXML_DTDHandler * | getDTDHandler (void) const |
virtual ACEXML_EntityResolver * | getEntityResolver (void) const |
virtual ACEXML_ErrorHandler * | getErrorHandler (void) const |
virtual int | getFeature (const ACEXML_Char *name) |
virtual void | setFeature (const ACEXML_Char *name, int boolean_value) |
virtual void * | getProperty (const ACEXML_Char *name) |
virtual void | setProperty (const ACEXML_Char *name, void *value) |
virtual void | parse (ACEXML_InputSource *input) |
virtual void | parse (const ACEXML_Char *systemId) |
virtual void | setContentHandler (ACEXML_ContentHandler *handler) |
virtual void | setDTDHandler (ACEXML_DTDHandler *handler) |
virtual void | setEntityResolver (ACEXML_EntityResolver *resolver) |
virtual void | setErrorHandler (ACEXML_ErrorHandler *handler) |
Protected Member Functions | |
void | parse_xml_prolog (void) |
void | parse_version_info (void) |
void | parse_encoding_decl (void) |
void | parse_xml_decl (void) |
int | parse_text_decl (void) |
int | parse_processing_instruction (void) |
int | parse_doctypedecl (void) |
void | parse_element (int is_root) |
int | parse_content (const ACEXML_Char *startname, const ACEXML_Char *&ns_uri, const ACEXML_Char *&ns_lname, int ns_flag) |
int | parse_char_reference (ACEXML_Char *buf, size_t &len) |
ACEXML_Char * | parse_reference_name (void) |
int | parse_cdata (void) |
int | parse_internal_dtd (void) |
int | parse_comment (void) |
int | parse_element_decl (void) |
int | parse_entity_decl (void) |
int | parse_attlist_decl (void) |
int | parse_atttype (void) |
int | parse_notation_decl (void) |
int | parse_external_id (ACEXML_Char *&publicId, ACEXML_Char *&systemId) |
int | parse_external_dtd (void) |
int | parse_external_subset (void) |
int | parse_markup_decl (void) |
int | parse_conditional_section (void) |
int | parse_includesect (void) |
int | parse_ignoresect (void) |
int | parse_PE_reference (void) |
int | parse_entity_reference (void) |
int | parse_entity_value (ACEXML_Char *&str) |
int | parse_defaultdecl (void) |
int | parse_children_definition (void) |
int | parse_child (int skip_open_paren) |
ACEXML_Char * | parse_name (ACEXML_Char ch=0) |
ACEXML_Char * | parse_nmtoken (ACEXML_Char ch=0) |
int | parse_version (ACEXML_Char *&str) |
int | parse_version_num (ACEXML_Char *&str) |
int | parse_encname (ACEXML_Char *&str) |
int | parse_sddecl (ACEXML_Char *&str) |
ACEXML_Char * | parse_attname (void) |
int | parse_attvalue (ACEXML_Char *&str) |
int | parse_tokenized_type (void) |
int | parse_system_literal (ACEXML_Char *&str) |
int | parse_pubid_literal (ACEXML_Char *&str) |
int | is_whitespace (const ACEXML_Char c) const |
int | isChar (ACEXML_UCS4 c) const |
int | isCharRef (const ACEXML_Char c) const |
int | isBasechar (const ACEXML_Char c) const |
int | isIdeographic (const ACEXML_Char c) const |
int | isCombiningchar (const ACEXML_Char c) const |
int | isDigit (const ACEXML_Char c) const |
int | isExtender (const ACEXML_Char c) const |
int | isLetter (const ACEXML_Char c) const |
int | isNameChar (const ACEXML_Char c) const |
int | isPubidChar (const ACEXML_Char c) const |
virtual ACEXML_Char | get (void) |
Get a character. | |
virtual ACEXML_Char | peek (void) |
Peek a character. | |
Private Member Functions | |
ACEXML_Char | skip_whitespace (void) |
int | skip_whitespace_count (ACEXML_Char *peek=0) |
int | skip_equal (void) |
int | get_quoted_string (ACEXML_Char *&str) |
int | isNormalDigit (const ACEXML_Char c) const |
void | error (const ACEXML_Char *msg) |
void | warning (const ACEXML_Char *msg) |
void | fatal_error (const ACEXML_Char *msg) |
void | prefix_mapping (const ACEXML_Char *prefix, const ACEXML_Char *uri, int start) |
int | parse_token (const ACEXML_Char *keyword) |
int | push_context (ACEXML_Parser_Context *context) |
size_t | pop_context (int GE_ref) |
virtual int | switch_input (ACEXML_CharStream *cstream, const ACEXML_Char *systemId, const ACEXML_Char *publicId=0) |
virtual int | switch_input (ACEXML_InputSource *input, const ACEXML_Char *systemId, const ACEXML_Char *publicId=0) |
int | check_for_PE_reference (void) |
void | reset (void) |
ACEXML_Char * | normalize_systemid (const ACEXML_Char *systemId) |
Private Attributes | |
ACEXML_DTDHandler * | dtd_handler_ |
ACEXML_EntityResolver * | entity_resolver_ |
ACEXML_ContentHandler * | content_handler_ |
ACEXML_ErrorHandler * | error_handler_ |
ACEXML_Char * | doctype_ |
Document Type. | |
ACEXML_Parser_Context * | current_ |
Current parser context. | |
ACE_Unbounded_Stack < ACEXML_Parser_Context * > | ctx_stack_ |
Stack used to hold the Parser_Context. | |
ACE_Unbounded_Stack < ACEXML_Char * > | GE_reference_ |
Set used to hold the general entity references that are active. | |
ACE_Unbounded_Stack < ACEXML_Char * > | PE_reference_ |
Set used to hold the parameter entity references that are active. | |
ACE_Obstack_T< ACEXML_Char > | obstack_ |
Obstack used by the parser to hold all the strings parsed. | |
ACE_Obstack_T< ACEXML_Char > | alt_stack_ |
Alternative obstack used to hold any strings when the original is in use. | |
ACEXML_NamespaceSupport | xml_namespace_ |
Namespace stack used by the parser to implement support for Namespaces. | |
int | nested_namespace_ |
T => We are processing a nested namespace. | |
ACEXML_Entity_Manager | internal_GE_ |
Set of internal parsed general entities in the document. | |
ACEXML_Entity_Manager | external_GE_ |
Set of external parsed general entities in the document. | |
ACEXML_Entity_Manager | unparsed_entities_ |
Set of unparsed entities in the document. | |
ACEXML_Entity_Manager | predef_entities_ |
Set of predefined entities used by the parser. | |
ACEXML_Entity_Manager | internal_PE_ |
Set of internal parsed parameter entities in the document. | |
ACEXML_Entity_Manager | external_PE_ |
Set of external parsed parameter entities in the document. | |
ACEXML_Entity_Manager | notations_ |
Set of notations declared in the document. | |
ACEXML_ParserInt::ReferenceState | ref_state_ |
State of the parser when it encounters a reference. | |
int | external_subset_ |
T => We are parsing an external subset. | |
int | external_entity_ |
T => We are parsing an external entity value. | |
int | has_pe_refs_ |
T => Internal DTD has parameter entity references. | |
int | standalone_ |
If set, the document is a standalone XML document. | |
int | external_dtd_ |
If set, the document has an external DTD subset. | |
int | internal_dtd_ |
If set, the document has an internal DTD. | |
int | simple_parsing_ |
int | validate_ |
If set, the parser should also validate. | |
int | namespaces_ |
If set, the parser should allow access by namespace qualified names. | |
int | namespace_prefixes_ |
Static Private Attributes | |
static const ACEXML_Char | simple_parsing_feature_ [] = ACE_TEXT ("Simple") |
static const ACEXML_Char | namespaces_feature_ [] = ACE_TEXT ("http://xml.org/sax/features/namespaces") |
static const ACEXML_Char | namespace_prefixes_feature_ [] = ACE_TEXT ("http://xml.org/sax/features/namespace-prefixes") |
static const ACEXML_Char | validation_feature_ [] = ACE_TEXT ("http://xml.org/sax/features/validation") |
A SAX based parser.
ACEXML_Parser::ACEXML_Parser | ( | void | ) |
Default constructor.
|
virtual |
Destructor.
|
private |
Check for a parameter entity reference. This is used to check for the occurrence of a PE Reference within markupDecl. Additionally this function consumes any leading or trailing whitespace around the PE Reference.
Number | of whitespace characters skipped. |
|
private |
Dispatch errors to ErrorHandler.
|
private |
Dispatch fatal errors to ErrorHandler.
|
protectedvirtual |
Get a character.
|
private |
Get a quoted string. Quoted strings are used to specify attribute values and this routine will replace character and entity references on-the-fly. Parameter entities are not allowed (or replaced) in this function. (But regular entities are.)
str | returns the un-quoted string. |
0 | on success, -1 otherwise. |
|
virtual |
Return the current content handler.
Implements ACEXML_XMLReader.
|
virtual |
Return the current DTD handler.
Implements ACEXML_XMLReader.
|
virtual |
Return the current entity resolver.
Implements ACEXML_XMLReader.
|
virtual |
Return the current error handler.
Implements ACEXML_XMLReader.
|
virtual |
Look up the value of a feature. This method allows programmers to check whether a specific feature has been activated in the parser.
Implements ACEXML_XMLReader.
|
virtual |
Look up the value of a property.
Implements ACEXML_XMLReader.
int ACEXML_Parser::initialize | ( | ACEXML_InputSource * | input | ) |
Initialize the parser state.
0 | if parser was initialized correctly else -1. |
|
protected |
Check if a character c is a whitespace.
1 | if c is a valid white space character. 0 otherwise. |
|
protected |
Check if a character c is a BaseChar.
1 | if c is a valid BaseChar character, 0 otherwise. |
|
protected |
Check if a character c is a valid Char.
1 | if c is a valid character. 0 otherwise. |
|
protected |
Check if a character c is a valid CharRef character.
1 | if c is a valid character reference character, 0 otherwise. |
|
protected |
Check if a character c is a CombiningChar.
1 | if c is a valid CombiningChar character, 0 otherwise. |
|
protected |
Check if a character c is a Digit.
1 | if c is a valid Digit character, 0 otherwise. |
|
protected |
Check if a character c is an Extender.
1 | if c is a valid Extender character, 0 otherwise. |
|
protected |
Check if a character c is an Ideographic.
1 | if c is a valid Ideographic character, 0 otherwise. |
|
protected |
Check if a character c is a Letter.
1 | if c is a valid Letter character, 0 otherwise. |
|
protected |
Check if a character is an acceptable NameChar.
1 | if c is a valid NameChar character, 0 otherwise. |
|
private |
Check if a character c is a Digit.
1 | if c is a valid Digit character, 0 otherwise. |
|
protected |
Check if a character is a PubidChar.
1 | if c is a valid PubidChar character, 0 otherwise. |
|
private |
Very trivial, non-conformant normalization of a systemid.
|
virtual |
Parse an XML document.
Implements ACEXML_XMLReader.
|
virtual |
Parse an XML document from a system identifier (URI).
Implements ACEXML_XMLReader.
|
protected |
Parse an "ATTLIST" decl. The first character this method expects is always the 'A' (the first char) in the word "ATTLIST".
0 | on success, -1 otherwise. |
|
protected |
Parse an attribute name.
str | String containing the value of the attribute name if successful. |
0 | otherwise. |
|
protected |
Parse a AttType declaration.
AttType ::= StringType | TokenizedType | EnumeratedType StringType ::= 'CDATA' TokenizedType ::= 'ID' [VC: ID] [VC: One ID per Element Type] [VC: ID Attribute Default] | 'IDREF' [VC: IDREF] | 'IDREFS' [VC: IDREF] | 'ENTITY' [VC: Entity Name] | 'ENTITIES' [VC: Entity Name] | 'NMTOKEN' [VC: Name Token] | 'NMTOKENS'
EnumeratedType ::= NotationType | Enumeration NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' [VC: Notation Attributes] [VC: One Notation Per Element Type] [VC: No Notation on Empty Element] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')' [VC: Enumeration]
|
protected |
Parse an attribute value.
str | String containing the value of the attribute if successful. |
|
protected |
Parse a CDATA section. The first character should always be the first '[' in CDATA definition.
0 | on success. |
-1 | if fail. |
|
protected |
Parse a character reference, i.e., "&#x20;" or "&#30;". The first character encountered should be the '#' char.
buf | points to a character buffer for the result. |
len | In/out argument which initially specifies the size of the buffer and is later set to the no. of characters in the reference. |
0 | on success and -1 otherwise. |
|
protected |
Parse a cp non-terminal. cp can either be a seq or a choice. This function calls itself recursively.
skip_open_paren | when non-zero, it indicates that the open paren of the seq or choice has already been removed from the input stream. |
0 | on success, -1 otherwise. |
|
protected |
Parse the "children" and "Mixed" non-terminals in contentspec.
The first character this function sees must be the first open paren '(' in children.
0 | on success, -1 otherwise. |
|
protected |
Skip over a comment. The first character encountered should always be the first '-' in the comment prefix "<!--".
|
protected |
Parse a conditionalSect declaration.
|
protected |
Parse a content declaration.
|
protected |
Parse a DefaultDecl specification.
|
protected |
Parse the DOCTYPE declaration. The first character encountered should always be 'D' in doctype prefix: "<!DOCTYPE".
|
protected |
Parse an XML element. The first character encountered should be the first character of the element "Name".
is_root | If not 0, then we are expecting to see the "root" element now, and the next element's name needs to match the name defined in DOCTYPE definition, i.e., this->doctype_. |
|
protected |
Parse an "ELEMENT" decl. The first character this method expects is always the 'L' (the second char) in the word "ELEMENT".
0 | on success, -1 otherwise. |
|
protected |
Parse the encoding name in an XML Prolog section.
str | String containing the encoding name if successful. |
|
protected |
Parse a EncodingDecl declaration.
|
protected |
Parse an "ENTITY" decl. The first character this method expects is always the 'N' (the second char) in the word "ENTITY".
0 | on success, -1 otherwise. |
|
protected |
Parse a Reference.
|
protected |
Parse an entityValue.
|
protected |
Parse an external DTD.
|
protected |
Parse an ExternalID or a reference to PUBLIC ExternalID. Possible cases are in the forms of:
SYSTEM 'quoted string representing system resource'
PUBLIC 'quoted name of public ID' 'quoted resource'
PUBLIC 'quoted name we are referring to'
The first character this function sees must be either 'S' or 'P'. When the function finishes parsing, the input stream points at the first non-whitespace character.
publicId | returns the unquoted publicId read. If none is available, it will be reset to 0. |
systemId | returns the unquoted systemId read. If none is available, it will be reset to 0. |
0 | on success, -1 otherwise. |
|
protected |
Parse an external subset. This does the actual parsing of an external subset and is called by parse_external_dtd().
|
protected |
Parse a ignoreSect declaration.
|
protected |
Parse a includeSect declaration.
|
protected |
Parse a "markupdecl" section, this includes both "markupdecl" and "DeclSep" sections in XML specification
|
protected |
Parse a markupDecl section.
|
protected |
Parse a name from the input CharStream. If ch != 0, then we have already consumed the first name character from the input CharStream, otherwise, parse_name will use this->get() to acquire the initial character.
|
protected |
Parse a NMTOKEN from the input stream.
|
protected |
Parse a "NOTATION" decl. The first character this method expects is always the 'N' (the first char) in the word "NOTATION".
0 | on success, -1 otherwise. |
|
protected |
Parse a PEReference.
|
protected |
Parse a PI statement. The first character encountered should always be '?' in the PI prefix "<?".
0 | on success, -1 otherwise. |
|
protected |
Parse a PubidLiteral.
str | String containing the PubidLiteral if successful. |
|
protected |
Parse a reference name, i.e., foo in "&foo;" or "%foo;". The first character encountered should be the character following '&' or '%'. Effectively the same as parse_name().
A | pointer to name of reference, 0 otherwise. |
|
protected |
Parse a SDDecl string.
str | String containing the encoding name if successful. |
|
protected |
Parse a SystemLiteral.
str | String containing the SystemLiteral if successful. |
|
protected |
Parse a TextDecl declaration.
|
private |
Parse a keyword.
|
protected |
Parse a tokenized type attribute.
|
protected |
Parse the version string in an XML Prolog section.
str | String containing the version number if successful. |
|
protected |
Parse VersionInfo declaration.
|
protected |
Parse the version number in a VersionInfo declaration.
|
protected |
Parse a XMLDecl declaration.
|
protected |
Parse XML Prolog.
|
protectedvirtual |
Peek a character.
|
private |
Pop the top element in the stack and replace current context with that.
|
private |
Dispatch prefix mapping calls to the ContentHandler.
prefix | Namespace prefix |
uri | Namespace URI |
name | Local name |
start | 1 => startPrefixMapping 0 => endPrefixMapping |
|
private |
Push the current context on to the stack.
|
private |
Reset the parser state.
|
virtual |
Allow an application to register a content event handler.
Implements ACEXML_XMLReader.
|
virtual |
Allow an application to register a DTD event handler.
Implements ACEXML_XMLReader.
|
virtual |
Allow an application to register an entity resolver.
Implements ACEXML_XMLReader.
|
virtual |
Allow an application to register an error event handler.
Implements ACEXML_XMLReader.
|
virtual |
Activating or deactivating a feature.
Implements ACEXML_XMLReader.
|
virtual |
Set the value of a property.
Implements ACEXML_XMLReader.
|
private |
Skip an equal sign.
0 | when succeeds, -1 if no equal sign is found. |
|
private |
Skip any whitespaces encountered until the first non-whitespace character is encountered.
|
private |
Skip any whitespaces encountered until the first non-whitespace character. The first non-whitespace character is not consumed. This method does peek into the input CharStream and therefore is more expensive than skip_whitespace.
peek | If non-null, peek points to an ACEXML_Char where skip_whitespace_count stores the first non-whitespace character it sees (character is not removed from the stream.) |
|
privatevirtual |
Create a new ACEXML_CharStream from systemId and publicId and replace the current input stream with the newly created stream.
|
privatevirtual |
Create a new ACEXML_InputSource from systemId and publicId and replace the current input source with the newly created InputSource.
|
private |
Dispatch warnings to ErrorHandler.
Alternative obstack used to hold any strings when the original is in use.
Stack used to hold the Parser_Context.
Current parser context.
|
private |
Document Type.
Keeping track of the handlers. We do not manage the memory for handlers.
|
private |
If set, the document has an external DTD subset.
|
private |
T => We are parsing an external entity value.
Set of external parsed general entities in the document.
Set of external parsed parameter entities in the document.
|
private |
T => We are parsing an external subset.
Set used to hold the general entity references that are active.
|
private |
T => Internal DTD has parameter entity references.
|
private |
If set, the document has an internal DTD.
Set of internal parsed general entities in the document.
Set of internal parsed parameter entities in the document.
|
private |
If set, the parser should include namespace declarations in the list of attributes of an element.
|
private |
If set, the parser should allow access by namespace qualified names.
|
private |
T => We are processing a nested namespace.
Set of notations declared in the document.
Obstack used by the parser to hold all the strings parsed.
Set used to hold the parameter entity references that are active.
Set of predefined entities used by the parser.
State of the parser when it encounters a reference.
|
private |
Feature flags. If set, the parser should parse a document without a prolog.
|
private |
If set, the document is a standalone XML document.
Set of unparsed entities in the document.
|
private |
If set, the parser should also validate.
Namespace stack used by the parser to implement support for Namespaces.