Inheritance: < Object

Using the Pull Parser

This API is experimental, and subject to change.

 parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
 while parser.has_next?
   res = parser.pull
   puts res[1]['att'] if res.start_tag? and res[0] == 'b'
 end

See the PullEvent class for information on the content of the results. The data is identical to the arguments passed for the various events to the StreamListener API.

Notice that parse errors are returned as events, which you can detect with error? and raise yourself:

 parser = PullParser.new( "<a>BAD DOCUMENT" )
 while parser.has_next?
   res = parser.pull
   raise res[1] if res.error?
 end

Nat Price gave me some good ideas for the API.

Constants

Name		Description
ATTDEF	= "\\s+#{NAME}\\s+#{ATTTYPE}\\s+#{DEFAULTDECL}"
ATTDEF_RE	= /#{ATTDEF}/
ATTLISTDECL_PATTERN	= /^\s*<!ATTLIST\s+#{NAME}(?:#{ATTDEF})*\s*>/um
ATTLISTDECL_START	= /^\s*<!ATTLIST/um
ATTRIBUTE_PATTERN	= /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\4/um
ATTTYPE	= "(CDATA\|ID\|IDREF\|IDREFS\|ENTITY\|ENTITIES\|NMTOKEN\|NMTOKENS\|#{ENUMERATEDTYPE})"
ATTVALUE	= "(?:\"((?:[^<&\"]\|#{REFERENCE})*)\")\|(?:'((?:[^<&']\|#{REFERENCE})*)')"
CDATA_END	= /^\s*\]\s*>/um
CDATA_PATTERN	= /<!\[CDATA\[(.*?)\]\]>/um
CDATA_START	= /\A<!\[CDATA\[/u
CLOSE_MATCH	= /^\s*<\/(#{NAME_STR})\s*>/um
COMMENT_PATTERN	= /<!--(.*?)-->/um
COMMENT_START	= /\A<!--/u
DEFAULTDECL	= "(#REQUIRED\|#IMPLIED\|(?:(#FIXED\\s+)?#{ATTVALUE}))"
DEFAULT_ENTITIES	= { 'gt' => [/&gt;/, '&gt;', '>', />/], 'lt' => [/&lt;/, '&lt;', '<', /</], 'quot' => [/&quot;/, '&quot;', '"', /"/], "apos" => [/&apos;/, "&apos;", "'", /'/] }
DOCTYPE_PATTERN	= /\s*<!DOCTYPE\s+(.*?)(\[\|>)/um
DOCTYPE_START	= /\A\s*<!DOCTYPE\s/um
ELEMENTDECL_PATTERN	= /^\s*(<!ELEMENT.*?)>/um
ELEMENTDECL_START	= /^\s*<!ELEMENT/um
ENCODING	= /\bencoding\s*=\s*["'](.*?)['"]/um
ENTITYDECL	= /\s*(?:#{GEDECL})\|(?:#{PEDECL})/um
ENTITYDEF	= "(?:#{ENTITYVALUE}\|(?:#{EXTERNALID}(#{NDATADECL})?))"
ENTITYVALUE	= %Q{((?:"(?:[^%&"]\|#{PEREFERENCE}\|#{REFERENCE})*")\|(?:'([^%&']\|#{PEREFERENCE}\|#{REFERENCE})*'))}
ENTITY_START	= /^\s*<!ENTITY/
ENUMERATEDTYPE	= "(?:(?:#{NOTATIONTYPE})\|(?:#{ENUMERATION}))"
ENUMERATION	= "\\(\\s*#{NMTOKEN}(?:\\s*\\\|\\s*#{NMTOKEN})*\\s*\\)"
EREFERENCE	= /&(?!#{NAME};)/
EXTERNALID	= "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})\|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
GEDECL	= "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
IDENTITY	= /^([!\*\w\-]+)(\s+#{NCNAME_STR})?(\s+["'](.*?)['"])?(\s+['"](.*?)["'])?/u
INSTRUCTION_PATTERN	= /<\?(.*?)(\s+.*?)?\?>/um
INSTRUCTION_START	= /\A<\?/u
MISSING_ATTRIBUTE_QUOTES	= /^<#{NAME_STR}\s+#{NAME_STR}\s*=\s*[^"']/um	These are patterns to identify common markup errors, to make the error messages more informative.
NAME	= "([\\w:]#{NAMECHAR}*)"
NAMECHAR	= '[\-\w\d\.:]'
NAME_STR	= "(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})"
NCNAME_STR	= '[\w:][\-\w\d.]*'
NDATADECL	= "\\s+NDATA\\s+#{NAME}"
NMTOKEN	= "(?:#{NAMECHAR})+"
NMTOKENS	= "#{NMTOKEN}(\\s+#{NMTOKEN})*"
NOTATIONDECL_START	= /^\s*<!NOTATION/um
NOTATIONTYPE	= "NOTATION\\s+\\(\\s*#{NAME}(?:\\s*\\\|\\s*#{NAME})*\\s*\\)"
PEDECL	= "<!ENTITY\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
PEDEF	= "(?:#{ENTITYVALUE}\|#{EXTERNALID})"
PEREFERENCE	= "%#{NAME};"
PUBIDCHAR	= "\x20\x0D\x0Aa-zA-Z0-9\\-()+,./:=?;!*@$_%#"	Entity constants
PUBIDLITERAL	= %Q{("[#{PUBIDCHAR}']*"\|'[#{PUBIDCHAR}]*')}
PUBLIC	= /^\s*<!NOTATION\s+(\w[\-\w]*)\s+(PUBLIC)\s+(["'])(.*?)\3(?:\s+(["'])(.*?)\5)?\s*>/um
REFERENCE	= "(?:&#{NAME};\|&#\\d+;\|&#x[0-9a-fA-F]+;)"
REFERENCE_RE	= /#{REFERENCE}/
STANDALONE	= /\bstandalone\s*=\s*["'](.*?)['"]/um
SYSTEM	= /^\s*<!NOTATION\s+(\w[\-\w]*)\s+(SYSTEM)\s+(["'])(.*?)\3\s*>/um
SYSTEMENTITY	= /^\s*(%.*?;)\s*$/um
SYSTEMLITERAL	= %Q{((?:"[^"]*")\|(?:'[^']*'))}
TAG_MATCH	= /^<((?>#{NAME_STR}))\s*((?>\s+#{UNAME_STR}\s*=\s*(["']).*?\5)*)\s*(\/)?>/um
TEXT_PATTERN	= /\A([^<]*)/um
UNAME_STR	= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"
VERSION	= /\bversion\s*=\s*["'](.*?)['"]/um
XMLDECL_PATTERN	= /<\?xml\s+(.*?)\?>/um
XMLDECL_START	= /\A<\?xml\s/u;

Attributes

Name	Visibility	R/W	Description
source	public	R

Methods

Class

Visibility	Signature
public	new ( source )

Instance

Visibility	Signature
public	add_listener ( listener )
public	empty? ()
public	entity ( reference, entities )
public	has_next? ()
public	normalize ( input, entities=nil, entity_filter=nil )
public	peek (depth=0)
public	position ()
public	pull ()
public	stream= ( source )
public	unnormalize ( string, entities=nil, filter=nil )
public	unshift (token)

Class Method Detail

new( source )

Instance Method Detail

add_listener( listener )

empty?()

Returns true if there are no more events

entity( reference, entities )

has_next?()

Returns true if there are more events. Synonymous with !empty?

normalize( input, entities=nil, entity_filter=nil )

Escapes all possible entities

peek(depth=0)

Peek at the event at index depth in the stack. The first element on the stack is at depth 0. If depth is -1, this method will parse to the end of the input stream and return the last event, which is always :end_document. Be aware that this causes the stream to be parsed up to the event at that depth, so you can effectively pre-parse the entire document (pull the entire thing into memory) using this method.

position()

pull()

Returns the next event. This is a PullEvent object.

stream=( source )

unnormalize( string, entities=nil, filter=nil )

Unescapes all possible entities

unshift(token)

Push an event back on the head of the stream. This method has (theoretically) infinite depth.

REXML::Parsers::BaseParser