Switched to JDOMFactory for tree building
This commit is contained in:
parent
ab07a57174
commit
8d2713cb4d
1 changed files with 37 additions and 32 deletions
|
@ -6,11 +6,15 @@ import java.util.regex.Matcher;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
import org.xml.sax.SAXParseException;
|
import org.xml.sax.SAXParseException;
|
||||||
|
|
||||||
|
import org.jdom2.Attribute;
|
||||||
|
import org.jdom2.AttributeType;
|
||||||
import org.jdom2.CDATA;
|
import org.jdom2.CDATA;
|
||||||
import org.jdom2.Comment;
|
import org.jdom2.Comment;
|
||||||
import org.jdom2.Content;
|
import org.jdom2.Content;
|
||||||
|
import org.jdom2.DefaultJDOMFactory;
|
||||||
import org.jdom2.Document;
|
import org.jdom2.Document;
|
||||||
import org.jdom2.Element;
|
import org.jdom2.Element;
|
||||||
|
import org.jdom2.JDOMFactory;
|
||||||
import org.jdom2.Namespace;
|
import org.jdom2.Namespace;
|
||||||
import org.jdom2.Parent;
|
import org.jdom2.Parent;
|
||||||
import org.jdom2.Text;
|
import org.jdom2.Text;
|
||||||
|
@ -38,30 +42,36 @@ public class SloppyXMLParser {
|
||||||
private Pattern sTagPtn = Pattern.compile( "(\\s*)<(?:(\\w+):)?(\\w+)((?: [^>]+?)??)\\s*(/?)>" );
|
private Pattern sTagPtn = Pattern.compile( "(\\s*)<(?:(\\w+):)?(\\w+)((?: [^>]+?)??)\\s*(/?)>" );
|
||||||
private Pattern eTagPtn = Pattern.compile( "([^<]*)</\\s*([^>]+)>" );
|
private Pattern eTagPtn = Pattern.compile( "([^<]*)</\\s*([^>]+)>" );
|
||||||
private Pattern endSpacePtn = Pattern.compile( "\\s+$" );
|
private Pattern endSpacePtn = Pattern.compile( "\\s+$" );
|
||||||
private Pattern strayECommentPtn = Pattern.compile( "(\\s*)-->" );
|
private Pattern strayCharsPtn = Pattern.compile( "(\\s*)(?:-->|[-.>,])" );
|
||||||
private Pattern strayCharsPtn = Pattern.compile( "(\\s*)[-.>,]" );
|
|
||||||
|
|
||||||
private Pattern attrPtn = Pattern.compile( "\\s*(?:(\\w+):)?(\\w+)\\s*=\\s*(\"[^\"]*\"|'[^']*')" );
|
private Pattern attrPtn = Pattern.compile( "\\s*(?:(\\w+):)?(\\w+)\\s*=\\s*(\"[^\"]*\"|'[^']*')" );
|
||||||
|
|
||||||
private List<Pattern> chunkPtns = new ArrayList<Pattern>();
|
private List<Pattern> chunkPtns = new ArrayList<Pattern>();
|
||||||
|
|
||||||
|
private JDOMFactory factory;
|
||||||
|
|
||||||
|
|
||||||
public SloppyXMLParser() {
|
public SloppyXMLParser() {
|
||||||
|
this( null );
|
||||||
|
}
|
||||||
|
|
||||||
|
public SloppyXMLParser( JDOMFactory factory ) {
|
||||||
|
if ( factory == null ) factory = new DefaultJDOMFactory();
|
||||||
|
this.factory = factory;
|
||||||
|
|
||||||
chunkPtns.add( declPtn );
|
chunkPtns.add( declPtn );
|
||||||
chunkPtns.add( commentPtn );
|
chunkPtns.add( commentPtn );
|
||||||
chunkPtns.add( cdataPtn );
|
chunkPtns.add( cdataPtn );
|
||||||
chunkPtns.add( sTagPtn );
|
chunkPtns.add( sTagPtn );
|
||||||
chunkPtns.add( eTagPtn );
|
chunkPtns.add( eTagPtn );
|
||||||
chunkPtns.add( endSpacePtn );
|
chunkPtns.add( endSpacePtn );
|
||||||
chunkPtns.add( strayECommentPtn );
|
|
||||||
chunkPtns.add( strayCharsPtn );
|
chunkPtns.add( strayCharsPtn );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public Document build( CharSequence s ) throws JDOMParseException {
|
public Document build( CharSequence s ) throws JDOMParseException {
|
||||||
Document doc = new Document();
|
Element rootNode = factory.element( "wrapper" );
|
||||||
Element rootNode = new Element( "wrapper" );
|
Document doc = factory.document( rootNode );
|
||||||
doc.setRootElement( rootNode );
|
|
||||||
|
|
||||||
Parent parentNode = rootNode;
|
Parent parentNode = rootNode;
|
||||||
int sLen = s.length();
|
int sLen = s.length();
|
||||||
|
@ -84,25 +94,25 @@ public class SloppyXMLParser {
|
||||||
else if ( chunkPtn == commentPtn ) {
|
else if ( chunkPtn == commentPtn ) {
|
||||||
String whitespace = m.group( 1 );
|
String whitespace = m.group( 1 );
|
||||||
if ( whitespace.length() > 0 )
|
if ( whitespace.length() > 0 )
|
||||||
parentNode.addContent( new Text( whitespace ) );
|
factory.addContent( parentNode, factory.text( whitespace ) );
|
||||||
|
|
||||||
tmp = m.group( 2 );
|
tmp = m.group( 2 );
|
||||||
tmp = tmp.replaceAll( "^-+|(?<=-)-+|-+$", "" );
|
tmp = tmp.replaceAll( "^-+|(?<=-)-+|-+$", "" );
|
||||||
Comment commentNode = new Comment( tmp );
|
Comment commentNode = factory.comment( tmp );
|
||||||
parentNode.addContent( commentNode );
|
factory.addContent( parentNode, commentNode );
|
||||||
}
|
}
|
||||||
else if ( chunkPtn == cdataPtn ) {
|
else if ( chunkPtn == cdataPtn ) {
|
||||||
String whitespace = m.group( 1 );
|
String whitespace = m.group( 1 );
|
||||||
if ( whitespace.length() > 0 )
|
if ( whitespace.length() > 0 )
|
||||||
parentNode.addContent( new Text( whitespace ) );
|
factory.addContent( parentNode, factory.text( whitespace ) );
|
||||||
|
|
||||||
CDATA cdataNode = new CDATA( m.group(2) );
|
CDATA cdataNode = factory.cdata( m.group(2) );
|
||||||
parentNode.addContent( cdataNode );
|
factory.addContent( parentNode, cdataNode );
|
||||||
}
|
}
|
||||||
else if ( chunkPtn == sTagPtn ) {
|
else if ( chunkPtn == sTagPtn ) {
|
||||||
String whitespace = m.group( 1 );
|
String whitespace = m.group( 1 );
|
||||||
if ( whitespace.length() > 0 )
|
if ( whitespace.length() > 0 )
|
||||||
parentNode.addContent( new Text( whitespace ) );
|
factory.addContent( parentNode, factory.text( whitespace ) );
|
||||||
|
|
||||||
String nodePrefix = m.group( 2 ); // Might be null.
|
String nodePrefix = m.group( 2 ); // Might be null.
|
||||||
String nodeName = m.group( 3 );
|
String nodeName = m.group( 3 );
|
||||||
|
@ -112,10 +122,10 @@ public class SloppyXMLParser {
|
||||||
Element tagNode;
|
Element tagNode;
|
||||||
if ( nodePrefix != null ) {
|
if ( nodePrefix != null ) {
|
||||||
Namespace nodeNS = Namespace.getNamespace( nodePrefix, nodePrefix ); // URI? *shrug*
|
Namespace nodeNS = Namespace.getNamespace( nodePrefix, nodePrefix ); // URI? *shrug*
|
||||||
rootNode.addNamespaceDeclaration( nodeNS );
|
factory.addNamespaceDeclaration( rootNode, nodeNS );
|
||||||
tagNode = new Element( nodeName, nodeNS );
|
tagNode = factory.element( nodeName, nodeNS );
|
||||||
} else {
|
} else {
|
||||||
tagNode = new Element( nodeName );
|
tagNode = factory.element( nodeName );
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( attrString.length() > 0 ) {
|
if ( attrString.length() > 0 ) {
|
||||||
|
@ -131,15 +141,17 @@ public class SloppyXMLParser {
|
||||||
// This is a pseudo attribute declaring a namespace.
|
// This is a pseudo attribute declaring a namespace.
|
||||||
// Move it to the root node.
|
// Move it to the root node.
|
||||||
Namespace attrNS = Namespace.getNamespace( attrName, attrName ); // URI? *shrug*
|
Namespace attrNS = Namespace.getNamespace( attrName, attrName ); // URI? *shrug*
|
||||||
rootNode.addNamespaceDeclaration( attrNS );
|
factory.addNamespaceDeclaration( rootNode, attrNS );
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
Namespace attrNS = Namespace.getNamespace( attrPrefix, attrPrefix ); // URI? *shrug*
|
Namespace attrNS = Namespace.getNamespace( attrPrefix, attrPrefix ); // URI? *shrug*
|
||||||
rootNode.addNamespaceDeclaration( attrNS );
|
factory.addNamespaceDeclaration( rootNode, attrNS );
|
||||||
tagNode.setAttribute( attrName, attrValue, attrNS );
|
Attribute attrObj = factory.attribute( attrName, attrValue, AttributeType.UNDECLARED, attrNS );
|
||||||
|
factory.setAttribute( tagNode, attrObj );
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
tagNode.setAttribute( attrName, attrValue );
|
Attribute attrObj = factory.attribute( attrName, attrValue, AttributeType.UNDECLARED, Namespace.NO_NAMESPACE );
|
||||||
|
factory.setAttribute( tagNode, attrObj );
|
||||||
}
|
}
|
||||||
am.region( am.end(), am.regionEnd() );
|
am.region( am.end(), am.regionEnd() );
|
||||||
}
|
}
|
||||||
|
@ -153,30 +165,23 @@ public class SloppyXMLParser {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
parentNode.addContent( tagNode );
|
factory.addContent( parentNode, tagNode );
|
||||||
if ( !selfClosing ) parentNode = tagNode;
|
if ( !selfClosing ) parentNode = tagNode;
|
||||||
}
|
}
|
||||||
else if ( chunkPtn == eTagPtn ) {
|
else if ( chunkPtn == eTagPtn ) {
|
||||||
String interimText = m.group( 1 );
|
String interimText = m.group( 1 );
|
||||||
parentNode.addContent( new Text( interimText ) );
|
factory.addContent( parentNode, factory.text( interimText ) );
|
||||||
parentNode = parentNode.getParent();
|
parentNode = parentNode.getParent();
|
||||||
}
|
}
|
||||||
else if ( chunkPtn == endSpacePtn ) {
|
else if ( chunkPtn == endSpacePtn ) {
|
||||||
// This is the end of the document.
|
// This is the end of the document.
|
||||||
}
|
}
|
||||||
else if ( chunkPtn == strayECommentPtn ) {
|
|
||||||
// Stray end-comment bracket.
|
|
||||||
|
|
||||||
String whitespace = m.group( 1 );
|
|
||||||
if ( whitespace.length() > 0 )
|
|
||||||
parentNode.addContent( new Text( whitespace ) );
|
|
||||||
}
|
|
||||||
else if ( chunkPtn == strayCharsPtn ) {
|
else if ( chunkPtn == strayCharsPtn ) {
|
||||||
// Non-space junk between an end tag and a start tag.
|
// Non-space junk between an end tag and a start tag.
|
||||||
|
|
||||||
String whitespace = m.group( 1 );
|
String whitespace = m.group( 1 );
|
||||||
if ( whitespace.length() > 0 )
|
if ( whitespace.length() > 0 )
|
||||||
parentNode.addContent( new Text( whitespace ) );
|
factory.addContent( parentNode, factory.text( whitespace ) );
|
||||||
}
|
}
|
||||||
|
|
||||||
matchedChunk = true;
|
matchedChunk = true;
|
||||||
|
@ -201,9 +206,9 @@ public class SloppyXMLParser {
|
||||||
Element newRoot = rootNode.getChildren().get( 0 );
|
Element newRoot = rootNode.getChildren().get( 0 );
|
||||||
newRoot.detach();
|
newRoot.detach();
|
||||||
for ( Namespace ns : rootNode.getAdditionalNamespaces() ) {
|
for ( Namespace ns : rootNode.getAdditionalNamespaces() ) {
|
||||||
newRoot.addNamespaceDeclaration( ns );
|
factory.addNamespaceDeclaration( newRoot, ns );
|
||||||
}
|
}
|
||||||
doc.setRootElement( newRoot );
|
factory.setRoot( doc, newRoot );
|
||||||
}
|
}
|
||||||
|
|
||||||
return doc;
|
return doc;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue