Added support for hyphens and periods in element and attribute names, plus default namespace declarations

This commit is contained in:
Vhati 2013-09-01 14:24:49 -04:00
parent a585047427
commit 4eb46ea542

View file

@ -39,12 +39,12 @@ public class SloppyXMLParser {
private Pattern declPtn = Pattern.compile( "(\\s*)<[?]xml [^?]*[?]>" ); private Pattern declPtn = Pattern.compile( "(\\s*)<[?]xml [^?]*[?]>" );
private Pattern commentPtn = Pattern.compile( "(?s)(\\s*)<!--((?:.(?!-->))*.)-->" ); private Pattern commentPtn = Pattern.compile( "(?s)(\\s*)<!--((?:.(?!-->))*.)-->" );
private Pattern cdataPtn = Pattern.compile( "(?s)(\\s*)<!\\[CDATA\\[((?:.(?!\\]\\]>))*.)\\]\\]>" ); private Pattern cdataPtn = Pattern.compile( "(?s)(\\s*)<!\\[CDATA\\[((?:.(?!\\]\\]>))*.)\\]\\]>" );
private Pattern sTagPtn = Pattern.compile( "(\\s*)<(?:(\\w+):)?(\\w+)((?: [^>]+?)??)\\s*(/?)>" ); private Pattern sTagPtn = Pattern.compile( "(\\s*)<(?:([\\w.-]+):)?([\\w.-]+)((?: [^>]+?)??)\\s*(/?)>" );
private Pattern eTagPtn = Pattern.compile( "([^<]*)</\\s*([^>]+)>" ); private Pattern eTagPtn = Pattern.compile( "([^<]*)</\\s*([^>]+)>" );
private Pattern endSpacePtn = Pattern.compile( "\\s+$" ); private Pattern endSpacePtn = Pattern.compile( "\\s+$" );
private Pattern strayCharsPtn = Pattern.compile( "(\\s*)(?:-->|[-.>,])" ); private Pattern strayCharsPtn = Pattern.compile( "(\\s*)(?:-->|[-.>,])" );
private Pattern attrPtn = Pattern.compile( "\\s*(?:(\\w+):)?(\\w+)\\s*=\\s*(\"[^\"]*\"|'[^']*')" ); private Pattern attrPtn = Pattern.compile( "\\s*(?:([\\w.-]+):)?([\\w.-]+)\\s*=\\s*(\"[^\"]*\"|'[^']*')" );
private List<Pattern> chunkPtns = new ArrayList<Pattern>(); private List<Pattern> chunkPtns = new ArrayList<Pattern>();
@ -138,7 +138,7 @@ public class SloppyXMLParser {
if ( attrPrefix != null ) { if ( attrPrefix != null ) {
if ( attrPrefix.equals( "xmlns" ) ) { if ( attrPrefix.equals( "xmlns" ) ) {
// This is a pseudo attribute declaring a namespace. // This is a pseudo attribute declaring a namespace prefix.
// Move it to the root node. // Move it to the root node.
Namespace attrNS = Namespace.getNamespace( attrName, attrName ); // URI? *shrug* Namespace attrNS = Namespace.getNamespace( attrName, attrName ); // URI? *shrug*
factory.addNamespaceDeclaration( rootNode, attrNS ); factory.addNamespaceDeclaration( rootNode, attrNS );
@ -149,7 +149,12 @@ public class SloppyXMLParser {
Attribute attrObj = factory.attribute( attrName, attrValue, AttributeType.UNDECLARED, attrNS ); Attribute attrObj = factory.attribute( attrName, attrValue, AttributeType.UNDECLARED, attrNS );
factory.setAttribute( tagNode, attrObj ); factory.setAttribute( tagNode, attrObj );
} }
} else if ( attrName.equals("xmlns") ) {
// New default namespace URI within this node.
Namespace attrNS = Namespace.getNamespace( attrValue );
factory.addNamespaceDeclaration( tagNode, attrNS );
} else { } else {
// Normal attribute.
Attribute attrObj = factory.attribute( attrName, attrValue, AttributeType.UNDECLARED, Namespace.NO_NAMESPACE ); Attribute attrObj = factory.attribute( attrName, attrValue, AttributeType.UNDECLARED, Namespace.NO_NAMESPACE );
factory.setAttribute( tagNode, attrObj ); factory.setAttribute( tagNode, attrObj );
} }