Added SloppyXMLParser

This commit is contained in:
Vhati 2013-08-26 17:29:14 -04:00
parent c5dc2669cf
commit c9b5c38368
3 changed files with 292 additions and 4 deletions

View file

@ -28,6 +28,7 @@ import java.util.zip.ZipInputStream;
import net.vhati.modmanager.core.Report;
import net.vhati.modmanager.core.Report.ReportMessage;
import net.vhati.modmanager.core.SloppyXMLParser;
import ar.com.hjg.pngj.PngReader;
@ -403,12 +404,24 @@ public class ModUtilities {
pendingMsgs.add( new ReportMessage(
ReportMessage.WARNING_SUBSECTION,
"XML Syntax Issues:",
"Normal XML Parser Issues:",
condensedList
) );
}
if ( xmlReport.outcome == false )
modValid = false;
Report sloppyReport = validateSloppyModXML( decodeResult.text );
if ( sloppyReport.messages.size() > 0 ) {
pendingMsgs.add( new ReportMessage(
ReportMessage.ERROR_SUBSECTION,
"Sloppy XML Parser Issues:",
sloppyReport.messages
) );
}
if ( sloppyReport.outcome == false )
modValid = false;
}
}
@ -683,7 +696,7 @@ public class ModUtilities {
badLine = srcBuf.substring( badStart, badEnd );
}
}
String msg = String.format( "Fix this and try again:\n%s", e );
String msg = String.format( "Fix this and try again:\n%s", e.toString() );
msg += "\n";
msg += "~ ~ ~ ~ ~\n";
msg += badLine +"\n";
@ -714,6 +727,58 @@ public class ModUtilities {
}
/**
 * Checks if a mod's xml can be parsed sloppily.
 *
 * The text is fed to a SloppyXMLParser. If that throws a JDOMParseException,
 * the returned report is marked as failed and carries one EXCEPTION message.
 * When the exception reports a line number, the message includes the
 * exception's string form and the offending line of text.
 *
 * @param text unparsed xml
 * @return a report with an empty message list and a true outcome on success,
 *         or a single message and a false outcome on failure
 */
public static Report validateSloppyModXML( String text ) {
	List<ReportMessage> messages = new ArrayList<ReportMessage>();
	boolean xmlValid = true;

	try {
		SloppyXMLParser parser = new SloppyXMLParser();
		parser.build( text );
	}
	catch ( JDOMParseException e ) {
		int lineNum = e.getLineNumber();
		if ( lineNum != -1 ) {
			// The first line has no preceding line break, so it begins at
			// offset 0. Every later line begins right after break lineNum-1.
			int badStart = ( lineNum == 1 ? 0 : -1 );
			int badEnd = -1;
			String badLine = "???";

			// Each match is either a line break or the end of the text, so
			// the i-th match terminates the i-th line.
			Matcher m = Pattern.compile( "\n|\\z" ).matcher( text );
			for ( int i=1; i <= lineNum && m.find(); i++ ) {
				if ( i == lineNum-1 ) {
					badStart = m.end();
				}
				else if ( i == lineNum && badStart != -1 ) {
					badEnd = m.start();
					badLine = text.substring( badStart, badEnd );
				}
			}

			String msg = String.format( "Fix this and try again:\n%s", e.toString() );
			msg += "\n";
			msg += "~ ~ ~ ~ ~\n";
			msg += badLine +"\n";
			msg += "~ ~ ~ ~ ~";
			messages.add( new ReportMessage(
				ReportMessage.EXCEPTION,
				msg
			) );
		}
		else {
			// No line number was available to quote from the text.
			messages.add( new ReportMessage(
				ReportMessage.EXCEPTION,
				"An error occurred. See log for details."
			) );
		}
		xmlValid = false;
	}

	return new Report( messages, xmlValid );
}
/**
* Returns the latest modification time among files within a mod.
*

View file

@ -0,0 +1,217 @@
package net.vhati.modmanager.core;
import java.io.IOException;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.xml.sax.SAXParseException;
import org.jdom2.CDATA;
import org.jdom2.Comment;
import org.jdom2.Content;
import org.jdom2.Document;
import org.jdom2.Element;
import org.jdom2.Parent;
import org.jdom2.input.JDOMParseException;
import org.jdom2.located.LocatedText;
import org.jdom2.output.Format;
import org.jdom2.output.XMLOutputter;
/**
* A scraper for malformed XML.
*
* Sloppiness:
* Any closing tag, regardless of its name, closes the parent tag.
* <!-- <!-- blah --> is valid (but the extra dashes will be discarded).
* --> can occur alone (discarded).
* An attribute name can start right after the quote from a prior value.
*
* Only use this as a last resort, after a real parser fails.
*/
public class SloppyXMLParser {
private Pattern declPtn = Pattern.compile( "(\\s*)<[?]xml [^?]*[?]>" );
private Pattern commentPtn = Pattern.compile( "(?s)(\\s*)<!--((?:.(?!-->))*.)-->" );
private Pattern cdataPtn = Pattern.compile( "(?s)(\\s*)<!\\[CDATA\\[((?:.(?!\\]\\]>))*.)\\]\\]>" );
private Pattern sTagPtn = Pattern.compile( "(\\s*)<(?:(\\w+):)?(\\w+)((?: [^>]+?)??)\\s*(/?)>" );
private Pattern eTagPtn = Pattern.compile( "([^<]*)</\\s*([^>]+)>" );
private Pattern endSpacePtn = Pattern.compile( "\\s+$" );
private Pattern strayECommentPtn = Pattern.compile( "(\\s*)-->" );
private Pattern strayCharsPtn = Pattern.compile( "(\\s*)[-.>,]" );
private Pattern attrPtn = Pattern.compile( "\\s*([^=]+?)\\s*=\\s*(\"[^\"]*\"|'[^']*')" );
private List<Pattern> chunkPtns = new ArrayList<Pattern>();
public SloppyXMLParser() {
chunkPtns.add( declPtn );
chunkPtns.add( commentPtn );
chunkPtns.add( cdataPtn );
chunkPtns.add( sTagPtn );
chunkPtns.add( eTagPtn );
chunkPtns.add( endSpacePtn );
chunkPtns.add( strayECommentPtn );
chunkPtns.add( strayCharsPtn );
}
public Document build( CharSequence s ) throws JDOMParseException {
Document doc = new Document();
Element rootNode = new Element( "wrapper" );
doc.addContent( rootNode );
Parent parentNode = rootNode;
int sLen = s.length();
int lastPos = -1;
int pos = 0;
String tmp = null;
Matcher m = declPtn.matcher( s );
while ( pos > lastPos && pos < sLen ) {
m.region( pos, sLen );
boolean matchedChunk = false;
for ( Pattern chunkPtn : chunkPtns ) {
m.usePattern( chunkPtn );
if ( !m.lookingAt() ) continue;
if ( chunkPtn == declPtn ) {
// Don't care.
}
else if ( chunkPtn == commentPtn ) {
String whitespace = m.group( 1 );
if ( whitespace.length() > 0 )
parentNode.addContent( new LocatedText( whitespace ) );
tmp = m.group( 2 );
tmp = tmp.replaceAll( "^-+|(?<=-)-+|-+$", "" );
Comment commentNode = new Comment( tmp );
parentNode.addContent( commentNode );
}
else if ( chunkPtn == cdataPtn ) {
String whitespace = m.group( 1 );
if ( whitespace.length() > 0 )
parentNode.addContent( new LocatedText( whitespace ) );
CDATA cdataNode = new CDATA( m.group(2) );
parentNode.addContent( cdataNode );
}
else if ( chunkPtn == sTagPtn ) {
String whitespace = m.group( 1 );
if ( whitespace.length() > 0 )
parentNode.addContent( new LocatedText( whitespace ) );
String nodeNS = m.group( 2 ); // Might be null.
String nodeName = m.group( 3 );
String attrString = m.group( 4 );
boolean selfClosing = ( m.group( 5 ).length() > 0 );
Element tagNode = new Element( nodeName );
if ( attrString.length() > 0 ) {
Matcher am = attrPtn.matcher( attrString );
while ( am.lookingAt() ) {
String attrName = am.group( 1 );
String attrValue = am.group( 2 );
attrValue = attrValue.substring( 1, attrValue.length()-1 );
tagNode.setAttribute( attrName, attrValue );
am.region( am.end(), am.regionEnd() );
}
if ( am.regionStart() < attrString.length() ) {
int[] lineAndCol = getLineAndCol( s, pos );
int lineNum = lineAndCol[0];
int colNum = lineAndCol[1];
SAXParseException cause = new SAXParseException( String.format( "At line %d, column %d: Strange attributes.", lineNum, colNum ), null, null, lineNum, colNum);
throw new JDOMParseException( String.format( "Error on line %d: %s", lineNum, cause.getMessage() ), cause );
}
}
parentNode.addContent( tagNode );
if ( !selfClosing ) parentNode = tagNode;
}
else if ( chunkPtn == eTagPtn ) {
String interimText = m.group( 1 );
parentNode.addContent( new LocatedText( interimText ) );
parentNode = parentNode.getParent();
}
else if ( chunkPtn == endSpacePtn ) {
// This is the end of the document.
}
else if ( chunkPtn == strayECommentPtn ) {
// Stray end-comment bracket.
String whitespace = m.group( 1 );
if ( whitespace.length() > 0 )
parentNode.addContent( new LocatedText( whitespace ) );
}
else if ( chunkPtn == strayECommentPtn ) {
// Non-space junk between an end tag and a start tag.
String whitespace = m.group( 1 );
if ( whitespace.length() > 0 )
parentNode.addContent( new LocatedText( whitespace ) );
}
matchedChunk = true;
lastPos = pos;
pos = m.end();
break;
}
if ( !matchedChunk ) {
int[] lineAndCol = getLineAndCol( s, pos );
int lineNum = lineAndCol[0];
int colNum = lineAndCol[1];
SAXParseException cause = new SAXParseException( String.format( "At line %d, column %d: Unexpected characters.", lineNum, colNum ), null, null, lineNum, colNum);
throw new JDOMParseException( String.format( "Error on line %d: %s", lineNum, cause.getMessage() ), cause );
}
}
return doc;
}
/**
* Returns lineNum and colNum for a position in text.
*/
private int[] getLineAndCol( CharSequence s, int pos ) {
Matcher breakMatcher = Pattern.compile( "\n" ).matcher( s );
breakMatcher.region( 0, pos+1 );
int lastBreakPos = -1;
int lineNum = 1;
while ( breakMatcher.find() ) {
lastBreakPos = breakMatcher.start();
breakMatcher.region( breakMatcher.end(), breakMatcher.regionEnd() );
lineNum++;
}
int colNum;
if ( lastBreakPos == -1 )
colNum = pos+1;
else
colNum = pos - lastBreakPos;
return new int[] { lineNum, colNum };
}
public String prettyPrint( Document doc ) {
Format format = Format.getPrettyFormat();
//format.setExpandEmptyElements( true );
StringWriter writer = new StringWriter();
XMLOutputter outputter = new XMLOutputter( format );
try {
outputter.output( doc, writer );
}
catch ( IOException e ) {e.printStackTrace();}
return writer.toString();
}
}

View file

@ -570,9 +570,15 @@ public class ManagerFrame extends JFrame implements ActionListener, HashObserver
resultBuf.append( "No mods were checked." );
}
else if ( anyInvalid ) {
resultBuf.append( "FTL itself can tolerate lots of errors and still run. " );
resultBuf.append( "But invalid XML may break tools that do proper parsing, " );
resultBuf.append( "FTL itself can tolerate lots of XML typos and still run. " );
resultBuf.append( "But malformed XML may break tools that do proper parsing, " );
resultBuf.append( "and it hinders the development of new tools.\n" );
resultBuf.append( "\n" );
resultBuf.append( "In future releases, Slipstream will try to parse XML while " );
resultBuf.append( "patching: first strictly, then failing over to a sloppy " );
resultBuf.append( "parser. The sloppy parser will tolerate similar errors, " );
resultBuf.append( "at the risk of unforseen behavior, so satisfying the " );
resultBuf.append( "strict parser is advised.\n" );
}
infoArea.setDescription( "Results", resultBuf.toString() );
}