Added SloppyXMLParser
This commit is contained in:
parent
c5dc2669cf
commit
c9b5c38368
3 changed files with 292 additions and 4 deletions
|
@ -28,6 +28,7 @@ import java.util.zip.ZipInputStream;
|
||||||
|
|
||||||
import net.vhati.modmanager.core.Report;
|
import net.vhati.modmanager.core.Report;
|
||||||
import net.vhati.modmanager.core.Report.ReportMessage;
|
import net.vhati.modmanager.core.Report.ReportMessage;
|
||||||
|
import net.vhati.modmanager.core.SloppyXMLParser;
|
||||||
|
|
||||||
import ar.com.hjg.pngj.PngReader;
|
import ar.com.hjg.pngj.PngReader;
|
||||||
|
|
||||||
|
@ -403,12 +404,24 @@ public class ModUtilities {
|
||||||
|
|
||||||
pendingMsgs.add( new ReportMessage(
|
pendingMsgs.add( new ReportMessage(
|
||||||
ReportMessage.WARNING_SUBSECTION,
|
ReportMessage.WARNING_SUBSECTION,
|
||||||
"XML Syntax Issues:",
|
"Normal XML Parser Issues:",
|
||||||
condensedList
|
condensedList
|
||||||
) );
|
) );
|
||||||
}
|
}
|
||||||
if ( xmlReport.outcome == false )
|
if ( xmlReport.outcome == false )
|
||||||
modValid = false;
|
modValid = false;
|
||||||
|
|
||||||
|
Report sloppyReport = validateSloppyModXML( decodeResult.text );
|
||||||
|
|
||||||
|
if ( sloppyReport.messages.size() > 0 ) {
|
||||||
|
pendingMsgs.add( new ReportMessage(
|
||||||
|
ReportMessage.ERROR_SUBSECTION,
|
||||||
|
"Sloppy XML Parser Issues:",
|
||||||
|
sloppyReport.messages
|
||||||
|
) );
|
||||||
|
}
|
||||||
|
if ( sloppyReport.outcome == false )
|
||||||
|
modValid = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -683,7 +696,7 @@ public class ModUtilities {
|
||||||
badLine = srcBuf.substring( badStart, badEnd );
|
badLine = srcBuf.substring( badStart, badEnd );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
String msg = String.format( "Fix this and try again:\n%s", e );
|
String msg = String.format( "Fix this and try again:\n%s", e.toString() );
|
||||||
msg += "\n";
|
msg += "\n";
|
||||||
msg += "~ ~ ~ ~ ~\n";
|
msg += "~ ~ ~ ~ ~\n";
|
||||||
msg += badLine +"\n";
|
msg += badLine +"\n";
|
||||||
|
@ -714,6 +727,58 @@ public class ModUtilities {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks if a mod's xml can be parsed sloppily.
|
||||||
|
*
|
||||||
|
* @param text unparsed xml
|
||||||
|
*/
|
||||||
|
public static Report validateSloppyModXML( String text ) {
|
||||||
|
|
||||||
|
List<ReportMessage> messages = new ArrayList<ReportMessage>();
|
||||||
|
boolean xmlValid = true;
|
||||||
|
|
||||||
|
try {
|
||||||
|
SloppyXMLParser parser = new SloppyXMLParser();
|
||||||
|
parser.build( text );
|
||||||
|
}
|
||||||
|
catch ( JDOMParseException e ) {
|
||||||
|
int lineNum = e.getLineNumber();
|
||||||
|
if ( lineNum != -1 ) {
|
||||||
|
int badStart = -1;
|
||||||
|
int badEnd = -1;
|
||||||
|
String badLine = "???";
|
||||||
|
Matcher m = Pattern.compile( "\n|\\z" ).matcher( text );
|
||||||
|
for ( int i=1; i <= lineNum && m.find(); i++) {
|
||||||
|
if ( i == lineNum-1 ) {
|
||||||
|
badStart = m.end();
|
||||||
|
} else if ( i == lineNum ) {
|
||||||
|
badEnd = m.start();
|
||||||
|
badLine = text.substring( badStart, badEnd );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
String msg = String.format( "Fix this and try again:\n%s", e.toString() );
|
||||||
|
msg += "\n";
|
||||||
|
msg += "~ ~ ~ ~ ~\n";
|
||||||
|
msg += badLine +"\n";
|
||||||
|
msg += "~ ~ ~ ~ ~";
|
||||||
|
messages.add( new ReportMessage(
|
||||||
|
ReportMessage.EXCEPTION,
|
||||||
|
msg
|
||||||
|
) );
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
messages.add( new ReportMessage(
|
||||||
|
ReportMessage.EXCEPTION,
|
||||||
|
"An error occurred. See log for details."
|
||||||
|
) );
|
||||||
|
}
|
||||||
|
xmlValid = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return new Report( messages, xmlValid );
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the latest modification time among files within a mod.
|
* Returns the latest modification time among files within a mod.
|
||||||
*
|
*
|
||||||
|
|
217
src/main/java/net/vhati/modmanager/core/SloppyXMLParser.java
Normal file
217
src/main/java/net/vhati/modmanager/core/SloppyXMLParser.java
Normal file
|
@ -0,0 +1,217 @@
|
||||||
|
package net.vhati.modmanager.core;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.StringWriter;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.regex.Matcher;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
import org.xml.sax.SAXParseException;
|
||||||
|
|
||||||
|
import org.jdom2.CDATA;
|
||||||
|
import org.jdom2.Comment;
|
||||||
|
import org.jdom2.Content;
|
||||||
|
import org.jdom2.Document;
|
||||||
|
import org.jdom2.Element;
|
||||||
|
import org.jdom2.Parent;
|
||||||
|
import org.jdom2.input.JDOMParseException;
|
||||||
|
import org.jdom2.located.LocatedText;
|
||||||
|
import org.jdom2.output.Format;
|
||||||
|
import org.jdom2.output.XMLOutputter;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A scraper for malformed XML.
|
||||||
|
*
|
||||||
|
* Sloppiness:
|
||||||
|
* Any closing tag, regardless of its name, closes the parent tag.
|
||||||
|
* <!-- <!-- blah --> is valid (but the extra dashes will be discarded).
|
||||||
|
* --> can occur alone (discarded).
|
||||||
|
* An attribute name can start right after the quote from a prior value.
|
||||||
|
*
|
||||||
|
* Only use this as a last resort, after a real parser fails.
|
||||||
|
*/
|
||||||
|
public class SloppyXMLParser {
|
||||||
|
|
||||||
|
private Pattern declPtn = Pattern.compile( "(\\s*)<[?]xml [^?]*[?]>" );
|
||||||
|
private Pattern commentPtn = Pattern.compile( "(?s)(\\s*)<!--((?:.(?!-->))*.)-->" );
|
||||||
|
private Pattern cdataPtn = Pattern.compile( "(?s)(\\s*)<!\\[CDATA\\[((?:.(?!\\]\\]>))*.)\\]\\]>" );
|
||||||
|
private Pattern sTagPtn = Pattern.compile( "(\\s*)<(?:(\\w+):)?(\\w+)((?: [^>]+?)??)\\s*(/?)>" );
|
||||||
|
private Pattern eTagPtn = Pattern.compile( "([^<]*)</\\s*([^>]+)>" );
|
||||||
|
private Pattern endSpacePtn = Pattern.compile( "\\s+$" );
|
||||||
|
private Pattern strayECommentPtn = Pattern.compile( "(\\s*)-->" );
|
||||||
|
private Pattern strayCharsPtn = Pattern.compile( "(\\s*)[-.>,]" );
|
||||||
|
|
||||||
|
private Pattern attrPtn = Pattern.compile( "\\s*([^=]+?)\\s*=\\s*(\"[^\"]*\"|'[^']*')" );
|
||||||
|
|
||||||
|
private List<Pattern> chunkPtns = new ArrayList<Pattern>();
|
||||||
|
|
||||||
|
|
||||||
|
public SloppyXMLParser() {
|
||||||
|
chunkPtns.add( declPtn );
|
||||||
|
chunkPtns.add( commentPtn );
|
||||||
|
chunkPtns.add( cdataPtn );
|
||||||
|
chunkPtns.add( sTagPtn );
|
||||||
|
chunkPtns.add( eTagPtn );
|
||||||
|
chunkPtns.add( endSpacePtn );
|
||||||
|
chunkPtns.add( strayECommentPtn );
|
||||||
|
chunkPtns.add( strayCharsPtn );
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public Document build( CharSequence s ) throws JDOMParseException {
|
||||||
|
Document doc = new Document();
|
||||||
|
Element rootNode = new Element( "wrapper" );
|
||||||
|
doc.addContent( rootNode );
|
||||||
|
|
||||||
|
Parent parentNode = rootNode;
|
||||||
|
int sLen = s.length();
|
||||||
|
int lastPos = -1;
|
||||||
|
int pos = 0;
|
||||||
|
String tmp = null;
|
||||||
|
Matcher m = declPtn.matcher( s );
|
||||||
|
|
||||||
|
while ( pos > lastPos && pos < sLen ) {
|
||||||
|
m.region( pos, sLen );
|
||||||
|
boolean matchedChunk = false;
|
||||||
|
|
||||||
|
for ( Pattern chunkPtn : chunkPtns ) {
|
||||||
|
m.usePattern( chunkPtn );
|
||||||
|
if ( !m.lookingAt() ) continue;
|
||||||
|
|
||||||
|
if ( chunkPtn == declPtn ) {
|
||||||
|
// Don't care.
|
||||||
|
}
|
||||||
|
else if ( chunkPtn == commentPtn ) {
|
||||||
|
String whitespace = m.group( 1 );
|
||||||
|
if ( whitespace.length() > 0 )
|
||||||
|
parentNode.addContent( new LocatedText( whitespace ) );
|
||||||
|
|
||||||
|
tmp = m.group( 2 );
|
||||||
|
tmp = tmp.replaceAll( "^-+|(?<=-)-+|-+$", "" );
|
||||||
|
Comment commentNode = new Comment( tmp );
|
||||||
|
parentNode.addContent( commentNode );
|
||||||
|
}
|
||||||
|
else if ( chunkPtn == cdataPtn ) {
|
||||||
|
String whitespace = m.group( 1 );
|
||||||
|
if ( whitespace.length() > 0 )
|
||||||
|
parentNode.addContent( new LocatedText( whitespace ) );
|
||||||
|
|
||||||
|
CDATA cdataNode = new CDATA( m.group(2) );
|
||||||
|
parentNode.addContent( cdataNode );
|
||||||
|
}
|
||||||
|
else if ( chunkPtn == sTagPtn ) {
|
||||||
|
String whitespace = m.group( 1 );
|
||||||
|
if ( whitespace.length() > 0 )
|
||||||
|
parentNode.addContent( new LocatedText( whitespace ) );
|
||||||
|
|
||||||
|
String nodeNS = m.group( 2 ); // Might be null.
|
||||||
|
String nodeName = m.group( 3 );
|
||||||
|
String attrString = m.group( 4 );
|
||||||
|
boolean selfClosing = ( m.group( 5 ).length() > 0 );
|
||||||
|
|
||||||
|
Element tagNode = new Element( nodeName );
|
||||||
|
|
||||||
|
if ( attrString.length() > 0 ) {
|
||||||
|
Matcher am = attrPtn.matcher( attrString );
|
||||||
|
while ( am.lookingAt() ) {
|
||||||
|
String attrName = am.group( 1 );
|
||||||
|
String attrValue = am.group( 2 );
|
||||||
|
attrValue = attrValue.substring( 1, attrValue.length()-1 );
|
||||||
|
tagNode.setAttribute( attrName, attrValue );
|
||||||
|
am.region( am.end(), am.regionEnd() );
|
||||||
|
}
|
||||||
|
if ( am.regionStart() < attrString.length() ) {
|
||||||
|
int[] lineAndCol = getLineAndCol( s, pos );
|
||||||
|
int lineNum = lineAndCol[0];
|
||||||
|
int colNum = lineAndCol[1];
|
||||||
|
|
||||||
|
SAXParseException cause = new SAXParseException( String.format( "At line %d, column %d: Strange attributes.", lineNum, colNum ), null, null, lineNum, colNum);
|
||||||
|
throw new JDOMParseException( String.format( "Error on line %d: %s", lineNum, cause.getMessage() ), cause );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
parentNode.addContent( tagNode );
|
||||||
|
if ( !selfClosing ) parentNode = tagNode;
|
||||||
|
}
|
||||||
|
else if ( chunkPtn == eTagPtn ) {
|
||||||
|
String interimText = m.group( 1 );
|
||||||
|
parentNode.addContent( new LocatedText( interimText ) );
|
||||||
|
parentNode = parentNode.getParent();
|
||||||
|
}
|
||||||
|
else if ( chunkPtn == endSpacePtn ) {
|
||||||
|
// This is the end of the document.
|
||||||
|
}
|
||||||
|
else if ( chunkPtn == strayECommentPtn ) {
|
||||||
|
// Stray end-comment bracket.
|
||||||
|
|
||||||
|
String whitespace = m.group( 1 );
|
||||||
|
if ( whitespace.length() > 0 )
|
||||||
|
parentNode.addContent( new LocatedText( whitespace ) );
|
||||||
|
}
|
||||||
|
else if ( chunkPtn == strayECommentPtn ) {
|
||||||
|
// Non-space junk between an end tag and a start tag.
|
||||||
|
|
||||||
|
String whitespace = m.group( 1 );
|
||||||
|
if ( whitespace.length() > 0 )
|
||||||
|
parentNode.addContent( new LocatedText( whitespace ) );
|
||||||
|
}
|
||||||
|
|
||||||
|
matchedChunk = true;
|
||||||
|
lastPos = pos;
|
||||||
|
pos = m.end();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( !matchedChunk ) {
|
||||||
|
int[] lineAndCol = getLineAndCol( s, pos );
|
||||||
|
int lineNum = lineAndCol[0];
|
||||||
|
int colNum = lineAndCol[1];
|
||||||
|
|
||||||
|
SAXParseException cause = new SAXParseException( String.format( "At line %d, column %d: Unexpected characters.", lineNum, colNum ), null, null, lineNum, colNum);
|
||||||
|
throw new JDOMParseException( String.format( "Error on line %d: %s", lineNum, cause.getMessage() ), cause );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return doc;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns lineNum and colNum for a position in text.
|
||||||
|
*/
|
||||||
|
private int[] getLineAndCol( CharSequence s, int pos ) {
|
||||||
|
Matcher breakMatcher = Pattern.compile( "\n" ).matcher( s );
|
||||||
|
breakMatcher.region( 0, pos+1 );
|
||||||
|
int lastBreakPos = -1;
|
||||||
|
int lineNum = 1;
|
||||||
|
while ( breakMatcher.find() ) {
|
||||||
|
lastBreakPos = breakMatcher.start();
|
||||||
|
breakMatcher.region( breakMatcher.end(), breakMatcher.regionEnd() );
|
||||||
|
lineNum++;
|
||||||
|
}
|
||||||
|
int colNum;
|
||||||
|
if ( lastBreakPos == -1 )
|
||||||
|
colNum = pos+1;
|
||||||
|
else
|
||||||
|
colNum = pos - lastBreakPos;
|
||||||
|
|
||||||
|
return new int[] { lineNum, colNum };
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public String prettyPrint( Document doc ) {
|
||||||
|
Format format = Format.getPrettyFormat();
|
||||||
|
//format.setExpandEmptyElements( true );
|
||||||
|
|
||||||
|
StringWriter writer = new StringWriter();
|
||||||
|
XMLOutputter outputter = new XMLOutputter( format );
|
||||||
|
try {
|
||||||
|
outputter.output( doc, writer );
|
||||||
|
}
|
||||||
|
catch ( IOException e ) {e.printStackTrace();}
|
||||||
|
|
||||||
|
return writer.toString();
|
||||||
|
}
|
||||||
|
}
|
|
@ -570,9 +570,15 @@ public class ManagerFrame extends JFrame implements ActionListener, HashObserver
|
||||||
resultBuf.append( "No mods were checked." );
|
resultBuf.append( "No mods were checked." );
|
||||||
}
|
}
|
||||||
else if ( anyInvalid ) {
|
else if ( anyInvalid ) {
|
||||||
resultBuf.append( "FTL itself can tolerate lots of errors and still run. " );
|
resultBuf.append( "FTL itself can tolerate lots of XML typos and still run. " );
|
||||||
resultBuf.append( "But invalid XML may break tools that do proper parsing, " );
|
resultBuf.append( "But malformed XML may break tools that do proper parsing, " );
|
||||||
resultBuf.append( "and it hinders the development of new tools.\n" );
|
resultBuf.append( "and it hinders the development of new tools.\n" );
|
||||||
|
resultBuf.append( "\n" );
|
||||||
|
resultBuf.append( "In future releases, Slipstream will try to parse XML while " );
|
||||||
|
resultBuf.append( "patching: first strictly, then failing over to a sloppy " );
|
||||||
|
resultBuf.append( "parser. The sloppy parser will tolerate similar errors, " );
|
||||||
|
resultBuf.append( "at the risk of unforseen behavior, so satisfying the " );
|
||||||
|
resultBuf.append( "strict parser is advised.\n" );
|
||||||
}
|
}
|
||||||
infoArea.setDescription( "Results", resultBuf.toString() );
|
infoArea.setDescription( "Results", resultBuf.toString() );
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue