Added SloppyXMLParser
This commit is contained in:
parent
c5dc2669cf
commit
c9b5c38368
3 changed files with 292 additions and 4 deletions
|
@ -28,6 +28,7 @@ import java.util.zip.ZipInputStream;
|
|||
|
||||
import net.vhati.modmanager.core.Report;
|
||||
import net.vhati.modmanager.core.Report.ReportMessage;
|
||||
import net.vhati.modmanager.core.SloppyXMLParser;
|
||||
|
||||
import ar.com.hjg.pngj.PngReader;
|
||||
|
||||
|
@ -403,12 +404,24 @@ public class ModUtilities {
|
|||
|
||||
pendingMsgs.add( new ReportMessage(
|
||||
ReportMessage.WARNING_SUBSECTION,
|
||||
"XML Syntax Issues:",
|
||||
"Normal XML Parser Issues:",
|
||||
condensedList
|
||||
) );
|
||||
}
|
||||
if ( xmlReport.outcome == false )
|
||||
modValid = false;
|
||||
|
||||
Report sloppyReport = validateSloppyModXML( decodeResult.text );
|
||||
|
||||
if ( sloppyReport.messages.size() > 0 ) {
|
||||
pendingMsgs.add( new ReportMessage(
|
||||
ReportMessage.ERROR_SUBSECTION,
|
||||
"Sloppy XML Parser Issues:",
|
||||
sloppyReport.messages
|
||||
) );
|
||||
}
|
||||
if ( sloppyReport.outcome == false )
|
||||
modValid = false;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -683,7 +696,7 @@ public class ModUtilities {
|
|||
badLine = srcBuf.substring( badStart, badEnd );
|
||||
}
|
||||
}
|
||||
String msg = String.format( "Fix this and try again:\n%s", e );
|
||||
String msg = String.format( "Fix this and try again:\n%s", e.toString() );
|
||||
msg += "\n";
|
||||
msg += "~ ~ ~ ~ ~\n";
|
||||
msg += badLine +"\n";
|
||||
|
@ -714,6 +727,58 @@ public class ModUtilities {
|
|||
}
|
||||
|
||||
|
||||
/**
|
||||
* Checks if a mod's xml can be parsed sloppily.
|
||||
*
|
||||
* @param text unparsed xml
|
||||
*/
|
||||
public static Report validateSloppyModXML( String text ) {
|
||||
|
||||
List<ReportMessage> messages = new ArrayList<ReportMessage>();
|
||||
boolean xmlValid = true;
|
||||
|
||||
try {
|
||||
SloppyXMLParser parser = new SloppyXMLParser();
|
||||
parser.build( text );
|
||||
}
|
||||
catch ( JDOMParseException e ) {
|
||||
int lineNum = e.getLineNumber();
|
||||
if ( lineNum != -1 ) {
|
||||
int badStart = -1;
|
||||
int badEnd = -1;
|
||||
String badLine = "???";
|
||||
Matcher m = Pattern.compile( "\n|\\z" ).matcher( text );
|
||||
for ( int i=1; i <= lineNum && m.find(); i++) {
|
||||
if ( i == lineNum-1 ) {
|
||||
badStart = m.end();
|
||||
} else if ( i == lineNum ) {
|
||||
badEnd = m.start();
|
||||
badLine = text.substring( badStart, badEnd );
|
||||
}
|
||||
}
|
||||
String msg = String.format( "Fix this and try again:\n%s", e.toString() );
|
||||
msg += "\n";
|
||||
msg += "~ ~ ~ ~ ~\n";
|
||||
msg += badLine +"\n";
|
||||
msg += "~ ~ ~ ~ ~";
|
||||
messages.add( new ReportMessage(
|
||||
ReportMessage.EXCEPTION,
|
||||
msg
|
||||
) );
|
||||
}
|
||||
else {
|
||||
messages.add( new ReportMessage(
|
||||
ReportMessage.EXCEPTION,
|
||||
"An error occurred. See log for details."
|
||||
) );
|
||||
}
|
||||
xmlValid = false;
|
||||
}
|
||||
|
||||
return new Report( messages, xmlValid );
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns the latest modification time among files within a mod.
|
||||
*
|
||||
|
|
217
src/main/java/net/vhati/modmanager/core/SloppyXMLParser.java
Normal file
217
src/main/java/net/vhati/modmanager/core/SloppyXMLParser.java
Normal file
|
@ -0,0 +1,217 @@
|
|||
package net.vhati.modmanager.core;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.StringWriter;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
import org.xml.sax.SAXParseException;
|
||||
|
||||
import org.jdom2.CDATA;
|
||||
import org.jdom2.Comment;
|
||||
import org.jdom2.Content;
|
||||
import org.jdom2.Document;
|
||||
import org.jdom2.Element;
|
||||
import org.jdom2.Parent;
|
||||
import org.jdom2.input.JDOMParseException;
|
||||
import org.jdom2.located.LocatedText;
|
||||
import org.jdom2.output.Format;
|
||||
import org.jdom2.output.XMLOutputter;
|
||||
|
||||
|
||||
/**
|
||||
* A scraper for malformed XML.
|
||||
*
|
||||
* Sloppiness:
|
||||
* Any closing tag, regardless of its name, closes the parent tag.
|
||||
* <!-- <!-- blah --> is valid (but the extra dashes will be discarded).
|
||||
* --> can occur alone (discarded).
|
||||
* An attribute name can start right after the quote from a prior value.
|
||||
*
|
||||
* Only use this as a last resort, after a real parser fails.
|
||||
*/
|
||||
public class SloppyXMLParser {
|
||||
|
||||
private Pattern declPtn = Pattern.compile( "(\\s*)<[?]xml [^?]*[?]>" );
|
||||
private Pattern commentPtn = Pattern.compile( "(?s)(\\s*)<!--((?:.(?!-->))*.)-->" );
|
||||
private Pattern cdataPtn = Pattern.compile( "(?s)(\\s*)<!\\[CDATA\\[((?:.(?!\\]\\]>))*.)\\]\\]>" );
|
||||
private Pattern sTagPtn = Pattern.compile( "(\\s*)<(?:(\\w+):)?(\\w+)((?: [^>]+?)??)\\s*(/?)>" );
|
||||
private Pattern eTagPtn = Pattern.compile( "([^<]*)</\\s*([^>]+)>" );
|
||||
private Pattern endSpacePtn = Pattern.compile( "\\s+$" );
|
||||
private Pattern strayECommentPtn = Pattern.compile( "(\\s*)-->" );
|
||||
private Pattern strayCharsPtn = Pattern.compile( "(\\s*)[-.>,]" );
|
||||
|
||||
private Pattern attrPtn = Pattern.compile( "\\s*([^=]+?)\\s*=\\s*(\"[^\"]*\"|'[^']*')" );
|
||||
|
||||
private List<Pattern> chunkPtns = new ArrayList<Pattern>();
|
||||
|
||||
|
||||
public SloppyXMLParser() {
|
||||
chunkPtns.add( declPtn );
|
||||
chunkPtns.add( commentPtn );
|
||||
chunkPtns.add( cdataPtn );
|
||||
chunkPtns.add( sTagPtn );
|
||||
chunkPtns.add( eTagPtn );
|
||||
chunkPtns.add( endSpacePtn );
|
||||
chunkPtns.add( strayECommentPtn );
|
||||
chunkPtns.add( strayCharsPtn );
|
||||
}
|
||||
|
||||
|
||||
public Document build( CharSequence s ) throws JDOMParseException {
|
||||
Document doc = new Document();
|
||||
Element rootNode = new Element( "wrapper" );
|
||||
doc.addContent( rootNode );
|
||||
|
||||
Parent parentNode = rootNode;
|
||||
int sLen = s.length();
|
||||
int lastPos = -1;
|
||||
int pos = 0;
|
||||
String tmp = null;
|
||||
Matcher m = declPtn.matcher( s );
|
||||
|
||||
while ( pos > lastPos && pos < sLen ) {
|
||||
m.region( pos, sLen );
|
||||
boolean matchedChunk = false;
|
||||
|
||||
for ( Pattern chunkPtn : chunkPtns ) {
|
||||
m.usePattern( chunkPtn );
|
||||
if ( !m.lookingAt() ) continue;
|
||||
|
||||
if ( chunkPtn == declPtn ) {
|
||||
// Don't care.
|
||||
}
|
||||
else if ( chunkPtn == commentPtn ) {
|
||||
String whitespace = m.group( 1 );
|
||||
if ( whitespace.length() > 0 )
|
||||
parentNode.addContent( new LocatedText( whitespace ) );
|
||||
|
||||
tmp = m.group( 2 );
|
||||
tmp = tmp.replaceAll( "^-+|(?<=-)-+|-+$", "" );
|
||||
Comment commentNode = new Comment( tmp );
|
||||
parentNode.addContent( commentNode );
|
||||
}
|
||||
else if ( chunkPtn == cdataPtn ) {
|
||||
String whitespace = m.group( 1 );
|
||||
if ( whitespace.length() > 0 )
|
||||
parentNode.addContent( new LocatedText( whitespace ) );
|
||||
|
||||
CDATA cdataNode = new CDATA( m.group(2) );
|
||||
parentNode.addContent( cdataNode );
|
||||
}
|
||||
else if ( chunkPtn == sTagPtn ) {
|
||||
String whitespace = m.group( 1 );
|
||||
if ( whitespace.length() > 0 )
|
||||
parentNode.addContent( new LocatedText( whitespace ) );
|
||||
|
||||
String nodeNS = m.group( 2 ); // Might be null.
|
||||
String nodeName = m.group( 3 );
|
||||
String attrString = m.group( 4 );
|
||||
boolean selfClosing = ( m.group( 5 ).length() > 0 );
|
||||
|
||||
Element tagNode = new Element( nodeName );
|
||||
|
||||
if ( attrString.length() > 0 ) {
|
||||
Matcher am = attrPtn.matcher( attrString );
|
||||
while ( am.lookingAt() ) {
|
||||
String attrName = am.group( 1 );
|
||||
String attrValue = am.group( 2 );
|
||||
attrValue = attrValue.substring( 1, attrValue.length()-1 );
|
||||
tagNode.setAttribute( attrName, attrValue );
|
||||
am.region( am.end(), am.regionEnd() );
|
||||
}
|
||||
if ( am.regionStart() < attrString.length() ) {
|
||||
int[] lineAndCol = getLineAndCol( s, pos );
|
||||
int lineNum = lineAndCol[0];
|
||||
int colNum = lineAndCol[1];
|
||||
|
||||
SAXParseException cause = new SAXParseException( String.format( "At line %d, column %d: Strange attributes.", lineNum, colNum ), null, null, lineNum, colNum);
|
||||
throw new JDOMParseException( String.format( "Error on line %d: %s", lineNum, cause.getMessage() ), cause );
|
||||
}
|
||||
}
|
||||
|
||||
parentNode.addContent( tagNode );
|
||||
if ( !selfClosing ) parentNode = tagNode;
|
||||
}
|
||||
else if ( chunkPtn == eTagPtn ) {
|
||||
String interimText = m.group( 1 );
|
||||
parentNode.addContent( new LocatedText( interimText ) );
|
||||
parentNode = parentNode.getParent();
|
||||
}
|
||||
else if ( chunkPtn == endSpacePtn ) {
|
||||
// This is the end of the document.
|
||||
}
|
||||
else if ( chunkPtn == strayECommentPtn ) {
|
||||
// Stray end-comment bracket.
|
||||
|
||||
String whitespace = m.group( 1 );
|
||||
if ( whitespace.length() > 0 )
|
||||
parentNode.addContent( new LocatedText( whitespace ) );
|
||||
}
|
||||
else if ( chunkPtn == strayECommentPtn ) {
|
||||
// Non-space junk between an end tag and a start tag.
|
||||
|
||||
String whitespace = m.group( 1 );
|
||||
if ( whitespace.length() > 0 )
|
||||
parentNode.addContent( new LocatedText( whitespace ) );
|
||||
}
|
||||
|
||||
matchedChunk = true;
|
||||
lastPos = pos;
|
||||
pos = m.end();
|
||||
break;
|
||||
}
|
||||
|
||||
if ( !matchedChunk ) {
|
||||
int[] lineAndCol = getLineAndCol( s, pos );
|
||||
int lineNum = lineAndCol[0];
|
||||
int colNum = lineAndCol[1];
|
||||
|
||||
SAXParseException cause = new SAXParseException( String.format( "At line %d, column %d: Unexpected characters.", lineNum, colNum ), null, null, lineNum, colNum);
|
||||
throw new JDOMParseException( String.format( "Error on line %d: %s", lineNum, cause.getMessage() ), cause );
|
||||
}
|
||||
}
|
||||
|
||||
return doc;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns lineNum and colNum for a position in text.
|
||||
*/
|
||||
private int[] getLineAndCol( CharSequence s, int pos ) {
|
||||
Matcher breakMatcher = Pattern.compile( "\n" ).matcher( s );
|
||||
breakMatcher.region( 0, pos+1 );
|
||||
int lastBreakPos = -1;
|
||||
int lineNum = 1;
|
||||
while ( breakMatcher.find() ) {
|
||||
lastBreakPos = breakMatcher.start();
|
||||
breakMatcher.region( breakMatcher.end(), breakMatcher.regionEnd() );
|
||||
lineNum++;
|
||||
}
|
||||
int colNum;
|
||||
if ( lastBreakPos == -1 )
|
||||
colNum = pos+1;
|
||||
else
|
||||
colNum = pos - lastBreakPos;
|
||||
|
||||
return new int[] { lineNum, colNum };
|
||||
}
|
||||
|
||||
|
||||
public String prettyPrint( Document doc ) {
|
||||
Format format = Format.getPrettyFormat();
|
||||
//format.setExpandEmptyElements( true );
|
||||
|
||||
StringWriter writer = new StringWriter();
|
||||
XMLOutputter outputter = new XMLOutputter( format );
|
||||
try {
|
||||
outputter.output( doc, writer );
|
||||
}
|
||||
catch ( IOException e ) {e.printStackTrace();}
|
||||
|
||||
return writer.toString();
|
||||
}
|
||||
}
|
|
@ -570,9 +570,15 @@ public class ManagerFrame extends JFrame implements ActionListener, HashObserver
|
|||
resultBuf.append( "No mods were checked." );
|
||||
}
|
||||
else if ( anyInvalid ) {
|
||||
resultBuf.append( "FTL itself can tolerate lots of errors and still run. " );
|
||||
resultBuf.append( "But invalid XML may break tools that do proper parsing, " );
|
||||
resultBuf.append( "FTL itself can tolerate lots of XML typos and still run. " );
|
||||
resultBuf.append( "But malformed XML may break tools that do proper parsing, " );
|
||||
resultBuf.append( "and it hinders the development of new tools.\n" );
|
||||
resultBuf.append( "\n" );
|
||||
resultBuf.append( "In future releases, Slipstream will try to parse XML while " );
|
||||
resultBuf.append( "patching: first strictly, then failing over to a sloppy " );
|
||||
resultBuf.append( "parser. The sloppy parser will tolerate similar errors, " );
|
||||
resultBuf.append( "at the risk of unforseen behavior, so satisfying the " );
|
||||
resultBuf.append( "strict parser is advised.\n" );
|
||||
}
|
||||
infoArea.setDescription( "Results", resultBuf.toString() );
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue