diff --git a/src/main/java/net/vhati/modmanager/core/ModUtilities.java b/src/main/java/net/vhati/modmanager/core/ModUtilities.java index 7e48ca1..555032a 100644 --- a/src/main/java/net/vhati/modmanager/core/ModUtilities.java +++ b/src/main/java/net/vhati/modmanager/core/ModUtilities.java @@ -28,6 +28,7 @@ import java.util.zip.ZipInputStream; import net.vhati.modmanager.core.Report; import net.vhati.modmanager.core.Report.ReportMessage; +import net.vhati.modmanager.core.SloppyXMLParser; import ar.com.hjg.pngj.PngReader; @@ -403,12 +404,24 @@ public class ModUtilities { pendingMsgs.add( new ReportMessage( ReportMessage.WARNING_SUBSECTION, - "XML Syntax Issues:", + "Normal XML Parser Issues:", condensedList ) ); } if ( xmlReport.outcome == false ) modValid = false; + + Report sloppyReport = validateSloppyModXML( decodeResult.text ); + + if ( sloppyReport.messages.size() > 0 ) { + pendingMsgs.add( new ReportMessage( + ReportMessage.ERROR_SUBSECTION, + "Sloppy XML Parser Issues:", + sloppyReport.messages + ) ); + } + if ( sloppyReport.outcome == false ) + modValid = false; } } @@ -683,7 +696,7 @@ public class ModUtilities { badLine = srcBuf.substring( badStart, badEnd ); } } - String msg = String.format( "Fix this and try again:\n%s", e ); + String msg = String.format( "Fix this and try again:\n%s", e.toString() ); msg += "\n"; msg += "~ ~ ~ ~ ~\n"; msg += badLine +"\n"; @@ -714,6 +727,58 @@ public class ModUtilities { } + /** + * Checks if a mod's xml can be parsed sloppily. 
+	 * @param text unparsed xml
+	 * @return a Report holding any EXCEPTION messages, built with false if SloppyXMLParser threw
+	 */
+	public static Report validateSloppyModXML( String text ) {
+		// Parameterized rather than raw, so only ReportMessages can be collected.
+		List<ReportMessage> messages = new ArrayList<ReportMessage>();
+		boolean xmlValid = true;
+
+		try {
+			SloppyXMLParser parser = new SloppyXMLParser();
+			parser.build( text );  // The Document is discarded; only success or failure matters.
+		}
+		catch ( JDOMParseException e ) {
+			int lineNum = e.getLineNumber();
+			if ( lineNum != -1 ) {
+				int badStart = 0;  // Line 1 has no preceding break; starting at -1 made substring() throw.
+				int badEnd = -1;
+				String badLine = "???";  // Shown if lineNum lies beyond the text.
+				Matcher m = Pattern.compile( "\n|\\z" ).matcher( text );  // Each line break, then end of text.
+				for ( int i=1; i <= lineNum && m.find(); i++) {
+					if ( i == lineNum-1 ) {
+						badStart = m.end();  // The bad line begins right after the previous break.
+					} else if ( i == lineNum ) {
+						badEnd = m.start();
+						badLine = text.substring( badStart, badEnd );
+					}
+				}
+				String msg = String.format( "Fix this and try again:\n%s", e.toString() );
+				msg += "\n";
+				msg += "~ ~ ~ ~ ~\n";
+				msg += badLine +"\n";
+				msg += "~ ~ ~ ~ ~";
+				messages.add( new ReportMessage(
+					ReportMessage.EXCEPTION,
+					msg
+				) );
+			}
+			else {
+				messages.add( new ReportMessage(
+					ReportMessage.EXCEPTION,
+					"An error occurred. See log for details."
+				) );
+			}
+			xmlValid = false;
+		}
+
+		return new Report( messages, xmlValid );
+	}
+
+
+
 	/**
 	 * Returns the latest modification time among files within a mod.
* diff --git a/src/main/java/net/vhati/modmanager/core/SloppyXMLParser.java b/src/main/java/net/vhati/modmanager/core/SloppyXMLParser.java new file mode 100644 index 0000000..89d6e41 --- /dev/null +++ b/src/main/java/net/vhati/modmanager/core/SloppyXMLParser.java @@ -0,0 +1,217 @@ +package net.vhati.modmanager.core; + +import java.io.IOException; +import java.io.StringWriter; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import org.xml.sax.SAXParseException; + +import org.jdom2.CDATA; +import org.jdom2.Comment; +import org.jdom2.Content; +import org.jdom2.Document; +import org.jdom2.Element; +import org.jdom2.Parent; +import org.jdom2.input.JDOMParseException; +import org.jdom2.located.LocatedText; +import org.jdom2.output.Format; +import org.jdom2.output.XMLOutputter; + + +/** + * A scraper for malformed XML. + * + * Sloppiness: + * Any closing tag, regardless of its name, closes the parent tag. + * <!-- <!-- blah --> is valid (but the extra dashes will be discarded). + * --> can occur alone (discarded). + * An attribute name can start right after the quote from a prior value. + * + * Only use this as a last resort, after a real parser fails. 
+ */
+public class SloppyXMLParser {
+	// Chunk patterns. Group 1 is leading whitespace, except in eTagPtn where it is the text before the closing tag.
+	private Pattern declPtn = Pattern.compile( "(\\s*)<[?]xml [^?]*[?]>" );
+	private Pattern commentPtn = Pattern.compile( "(?s)(\\s*)<!--((?:.(?!-->))*.)-->" );
+	private Pattern cdataPtn = Pattern.compile( "(?s)(\\s*)<!\\[CDATA\\[((?:.(?!\\]\\]>))*.)\\]\\]>" );
+	private Pattern sTagPtn = Pattern.compile( "(\\s*)<(?:(\\w+):)?(\\w+)((?: [^>]+?)??)\\s*(/?)>" );
+	private Pattern eTagPtn = Pattern.compile( "([^<]*)</([^>]+)>" );
+	private Pattern endSpacePtn = Pattern.compile( "\\s+$" );
+	private Pattern strayECommentPtn = Pattern.compile( "(\\s*)-->" );
+	private Pattern strayCharsPtn = Pattern.compile( "(\\s*)[-.>,]" );
+
+	private Pattern attrPtn = Pattern.compile( "\\s*([^=]+?)\\s*=\\s*(\"[^\"]*\"|'[^']*')" );
+
+	private List<Pattern> chunkPtns = new ArrayList<Pattern>();
+
+	/** Registers the chunk patterns in the order build() tries them at each position. */
+	public SloppyXMLParser() {
+		chunkPtns.add( declPtn );
+		chunkPtns.add( commentPtn );
+		chunkPtns.add( cdataPtn );
+		chunkPtns.add( sTagPtn );
+		chunkPtns.add( eTagPtn );
+		chunkPtns.add( endSpacePtn );
+		chunkPtns.add( strayECommentPtn );
+		chunkPtns.add( strayCharsPtn );
+	}
+
+	/** Scrapes s into a Document under a synthetic "wrapper" root; throws JDOMParseException where no chunk pattern matches. */
+	public Document build( CharSequence s ) throws JDOMParseException {
+		Document doc = new Document();
+		Element rootNode = new Element( "wrapper" );
+		doc.addContent( rootNode );
+
+		Parent parentNode = rootNode;
+		int sLen = s.length();
+		int lastPos = -1;
+		int pos = 0;
+		String tmp = null;
+		Matcher m = declPtn.matcher( s );
+
+		while ( pos > lastPos && pos < sLen ) {
+			m.region( pos, sLen );
+			boolean matchedChunk = false;
+			// The first pattern that matches at pos wins; its branch below consumes the chunk.
+			for ( Pattern chunkPtn : chunkPtns ) {
+				m.usePattern( chunkPtn );
+				if ( !m.lookingAt() ) continue;
+
+				if ( chunkPtn == declPtn ) {
+					// Don't care.
+				}
+				else if ( chunkPtn == commentPtn ) {
+					String whitespace = m.group( 1 );
+					if ( whitespace.length() > 0 )
+						parentNode.addContent( new LocatedText( whitespace ) );
+
+					tmp = m.group( 2 );
+					tmp = tmp.replaceAll( "^-+|(?<=-)-+|-+$", "" );  // Drop leading/trailing dashes; collapse dash runs.
+					Comment commentNode = new Comment( tmp );
+					parentNode.addContent( commentNode );
+				}
+				else if ( chunkPtn == cdataPtn ) {
+					String whitespace = m.group( 1 );
+					if ( whitespace.length() > 0 )
+						parentNode.addContent( new LocatedText( whitespace ) );
+
+					CDATA cdataNode = new CDATA( m.group(2) );
+					parentNode.addContent( cdataNode );
+				}
+				else if ( chunkPtn == sTagPtn ) {
+					String whitespace = m.group( 1 );
+					if ( whitespace.length() > 0 )
+						parentNode.addContent( new LocatedText( whitespace ) );
+
+					String nodeNS = m.group( 2 );  // Might be null. NOTE(review): captured but never applied to the Element.
+					String nodeName = m.group( 3 );
+					String attrString = m.group( 4 );
+					boolean selfClosing = ( m.group( 5 ).length() > 0 );
+
+					Element tagNode = new Element( nodeName );
+
+					if ( attrString.length() > 0 ) {
+						Matcher am = attrPtn.matcher( attrString );
+						while ( am.lookingAt() ) {
+							String attrName = am.group( 1 );
+							String attrValue = am.group( 2 );
+							attrValue = attrValue.substring( 1, attrValue.length()-1 );  // Strip the quotes.
+							tagNode.setAttribute( attrName, attrValue );
+							am.region( am.end(), am.regionEnd() );
+						}
+						if ( am.regionStart() < attrString.length() ) {  // Leftover text that is not name=value.
+							int[] lineAndCol = getLineAndCol( s, m.end( 1 ) );  // Point at the tag's '<', not the whitespace before it.
+							int lineNum = lineAndCol[0];
+							int colNum = lineAndCol[1];
+
+							SAXParseException cause = new SAXParseException( String.format( "At line %d, column %d: Strange attributes.", lineNum, colNum ), null, null, lineNum, colNum);
+							throw new JDOMParseException( String.format( "Error on line %d: %s", lineNum, cause.getMessage() ), cause );
+						}
+					}
+
+					parentNode.addContent( tagNode );
+					if ( !selfClosing ) parentNode = tagNode;
+				}
+				else if ( chunkPtn == eTagPtn ) {
+					String interimText = m.group( 1 );
+					parentNode.addContent( new LocatedText( interimText ) );
+					if ( parentNode != rootNode ) parentNode = parentNode.getParent();  // A surplus closing tag must not climb past the wrapper.
+				}
+				else if ( chunkPtn == endSpacePtn ) {
+					// This is the end of the document.
+				}
+				else if ( chunkPtn == strayECommentPtn ) {
+					// Stray end-comment bracket.
+
+					String whitespace = m.group( 1 );
+					if ( whitespace.length() > 0 )
+						parentNode.addContent( new LocatedText( whitespace ) );
+				}
+				else if ( chunkPtn == strayCharsPtn ) {
+					// Non-space junk between an end tag and a start tag.
+
+					String whitespace = m.group( 1 );
+					if ( whitespace.length() > 0 )
+						parentNode.addContent( new LocatedText( whitespace ) );
+				}
+
+				matchedChunk = true;
+				lastPos = pos;
+				pos = m.end();
+				break;
+			}
+
+			if ( !matchedChunk ) {
+				int[] lineAndCol = getLineAndCol( s, pos );
+				int lineNum = lineAndCol[0];
+				int colNum = lineAndCol[1];
+
+				SAXParseException cause = new SAXParseException( String.format( "At line %d, column %d: Unexpected characters.", lineNum, colNum ), null, null, lineNum, colNum);
+				throw new JDOMParseException( String.format( "Error on line %d: %s", lineNum, cause.getMessage() ), cause );
+			}
+		}
+
+		return doc;
+	}
+
+	/**
+	 * Returns lineNum and colNum for a position in text.
+	 * Line breaks at offsets 0..pos inclusive are counted, so if pos is itself a break, the result is column 0 of the next line.
+	 */
+	private int[] getLineAndCol( CharSequence s, int pos ) {
+		int lastBreakPos = -1;
+		int lineNum = 1;
+		// A plain scan replaces the per-call regex; the length bound tolerates pos == s.length().
+		for ( int i=0; i <= pos && i < s.length(); i++ ) {
+			if ( s.charAt( i ) != '\n' ) continue;
+			lastBreakPos = i;
+			lineNum++;
+		}
+
+		// The column is the distance from the preceding break, so the first char of a line is column 1.
+		int colNum = ( lastBreakPos == -1 ) ? pos+1 : pos - lastBreakPos;
+
+		// Element 0 is the line number, element 1 the column number.
+		int[] result = new int[] { lineNum, colNum };
+		return result;
+	}
+
+	/** Serializes doc to a String using JDOM's pretty format. */
+	public String prettyPrint( Document doc ) {
+		Format format = Format.getPrettyFormat();
+		//format.setExpandEmptyElements( true );
+
+		StringWriter writer = new StringWriter();
+		XMLOutputter outputter = new XMLOutputter( format );
+		try {
+			outputter.output( doc, writer );
+		}
+		catch ( IOException e ) {e.printStackTrace();}
+
+		return writer.toString();
+	}
+}
diff --git a/src/main/java/net/vhati/modmanager/ui/ManagerFrame.java b/src/main/java/net/vhati/modmanager/ui/ManagerFrame.java
index fed8c99..f32209b 100644
--- a/src/main/java/net/vhati/modmanager/ui/ManagerFrame.java
+++ b/src/main/java/net/vhati/modmanager/ui/ManagerFrame.java
@@ -570,9 +570,15 @@ public class ManagerFrame extends JFrame implements ActionListener, HashObserver
 				resultBuf.append( "No mods were checked." );
 			}
 			else if ( anyInvalid ) {
-				resultBuf.append( "FTL itself can tolerate lots of errors and still run. " );
-				resultBuf.append( "But invalid XML may break tools that do proper parsing, " );
+				resultBuf.append( "FTL itself can tolerate lots of XML typos and still run. " );
+				resultBuf.append( "But malformed XML may break tools that do proper parsing, " );
 				resultBuf.append( "and it hinders the development of new tools.\n" );
+				resultBuf.append( "\n" );
+				resultBuf.append( "In future releases, Slipstream will try to parse XML while " );
+				resultBuf.append( "patching: first strictly, then failing over to a sloppy " );
+				resultBuf.append( "parser. The sloppy parser will tolerate similar errors, " );
+				resultBuf.append( "at the risk of unforeseen behavior, so satisfying the " );
+				resultBuf.append( "strict parser is advised.\n" );
 			}
 			infoArea.setDescription( "Results", resultBuf.toString() );
 		}