From 9c30e4180a539e5662162656323f412f8774e2cd Mon Sep 17 00:00:00 2001 From: Vhati Date: Sat, 30 Dec 2017 11:39:31 -0500 Subject: [PATCH] Added a Validate warning about FTL 1.5.13 for chars outside windows-1252 --- skel_common/backup/auto_update.json | 1 + skel_common/readme_changelog.txt | 1 + .../vhati/modmanager/core/ModUtilities.java | 107 ++++++++++++++++-- 3 files changed, 98 insertions(+), 11 deletions(-) diff --git a/skel_common/backup/auto_update.json b/skel_common/backup/auto_update.json index 7ba5722..e30bc5e 100644 --- a/skel_common/backup/auto_update.json +++ b/skel_common/backup/auto_update.json @@ -19,6 +19,7 @@ "Made the comments in boilerplace mod metadata optional", "Fixed omitted Validate warnings for PNG files", "Added Validate warnings about FTL 1.6.1+ for TTF, MP3, and PNG files", + "Added a Validate warning about FTL 1.5.13 for chars outside windows-1252", "Disabled XML escaping when reencoding to ensure invalid chars cause an error", "Changed logging framework to SLF4J/Logback", "Changed command line parser to picocli" diff --git a/skel_common/readme_changelog.txt b/skel_common/readme_changelog.txt index 2fa22bf..0acd16d 100644 --- a/skel_common/readme_changelog.txt +++ b/skel_common/readme_changelog.txt @@ -6,6 +6,7 @@ Changelog - Fixed omitted Validate warnings for PNG files - Disabled XML escaping when reencoding to ensure invalid chars cause an error - Added Validate warnings about FTL 1.6.1+ for TTF, MP3, and PNG files +- Added a Validate warning about FTL 1.5.13 for chars outside windows-1252 - Changed logging framework to SLF4J/Logback - Changed command line parser to picocli diff --git a/src/main/java/net/vhati/modmanager/core/ModUtilities.java b/src/main/java/net/vhati/modmanager/core/ModUtilities.java index e3ddd95..51f0a88 100644 --- a/src/main/java/net/vhati/modmanager/core/ModUtilities.java +++ b/src/main/java/net/vhati/modmanager/core/ModUtilities.java @@ -16,12 +16,16 @@ import java.nio.charset.CharacterCodingException; import 
java.nio.charset.Charset; import java.nio.charset.CharsetDecoder; import java.nio.charset.CharsetEncoder; +import java.text.BreakIterator; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; +import java.util.Locale; import java.util.Map; +import java.util.Set; +import java.util.TreeSet; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.zip.ZipEntry; @@ -441,6 +445,7 @@ public class ModUtilities { List seenJunkDirs = new ArrayList(); CharsetEncoder asciiEncoder = Charset.forName( "US-ASCII" ).newEncoder(); + CharsetEncoder win1252Encoder = Charset.forName( "windows-1252" ).newEncoder(); ZipInputStream zis = null; try { @@ -576,15 +581,6 @@ public class ModUtilities { modValid = false; } - // Found chars unique to windows-1252. - if ( decodeResult.encoding.equalsIgnoreCase( "windows-1252" ) ) { - pendingMsgs.add( new ReportMessage( - ReportMessage.WARNING, - String.format( "Fancy %s chars (UTF-8 is recommended for that)", decodeResult.encoding ) - ) ); - modValid = false; - } - if ( decodeResult.eol != DecodeResult.EOL_CRLF && decodeResult.eol != DecodeResult.EOL_NONE ) { if ( isXML ) { @@ -602,6 +598,53 @@ public class ModUtilities { modValid = false; } + if ( decodeResult.encoding.equalsIgnoreCase( "windows-1252" ) ) { + // Found non-ASCII chars unique to windows-1252. 
+ + Set uniqueGraphemes = new TreeSet(); + getUniqueGraphemes( decodeResult.text, uniqueGraphemes, Locale.getDefault() ); + + StringBuilder charBuf = new StringBuilder(); + for ( CharSequence grapheme : uniqueGraphemes ) { + if ( !asciiEncoder.reset().canEncode( grapheme ) ) { + if ( charBuf.length() > 0 ) charBuf.append( "," ); + + charBuf.append( grapheme ); + } + } + + pendingMsgs.add( new ReportMessage( + ReportMessage.WARNING, + String.format( "Windows-1252 encoding with fancy non-ASCII chars (UTF-8 is recommended for clarity): %s", charBuf.toString() ) + ) ); + } + else { + // Not windows-1252. + // Nag if there are chars that can't be converted to + // windows-1252 (for FTL 1.01-1.5.13). + + Set uniqueGraphemes = new TreeSet(); + getUniqueGraphemes( decodeResult.text, uniqueGraphemes, Locale.getDefault() ); + + StringBuilder charBuf = new StringBuilder(); + for ( CharSequence grapheme : uniqueGraphemes ) { + if ( !win1252Encoder.reset().canEncode( grapheme ) ) { + if ( charBuf.length() > 0 ) charBuf.append( "," ); + + charBuf.append( grapheme ).append( " (" ); + appendGraphemeHex( grapheme, charBuf ); + charBuf.append( ")" ); + } + } + if ( charBuf.length() > 0 ) { + pendingMsgs.add( new ReportMessage( + ReportMessage.WARNING, + String.format( "Characters that can't be re-encoded as windows-1252 will not work in FTL 1.5.13 and earlier: %s", charBuf.toString() ) + ) ); + } + } + + // Suggest replacements for odd characters. List oddCharPtns = new ArrayList(); Map oddCharSuggestions = new HashMap(); Map> oddCharLists = new HashMap>(); @@ -647,8 +690,6 @@ public class ModUtilities { } } - // TODO: Nag if there are chars FTL can't show. - if ( isXML ) { Report xmlReport = validateModXML( decodeResult.text ); @@ -736,6 +777,50 @@ public class ModUtilities { return new Report( messages, modValid ); } + /** + * Populates an existing Set with unique graphemes from a string. + * + * What humans think of as a character, is a grapheme. 
Unicode allows + * multiple code points (each one or two Java char values) to cluster into a grapheme + * (like a letter, plus an accent). + * + * This method may be called repeatedly to accumulate graphemes from + * multiple strings. + * + * @param src a String to scan + * @param dstSet a Set to add results into (such as a TreeSet) + * @param locale a locale for creating a BreakIterator + * @see java.text.BreakIterator + */ + public static void getUniqueGraphemes( String src, Set dstSet, Locale locale ) { + BreakIterator graphemeIt = BreakIterator.getCharacterInstance( locale ); // The no-arg overload would use the default Locale. + graphemeIt.setText( src ); + + int start = graphemeIt.first(); + int end = graphemeIt.next(); + + while ( end != BreakIterator.DONE ) { + CharSequence grapheme = src.subSequence( start, end ); + dstSet.add( grapheme ); + + start = end; + end = graphemeIt.next(); + } + } + + /** + * Appends a grapheme's code points ("U+XXXX") to a buffer. + * + * If a grapheme involves multiple code points, they will be + * space-delimited. A surrogate pair is reported as one code point. + */ + public static void appendGraphemeHex( CharSequence src, StringBuilder dstBuf ) { + for ( int i=0; i < src.length(); i += Character.charCount( Character.codePointAt( src, i ) ) ) { // Step by code point, not by char. + if ( i > 0 ) dstBuf.append( " " ); + dstBuf.append( String.format( "U+%04X", Character.codePointAt( src, i ) ) ); + } + } + /** * Checks a mod's xml for problems.