Added a Validate warning about FTL 1.5.13 for chars outside windows-1252

This commit is contained in:
Vhati 2017-12-30 11:39:31 -05:00
parent 505f57f43e
commit 9c30e4180a
3 changed files with 98 additions and 11 deletions

View file

@ -19,6 +19,7 @@
"Made the comments in boilerplate mod metadata optional", "Made the comments in boilerplate mod metadata optional",
"Fixed omitted Validate warnings for PNG files", "Fixed omitted Validate warnings for PNG files",
"Added Validate warnings about FTL 1.6.1+ for TTF, MP3, and PNG files", "Added Validate warnings about FTL 1.6.1+ for TTF, MP3, and PNG files",
"Added a Validate warning about FTL 1.5.13 for chars outside windows-1252",
"Disabled XML escaping when reencoding to ensure invalid chars cause an error", "Disabled XML escaping when reencoding to ensure invalid chars cause an error",
"Changed logging framework to SLF4J/Logback", "Changed logging framework to SLF4J/Logback",
"Changed command line parser to picocli" "Changed command line parser to picocli"

View file

@ -6,6 +6,7 @@ Changelog
- Fixed omitted Validate warnings for PNG files - Fixed omitted Validate warnings for PNG files
- Disabled XML escaping when reencoding to ensure invalid chars cause an error - Disabled XML escaping when reencoding to ensure invalid chars cause an error
- Added Validate warnings about FTL 1.6.1+ for TTF, MP3, and PNG files - Added Validate warnings about FTL 1.6.1+ for TTF, MP3, and PNG files
- Added a Validate warning about FTL 1.5.13 for chars outside windows-1252
- Changed logging framework to SLF4J/Logback - Changed logging framework to SLF4J/Logback
- Changed command line parser to picocli - Changed command line parser to picocli

View file

@ -16,12 +16,16 @@ import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset; import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder; import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder; import java.nio.charset.CharsetEncoder;
import java.text.BreakIterator;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.HashMap; import java.util.HashMap;
import java.util.LinkedHashMap; import java.util.LinkedHashMap;
import java.util.List; import java.util.List;
import java.util.Locale;
import java.util.Map; import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import java.util.zip.ZipEntry; import java.util.zip.ZipEntry;
@ -441,6 +445,7 @@ public class ModUtilities {
List<String> seenJunkDirs = new ArrayList<String>(); List<String> seenJunkDirs = new ArrayList<String>();
CharsetEncoder asciiEncoder = Charset.forName( "US-ASCII" ).newEncoder(); CharsetEncoder asciiEncoder = Charset.forName( "US-ASCII" ).newEncoder();
CharsetEncoder win1252Encoder = Charset.forName( "windows-1252" ).newEncoder();
ZipInputStream zis = null; ZipInputStream zis = null;
try { try {
@ -576,15 +581,6 @@ public class ModUtilities {
modValid = false; modValid = false;
} }
// Found chars unique to windows-1252.
if ( decodeResult.encoding.equalsIgnoreCase( "windows-1252" ) ) {
pendingMsgs.add( new ReportMessage(
ReportMessage.WARNING,
String.format( "Fancy %s chars (UTF-8 is recommended for that)", decodeResult.encoding )
) );
modValid = false;
}
if ( decodeResult.eol != DecodeResult.EOL_CRLF && if ( decodeResult.eol != DecodeResult.EOL_CRLF &&
decodeResult.eol != DecodeResult.EOL_NONE ) { decodeResult.eol != DecodeResult.EOL_NONE ) {
if ( isXML ) { if ( isXML ) {
@ -602,6 +598,53 @@ public class ModUtilities {
modValid = false; modValid = false;
} }
if ( decodeResult.encoding.equalsIgnoreCase( "windows-1252" ) ) {
// Found non-ASCII chars unique to windows-1252.
Set<CharSequence> uniqueGraphemes = new TreeSet<CharSequence>();
getUniqueGraphemes( decodeResult.text, uniqueGraphemes, Locale.getDefault() );
StringBuilder charBuf = new StringBuilder();
for ( CharSequence grapheme : uniqueGraphemes ) {
if ( !asciiEncoder.reset().canEncode( grapheme ) ) {
if ( charBuf.length() > 0 ) charBuf.append( "," );
charBuf.append( grapheme );
}
}
pendingMsgs.add( new ReportMessage(
ReportMessage.WARNING,
String.format( "Windows-1252 encoding with fancy non-ASCII chars (UTF-8 is recommended for clarity): %s", charBuf.toString() )
) );
}
else {
// Not windows-1252.
// Nag if there are chars that can't be converted to
// windows-1252 (for FTL 1.01-1.5.13).
Set<CharSequence> uniqueGraphemes = new TreeSet<CharSequence>();
getUniqueGraphemes( decodeResult.text, uniqueGraphemes, Locale.getDefault() );
StringBuilder charBuf = new StringBuilder();
for ( CharSequence grapheme : uniqueGraphemes ) {
if ( !win1252Encoder.reset().canEncode( grapheme ) ) {
if ( charBuf.length() > 0 ) charBuf.append( "," );
charBuf.append( grapheme ).append( " (" );
appendGraphemeHex( grapheme, charBuf );
charBuf.append( ")" );
}
}
if ( charBuf.length() > 0 ) {
pendingMsgs.add( new ReportMessage(
ReportMessage.WARNING,
String.format( "Characters that can't be re-encoded as windows-1252 will not work in FTL 1.5.13 and earlier: %s", charBuf.toString() )
) );
}
}
// Suggest replacements for odd characters.
List<Pattern> oddCharPtns = new ArrayList<Pattern>(); List<Pattern> oddCharPtns = new ArrayList<Pattern>();
Map<Pattern,String> oddCharSuggestions = new HashMap<Pattern,String>(); Map<Pattern,String> oddCharSuggestions = new HashMap<Pattern,String>();
Map<Pattern,List<Character>> oddCharLists = new HashMap<Pattern,List<Character>>(); Map<Pattern,List<Character>> oddCharLists = new HashMap<Pattern,List<Character>>();
@ -647,8 +690,6 @@ public class ModUtilities {
} }
} }
// TODO: Nag if there are chars FTL can't show.
if ( isXML ) { if ( isXML ) {
Report xmlReport = validateModXML( decodeResult.text ); Report xmlReport = validateModXML( decodeResult.text );
@ -736,6 +777,50 @@ public class ModUtilities {
return new Report( messages, modValid ); return new Report( messages, modValid );
} }
/**
 * Collects the distinct graphemes of a string into a caller-supplied Set.
 *
 * A grapheme is what a reader perceives as a single character. Unicode
 * lets several code points cluster into one grapheme (a base letter
 * followed by a combining accent, for instance), so a grapheme may span
 * more than one Java char.
 *
 * Nothing is ever removed from the Set, so this method may be called
 * repeatedly to accumulate graphemes from multiple strings.
 *
 * @param src  a String to scan
 * @param dstSet  a Set to add results into (such as a TreeSet)
 * @param locale  a locale for creating the BreakIterator
 * @see java.text.BreakIterator
 */
public static void getUniqueGraphemes( String src, Set<CharSequence> dstSet, Locale locale ) {
	BreakIterator boundaries = BreakIterator.getCharacterInstance( locale );
	boundaries.setText( src );

	// Each pair of consecutive boundaries delimits one grapheme.
	int from = boundaries.first();
	for ( int to = boundaries.next(); to != BreakIterator.DONE; to = boundaries.next() ) {
		dstSet.add( src.substring( from, to ) );
		from = to;
	}
}
/**
 * Appends a grapheme's code points ("U+XXXX") to a buffer.
 *
 * If a grapheme involves multiple code points, they will be
 * space-delimited. Each code point is written with at least four
 * uppercase hex digits.
 *
 * A surrogate pair is reported as the single supplementary code point
 * it encodes (e.g. "U+1F600"), not as two separate char values. An
 * unpaired surrogate is reported as its own value.
 *
 * @param src  the grapheme whose code points will be listed
 * @param dstBuf  a buffer to append the result to
 */
public static void appendGraphemeHex( CharSequence src, StringBuilder dstBuf ) {
	int i = 0;
	while ( i < src.length() ) {
		if ( i > 0 ) dstBuf.append( " " );

		// Decode a whole code point, which may occupy two chars.
		int codePoint = Character.codePointAt( src, i );
		dstBuf.append( String.format( "U+%04X", codePoint ) );

		// Advance by however many chars that code point consumed.
		i += Character.charCount( codePoint );
	}
}
/** /**
* Checks a mod's xml for problems. * Checks a mod's xml for problems.