Added a Validate warning about FTL 1.5.13 for chars outside windows-1252
This commit is contained in:
parent
505f57f43e
commit
9c30e4180a
3 changed files with 98 additions and 11 deletions
|
@ -19,6 +19,7 @@
|
||||||
"Made the comments in boilerplace mod metadata optional",
|
"Made the comments in boilerplace mod metadata optional",
|
||||||
"Fixed omitted Validate warnings for PNG files",
|
"Fixed omitted Validate warnings for PNG files",
|
||||||
"Added Validate warnings about FTL 1.6.1+ for TTF, MP3, and PNG files",
|
"Added Validate warnings about FTL 1.6.1+ for TTF, MP3, and PNG files",
|
||||||
|
"Added a Validate warning about FTL 1.5.13 for chars outside windows-1252",
|
||||||
"Disabled XML escaping when reencoding to ensure invalid chars cause an error",
|
"Disabled XML escaping when reencoding to ensure invalid chars cause an error",
|
||||||
"Changed logging framework to SLF4J/Logback",
|
"Changed logging framework to SLF4J/Logback",
|
||||||
"Changed command line parser to picocli"
|
"Changed command line parser to picocli"
|
||||||
|
|
|
@ -6,6 +6,7 @@ Changelog
|
||||||
- Fixed omitted Validate warnings for PNG files
|
- Fixed omitted Validate warnings for PNG files
|
||||||
- Disabled XML escaping when reencoding to ensure invalid chars cause an error
|
- Disabled XML escaping when reencoding to ensure invalid chars cause an error
|
||||||
- Added Validate warnings about FTL 1.6.1+ for TTF, MP3, and PNG files
|
- Added Validate warnings about FTL 1.6.1+ for TTF, MP3, and PNG files
|
||||||
|
- Added a Validate warning about FTL 1.5.13 for chars outside windows-1252
|
||||||
- Changed logging framework to SLF4J/Logback
|
- Changed logging framework to SLF4J/Logback
|
||||||
- Changed command line parser to picocli
|
- Changed command line parser to picocli
|
||||||
|
|
||||||
|
|
|
@ -16,12 +16,16 @@ import java.nio.charset.CharacterCodingException;
|
||||||
import java.nio.charset.Charset;
|
import java.nio.charset.Charset;
|
||||||
import java.nio.charset.CharsetDecoder;
|
import java.nio.charset.CharsetDecoder;
|
||||||
import java.nio.charset.CharsetEncoder;
|
import java.nio.charset.CharsetEncoder;
|
||||||
|
import java.text.BreakIterator;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.LinkedHashMap;
|
import java.util.LinkedHashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Locale;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.TreeSet;
|
||||||
import java.util.regex.Matcher;
|
import java.util.regex.Matcher;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
import java.util.zip.ZipEntry;
|
import java.util.zip.ZipEntry;
|
||||||
|
@ -441,6 +445,7 @@ public class ModUtilities {
|
||||||
List<String> seenJunkDirs = new ArrayList<String>();
|
List<String> seenJunkDirs = new ArrayList<String>();
|
||||||
|
|
||||||
CharsetEncoder asciiEncoder = Charset.forName( "US-ASCII" ).newEncoder();
|
CharsetEncoder asciiEncoder = Charset.forName( "US-ASCII" ).newEncoder();
|
||||||
|
CharsetEncoder win1252Encoder = Charset.forName( "windows-1252" ).newEncoder();
|
||||||
|
|
||||||
ZipInputStream zis = null;
|
ZipInputStream zis = null;
|
||||||
try {
|
try {
|
||||||
|
@ -576,15 +581,6 @@ public class ModUtilities {
|
||||||
modValid = false;
|
modValid = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Found chars unique to windows-1252.
|
|
||||||
if ( decodeResult.encoding.equalsIgnoreCase( "windows-1252" ) ) {
|
|
||||||
pendingMsgs.add( new ReportMessage(
|
|
||||||
ReportMessage.WARNING,
|
|
||||||
String.format( "Fancy %s chars (UTF-8 is recommended for that)", decodeResult.encoding )
|
|
||||||
) );
|
|
||||||
modValid = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( decodeResult.eol != DecodeResult.EOL_CRLF &&
|
if ( decodeResult.eol != DecodeResult.EOL_CRLF &&
|
||||||
decodeResult.eol != DecodeResult.EOL_NONE ) {
|
decodeResult.eol != DecodeResult.EOL_NONE ) {
|
||||||
if ( isXML ) {
|
if ( isXML ) {
|
||||||
|
@ -602,6 +598,53 @@ public class ModUtilities {
|
||||||
modValid = false;
|
modValid = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ( decodeResult.encoding.equalsIgnoreCase( "windows-1252" ) ) {
|
||||||
|
// Found non-ASCII chars unique to windows-1252.
|
||||||
|
|
||||||
|
Set<CharSequence> uniqueGraphemes = new TreeSet<CharSequence>();
|
||||||
|
getUniqueGraphemes( decodeResult.text, uniqueGraphemes, Locale.getDefault() );
|
||||||
|
|
||||||
|
StringBuilder charBuf = new StringBuilder();
|
||||||
|
for ( CharSequence grapheme : uniqueGraphemes ) {
|
||||||
|
if ( !asciiEncoder.reset().canEncode( grapheme ) ) {
|
||||||
|
if ( charBuf.length() > 0 ) charBuf.append( "," );
|
||||||
|
|
||||||
|
charBuf.append( grapheme );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pendingMsgs.add( new ReportMessage(
|
||||||
|
ReportMessage.WARNING,
|
||||||
|
String.format( "Windows-1252 encoding with fancy non-ASCII chars (UTF-8 is recommended for clarity): %s", charBuf.toString() )
|
||||||
|
) );
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// Not windows-1252.
|
||||||
|
// Nag if there are chars that can't be converted to
|
||||||
|
// windows-1252 (for FTL 1.01-1.5.13).
|
||||||
|
|
||||||
|
Set<CharSequence> uniqueGraphemes = new TreeSet<CharSequence>();
|
||||||
|
getUniqueGraphemes( decodeResult.text, uniqueGraphemes, Locale.getDefault() );
|
||||||
|
|
||||||
|
StringBuilder charBuf = new StringBuilder();
|
||||||
|
for ( CharSequence grapheme : uniqueGraphemes ) {
|
||||||
|
if ( !win1252Encoder.reset().canEncode( grapheme ) ) {
|
||||||
|
if ( charBuf.length() > 0 ) charBuf.append( "," );
|
||||||
|
|
||||||
|
charBuf.append( grapheme ).append( " (" );
|
||||||
|
appendGraphemeHex( grapheme, charBuf );
|
||||||
|
charBuf.append( ")" );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if ( charBuf.length() > 0 ) {
|
||||||
|
pendingMsgs.add( new ReportMessage(
|
||||||
|
ReportMessage.WARNING,
|
||||||
|
String.format( "Characters that can't be re-encoded as windows-1252 will not work in FTL 1.5.13 and earlier: %s", charBuf.toString() )
|
||||||
|
) );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Suggest replacements for odd characters.
|
||||||
List<Pattern> oddCharPtns = new ArrayList<Pattern>();
|
List<Pattern> oddCharPtns = new ArrayList<Pattern>();
|
||||||
Map<Pattern,String> oddCharSuggestions = new HashMap<Pattern,String>();
|
Map<Pattern,String> oddCharSuggestions = new HashMap<Pattern,String>();
|
||||||
Map<Pattern,List<Character>> oddCharLists = new HashMap<Pattern,List<Character>>();
|
Map<Pattern,List<Character>> oddCharLists = new HashMap<Pattern,List<Character>>();
|
||||||
|
@ -647,8 +690,6 @@ public class ModUtilities {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: Nag if there are chars FTL can't show.
|
|
||||||
|
|
||||||
if ( isXML ) {
|
if ( isXML ) {
|
||||||
Report xmlReport = validateModXML( decodeResult.text );
|
Report xmlReport = validateModXML( decodeResult.text );
|
||||||
|
|
||||||
|
@ -736,6 +777,50 @@ public class ModUtilities {
|
||||||
return new Report( messages, modValid );
|
return new Report( messages, modValid );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Populates an existing Set with unique graphemes from a string.
|
||||||
|
*
|
||||||
|
* What humans think of as a character, is a grapheme. Unicode allows
|
||||||
|
* multiple code points (Java's char values) to cluster into a grapheme
|
||||||
|
* (like a letter, plus an accent).
|
||||||
|
*
|
||||||
|
* This method may be called repeatedly to accumulate graphemes from
|
||||||
|
* multiple strings.
|
||||||
|
*
|
||||||
|
* @src a String to scan
|
||||||
|
* @dstSet a Set to add results into (such as a TreeSet)
|
||||||
|
* @locale a locale for creating a BreakIterator
|
||||||
|
* @see java.text.BreakIterator
|
||||||
|
*/
|
||||||
|
public static void getUniqueGraphemes( String src, Set<CharSequence> dstSet, Locale locale ) {
|
||||||
|
BreakIterator graphemeIt = BreakIterator.getCharacterInstance( locale ); // No arg means default Locale.
|
||||||
|
graphemeIt.setText( src );
|
||||||
|
|
||||||
|
int start = graphemeIt.first();
|
||||||
|
int end = graphemeIt.next();
|
||||||
|
|
||||||
|
while ( end != BreakIterator.DONE ) {
|
||||||
|
CharSequence grapheme = src.subSequence( start, end );
|
||||||
|
dstSet.add( grapheme );
|
||||||
|
|
||||||
|
start = end;
|
||||||
|
end = graphemeIt.next();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Appends a grapheme's code points ("U+XXXX") to a buffer.
|
||||||
|
*
|
||||||
|
* If a grapheme involves multiple code points, they will be
|
||||||
|
* space-delimited.
|
||||||
|
*/
|
||||||
|
public static void appendGraphemeHex( CharSequence src, StringBuilder dstBuf ) {
|
||||||
|
for ( int i=0; i < src.length(); i++ ) {
|
||||||
|
if ( i > 0 ) dstBuf.append( " " );
|
||||||
|
dstBuf.append( String.format( "U+%04X", (int)src.charAt( i ) ) );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Checks a mod's xml for problems.
|
* Checks a mod's xml for problems.
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue