Fixed Emoji_Modifier_Base missing 1F46D; cleaned up for easier testing

git-svn-id: https://unicode.org/repos/unicodetools/trunk@1565 13e8329f-0b23-4da4-9fe8-d0f6fe080806
diff --git a/unicodetools/org/unicode/tools/emoji/CandidateData.java b/unicodetools/org/unicode/tools/emoji/CandidateData.java
index dafa341..1670f24 100644
--- a/unicodetools/org/unicode/tools/emoji/CandidateData.java
+++ b/unicodetools/org/unicode/tools/emoji/CandidateData.java
@@ -1,9 +1,11 @@
 package org.unicode.tools.emoji;
 
+import java.io.File;
 import java.text.Collator;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Comparator;
+import java.util.Date;
 import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Locale;
@@ -34,9 +36,11 @@
 import com.ibm.icu.lang.CharSequences;
 import com.ibm.icu.lang.UCharacter;
 import com.ibm.icu.lang.UProperty;
+import com.ibm.icu.text.DateFormat;
 import com.ibm.icu.text.Transform;
 import com.ibm.icu.text.UnicodeSet;
 import com.ibm.icu.util.ICUException;
+import com.ibm.icu.util.ULocale;
 import com.ibm.icu.util.VersionInfo;
 
 public class CandidateData implements Transform<String, String>, EmojiDataSource {
@@ -139,6 +143,7 @@
         String proposalItem = null;
         Status status = null;
 
+        date = new File(FileUtilities.getRelativeFileName(CandidateData.class, sourceFile)).lastModified();
         for (String line : FileUtilities.in(CandidateData.class, sourceFile)) {
             line = line.trim();
             try {
@@ -491,6 +496,7 @@
             return EmojiOrder.FULL_COMPARATOR.compare(o1, o2);
         }
     };
+    private long date;
 
     /**
      * @return the characters
@@ -944,4 +950,19 @@
         default: throw new IllegalArgumentException();
         }
     }
+
+    @Override
+    public UnicodeSet getKeycapSequences() {
+        return UnicodeSet.EMPTY;
+    }
+
+    @Override
+    public String addEmojiVariants(String s1) {
+        throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public String getVersionString() {
+        return "candidates:" + DateFormat.getInstanceForSkeleton("yyyyMMdd", ULocale.ROOT).format(date);
+    }
 }
diff --git a/unicodetools/org/unicode/tools/emoji/EmojiData.java b/unicodetools/org/unicode/tools/emoji/EmojiData.java
index d9fd680..dd34fa2 100644
--- a/unicodetools/org/unicode/tools/emoji/EmojiData.java
+++ b/unicodetools/org/unicode/tools/emoji/EmojiData.java
@@ -778,7 +778,7 @@
         return modifierSequences;
     }
 
-    public UnicodeSet getModifiers() {
+    public static UnicodeSet getModifiers() {
         return MODIFIERS;
     }
 
@@ -959,7 +959,7 @@
 
     private static Pattern EMOJI_VARIANTs = Pattern.compile("[" + Emoji.EMOJI_VARIANT + Emoji.TEXT_VARIANT + "]");
 
-    enum VariantStatus {
+    public enum VariantStatus {
         /** All characters that need them have emoji-variants */
         full("fully-qualified"), 
         /** The first character has an emoji-variant, if needed */
@@ -969,10 +969,18 @@
         /** Neither full nor partial */
         component("component")
         ;
-        final String name;
+        public final String name;
         private VariantStatus(String name) {
             this.name = name;
         }
+        public static final VariantStatus forString(String name) {
+            for (VariantStatus item : values()) {
+                if (name.equals(item.name)) {
+                    return item;
+                }
+            }
+            return valueOf(name);
+        }
     }
 
     public VariantStatus getVariantStatus(String emoji) {
@@ -1729,6 +1737,10 @@
     public VersionInfo getVersion() {
         return version;
     }
+    
+    public String getVersionString() {
+        return version.getVersionString(2, 2);
+    }
 
     public UnicodeSet getRegionalIndicators() {
         // TODO Auto-generated method stub
diff --git a/unicodetools/org/unicode/tools/emoji/EmojiDataSource.java b/unicodetools/org/unicode/tools/emoji/EmojiDataSource.java
index 024ad7f..cfa8aa5 100644
--- a/unicodetools/org/unicode/tools/emoji/EmojiDataSource.java
+++ b/unicodetools/org/unicode/tools/emoji/EmojiDataSource.java
@@ -13,6 +13,7 @@
     public UnicodeSet getExtendedPictographic();
     public UnicodeSet getTagSequences();
     public UnicodeSet getModifierSequences();
+    public UnicodeSet getKeycapSequences();
     public UnicodeSet getFlagSequences();
     public UnicodeSet getZwjSequencesNormal();
     public UnicodeSet getEmojiWithVariants();
@@ -28,6 +29,7 @@
         return getName(UTF16.valueOf(codepoint));
     }
     public UnicodeMap<String> getRawNames();
+    
     public default UnicodeSet getBasicSequences() {
         UnicodeSet result = new UnicodeSet();
         for (String s : getSingletonsWithDefectives()) {
@@ -42,5 +44,16 @@
         }
         return result.freeze();
     }
+    
+    public default UnicodeSet getEmojiForSortRules() {
+        return new UnicodeSet()
+                .addAll(getAllEmojiWithoutDefectives())
+                .removeAll(Emoji.DEFECTIVE)
+                .addAll(getZwjSequencesNormal()) 
+                .addAll(getKeycapSequences());
+    }
+    
+    public String addEmojiVariants(String s1);
+    public String getVersionString();
 }
 
diff --git a/unicodetools/org/unicode/tools/emoji/EmojiDataSourceCombined.java b/unicodetools/org/unicode/tools/emoji/EmojiDataSourceCombined.java
index c98bfcc..5f9c80f 100644
--- a/unicodetools/org/unicode/tools/emoji/EmojiDataSourceCombined.java
+++ b/unicodetools/org/unicode/tools/emoji/EmojiDataSourceCombined.java
@@ -120,14 +120,29 @@
         return add(emojiData.getRawNames(), candidates.getRawNames());
     }
     
-    public static void main(String[] args) {
-        UnicodeSet allChars = EMOJI_DATA.getAllEmojiWithDefectives();
-        
-    }
-
     @Override
     public UnicodeSet getTakesSign() {
         return add(emojiData.getTakesSign(),
                 candidates.getTakesSign());
     }
+
+    @Override
+    public UnicodeSet getKeycapSequences() {
+        return emojiData.getKeycapSequences();
+    }
+
+    @Override
+    public String addEmojiVariants(String s1) {
+        return emojiData.addEmojiVariants(s1);
+    }
+
+    /**
+     * Returns the combined version label: the emoji data version, " + ", then the
+     * candidates version. Both parts are taken from each source's getVersionString()
+     * so that the two halves of the label are formatted consistently.
+     */
+    @Override
+    public String getVersionString() {
+        return emojiData.getVersionString() + " + " + candidates.getVersionString();
+    }
 }
\ No newline at end of file
diff --git a/unicodetools/org/unicode/tools/emoji/EmojiOrder.java b/unicodetools/org/unicode/tools/emoji/EmojiOrder.java
index dd011a5..3515e3d 100644
--- a/unicodetools/org/unicode/tools/emoji/EmojiOrder.java
+++ b/unicodetools/org/unicode/tools/emoji/EmojiOrder.java
@@ -115,8 +115,9 @@
     public final Comparator<String>        codepointCompareSeparateDefects;
     public final UnicodeMap<MajorGroup>  majorGroupings = new UnicodeMap<>(); 
     public final Map<String, Integer>  groupOrder; 
-    final EmojiData emojiData;
+    public final EmojiData emojiData;
     private final Map<String, MajorGroup> categoryToMajor;
+    private final String reformatted;
 
     /**
      * @return the categoryToMajor
@@ -143,7 +144,9 @@
                 ;
         HashMap<String, Integer> _groupOrder = new LinkedHashMap<String,Integer>();
         Map<String, MajorGroup> _categoryToMajor = new LinkedHashMap<>();
-        orderingToCharacters = loadOrdering(version, file, mp, _groupOrder, _categoryToMajor);
+        StringBuilder _reformatted = new StringBuilder();
+        orderingToCharacters = loadOrdering(version, file, mp, _groupOrder, _categoryToMajor, _reformatted);
+        reformatted = _reformatted.toString();
         mp.freeze();
         majorGroupings.freeze();
         groupOrder = Collections.unmodifiableMap(_groupOrder);
@@ -173,10 +176,11 @@
         }
     }
 
-    Relation<String, String> loadOrdering(VersionInfo version, String sourceFile, 
+    private Relation<String, String> loadOrdering(VersionInfo version, String sourceFile, 
             MapComparator<String> mapComparator, 
             Map<String, Integer> _groupOrder, 
-            Map<String, MajorGroup> _categoryToMajor) {
+            Map<String, MajorGroup> _categoryToMajor,
+            StringBuilder reformatted) {
         //System.out.println(sourceFile);
         Relation<String, String> result = Relation.of(new LinkedHashMap<String, Set<String>>(), LinkedHashSet.class);
         Set<String> sorted = new LinkedHashSet<>();
@@ -184,147 +188,143 @@
         MajorGroup majorGroup = null;
         EmojiIterator ei = new EmojiIterator(emojiData, false);
         final String directory = Settings.DATA_DIR + "/emoji/" + version.getVersionString(2, 2) + "/source";
-        try (PrintWriter reformatted = FileUtilities.openUTF8Writer(Emoji.TR51_INTERNAL_DIR, sourceFile)) {
-            for (String line : FileUtilities.in(EmojiOrder.class,
-                    sourceFile)) {
-                if (line.isEmpty() || line.startsWith("#") && !line.startsWith("#⃣") && !line.startsWith("#️⃣")) {
+        for (String line : FileUtilities.in(EmojiOrder.class,
+                sourceFile)) {
+            if (line.isEmpty() || line.startsWith("#") && !line.startsWith("#⃣") && !line.startsWith("#️⃣")) {
+                continue;
+            }
+            if (DEBUG) System.out.println(line);
+
+            line = Emoji.UNESCAPE.transform(line);
+            line = line.replace(Emoji.TEXT_VARIANT_STRING, "").replace(Emoji.EMOJI_VARIANT_STRING, "");
+
+            if (line.contains("keycap")) {
+                int debug = 0;
+            }
+
+            if (line.startsWith("@@")) {
+                majorGroup = MajorGroup.fromString(line.substring(2).trim());
+                reformatted.append(line).append('\n');
+                continue;
+            }
+            if (line.startsWith("@")) {
+                String item = line.substring(1).trim();
+                if (!_groupOrder.containsKey(item)) {
+                    _groupOrder.put(item, _groupOrder.size());
+                }
+                MajorGroup major = _categoryToMajor.get(item);
+                if (major == null) {
+                    _categoryToMajor.put(item, majorGroup);
+                } else if (major != majorGroup) {
+                    throw new IllegalArgumentException("Conflicting major categories");
+                }
+                lastLabel.value.clear();
+                lastLabel.value.add(item);
+                reformatted.append("@" + item).append('\n');
+                continue;
+            }
+            //                String oldLine = line;
+            //                line = Emoji.getLabelFromLine(lastLabel, line);
+            //                for (String item : lastLabel.value) {
+            //                    if (!_groupOrder.containsKey(item)) {
+            //                        _groupOrder.put(item, _groupOrder.size());
+            //                    }
+            //                    MajorGroup major = _categoryToMajor.get(item);
+            //                    if (major == null) {
+            //                        _categoryToMajor.put(item, majorGroup);
+            //                    } else if (major != majorGroup) {
+            //                        throw new IllegalArgumentException("Conflicting major categories");
+            //                    }
+            //                    // hack for now
+            //                    if (oldLine.contains("\t")) {
+            //                        reformatted.println("@" + item);
+            //                    }
+            //                }
+            if (line.indexOf("🤝") >= 0) {
+                int debug = 0;
+            }
+            boolean isFirst = true;
+            for (String string : ei.set(line)) {
+                // NOTE: all emoji variant selectors have been removed at this point
+                if (sorted.contains(string)) {
                     continue;
                 }
-                if (DEBUG) System.out.println(line);
+                if (isFirst) {
+                    isFirst = false;
+                } else {
+                    reformatted.append(' ');
+                }
+                reformatted.append(EmojiData.EMOJI_DATA.addEmojiVariants(string));
+                //System.out.println("Adding: " + Utility.hex(string) + "\t" + string);
+                add(result, sorted, majorGroup, lastLabel, string);
+                addVariants(result, sorted, majorGroup, lastLabel, string); 
 
-                line = Emoji.UNESCAPE.transform(line);
-                line = line.replace(Emoji.TEXT_VARIANT_STRING, "").replace(Emoji.EMOJI_VARIANT_STRING, "");
-
-                if (line.contains("keycap")) {
-                    int debug = 0;
+                switch (string) {
+                case "👭": 
+                    addVariants(result, sorted, majorGroup, lastLabel, "👩‍🤝‍👩"); 
+                    break;
+                case "👫": 
+                    addVariants(result, sorted, majorGroup, lastLabel, "👩‍🤝‍👨"); 
+                    break;
+                case "👬": 
+                    addVariants(result, sorted, majorGroup, lastLabel, "👨‍🤝‍👨");
+                    break;
                 }
 
-                if (line.startsWith("@@")) {
-                    majorGroup = MajorGroup.fromString(line.substring(2).trim());
-                    reformatted.println(line);
-                    continue;
-                }
-                if (line.startsWith("@")) {
-                    String item = line.substring(1).trim();
-                    if (!_groupOrder.containsKey(item)) {
-                        _groupOrder.put(item, _groupOrder.size());
-                    }
-                    MajorGroup major = _categoryToMajor.get(item);
-                    if (major == null) {
-                        _categoryToMajor.put(item, majorGroup);
-                    } else if (major != majorGroup) {
-                        throw new IllegalArgumentException("Conflicting major categories");
-                    }
-                    lastLabel.value.clear();
-                    lastLabel.value.add(item);
-                    reformatted.println("@" + item);
-                    continue;
-                }
-                //                String oldLine = line;
-                //                line = Emoji.getLabelFromLine(lastLabel, line);
-                //                for (String item : lastLabel.value) {
-                //                    if (!_groupOrder.containsKey(item)) {
-                //                        _groupOrder.put(item, _groupOrder.size());
-                //                    }
-                //                    MajorGroup major = _categoryToMajor.get(item);
-                //                    if (major == null) {
-                //                        _categoryToMajor.put(item, majorGroup);
-                //                    } else if (major != majorGroup) {
-                //                        throw new IllegalArgumentException("Conflicting major categories");
-                //                    }
-                //                    // hack for now
-                //                    if (oldLine.contains("\t")) {
-                //                        reformatted.println("@" + item);
+                //                ImmutableList<String> list = hack.get(string);
+                //                if (list != null) {
+                //                    addVariants(result, sorted, majorGroup, lastLabel, string);
+                //                    for (String string2 : list) {
+                //                        //System.err.println("Adding " + show(string2));
+                //                        add(result, sorted, majorGroup, lastLabel, string2); 
+                //                        addVariants(result, sorted, majorGroup, lastLabel, string2);
                 //                    }
                 //                }
-                if (line.indexOf("🤝") >= 0) {
-                    int debug = 0;
-                }
-                boolean isFirst = true;
-                for (String string : ei.set(line)) {
-                    // NOTE: all emoji variant selectors have been removed at this point
-                    if (sorted.contains(string)) {
-                        continue;
-                    }
-                    if (isFirst) {
-                        isFirst = false;
-                    } else {
-                        reformatted.print(' ');
-                    }
-                    reformatted.print(EmojiData.EMOJI_DATA.addEmojiVariants(string));
-                    //System.out.println("Adding: " + Utility.hex(string) + "\t" + string);
-                    add(result, sorted, majorGroup, lastLabel, string);
-                    addVariants(result, sorted, majorGroup, lastLabel, string); 
-                    
-                    switch (string) {
-                    case "👭": 
-                        addVariants(result, sorted, majorGroup, lastLabel, "👩‍🤝‍👩"); 
-                        break;
-                    case "👫": 
-                        addVariants(result, sorted, majorGroup, lastLabel, "👩‍🤝‍👨"); 
-                        break;
-                    case "👬": 
-                        addVariants(result, sorted, majorGroup, lastLabel, "👨‍🤝‍👨");
-                        break;
-                    }
 
-                    //                ImmutableList<String> list = hack.get(string);
-                    //                if (list != null) {
-                    //                    addVariants(result, sorted, majorGroup, lastLabel, string);
-                    //                    for (String string2 : list) {
-                    //                        //System.err.println("Adding " + show(string2));
-                    //                        add(result, sorted, majorGroup, lastLabel, string2); 
-                    //                        addVariants(result, sorted, majorGroup, lastLabel, string2);
-                    //                    }
-                    //                }
-
-                    // We have a hack for blond person, and add them explicity.
-                    if (emojiData.getGenderBase().contains(string) && !string.equals("👱")) {
-                        addVariants(result, sorted, majorGroup, lastLabel, string + "\u200d\u2642"); 
-                        addVariants(result, sorted, majorGroup, lastLabel, string + "\u200d\u2640"); 
-                    }
-                    //                // add/remove all variant strings
-                    //                if (string.contains(Emoji.JOINER_STRING) || emojiData.getKeycapBases().contains(string.charAt(0))) { 
-                    //                    addVariants(result, sorted, majorGroup, lastLabel, string);
-                    //                }
+                // We have a hack for blond person, and add them explicitly.
+                if (emojiData.getGenderBase().contains(string) && !string.equals("👱")) {
+                    addVariants(result, sorted, majorGroup, lastLabel, string + "\u200d\u2642"); 
+                    addVariants(result, sorted, majorGroup, lastLabel, string + "\u200d\u2640"); 
                 }
-                if (!isFirst) { // skip empty lines
-                    reformatted.println();
-                }
+                //                // add/remove all variant strings
+                //                if (string.contains(Emoji.JOINER_STRING) || emojiData.getKeycapBases().contains(string.charAt(0))) { 
+                //                    addVariants(result, sorted, majorGroup, lastLabel, string);
+                //                }
             }
-
-            Set<String> missing = new UnicodeSet(emojiData.getSortingChars())
-                    .removeAll(emojiData.getModifierSequences())
-                    .addAllTo(new LinkedHashSet<String>());
-            missing.removeAll(sorted);
-            for (Iterator<String> it = missing.iterator(); it.hasNext();) {
-                String s = it.next();
-                if (s.endsWith(Emoji.EMOJI_VARIANT_STRING)) {
-                    it.remove();
-                }
+            if (!isFirst) { // skip empty lines
+                reformatted.append('\n');
             }
-            if (!missing.isEmpty() && !sourceFile.startsWith("alt")) {
-                result.putAll("other", missing);
-                System.err.println("Missing some orderings: ");
-                for (String s : missing) {
-                    System.err.print(s + " ");
-                }
-                System.err.println();
-
-                for (String s : missing) {
-                    System.err.println("\t" + s + "\t\t" + Emoji.show(s));
-                }
-                System.err.println(directory);
-            }
-            sorted.addAll(missing);
-            mapComparator.add(sorted);
-            //mapComparator.setErrorOnMissing(true);
-            mapComparator.freeze();
-            result.freeze();
-            return result;
-        } catch (IOException e) {
-            throw new ICUUncheckedIOException(e);
         }
+
+        Set<String> missing = new UnicodeSet(emojiData.getSortingChars())
+                .removeAll(emojiData.getModifierSequences())
+                .addAllTo(new LinkedHashSet<String>());
+        missing.removeAll(sorted);
+        for (Iterator<String> it = missing.iterator(); it.hasNext();) {
+            String s = it.next();
+            if (s.endsWith(Emoji.EMOJI_VARIANT_STRING)) {
+                it.remove();
+            }
+        }
+        if (!missing.isEmpty() && !sourceFile.startsWith("alt")) {
+            result.putAll("other", missing);
+            System.err.println("Missing some orderings: ");
+            for (String s : missing) {
+                System.err.print(s + " ");
+            }
+            System.err.println();
+
+            for (String s : missing) {
+                System.err.println("\t" + s + "\t\t" + Emoji.show(s));
+            }
+            System.err.println(directory);
+        }
+        sorted.addAll(missing);
+        mapComparator.add(sorted);
+        //mapComparator.setErrorOnMissing(true);
+        mapComparator.freeze();
+        result.freeze();
+        return result;
     }
 
     private void addAllModifiers(Relation<String, String> result, Set<String> sorted, Output<Set<String>> lastLabel, MajorGroup majorGroup, String... strings) {
@@ -708,4 +708,9 @@
                 + "\t" + info
                 );
     }
+
+    /** Returns the text collected while the ordering file was loaded: its group lines and each line's emoji with variants re-added. */
+    public String getReformatted() {
+        return reformatted;
+    }
 }
\ No newline at end of file
diff --git a/unicodetools/org/unicode/tools/emoji/GenerateEmojiData.java b/unicodetools/org/unicode/tools/emoji/GenerateEmojiData.java
index 54c7896..56149bd 100644
--- a/unicodetools/org/unicode/tools/emoji/GenerateEmojiData.java
+++ b/unicodetools/org/unicode/tools/emoji/GenerateEmojiData.java
@@ -127,8 +127,8 @@
 
         PropPrinter printer = new PropPrinter().set(extraNames);
         
-        try (TempPrintWriter outText2 = new TempPrintWriter(OUTPUT_DIR, "emoji-internal.txt")) {
-            UnicodeSet emojiGenderBase = EmojiDataSourceCombined.EMOJI_DATA.getSingletonsWithDefectives();
+        try (TempPrintWriter outText2 = new TempPrintWriter(OUTPUT_DIR, "internal/emoji-internal.txt")) {
+            UnicodeSet emojiGenderBase = EmojiDataSourceCombined.EMOJI_DATA.getGenderBases();
             outText2.println(Utility.getBaseDataHeader("emoji-internal", 51, "Emoji Data Internal", Emoji.VERSION_STRING));
             
             
@@ -293,6 +293,12 @@
 
         // generate emoji-test
         GenerateEmojiKeyboard.showLines(EmojiOrder.STD_ORDER, EmojiOrder.STD_ORDER.emojiData.getSortingChars(), Target.propFile, OUTPUT_DIR);
+
+        try (TempPrintWriter reformatted = new TempPrintWriter(OUTPUT_DIR, "internal/emojiOrdering.txt")) {
+            reformatted.write(EmojiOrder.BETA_ORDER.getReformatted());
+        } catch (IOException e) {
+            throw new ICUUncheckedIOException(e);
+        }
     }
 
     private static int maxLength(String... items) {
diff --git a/unicodetools/org/unicode/tools/emoji/TempPrintWriter.java b/unicodetools/org/unicode/tools/emoji/TempPrintWriter.java
index f808952..c2e4b23 100644
--- a/unicodetools/org/unicode/tools/emoji/TempPrintWriter.java
+++ b/unicodetools/org/unicode/tools/emoji/TempPrintWriter.java
@@ -21,6 +21,7 @@
     }
 
     public TempPrintWriter(File file) {
+        super();
         final String parentFile = file.getParent();
         this.filename = file.toString();
         Random rand = new Random();
@@ -37,26 +38,30 @@
     }
 
     @Override
-    public void close() throws IOException {
+    public void close() {
         tempPrintWriter.close();
-        Utility.replaceDifferentOrDelete(filename, tempName, false);
+        try {
+            Utility.replaceDifferentOrDelete(filename, tempName, false);
+        } catch (IOException e) {
+            throw new ICUUncheckedIOException(e);
+        }
     }
 
     @Override
-    public void write(char[] cbuf, int off, int len) throws IOException {
+    public void write(char[] cbuf, int off, int len) {
         tempPrintWriter.write(cbuf, off, len);
     }
 
     @Override
-    public void flush() throws IOException {
+    public void flush() {
         tempPrintWriter.flush();
     }
 
-    public void println(String line) throws IOException {
+    public void println(String line) {
         tempPrintWriter.println(line);
     }
     
-    public void println() throws IOException {
+    public void println() {
         tempPrintWriter.println();
     }
 }
\ No newline at end of file