Fixed: Emoji_Modifier_Base was missing 1F46D; cleaned up for easier testing
git-svn-id: https://unicode.org/repos/unicodetools/trunk@1565 13e8329f-0b23-4da4-9fe8-d0f6fe080806
diff --git a/unicodetools/org/unicode/tools/emoji/CandidateData.java b/unicodetools/org/unicode/tools/emoji/CandidateData.java
index dafa341..1670f24 100644
--- a/unicodetools/org/unicode/tools/emoji/CandidateData.java
+++ b/unicodetools/org/unicode/tools/emoji/CandidateData.java
@@ -1,9 +1,11 @@
package org.unicode.tools.emoji;
+import java.io.File;
import java.text.Collator;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
+import java.util.Date;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
@@ -34,9 +36,11 @@
import com.ibm.icu.lang.CharSequences;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.lang.UProperty;
+import com.ibm.icu.text.DateFormat;
import com.ibm.icu.text.Transform;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.util.ICUException;
+import com.ibm.icu.util.ULocale;
import com.ibm.icu.util.VersionInfo;
public class CandidateData implements Transform<String, String>, EmojiDataSource {
@@ -139,6 +143,7 @@
String proposalItem = null;
Status status = null;
+ date = new File(FileUtilities.getRelativeFileName(CandidateData.class, sourceFile)).lastModified();
for (String line : FileUtilities.in(CandidateData.class, sourceFile)) {
line = line.trim();
try {
@@ -491,6 +496,7 @@
return EmojiOrder.FULL_COMPARATOR.compare(o1, o2);
}
};
+ private long date;
/**
* @return the characters
@@ -944,4 +950,19 @@
default: throw new IllegalArgumentException();
}
}
+
+ /**
+ * Candidate data contributes no keycap sequences, so this always returns
+ * the shared empty set {@link UnicodeSet#EMPTY}.
+ */
+ @Override
+ public UnicodeSet getKeycapSequences() {
+ return UnicodeSet.EMPTY;
+ }
+
+ /**
+ * Not supported for candidate data.
+ *
+ * @throws UnsupportedOperationException always
+ */
+ @Override
+ public String addEmojiVariants(String s1) {
+ throw new UnsupportedOperationException();
+ }
+
+ /**
+ * Returns an identifier for this candidate data set: the literal prefix
+ * {@code "candidates:"} followed by the {@code date} field formatted with the
+ * ICU {@code yyyyMMdd} skeleton in the root locale.
+ * {@code date} is the last-modified timestamp of the candidate source file,
+ * captured just before that file is read.
+ * NOTE(review): File.lastModified() yields 0 when the file cannot be found,
+ * which would format as the 1970 epoch date -- confirm the path always resolves.
+ */
+ @Override
+ public String getVersionString() {
+ return "candidates:" + DateFormat.getInstanceForSkeleton("yyyyMMdd", ULocale.ROOT).format(date);
+ }
}
diff --git a/unicodetools/org/unicode/tools/emoji/EmojiData.java b/unicodetools/org/unicode/tools/emoji/EmojiData.java
index d9fd680..dd34fa2 100644
--- a/unicodetools/org/unicode/tools/emoji/EmojiData.java
+++ b/unicodetools/org/unicode/tools/emoji/EmojiData.java
@@ -778,7 +778,7 @@
return modifierSequences;
}
+ /**
+ * Returns the class-level {@code MODIFIERS} set. The method is static, so
+ * callers do not need an EmojiData instance to obtain it.
+ */
- public UnicodeSet getModifiers() {
+ public static UnicodeSet getModifiers() {
return MODIFIERS;
}
@@ -959,7 +959,7 @@
private static Pattern EMOJI_VARIANTs = Pattern.compile("[" + Emoji.EMOJI_VARIANT + Emoji.TEXT_VARIANT + "]");
- enum VariantStatus {
+ public enum VariantStatus {
/** All characters that need them have emoji-variants */
full("fully-qualified"),
/** The first character has an emoji-variant, if needed */
@@ -969,10 +969,18 @@
/** Neither full nor partial */
component("component")
;
- final String name;
+ public final String name;
private VariantStatus(String name) {
this.name = name;
}
+ /**
+ * Looks up a status by its display {@code name} field (for example
+ * {@code "fully-qualified"}); if no constant has that display name, falls
+ * back to {@code valueOf}, i.e. the enum constant identifier (for example
+ * {@code "full"}).
+ *
+ * @param name display name or constant identifier; must not be null
+ * @return the matching status
+ * @throws IllegalArgumentException if neither lookup matches (raised by {@code valueOf})
+ */
+ public static final VariantStatus forString(String name) {
+ for (VariantStatus item : values()) {
+ if (name.equals(item.name)) {
+ return item;
+ }
+ }
+ return valueOf(name);
+ }
}
public VariantStatus getVariantStatus(String emoji) {
@@ -1729,6 +1737,10 @@
+ /** Returns the {@code VersionInfo} stored in this object's {@code version} field. */
public VersionInfo getVersion() {
return version;
}
+
+ /**
+ * Returns the version as text, produced by
+ * {@code VersionInfo.getVersionString(2, 2)}.
+ * NOTE(review): with min and max digits both 2 this should be the
+ * two-field "major.minor" form -- confirm against the ICU documentation.
+ */
+ public String getVersionString() {
+ return version.getVersionString(2, 2);
+ }
public UnicodeSet getRegionalIndicators() {
// TODO Auto-generated method stub
diff --git a/unicodetools/org/unicode/tools/emoji/EmojiDataSource.java b/unicodetools/org/unicode/tools/emoji/EmojiDataSource.java
index 024ad7f..cfa8aa5 100644
--- a/unicodetools/org/unicode/tools/emoji/EmojiDataSource.java
+++ b/unicodetools/org/unicode/tools/emoji/EmojiDataSource.java
@@ -13,6 +13,7 @@
public UnicodeSet getExtendedPictographic();
public UnicodeSet getTagSequences();
public UnicodeSet getModifierSequences();
+ public UnicodeSet getKeycapSequences();
public UnicodeSet getFlagSequences();
public UnicodeSet getZwjSequencesNormal();
public UnicodeSet getEmojiWithVariants();
@@ -28,6 +29,7 @@
return getName(UTF16.valueOf(codepoint));
}
public UnicodeMap<String> getRawNames();
+
public default UnicodeSet getBasicSequences() {
UnicodeSet result = new UnicodeSet();
for (String s : getSingletonsWithDefectives()) {
@@ -42,5 +44,16 @@
}
return result.freeze();
}
+
+ /**
+ * Builds a new set from {@link #getAllEmojiWithoutDefectives()} with
+ * {@code Emoji.DEFECTIVE} removed, then adds the normal ZWJ sequences and
+ * the keycap sequences. The removal is applied before those two additions,
+ * so it does not filter them.
+ * A fresh, unfrozen UnicodeSet is created on every call.
+ */
+ public default UnicodeSet getEmojiForSortRules() {
+ return new UnicodeSet()
+ .addAll(getAllEmojiWithoutDefectives())
+ .removeAll(Emoji.DEFECTIVE)
+ .addAll(getZwjSequencesNormal())
+ .addAll(getKeycapSequences());
+ }
+
+ /**
+ * NOTE(review): presumably returns {@code s1} with emoji variation selectors
+ * added where required -- confirm against the EmojiData implementation.
+ * Implementations may not support this operation.
+ */
+ public String addEmojiVariants(String s1);
+ /** Returns a textual identifier of the version of the data behind this source. */
+ public String getVersionString();
}
diff --git a/unicodetools/org/unicode/tools/emoji/EmojiDataSourceCombined.java b/unicodetools/org/unicode/tools/emoji/EmojiDataSourceCombined.java
index c98bfcc..5f9c80f 100644
--- a/unicodetools/org/unicode/tools/emoji/EmojiDataSourceCombined.java
+++ b/unicodetools/org/unicode/tools/emoji/EmojiDataSourceCombined.java
@@ -120,14 +120,29 @@
return add(emojiData.getRawNames(), candidates.getRawNames());
}
- public static void main(String[] args) {
- UnicodeSet allChars = EMOJI_DATA.getAllEmojiWithDefectives();
-
- }
-
+ /**
+ * Combines the takes-sign sets of the released emoji data and the candidate
+ * data through the {@code add} helper.
+ */
@Override
public UnicodeSet getTakesSign() {
return add(emojiData.getTakesSign(),
candidates.getTakesSign());
}
+
+ /**
+ * Returns the keycap sequences of the released emoji data only; the
+ * candidate source is not consulted here.
+ */
+ @Override
+ public UnicodeSet getKeycapSequences() {
+ return emojiData.getKeycapSequences();
+ }
+
+ /**
+ * Delegates to the released emoji data's {@code addEmojiVariants}; the
+ * candidate source is not consulted.
+ */
+ @Override
+ public String addEmojiVariants(String s1) {
+ return emojiData.addEmojiVariants(s1);
+ }
+
+ /**
+ * Returns a label naming both underlying sources, in the form
+ * {@code "<released emoji version> + <candidate version>"}.
+ * Both halves come from {@code getVersionString()}, so the released part is
+ * the same short text that the released data reports for itself rather than
+ * the default string form of its {@code VersionInfo}.
+ */
+ @Override
+ public String getVersionString() {
+ return emojiData.getVersionString() + " + " + candidates.getVersionString();
+ }
+
+// public static void main(String[] args) {
+// UnicodeSet allChars = EMOJI_DATA.getAllEmojiWithDefectives();
+//
+// }
}
\ No newline at end of file
diff --git a/unicodetools/org/unicode/tools/emoji/EmojiOrder.java b/unicodetools/org/unicode/tools/emoji/EmojiOrder.java
index dd011a5..3515e3d 100644
--- a/unicodetools/org/unicode/tools/emoji/EmojiOrder.java
+++ b/unicodetools/org/unicode/tools/emoji/EmojiOrder.java
@@ -115,8 +115,9 @@
public final Comparator<String> codepointCompareSeparateDefects;
public final UnicodeMap<MajorGroup> majorGroupings = new UnicodeMap<>();
public final Map<String, Integer> groupOrder;
- final EmojiData emojiData;
+ public final EmojiData emojiData;
private final Map<String, MajorGroup> categoryToMajor;
+ private final String reformatted;
/**
* @return the categoryToMajor
@@ -143,7 +144,9 @@
;
HashMap<String, Integer> _groupOrder = new LinkedHashMap<String,Integer>();
Map<String, MajorGroup> _categoryToMajor = new LinkedHashMap<>();
- orderingToCharacters = loadOrdering(version, file, mp, _groupOrder, _categoryToMajor);
+ StringBuilder _reformatted = new StringBuilder();
+ orderingToCharacters = loadOrdering(version, file, mp, _groupOrder, _categoryToMajor, _reformatted);
+ reformatted = _reformatted.toString();
mp.freeze();
majorGroupings.freeze();
groupOrder = Collections.unmodifiableMap(_groupOrder);
@@ -173,10 +176,11 @@
}
}
- Relation<String, String> loadOrdering(VersionInfo version, String sourceFile,
+ private Relation<String, String> loadOrdering(VersionInfo version, String sourceFile,
MapComparator<String> mapComparator,
Map<String, Integer> _groupOrder,
- Map<String, MajorGroup> _categoryToMajor) {
+ Map<String, MajorGroup> _categoryToMajor,
+ StringBuilder reformatted) {
//System.out.println(sourceFile);
Relation<String, String> result = Relation.of(new LinkedHashMap<String, Set<String>>(), LinkedHashSet.class);
Set<String> sorted = new LinkedHashSet<>();
@@ -184,147 +188,143 @@
MajorGroup majorGroup = null;
EmojiIterator ei = new EmojiIterator(emojiData, false);
final String directory = Settings.DATA_DIR + "/emoji/" + version.getVersionString(2, 2) + "/source";
- try (PrintWriter reformatted = FileUtilities.openUTF8Writer(Emoji.TR51_INTERNAL_DIR, sourceFile)) {
- for (String line : FileUtilities.in(EmojiOrder.class,
- sourceFile)) {
- if (line.isEmpty() || line.startsWith("#") && !line.startsWith("#⃣") && !line.startsWith("#️⃣")) {
+ for (String line : FileUtilities.in(EmojiOrder.class,
+ sourceFile)) {
+ if (line.isEmpty() || line.startsWith("#") && !line.startsWith("#⃣") && !line.startsWith("#️⃣")) {
+ continue;
+ }
+ if (DEBUG) System.out.println(line);
+
+ line = Emoji.UNESCAPE.transform(line);
+ line = line.replace(Emoji.TEXT_VARIANT_STRING, "").replace(Emoji.EMOJI_VARIANT_STRING, "");
+
+ if (line.contains("keycap")) {
+ int debug = 0;
+ }
+
+ if (line.startsWith("@@")) {
+ majorGroup = MajorGroup.fromString(line.substring(2).trim());
+ reformatted.append(line).append('\n');
+ continue;
+ }
+ if (line.startsWith("@")) {
+ String item = line.substring(1).trim();
+ if (!_groupOrder.containsKey(item)) {
+ _groupOrder.put(item, _groupOrder.size());
+ }
+ MajorGroup major = _categoryToMajor.get(item);
+ if (major == null) {
+ _categoryToMajor.put(item, majorGroup);
+ } else if (major != majorGroup) {
+ throw new IllegalArgumentException("Conflicting major categories");
+ }
+ lastLabel.value.clear();
+ lastLabel.value.add(item);
+ reformatted.append("@" + item).append('\n');
+ continue;
+ }
+ // String oldLine = line;
+ // line = Emoji.getLabelFromLine(lastLabel, line);
+ // for (String item : lastLabel.value) {
+ // if (!_groupOrder.containsKey(item)) {
+ // _groupOrder.put(item, _groupOrder.size());
+ // }
+ // MajorGroup major = _categoryToMajor.get(item);
+ // if (major == null) {
+ // _categoryToMajor.put(item, majorGroup);
+ // } else if (major != majorGroup) {
+ // throw new IllegalArgumentException("Conflicting major categories");
+ // }
+ // // hack for now
+ // if (oldLine.contains("\t")) {
+ // reformatted.println("@" + item);
+ // }
+ // }
+ if (line.indexOf("🤝") >= 0) {
+ int debug = 0;
+ }
+ boolean isFirst = true;
+ for (String string : ei.set(line)) {
+ // NOTE: all emoji variant selectors have been removed at this point
+ if (sorted.contains(string)) {
continue;
}
- if (DEBUG) System.out.println(line);
+ if (isFirst) {
+ isFirst = false;
+ } else {
+ reformatted.append(' ');
+ }
+ reformatted.append(EmojiData.EMOJI_DATA.addEmojiVariants(string));
+ //System.out.println("Adding: " + Utility.hex(string) + "\t" + string);
+ add(result, sorted, majorGroup, lastLabel, string);
+ addVariants(result, sorted, majorGroup, lastLabel, string);
- line = Emoji.UNESCAPE.transform(line);
- line = line.replace(Emoji.TEXT_VARIANT_STRING, "").replace(Emoji.EMOJI_VARIANT_STRING, "");
-
- if (line.contains("keycap")) {
- int debug = 0;
+ switch (string) {
+ case "👭":
+ addVariants(result, sorted, majorGroup, lastLabel, "👩🤝👩");
+ break;
+ case "👫":
+ addVariants(result, sorted, majorGroup, lastLabel, "👩🤝👨");
+ break;
+ case "👬":
+ addVariants(result, sorted, majorGroup, lastLabel, "👨🤝👨");
+ break;
}
- if (line.startsWith("@@")) {
- majorGroup = MajorGroup.fromString(line.substring(2).trim());
- reformatted.println(line);
- continue;
- }
- if (line.startsWith("@")) {
- String item = line.substring(1).trim();
- if (!_groupOrder.containsKey(item)) {
- _groupOrder.put(item, _groupOrder.size());
- }
- MajorGroup major = _categoryToMajor.get(item);
- if (major == null) {
- _categoryToMajor.put(item, majorGroup);
- } else if (major != majorGroup) {
- throw new IllegalArgumentException("Conflicting major categories");
- }
- lastLabel.value.clear();
- lastLabel.value.add(item);
- reformatted.println("@" + item);
- continue;
- }
- // String oldLine = line;
- // line = Emoji.getLabelFromLine(lastLabel, line);
- // for (String item : lastLabel.value) {
- // if (!_groupOrder.containsKey(item)) {
- // _groupOrder.put(item, _groupOrder.size());
- // }
- // MajorGroup major = _categoryToMajor.get(item);
- // if (major == null) {
- // _categoryToMajor.put(item, majorGroup);
- // } else if (major != majorGroup) {
- // throw new IllegalArgumentException("Conflicting major categories");
- // }
- // // hack for now
- // if (oldLine.contains("\t")) {
- // reformatted.println("@" + item);
+ // ImmutableList<String> list = hack.get(string);
+ // if (list != null) {
+ // addVariants(result, sorted, majorGroup, lastLabel, string);
+ // for (String string2 : list) {
+ // //System.err.println("Adding " + show(string2));
+ // add(result, sorted, majorGroup, lastLabel, string2);
+ // addVariants(result, sorted, majorGroup, lastLabel, string2);
// }
// }
- if (line.indexOf("🤝") >= 0) {
- int debug = 0;
- }
- boolean isFirst = true;
- for (String string : ei.set(line)) {
- // NOTE: all emoji variant selectors have been removed at this point
- if (sorted.contains(string)) {
- continue;
- }
- if (isFirst) {
- isFirst = false;
- } else {
- reformatted.print(' ');
- }
- reformatted.print(EmojiData.EMOJI_DATA.addEmojiVariants(string));
- //System.out.println("Adding: " + Utility.hex(string) + "\t" + string);
- add(result, sorted, majorGroup, lastLabel, string);
- addVariants(result, sorted, majorGroup, lastLabel, string);
-
- switch (string) {
- case "👭":
- addVariants(result, sorted, majorGroup, lastLabel, "👩🤝👩");
- break;
- case "👫":
- addVariants(result, sorted, majorGroup, lastLabel, "👩🤝👨");
- break;
- case "👬":
- addVariants(result, sorted, majorGroup, lastLabel, "👨🤝👨");
- break;
- }
- // ImmutableList<String> list = hack.get(string);
- // if (list != null) {
- // addVariants(result, sorted, majorGroup, lastLabel, string);
- // for (String string2 : list) {
- // //System.err.println("Adding " + show(string2));
- // add(result, sorted, majorGroup, lastLabel, string2);
- // addVariants(result, sorted, majorGroup, lastLabel, string2);
- // }
- // }
-
- // We have a hack for blond person, and add them explicity.
- if (emojiData.getGenderBase().contains(string) && !string.equals("👱")) {
- addVariants(result, sorted, majorGroup, lastLabel, string + "\u200d\u2642");
- addVariants(result, sorted, majorGroup, lastLabel, string + "\u200d\u2640");
- }
- // // add/remove all variant strings
- // if (string.contains(Emoji.JOINER_STRING) || emojiData.getKeycapBases().contains(string.charAt(0))) {
- // addVariants(result, sorted, majorGroup, lastLabel, string);
- // }
+ // We have a hack for blond person, and add them explicity.
+ if (emojiData.getGenderBase().contains(string) && !string.equals("👱")) {
+ addVariants(result, sorted, majorGroup, lastLabel, string + "\u200d\u2642");
+ addVariants(result, sorted, majorGroup, lastLabel, string + "\u200d\u2640");
}
- if (!isFirst) { // skip empty lines
- reformatted.println();
- }
+ // // add/remove all variant strings
+ // if (string.contains(Emoji.JOINER_STRING) || emojiData.getKeycapBases().contains(string.charAt(0))) {
+ // addVariants(result, sorted, majorGroup, lastLabel, string);
+ // }
}
-
- Set<String> missing = new UnicodeSet(emojiData.getSortingChars())
- .removeAll(emojiData.getModifierSequences())
- .addAllTo(new LinkedHashSet<String>());
- missing.removeAll(sorted);
- for (Iterator<String> it = missing.iterator(); it.hasNext();) {
- String s = it.next();
- if (s.endsWith(Emoji.EMOJI_VARIANT_STRING)) {
- it.remove();
- }
+ if (!isFirst) { // skip empty lines
+ reformatted.append('\n');
}
- if (!missing.isEmpty() && !sourceFile.startsWith("alt")) {
- result.putAll("other", missing);
- System.err.println("Missing some orderings: ");
- for (String s : missing) {
- System.err.print(s + " ");
- }
- System.err.println();
-
- for (String s : missing) {
- System.err.println("\t" + s + "\t\t" + Emoji.show(s));
- }
- System.err.println(directory);
- }
- sorted.addAll(missing);
- mapComparator.add(sorted);
- //mapComparator.setErrorOnMissing(true);
- mapComparator.freeze();
- result.freeze();
- return result;
- } catch (IOException e) {
- throw new ICUUncheckedIOException(e);
}
+
+ Set<String> missing = new UnicodeSet(emojiData.getSortingChars())
+ .removeAll(emojiData.getModifierSequences())
+ .addAllTo(new LinkedHashSet<String>());
+ missing.removeAll(sorted);
+ for (Iterator<String> it = missing.iterator(); it.hasNext();) {
+ String s = it.next();
+ if (s.endsWith(Emoji.EMOJI_VARIANT_STRING)) {
+ it.remove();
+ }
+ }
+ if (!missing.isEmpty() && !sourceFile.startsWith("alt")) {
+ result.putAll("other", missing);
+ System.err.println("Missing some orderings: ");
+ for (String s : missing) {
+ System.err.print(s + " ");
+ }
+ System.err.println();
+
+ for (String s : missing) {
+ System.err.println("\t" + s + "\t\t" + Emoji.show(s));
+ }
+ System.err.println(directory);
+ }
+ sorted.addAll(missing);
+ mapComparator.add(sorted);
+ //mapComparator.setErrorOnMissing(true);
+ mapComparator.freeze();
+ result.freeze();
+ return result;
}
private void addAllModifiers(Relation<String, String> result, Set<String> sorted, Output<Set<String>> lastLabel, MajorGroup majorGroup, String... strings) {
@@ -708,4 +708,9 @@
+ "\t" + info
);
}
+
+
+ /**
+ * Returns the text accumulated while the ordering source file was loaded in
+ * the constructor: the {@code @@} major-group lines, the {@code @} label
+ * lines, and one line per source line of newly seen emoji, space-separated
+ * and passed through {@code addEmojiVariants}.
+ * The string is only held in memory; writing it to a file is left to the caller.
+ */
+ public String getReformatted() {
+ return reformatted;
+ }
}
\ No newline at end of file
diff --git a/unicodetools/org/unicode/tools/emoji/GenerateEmojiData.java b/unicodetools/org/unicode/tools/emoji/GenerateEmojiData.java
index 54c7896..56149bd 100644
--- a/unicodetools/org/unicode/tools/emoji/GenerateEmojiData.java
+++ b/unicodetools/org/unicode/tools/emoji/GenerateEmojiData.java
@@ -127,8 +127,8 @@
PropPrinter printer = new PropPrinter().set(extraNames);
- try (TempPrintWriter outText2 = new TempPrintWriter(OUTPUT_DIR, "emoji-internal.txt")) {
- UnicodeSet emojiGenderBase = EmojiDataSourceCombined.EMOJI_DATA.getSingletonsWithDefectives();
+ try (TempPrintWriter outText2 = new TempPrintWriter(OUTPUT_DIR, "internal/emoji-internal.txt")) {
+ UnicodeSet emojiGenderBase = EmojiDataSourceCombined.EMOJI_DATA.getGenderBases();
outText2.println(Utility.getBaseDataHeader("emoji-internal", 51, "Emoji Data Internal", Emoji.VERSION_STRING));
@@ -293,6 +293,12 @@
// generate emoji-test
GenerateEmojiKeyboard.showLines(EmojiOrder.STD_ORDER, EmojiOrder.STD_ORDER.emojiData.getSortingChars(), Target.propFile, OUTPUT_DIR);
+
+ try (TempPrintWriter reformatted = new TempPrintWriter(OUTPUT_DIR, "internal/emojiOrdering.txt")) {
+ reformatted.write(EmojiOrder.BETA_ORDER.getReformatted());
+ } catch (IOException e) {
+ throw new ICUUncheckedIOException(e);
+ }
}
private static int maxLength(String... items) {
diff --git a/unicodetools/org/unicode/tools/emoji/TempPrintWriter.java b/unicodetools/org/unicode/tools/emoji/TempPrintWriter.java
index f808952..c2e4b23 100644
--- a/unicodetools/org/unicode/tools/emoji/TempPrintWriter.java
+++ b/unicodetools/org/unicode/tools/emoji/TempPrintWriter.java
@@ -21,6 +21,7 @@
}
public TempPrintWriter(File file) {
+ super();
final String parentFile = file.getParent();
this.filename = file.toString();
Random rand = new Random();
@@ -37,26 +38,30 @@
}
+ /**
+ * Closes the temporary writer and then calls
+ * {@code Utility.replaceDifferentOrDelete(filename, tempName, false)}.
+ * An IOException from that call is rethrown as ICUUncheckedIOException, so
+ * this method declares no checked exception.
+ * NOTE(review): the helper presumably replaces the target only when the
+ * content differs and otherwise deletes the temp file -- confirm in Utility.
+ */
@Override
- public void close() throws IOException {
+ public void close() {
tempPrintWriter.close();
- Utility.replaceDifferentOrDelete(filename, tempName, false);
+ try {
+ Utility.replaceDifferentOrDelete(filename, tempName, false);
+ } catch (IOException e) {
+ throw new ICUUncheckedIOException(e);
+ }
}
+ /** Forwards the character range to the underlying temporary writer; declares no checked exception. */
@Override
- public void write(char[] cbuf, int off, int len) throws IOException {
+ public void write(char[] cbuf, int off, int len) {
tempPrintWriter.write(cbuf, off, len);
}
+ /** Flushes the underlying temporary writer; declares no checked exception. */
@Override
- public void flush() throws IOException {
+ public void flush() {
tempPrintWriter.flush();
}
+ /** Writes {@code line} followed by a line terminator to the underlying temporary writer. */
- public void println(String line) throws IOException {
+ public void println(String line) {
tempPrintWriter.println(line);
}
+ /** Writes a bare line terminator to the underlying temporary writer. */
- public void println() throws IOException {
+ public void println() {
tempPrintWriter.println();
}
}
\ No newline at end of file