From 202077cd2f719282ef4ee29fcc5dc95f596b1f07 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Tue, 30 Jan 2024 18:15:51 +0100 Subject: [PATCH] Use IndexUnicodeProperties in the JSPs (#659) --- .../java/org/unicode/jsp/CachedProps.java | 288 -------------- .../java/org/unicode/jsp/ScriptTester2.java | 7 +- .../main/java/org/unicode/jsp/UcdLoader.java | 111 ++++++ .../main/java/org/unicode/jsp/UnicodeJsp.java | 43 ++- .../org/unicode/jsp/UnicodeSetUtilities.java | 71 ++-- .../org/unicode/jsp/UnicodeUtilities.java | 354 +++++++++++++----- .../org/unicode/jsp/XPropertyFactory.java | 83 ++-- UnicodeJsps/src/main/webapp/WEB-INF/web.xml | 4 + UnicodeJsps/src/main/webapp/character.jsp | 26 +- UnicodeJsps/src/main/webapp/identifier.jsp | 3 +- UnicodeJsps/src/main/webapp/index.css | 5 + .../src/main/webapp/list-unicodeset.jsp | 11 +- .../java/org/unicode/jsptest/TestEmoji.java | 2 +- .../java/org/unicode/jsptest/TestJsp.java | 22 +- .../org/unicode/jsptest/TestMultivalued.java | 12 + .../org/unicode/jsptest/TestUnicodeSet.java | 90 +---- docs/unicodejsps/index.md | 2 +- .../unicode/props/IndexUnicodeProperties.java | 12 +- .../text/UCD/TestUnicodeInvariants.java | 5 +- .../unicode/text/UCD/VersionedProperty.java | 57 ++- .../org/unicode/text/utility/Settings.java | 19 + 21 files changed, 643 insertions(+), 584 deletions(-) delete mode 100644 UnicodeJsps/src/main/java/org/unicode/jsp/CachedProps.java create mode 100644 UnicodeJsps/src/main/java/org/unicode/jsp/UcdLoader.java diff --git a/UnicodeJsps/src/main/java/org/unicode/jsp/CachedProps.java b/UnicodeJsps/src/main/java/org/unicode/jsp/CachedProps.java deleted file mode 100644 index 3f78c440b..000000000 --- a/UnicodeJsps/src/main/java/org/unicode/jsp/CachedProps.java +++ /dev/null @@ -1,288 +0,0 @@ -package org.unicode.jsp; - -import com.google.common.base.Splitter; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMultimap; -import com.google.common.collect.Multimap; -import com.ibm.icu.dev.util.UnicodeMap; -import com.ibm.icu.lang.UCharacter; -import com.ibm.icu.util.ICUUncheckedIOException; -import com.ibm.icu.util.VersionInfo; -import java.io.DataInputStream; -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.net.URL; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.Collections; -import java.util.LinkedHashMap; -import java.util.LinkedHashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.concurrent.ConcurrentHashMap; -import java.util.zip.GZIPInputStream; -import org.unicode.cldr.draft.FileUtilities; -import org.unicode.jsp.UnicodeDataInput.ItemReader; -import org.unicode.props.UnicodeProperty; - -public class CachedProps { - public static final boolean IS_BETA = false; - - public static final Splitter HASH_SPLITTER = Splitter.on('#').trimResults(); - public static final Splitter SEMI_SPLITTER = Splitter.on(';').trimResults(); - - static ConcurrentHashMap versionToCachedProps = - new ConcurrentHashMap(); - - public final VersionInfo version; - final Set propNames; - final ConcurrentHashMap propertyCache = - new ConcurrentHashMap(); - final BiMultimap nameToAliases = new BiMultimap(null, null); - final Map> nameToValueToAliases = new LinkedHashMap(); - - static VersionInfo jspVersionInfo = UCharacter.getUnicodeVersion(); - static CachedProps CACHED_PROPS = getInstance(jspVersionInfo); - - static UnicodeProperty NAMES = CachedProps.CACHED_PROPS.getProperty("Name"); - - private CachedProps(VersionInfo version2) { - version = version2; - File dir = new File(getRelativeFileName(CachedProps.class, "props")); - LinkedHashSet temp = new LinkedHashSet(); - for (String filename : dir.list()) { - if (filename.endsWith(".bin")) { - temp.add(filename.substring(0, filename.length() - 4)); - } - } - - // scf ; Simple_Case_Folding ; sfc - for (String fileName : Arrays.asList("PropertyAliases.txt", "ExtraPropertyAliases.txt")) { - for (String line : FileUtilities.in(CachedProps.class, "data/" + fileName)) { - List splitLine = breakLine(line); - if (splitLine == null) { - continue; - } - String name = splitLine.get(1); - List nameAliases = new ArrayList(splitLine); - nameAliases.remove(1); - nameToAliases.putAll(name, nameAliases); - } - } - // AHex; Y ; Yes ; T - // ; True - // ccc; 0; NR ; Not_Reordered - for (String fileName : - Arrays.asList("PropertyValueAliases.txt", "ExtraPropertyValueAliases.txt")) { - for (String line : FileUtilities.in(CachedProps.class, "data/" + fileName)) { - List splitLine = breakLine(line); - if (splitLine == null) { - continue; - } - String pname = splitLine.get(0); - Collection names = nameToAliases.getKeys(pname); - String longName = names.iterator().next(); - BiMultimap valueToAliases = nameToValueToAliases.get(longName); - if (valueToAliases == null) { - nameToValueToAliases.put( - longName, valueToAliases = new BiMultimap(null, null)); - } - List aliases = splitLine.subList(1, splitLine.size()); - for (String item : aliases) { - valueToAliases.putAll(item, aliases); - } - } - } - propNames = Collections.unmodifiableSet(temp); - } - - private List breakLine(String line) { - Iterable items = HASH_SPLITTER.split(line); - String first = items.iterator().next(); - if (first.isEmpty()) { - return null; - } - List splitLine = SEMI_SPLITTER.splitToList(first); - if (splitLine.isEmpty()) { - return null; - } - return splitLine; - } - - public static CachedProps getInstance(VersionInfo version) { - CachedProps result = versionToCachedProps.get(version); - if (result == null) { - versionToCachedProps.put(version, result = new CachedProps(version)); - } - return result; - } - - public Set getAvailable() { - return propNames; - } - - public UnicodeProperty getProperty(String propName) { - UnicodeProperty result = propertyCache.get(propName); - if (result == null) { - if (!propNames.contains(propName)) { - result = null; - } else { - try { - return new DelayedUnicodeProperty( - version, - propName, - nameToAliases.getValues(propName), - nameToValueToAliases.get(propName)); - } catch (Exception e) { - throw new IllegalArgumentException(propName, e); - } - } - } - return result; - } - - class DelayedUnicodeProperty extends UnicodeProperty { - - private final VersionInfo version; - private UnicodeMap map; - private List nameAliases; - private Multimap valueToAliases; - - public DelayedUnicodeProperty( - VersionInfo version, - String propName, - Collection nameAliases, - BiMultimap biMultimap) { - this.version = version; - Collection temp; - if (IS_BETA) { - propName = propName + "β"; - temp = new LinkedHashSet(); - for (String nameAlias : nameAliases) { - temp.add(nameAlias + "β"); - } - } else { - temp = nameAliases; - } - this.nameAliases = ImmutableList.copyOf(temp); - this.valueToAliases = - biMultimap == null - ? null - : ImmutableMultimap.copyOf(biMultimap.getKeyToValues()); - setName(propName); - } - - @Override - protected String _getVersion() { - return version.getVersionString(2, 2); - } - - @Override - protected String _getValue(int codepoint) { - return getMap().get(codepoint); - } - - @Override - protected List _getNameAliases(List result) { - result.clear(); - result.addAll(nameAliases); - return result; - } - - @Override - protected List _getValueAliases(String valueAlias, List result) { - result.clear(); - if (valueToAliases != null) { - result.addAll(valueToAliases.get(valueAlias)); - } - result.add(valueAlias); - return result; - } - - @Override - protected List _getAvailableValues(List result) { - result.addAll(getMap().values()); - return result; - } - - @Override - protected UnicodeMap _getUnicodeMap() { - return getMap(); - } - - private UnicodeMap getMap() { - if (map == null) { - InputStream fis = null; - InputStream gs = null; - DataInputStream in = null; - map = new UnicodeMap().freeze(); - try { - String baseName = getName(); - if (baseName.endsWith("β")) { - baseName = baseName.substring(0, baseName.length() - 1); - } - fis = CachedProps.class.getResourceAsStream("props/" + baseName + ".bin"); - gs = new GZIPInputStream(fis); - in = new DataInputStream(gs); - final ItemReader stringReader = new UnicodeDataInput.StringReader(); - UnicodeMap newItem; - final UnicodeDataInput unicodeDataInput = new UnicodeDataInput(); - newItem = unicodeDataInput.set(in, true).readUnicodeMap(stringReader); - map = newItem.freeze(); - } catch (Exception e) { - } - try { - if (fis != null) { - fis.close(); - if (gs != null) { - gs.close(); - if (in != null) { - in.close(); - } - } - } - } catch (IOException e) { - } - } - return map; - } - } - - public static String getRelativeFileName(Class class1, String filename) { - URL resource = class1.getResource(filename); - String resourceString = resource.toString(); - if (resourceString.startsWith("file:")) { - return resourceString.substring(5); - } else if (resourceString.startsWith("jar:file:")) { - return resourceString.substring(9); - } else { - throw new ICUUncheckedIOException("File not found: " + resourceString); - } - } - - public static void main(String[] args) { - CachedProps cp = CachedProps.getInstance(VersionInfo.getInstance(12)); - Set available = cp.getAvailable(); - System.out.println(available); - for (String name : available) { - UnicodeProperty p = cp.getProperty(name); - System.out.println( - p.getName() + "\t" + p.getNameAliases() + "\t" + clip(p.getAvailableValues())); - String value = p.getValue('a'); - System.out.println("value('a'): " + value + "\t" + p.getValueAliases(value)); - } - } - - private static String clip(Collection availableValues) { - return availableValues.size() > 24 - ? new ArrayList(availableValues).subList(0, 23) + ", …" - : availableValues.toString(); - } - - public Set getPropertyNames() { - return nameToAliases.keySet(); - } -} diff --git a/UnicodeJsps/src/main/java/org/unicode/jsp/ScriptTester2.java b/UnicodeJsps/src/main/java/org/unicode/jsp/ScriptTester2.java index 0815290f1..52b139afb 100644 --- a/UnicodeJsps/src/main/java/org/unicode/jsp/ScriptTester2.java +++ b/UnicodeJsps/src/main/java/org/unicode/jsp/ScriptTester2.java @@ -22,6 +22,7 @@ import java.util.SortedMap; import java.util.TreeMap; import java.util.TreeSet; +import org.unicode.props.IndexUnicodeProperties; public class ScriptTester2 { private final UnicodeMap toEquivalents; @@ -48,7 +49,7 @@ private ScriptTester2( public static ScriptTester2 getInstance(VersionInfo version, UnicodeSet allowed) { allowed = allowed.isFrozen() ? allowed : new UnicodeSet(allowed).freeze(); - CachedProps props = CachedProps.getInstance(version); + var props = IndexUnicodeProperties.make(version); // System.out.println(new TreeSet(props.getAvailable())); UnicodeMap confusables = props.getProperty("Confusable_MA").getUnicodeMap(); UnicodeMap equiv = new UnicodeMap(); @@ -114,12 +115,12 @@ public static ScriptTester2 getInstance(VersionInfo version, UnicodeSet allowed) } public static UnicodeSet getAllowedStatus(VersionInfo version) { - CachedProps props = CachedProps.getInstance(version); + var props = IndexUnicodeProperties.make(version); return props.getProperty("Identifier_Status").getUnicodeMap().getSet("Allowed").freeze(); } public static UnicodeSet getNFKD_Quick_CheckNo(VersionInfo version) { - CachedProps props = CachedProps.getInstance(version); + var props = IndexUnicodeProperties.make(version); return props.getProperty("NFKD_Quick_Check").getUnicodeMap().getSet("No").freeze(); } diff --git a/UnicodeJsps/src/main/java/org/unicode/jsp/UcdLoader.java b/UnicodeJsps/src/main/java/org/unicode/jsp/UcdLoader.java new file mode 100644 index 000000000..ea6063774 --- /dev/null +++ b/UnicodeJsps/src/main/java/org/unicode/jsp/UcdLoader.java @@ -0,0 +1,111 @@ +package org.unicode.jsp; + +import com.ibm.icu.util.ICUException; +import com.ibm.icu.util.VersionInfo; +import java.io.IOException; +import javax.servlet.ServletConfig; +import javax.servlet.ServletException; +import javax.servlet.ServletRequest; +import javax.servlet.ServletResponse; +import javax.servlet.annotation.WebServlet; +import org.unicode.props.IndexUnicodeProperties; +import org.unicode.props.UcdProperty; +import org.unicode.props.UcdPropertyValues.Age_Values; +import org.unicode.text.utility.Settings; + +@WebServlet +public class UcdLoader implements javax.servlet.Servlet { + + // Allow access to the last (published) and latest (dev) versions lazily in tests, though these + // will get fully loaded by this servlet before actually serving the JSPs. + static VersionInfo oldestLoadedUcd = Settings.LAST_VERSION_INFO; + + public static synchronized VersionInfo getOldestLoadedUcd() { + return oldestLoadedUcd; + } + + private static synchronized void setOldestLoadedUcd(VersionInfo v) { + oldestLoadedUcd = v; + } + + private static void loadUcdHistory(VersionInfo earliest) { + System.out.println("Loading back to " + earliest + "..."); + Age_Values[] ages = Age_Values.values(); + final long overallStart = System.currentTimeMillis(); + for (int i = ages.length - 1; i >= 0; --i) { + final var age = ages[i]; + if (age == Age_Values.Unassigned) { + continue; + } + final long ucdStart = System.currentTimeMillis(); + System.out.println("Loading UCD " + age.getShortName() + "..."); + for (boolean unihan : new boolean[] {false, true}) { + final long partStart = System.currentTimeMillis(); + final String name = unihan ? "Unihan" : "non-Unihan properties"; + final var properties = IndexUnicodeProperties.make(age.getShortName()); + for (UcdProperty property : UcdProperty.values()) { + if (property.getShortName().startsWith("cjk") == unihan) { + try { + properties.load(property); + } catch (ICUException e) { + e.printStackTrace(); + } + } + } + System.out.println( + "Loaded " + + name + + " for " + + age.getShortName() + + " (" + + (System.currentTimeMillis() - partStart) + + " ms)"); + } + System.out.println( + "Loaded UCD " + + age.getShortName() + + " in " + + (System.currentTimeMillis() - ucdStart) + + " ms"); + var version = VersionInfo.getInstance(age.getShortName()); + setOldestLoadedUcd(version); + if (version == earliest) { + break; + } + } + System.out.println( + "Loaded all UCD history in " + + (System.currentTimeMillis() - overallStart) / 1000 + + " s"); + } + + @Override + public void destroy() {} + + @Override + public ServletConfig getServletConfig() { + return null; + } + + @Override + public String getServletInfo() { + return null; + } + + @Override + public void init(ServletConfig config) throws ServletException { + loadUcdHistory(Settings.LAST_VERSION_INFO); + new Thread( + new Runnable() { + @Override + public void run() { + loadUcdHistory(null); + } + }) + .start(); + } + + @Override + public void service(ServletRequest request, ServletResponse response) + throws ServletException, IOException {} +} diff --git a/UnicodeJsps/src/main/java/org/unicode/jsp/UnicodeJsp.java b/UnicodeJsps/src/main/java/org/unicode/jsp/UnicodeJsp.java index 8d514843a..eaa909054 100644 --- a/UnicodeJsps/src/main/java/org/unicode/jsp/UnicodeJsp.java +++ b/UnicodeJsps/src/main/java/org/unicode/jsp/UnicodeJsp.java @@ -26,6 +26,7 @@ import org.unicode.idna.Uts46; import org.unicode.jsp.UnicodeUtilities.CodePointShower; import org.unicode.text.utility.Settings; +import org.unicode.text.utility.Settings.ReleasePhase; public class UnicodeJsp { @@ -128,8 +129,10 @@ else if (choice.equals("Sentence")) return result.toString(); } - public static void showProperties(int cp, Appendable out) throws IOException { - UnicodeUtilities.showProperties(cp, out); + public static void showProperties( + int cp, String history, boolean showDevProperties, Appendable out) throws IOException { + showDevProperties = Settings.latestVersionPhase == ReleasePhase.BETA || showDevProperties; + UnicodeUtilities.showProperties(cp, history, showDevProperties, out); } static String defaultIdnaInput = @@ -176,14 +179,17 @@ public static void showSet( String grouping, String info, UnicodeSet a, + boolean showDevProperties, boolean abbreviate, boolean ucdFormat, boolean collate, Appendable out) throws IOException { + showDevProperties = Settings.latestVersionPhase == ReleasePhase.BETA || showDevProperties; CodePointShower codePointShower = - new CodePointShower(grouping, info, abbreviate, ucdFormat, collate); - UnicodeUtilities.showSetMain(a, codePointShower, out); + new CodePointShower( + grouping, info, showDevProperties, abbreviate, ucdFormat, collate); + UnicodeUtilities.showSetMain(a, showDevProperties, codePointShower, out); } public static void showPropsTable(Appendable out, String propForValues, String myLink) @@ -393,8 +399,9 @@ public static String testIdnaLines(String lines, String filter) { return UnicodeUtilities.testIdnaLines(lines, filter); } - public static String getIdentifier(String script) { - return UnicodeUtilities.getIdentifier(script); + public static String getIdentifier(String script, boolean showDevProperties) { + showDevProperties = Settings.latestVersionPhase == ReleasePhase.BETA || showDevProperties; + return UnicodeUtilities.getIdentifier(script, showDevProperties); } static final String VERSIONS = @@ -403,12 +410,10 @@ public static String getIdentifier(String script) { + VersionInfo.ICU_VERSION.getVersionString(2, 2) + "; " + "Unicode/Emoji version: " - + UCharacter.getUnicodeVersion().getVersionString(2, 2) + + Settings.lastVersion + "; " - + (CachedProps.IS_BETA - ? "Unicodeβ version: " - + CachedProps.CACHED_PROPS.version.getVersionString(2, 2) - + "; " + + (Settings.latestVersionPhase == ReleasePhase.BETA + ? "Unicodeβ version: " + Settings.latestVersion + "; " : ""); public static String getVersions() { @@ -416,14 +421,14 @@ public static String getVersions() { } static final String SUBHEAD = - !CachedProps.IS_BETA - ? "" - : "

Properties use ICU for Unicode V" - + UCharacter.getUnicodeVersion().getVersionString(2, 2) - + "; the beta properties support Unicode V" - + VersionInfo.getInstance(Settings.latestVersion).getVersionString(2, 2) - + "β. " - + "For more information, see Unicode Utilities Beta.

"; + Settings.latestVersionPhase == ReleasePhase.BETA + ? "

Unmarked properties are from Unicode V" + + Settings.lastVersion + + "; the beta properties are from Unicode V" + + Settings.latestVersion + + "β. " + + "For more information, see Unicode Utilities Beta.

" + : ""; public static String getSubtitle() { return SUBHEAD; diff --git a/UnicodeJsps/src/main/java/org/unicode/jsp/UnicodeSetUtilities.java b/UnicodeJsps/src/main/java/org/unicode/jsp/UnicodeSetUtilities.java index 047ca3790..9380d8ca4 100644 --- a/UnicodeJsps/src/main/java/org/unicode/jsp/UnicodeSetUtilities.java +++ b/UnicodeJsps/src/main/java/org/unicode/jsp/UnicodeSetUtilities.java @@ -130,8 +130,6 @@ public static UnicodeSet parseUnicodeSet(String input) { private static class MySymbolTable extends UnicodeSet.XSymbolTable { UnicodeRegex unicodeRegex; XPropertyFactory factory; - private UnicodeProperty gcProp; - private UnicodeProperty scProp; public MySymbolTable() { unicodeRegex = new UnicodeRegex().setSymbolTable(this); @@ -155,30 +153,34 @@ public boolean applyPropertyAlias( String propertyName, String propertyValue, UnicodeSet result) { boolean status = false; boolean invert = false; - if (factory == null) { - factory = XPropertyFactory.make(); - gcProp = factory.getProperty("gc"); - scProp = factory.getProperty("sc"); - } int posNotEqual = propertyName.indexOf('\u2260'); - int posColon = propertyName.indexOf(':'); - if (posNotEqual >= 0 || posColon >= 0) { - if (posNotEqual < 0) posNotEqual = propertyName.length(); - if (posColon < 0) posColon = propertyName.length(); - int opPos = posNotEqual < posColon ? posNotEqual : posColon; + if (posNotEqual >= 0) { propertyValue = propertyValue.length() == 0 - ? propertyName.substring(opPos + 1) - : propertyName.substring(opPos + 1) + "=" + propertyValue; - propertyName = propertyName.substring(0, opPos); - if (posNotEqual < posColon) { - invert = true; - } + ? propertyName.substring(posNotEqual + 1) + : propertyName.substring(posNotEqual + 1) + "=" + propertyValue; + propertyName = propertyName.substring(0, posNotEqual); + invert = true; } if (propertyName.endsWith("!")) { propertyName = propertyName.substring(0, propertyName.length() - 1); invert = !invert; } + int posColon = propertyName.indexOf(':'); + String versionPrefix = ""; + String versionlessPropertyName = propertyName; + if (posColon >= 0) { + versionPrefix = propertyName.substring(0, posColon + 1); + versionlessPropertyName = propertyName.substring(posColon + 1); + } + + if (factory == null) { + factory = XPropertyFactory.make(); + } + + var gcProp = factory.getProperty(versionPrefix + "gc"); + var scProp = factory.getProperty(versionPrefix + "sc"); + UnicodeProperty prop = factory.getProperty(propertyName); if (propertyValue.length() != 0) { if (prop == null) { @@ -191,16 +193,17 @@ public boolean applyPropertyAlias( status = applyPropertyAlias0(prop, propertyValue, result, invert); } else { try { - status = applyPropertyAlias0(gcProp, propertyName, result, invert); + status = applyPropertyAlias0(gcProp, versionlessPropertyName, result, invert); } catch (Exception e) { } ; if (!status) { try { - status = applyPropertyAlias0(scProp, propertyName, result, invert); + status = + applyPropertyAlias0( + scProp, versionlessPropertyName, result, invert); } catch (Exception e) { } - ; if (!status) { if (prop.isType(UnicodeProperty.BINARY_OR_ENUMERATED_OR_CATALOG_MASK)) { try { @@ -208,7 +211,6 @@ public boolean applyPropertyAlias( } catch (Exception e) { } } - ; if (!status) { status = applyPropertyAlias0(prop, "", result, invert); } @@ -218,6 +220,18 @@ public boolean applyPropertyAlias( return status; } + private static String[][] COARSE_GENERAL_CATEGORIES = { + {"Other", "C", "Cc", "Cf", "Cn", "Co", "Cs"}, + {"Letter", "L", "Ll", "Lm", "Lo", "Lt", "Lu"}, + {"Cased_Letter", "LC", "Ll", "Lt", "Lu"}, + {"Mark", "M", "Mc", "Me", "Mn"}, + {"Number", "N", "Nd", "Nl", "No"}, + {"Punctuation", "P", "Pc", "Pd", "Pe", "Pf", "Pi", "Po", "Ps"}, + {"Symbol", "S", "Sc", "Sk", "Sm", "So"}, + {"Separator", "Z", "Zl", "Zp", "Zs"}, + }; + + // TODO(eggrobin): I think this function only ever returns true; might as well make it void. private boolean applyPropertyAlias0( UnicodeProperty prop, String propertyValue, UnicodeSet result, boolean invert) { result.clear(); @@ -280,6 +294,19 @@ private boolean applyPropertyAlias0( if (isAge) { set = prop.getSet(new ComparisonMatcher(propertyValue, Relation.geq)); } else { + if (prop.getName().equals("General_Category")) { + for (String[] coarseValue : COARSE_GENERAL_CATEGORIES) { + final String longName = coarseValue[0]; + final String shortName = coarseValue[1]; + if (UnicodeProperty.equalNames(propertyValue, longName) + || UnicodeProperty.equalNames(propertyValue, shortName)) { + for (int i = 2; i < coarseValue.length; ++i) { + prop.getSet(coarseValue[i], result); + } + return true; + } + } + } set = prop.getSet(propertyValue); } } else if (isAge) { diff --git a/UnicodeJsps/src/main/java/org/unicode/jsp/UnicodeUtilities.java b/UnicodeJsps/src/main/java/org/unicode/jsp/UnicodeUtilities.java index f4050d8dd..df1e636de 100644 --- a/UnicodeJsps/src/main/java/org/unicode/jsp/UnicodeUtilities.java +++ b/UnicodeJsps/src/main/java/org/unicode/jsp/UnicodeUtilities.java @@ -17,6 +17,7 @@ import com.ibm.icu.text.UTF16; import com.ibm.icu.text.UnicodeSet; import com.ibm.icu.text.UnicodeSetIterator; +import com.ibm.icu.util.ICUException; import com.ibm.icu.util.ULocale; import com.ibm.icu.util.VersionInfo; import java.io.BufferedReader; @@ -49,8 +50,10 @@ import org.unicode.idna.IdnaTypes; import org.unicode.idna.Punycode; import org.unicode.idna.Uts46; +import org.unicode.props.UcdPropertyValues.Age_Values; import org.unicode.props.UnicodeProperty; import org.unicode.props.UnicodeProperty.UnicodeMapProperty; +import org.unicode.text.utility.Settings; // For dependency management, it might be useful to split this omnibus class into // pieces by topic, such as collation utilities vs. IDNA utilities etc. @@ -68,10 +71,6 @@ public class UnicodeUtilities { .removeAll(new UnicodeSet("[:whitespace:]")) .freeze(); - static { - CachedProps cp = CachedProps.CACHED_PROPS; // force load - } - private static Subheader subheader = null; static Transliterator toHTML; @@ -272,8 +271,6 @@ public static boolean equals(CharSequence inbuffer, CharSequence outbuffer) { return true; } - static final int BLOCK_ENUM = UCharacter.getPropertyEnum("block"); - static XPropertyFactory getFactory() { return XPropertyFactory.make(); } @@ -285,10 +282,14 @@ static XPropertyFactory getFactory() { numberFormat.setGroupingUsed(true); } - public static void showSetMain(UnicodeSet a, CodePointShower codePointShower, Appendable out) + public static void showSetMain( + UnicodeSet a, + boolean showDevProperties, + CodePointShower codePointShower, + Appendable out) throws IOException { if (codePointShower.groupingProps.isEmpty()) { - showSet(a, codePointShower, out); + showSet(a, showDevProperties, codePointShower, out); return; } @@ -329,7 +330,7 @@ public static void showSetMain(UnicodeSet a, CodePointShower codePointShower, Ap : "") + "
\n"); } - showSet(items, codePointShower, out); + showSet(items, showDevProperties, codePointShower, out); for (int i = 0; i < propsOld.length; ++i) { propsOld[i] = props2[i]; } @@ -355,8 +356,7 @@ static int getFirstDiff(String[] a, String[] b) { public static String getStringProperties( UnicodeProperty prop, String s, String separator, boolean getShortest) { // check for single code point, later - if (prop instanceof UnicodeMapProperty - || prop instanceof CachedProps.DelayedUnicodeProperty) { + if (prop instanceof UnicodeMapProperty) { Object value = prop.getUnicodeMap().get(s); if (value != null) { return (String) value; @@ -382,7 +382,10 @@ public static String getStringProperties( /*jsp*/ public static void showSet( - UnicodeSet inputSetRaw, CodePointShower codePointShower, Appendable out) + UnicodeSet inputSetRaw, + boolean showDevProperties, + CodePointShower codePointShower, + Appendable out) throws IOException { if (codePointShower.doTable) { out.append(""); @@ -401,12 +404,21 @@ public static void showSet( LinkedHashMap items = new LinkedHashMap(); String specials = "Unassigned, Private use, or Surrogates"; - UnicodeSet specialSet = - new UnicodeSet(inputSetRaw).retainAll(UnicodeProperty.getSPECIALS()); + var specialsForDisplay = UnicodeProperty.getSPECIALS(); + if (!showDevProperties) { + // UnicodeProperty uses the freshest (smallest) set of unassigned code points; if we + // are not showing the corresponding block and character names, put the + // newly-assigned characters in the unassigned pile for display. + specialsForDisplay = specialsForDisplay.cloneAsThawed(); + getFactory() + .getProperty("General_Category") + .getSet("Unassigned", specialsForDisplay); + } + UnicodeSet specialSet = new UnicodeSet(inputSetRaw).retainAll(specialsForDisplay); UnicodeSet inputSet = specialSet.size() == 0 ? inputSetRaw - : new UnicodeSet(inputSetRaw).removeAll(UnicodeProperty.getSPECIALS()); + : new UnicodeSet(inputSetRaw).removeAll(specialsForDisplay); if (specialSet.size() != 0) { items.put(specials, specialSet); } @@ -418,16 +430,24 @@ public static void showSet( if (set == null) items.put(newBlock, set = new UnicodeSet()); set.add(it.string); } else { - String block = - UCharacter.getStringPropertyValue( - BLOCK_ENUM, s, UProperty.NameChoice.LONG) - .replace('_', ' '); + String devBlock = + getFactory().getProperty("Udev:Block").getValue(s).replace('_', ' '); + String block = getFactory().getProperty("Block").getValue(s).replace('_', ' '); String newBlock = - "" - + block - + ""; + showDevProperties && !devBlock.equals(block) + ? "" + + devBlock + + "" + : "" + + block + + ""; String newSubhead = getSubheader().getSubheader(s); if (newSubhead == null) { newSubhead = "no subhead"; @@ -486,7 +506,7 @@ public static void showSet( } } - public static String getIdentifier(String script) { + public static String getIdentifier(String script, boolean showDevProperties) { StringBuilder result = new StringBuilder(); UnicodeProperty scriptProp = getFactory().getProperty("sc"); UnicodeSet scriptSet; @@ -529,7 +549,11 @@ public static String getIdentifier(String script) { if (allowed.size() == 0) { result.append("none"); } else { - showSet(allowed, new CodePointShower("", "", true, false, false), result); + showSet( + allowed, + showDevProperties, + new CodePointShower("", "", showDevProperties, true, false, false), + result); } if (restricted.size() == 0) { @@ -547,7 +571,9 @@ public static String getIdentifier(String script) { + ""); showSet( items, - new CodePointShower("", "", true, false, false).setRestricted(true), + showDevProperties, + new CodePointShower("", "", showDevProperties, true, false, false) + .setRestricted(true), result); } } @@ -588,6 +614,7 @@ private static String getLiteral(String s) { static class CodePointShower { public final boolean doTable; + public final boolean showDevProperties; public final boolean abbreviate; public final boolean ucdFormat; public final boolean collate; @@ -604,12 +631,14 @@ public CodePointShower setRestricted(boolean restricted) { public CodePointShower( String grouping, String info, + boolean showDevProperties, boolean abbreviate, boolean ucdFormat, boolean collate) { this.groupingProps = getProps(grouping); this.infoProps = getProps(info); this.doTable = true; // !infoProps.isEmpty(); + this.showDevProperties = showDevProperties; this.abbreviate = abbreviate; this.ucdFormat = ucdFormat; this.collate = collate; @@ -637,7 +666,8 @@ private void showString(final String string, String separator, Appendable out) if (UnicodeUtilities.RTL.containsSome(literal)) { literal = '\u200E' + literal + '\u200E'; } - String name = UnicodeUtilities.getName(string, separator, false, false); + String name = + UnicodeUtilities.getName(string, separator, showDevProperties, false, false); literal = UnicodeSetUtilities.addEmojiVariation(literal); if (doTable) { out.append( @@ -793,7 +823,11 @@ String getPropString(List props, String codePoints, boolean sho } private static String getName( - String string, String separator, boolean andCode, boolean plainText) { + String string, + String separator, + boolean showDevProperties, + boolean andCode, + boolean plainText) { StringBuilder result = new StringBuilder(); int cp; for (int i = 0; i < string.length(); i += UTF16.getCharCount(cp)) { @@ -804,23 +838,28 @@ private static String getName( if (andCode) { result.append("U+").append(com.ibm.icu.impl.Utility.hex(cp, 4)).append(' '); } - final String name = CachedProps.NAMES.getValue(cp); + final String devName = + showDevProperties ? getFactory().getProperty("Udev:Name").getValue(cp) : null; + final String name = getFactory().getProperty("Name").getValue(cp); if (name != null) { result.append(name); } else { - // TODO(egg): We only have Name_Aliasβ during β, which is silly. This will probably - // solve itself as part of https://github.com/unicode-org/unicodetools/issues/432. - String alias = - getFactory() - .getProperty(CachedProps.IS_BETA ? "Name_Aliasβ" : "Name_Alias") - .getValue(cp); - if (alias == null) { - alias = "no name"; - } - if (plainText) { - result.append("(" + alias + ")"); + if (devName != null) { + if (plainText) { + result.append("{" + devName + "}"); + } else { + result.append("" + devName + ""); + } } else { - result.append("" + alias + ""); + String alias = getFactory().getProperty("Name_Alias").getValue(cp); + if (alias == null) { + alias = "no name"; + } + if (plainText) { + result.append("(" + alias + ")"); + } else { + result.append("" + alias + ""); + } } } } @@ -1316,22 +1355,44 @@ public static void getDifferences( // ((RuleBasedCollator) col).setNumericCollation(true); // } - public static void showProperties(int cp, Appendable out) throws IOException { + private static String getScriptCat(String versionPrefix, int cp) { + String scriptCat = + getFactory().getProperty(versionPrefix + "script").getValue(cp).replace("_", " "); + if (scriptCat.equals("Common") || scriptCat.equals("Inherited")) { + scriptCat = + getFactory().getProperty(versionPrefix + "gc").getValue(cp).replace("_", " "); + } else { + scriptCat += " Script"; + } + return scriptCat; + } + + public static void showProperties( + int cp, String history, boolean showDevProperties, Appendable out) throws IOException { String text = UTF16.valueOf(cp); String name = getFactory().getProperty("Name").getValue(cp); - if (name != null) { - name = toHTML.transliterate(name); + final String devName = + showDevProperties ? getFactory().getProperty("Udev:Name").getValue(cp) : null; + if (name == null) { + if (devName != null) { + name = "" + toHTML.transliterate(devName) + ""; + } else { + name = "Unknown"; + } } else { - name = "Unknown"; + name = toHTML.transliterate(name); } boolean allowed = XIDModifications.isAllowed(cp); - String scriptCat = getFactory().getProperty("script").getValue(cp).replace("_", " "); - if (scriptCat.equals("Common") || scriptCat.equals("Inherited")) { - scriptCat = getFactory().getProperty("gc").getValue(cp).replace("_", " "); - } else { - scriptCat += " Script"; + String scriptCat = getScriptCat("", cp); + if (showDevProperties) { + String devScriptCat = getScriptCat("Udev:", cp); + if (!devScriptCat.equals(scriptCat)) { + // The old and new script and GC will be given below; only show the new one here, + // but highlighted. + scriptCat = "" + devScriptCat + ""; + } } String hex = com.ibm.icu.impl.Utility.hex(cp, 4); @@ -1369,23 +1430,48 @@ public static void showProperties(int cp, Appendable out) throws IOException { TreeSet sortedProps = Builder.with(new TreeSet(col)).addAll(availableNames).remove("Name").get(); + String kRSUnicode = getFactory().getProperty("kRSUnicode").getValue(cp); + boolean isUnihan = kRSUnicode != null; + + Age_Values age = Age_Values.forName(getFactory().getProperty("Age").getValue(cp)); + VersionInfo minVersion = + history.equals("assigned") && age != Age_Values.Unassigned + ? VersionInfo.getInstance(age.getShortName()) + : history.equals("full") + ? VersionInfo.getInstance(Age_Values.V1_1.getShortName()) + : Settings.LAST_VERSION_INFO; + if (minVersion.compareTo(UcdLoader.getOldestLoadedUcd()) < 0) { + minVersion = UcdLoader.getOldestLoadedUcd(); + out.append( + "

Still loading UCD versions before " + + minVersion.getVersionString(2, 4) + + "

"); + } + out.append( "
" - + "" + "" + "
Properties for U+" + + "" + + (isUnihan ? "non-Unihan properties for U+" : "Properties for U+") + hex + "
With Non-Default ValuesWith Default Values
\n"); out.append("\n"); + List unihanProperties = new ArrayList<>(); + VersionInfo maxVersion = + showDevProperties ? Settings.LATEST_VERSION_INFO : Settings.LAST_VERSION_INFO; for (String propName : sortedProps) { UnicodeProperty prop = getFactory().getProperty(propName); if (prop.getName().equals("confusable")) continue; + if (prop.getFirstNameAlias().startsWith("cjk")) { + unihanProperties.add(propName); + continue; + } boolean isDefault = prop.isDefault(cp); if (isDefault) continue; - String propValue = prop.getValue(cp); - showPropertyValue(propName, propValue, isDefault, out); + showPropertyValue(propName, cp, minVersion, maxVersion, isDefault, out); } out.append("
\n"); @@ -1395,15 +1481,38 @@ public static void showProperties(int cp, Appendable out) throws IOException { for (String propName : sortedProps) { UnicodeProperty prop = getFactory().getProperty(propName); if (prop.getName().equals("confusable")) continue; + if (prop.getFirstNameAlias().startsWith("cjk")) { + continue; + } boolean isDefault = prop.isDefault(cp); if (!isDefault) continue; - String propValue = prop.getValue(cp); - showPropertyValue(propName, propValue, isDefault, out); + showPropertyValue(propName, cp, minVersion, maxVersion, isDefault, out); } out.append("
\n"); out.append("\n"); + if (isUnihan) { + out.append( + "" + + "" + + "
" + + "Unihan properties for U+" + + hex + + "
\n"); + out.append("\n"); + for (int i = 0; i < unihanProperties.size() / 2; ++i) { + showPropertyValue(unihanProperties.get(i), cp, minVersion, maxVersion, false, out); + } + out.append("
\n"); + out.append("
\n"); + out.append("\n"); + for (int i = unihanProperties.size() / 2; i < unihanProperties.size(); ++i) { + showPropertyValue(unihanProperties.get(i), cp, minVersion, maxVersion, false, out); + } + out.append("
\n"); + out.append("
\n"); + } } private static StringBuilder displayConfusables(int codepoint) { @@ -1527,32 +1636,69 @@ private static void getBoxedCharacters(String s, StringBuilder confusableString) } private static void showPropertyValue( - String propName, String propValue, boolean isDefault, Appendable out) + String propName, + int codePoint, + VersionInfo minVersion, + VersionInfo maxVersion, + boolean isDefault, + Appendable out) throws IOException { String defaultClass = isDefault ? " class='default'" : ""; - if (propValue == null) { - out.append( - "" - + propName - + "null\n"); - return; + class PropertyAssignment { + VersionInfo first; + VersionInfo last; + String value; + } + List history = new ArrayList<>(); + // TODO(eggrobin): TUP normalization chokes on sufficiently old versions, but this is not + // worth debugging as we want to get rid of it. + if (!propName.startsWith("toNF")) { + for (var a : Age_Values.values()) { + if (a == Age_Values.Unassigned) { + break; + } + var version = VersionInfo.getInstance(a.getShortName()); + if (version.compareTo(minVersion) < 0) { + continue; + } + if (version.compareTo(maxVersion) > 0) { + break; + } + String versionPrefix = + version == Settings.LATEST_VERSION_INFO + ? "dev" + : version.getVersionString(3, 3); + UnicodeProperty property = null; + try { + property = getFactory().getProperty("U" + versionPrefix + ":" + propName); + } catch (ICUException e) { + } + if (property == null) { + continue; + } + String value = property.getValue(codePoint); + PropertyAssignment lastAssignment = + history.isEmpty() ? null : history.get(history.size() - 1); + if (lastAssignment == null + || (value != null && !value.equals(lastAssignment.value)) + || (value == null && lastAssignment.value != null)) { + PropertyAssignment assignment = new PropertyAssignment(); + assignment.first = version; + assignment.last = version; + assignment.value = value; + history.add(assignment); + } else { + lastAssignment.last = version; + } + } + } + if (history.isEmpty()) { + var current = new PropertyAssignment(); + current.first = Settings.LAST_VERSION_INFO; + current.last = Settings.LAST_VERSION_INFO; + current.value = getFactory().getProperty(propName).getValue(codePoint); + history.add(current); } - String hValue = toHTML.transliterate(propValue); - hValue = - "" - + hValue - + ""; - out.append( "" + propName - + "" - + hValue - + "\n"); + + ""); + for (PropertyAssignment assignment : history) { + String first = + assignment.first.getVersionString(2, 4) + + (assignment.first == Settings.LATEST_VERSION_INFO + ? Settings.latestVersionPhase.toString() + : ""); + String last = + assignment.last.getVersionString(2, 4) + + (assignment.last == Settings.LATEST_VERSION_INFO + ? Settings.latestVersionPhase.toString() + : ""); + boolean isCurrent = + assignment.first.compareTo(Settings.LAST_VERSION_INFO) <= 0 + && Settings.LAST_VERSION_INFO.compareTo(assignment.last) <= 0; + boolean showVersion = !isCurrent || history.size() != 1; + boolean isSingleVersion = assignment.first == assignment.last; + boolean isNew = assignment.first == Settings.LATEST_VERSION_INFO; + String versionRange = + (showVersion ? (isSingleVersion ? first : first + ".." + last) + ": " : ""); + if (assignment.value == null) { + out.append("" + versionRange + "null"); + } else { + String hValue = toHTML.transliterate(assignment.value); + out.append( + "" + + versionRange + + hValue + + ""); + } + } + out.append(""); } /*jsp*/ @@ -1941,7 +2123,7 @@ private static void showBidiLine( writer.println("Character"); for (int i = 0; i < str.length(); ++i) { final String s = str.substring(i, i + 1); - String title = toHTML.transform(getName(s, "", true, true)); + String title = toHTML.transform(getName(s, "", false, true, true)); writer.println( ") base.getInternalAvailablePropertyAliases(new ArrayList())) { - add(base.getProperty(propertyAlias)); + IndexUnicodeProperties latest = IndexUnicodeProperties.make(Settings.latestVersion); + // Contract the unassigned set as much as possible (based on latest rather than last), so + // that dev/α/β property lookups are correct. + UnicodeProperty.contractUNASSIGNED( + latest.getProperty("General_Category").getSet("Unassigned")); + IndexUnicodeProperties last = IndexUnicodeProperties.make(Settings.lastVersion); + for (UcdProperty property : last.getAvailableUcdProperties()) { + add(last.getProperty(property)); } for (int i = Common.XSTRING_START; i < Common.XSTRING_LIMIT; ++i) { XUnicodeProperty property = new XUnicodeProperty(i); @@ -101,11 +105,6 @@ public final Factory add2(UnicodeProperty sp) { .set(Uts46.SINGLETON.getMappingsDisplay()) .setMain("toUts46n", "toUts46n", UnicodeProperty.STRING, "1.1")); - add( - new StringTransformProperty(Common.NFKC_CF, false) - .setMain("NFKC_Casefold", "NFKC_CF", UnicodeProperty.STRING, "1.1") - .addName("toNFKC_CF")); - add( new CodepointTransformProperty( new Transform() { @@ -188,21 +187,6 @@ public String transform(String source) { false) .setMain("toTitlecase", "toTC", UnicodeProperty.STRING, "1.1")); - add( - new StringTransformProperty( - new StringTransform() { - @Override - public String transform(String source) { - StringBuilder b = new StringBuilder(); - for (int cp : CharSequences.codePoints(source)) { - b.appendCodePoint(UCharacter.getBidiPairedBracket(cp)); - } - return b.toString(); - } - }, - false) - .setMain("Bidi_Paired_Bracket", "bpb", UnicodeProperty.STRING, "7.0")); - add( new StringTransformProperty( new StringTransform() { @@ -267,43 +251,22 @@ public String transform(Integer source) { addExamplarProperty(LocaleData.ES_AUXILIARY, "exema", "exemplar_aux"); addExamplarProperty(LocaleData.ES_PUNCTUATION, "exemp", "exemplar_punct"); - // set up the special script property - UnicodeProperty scriptProp = base.getProperty("sc"); - - // Compose the function and add - UnicodeMap specialMap = new UnicodeMap(); - specialMap.putAll( - scriptProp.getUnicodeMap()); // if there is no value, use the script property - specialMap.putAll(ScriptTester.getScriptSpecialsNames()); - add( - new UnicodeProperty.UnicodeMapProperty() - .set(specialMap) - .setMain("Script_Extensions", "scx", UnicodeProperty.ENUMERATED, "1.1") - .addValueAliases( - ScriptTester.getScriptSpecialsAlternates(scriptProp), - AliasAddAction.IGNORE_IF_MISSING) - .setMultivalued(true)); - - CachedProps cp = CachedProps.CACHED_PROPS; - for (String prop : cp.getAvailable()) { - add2(cp.getProperty(prop)); - } UnicodeSet Basic_Emoji = - cp.getProperty("Basic_Emoji").getSet("Yes", null); // TODO: was .getTrueSet(); + getProperty("Basic_Emoji").getSet("Yes", null); // TODO: was .getTrueSet(); UnicodeSet Emoji_Keycap_Sequence = - cp.getProperty("RGI_Emoji_Keycap_Sequence") + getProperty("RGI_Emoji_Keycap_Sequence") .getSet("Yes", null); // TODO: was .getTrueSet(); UnicodeSet RGI_Emoji_Modifier_Sequence = - cp.getProperty("RGI_Emoji_Modifier_Sequence") + getProperty("RGI_Emoji_Modifier_Sequence") .getSet("Yes", null); // TODO: was .getTrueSet(); UnicodeSet RGI_Emoji_Tag_Sequence = - cp.getProperty("RGI_Emoji_Tag_Sequence") + getProperty("RGI_Emoji_Tag_Sequence") .getSet("Yes", null); // TODO: was .getTrueSet(); UnicodeSet RGI_Emoji_Flag_Sequence = - cp.getProperty("RGI_Emoji_Flag_Sequence") + getProperty("RGI_Emoji_Flag_Sequence") .getSet("Yes", null); // TODO: was .getTrueSet(); UnicodeSet RGI_Emoji_Zwj_Sequence = - cp.getProperty("RGI_Emoji_Zwj_Sequence") + getProperty("RGI_Emoji_Zwj_Sequence") .getSet("Yes", null); // TODO: was .getTrueSet(); UnicodeSet RGI_Emoji = new UnicodeSet() diff --git a/UnicodeJsps/src/main/webapp/WEB-INF/web.xml b/UnicodeJsps/src/main/webapp/WEB-INF/web.xml index 2c830eb12..3b2453a7c 100644 --- a/UnicodeJsps/src/main/webapp/WEB-INF/web.xml +++ b/UnicodeJsps/src/main/webapp/WEB-INF/web.xml @@ -9,4 +9,8 @@ default.htm default.jsp + + org.unicode.jsp.UcdLoader + 1 + \ No newline at end of file diff --git a/UnicodeJsps/src/main/webapp/character.jsp b/UnicodeJsps/src/main/webapp/character.jsp index 18076d93e..b627f0cfb 100644 --- a/UnicodeJsps/src/main/webapp/character.jsp +++ b/UnicodeJsps/src/main/webapp/character.jsp @@ -22,12 +22,22 @@ th { text-align: left } String queryString = request.getQueryString(); UtfParameters utfParameters = new UtfParameters(queryString); - String text = utfParameters.getParameter("a", "\u2615", "\u2615"); + String text = utfParameters.getParameter("a", "\u2615", "\u2615"); + String history = utfParameters.getParameter("history", "", ""); + boolean showDevProperties = utfParameters.getParameter("showDevProperties", "", "").equals("1"); int[] codePoints = UnicodeJsp.parseCode(text,null,null); int cp = codePoints[0]; String nextHex = "character.jsp?a=" + Utility.hex(cp < 0x110000 ? cp+1 : 0, 4); String prevHex = "character.jsp?a=" + Utility.hex(cp > 0 ? cp-1 : 0x10FFFF, 4); + if (!history.isEmpty()) { + nextHex += "&history=" + history; + prevHex += "&history=" + history; + } + if (showDevProperties) { + nextHex += "&showDevProperties=1"; + prevHex += "&showDevProperties=1"; + } if (codePoints.length > 1) { %>

@@ -64,10 +74,22 @@ th { text-align: left }
+ <% + if (!history.isEmpty()) { + %> + + <% + } + if (showDevProperties) { + %> + + <% + } + %>

<% - UnicodeJsp.showProperties(cp, out); + UnicodeJsp.showProperties(cp, history, showDevProperties, out); %>

The list includes both Unicode Character Properties and some additions (like idna2003 or subhead)

diff --git a/UnicodeJsps/src/main/webapp/identifier.jsp b/UnicodeJsps/src/main/webapp/identifier.jsp index 9b3d70b7e..7c1461b74 100644 --- a/UnicodeJsps/src/main/webapp/identifier.jsp +++ b/UnicodeJsps/src/main/webapp/identifier.jsp @@ -12,8 +12,9 @@ UtfParameters utfParameters = new UtfParameters(queryString); String test = utfParameters.getParameter("a", "Latin"); + boolean showDevProperties = utfParameters.getParameter("showDevProperties", "", "").equals("1"); - String a_out = UnicodeJsp.getIdentifier(test); + String a_out = UnicodeJsp.getIdentifier(test, showDevProperties); %>

Unicode Utilities: Identifier

diff --git a/UnicodeJsps/src/main/webapp/index.css b/UnicodeJsps/src/main/webapp/index.css index 3c7d25f77..ad7c5557f 100644 --- a/UnicodeJsps/src/main/webapp/index.css +++ b/UnicodeJsps/src/main/webapp/index.css @@ -27,6 +27,11 @@ h3 {background-color: #EEEEEE} .L1 {background-color: #CCCCCC} .L0 {background-color: #C8C8C8} .default { background-color: #C8C8C8} +.changed { + background-color: #FFFF00; + border-style: dotted; + border-width: 1px; +} .propTable, .bigTable { margin-left: auto; margin-right: auto; diff --git a/UnicodeJsps/src/main/webapp/list-unicodeset.jsp b/UnicodeJsps/src/main/webapp/list-unicodeset.jsp index 4c031c878..80e5988a1 100644 --- a/UnicodeJsps/src/main/webapp/list-unicodeset.jsp +++ b/UnicodeJsps/src/main/webapp/list-unicodeset.jsp @@ -21,6 +21,8 @@ boolean ucdFormat = request.getParameter("ucd") != null; boolean escape = request.getParameter("esc") != null; + boolean showDevProperties = utfParameters.getParameter("showDevProperties", "", "").equals("1"); + UnicodeSet a = new UnicodeSet(); String a_out = UnicodeJsp.getSimpleSet(setA, a, abbreviate, escape); @@ -50,6 +52,13 @@ name="g" size="25" value="<%=Encode.forHtmlAttribute(group)%>"> name="i" size="25" value="<%=Encode.forHtmlAttribute(info)%>"> + <% + if (showDevProperties) { + %> + + <% + } + %> @@ -57,7 +66,7 @@

<%=a_out%>


- <% UnicodeJsp.showSet(group, info, a, abbreviate, ucdFormat, collate, out); %> + <% UnicodeJsp.showSet(group, info, a, showDevProperties, abbreviate, ucdFormat, collate, out); %> <%@ include file="footer.jsp" %> diff --git a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestEmoji.java b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestEmoji.java index 9dc4ce793..f024cdab4 100644 --- a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestEmoji.java +++ b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestEmoji.java @@ -19,7 +19,7 @@ public void TestBasic() throws IOException { String[] message = {""}; UnicodeSet primary = UnicodeUtilities.parseSimpleSet("[:emoji:]", message); StringBuilder out = new StringBuilder(); - UnicodeJsp.showSet("gc", "sc", primary, false, false, true, out); + UnicodeJsp.showSet("gc", "sc", primary, false, false, false, true, out); assertTrue("", out.toString().contains("ASCII")); logln(out.toString()); diff --git a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestJsp.java b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestJsp.java index 61a99a88a..5d38023d1 100644 --- a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestJsp.java +++ b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestJsp.java @@ -757,6 +757,7 @@ public void TestGrouping() throws IOException { "sc gc", "", UnicodeSetUtilities.parseUnicodeSet("[:subhead=/Syllables/:]"), + false, true, true, true, @@ -767,6 +768,7 @@ public void TestGrouping() throws IOException { "subhead", "", UnicodeSetUtilities.parseUnicodeSet("[:subhead=/Syllables/:]"), + false, true, true, true, @@ -789,17 +791,26 @@ public void TestStuff() throws IOException { "sc gc", "", new UnicodeSet("[[:ascii:]{123}{ab}{456}]"), + false, true, true, true, printWriter); UnicodeJsp.showSet( - "", "", new UnicodeSet("[\\u0080\\U0010FFFF]"), true, true, true, printWriter); + "", + "", + new UnicodeSet("[\\u0080\\U0010FFFF]"), + false, + true, + true, + true, + printWriter); UnicodeJsp.showSet( "", "", new UnicodeSet("[\\u0080\\U0010FFFF{abc}]"), + false, true, true, true, @@ -808,6 +819,7 @@ public void TestStuff() throws IOException { "", "", new UnicodeSet("[\\u0080-\\U0010FFFF{abc}]"), + false, true, true, true, @@ -823,7 +835,7 @@ public void TestStuff() throws IOException { final UnicodeSet unicodeSet = new UnicodeSet(); logln("simple: " + UnicodeJsp.getSimpleSet("[a-bm-p\uAc00]", unicodeSet, true, false)); - UnicodeJsp.showSet("", "", unicodeSet, true, true, true, printWriter); + UnicodeJsp.showSet("", "", unicodeSet, false, true, true, true, printWriter); // String archaic = // "[[\u018D\u01AA\u01AB\u01B9-\u01BB\u01BE\u01BF\u021C\u021D\u025F\u0277\u027C\u029E\u0343\u03D0\u03D1\u03D5-\u03E1\u03F7-\u03FB\u0483-\u0486\u05A2\u05C5-\u05C7\u066E\u066F\u068E\u0CDE\u10F1-\u10F6\u1100-\u115E\u1161-\u11FF\u17A8\u17D1\u17DD\u1DC0-\u1DC3\u3165-\u318E\uA700-\uA707\\U00010140-\\U00010174]" + @@ -840,6 +852,7 @@ public void TestStuff() throws IOException { UnicodeSetUtilities.parseUnicodeSet("[:hantype=/simp/:]"), false, false, + false, true, printWriter); } @@ -848,14 +861,14 @@ public void TestStuff() throws IOException { @Test public void TestShowProperties() throws IOException { StringWriter out = new StringWriter(); - UnicodeJsp.showProperties(0x00C5, out); + UnicodeJsp.showProperties(0x00C5, "", false, out); assertTrue("props for character", out.toString().contains("Line_Break")); logln(out.toString()); // logln(out); } public void TestIdentifiers() throws IOException { - String out = UnicodeUtilities.getIdentifier("Latin"); + String out = UnicodeUtilities.getIdentifier("Latin", false); assertTrue("identifier info", out.toString().contains("U+016F")); logln(out.toString()); // logln(out); @@ -882,6 +895,7 @@ public void TestShowSet() throws IOException { "", UnicodeSetUtilities.parseUnicodeSet("[:script=/Han/:]"), false, + false, true, true, out); diff --git a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestMultivalued.java b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestMultivalued.java index f039e9790..676661431 100644 --- a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestMultivalued.java +++ b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestMultivalued.java @@ -55,6 +55,18 @@ public void TestScxMulti() { } catch (Exception e) { exceptionMessage = e.getMessage(); } + assertContains("Expected exception", "The value 'beng,deva' is illegal.", exceptionMessage); + } + + @Test + public void TestkHanyuPinyinMulti() { + String unicodeSetString = "\\p{kRSUnicode=35.6|66.6}"; + String exceptionMessage = null; + try { + UnicodeSet parsed = UnicodeSetUtilities.parseUnicodeSet(unicodeSetString); + } catch (Exception e) { + exceptionMessage = e.getMessage(); + } assertEquals( "Expected exception", "Multivalued property values can't contain the delimiter.", diff --git a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestUnicodeSet.java b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestUnicodeSet.java index d0b97a857..e18292a9e 100644 --- a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestUnicodeSet.java +++ b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestUnicodeSet.java @@ -4,18 +4,13 @@ import static org.junit.jupiter.api.Assumptions.assumeTrue; import static org.junit.jupiter.params.provider.Arguments.arguments; -import com.google.common.base.Objects; import com.ibm.icu.impl.Row.R2; -import com.ibm.icu.impl.Utility; import com.ibm.icu.lang.UCharacter; import com.ibm.icu.lang.UProperty; import com.ibm.icu.lang.UProperty.NameChoice; -import com.ibm.icu.text.BreakIterator; -import com.ibm.icu.text.UTF16; import com.ibm.icu.text.UnicodeSet; import com.ibm.icu.util.LocaleData; import com.ibm.icu.util.TimeZone; -import com.ibm.icu.util.ULocale; import com.ibm.icu.util.VersionInfo; import java.io.IOException; import java.nio.charset.Charset; @@ -90,7 +85,8 @@ public void TestOutput() { @Test public void TestEmoji() throws IOException { StringBuilder b = new StringBuilder(); - UnicodeJsp.showSet("scx", "", UnicodeSetUtilities.TAKES_EMOJI_VS, false, false, false, b); + UnicodeJsp.showSet( + "scx", "", UnicodeSetUtilities.TAKES_EMOJI_VS, false, false, false, false, b); String bs = UnicodeUtilities.getPrettySet(UnicodeSetUtilities.FACE, false, false); if (bs.contains(" \uFE0F") || bs.contains(" \u200D")) { errln("Fails extra-space insert" + bs); @@ -216,86 +212,6 @@ public void TestICUEnums() { } } - @Test - public void TestICUStringProps() { - XPropertyFactory factory = XPropertyFactory.make(); - BreakIterator titleIter = BreakIterator.getWordInstance(ULocale.ROOT); - for (int propEnum = UProperty.STRING_START; propEnum < UProperty.STRING_LIMIT; ++propEnum) { - String propName = UCharacter.getPropertyName(propEnum, NameChoice.SHORT); - String propNameLong = UCharacter.getPropertyName(propEnum, NameChoice.LONG); - UnicodeProperty prop3 = factory.getProperty(propName); - logln(Utility.hex(propEnum) + "\t" + propName + "\t" + propNameLong); - int errorCount = 0; - for (int i = 0; i <= 0x10ffff; ++i) { - if (i == 'ß') { - int debug = 0; - } - String icuValue; - try { - switch (propEnum) { - case UProperty.BIDI_PAIRED_BRACKET: - icuValue = UTF16.valueOf(UCharacter.getBidiPairedBracket(i)); - break; - case UProperty.CASE_FOLDING: - icuValue = UCharacter.foldCase(UTF16.valueOf(i), true); - break; - case UProperty.LOWERCASE_MAPPING: - icuValue = UCharacter.toLowerCase(UTF16.valueOf(i)); - break; - case UProperty.TITLECASE_MAPPING: - icuValue = UCharacter.toTitleCase(UTF16.valueOf(i), titleIter); - break; - case UProperty.UPPERCASE_MAPPING: - icuValue = UCharacter.toUpperCase(UTF16.valueOf(i)); - break; - default: - icuValue = - UCharacter.getStringPropertyValue( - propEnum, i, NameChoice.SHORT); - if (propEnum == UProperty.AGE) { - icuValue = - icuValue.equals("0.0.0.0") - ? "unassigned" - : VersionInfo.getInstance(icuValue) - .getVersionString(2, 2); - } - } - } catch (Exception e) { - errln(propNameLong + "\t" + e.getMessage()); - if (++errorCount > 5) break; - else continue; - } - String propValue = prop3.getValue(i); - if (!Objects.equal( - icuValue, propValue)) { // do to avoid verbose mode being every character - assertEquals("string value", icuValue, propValue); - if (++errorCount > 5) break; - else continue; - } - } - } - } - - @Test - public void TestICUDoubleProps() { - XPropertyFactory factory = XPropertyFactory.make(); - // currently only one double property - assertEquals("only 1 double property", 1, UProperty.DOUBLE_LIMIT - UProperty.DOUBLE_START); - String propName = UCharacter.getPropertyName(UProperty.NUMERIC_VALUE, NameChoice.SHORT); - UnicodeProperty prop3 = factory.getProperty(propName); - for (int i = 0; i <= 0x10ffff; ++i) { - Double icuValue = UCharacter.getUnicodeNumericValue(i); - if (icuValue == UCharacter.NO_NUMERIC_VALUE) { - icuValue = Double.NaN; - } - String propString = prop3.getValue(i); - Double propValue = propString == null ? Double.NaN : Double.parseDouble(propString); - if (!icuValue.equals(propValue)) { // do to avoid verbose mode being every character - assertEquals("double value", icuValue, propValue); - } - } - } - private void checkProperty(XPropertyFactory factory, int propEnum) { try { int min = UCharacter.getIntPropertyMinValue(propEnum); @@ -443,7 +359,7 @@ public void TestGC() { gc ; Z ; Separator # Zl | Zp | Zs */ for (String[] extra : extras) { - UnicodeSet expected = new UnicodeSet(extra[2]).freeze(); + UnicodeSet expected = UnicodeSetUtilities.parseUnicodeSet(extra[2]).freeze(); for (String test : extra) { if (test.startsWith("[")) continue; for (String gc : gcs) { diff --git a/docs/unicodejsps/index.md b/docs/unicodejsps/index.md index 88c7391a9..faf1fa84f 100644 --- a/docs/unicodejsps/index.md +++ b/docs/unicodejsps/index.md @@ -92,7 +92,7 @@ Files to investigate Go to `XPropertyFactory.java` to add new properties other than the ones in /props/ ### Using Beta Properties -Set CachedProps.IS_BETA to true. +In [org.unicode.text.Settings](/unicodetools/src/main/java/org/unicode/text/utility/Settings.java), set `latestVersionPhase` to `BETA`. Build & Test diff --git a/unicodetools/src/main/java/org/unicode/props/IndexUnicodeProperties.java b/unicodetools/src/main/java/org/unicode/props/IndexUnicodeProperties.java index b22203fd3..12ab60a99 100644 --- a/unicodetools/src/main/java/org/unicode/props/IndexUnicodeProperties.java +++ b/unicodetools/src/main/java/org/unicode/props/IndexUnicodeProperties.java @@ -128,7 +128,7 @@ private IndexUnicodeProperties(VersionInfo ucdVersion2) { oldVersion = ucdVersion2.compareTo(GenerateEnums.ENUM_VERSION_INFO) < 0; } - public static final IndexUnicodeProperties make(VersionInfo ucdVersion) { + public static final synchronized IndexUnicodeProperties make(VersionInfo ucdVersion) { IndexUnicodeProperties newItem = version2IndexUnicodeProperties.get(ucdVersion); if (newItem == null) { version2IndexUnicodeProperties.put( @@ -416,7 +416,11 @@ public UnicodeMap> loadIntList(UcdProperty prop2) { return result; } - public UnicodeMap load(UcdProperty prop2) { + public synchronized boolean isLoaded(UcdProperty prop) { + return property2UnicodeMap.get(prop) != null; + } + + public synchronized UnicodeMap load(UcdProperty prop2) { String fullFilename = "?"; try { if (prop2 == CHECK_PROPERTY) { @@ -449,6 +453,10 @@ public UnicodeMap load(UcdProperty prop2) { public void internalStoreCachedMap(String dir, UcdProperty prop2, UnicodeMap data) { try { + final var binDir = new File(dir); + if (!binDir.exists()) { + binDir.mkdir(); + } final String cacheFileDirName = dir + getUcdVersion(); final File cacheFileDir = new File(cacheFileDirName); if (!cacheFileDir.exists()) { diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/TestUnicodeInvariants.java b/unicodetools/src/main/java/org/unicode/text/UCD/TestUnicodeInvariants.java index f664472ba..434f000d5 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/TestUnicodeInvariants.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/TestUnicodeInvariants.java @@ -575,7 +575,8 @@ static UnicodeProperty of( final String propName = scan(PROPCHARS, line, pp, true); if (propName.length() > 0) { final FilterOrProp propOrFilter = new FilterOrProp(); - final VersionedProperty xprop = new VersionedProperty().set(propName); + final VersionedProperty xprop = + VersionedProperty.forInvariantTesting().set(propName); propOrFilter.prop = xprop.getProperty(); if (propOrFilter.prop == null) { throw new IllegalArgumentException( @@ -1275,7 +1276,7 @@ public String parseReference(String text, ParsePosition pos, int limit) { return text.substring(start, i); } - final VersionedProperty propertyVersion = new VersionedProperty(); + final VersionedProperty propertyVersion = VersionedProperty.forInvariantTesting(); @Override public boolean applyPropertyAlias( diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/VersionedProperty.java b/unicodetools/src/main/java/org/unicode/text/UCD/VersionedProperty.java index 28fba5c14..f3f068dbf 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/VersionedProperty.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/VersionedProperty.java @@ -6,6 +6,7 @@ import java.text.ParsePosition; import java.util.Map; import java.util.Set; +import java.util.TreeMap; import org.unicode.props.IndexUnicodeProperties; import org.unicode.props.UnicodeProperty; import org.unicode.props.UnicodeProperty.Factory; @@ -19,6 +20,46 @@ public class VersionedProperty { private UnicodeProperty property; private final transient PatternMatcher matcher = new UnicodeProperty.RegexMatcher(); + private boolean throwOnUnknownProperty; + // The version used in the absence of a version prefix. + private String defaultVersion; + // Maps custom names to versions. For the versions covered by this map, no + // other names are permitted, so if this contains "16.0.0β"↦"16.0.0" but not + // "16.0.0"↦"16.0.0", "U16.0.0:General_Category" is rejected. + private Map versionAliases = new TreeMap<>(); + + private VersionedProperty() {} + + public static VersionedProperty forInvariantTesting() { + var result = new VersionedProperty(); + result.throwOnUnknownProperty = true; + result.defaultVersion = Settings.latestVersion; + result.versionAliases.put("-1", Settings.lastVersion); + for (String last = Settings.lastVersion; ; last = last.substring(0, last.length() - 2)) { + result.versionAliases.put(last, Settings.lastVersion); + if (!last.endsWith(".0")) { + break; + } + } + return result; + } + + public static VersionedProperty forJSPs() { + var result = new VersionedProperty(); + result.throwOnUnknownProperty = false; + result.defaultVersion = Settings.lastVersion; + result.versionAliases.put("dev", Settings.latestVersion); + for (String latest = Settings.latestVersion; + ; + latest = latest.substring(0, latest.length() - 2)) { + result.versionAliases.put(latest + Settings.latestVersionPhase, Settings.latestVersion); + if (!latest.endsWith(".0")) { + break; + } + } + return result; + } + private static final Set TOOL_ONLY_PROPERTIES = Set.of("toNFC", "toNFD", "toNFKC", "toNFKD"); @@ -54,25 +95,31 @@ public VersionedProperty set(String xPropertyName) { throw new IllegalArgumentException( "Version field should start with U or R in " + xPropertyName); } - if (names[0].substring(1).equals("-1")) { - version = Settings.lastVersion; + var aliased = versionAliases.get(names[0].substring(1)); + if (aliased != null) { + version = aliased; } else { version = names[0].substring(1); + if (versionAliases.containsValue(version)) { + throw new IllegalArgumentException("Invalid version " + version); + } } xPropertyName = names[1]; } else { - version = Settings.latestVersion; + version = defaultVersion; } - ; propertyName = xPropertyName; propSource = getIndexedProperties(version); property = propSource.getProperty(xPropertyName); if ((property == null && TOOL_ONLY_PROPERTIES.contains(xPropertyName)) - || (isTrivial(property.getUnicodeMap()) && allowRetroactive)) { + || (property != null && isTrivial(property.getUnicodeMap()) && allowRetroactive)) { propSource = ToolUnicodePropertySource.make(version); property = propSource.getProperty(xPropertyName); } if (property == null || isTrivial(property.getUnicodeMap())) { + if (!throwOnUnknownProperty) { + return null; + } throw new IllegalArgumentException( "Can't create property from name: " + propertyName diff --git a/unicodetools/src/main/java/org/unicode/text/utility/Settings.java b/unicodetools/src/main/java/org/unicode/text/utility/Settings.java index 836296a60..12dd2c183 100644 --- a/unicodetools/src/main/java/org/unicode/text/utility/Settings.java +++ b/unicodetools/src/main/java/org/unicode/text/utility/Settings.java @@ -24,6 +24,25 @@ public class Settings { /** Used for the default version. */ public static final String latestVersion = "16.0.0"; + public enum ReleasePhase { + DEV("dev"), // Before α. + ALPHA("α"), // α review. + BETA("β"); // β review. + + private final String toString; + + ReleasePhase(String s) { + toString = s; + } + + @Override + public String toString() { + return toString; + } + }; + + public static final ReleasePhase latestVersionPhase = ReleasePhase.DEV; + public static final String lastVersion = "15.1.0"; // last released version public static final VersionInfo LATEST_VERSION_INFO = VersionInfo.getInstance(latestVersion);