Skip to content

Commit

Permalink
Allow strings in propertywise tests (#911)
Browse files Browse the repository at this point in the history
* Allow strings in Propertywise tests

* spots

* stringAt

* After macchiati’s review

* spots
  • Loading branch information
eggrobin authored Sep 6, 2024
1 parent 6a34bda commit addf0c9
Show file tree
Hide file tree
Showing 2 changed files with 67 additions and 27 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -525,6 +525,15 @@ private static void propertywiseAlikeLine(
}
}

/**
 * Returns the {@code i}th element of {@code set} as a string, treating the set as its code
 * points (indexed via {@code set.charAt}) followed by its strings (in the iteration order of
 * {@code set.strings()}).
 *
 * @param set the set to index into
 * @param i zero-based index, which must satisfy {@code 0 <= i < set.size()}
 * @return the single-code-point string for indices below the number of code points, otherwise
 *     the corresponding entry of {@code set.strings()}
 * @throws IndexOutOfBoundsException if {@code i} is negative or not less than {@code set.size()}
 */
private static String stringAt(UnicodeSet set, int i) {
    // Fail fast with a descriptive exception instead of letting an out-of-range index surface
    // as an IllegalArgumentException from Character.toString or a bare NoSuchElementException.
    Objects.checkIndex(i, set.size());
    // size() counts code points and strings together, so the difference is the number of
    // code points; indices below it address code points, the rest address strings.
    final int codePointsSize = set.size() - set.strings().size();
    if (i < codePointsSize) {
        return Character.toString(set.charAt(i));
    }
    final int stringIndex = i - codePointsSize;
    return set.strings().stream()
            .skip(stringIndex)
            .findFirst()
            .orElseThrow(
                    () ->
                            new IndexOutOfBoundsException(
                                    "No string at index " + stringIndex + " in " + set));
}

private static void propertywiseCorrespondenceLine(
Set<String> ignoredProperties,
UnicodeSet firstSet,
Expand All @@ -538,13 +547,13 @@ private static void propertywiseCorrespondenceLine(
final List<UnicodeSet> sets = new ArrayList<>();
sets.add(firstSet);
expectToken(":", pp, source);

// Index of the first set of multi-character strings (and of the first multi-character
// reference string).
// This is `m` in the documentation in UnicodeInvariantTest.txt.
int firstMultiCharacterIndex = -1;
do {
final var set = parseUnicodeSet(source, pp);
if (set.hasStrings()) {
throw new BackwardParseException(
"Set should contain only single code points for property comparison",
pp.getIndex());
}
if (set.size() != firstSet.size()) {
throw new BackwardParseException(
"Sets should have the same size for property correspondence (got "
Expand All @@ -554,18 +563,41 @@ private static void propertywiseCorrespondenceLine(
+ ")",
pp.getIndex());
}
if (set.hasStrings() && set.strings().size() != set.size()) {
throw new BackwardParseException(
"Sets should be all strings or all code points for property correspondence",
pp.getIndex());
}
if (firstMultiCharacterIndex == -1) {
if (set.hasStrings()) {
firstMultiCharacterIndex = sets.size();
}
} else if (!set.hasStrings()) {
throw new BackwardParseException(
"Code points should come before strings in property correspondence",
pp.getIndex());
}
sets.add(set);
} while (Lookahead.oneToken(pp, source).accept(":"));
final List<Integer> referenceCodePoints = new ArrayList<>();
if (firstMultiCharacterIndex == -1) {
firstMultiCharacterIndex = sets.size();
}
final List<String> referenceCodePoints = new ArrayList<>();
expectToken("CorrespondTo", pp, source);
do {
final var referenceSet = parseUnicodeSet(source, pp);
if (referenceSet.hasStrings() || referenceSet.size() != 1) {
if (referenceSet.size() != 1) {
throw new BackwardParseException(
"reference should be a single code point or string for property correspondence",
pp.getIndex());
}
if (referenceSet.hasStrings()
!= (referenceCodePoints.size() >= firstMultiCharacterIndex)) {
throw new BackwardParseException(
"reference should be a single code point for property correspondence",
"Strings should correspond to strings for property correspondence",
pp.getIndex());
}
referenceCodePoints.add(referenceSet.charAt(0));
referenceCodePoints.add(referenceSet.iterator().next());
} while (Lookahead.oneToken(pp, source).accept(":"));
if (referenceCodePoints.size() != sets.size()) {
throw new BackwardParseException(
Expand Down Expand Up @@ -608,8 +640,8 @@ public ExpectedPropertyDifference(String actualValueAlias, String referenceValue
expectedDifference = expectedPropertyDifferences.get(alias);
}
if (expectedDifference != null) {
for (int k = 0; k < sets.size(); ++k) {
final int rk = referenceCodePoints.get(k);
for (int k = 0; k < firstMultiCharacterIndex; ++k) {
final int rk = referenceCodePoints.get(k).codePointAt(0);
final String pRk = property.getValue(rk);
if (!Objects.equals(pRk, expectedDifference.referenceValueAlias)) {
errorMessageLines.add(
Expand Down Expand Up @@ -638,9 +670,9 @@ public ExpectedPropertyDifference(String actualValueAlias, String referenceValue
}
}
} else {
for (int k = 0; k < sets.size(); ++k) {
for (int k = 0; k < firstMultiCharacterIndex; ++k) {
final UnicodeSet set = sets.get(k);
final int rk = referenceCodePoints.get(k);
final int rk = referenceCodePoints.get(k).codePointAt(0);
final String pRk = property.getValue(rk);
loop_over_set:
for (int i = 0; i < set.size(); ++i) {
Expand All @@ -652,10 +684,9 @@ public ExpectedPropertyDifference(String actualValueAlias, String referenceValue
Integer lMatchingForReference = null;
for (int l = 0; l < sets.size(); ++l) {
final boolean pCkEqualsCl =
Objects.equals(pCk, Character.toString(sets.get(l).charAt(i)));
Objects.equals(pCk, stringAt(sets.get(l), i));
final boolean pRkEqualsRl =
Objects.equals(
pRk, Character.toString(referenceCodePoints.get(l)));
Objects.equals(pRk, referenceCodePoints.get(l));
if (pRkEqualsRl) {
lMatchingForReference = l;
if (pCkEqualsCl) {
Expand Down Expand Up @@ -685,8 +716,7 @@ public ExpectedPropertyDifference(String actualValueAlias, String referenceValue
+ ")\t=\t"
+ pCk
+ "\t\t"
+ Character.toString(
sets.get(lMatchingForReference).charAt(i))
+ stringAt(sets.get(lMatchingForReference), i)
+ "\twhereas\t"
+ property.getName()
+ "("
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -147,16 +147,19 @@
# CorrespondTo <R₁> : ... : <Rₙ>
# [ UpTo: <Property> (<SValue> vs <RValue>) {, <Property> (<SValue> vs <RValue>) }]
#
# The Sₖ must be Unicode sets of equal size with no strings. They are considered in code
# point order for the correspondence check (item 2 below).
# The references Rₖ must be Unicode sets each containing a single code point; by a slight abuse of
# notation we refer to the code point as Rₖ in the explanation below.
# The Sₖ must be Unicode sets of equal size, either with no strings or only strings.
# They are considered in code point order for the correspondence check (item 2 below).
# The references Rₖ must be Unicode sets each containing a single code point or a single string;
# by a slight abuse of notation we refer to the code point or string as Rₖ in the explanation below.
# For some m in 1 .. n, the following must hold:
# a. Rₖ is a code point and Sₖ must contain only code points for k ≤ m, and
# b. Rₖ is a string and Sₖ must contain only strings for m < k ≤ n.
# For every non-ignored property P that does not appear in the optional UpTo clause,
# checks that for each k in 1 .. n, for the ith character C in Sₖ, either:
# checks that for each k in 1 .. m, for the ith character C in Sₖ, either:
# 1. P(C) = P(Rₖ), or
# 2. for some l in 1 .. n, both:
# — P(Rₖ) is equal to Rₗ, and
# — P(C) is equal to the ith character in Sₗ.
# — P(C) is equal to the ith character (or string, if l > m) in Sₗ.
# For every non-ignored property P that appears in the UpTo clause, checks all characters in the
# sets Sₖ have the SValue and all R characters have the RValue.
#
Expand All @@ -174,9 +177,9 @@
Propertywise [[α-ω] - [ς]] : [[Α-Ω] - \p{gc=Cn}]
CorrespondTo [g] : [G]
UpTo: Block (Greek_And_Coptic vs Basic_Latin),
Script (Greek vs Latin),
Script_Extensions (Greek vs Latin),
East_Asian_Width (Ambiguous vs Narrow)
Script (Greek vs Latin),
Script_Extensions (Greek vs Latin),
East_Asian_Width (Ambiguous vs Narrow)
# The modifier letters ʳʷʸ are related to their non-superscripted counterparts in the same way
# that ʰ is related to h. The capitals must be part of the correspondence because they are
# property values of the lowercase letters.
Expand Down Expand Up @@ -1369,6 +1372,13 @@ Ignoring Unicode_1_Name Confusable_MA:
CorrespondTo [ⁱ] : [i] : [I]
end Ignoring;

Propertywise [ゟ] : [{より}]
CorrespondTo [ヿ] : [{コト}]
UpTo: Block (Hiragana vs Katakana),
Script (Hiragana vs Katakana),
Script_Extensions (Hiragana vs Katakana),
Word_Break (Other vs Katakana)

end Ignoring;

end Ignoring;

0 comments on commit addf0c9

Please sign in to comment.