Skip to content

Commit

Permalink
Use CharSequences instead of Strings for isWellFormed and toWellFormed
Browse files Browse the repository at this point in the history
This also adds a shortcut: if the string contains no surrogate characters, the existing string is reused instead of a new one being created.
  • Loading branch information
camnwalter committed Sep 19, 2024
1 parent 6109f21 commit cc4e73c
Showing 1 changed file with 39 additions and 13 deletions.
52 changes: 39 additions & 13 deletions rhino/src/main/java/org/mozilla/javascript/NativeString.java
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@

import java.text.Collator;
import java.text.Normalizer;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import org.mozilla.javascript.ScriptRuntime.StringIdOrIndex;

/**
Expand Down Expand Up @@ -782,7 +784,9 @@ else if (Normalizer.Form.NFC.name().equals(formStr))
}
case Id_isWellFormed:
{
String str = ScriptRuntime.toString(requireObjectCoercible(cx, thisObj, f));
CharSequence str =
ScriptRuntime.toCharSequence(
requireObjectCoercible(cx, thisObj, f));
int len = str.length();
boolean foundLeadingSurrogate = false;
for (int i = 0; i < len; i++) {
Expand All @@ -805,29 +809,51 @@ else if (Normalizer.Form.NFC.name().equals(formStr))
}
case Id_toWellFormed:
{
String str = ScriptRuntime.toString(requireObjectCoercible(cx, thisObj, f));
StringBuilder sb = new StringBuilder();
CharSequence str =
ScriptRuntime.toCharSequence(
requireObjectCoercible(cx, thisObj, f));
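// Keys are char indices into str; only surrogate code units get an entry.
// TRUE marks an index that is part of a valid surrogate pair,
// FALSE marks a lone (unpaired) surrogate; non-surrogate characters are absent.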
Map<Integer, Boolean> surrogates = new HashMap<>();

int len = str.length();
char prev = 0;
int firstSurrogateIndex = -1;
for (int i = 0; i < len; i++) {
char c = str.charAt(i);

if (NativeJSON.isLeadingSurrogate(c)
&& i < len - 1
&& NativeJSON.isTrailingSurrogate(str.charAt(i + 1))) {
// do nothing as the next case will add both surrogates
} else if (NativeJSON.isTrailingSurrogate(c)
&& NativeJSON.isLeadingSurrogate(prev)) {
sb.append(prev).append(c);
if (NativeJSON.isLeadingSurrogate(prev)
&& NativeJSON.isTrailingSurrogate(c)) {
surrogates.put(Integer.valueOf(i - 1), Boolean.TRUE);
surrogates.put(Integer.valueOf(i), Boolean.TRUE);
} else if (NativeJSON.isLeadingSurrogate(c)
|| NativeJSON.isTrailingSurrogate(c)) {
sb.append('\uFFFD');
} else {
sb.append(c);
surrogates.put(Integer.valueOf(i), Boolean.FALSE);
if (firstSurrogateIndex == -1) {
firstSurrogateIndex = i;
}
}

prev = c;
}

if (surrogates.isEmpty()) {
return str.toString();
}

StringBuilder sb =
new StringBuilder(str.subSequence(0, firstSurrogateIndex));
for (int i = firstSurrogateIndex; i < len; i++) {
char c = str.charAt(i);
Boolean pairOrNormal = surrogates.get(Integer.valueOf(i));
if (pairOrNormal == null || pairOrNormal) {
sb.append(c);
} else {
sb.append('\uFFFD');
}
}

return sb.toString();
}

Expand Down

0 comments on commit cc4e73c

Please sign in to comment.