Skip to content

Commit

Permalink
ignore 128..255 when converting to UTF8 (fixes issue with pretokenised code)
Browse files Browse the repository at this point in the history
  • Loading branch information
gfwilliams committed Jul 1, 2024
1 parent df18f04 commit 94475cc
Showing 1 changed file with 3 additions and 2 deletions.
5 changes: 3 additions & 2 deletions core/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -849,13 +849,14 @@ while (d!==undefined) {console.log(btoa(d));d=f.read(${CHUNKSIZE});}
return js;
}

/* Convert a normal JS string (one char per character) to a string of UTF8 bytes */
/* Convert a normal JS string (one char per character) to a string of UTF8 bytes (passes anything 0..255 straight through) */
function asUTF8Bytes(str) {
var result = "";
var bytes = String.fromCharCode;
for (var i=0; i < str.length; i++) {
var charcode = str.charCodeAt(i);
if (charcode < 0x80) result += bytes(charcode);
// checking below 128 would ensure better compatibility with UTF8 (but breaks pretokenised code)
if (charcode < 256) result += bytes(charcode);
else if (charcode < 0x800) {
result += bytes(0xc0 | (charcode >> 6),
0x80 | (charcode & 0x3f));
Expand Down

0 comments on commit 94475cc

Please sign in to comment.