diff --git a/TeXmacs/langs/encoding/herktounicode.scm b/TeXmacs/langs/encoding/herktounicode.scm new file mode 100644 index 0000000000..fa20a27338 --- /dev/null +++ b/TeXmacs/langs/encoding/herktounicode.scm @@ -0,0 +1,266 @@ +;; Two-way conversions between Hex-Cork (herk) and Unicode + +;; (C) 2003 Felix Breuer, David Allouche +;; 2024 Darcy Shen +;; +;; This software falls under the GNU general public license version 3 or later. +;; It comes WITHOUT ANY WARRANTY WHATSOEVER. For details, see the file LICENSE +;; in the root directory or <http://www.gnu.org/licenses/gpl-3.0.html>. + + +("#00" "#60") +("#01" "#B4") +("#02" "#02C6") ; modifier letter circumflex accent +("#03" "#02DC") ; small tilde +("#04" "#A8") +("#05" "#02DD") +("#06" "#02DA") +("#07" "#02C7") +("#08" "#02D8") +("#09" "#AF") +("#0A" "#02D9") +("#0B" "#B8") +("#0C" "#02DB") +("#0D" "#201A") +("#0E" "#2039") +("#0F" "#203A") +("#10" "#201C") +("#11" "#201D") +("#12" "#201E") +("#13" "#AB") +("#14" "#BB") +("#15" "#2013") +("#16" "#2014") +("#17" "#200B") +("#18" "#2080") +("#19" "#0131") +("#1A" "#0237") +("#1B" "#FB00") +("#1C" "#FB01") +("#1D" "#FB02") +("#1E" "#FB03") +("#1F" "#FB04") +("#20" "#20") +("#21" "#21") +("#22" "#22") +("#23" "#23") +("#24" "#24") +("#25" "#25") ; percent sign +("#26" "#26") +("#27" "#27") +("#28" "#28") +("#29" "#29") +("#2A" "#2A") +("#2B" "#2B") +("#2C" "#2C") +("#2D" "#2D") +("#2E" "#2E") +("#2F" "#2F") +("#30" "#30") +("#31" "#31") +("#32" "#32") +("#33" "#33") +("#34" "#34") +("#35" "#35") +("#36" "#36") +("#37" "#37") +("#38" "#38") +("#39" "#39") +("#3A" "#3A") +("#3B" "#3B") +("#3C" "#3C") ; less than +("#3D" "#3D") +("#3E" "#3E") ; greater than +("#3F" "#3F") +("#40" "#40") +("#41" "#41") +("#42" "#42") +("#43" "#43") +("#44" "#44") +("#45" "#45") +("#46" "#46") +("#47" "#47") +("#48" "#48") +("#49" "#49") +("#4A" "#4A") +("#4B" "#4B") +("#4C" "#4C") +("#4D" "#4D") +("#4E" "#4E") +("#4F" "#4F") +("#50" "#50") +("#51" "#51") +("#52" "#52") +("#53" "#53") +("#54" "#54") +("#55" "#55") +("#56" "#56") +("#57" 
"#57") +("#58" "#58") +("#59" "#59") +("#5A" "#5A") +("#5B" "#5B") +("#5C" "#5C") +("#5D" "#5D") +("#5E" "#5E") +("#5F" "#5F") +("#60" "#2018") ; typographic backquote +("#61" "#61") +("#62" "#62") +("#63" "#63") +("#64" "#64") +("#65" "#65") +("#66" "#66") +("#67" "#67") +("#68" "#68") +("#69" "#69") +("#6A" "#6A") +("#6B" "#6B") +("#6C" "#6C") +("#6D" "#6D") +("#6E" "#6E") +("#6F" "#6F") +("#70" "#70") +("#71" "#71") +("#72" "#72") +("#73" "#73") +("#74" "#74") +("#75" "#75") +("#76" "#76") +("#77" "#77") +("#78" "#78") +("#79" "#79") +("#7A" "#7A") +("#7B" "#7B") +("#7C" "#7C") +("#7D" "#7D") +("#7E" "#7E") +("#7F" "#00AD") +("#80" "#0102") +("#81" "#0104") +("#82" "#0106") +("#83" "#010C") +("#84" "#010E") +("#85" "#011A") +("#86" "#0118") +("#87" "#011E") +("#88" "#0139") +("#89" "#013D") +("#8A" "#0141") +("#8B" "#0143") +("#8C" "#0147") +("#8D" "#014A") +("#8E" "#0150") +("#8F" "#0154") +("#90" "#0158") +("#91" "#015A") +("#92" "#0160") +("#93" "#015E") +("#94" "#0164") +("#95" "#0162") +("#96" "#0170") +("#97" "#016E") +("#98" "#0178") +("#99" "#0179") +("#9A" "#017D") +("#9B" "#017B") +("#9C" "#0132") +("#9D" "#0130") +("#9E" "#0111") +("#9F" "#A7") +("#A0" "#0103") +("#A1" "#0105") +("#A2" "#0107") +("#A3" "#010D") +("#A4" "#010F") +("#A5" "#011B") +("#A6" "#0119") +("#A7" "#011F") +("#A8" "#013A") +("#A9" "#013E") +("#AA" "#0142") +("#AB" "#0144") +("#AC" "#0148") +("#AD" "#014B") +("#AE" "#0151") +("#AF" "#0155") +("#B0" "#0159") +("#B1" "#015B") +("#B2" "#0161") +("#B3" "#015F") +("#B4" "#0165") +("#B5" "#0163") +("#B6" "#0171") +("#B7" "#016F") +("#B8" "#FF") +("#B9" "#017A") +("#BA" "#017E") +("#BB" "#017C") +("#BC" "#0133") +("#BD" "#A1") +("#BE" "#BF") +("#BF" "#A3") +("#C0" "#C0") +("#C1" "#C1") +("#C2" "#C2") +("#C3" "#C3") +("#C4" "#C4") +("#C5" "#C5") +("#C6" "#C6") +("#C7" "#C7") +("#C8" "#C8") +("#C9" "#C9") +("#CA" "#CA") +("#CB" "#CB") +("#CC" "#CC") +("#CD" "#CD") +("#CE" "#CE") +("#CF" "#CF") +("#D0" "#D0") +("#D1" "#D1") +("#D2" "#D2") 
+("#D3" "#D3") +("#D4" "#D4") +("#D5" "#D5") +("#D6" "#D6") +("#D7" "#0152") +("#D8" "#D8") +("#D9" "#D9") +("#DA" "#DA") +("#DB" "#DB") +("#DC" "#DC") +("#DD" "#DD") +("#DE" "#DE") +("#DF" "#1E9E") +("#E0" "#E0") +("#E1" "#E1") +("#E2" "#E2") +("#E3" "#E3") +("#E4" "#E4") +("#E5" "#E5") +("#E6" "#E6") +("#E7" "#E7") +("#E8" "#E8") +("#E9" "#E9") +("#EA" "#EA") +("#EB" "#EB") +("#EC" "#EC") +("#ED" "#ED") +("#EE" "#EE") +("#EF" "#EF") +("#F0" "#F0") +("#F1" "#F1") +("#F2" "#F2") +("#F3" "#F3") +("#F4" "#F4") +("#F5" "#F5") +("#F6" "#F6") +("#F7" "#0153") +("#F8" "#F8") +("#F9" "#F9") +("#FA" "#FA") +("#FB" "#FB") +("#FC" "#FC") +("#FD" "#FD") +("#FE" "#FE") +("#FF" "#DF") diff --git a/TeXmacs/tests/66_13.scm b/TeXmacs/tests/66_13.scm new file mode 100644 index 0000000000..31ff9f0e3a --- /dev/null +++ b/TeXmacs/tests/66_13.scm @@ -0,0 +1,391 @@ +(import (liii check)) +(import (liii base)) + +(check-set-mode! 'report-failed) + +(define (test-herk-0x) + (check (herk->utf8 (string #\x00)) => "`") ; U+0060 + (check (herk->utf8 (string #\x01)) => "´") ; U+00B4 + (check (herk->utf8 (string #\x02)) => "ˆ") ; U+02C6 + (check (herk->utf8 (string #\x03)) => "˜") ; U+02DC + (check (herk->utf8 (string #\x04)) => "¨") ; U+00A8 + (check (herk->utf8 (string #\x05)) => "˝") ; U+02DD + (check (herk->utf8 (string #\x06)) => "˚") ; U+02DA + (check (herk->utf8 (string #\x07)) => "ˇ") ; U+02C7 + (check (herk->utf8 (string #\x08)) => "˘") ; U+02D8 + (check (herk->utf8 (string #\x09)) => "¯") ; U+00AF + (check (herk->utf8 (string #\x0A)) => "˙") ; U+02D9 + (check (herk->utf8 (string #\x0B)) => "¸") ; U+00B8 + (check (herk->utf8 (string #\x0C)) => "˛") ; U+02DB + (check (herk->utf8 (string #\x0D)) => "‚") ; U+201A + (check (herk->utf8 (string #\x0E)) => "‹") ; U+2039 + (check (herk->utf8 (string #\x0F)) => "›") ; U+203A + + (check (herk->utf8 "<#0>") => (string #\x00)) + (check (herk->utf8 "<#F>") => (string #\x0F)) + (check (utf8->herk (string #\x00)) => "<#0>") + (check (utf8->herk 
(string #\x01)) => "<#1>") + (check (utf8->herk (string #\x02)) => "<#2>") + (check (utf8->herk (string #\x03)) => "<#3>") + (check (utf8->herk (string #\x04)) => "<#4>") + (check (utf8->herk (string #\x05)) => "<#5>") + (check (utf8->herk (string #\x06)) => "<#6>") + (check (utf8->herk (string #\x07)) => "<#7>") + (check (utf8->herk (string #\x08)) => "<#8>") + (check (utf8->herk (string #\x09)) => "<#9>") + (check (utf8->herk (string #\x0A)) => "<#A>") + (check (utf8->herk (string #\x0B)) => "<#B>") + (check (utf8->herk (string #\x0C)) => "<#C>") + (check (utf8->herk (string #\x0D)) => "<#D>") + (check (utf8->herk (string #\x0E)) => "<#E>") + (check (utf8->herk (string #\x0F)) => "<#F>") +) + +(define (test-herk-1x) + (check (herk->utf8 (string #\x10)) => "“") ; U+201C + (check (herk->utf8 (string #\x11)) => "”") ; U+201D + (check (herk->utf8 (string #\x12)) => "„") ; U+201E + (check (herk->utf8 (string #\x13)) => "«") ; U+00AB + (check (herk->utf8 (string #\x14)) => "»") ; U+00BB + (check (herk->utf8 (string #\x15)) => "–") ; U+2013 + (check (herk->utf8 (string #\x16)) => "—") ; U+2014 + (check (herk->utf8 (string #\x17)) => (utf8->string #u8(#xE2 #x80 #x8B))) ; U+200B + (check (herk->utf8 (string #\x18)) => "₀") ; U+2080 + (check (herk->utf8 (string #\x19)) => "ı") ; U+0131 + (check (herk->utf8 (string #\x1A)) => "ȷ") ; U+0237 + (check (herk->utf8 (string #\x1B)) => "ff") ; U+FB00 + (check (herk->utf8 (string #\x1C)) => "fi") ; U+FB01 + (check (herk->utf8 (string #\x1D)) => "fl") ; U+FB02 + (check (herk->utf8 (string #\x1E)) => "ffi") ; U+FB03 + (check (herk->utf8 (string #\x1F)) => "ffl") ; U+FB04 + + (check (herk->utf8 "<#10>") => (string #\x10)) + (check (herk->utf8 "<#1F>") => (string #\x1F)) + (check (utf8->herk (string #\x10)) => "<#10>") + (check (utf8->herk (string #\x11)) => "<#11>") + (check (utf8->herk (string #\x12)) => "<#12>") + (check (utf8->herk (string #\x13)) => "<#13>") + (check (utf8->herk (string #\x14)) => "<#14>") + (check (utf8->herk 
(string #\x15)) => "<#15>") + (check (utf8->herk (string #\x16)) => "<#16>") + (check (utf8->herk (string #\x17)) => "<#17>") + (check (utf8->herk (string #\x18)) => "<#18>") + (check (utf8->herk (string #\x19)) => "<#19>") + (check (utf8->herk (string #\x1A)) => "<#1A>") + (check (utf8->herk (string #\x1B)) => "<#1B>") + (check (utf8->herk (string #\x1C)) => "<#1C>") + (check (utf8->herk (string #\x1D)) => "<#1D>") + (check (utf8->herk (string #\x1E)) => "<#1E>") + (check (utf8->herk (string #\x1F)) => "<#1F>") +) + +(define (test-herk-2x) + (check (herk->utf8 (string #\x20)) => " ") + (check (herk->utf8 (string #\x21)) => "!") + (check (herk->utf8 (string #\x22)) => "\"") + (check (herk->utf8 (string #\x23)) => "#") + (check (herk->utf8 (string #\x24)) => "$") + (check (herk->utf8 (string #\x25)) => "%") ; percent sign + (check (herk->utf8 (string #\x26)) => "&") + (check (herk->utf8 (string #\x27)) => "'") + (check (herk->utf8 (string #\x28)) => "(") + (check (herk->utf8 (string #\x29)) => ")") + (check (herk->utf8 (string #\x2A)) => "*") + (check (herk->utf8 (string #\x2B)) => "+") + (check (herk->utf8 (string #\x2C)) => ",") + (check (herk->utf8 (string #\x2D)) => "-") + (check (herk->utf8 (string #\x2E)) => ".") + (check (herk->utf8 (string #\x2F)) => "/") +) + +(define (test-herk-3x) + (check (herk->utf8 (string #\x30)) => "0") + (check (herk->utf8 (string #\x31)) => "1") + (check (herk->utf8 (string #\x32)) => "2") + (check (herk->utf8 (string #\x33)) => "3") + (check (herk->utf8 (string #\x34)) => "4") + (check (herk->utf8 (string #\x35)) => "5") + (check (herk->utf8 (string #\x36)) => "6") + (check (herk->utf8 (string #\x37)) => "7") + (check (herk->utf8 (string #\x38)) => "8") + (check (herk->utf8 (string #\x39)) => "9") + (check (herk->utf8 (string #\x3A)) => ":") + (check (herk->utf8 (string #\x3B)) => ";") + (check (herk->utf8 (string #\x3C)) => "<") + (check (herk->utf8 (string #\x3D)) => "=") + (check (herk->utf8 (string #\x3E)) => ">") + (check 
(herk->utf8 (string #\x3F)) => "?") +) + +(define (test-herk-4x) + (check (herk->utf8 (string #\x40)) => "@") + (check (herk->utf8 (string #\x41)) => "A") + (check (herk->utf8 (string #\x42)) => "B") + (check (herk->utf8 (string #\x43)) => "C") + (check (herk->utf8 (string #\x44)) => "D") + (check (herk->utf8 (string #\x45)) => "E") + (check (herk->utf8 (string #\x46)) => "F") + (check (herk->utf8 (string #\x47)) => "G") + (check (herk->utf8 (string #\x48)) => "H") + (check (herk->utf8 (string #\x49)) => "I") + (check (herk->utf8 (string #\x4A)) => "J") + (check (herk->utf8 (string #\x4B)) => "K") + (check (herk->utf8 (string #\x4C)) => "L") + (check (herk->utf8 (string #\x4D)) => "M") + (check (herk->utf8 (string #\x4E)) => "N") + (check (herk->utf8 (string #\x4F)) => "O") +) + +(define (test-herk-5x) + (check (herk->utf8 (string #\x50)) => "P") + (check (herk->utf8 (string #\x51)) => "Q") + (check (herk->utf8 (string #\x52)) => "R") + (check (herk->utf8 (string #\x53)) => "S") + (check (herk->utf8 (string #\x54)) => "T") + (check (herk->utf8 (string #\x55)) => "U") + (check (herk->utf8 (string #\x56)) => "V") + (check (herk->utf8 (string #\x57)) => "W") + (check (herk->utf8 (string #\x58)) => "X") + (check (herk->utf8 (string #\x59)) => "Y") + (check (herk->utf8 (string #\x5A)) => "Z") + (check (herk->utf8 (string #\x5B)) => "[") + (check (herk->utf8 (string #\x5C)) => "\\") + (check (herk->utf8 (string #\x5D)) => "]") + (check (herk->utf8 (string #\x5E)) => "^") + (check (herk->utf8 (string #\x5F)) => "_") +) + +(define (test-herk-6x) + (check (herk->utf8 (string #\x60)) => "‘") + (check (herk->utf8 (string #\x61)) => "a") + (check (herk->utf8 (string #\x62)) => "b") + (check (herk->utf8 (string #\x63)) => "c") + (check (herk->utf8 (string #\x64)) => "d") + (check (herk->utf8 (string #\x65)) => "e") + (check (herk->utf8 (string #\x66)) => "f") + (check (herk->utf8 (string #\x67)) => "g") + (check (herk->utf8 (string #\x68)) => "h") + (check (herk->utf8 (string 
#\x69)) => "i") + (check (herk->utf8 (string #\x6A)) => "j") + (check (herk->utf8 (string #\x6B)) => "k") + (check (herk->utf8 (string #\x6C)) => "l") + (check (herk->utf8 (string #\x6D)) => "m") + (check (herk->utf8 (string #\x6E)) => "n") + (check (herk->utf8 (string #\x6F)) => "o") +) + +(define (test-herk-7x) + (check (herk->utf8 (string #\x70)) => "p") + (check (herk->utf8 (string #\x71)) => "q") + (check (herk->utf8 (string #\x72)) => "r") + (check (herk->utf8 (string #\x73)) => "s") + (check (herk->utf8 (string #\x74)) => "t") + (check (herk->utf8 (string #\x75)) => "u") + (check (herk->utf8 (string #\x76)) => "v") + (check (herk->utf8 (string #\x77)) => "w") + (check (herk->utf8 (string #\x78)) => "x") + (check (herk->utf8 (string #\x79)) => "y") + (check (herk->utf8 (string #\x7A)) => "z") + (check (herk->utf8 (string #\x7B)) => "{") + (check (herk->utf8 (string #\x7C)) => "|") + (check (herk->utf8 (string #\x7D)) => "}") + (check (herk->utf8 (string #\x7E)) => "~") + (check (herk->utf8 (string #\x7F)) => (utf8->string #u8(#xC2 #xAD))) ; U+00AD +) + +(define (test-herk-8x) + (check (herk->utf8 (string #\x80)) => "Ă") ; U+0102 + (check (herk->utf8 (string #\x81)) => "Ą") + (check (herk->utf8 (string #\x82)) => "Ć") + (check (herk->utf8 (string #\x83)) => "Č") + (check (herk->utf8 (string #\x84)) => "Ď") + (check (herk->utf8 (string #\x85)) => "Ě") + (check (herk->utf8 (string #\x86)) => "Ę") + (check (herk->utf8 (string #\x87)) => "Ğ") + (check (herk->utf8 (string #\x88)) => "Ĺ") + (check (herk->utf8 (string #\x89)) => "Ľ") + (check (herk->utf8 (string #\x8A)) => "Ł") + (check (herk->utf8 (string #\x8B)) => "Ń") + (check (herk->utf8 (string #\x8C)) => "Ň") + (check (herk->utf8 (string #\x8D)) => "Ŋ") + (check (herk->utf8 (string #\x8E)) => "Ő") + (check (herk->utf8 (string #\x8F)) => "Ŕ") +) + +(define (test-herk-9x) + (check (herk->utf8 (string #\x90)) => "Ř") ; U+0158 + (check (herk->utf8 (string #\x91)) => "Ś") ; U+015A + (check (herk->utf8 (string 
#\x92)) => "Š") ; U+0160 + (check (herk->utf8 (string #\x93)) => "Ş") ; U+015E + (check (herk->utf8 (string #\x94)) => "Ť") ; U+0164 + (check (herk->utf8 (string #\x95)) => "Ţ") ; U+0162 + (check (herk->utf8 (string #\x96)) => "Ű") ; U+0170 + (check (herk->utf8 (string #\x97)) => "Ů") ; U+016E + (check (herk->utf8 (string #\x98)) => "Ÿ") ; U+0178 + (check (herk->utf8 (string #\x99)) => "Ź") ; U+0179 + (check (herk->utf8 (string #\x9A)) => "Ž") ; U+017D + (check (herk->utf8 (string #\x9B)) => "Ż") ; U+017B + (check (herk->utf8 (string #\x9C)) => "IJ") ; U+0132 + (check (herk->utf8 (string #\x9D)) => "İ") ; U+0130 + (check (herk->utf8 (string #\x9E)) => "đ") ; U+0111 + (check (herk->utf8 (string #\x9F)) => "§") ; U+00A7 +) + +(define (test-herk-Ax) + (check (herk->utf8 (string #\xA0)) => "ă") + (check (herk->utf8 (string #\xA1)) => "ą") + (check (herk->utf8 (string #\xA2)) => "ć") + (check (herk->utf8 (string #\xA3)) => "č") + (check (herk->utf8 (string #\xA4)) => "ď") + (check (herk->utf8 (string #\xA5)) => "ě") + (check (herk->utf8 (string #\xA6)) => "ę") + (check (herk->utf8 (string #\xA7)) => "ğ") + (check (herk->utf8 (string #\xA8)) => "ĺ") + (check (herk->utf8 (string #\xA9)) => "ľ") + (check (herk->utf8 (string #\xAA)) => "ł") + (check (herk->utf8 (string #\xAB)) => "ń") + (check (herk->utf8 (string #\xAC)) => "ň") + (check (herk->utf8 (string #\xAD)) => "ŋ") + (check (herk->utf8 (string #\xAE)) => "ő") + (check (herk->utf8 (string #\xAF)) => "ŕ") + + (check (utf8->herk (utf8->string #u8(#xC2 #xA0))) => "<#A0>") ; U+00A0 + (check (herk->utf8 "<#A0>") => (utf8->string #u8(#xC2 #xA0))) + + (check (utf8->herk (utf8->string #u8(#xC2 #xA1))) => (string #\xBD)) ; U+00A1 +) + +(define (test-herk-Bx) + (check (herk->utf8 (string #\xB0)) => "ř") + (check (herk->utf8 (string #\xB1)) => "ś") + (check (herk->utf8 (string #\xB2)) => "š") + (check (herk->utf8 (string #\xB3)) => "ş") + (check (herk->utf8 (string #\xB4)) => "ť") + (check (herk->utf8 (string 
#\xB5)) => "ţ") + (check (herk->utf8 (string #\xB6)) => "ű") + (check (herk->utf8 (string #\xB7)) => "ů") + (check (herk->utf8 (string #\xB8)) => "ÿ") + (check (herk->utf8 (string #\xB9)) => "ź") + (check (herk->utf8 (string #\xBA)) => "ž") + (check (herk->utf8 (string #\xBB)) => "ż") + (check (herk->utf8 (string #\xBC)) => "ij") + (check (herk->utf8 (string #\xBD)) => "¡") + (check (herk->utf8 (string #\xBE)) => "¿") + (check (herk->utf8 (string #\xBF)) => "£") +) + +(define (test-herk-Cx) + (check (herk->utf8 (string #\xC0)) => "À") + (check (herk->utf8 (string #\xC1)) => "Á") + (check (herk->utf8 (string #\xC2)) => "Â") + (check (herk->utf8 (string #\xC3)) => "Ã") + (check (herk->utf8 (string #\xC4)) => "Ä") + (check (herk->utf8 (string #\xC5)) => "Å") + (check (herk->utf8 (string #\xC6)) => "Æ") + (check (herk->utf8 (string #\xC7)) => "Ç") + (check (herk->utf8 (string #\xC8)) => "È") + (check (herk->utf8 (string #\xC9)) => "É") + (check (herk->utf8 (string #\xCA)) => "Ê") + (check (herk->utf8 (string #\xCB)) => "Ë") + (check (herk->utf8 (string #\xCC)) => "Ì") + (check (herk->utf8 (string #\xCD)) => "Í") + (check (herk->utf8 (string #\xCE)) => "Î") + (check (herk->utf8 (string #\xCF)) => "Ï") +) + +(define (test-herk-Dx) + (check (herk->utf8 (string #\xD0)) => "Ð") + (check (herk->utf8 (string #\xD1)) => "Ñ") + (check (herk->utf8 (string #\xD2)) => "Ò") + (check (herk->utf8 (string #\xD3)) => "Ó") + (check (herk->utf8 (string #\xD4)) => "Ô") + (check (herk->utf8 (string #\xD5)) => "Õ") + (check (herk->utf8 (string #\xD6)) => "Ö") + (check (herk->utf8 (string #\xD7)) => "Œ") + (check (herk->utf8 (string #\xD8)) => "Ø") + (check (herk->utf8 (string #\xD9)) => "Ù") + (check (herk->utf8 (string #\xDA)) => "Ú") + (check (herk->utf8 (string #\xDB)) => "Û") + (check (herk->utf8 (string #\xDC)) => "Ü") + (check (herk->utf8 (string #\xDD)) => "Ý") + (check (herk->utf8 (string #\xDE)) => "Þ") + (check (herk->utf8 (string #\xDF)) => "ẞ") +) + +(define (test-herk-Ex) + 
(check (herk->utf8 (string #\xE0)) => "à") + (check (herk->utf8 (string #\xE1)) => "á") + (check (herk->utf8 (string #\xE2)) => "â") + (check (herk->utf8 (string #\xE3)) => "ã") + (check (herk->utf8 (string #\xE4)) => "ä") + (check (herk->utf8 (string #\xE5)) => "å") + (check (herk->utf8 (string #\xE6)) => "æ") + (check (herk->utf8 (string #\xE7)) => "ç") + (check (herk->utf8 (string #\xE8)) => "è") + (check (herk->utf8 (string #\xE9)) => "é") + (check (herk->utf8 (string #\xEA)) => "ê") + (check (herk->utf8 (string #\xEB)) => "ë") + (check (herk->utf8 (string #\xEC)) => "ì") + (check (herk->utf8 (string #\xED)) => "í") + (check (herk->utf8 (string #\xEE)) => "î") + (check (herk->utf8 (string #\xEF)) => "ï") +) + +(define (test-herk-Fx) + (check (herk->utf8 (string #\xF0)) => "ð") + (check (herk->utf8 (string #\xF1)) => "ñ") + (check (herk->utf8 (string #\xF2)) => "ò") + (check (herk->utf8 (string #\xF3)) => "ó") + (check (herk->utf8 (string #\xF4)) => "ô") + (check (herk->utf8 (string #\xF5)) => "õ") + (check (herk->utf8 (string #\xF6)) => "ö") + (check (herk->utf8 (string #\xF7)) => "œ") + (check (herk->utf8 (string #\xF8)) => "ø") + (check (herk->utf8 (string #\xF9)) => "ù") + (check (herk->utf8 (string #\xFA)) => "ú") + (check (herk->utf8 (string #\xFB)) => "û") + (check (herk->utf8 (string #\xFC)) => "ü") + (check (herk->utf8 (string #\xFD)) => "ý") + (check (herk->utf8 (string #\xFE)) => "þ") + (check (herk->utf8 (string #\xFF)) => "ß") + + (check (utf8->herk (string #\xFF)) => "<#FF>") +) + +(define (test-herk-others) + (check (utf8->herk "") => "") + (check (herk->utf8 "") => "") + (check (utf8->herk "") => "") + (check (herk->utf8 "") => "") + + (check (utf8->herk "<#00FF>") => "<#00FF>") + (check (utf8->herk "<#0FF>") => "<#0FF>") + (check (utf8->herk "<#FF>") => "<#FF>") + (check (herk->utf8 "<#FF>") => "ÿ") + (check (herk->utf8 "<#0FF>") => "ÿ") + (check (herk->utf8 "<#00FF>") => "ÿ") + + (check (herk->utf8 "<#4E2D>") => "中")) + +(define (test-herk) + 
(test-herk-0x) + (test-herk-1x) + (test-herk-2x) + (test-herk-3x) + (test-herk-4x) + (test-herk-5x) + (test-herk-6x) + (test-herk-7x) + (test-herk-8x) + (test-herk-9x) + (test-herk-Ax) + (test-herk-Bx) + (test-herk-Cx) + (test-herk-Dx) + (test-herk-Ex) + (test-herk-Fx) + (test-herk-others)) + +(tm-define (test_66_13) + (test-herk) + (check-report)) diff --git a/TeXmacs/tests/tmu/unicode_256.en.tmu b/TeXmacs/tests/tmu/unicode_256.en.tmu new file mode 100644 index 0000000000..3bc1e7cf37 --- /dev/null +++ b/TeXmacs/tests/tmu/unicode_256.en.tmu @@ -0,0 +1,594 @@ +> + + + +<\body> + <\wide-tabular> + |||| + \; + |<\cell> + 0 + |<\cell> + 1 + |<\cell> + 2 + |<\cell> + 3 + |<\cell> + 4 + |<\cell> + 5 + |<\cell> + 6 + |<\cell> + 7 + |<\cell> + 8 + |<\cell> + 9 + |<\cell> + A + |<\cell> + B + |<\cell> + C + |<\cell> + D + |<\cell> + E + |<\cell> + F + >| + 000 + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + >| + 001 + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + ​ + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + >| + 002 + |<\cell> + \ + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + >| + 003 + |<\cell> + 0 + |<\cell> + 1 + |<\cell> + 2 + |<\cell> + 3 + |<\cell> + 4 + |<\cell> + 5 + |<\cell> + 6 + |<\cell> + 7 + |<\cell> + 8 + |<\cell> + 9 + |<\cell> + : + |<\cell> + ; + |<\cell> + \ + |<\cell> + = + |<\cell> + \ + |<\cell> + ? 
+ >| + 004 + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + >| + 005 + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + >| + 006 + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + >| + 007 + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + ­ + >| + 008 + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + >| + 009 + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + >| + 00A + |<\cell> +   + |<\cell> + ¡ + |<\cell> + ¢ + |<\cell> + £ + |<\cell> + ¤ + |<\cell> + ¥ + |<\cell> + ¦ + |<\cell> + § + |<\cell> + ¨ + |<\cell> + © + |<\cell> + ª + |<\cell> + « + |<\cell> + ¬ + |<\cell> + ­ + |<\cell> + ® + |<\cell> + \; + >| + 00B + |<\cell> + ° + |<\cell> + ± + |<\cell> + ² + |<\cell> + ³ + |<\cell> + ´ + |<\cell> + µ + |<\cell> + ¶ + |<\cell> + · + |<\cell> + ¸ 
+ |<\cell> + ¹ + |<\cell> + º + |<\cell> + » + |<\cell> + ¼ + |<\cell> + ½ + |<\cell> + ¾ + |<\cell> + ¿ + >| + 00C + |<\cell> + À + |<\cell> + Á + |<\cell> +  + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + >| + 00D + |<\cell> + Ð + |<\cell> + Ñ + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + Œ + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + >| + 00E + |<\cell> + à + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + >| + 00F + |<\cell> + ð + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + ÷ + |<\cell> + ø + |<\cell> + ù + |<\cell> + ú + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + ÿ + >>> + + + +<\initial> + <\collection> + + + + diff --git a/TeXmacs/tests/tmu/unicode_256.zh.tmu b/TeXmacs/tests/tmu/unicode_256.zh.tmu new file mode 100644 index 0000000000..1b76d94bba --- /dev/null +++ b/TeXmacs/tests/tmu/unicode_256.zh.tmu @@ -0,0 +1,594 @@ +> + +> + +<\body> + <\wide-tabular> + ||||||| + \; + |<\cell> + 0 + |<\cell> + 1 + |<\cell> + 2 + |<\cell> + 3 + |<\cell> + 4 + |<\cell> + 5 + |<\cell> + 6 + |<\cell> + 7 + |<\cell> + 8 + |<\cell> + 9 + |<\cell> + A + |<\cell> + B + |<\cell> + C + |<\cell> + D + |<\cell> + E + |<\cell> + F + >| + 000 + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + >| + 001 + 
|<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + ​ + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + >| + 002 + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + >| + 003 + |<\cell> + 0 + |<\cell> + 1 + |<\cell> + 2 + |<\cell> + 3 + |<\cell> + 4 + |<\cell> + 5 + |<\cell> + 6 + |<\cell> + 7 + |<\cell> + 8 + |<\cell> + 9 + |<\cell> + : + |<\cell> + ; + |<\cell> + \ + |<\cell> + = + |<\cell> + \ + |<\cell> + ? + >| + 004 + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + >| + 005 + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + >| + 006 + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + >| + 007 + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + ­ + >| + 008 + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + 
|<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + >| + 009 + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + >| + 00A + |<\cell> +   + |<\cell> + ¡ + |<\cell> + ¢ + |<\cell> + £ + |<\cell> + ¤ + |<\cell> + ¥ + |<\cell> + ¦ + |<\cell> + § + |<\cell> + ¨ + |<\cell> + © + |<\cell> + ª + |<\cell> + « + |<\cell> + ¬ + |<\cell> + ­ + |<\cell> + ® + |<\cell> + \; + >| + 00B + |<\cell> + ° + |<\cell> + ± + |<\cell> + ² + |<\cell> + ³ + |<\cell> + ´ + |<\cell> + µ + |<\cell> + ¶ + |<\cell> + · + |<\cell> + ¸ + |<\cell> + ¹ + |<\cell> + º + |<\cell> + » + |<\cell> + ¼ + |<\cell> + ½ + |<\cell> + ¾ + |<\cell> + ¿ + >| + 00C + |<\cell> + À + |<\cell> + Á + |<\cell> +  + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + >| + 00D + |<\cell> + Ð + |<\cell> + Ñ + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + Œ + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + >| + 00E + |<\cell> + à + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + >| + 00F + |<\cell> + ð + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + ÷ + |<\cell> + ø + |<\cell> + ù + |<\cell> + ú + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + \; + |<\cell> + ÿ + >>> + + + +<\initial> + <\collection> + + + + diff --git 
a/src/Data/Convert/Mogan/to_tmu.cpp b/src/Data/Convert/Mogan/to_tmu.cpp index 721c379e9d..80aad4bff7 100644 --- a/src/Data/Convert/Mogan/to_tmu.cpp +++ b/src/Data/Convert/Mogan/to_tmu.cpp @@ -20,7 +20,7 @@ using namespace moebius; using lolly::data::binary_to_hexadecimal; using moebius::drd::std_contains; -const string TMU_VERSION= "1.0.4"; +const string TMU_VERSION= "1.0.5"; /****************************************************************************** * Conversion of TeXmacs trees to the present TeXmacs string format diff --git a/src/Data/String/converter.cpp b/src/Data/String/converter.cpp index 80a40a8b1d..39fecde06d 100644 --- a/src/Data/String/converter.cpp +++ b/src/Data/String/converter.cpp @@ -139,12 +139,12 @@ converter_rep::load () { if (from == "Hex-Cork" && to == "UTF-8") { hashtree dic; - hashtree_from_dictionary (dic, "corktounicode", BIT2BIT, UTF8, false); + hashtree_from_dictionary (dic, "herktounicode", BIT2BIT, UTF8, false); ht= dic; } else if (from == "UTF-8" && to == "Hex-Cork") { hashtree dic; - hashtree_from_dictionary (dic, "corktounicode", UTF8, BIT2BIT, true); + hashtree_from_dictionary (dic, "herktounicode", UTF8, BIT2BIT, true); ht= dic; } @@ -367,11 +367,11 @@ utf8_to_herk (string input) { int start, i, n= N (input); string output; for (i= 0; i < n;) { - start = i; - unsigned int code= decode_from_utf8 (input, i); - string s = input (start, i); - string r = apply (conv, s); - if (r == s && code >= 256) r= "<#" * to_Hex (code) * ">"; + start = i; + uint32_t code= decode_from_utf8 (input, i); + string s = input (start, i); + string r = apply (conv, s); + if (r == s && (code < 32 || code >= 128)) r= "<#" * to_Hex (code) * ">"; output << r; } return output;