-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathhebrew.ts
149 lines (134 loc) · 3.95 KB
/
hebrew.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
/**
 * Numeric value of each of the 22 base Hebrew letters.
 * Final-form letters have no entry, so lookups for them yield `undefined`.
 */
const LETTER_NUMERIC_VALUES: Record<string, number> = {
  // Units
  "א": 1,
  "ב": 2,
  "ג": 3,
  "ד": 4,
  "ה": 5,
  "ו": 6,
  "ז": 7,
  "ח": 8,
  "ט": 9,
  // Tens
  "י": 10,
  "כ": 20,
  "ל": 30,
  "מ": 40,
  "נ": 50,
  "ס": 60,
  "ע": 70,
  "פ": 80,
  "צ": 90,
  // Hundreds
  "ק": 100,
  "ר": 200,
  "ש": 300,
  "ת": 400,
};

/** `[letter, value]` pairs ordered from the largest value (400) down to the smallest (1). */
const NUMERIC_VALUES_TO_LETTER = Object.entries(LETTER_NUMERIC_VALUES).sort(
  ([, left], [, right]) => right - left);

/** Quote-like marks that are skipped, not rejected, when a numeral is parsed. */
const IGNORED_NUMERIC_CHARS = new Set<string>(["'", '"', "׳", "״"]);
/**
 * Buckets a value by decimal place: 100 for anything of at least one hundred,
 * 10 for anything of at least ten, and 1 otherwise.
 */
function magnitude(value: number): 1 | 10 | 100 {
  return value >= 100 ? 100 : value >= 10 ? 10 : 1;
}
/**
 * Parses a Hebrew numeral written additively with letters (e.g. קכג) into an integer.
 *
 * Characters in IGNORED_NUMERIC_CHARS are skipped. Letters must appear in descending
 * decimal place (hundreds, then tens, then units) with at most one letter per place.
 * ת (400) is the exception: it does not narrow what may follow, so it can be repeated
 * or followed by another hundreds letter. The pairs טו and טז are accepted as the
 * conventional spellings of 15 and 16, and nothing may follow them.
 *
 * @param hebrew - The candidate numeral.
 * @returns The sum of the letter values, or `undefined` when a character is not a
 *   numeral letter or the letters are not in a valid order. Input that contains no
 *   letters at all (including the empty string) yields 0.
 */
export function numericLiteralAsInt(hebrew: string): number | undefined {
  let sum = 0;
  // Decimal places (keyed by magnitude) that the next letter is still allowed to occupy.
  let allowed: Record<number, boolean> = {100: true, 10: true, 1: true};
  // True only while the immediately preceding letter was a ט that may open a טו/טז pair.
  let lastCharWasTet = false;
  for (const char of hebrew.split("")) {
    if (IGNORED_NUMERIC_CHARS.has(char)) continue;
    const charValue = LETTER_NUMERIC_VALUES[char];
    if (charValue === undefined) {
      return undefined;
    }
    sum += charValue;
    // ט can open a טו/טז pair only when the tens place is still unused; after a tens
    // letter it is an ordinary units letter (so יט is 19 but יטו is rejected below).
    if (char === "ט" && allowed[1] && allowed[10]) {
      lastCharWasTet = true;
      allowed = {};
      continue;
    }
    if (lastCharWasTet) {
      if (char !== "ו" && char !== "ז") {
        return undefined;
      }
      // The pair is complete. Clear the flag so that a third letter is checked against
      // the (now empty) `allowed` map and rejected instead of being treated as another
      // continuation of the ט.
      lastCharWasTet = false;
      allowed = {};
      continue;
    }
    if (!allowed[magnitude(charValue)]) return undefined;
    // ת leaves `allowed` untouched so that sequences such as תת or תש stay valid.
    if (charValue !== 400) {
      for (const key of [1, 10, 100]) {
        allowed[key] = key < magnitude(charValue);
      }
    }
  }
  return sum;
}
/**
 * Renders a positive integer as an additive Hebrew numeral.
 *
 * Letters are emitted greedily from the largest value down, so values above 400 are
 * written with repeated ת (e.g. 1000 becomes תתר). A remainder of exactly 15 or 16 is
 * written טו / טז, the conventional spellings, instead of the greedy י + ה / י + ו.
 *
 * @param value - Number to convert. Fractional values are rounded down.
 * @returns The numeral, or an empty string when `value` is below 1, NaN, or infinite.
 */
export function intToHebrewNumeral(value: number): string {
  // A fractional remainder (e.g. 0.5) or Infinity never reaches 0 and no letter fits
  // it, so without this guard and the rounding below the loop would never terminate.
  if (!Number.isFinite(value)) return "";
  let remaining = Math.floor(value);
  const chars: string[] = [];
  while (remaining > 0) {
    if (remaining === 15) {
      chars.push("טו");
      break;
    } else if (remaining === 16) {
      chars.push("טז");
      break;
    }
    // NUMERIC_VALUES_TO_LETTER is sorted by descending value, so the first letter that
    // fits is the largest one that fits.
    for (const [numeral, numeralValue] of NUMERIC_VALUES_TO_LETTER) {
      if (remaining >= numeralValue) {
        remaining -= numeralValue;
        chars.push(numeral);
        break;
      }
    }
  }
  return chars.join("");
}
// The first, second, and last letters of the Hebrew alphabet.
export const ALEPH = "א";
export const BET = "ב";
export const TAV = "ת";
// Source text of a regex character class covering the Hebrew code points that are not
// letters. It is kept as a string so it can be embedded inside larger patterns as well as
// compiled on its own. NOTE(review): the two endpoints are combining marks and are hard
// to see in an editor; they appear to be U+0591 and U+05C7 -- confirm against the chart.
// https://en.wikipedia.org/wiki/Hebrew_(Unicode_block)
const HEBREW_NON_LETTERS = "[֑-ׇ]";
// The same character class compiled with the global flag, for replace-all use.
const HEBREW_NON_LETTERS_REGEX = new RegExp(HEBREW_NON_LETTERS, "g");
/**
 * Deletes every character matched by HEBREW_NON_LETTERS_REGEX (Hebrew marks such as
 * vowel points and trope), leaving letters and all other text in place.
 *
 * @param text - Text that may contain pointed or cantillated Hebrew.
 * @returns The text with those characters removed.
 */
export function stripHebrewNonletters(text: string): string {
  const stripped = text.replace(HEBREW_NON_LETTERS_REGEX, "");
  return stripped;
}
/**
 * Backslash-escapes every regex metacharacter in `text` so that it matches literally
 * when used as a pattern.
 * https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions#escaping
 */
function escapeRegex(text: string): string {
  const metacharacters = /[.*+?^${}()|[\]\\]/g; // eslint-disable-line unicorn/better-regex
  return text.replace(metacharacters, (match) => "\\" + match);
}
/**
 * Builds a global, case-insensitive regex for locating `text` inside Hebrew that may
 * carry marks the query does not have.
 *
 * The query is first stripped with stripHebrewNonletters; every Hebrew letter in it is
 * then followed by an optional run of HEBREW_NON_LETTERS, and every single space is
 * replaced by a class matching one or more spaces, dashes, or punctuation marks.
 *
 * @param text - The search query.
 * @param asRegex - When true, `text` is used as regex source as-is; when false it is
 *   escaped first so it matches literally.
 * @returns The compiled regex, or `undefined` if the resulting pattern does not compile.
 */
export function hebrewSearchRegex(text: string, asRegex: boolean): RegExp | undefined {
  const source = asRegex ? text : escapeRegex(text);
  const withOptionalMarks = stripHebrewNonletters(source).replace(
    /([א-ת])/g,
    (_match, letter: string) => `${letter}${HEBREW_NON_LETTERS}*`);
  // Attempt to ignore punctuation
  const pattern = withOptionalMarks.split(" ").join("[- —–,.:;?!]+");
  try {
    return new RegExp(pattern, "gi");
  } catch {
    return undefined;
  }
}
// Endpoints of the code-point range treated as trope: U+0591 through U+05AF.
const FIRST_TROPE = "\u0591";
const LAST_TROPE = "\u05AF";
// U+05C0, the paseq mark.
const PASEQ = "\u05C0";
// Matches every U+2009 (thin space) in a string.
const THIN_SPACE = new RegExp("\u2009", "g");
/**
 * Removes trope and paseq marks from the text while leaving everything else, including
 * vowel points, in place: only characters in the FIRST_TROPE-LAST_TROPE range and PASEQ
 * are deleted. Thin spaces are then turned into ordinary spaces, and any empty
 * `<small></small>` element is dropped.
 *
 * @param text - Text (possibly containing HTML markup) to clean.
 * @returns The cleaned text.
 */
export function stripHebrewNonlettersOrVowels(text: string): string {
  const tropeAndPaseq = new RegExp(`[${FIRST_TROPE}-${LAST_TROPE}${PASEQ}]`, "g");
  const withoutMarks = text.replace(tropeAndPaseq, "");
  const withPlainSpaces = withoutMarks.replace(THIN_SPACE, " ");
  // sometimes the after-effect of replacing a paseq
  return withPlainSpaces.replace(/<small><\/small>/g, "");
}
/**
 * Builds a Hebrew title of the form "<hebrewName> <section>".
 *
 * @param hebrewName - Hebrew name to put first.
 * @param page - Either the literal "Introduction" or colon-separated numbers
 *   (e.g. "3:12"); each number is converted to a Hebrew numeral and the colons are kept.
 * @returns The combined title.
 */
export function penineiHalachaHebrewTitleName(hebrewName: string, page: string): string {
  let section: string;
  if (page === "Introduction") {
    section = "הקדמה";
  } else {
    const numerals = page.split(":").map(part => intToHebrewNumeral(parseInt(part)));
    section = numerals.join(":");
  }
  return `${hebrewName} ${section}`;
}
/**
 * Builds a Hebrew title of the form "<hebrewName>, פרק <chapter>".
 *
 * @param hebrewName - Hebrew name to put first.
 * @param page - Chapter number as a string; it is parsed with `parseInt` and converted
 *   to a Hebrew numeral.
 * @returns The combined title.
 */
export function mishnehTorahHebrewTitleName(hebrewName: string, page: string): string {
  const chapterNumber = parseInt(page);
  const chapter = intToHebrewNumeral(chapterNumber);
  return `${hebrewName}, פרק ${chapter}`;
}