Skip to content

Commit

Permalink
Added translit generator script
Browse files Browse the repository at this point in the history
  • Loading branch information
foldynl committed May 1, 2024
1 parent 41c5c4a commit fbaf731
Showing 1 changed file with 49 additions and 0 deletions.
49 changes: 49 additions & 0 deletions devtools/translit/generate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
#!/usr/bin/env python3

# This script generates a transliteration table that maps Unicode
# characters to sequences of individual characters. The table is
# printed to standard output as two C array definitions.

# For each Unicode code point from 128 (0x80) up to, but not including,
# 0xffff, its transliteration is obtained using the `unidecode` library.

from unidecode import unidecode

# Escape sequences for characters that must not appear verbatim inside a
# single-quoted C character literal.
_C_CHAR_ESCAPES = {
    '\\': '\\\\',
    '\'': '\\\'',
    '"': '\\"',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
}


def _c_char_literal(ch):
    """Return the single character *ch* as a quoted C character literal."""
    escaped = _C_CHAR_ESCAPES.get(ch)
    if escaped is None:
        code = ord(ch)
        # Any remaining ASCII control character is written as an octal
        # escape so the generated source never contains a raw control byte.
        escaped = f"\\{code:03o}" if code < 0x20 or code == 0x7f else ch
    return "'" + escaped + "'"


def unicode_table():
    """Print the transliteration records and return the index map.

    For every code point in ``range(128, 0xffff)`` the transliteration is
    obtained from ``unidecode``. Each non-empty transliteration is printed
    to standard output as one record of the form
    ``<length>, '<c1>', ..., '<cn>',`` with a line break after every sixth
    record. The caller is expected to print the surrounding array
    declaration.

    Returns:
        A list with one entry per code point of the range, in order. An
        entry is the offset of that code point's record inside the printed
        array, or ``-1`` when the code point has no transliteration.
    """
    index_map = []
    emitted = 0
    offset = 0
    # NOTE(review): the range end is exclusive, so U+FFFF gets no entry.
    # Left unchanged because the consumer presumably relies on the current
    # table size - confirm before extending.
    for codepoint in range(128, 0xffff):
        try:
            translit = unidecode(chr(codepoint))
        except ValueError:
            # Best effort: a code point that cannot be converted is
            # recorded as having no transliteration.
            index_map.append(-1)
            continue

        if translit == "":
            index_map.append(-1)
            continue

        # Transliterations may contain characters that are not valid inside
        # a C character literal (quotes, backslash and possibly control
        # characters such as a newline), so every character is escaped.
        literals = [_c_char_literal(ch) for ch in translit]
        length = len(translit)
        print(f"{length}, " + ", ".join(literals) + ",", end=" ")
        emitted += 1
        index_map.append(offset)
        # A record occupies one length element plus one element per character.
        offset += length + 1
        if emitted % 6 == 0:
            print("")
    return index_map

# Emit the character data array. unicode_table() prints the array body
# itself and hands back the per-code-point offsets into that array.
print('const char Data::translitTab[] = {')
index_map = unicode_table()
print('};')

# Emit the index map, ten values per output line. A trailing partial row
# is not terminated, so the closing brace follows it on the same line.
_VALUES_PER_ROW = 10
print('const int Data::tranlitIndexMap[] = {')
for row_start in range(0, len(index_map), _VALUES_PER_ROW):
    row = index_map[row_start:row_start + _VALUES_PER_ROW]
    row_end = "\n" if len(row) == _VALUES_PER_ROW else ""
    print("".join(f"{offset}, " for offset in row), end=row_end)
print('};')

0 comments on commit fbaf731

Please sign in to comment.