-
Notifications
You must be signed in to change notification settings - Fork 4
/
translator.py
121 lines (104 loc) · 4.38 KB
/
translator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
"""Tools for generating new Unicode convertions."""
import argparse
import collections
import pathlib
from pathlib import Path
from typing import Dict, Tuple
import toml
def remove_common_characters(string1: str, string2: str) -> Tuple[str, str]:
    """Drop the positions at which two strings hold the same character.

    The strings are compared position by position. Wherever both hold
    the same character, that character is left out of both results;
    every other character is kept in its original order.

    Both strings must be of the same length and contain only unique
    characters.

    Args:
        string1 (str): The string that will be checked against
            ``string2``.
        string2 (str): The string that will be checked against
            ``string1``.

    Returns:
        Tuple[str, str]: The characters of ``string1`` and ``string2``,
        respectively, at the positions where the two strings differ.

    Raises:
        ValueError: If the length of ``string1`` does not equal that of
            ``string2``.
        ValueError: If either ``string1`` or ``string2`` contain
            duplicate characters.
    """
    if len(string1) != len(string2):
        raise ValueError(
            "Both strings must be the same length."
            f" ``string1`` is of length {len(string1)}"
            f" and ``string2`` is of length {len(string2)}"
        )

    for i, string in enumerate((string1, string2)):
        # Cheap uniqueness test first; only count characters when the
        # error message actually has to name the duplicates.
        if len(string) != len(set(string)):
            character_counts = collections.Counter(string)
            duplicates = [
                character
                for character, count in character_counts.items()
                if count > 1
            ]
            # Only the first five duplicates are shown to keep the
            # message short.
            duplicate_preview = ", ".join(duplicates[:5])
            raise ValueError(
                "Each string must contain only unique characters."
                f" ``string{i + 1}`` contains duplicates of the"
                f" following characters: {duplicate_preview},..."
            )

    # Collect the differing positions once, then join, instead of
    # growing two strings with ``+=`` inside the loop.
    differing_pairs = [
        (character1, character2)
        for character1, character2 in zip(string1, string2)
        if character1 != character2
    ]
    cleaned_string1 = "".join(character1 for character1, _ in differing_pairs)
    cleaned_string2 = "".join(character2 for _, character2 in differing_pairs)
    return cleaned_string1, cleaned_string2
def read_file(file_path: Path) -> Dict[str, Dict[str, str]]:
    """Read in a file of character transformations.

    Assumes the top line of the file is the characters to be
    transformed, and the following lines are the transformation name
    followed by a whitespace separator and the transformed characters.
    It is assumed that characters without a corresponding transformation
    will just contain the original character. Blank lines after the
    first line are ignored.

    Args:
        file_path (Path): The location of the file. The file is read as
            UTF-8.

    Returns:
        Dict[str, Dict[str, str]]: A dictionary representation of the
        file. Keys are the transformations' names, and values are
        dictionaries whose keys are the characters to be transformed and
        whose values are the transformed characters. Characters that a
        transformation leaves unchanged are omitted.

    Raises:
        ValueError: If a non-blank transformation line does not consist
            of exactly two whitespace-separated fields, or if
            ``remove_common_characters`` rejects the line.
    """
    translator = {}
    # Decode explicitly as UTF-8: the file holds the Unicode characters
    # being mapped, and the platform default encoding may not be UTF-8.
    with open(file_path, encoding="utf-8") as file:
        # ``readlines()`` is used here because of a bug with pytest-mock
        # and Python 3.6
        # https://github.com/pytest-dev/pytest-mock/issues/185
        for i, line in enumerate(file.readlines()):
            if i == 0:
                original_text = line.strip()
                continue
            if not line.strip():
                # A blank line (e.g. a trailing empty line) carries no
                # transformation; unpacking its split would fail.
                continue
            unicode_name, converted_characters = line.split()
            # Keep only the characters the transformation changes.
            base_characters, converted_characters = remove_common_characters(
                original_text, converted_characters
            )
            translator[unicode_name] = dict(
                zip(base_characters, converted_characters)
            )
    return translator
def write_config(translator: Dict[str, Dict[str, str]], write_path: Path) -> None:
    """Write dictionary to a TOML file.

    Args:
        translator (Dict[str, Dict[str, str]]): The dictionary to
            convert to TOML.
        write_path (Path): The path that will be written to. Should be a
            TOML file. Any existing content is replaced.
    """
    # TOML documents are defined as UTF-8, so encode explicitly instead of
    # relying on the platform default encoding.
    write_path.write_text(toml.dumps(translator), encoding="utf-8")
if __name__ == "__main__":
    # Command-line entry point: read a character-transformation file and
    # write the resulting mapping out as TOML.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "input_filename",
        help="path of the character-transformation file to read",
    )
    parser.add_argument(
        "output_filename",
        help="path of the TOML file to write",
    )
    args = parser.parse_args()

    input_path = pathlib.Path(args.input_filename)
    output_path = pathlib.Path(args.output_filename)

    translator = read_file(input_path)
    write_config(translator, write_path=output_path)