-
Notifications
You must be signed in to change notification settings - Fork 0
/
base114514.py
executable file
·226 lines (194 loc) · 11.2 KB
/
base114514.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
#! /usr/bin/env python3
"""Base114514 data encodings"""
from base64 import b64decode, b64encode
from locale import getdefaultlocale
from binascii import Error as BinASCIIError
from re import fullmatch
import sys
__all__ = ['b114514encode', 'b114514decode', 'encoding_dict', 'decoding_dict']
# Base114514 code book.
#
# Every symbol of the standard base64 alphabet (plus the '=' padding byte) is
# assigned one unique four-digit code word built from the digits 1, 4 and 5.
# Keys of ``encoding_dict`` are the integer byte values obtained when iterating
# over base64 output; values are the four-byte code words.
encoding_dict = dict(zip(
    b'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    b'abcdefghijklmnopqrstuvwxyz'
    b'0123456789'
    b'+/=',
    (
        # 'A' .. 'Z'
        b'1145', b'1154', b'1514', b'1415', b'1541', b'1451', b'5114',
        b'4115', b'5141', b'4151', b'4511', b'5411', b'5541', b'5514',
        b'5154', b'5451', b'5145', b'5415', b'1554', b'4551', b'4515',
        b'1545', b'1455', b'4155', b'4415', b'4451',
        # 'a' .. 'z'
        b'4541', b'4145', b'4154', b'4514', b'1445', b'5441', b'1454',
        b'5414', b'5144', b'1544', b'1114', b'1115', b'5551', b'5554',
        b'4441', b'4445', b'1151', b'1141', b'5515', b'5545', b'4454',
        b'4414', b'1511', b'1411', b'4544', b'4144',
        # '0' .. '9'
        b'5455', b'5155', b'1111', b'4444', b'5555', b'1155', b'1144',
        b'5511', b'5544', b'4455',
        # '+', '/', '='
        b'4411', b'5115', b'4114',
    ),
))

# Inverse table: four-digit code word -> the one-byte base64 symbol it encodes.
decoding_dict = {word: bytes((symbol,)) for symbol, word in encoding_dict.items()}
# Simplified-Chinese translations of the user-facing messages, keyed by the
# English source text.  Built once at import time rather than on every call.
_CN_STRINGS = {
    'string argument should contain only ASCII characters': '字符串参数应仅包含 ASCII 字符',
    'Non-base114514 digit found': '发现非 base114514 字符',
    'Usage: base114514 [OPTION]... [FILE]': '用法:base114514 [选项]... [文件]',
    'Base114514 encode or decode FILE, or standard input, to standard output.\n': 'Base114514 编码或解码 <文件> 或标准输入,并输出到标准输出。\n',
    'With no FILE, or when FILE is -, read standard input.\n': '如果没有指定 <文件>,或者 <文件> 为 "-",则从标准输入读取。\n',
    'Mandatory arguments to long options are mandatory for short options too.': '长选项的必选参数对于短选项也是必选的。',
    'decode data': '解码数据',
    'when decoding, ignore non-numeric characters': '解码时忽略非数字字符',
    'COLS': '列数',
    'wrap encoded lines after COLS character (default 76).': '在指定的 <列数> 后自动换行(默认为 76)。',
    'Use 0 to disable line wrapping': '0 为禁用自动换行',
    'output version information and exit': '显示版本信息并退出',
    'The data are encoded as described for the base114514 alphabet by YidaozhanYa.': '数据以 YidaozhanYa 规定的 base114514 数字表的格式进行编码。',
    'When decoding, the input may contain newlines in addition to the bytes of': '解码时,输入数据除了包含正式的 base114514 数字表的字节以外,还可能包含一些',
    'the formal base114514 alphabet. Use --ignore-garbage to attempt to recover': '换行符。使用 --ignore-garbage 来使程序在已编码的流中遇到数字表以外的',
    'from any other non-numeric bytes in the encoded stream.': '字节后尝试恢复执行。',
    'Huh, huh, huh, ah ah ah ah ah ah ah ah ah ah ah ah ah ah ah ah ah ah ah ah ah': '哼, 哼, 哼, 啊啊啊啊啊啊啊啊啊啊啊啊啊',
    'unrecognized option': '不适用的选项',
    'Try \'base114514 --help\' for more information.': '请尝试执行 "base114514 --help" 来获取更多信息。',
    'extra operand': '多余的操作对象',
}


def _(s: str) -> str:
    """Return the localized form of the message *s*.

    When the default locale's language code starts with ``zh`` the Chinese
    translation of *s* is returned; in every other case *s* itself is
    returned.

    Args:
        s: English source text of the message.

    Returns:
        The translated message, or *s* unchanged when the locale is not
        Chinese, cannot be determined, or no translation exists for *s*.
    """
    try:
        language = getdefaultlocale()[0]
    except ValueError:
        # NOTE(review): getdefaultlocale() is reported to raise ValueError for
        # some malformed locale settings; fall back to English in that case.
        language = None
    # The language code is None when no locale could be determined (for
    # example under the plain C locale), so it must be checked before use.
    if language is None or not language.startswith('zh'):
        return s
    # An untranslated message falls back to its English text instead of
    # raising KeyError.
    return _CN_STRINGS.get(s, s)
# Adapted from the standard library's base64 module.
bytes_types = (bytes, bytearray)  # binary types that are handed back untouched


def _bytes_from_decode_data(s):
    """Coerce decoder input into a bytes-like object.

    ``bytes`` and ``bytearray`` arguments are returned as they are, ``str``
    arguments are encoded as ASCII, and any other object is converted through
    ``memoryview(...).tobytes()``.

    Raises:
        ValueError: if *s* is a ``str`` containing non-ASCII characters.
        TypeError: if *s* is neither a ``str`` nor usable as a memoryview.
    """
    if isinstance(s, bytes_types):
        return s
    if not isinstance(s, str):
        # Last resort: anything memoryview() accepts is copied into bytes.
        try:
            return memoryview(s).tobytes()
        except TypeError:
            raise TypeError("argument should be a bytes-like object or ASCII "
                            "string, not %r" % s.__class__.__name__) from None
    try:
        return s.encode('ascii')
    except UnicodeEncodeError:
        raise ValueError(_('string argument should contain only ASCII characters'))
# Base114514 encoding/decoding uses standard base64 module
def b114514encode(bytes_to_encode: bytes) -> bytes:
    """Encode a bytes-like object using Base114514.

    The input is first encoded with standard base64; every byte of that
    base64 text (padding included) is then replaced by its four-digit code
    word from ``encoding_dict``.

    Args:
        bytes_to_encode: The binary data to encode.

    Returns:
        The encoded data as a ``bytes`` object made of the digits 1, 4 and 5;
        four output bytes per base64 symbol.
    """
    # A single join is linear in the input size; growing an immutable bytes
    # object with += inside a loop re-copies the accumulated result each time.
    return b''.join(encoding_dict[symbol] for symbol in b64encode(bytes_to_encode))
def b114514decode(bytes_to_decode: bytes, validate: bool = False) -> bytes:
    """Decode a Base114514 encoded bytes-like object or ASCII string.

    The input is read as a sequence of four-digit code words over the digits
    1, 4 and 5.  Each code word is mapped back to one base64 symbol through
    ``decoding_dict`` and the resulting base64 text is decoded with
    ``base64.b64decode``.

    Args:
        bytes_to_decode: The encoded data (bytes-like object or ASCII str).
        validate: If False (the default), every byte other than ``1``, ``4``
            and ``5`` is discarded before decoding.  If True, such a byte
            results in a ``binascii.Error``.

    Returns:
        The decoded data as a ``bytes`` object.

    Raises:
        binascii.Error: If *validate* is true and the input contains a byte
            other than ``1``, ``4`` or ``5``; if a four-digit group is not a
            valid code word; or if the recovered base64 text is rejected by
            ``base64.b64decode`` (for example because of incorrect padding).
        ValueError: If a ``str`` argument contains non-ASCII characters.
        TypeError: If the argument is neither a ``str`` nor bytes-like.

    Note:
        Trailing digits that do not form a complete four-digit group are
        ignored.
    """
    data = _bytes_from_decode_data(bytes_to_decode)
    if validate:
        # Match on the raw bytes: decoding to str first would turn non-ASCII
        # input into a UnicodeDecodeError instead of the documented
        # binascii.Error.
        if not fullmatch(rb'[145]*', data):
            raise BinASCIIError(_('Non-base114514 digit found'))
        digits = bytes(data)
    else:
        # Keep only the three digits of the alphabet; everything else is noise.
        digits = bytes(byte for byte in data if byte in b'145')

    symbols = []
    complete_length = len(digits) - len(digits) % 4
    for start in range(0, complete_length, 4):
        symbol = decoding_dict.get(digits[start:start + 4])
        if symbol is None:
            # Only 65 of the 81 possible four-digit groups are code words;
            # report the others as invalid input rather than leaking KeyError.
            raise BinASCIIError(_('Non-base114514 digit found'))
        symbols.append(symbol)
    return b64decode(b''.join(symbols))
def wraps(string: str, every=76):
    """Break *string* into lines of at most *every* characters.

    Args:
        string: The text to wrap.
        every: Maximum line length.  A value of zero or less disables
            wrapping, so the text is returned unchanged.

    Returns:
        The chunks of *string* joined with newline characters.  No trailing
        newline is added.
    """
    if every <= 0:
        # range() rejects a zero step and yields nothing for a negative one,
        # which would either crash or silently drop the whole text.
        return string
    return '\n'.join(string[i:i + every] for i in range(0, len(string), every))
def _fail(message: str, hint: bool = False):
    """Print *message* to stderr in coreutils style and exit with status 1."""
    print(f'base114514: {message}', file=sys.stderr)
    if hint:
        print(_('Try \'base114514 --help\' for more information.'), file=sys.stderr)
    sys.exit(1)


def _parse_wrap(text: str) -> int:
    """Convert the argument of -w/--wrap into a non-negative column count."""
    try:
        columns = int(text)
    except ValueError:
        columns = -1
    if columns < 0:
        # Not routed through _(): the translation table has no entry for it.
        _fail(f"invalid wrap size: '{text}'")
    return columns


def _print_help() -> None:
    """Print the usage text to standard output."""
    print(_('Usage: base114514 [OPTION]... [FILE]'))
    print(_('Base114514 encode or decode FILE, or standard input, to standard output.\n'))
    print(_('With no FILE, or when FILE is -, read standard input.\n'))
    print(_('Mandatory arguments to long options are mandatory for short options too.'))
    print(f' -d, --decode {_("decode data")}')
    print(f' -i, --ignore-garbage {_("when decoding, ignore non-numeric characters")}')
    print(f' -w, --wrap={_("COLS")} {_("wrap encoded lines after COLS character (default 76).")}')
    print(f' {_("Use 0 to disable line wrapping")}')
    print(f' --version {_("output version information and exit")}\n')
    print(_('The data are encoded as described for the base114514 alphabet by YidaozhanYa.'))
    print(_('When decoding, the input may contain newlines in addition to the bytes of'))
    print(_('the formal base114514 alphabet. Use --ignore-garbage to attempt to recover'))
    print(_('from any other non-numeric bytes in the encoded stream.'))


def _print_version() -> None:
    """Print the version banner to standard output."""
    print('base114514 (下北沢 coreutils) 0.4\n'
          '\n'
          ' ▃▆█▇▄▖◣\n'
          ' ▟◤ ◥█▎\n'
          ' ◢◤ ▐ ▐▉\n'
          ' ▗◤ ▂ ▗▖ ▕█▎\n'
          ' ◤ ▗▅▖◥▄ ▀◣ █▊\n'
          ' ▐ ▕▎◥▖◣◤ ◢██\n'
          ' █◣ ◥▅█▀ ▐██◤\n'
          ' ◥██◣ ◢██◤\n'
          ' ◥██◣ ◢▄◤\n'
          ' ▀██▅▇▀\n\n'
          f'{_("Huh, huh, huh, ah ah ah ah ah ah ah ah ah ah ah ah ah ah ah ah ah ah ah ah ah")}!')


def _read_input(file_name: str) -> bytes:
    """Return the raw bytes of *file_name*, or of stdin for '' and '-'."""
    if file_name in ('', '-'):
        return sys.stdin.buffer.read()
    try:
        # Binary mode plus a context manager: no text decoding, no leaked handle.
        with open(file_name, 'rb') as input_file:
            return input_file.read()
    except OSError as err:
        _fail(f'{file_name}: {err.strerror or err}')


def main():
    """Command-line entry point mirroring the interface of coreutils' base64.

    Parses the options from ``sys.argv`` (without modifying it), then encodes
    or decodes FILE, or standard input when FILE is missing or ``-``, and
    writes the result to standard output.  ``--help`` and ``--version`` exit
    with status 0; usage errors, unreadable files and undecodable input are
    reported on stderr and exit with status 1.
    """
    wrap: int = 76
    decode_mode: bool = False
    ignore_garbage_mode: bool = False
    file_name: str = ""

    args = sys.argv[1:]  # drop the script path
    if args and args[0] == 'base114514':
        args = args[1:]  # remove base114514 itself

    remaining = iter(args)
    for arg in remaining:
        if arg == '--help':
            _print_help()
            sys.exit()
        elif arg == '--version':
            _print_version()
            sys.exit()
        elif arg in ('-w', '--wrap'):
            # Separated form: the column count is the next argument.
            value = next(remaining, None)
            if value is None:
                _fail(f"option requires an argument -- '{arg.lstrip('-')}'", hint=True)
            wrap = _parse_wrap(value)
        elif arg.startswith('--wrap='):
            wrap = _parse_wrap(arg[len('--wrap='):])
        elif arg.startswith('--wrap'):
            # Anything else beginning with --wrap is not a known option.
            _fail(f'{_("unrecognized option")} \'{arg}\'', hint=True)
        elif arg.startswith('-w'):
            wrap = _parse_wrap(arg[2:])
        elif arg in ('-d', '--decode'):
            decode_mode = True
        elif arg in ('-i', '--ignore-garbage'):
            ignore_garbage_mode = True
        elif arg.startswith('-') and arg != '-':
            _fail(f'{_("unrecognized option")} \'{arg}\'', hint=True)
        elif file_name == "":
            # A lone '-' is an operand meaning "read standard input".
            file_name = arg
        else:
            _fail(f'{_("extra operand")} \'{arg}\'', hint=True)

    raw = _read_input(file_name)

    if not decode_mode:
        encoded_string = b114514encode(raw).decode()
        if wrap == 0:
            print(encoded_string, end='')
        else:
            print(wraps(encoded_string, wrap), end='')
        return

    # The encoder wraps its output, so line breaks may appear anywhere in the
    # stream; remove them before the (possibly strict) decoder sees the data.
    payload = raw.strip(b' \n\r').translate(None, b'\r\n')
    try:
        decoded = b114514decode(payload, not ignore_garbage_mode)
    except BinASCIIError as err:
        _fail(str(err))
    except KeyError:
        # A four-digit group that is not a code word.
        _fail(_('Non-base114514 digit found'))
    # Decoded data is arbitrary binary: write the bytes as they are instead of
    # forcing them through a text decode.
    sys.stdout.flush()
    sys.stdout.buffer.write(decoded)
    sys.stdout.buffer.flush()


if __name__ == '__main__':
    main()