diff --git a/pyglossary/gregorian.py b/pyglossary/gregorian.py index ce11c1723..ab8508817 100644 --- a/pyglossary/gregorian.py +++ b/pyglossary/gregorian.py @@ -29,37 +29,6 @@ name = "gregorian" desc = "Gregorian" -monthName = ( - "January", - "February", - "March", - "April", - "May", - "June", - "July", - "August", - "September", - "October", - "November", - "December", -) - -monthNameAb = ( - "Jan", - "Feb", - "Mar", - "Apr", - "May", - "Jun", - "Jul", - "Aug", - "Sep", - "Oct", - "Nov", - "Dec", -) - - epoch = 1721426 options = () diff --git a/pyglossary/json_utils.py b/pyglossary/json_utils.py index 8c0a3d417..ac8ce9166 100644 --- a/pyglossary/json_utils.py +++ b/pyglossary/json_utils.py @@ -12,7 +12,7 @@ def dataToPrettyJson( - data: "JsonEncodable", + data: JsonEncodable, ensure_ascii: bool = False, sort_keys: bool = False, ) -> str: @@ -24,7 +24,7 @@ def dataToPrettyJson( ) -def jsonToData(st: "AnyStr") -> "JsonEncodable": +def jsonToData(st: "AnyStr") -> JsonEncodable: return json.loads(st) diff --git a/pyglossary/plugins/appledict_bin/appledict_file_tools.py b/pyglossary/plugins/appledict_bin/appledict_file_tools.py index 073abae5a..518d72095 100644 --- a/pyglossary/plugins/appledict_bin/appledict_file_tools.py +++ b/pyglossary/plugins/appledict_bin/appledict_file_tools.py @@ -32,12 +32,6 @@ APPLEDICT_FILE_OFFSET = 0x40 # addressing of AppleDict binary files always ignores first 0x40 bytes - -def readIntAt(buffer: "io.BufferedIOBase", address: int) -> int: - buffer.seek(address) - return unpack("i", buffer.read(4))[0] - - def readIntPair(buffer: "io.BufferedIOBase") -> "tuple[int, int]": # to satisfy mymy, put them in vars with declared type a: int @@ -54,11 +48,6 @@ def read_x_bytes_as_word(buffer: "io.BufferedIOBase", x: int) -> str: return buffer.read(x).decode("UTF-16LE") -def read_2_bytes(buffer: "io.BufferedIOBase", address: int) -> int: - buffer.seek(address) - return read_2_bytes_here(buffer) - - def read_2_bytes_here(buffer: "io.BufferedIOBase") -> int: lower_byte = buffer.read(1) higher_byte = buffer.read(1) diff --git a/pyglossary/plugins/appledict_bin/key_data.py b/pyglossary/plugins/appledict_bin/key_data.py index 4bc4a9e3d..30f0e23ac 100644 --- a/pyglossary/plugins/appledict_bin/key_data.py +++ b/pyglossary/plugins/appledict_bin/key_data.py @@ -49,24 +49,24 @@ class KeyData: This class contains texts by which entry is searchable and other properties. """ - keyword_data_id_xml = { - "DCSKeyword": "d:value", - # Search key -- if entered in search, this key will provide this definition. - "DCSHeadword": "d:title", - # Headword text that is displayed on the search result list. - # When the value is the same as d:value, it can be omitted. - # In that case, the value of the d:value is used also for the d:title. - "DCSAnchor": "d:anchor", - # Used to highlight a specific part in an entry. - # For example, it is used to highlight an idiomatic phrase explanation - # in an entry for a word. - "DCSYomiWord": "d:yomi", - # Used only in making Japanese dictionaries. - "DCSSortKey": "d:DCSSortKey", - # This value shows sorting (probably for non-english languages) - "DCSEntryTitle": "d:DCSEntryTitle", - # Headword displayed as article title - } + # keyword_data_id_xml = { + # "DCSKeyword": "d:value", + # # Search key -- if entered in search, this key will provide this definition. + # "DCSHeadword": "d:title", + # # Headword text that is displayed on the search result list. + # # When the value is the same as d:value, it can be omitted. + # # In that case, the value of the d:value is used also for the d:title. + # "DCSAnchor": "d:anchor", + # # Used to highlight a specific part in an entry. + # # For example, it is used to highlight an idiomatic phrase explanation + # # in an entry for a word. + # "DCSYomiWord": "d:yomi", + # # Used only in making Japanese dictionaries. + # "DCSSortKey": "d:DCSSortKey", + # # This value shows sorting (probably for non-english languages) + # "DCSEntryTitle": "d:DCSEntryTitle", + # # Headword displayed as article title + # } __slots__ = [ "anchor", diff --git a/pyglossary/plugins/babylon_bgl/bgl_text.py b/pyglossary/plugins/babylon_bgl/bgl_text.py index 1da6a28c0..87417f1a9 100644 --- a/pyglossary/plugins/babylon_bgl/bgl_text.py +++ b/pyglossary/plugins/babylon_bgl/bgl_text.py @@ -112,11 +112,11 @@ def replaceHtmlEntryCB(u_match: "re.Match") -> str: return xml_escape(u_res, quotation=False) -def replaceDingbat(u_match: "re.Match") -> str: - r"""Replace chars \\u008c-\\u0095 with \\u2776-\\u277f.""" - ch = u_match.group(0) - code = ch + 0x2776 - 0x8C - return chr(code) +# def replaceDingbat(u_match: "re.Match") -> str: +# r"""Replace chars \\u008c-\\u0095 with \\u2776-\\u277f.""" +# ch = u_match.group(0) +# code = ch + 0x2776 - 0x8C +# return chr(code) def escapeNewlinesCallback(u_match: "re.Match") -> str: diff --git a/pyglossary/plugins/cc_cedict/conv.py b/pyglossary/plugins/cc_cedict/conv.py index 126d385b0..c00d11a7b 100644 --- a/pyglossary/plugins/cc_cedict/conv.py +++ b/pyglossary/plugins/cc_cedict/conv.py @@ -1,4 +1,3 @@ -import os import re from typing import TYPE_CHECKING, cast @@ -15,8 +14,6 @@ line_reg = re.compile(r"^([^ ]+) ([^ ]+) \[([^\]]+)\] /(.+)/$") -script_dir = os.path.dirname(__file__) - COLORS = { "": "black", "1": "red", diff --git a/pyglossary/plugins/stardict.py b/pyglossary/plugins/stardict.py index 1f9c8ba8b..ba3051ecc 100644 --- a/pyglossary/plugins/stardict.py +++ b/pyglossary/plugins/stardict.py @@ -167,16 +167,6 @@ def transformByInnerString(self, text: str) -> str: ... -class SupportsDunderLT(Protocol): - def __lt__(self, __other: Any) -> bool: - ... - - -class SupportsDunderGT(Protocol): - def __gt__(self, __other: Any) -> bool: - ... - - T_SDListItem = TypeVar("T_SDListItem", contravariant=True) diff --git a/pyglossary/slob.py b/pyglossary/slob.py index 8411204ed..cefc9fcc7 100644 --- a/pyglossary/slob.py +++ b/pyglossary/slob.py @@ -187,24 +187,6 @@ def compress_new(x: bytes, m: CompressionModule = m) -> bytes: MIME_TEXT = "text/plain" MIME_HTML = "text/html" -MIME_CSS = "text/css" -MIME_JS = "application/javascript" - -MIME_TYPES = { - "html": MIME_HTML, - "txt": MIME_TEXT, - "js": MIME_JS, - "css": MIME_CSS, - "json": "application/json", - "woff": "application/font-woff", - "svg": "image/svg+xml", - "png": "image/png", - "jpg": "image/jpeg", - "jpeg": "image/jpeg", - "gif": "image/gif", - "ttf": "application/x-font-ttf", - "otf": "application/x-font-opentype", -} class FileFormatException(Exception): @@ -227,10 +209,6 @@ class IncorrectFileSize(FileFormatException): pass -class TagNotFound(Exception): - pass - - @cache def sortkey( strength: int, @@ -582,54 +560,6 @@ def write(self, data: bytes) -> int: return self._file.write(data) -class StructReaderWriter(StructWriter): - def __init__( - self, - _file: "io.BufferedWriter", - reader: "StructReader", - encoding: "str | None" = None, - ) -> None: - super().__init__( - _file=_file, - encoding=encoding, - ) - self._reader = reader - - def tell(self) -> int: - return self._file.tell() - - def write(self, data: bytes) -> int: - return self._file.write(data) - - def read_byte(self) -> int: - return self._reader.read_byte() - - def read_tiny_text(self) -> str: - return self._reader.read_tiny_text() - - -def set_tag_value(filename: str, name: str, value: str) -> None: - with fopen(filename, "rb+") as _file: - _file.seek(len(MAGIC) + 16) - encoding = read_byte_string(_file, U_CHAR).decode(UTF8) - if encodings.search_function(encoding) is None: - raise UnknownEncoding(encoding) - reader = StructReaderWriter( - _file=_file, - reader=StructReader(_file, encoding=encoding), - encoding=encoding, - ) - reader.read_tiny_text() - tag_count = reader.read_byte() - for _ in range(tag_count): - key = reader.read_tiny_text() - if key == name: - reader.write_tiny_text(value, editable=True) - return - reader.read_tiny_text() - raise TagNotFound(name) - - def read_header(_file: "MultiFileReader") -> Header: _file.seek(0) @@ -825,19 +755,6 @@ def close(self) -> None: self._g.close() -def find_parts(fname: str) -> "list[str]": - fname = os.path.expanduser(fname) - dirname = os.path.dirname(fname) or os.getcwd() - basename = os.path.basename(fname) - return sorted( - [ - os.path.join(dirname, name) - for name in os.listdir(dirname) - if name.startswith(basename) - ], - ) - - def open(*filenames: str) -> Slob: return Slob(*filenames) diff --git a/pyglossary/text_utils.py b/pyglossary/text_utils.py index 753c127b9..6e43fbdce 100644 --- a/pyglossary/text_utils.py +++ b/pyglossary/text_utils.py @@ -71,7 +71,6 @@ def fixUtf8(st: "AnyStr") -> str: pattern_t_us = re.compile(r"((? "str": return "|".join(escapeBar(part) for part in parts) -def unescapeBarBytes(st: bytes) -> bytes: - r"""Unscapes vertical bar (\|).""" - # str.replace is probably faster than re.sub - return b_pattern_bar_us.sub(b"\\1|", st).replace(b"\\\\", b"\\") - - # return a message string describing the current exception def excMessage() -> str: i = sys.exc_info() @@ -152,14 +145,6 @@ def excMessage() -> str: return f"{i[0].__name__}: {i[1]}" -def formatHMS(h: int, m: int, s: int) -> str: - if h == 0: - if m == 0: - return f"{s:02d}" - return f"{m:02d}:{s:02d}" - return f"{h:02d}:{m:02d}:{s:02d}" - - # ___________________________________________ # diff --git a/pyglossary/text_utils_extra.py b/pyglossary/text_utils_extra.py index f44aa6862..a094bc66e 100644 --- a/pyglossary/text_utils_extra.py +++ b/pyglossary/text_utils_extra.py @@ -1,3 +1,19 @@ +import re + +b_pattern_bar_us = re.compile(r"((? bytes: + r"""Unscapes vertical bar (\|).""" + # str.replace is probably faster than re.sub + return b_pattern_bar_us.sub(b"\\1|", st).replace(b"\\\\", b"\\") + + def chBaseIntToStr(number: int, base: int) -> str: """Reverse function of int(str, base) and long(str, base).""" import string @@ -17,3 +33,11 @@ def chBaseIntToStr(number: int, base: int) -> str: if number == 0: return sign + result return "" + + +def formatHMS(h: int, m: int, s: int) -> str: + if h == 0: + if m == 0: + return f"{s:02d}" + return f"{m:02d}:{s:02d}" + return f"{h:02d}:{m:02d}:{s:02d}" diff --git a/pyglossary/text_writer.py b/pyglossary/text_writer.py index 15dde83f3..dcee5fdcc 100644 --- a/pyglossary/text_writer.py +++ b/pyglossary/text_writer.py @@ -248,26 +248,3 @@ def writeTxt( writer.finish() -def writeTabfile( - glos: "GlossaryType", - filename: str = "", - encoding: str = "utf-8", - resources: bool = True, -) -> "Generator[None, EntryType, None]": - from pyglossary.text_utils import escapeNTB - - writer = TextGlossaryWriter( - glos, - entryFmt="{word}\t{defi}\n", - outInfoKeysAliasDict=None, - ) - writer.setAttrs( - encoding=encoding, - wordEscapeFunc=escapeNTB, - defiEscapeFunc=escapeNTB, - ext=".txt", - resources=resources, - ) - writer.open(filename) - yield from writer.write() - writer.finish() diff --git a/tests/slob_test.py b/tests/slob_test.py index bde62393b..ac9822dfe 100644 --- a/tests/slob_test.py +++ b/tests/slob_test.py @@ -22,6 +22,58 @@ log.addHandler(mockLog) +class StructReaderWriter(slob.StructWriter): + def __init__( + self, + _file: "io.BufferedWriter", + reader: "slob.StructReader", + encoding: "str | None" = None, + ) -> None: + super().__init__( + _file=_file, + encoding=encoding, + ) + self._reader = reader + + def tell(self) -> int: + return self._file.tell() + + def write(self, data: bytes) -> int: + return self._file.write(data) + + def read_byte(self) -> int: + return self._reader.read_byte() + + def read_tiny_text(self) -> str: + return self._reader.read_tiny_text() + + +class TagNotFound(Exception): + pass + + +def set_tag_value(filename: str, name: str, value: str) -> None: + with slob.fopen(filename, "rb+") as _file: + _file.seek(len(slob.MAGIC) + 16) + encoding = slob.read_byte_string(_file, slob.U_CHAR).decode(slob.UTF8) + if slob.encodings.search_function(encoding) is None: + raise slob.UnknownEncoding(encoding) + reader = StructReaderWriter( + _file=_file, + reader=slob.StructReader(_file, encoding=encoding), + encoding=encoding, + ) + reader.read_tiny_text() + tag_count = reader.read_byte() + for _ in range(tag_count): + key = reader.read_tiny_text() + if key == name: + reader.write_tiny_text(value, editable=True) + return + reader.read_tiny_text() + raise TagNotFound(name) + + class BaseTest(unittest.TestCase): def setUp(self): # if skip_module: @@ -549,18 +601,6 @@ def test_truncated_file(self): self.assertRaises(slob.IncorrectFileSize, slob.open, name) -class TestFindParts(BaseTest): - def test_find_parts(self): - names = [ - os.path.join(self.tmpdir.name, name) for name in ("abc-1", "abc-2", "abc-3") - ] - for name in names: - with slob.fopen(name, "wb"): - pass - parts = slob.find_parts(os.path.join(self.tmpdir.name, "abc")) - self.assertEqual(names, parts) - - class TestTooLongText(BaseTest): def setUp(self): BaseTest.setUp(self) @@ -655,7 +695,7 @@ def observer(event): self.assertRaises( ValueError, - slob.set_tag_value, + set_tag_value, self.path, "t1", "ы" * 128, @@ -675,14 +715,14 @@ def test_edit_existing_tag(self): with slob.open(self.path) as f: self.assertEqual(f.tags["a"], "123456") self.assertEqual(f.tags["b"], "654321") - slob.set_tag_value(self.path, "b", "efg") - slob.set_tag_value(self.path, "a", "xyz") + set_tag_value(self.path, "b", "efg") + set_tag_value(self.path, "a", "xyz") with slob.open(self.path) as f: self.assertEqual(f.tags["a"], "xyz") self.assertEqual(f.tags["b"], "efg") def test_edit_nonexisting_tag(self): - self.assertRaises(slob.TagNotFound, slob.set_tag_value, self.path, "z", "abc") + self.assertRaises(TagNotFound, set_tag_value, self.path, "z", "abc") class TestBinItemNumberLimit(BaseTest): diff --git a/tests/text_utils_extra_test.py b/tests/text_utils_extra_test.py new file mode 100644 index 000000000..4cbe0be61 --- /dev/null +++ b/tests/text_utils_extra_test.py @@ -0,0 +1,39 @@ +import sys +import unittest +from os.path import abspath, dirname + +rootDir = dirname(dirname(abspath(__file__))) +sys.path.insert(0, rootDir) + +from pyglossary.text_utils_extra import ( + formatHMS, + unescapeBarBytes, +) + + +class TestTextUtilsExtra(unittest.TestCase): + def test_formatHMS(self): + f = formatHMS + self.assertEqual(f(0, 0, 0), "00") + self.assertEqual(f(0, 0, 9), "09") + self.assertEqual(f(0, 0, 10), "10") + self.assertEqual(f(0, 0, 59), "59") + self.assertEqual(f(0, 1, 0), "01:00") + self.assertEqual(f(0, 1, 5), "01:05") + self.assertEqual(f(0, 5, 7), "05:07") + self.assertEqual(f(0, 59, 0), "59:00") + self.assertEqual(f(0, 59, 59), "59:59") + self.assertEqual(f(1, 0, 0), "01:00:00") + self.assertEqual(f(123, 5, 7), "123:05:07") + self.assertEqual(f(123, 59, 59), "123:59:59") + + + def test_unescapeBarBytes(self): + f = unescapeBarBytes + self.assertEqual(b"", f(b"")) + self.assertEqual(b"|", f(b"\\|")) + self.assertEqual(b"a|b", f(b"a\\|b")) + self.assertEqual(b"a|b\tc", f(b"a\\|b\tc")) + self.assertEqual(b"a|b\\t\\nc", f(b"a\\|b\\t\\nc")) + self.assertEqual(b"\\", f(b"\\\\")) + self.assertEqual(b"\\|", f(b"\\\\\\|")) diff --git a/tests/text_utils_test.py b/tests/text_utils_test.py index 830d5fbfa..c55d9f7ca 100644 --- a/tests/text_utils_test.py +++ b/tests/text_utils_test.py @@ -11,7 +11,6 @@ crc32hex, escapeNTB, fixUtf8, - formatHMS, isASCII, joinByBar, replacePostSpaceChar, @@ -21,7 +20,6 @@ uint32ToBytes, uintFromBytes, unescapeBar, - unescapeBarBytes, unescapeNTB, urlToPath, ) @@ -117,31 +115,6 @@ def test_joinByBar(self): self.assertEqual("a\\|b|c", f(["a|b", "c"])) self.assertEqual("a\\\\1|b|c", f(["a\\1", "b", "c"])) - def test_unescapeBarBytes(self): - f = unescapeBarBytes - self.assertEqual(b"", f(b"")) - self.assertEqual(b"|", f(b"\\|")) - self.assertEqual(b"a|b", f(b"a\\|b")) - self.assertEqual(b"a|b\tc", f(b"a\\|b\tc")) - self.assertEqual(b"a|b\\t\\nc", f(b"a\\|b\\t\\nc")) - self.assertEqual(b"\\", f(b"\\\\")) - self.assertEqual(b"\\|", f(b"\\\\\\|")) - - def test_formatHMS(self): - f = formatHMS - self.assertEqual(f(0, 0, 0), "00") - self.assertEqual(f(0, 0, 9), "09") - self.assertEqual(f(0, 0, 10), "10") - self.assertEqual(f(0, 0, 59), "59") - self.assertEqual(f(0, 1, 0), "01:00") - self.assertEqual(f(0, 1, 5), "01:05") - self.assertEqual(f(0, 5, 7), "05:07") - self.assertEqual(f(0, 59, 0), "59:00") - self.assertEqual(f(0, 59, 59), "59:59") - self.assertEqual(f(1, 0, 0), "01:00:00") - self.assertEqual(f(123, 5, 7), "123:05:07") - self.assertEqual(f(123, 59, 59), "123:59:59") - def test_uint32ToBytes(self): f = uint32ToBytes