diff --git a/doc/lib-examples/oxford.py b/doc/lib-examples/oxford.py index 1ffb2ce6a..c96856e12 100644 --- a/doc/lib-examples/oxford.py +++ b/doc/lib-examples/oxford.py @@ -44,6 +44,6 @@ def takePhonetic_oxford_gb(glos): # .replace("/", "") # .replace("\\n ", "\\n") # .replace("\\n ", "\\n") - if ph != "": + if ph: phonGlos.addEntryObj(phonGlos.newEntry(word, ph)) return phonGlos diff --git a/pyglossary/core.py b/pyglossary/core.py index d32a730f2..7c00fd4bc 100644 --- a/pyglossary/core.py +++ b/pyglossary/core.py @@ -241,7 +241,7 @@ def emit(self, record: logging.LogRecord) -> None: ### levelname = record.levelname - fp = sys.stderr if levelname in ("CRITICAL", "ERROR") else sys.stdout + fp = sys.stderr if levelname in {"CRITICAL", "ERROR"} else sys.stdout if not self.noColor and levelname in self.colorsConfig: key, default = self.colorsConfig[levelname] @@ -269,7 +269,10 @@ def checkCreateConfDir() -> None: except Exception as e: log.warning(f"failed to create user plugins directory: {e}") if not isfile(confJsonFile): - with open(rootConfJsonFile) as srcF, open(confJsonFile, "w") as usrF: + with ( + open(rootConfJsonFile, encoding="utf-8") as srcF, + open(confJsonFile, "w", encoding="utf-8") as usrF, + ): usrF.write(srcF.read()) @@ -385,7 +388,7 @@ def _unix_show_exception( # can set env var WARNINGS to: # "error", "ignore", "always", "default", "module", "once" if WARNINGS := os.getenv("WARNINGS"): - if WARNINGS in ("default", "error", "ignore", "always", "module", "once"): + if WARNINGS in {"default", "error", "ignore", "always", "module", "once"}: import warnings warnings.filterwarnings(WARNINGS) # type: ignore # noqa: PGH003 diff --git a/pyglossary/entry.py b/pyglossary/entry.py index e090987f4..bd1ca623a 100644 --- a/pyglossary/entry.py +++ b/pyglossary/entry.py @@ -39,7 +39,8 @@ class DataEntry(BaseEntry): "_tmpPath", ] - def isData(self) -> bool: + @classmethod + def isData(cls) -> bool: return True def __init__( @@ -185,7 +186,8 @@ class Entry(BaseEntry): "_word", ] - def isData(self) -> bool: + @classmethod + def isData(cls) -> bool: return False @staticmethod @@ -234,7 +236,7 @@ def __init__( elif not isinstance(defi, str): raise TypeError(f"invalid defi type {type(defi)}") - if defiFormat not in ("m", "h", "x"): + if defiFormat not in {"m", "h", "x"}: raise ValueError(f"invalid defiFormat {defiFormat!r}") self._word = word @@ -332,7 +334,8 @@ def editFuncDefi(self, func: "Callable[[str], str]") -> None: """ self._defi = func(self._defi) - def _stripTrailingBR(self, s: str) -> str: + @classmethod + def _stripTrailingBR(cls, s: str) -> str: while s.endswith(("
", "
")): s = s[:-4] return s @@ -377,9 +380,8 @@ def stripFullHtml(self) -> "str | None": defi = defi[len("") :].strip() if not defi.startswith(" but no " - else: - if not defi.startswith(""): - return None + elif not defi.startswith(""): + return None i = defi.find(" None: def prepare(self) -> None: """Run this after glossary info is set and ready.""" - def run(self, entry: "EntryType") -> "EntryType | None": + def run(self, entry: "EntryType") -> "EntryType | None": # noqa: PLR6301 """ Return an Entry object, or None to skip. @@ -69,7 +69,7 @@ class TrimWhitespaces(EntryFilter): name = "trim_whitespaces" desc = "Remove leading/trailing whitespaces from word(s) and definition" - def run(self, entry: "EntryType") -> "EntryType | None": + def run(self, entry: "EntryType") -> "EntryType | None": # noqa: PLR6301 entry.strip() entry.replace("\r", "") return entry @@ -79,7 +79,7 @@ class NonEmptyWordFilter(EntryFilter): name = "non_empty_word" desc = "Skip entries with empty word" - def run(self, entry: "EntryType") -> "EntryType | None": + def run(self, entry: "EntryType") -> "EntryType | None": # noqa: PLR6301 if not entry.s_word: return None return entry @@ -89,7 +89,7 @@ class NonEmptyDefiFilter(EntryFilter): name = "non_empty_defi" desc = "Skip entries with empty definition" - def run(self, entry: "EntryType") -> "EntryType | None": + def run(self, entry: "EntryType") -> "EntryType | None": # noqa: PLR6301 if not entry.defi: return None return entry @@ -99,7 +99,7 @@ class RemoveEmptyAndDuplicateAltWords(EntryFilter): name = "remove_empty_dup_alt_words" desc = "Remove empty and duplicate alternate words" - def run(self, entry: "EntryType") -> "EntryType | None": + def run(self, entry: "EntryType") -> "EntryType | None": # noqa: PLR6301 entry.removeEmptyAndDuplicateAltWords() if not entry.l_word: return None @@ -111,7 +111,7 @@ class FixUnicode(EntryFilter): desc = "Fix Unicode in word(s) and definition" falseComment = "Do not fix Unicode in word(s) and definition" - def run(self, entry: "EntryType") -> "EntryType | None": + def run(self, entry: "EntryType") -> "EntryType | None": # noqa: PLR6301 entry.editFuncWord(fixUtf8) entry.editFuncDefi(fixUtf8) return entry @@ -142,7 +142,7 @@ class RTLDefi(EntryFilter): name = "rtl" desc = "Make definition right-to-left" - def run(self, entry: "EntryType") -> "EntryType | None": + def run(self, entry: "EntryType") -> "EntryType | None": # noqa: PLR6301 entry.editFuncDefi(lambda defi: f'
{defi}
') return entry @@ -271,7 +271,8 @@ def __init__( re.DOTALL | re.IGNORECASE, ) - def _subLower(self, m: "re.Match") -> str: + @staticmethod + def _subLower(m: "re.Match") -> str: return m.group(0).lower() def _fixDefi(self, st: str) -> str: @@ -286,7 +287,7 @@ class SkipDataEntry(EntryFilter): name = "skip_resources" desc = "Skip resources / data files" - def run(self, entry: "EntryType") -> "EntryType | None": + def run(self, entry: "EntryType") -> "EntryType | None": # noqa: PLR6301 if entry.isData(): return None return entry @@ -310,7 +311,7 @@ def prepare(self) -> None: self._run_func = self.run_fa log.info("Using Persian filter") - def run_fa(self, entry: "EntryType") -> "EntryType | None": + def run_fa(self, entry: "EntryType") -> "EntryType | None": # noqa: PLR6301 from .persian_utils import faEditStr entry.editFuncWord(faEditStr) @@ -492,6 +493,7 @@ def run(self, entry: "EntryType") -> "EntryType | None": class ShowMaxMemoryUsage(EntryFilter): name = "max_memory_usage" desc = "Show Max Memory Usage" + MAX_WORD_LEN = 30 def __init__(self, glos: "GlossaryType") -> None: import os @@ -507,8 +509,8 @@ def run(self, entry: "EntryType") -> "EntryType | None": if usage > self._max_mem_usage: self._max_mem_usage = usage word = entry.s_word - if len(word) > 30: - word = word[:37] + "..." + if len(word) > self.MAX_WORD_LEN: + word = word[:self.MAX_WORD_LEN - 3] + "..." core.trace(log, f"MaxMemUsage: {usage:,}, {word=}") return entry diff --git a/pyglossary/glossary.py b/pyglossary/glossary.py index 825fdc942..674b309b1 100644 --- a/pyglossary/glossary.py +++ b/pyglossary/glossary.py @@ -96,7 +96,8 @@ def read( def addEntryObj(self, entry: "EntryType") -> None: self._data.append(entry) - def updateIter(self) -> None: + @staticmethod + def updateIter() -> None: log.warning("calling glos.updateIter() is no longer needed.") def sortWords( diff --git a/pyglossary/glossary_info.py b/pyglossary/glossary_info.py index 624985864..a95e8f66a 100644 --- a/pyglossary/glossary_info.py +++ b/pyglossary/glossary_info.py @@ -108,7 +108,8 @@ def author(self) -> str: return value return "" - def _getLangByStr(self, st: str) -> "Lang | None": + @staticmethod + def _getLangByStr(st: str) -> "Lang | None": lang = langDict[st] if lang: return lang @@ -209,13 +210,13 @@ def checkPart(part: str) -> None: for part in re.split("-| to ", name): # print(f"{part = }") checkPart(part) - if len(langNames) >= 2: + if len(langNames) >= 2: # noqa: PLR2004 break - if len(langNames) < 2: + if len(langNames) < 2: # noqa: PLR2004 return - if len(langNames) > 2: + if len(langNames) > 2: # noqa: PLR2004 log.info(f"detectLangsFromName: {langNames = }") log.info( diff --git a/pyglossary/glossary_progress.py b/pyglossary/glossary_progress.py index f5318378e..8fe0c6b10 100644 --- a/pyglossary/glossary_progress.py +++ b/pyglossary/glossary_progress.py @@ -29,7 +29,7 @@ def progressbar(self, enabled: bool) -> None: def progressInit( self, - *args, # noqa: ANN + *args, # noqa: ANN003 ) -> None: if self._ui and self._progressbar: self._ui.progressInit(*args) diff --git a/pyglossary/glossary_utils.py b/pyglossary/glossary_utils.py index 05667ddf6..e6a69c6cd 100644 --- a/pyglossary/glossary_utils.py +++ b/pyglossary/glossary_utils.py @@ -31,6 +31,8 @@ log = logging.getLogger("pyglossary") +MAX_EXT_LEN = 4 # FIXME + def splitFilenameExt( filename: str = "", @@ -40,13 +42,13 @@ def splitFilenameExt( filenameNoExt, ext = splitext(filename) ext = ext.lower() - if not ext and len(filenameNoExt) < 5: + if not ext and len(filenameNoExt) <= MAX_EXT_LEN: filenameNoExt, ext = "", filenameNoExt if not ext: return filename, filename, "", "" - if ext[1:] in (*stdCompressions, "zip", "dz"): + if ext[1:] in {*stdCompressions, "zip", "dz"}: compression = ext[1:] filename = filenameNoExt filenameNoExt, ext = splitext(filename) diff --git a/pyglossary/glossary_v2.py b/pyglossary/glossary_v2.py index 76c2db64a..f9354042a 100644 --- a/pyglossary/glossary_v2.py +++ b/pyglossary/glossary_v2.py @@ -258,7 +258,7 @@ def _entryFromRaw(self, rawEntryArg: "RawEntryType") -> "EntryType": rawEntry = rawEntryArg word = rawEntry[0] defi = rawEntry[1].decode("utf-8") - if len(rawEntry) > 2: + if len(rawEntry) > 2: # noqa: PLR2004 defiFormat = rawEntry[2] if defiFormat == "b": fname = word @@ -408,7 +408,7 @@ def _applyEntryFiltersGen( if entry is None: continue for entryFilter in self._entryFilters: - entry = entryFilter.run(entry) + entry = entryFilter.run(entry) # noqa: PLW2901 if entry is None: break else: @@ -655,7 +655,7 @@ def _openReader(self, reader: "Any", filename: str) -> bool: self.progressInit("Reading metadata") lastPos = -100_000 for pos, total in openResult: - if progressbar and pos - lastPos > 100_000: + if progressbar and pos - lastPos > 100_000: # noqa: PLR2004 self.progress(pos, total, unit="bytes") lastPos = pos self.progressEnd() @@ -850,8 +850,8 @@ def _writeEntries( with suppress(StopIteration): gen.send(None) + @staticmethod def _openWriter( - self, writer: "Any", filename: str, ) -> bool: @@ -967,8 +967,8 @@ def _switchToSQLite( self._config["enable_alts"] = True self._sqlite = True + @staticmethod def _checkSortFlag( - self, plugin: "PluginProp", sort: "bool | None", ) -> "bool | None": @@ -1051,8 +1051,8 @@ def _resolveSortParams( return False, True + @staticmethod def _checkSortKey( - self, plugin: "PluginProp", sortKeyName: "str | None", sortEncoding: "str | None", @@ -1092,10 +1092,8 @@ def _checkSortKey( return namedSortKey, sortEncoding - def _convertValidateStrings( - self, - args: ConvertArgs, - ) -> None: + @staticmethod + def _convertValidateStrings(args: ConvertArgs) -> None: if type(args.inputFilename) is not str: raise TypeError("inputFilename must be str") if type(args.outputFilename) is not str: diff --git a/pyglossary/gregorian.py b/pyglossary/gregorian.py index ab8508817..6ed8668fa 100644 --- a/pyglossary/gregorian.py +++ b/pyglossary/gregorian.py @@ -43,10 +43,10 @@ def isLeap(y: int) -> bool: def to_jd(year: int, month: int, day: int) -> int: - if 0 < year < 10000: # > 1.5x faster + if 0 < year < 10000: # > 1.5x faster # noqa: PLR2004 return datetime(year, month, day).toordinal() + 1721425 - if month <= 2: + if month <= 2: # noqa: PLR2004 tm = 0 elif isLeap(year): tm = -1 @@ -67,7 +67,7 @@ def to_jd(year: int, month: int, day: int) -> int: def jd_to(jd: int) -> "tuple[int, int, int]": ordinal = jd - 1721425 - if 0 < ordinal < 3652060: # > 4x faster + if 0 < ordinal < 3652060: # > 4x faster # noqa: PLR2004 # datetime(9999, 12, 31).toordinal() == 3652059 dt = datetime.fromordinal(ordinal) return (dt.year, dt.month, dt.day) @@ -82,7 +82,7 @@ def jd_to(jd: int) -> "tuple[int, int, int]": cent * 100 + quad * 4 + yindex + - (cent != 4 and yindex != 4) + (cent != 4 and yindex != 4) # noqa: PLR2004 ) yearday = jd - to_jd(year, 1, 1) diff --git a/pyglossary/html_utils.py b/pyglossary/html_utils.py index 93c192897..7b629a7bb 100644 --- a/pyglossary/html_utils.py +++ b/pyglossary/html_utils.py @@ -99,8 +99,8 @@ "Egrave": 0x00C8, # È "egrave": 0x00E8, # è "empty": 0x2205, # ∅ - "emsp": 0x2003, # - "ensp": 0x2002, # + "emsp": 0x2003, + "ensp": 0x2002, "Epsilon": 0x0395, # Ε "epsilon": 0x03B5, # ε "equiv": 0x2261, # ≡ @@ -267,7 +267,7 @@ "Theta": 0x0398, # Θ "theta": 0x03B8, # θ "thetasym": 0x03D1, # ϑ - "thinsp": 0x2009, # + "thinsp": 0x2009, "THORN": 0x00DE, # Þ "thorn": 0x00FE, # þ "tilde": 0x02DC, # ˜ diff --git a/pyglossary/lxml_types.py b/pyglossary/lxml_types.py index 9cd1a8976..feca44c00 100644 --- a/pyglossary/lxml_types.py +++ b/pyglossary/lxml_types.py @@ -20,7 +20,7 @@ TypeAlias, ) -from lxml.etree import QName, _Element +from lxml.etree import QName, _Element # noqa: PLC2701 __all__ = ["Element", "T_htmlfile"] diff --git a/pyglossary/option.py b/pyglossary/option.py index c433b59dc..39c8facf1 100644 --- a/pyglossary/option.py +++ b/pyglossary/option.py @@ -101,7 +101,8 @@ def toDict(self) -> "dict[str, Any]": data["falseComment"] = self.falseComment return data - def evaluate(self, raw: str) -> "tuple[Any, bool]": + @classmethod + def evaluate(cls, raw: str) -> "tuple[Any, bool]": """Return (value, isValid).""" if raw == "None": return None, True @@ -130,7 +131,7 @@ def validateRaw(self, raw: str) -> bool: return False return True - def groupValues(self) -> "dict[str, Any] | None": + def groupValues(self) -> "dict[str, Any] | None": # noqa: PLR6301 return None @@ -139,7 +140,7 @@ class BoolOption(Option): def __init__( self, allowNone: bool = False, - **kwargs, # noqa: ANN + **kwargs, # noqa: ANN003 ) -> None: values: "list[bool | None]" = [False, True] if allowNone: @@ -150,7 +151,7 @@ def __init__( customValue=False, values=values, allowNone=allowNone, - **kwargs, # noqa: ANN + **kwargs, # noqa: ANN003 ) def toDict(self) -> "dict[str, Any]": @@ -159,8 +160,9 @@ def toDict(self) -> "dict[str, Any]": del data["values"] return data + @classmethod def evaluate( - self, + cls, raw: "str | bool", ) -> "tuple[bool | None, bool]": if raw is None: @@ -171,9 +173,9 @@ def evaluate( raw = raw.lower() if raw == "none": return None, True - if raw in ("yes", "true", "1"): + if raw in {"yes", "true", "1"}: return True, True - if raw in ("no", "false", "0"): + if raw in {"no", "false", "0"}: return False, True return None, False # not valid @@ -182,7 +184,7 @@ def evaluate( class StrOption(Option): def __init__( self, - **kwargs, # noqa: ANN + **kwargs, # noqa: ANN003 ) -> None: Option.__init__( self, @@ -201,7 +203,7 @@ def validate(self, value: "Any") -> bool: return value in self.values return type(value).__name__ == "str" - def groupValues(self) -> "dict[str, Any] | None": + def groupValues(self) -> "dict[str, Any] | None": # noqa: PLR6301 return None @@ -209,7 +211,7 @@ def groupValues(self) -> "dict[str, Any] | None": class IntOption(Option): def __init__( self, - **kwargs, # noqa: ANN + **kwargs, # noqa: ANN003 ) -> None: Option.__init__( self, @@ -217,7 +219,8 @@ def __init__( **kwargs, ) - def evaluate(self, raw: "str | int") -> "tuple[int | None, bool]": + @classmethod + def evaluate(cls, raw: "str | int") -> "tuple[int | None, bool]": """Return (value, isValid).""" try: value = int(raw) @@ -268,15 +271,16 @@ class FileSizeOption(IntOption): def typeDesc(self) -> str: return "" - def evaluate(self, raw: "str | int") -> "tuple[int | None, bool]": + @classmethod + def evaluate(cls, raw: "str | int") -> "tuple[int | None, bool]": if not raw: return 0, True factor = 1 if isinstance(raw, str): - m = re.match(self.validPattern, raw) + m = re.match(cls.validPattern, raw) if m is not None: raw, unit = m.groups() - factorTmp = self.factors.get(unit) + factorTmp = cls.factors.get(unit) if factorTmp is None: return None, False factor = factorTmp @@ -293,7 +297,7 @@ def evaluate(self, raw: "str | int") -> "tuple[int | None, bool]": class FloatOption(Option): def __init__( self, - **kwargs, # noqa: noqa: ANN + **kwargs, # noqa: ANN003 ) -> None: Option.__init__( self, @@ -301,8 +305,9 @@ def __init__( **kwargs, ) + @classmethod def evaluate( - self, + cls, raw: "str | float | int", ) -> "tuple[float | None, bool]": """Return (value, isValid).""" @@ -317,7 +322,7 @@ def evaluate( class DictOption(Option): def __init__( self, - **kwargs, # noqa: ANN + **kwargs, # noqa: ANN003 ) -> None: Option.__init__( self, @@ -333,15 +338,16 @@ def toDict(self) -> "dict[str, Any]": del data["customValue"] return data + @classmethod def evaluate( - self, + cls, raw: "str | dict", ) -> "tuple[dict | None, bool]": import ast if isinstance(raw, dict): return raw, True - if raw == "": + if raw == "": # noqa: PLC1901 return None, True # valid try: value = ast.literal_eval(raw) @@ -361,7 +367,7 @@ def __init__(self, **kwargs) -> None: customValue=True, allowNone=True, multiline=True, - **kwargs, # noqa: ANN + **kwargs, # noqa: ANN003 ) def toDict(self) -> "dict[str, Any]": @@ -369,10 +375,11 @@ def toDict(self) -> "dict[str, Any]": del data["customValue"] return data - def evaluate(self, raw: str) -> "tuple[list | None, bool]": + @classmethod + def evaluate(cls, raw: str) -> "tuple[list | None, bool]": import ast - if raw == "": + if raw == "": # noqa: PLC1901 return None, True # valid try: value = ast.literal_eval(raw) @@ -392,7 +399,7 @@ def __init__( customValue: bool = True, values: "list[str] | None" = None, comment: "str | None" = None, - **kwargs, # noqa: ANN + **kwargs, # noqa: ANN003 ) -> None: if values is None: values = [ @@ -431,7 +438,7 @@ def __init__( customValue=customValue, values=values, comment=comment, - **kwargs, # noqa: ANN + **kwargs, # noqa: ANN003 ) def toDict(self) -> "dict[str, Any]": @@ -468,7 +475,7 @@ def __init__( customValue: bool = True, values: "list[str] | None" = None, comment: "str | None" = None, - **kwargs, # noqa: ANN + **kwargs, # noqa: ANN003 ) -> None: if values is None: values = [ @@ -485,7 +492,7 @@ def __init__( values=values, multiline=True, comment=comment, - **kwargs, # noqa: ANN + **kwargs, # noqa: ANN003 ) @@ -501,6 +508,6 @@ def __init__(self, **kwargs) -> None: self, typ="str", customValue=True, - **kwargs, # noqa: ANN + **kwargs, # noqa: ANN003 ) # TODO: use a specific type? diff --git a/pyglossary/os_utils.py b/pyglossary/os_utils.py index 93b221611..20682ab9f 100644 --- a/pyglossary/os_utils.py +++ b/pyglossary/os_utils.py @@ -106,9 +106,9 @@ def _dictzip(filename: str | Path) -> bool: def runDictzip(filename: str | Path, method: str = "") -> None: """Compress file into dictzip format.""" res = None - if method in ["", "idzip"]: + if method in {"", "idzip"}: res = _idzip(filename) - if not res and method in ["", "dictzip"]: + if not res and method in {"", "dictzip"}: res = _dictzip(filename) if not res: log.warning( diff --git a/pyglossary/plugin_lib/pureSalsa20.py b/pyglossary/plugin_lib/pureSalsa20.py index 33f05124e..dc3c9f7eb 100644 --- a/pyglossary/plugin_lib/pureSalsa20.py +++ b/pyglossary/plugin_lib/pureSalsa20.py @@ -236,7 +236,7 @@ def getCounter(self): return little_u64.unpack(little2_i32.pack(*self.ctx[8:10]))[0] def setRounds(self, rounds, testing=False): - assert testing or rounds in (8, 12, 20), "rounds must be 8, 12, 20" + assert testing or rounds in {8, 12, 20}, "rounds must be 8, 12, 20" self.rounds = rounds def encryptBytes(self, data: bytes) -> bytes: @@ -267,7 +267,7 @@ def salsa20_wordtobyte(input_, nRounds=20, checkRounds=True): Returns a 64-byte string. """ assert isinstance(input_, list | tuple) and len(input_) == 16 - assert not checkRounds or nRounds in (8, 12, 20) + assert not checkRounds or nRounds in {8, 12, 20} x = list(input_) diff --git a/pyglossary/plugin_lib/readmdict.py b/pyglossary/plugin_lib/readmdict.py index e59cb345b..721a9858c 100644 --- a/pyglossary/plugin_lib/readmdict.py +++ b/pyglossary/plugin_lib/readmdict.py @@ -148,10 +148,12 @@ def keys(self): def _read_number(self, f): return unpack(self._number_format, f.read(self._number_width))[0] - def _read_int32(self, f): + @staticmethod + def _read_int32(f): return unpack(">I", f.read(4))[0] - def _parse_header(self, header): + @staticmethod + def _parse_header(header): """Extract attributes from .""" taglist = re.findall(rb'(\w+)="(.*?)"', header, re.DOTALL) tagdict = {} @@ -357,7 +359,7 @@ def _read_header(self): if sys.hexversion >= 0x03000000: encoding = encoding.decode("utf-8") # GB18030 > GBK > GB2312 - if encoding in ("GBK", "GB2312"): + if encoding in {"GBK", "GB2312"}: encoding = "GB18030" self._encoding = encoding @@ -652,7 +654,7 @@ def _read_records_v1v2(self): f.close() - def _treat_record_data(self, data): + def _treat_record_data(self, data): # noqa: PLR6301 return data diff --git a/pyglossary/plugin_prop.py b/pyglossary/plugin_prop.py index 3df64a090..42c7a9110 100644 --- a/pyglossary/plugin_prop.py +++ b/pyglossary/plugin_prop.py @@ -304,7 +304,8 @@ def canRead(self) -> bool: def canWrite(self) -> bool: return self._canWrite - def getOptionAttrNamesFromClass(self, rwclass: "type") -> "list[str]": + @staticmethod + def getOptionAttrNamesFromClass(rwclass: "type") -> "list[str]": nameList = [] for cls in (*rwclass.__bases__, rwclass): @@ -521,7 +522,7 @@ def getExtraOptionsFromFunc( if str(param.default) != "": extraOptNames.append(name) continue - if name not in ("filename", "dirname"): + if name not in {"filename", "dirname"}: extraOptNames.append(name) if extraOptNames: log.warning(f"{format}: {extraOptNames = }") diff --git a/pyglossary/plugins/aard2_slob.py b/pyglossary/plugins/aard2_slob.py index d3d8f2079..7c28e4cfb 100644 --- a/pyglossary/plugins/aard2_slob.py +++ b/pyglossary/plugins/aard2_slob.py @@ -181,7 +181,8 @@ def __len__(self) -> int: return 0 return len(self._slobObj) - def _href_sub(self, m: "re.Match") -> str: + @staticmethod + def _href_sub(m: "re.Match") -> str: st = m.group(0) if "//" in st: return st @@ -213,7 +214,7 @@ def __iter__(self) -> "Iterator[EntryType | None]": word = blob.key ctype = blob.content_type.split(";")[0] - if ctype not in (MIME_HTML, MIME_TEXT): + if ctype not in {MIME_HTML, MIME_TEXT}: log.debug(f"unknown {blob.content_type=} in {word=}") word = word.removeprefix("~/") yield self._glos.newDataEntry(word, blob.content) @@ -278,8 +279,8 @@ def __init__(self, glos: GlossaryType) -> None: self._resPrefix = "" self._slobWriter: "slob.Writer | None" = None + @staticmethod def _slobObserver( - self, event: "slob.WriterEvent", # noqa: F401, F821 ) -> None: log.debug(f"slob: {event.name}{': ' + event.data if event.data else ''}") @@ -357,7 +358,7 @@ def addEntry(self, entry: "EntryType") -> None: entry.detectDefiFormat() defiFormat = entry.defiFormat - if self._word_title and defiFormat in ("h", "m"): + if self._word_title and defiFormat in {"h", "m"}: if defiFormat == "m": defiFormat = "h" title = self._glos.wordTitleStr( diff --git a/pyglossary/plugins/appledict/_content.py b/pyglossary/plugins/appledict/_content.py index 0c7967534..014add19f 100644 --- a/pyglossary/plugins/appledict/_content.py +++ b/pyglossary/plugins/appledict/_content.py @@ -117,7 +117,7 @@ def prepare_content_without_soup( return content # noqa: RET504 -def _prepare_href(tag): +def _prepare_href(tag) -> None: href = tag["href"] href = cleanup_link_target(href) @@ -136,7 +136,7 @@ def _prepare_href(tag): tag["href"] = f"x-dictionary:d:{href}" -def _prepare_onclick(soup): +def _prepare_onclick(soup) -> None: for thumb in soup.find_all("div", "pic_thumb"): thumb["onclick"] = ( 'this.setAttribute("style", "display:none"); ' diff --git a/pyglossary/plugins/appledict/indexes/zh.py b/pyglossary/plugins/appledict/indexes/zh.py index 06621f2ae..5acec475a 100644 --- a/pyglossary/plugins/appledict/indexes/zh.py +++ b/pyglossary/plugins/appledict/indexes/zh.py @@ -58,8 +58,7 @@ def zh(titles: "Sequence[str]", content: str) -> "set[str]": indexes.update({title, title + "。"}) # remove all non hieroglyph - title = nonHieroglyphPattern.sub("", title) - indexes.add(title) + indexes.add(nonHieroglyphPattern.sub("", title)) indexes.update(pinyin_indexes(content)) diff --git a/pyglossary/plugins/appledict/jing/main.py b/pyglossary/plugins/appledict/jing/main.py index 0ed897f85..20fd5513d 100644 --- a/pyglossary/plugins/appledict/jing/main.py +++ b/pyglossary/plugins/appledict/jing/main.py @@ -85,9 +85,9 @@ def main() -> None: if len(sys.argv) < 2: prog_name = path.basename(sys.argv[0]) log.info(f"usage:\n {prog_name} filename") - exit(1) + sys.exit(1) try: run(sys.argv[1]) except JingTestError as e: log.fatal(str(e)) - exit(e.returncode) + sys.exit(e.returncode) diff --git a/pyglossary/plugins/appledict_bin/__init__.py b/pyglossary/plugins/appledict_bin/__init__.py index 9328b45e7..0ab38bc39 100644 --- a/pyglossary/plugins/appledict_bin/__init__.py +++ b/pyglossary/plugins/appledict_bin/__init__.py @@ -131,12 +131,12 @@ def __init__(self, glos: GlossaryType) -> None: self._keyTextData: "dict[ArticleAddress, list[RawKeyData]]" = {} self._cssName = "" + @staticmethod def tostring( - self, elem: "Element | HtmlComment | HtmlElement | " "HtmlEntity | HtmlProcessingInstruction", ) -> str: - from lxml.html import tostring as tostring + from lxml.html import tostring return tostring( cast("HtmlElement", elem), @@ -255,7 +255,8 @@ def open(self, filename: str) -> "Iterator[tuple[int, int]]": f"number of entries: {self._wordCount}", ) - def parseMetadata(self, infoPlistPath: str) -> "dict[str, Any]": + @staticmethod + def parseMetadata(infoPlistPath: str) -> "dict[str, Any]": import biplist if not isfile(infoPlistPath): @@ -361,8 +362,8 @@ def _getDefi( return defi + @staticmethod def getChunkLenOffset( - self, pos: int, buffer: bytes, ) -> "tuple[int, int]": @@ -452,11 +453,11 @@ def convertEntryBytesToXml( def readEntryIds(self) -> None: titleById = {} - for entryBytes, _ in self.yieldEntryBytes( + for entryBytesTmp, _ in self.yieldEntryBytes( self._file, self._properties, ): - entryBytes = entryBytes.strip() + entryBytes = entryBytesTmp.strip() if not entryBytes: continue id_i = entryBytes.find(b'id="') @@ -642,6 +643,15 @@ def readResFile(self, fname: str, fpath: str, ext: str) -> EntryType: data = substituteAppleCSS(data) return self._glos.newDataEntry(fname, data) + def fixResFilename(self, fname: str, relPath: str): + if fname == self._cssName: + fname = "style.css" + if relPath: + fname = relPath + "/" + fname + if os.path == "\\": + fname = fname.replace("\\", "/") + return fname + def readResDir( self, dirPath: str, @@ -666,16 +676,13 @@ def readResDir( relPath=join(relPath, fname), ) continue + if not isfile(fpath): continue - if fname == self._cssName: - fname = "style.css" - if relPath: - fname = relPath + "/" + fname - if os.path == "\\": - fname = fname.replace("\\", "/") - core.trace(log, f"Using resource {fpath!r} as {fname!r}") - yield self.readResFile(fname, fpath, ext) + + fname2 = self.fixResFilename(fname, relPath) + core.trace(log, f"Using resource {fpath!r} as {fname2!r}") + yield self.readResFile(fname2, fpath, ext) def __iter__(self) -> Iterator[EntryType]: yield from self.readResDir( diff --git a/pyglossary/plugins/babylon_bgl/bgl_info.py b/pyglossary/plugins/babylon_bgl/bgl_info.py index 1ba739186..94aaedca5 100644 --- a/pyglossary/plugins/babylon_bgl/bgl_info.py +++ b/pyglossary/plugins/babylon_bgl/bgl_info.py @@ -24,7 +24,7 @@ from collections.abc import Callable from typing import Any -import pyglossary.gregorian as gregorian +from pyglossary import gregorian from pyglossary.core import log from pyglossary.text_utils import ( uintFromBytes, diff --git a/pyglossary/plugins/babylon_bgl/bgl_reader.py b/pyglossary/plugins/babylon_bgl/bgl_reader.py index 878c58dfc..c0361c6e9 100644 --- a/pyglossary/plugins/babylon_bgl/bgl_reader.py +++ b/pyglossary/plugins/babylon_bgl/bgl_reader.py @@ -404,10 +404,10 @@ def openGzip(self) -> None: return False b_head = bglFile.read(6) - if len(b_head) < 6 or b_head[:4] not in ( + if len(b_head) < 6 or b_head[:4] not in { b"\x12\x34\x00\x01", b"\x12\x34\x00\x02", - ): + }: log.error(f"invalid header: {b_head[:6]!r}") return False @@ -442,7 +442,7 @@ def readInfo(self) -> None: continue if block.type == 0: self.readType0(block) - elif block.type in (1, 7, 10, 11, 13): + elif block.type in {1, 7, 10, 11, 13}: self.numEntries += 1 elif block.type == 2: self.numResources += 1 @@ -476,7 +476,7 @@ def readInfo(self) -> None: for key, value in self.info.items(): if isinstance(value, bytes): try: - value = value.decode(encoding) + value = value.decode(encoding) # noqa: PLW2901 except Exception: log.warning(f"failed to decode info value: {key} = {value}") else: @@ -520,13 +520,13 @@ def setGlossaryInfo(self) -> None: # TODO: a bool flag to add empty value infos? # leave "creationTime" and "lastUpdated" as is if key == "utf8Encoding": - key = "bgl_" + key + key = "bgl_" + key # noqa: PLW2901 try: glos.setInfo(key, s_value) except Exception: log.exception(f"key = {key}") - def isEndOfDictData(self) -> bool: + def isEndOfDictData(self) -> bool: # noqa: PLR6301 """ Test for end of dictionary data. @@ -799,7 +799,7 @@ def __iter__(self) -> "Iterator[EntryType]": u_defi, ) - elif block.type in (1, 7, 10, 11, 13): + elif block.type in {1, 7, 10, 11, 13}: pos = 0 # word: wordData = self.readEntryWord(block, pos) @@ -1056,7 +1056,8 @@ def readEntry_Type11( def charReferencesStat(self, b_text: bytes, encoding: str) -> None: pass - def decodeCharsetTagsBabylonReference(self, b_text: bytes, b_text2: bytes): + @staticmethod + def decodeCharsetTagsBabylonReference(b_text: bytes, b_text2: bytes): b_refs = b_text2.split(b";") add_text = "" for i_ref, b_ref in enumerate(b_refs): diff --git a/pyglossary/plugins/babylon_bgl/bgl_reader_debug.py b/pyglossary/plugins/babylon_bgl/bgl_reader_debug.py index 2525f1bab..959c68c55 100644 --- a/pyglossary/plugins/babylon_bgl/bgl_reader_debug.py +++ b/pyglossary/plugins/babylon_bgl/bgl_reader_debug.py @@ -187,11 +187,11 @@ def open( self.targetCharsArray = None if self._raw_dump_path: - self.rawDumpFile = open(self._raw_dump_path, "w") + self.rawDumpFile = open(self._raw_dump_path, "w", encoding="utf-8") if self._char_samples_path: - self.samplesDumpFile = open(self._char_samples_path, "w") + self.samplesDumpFile = open(self._char_samples_path, "w", encoding="utf-8") if self._msg_log_path: - self.msgLogFile = open(self._msg_log_path, "w") + self.msgLogFile = open(self._msg_log_path, "w", encoding="utf-8") self.charRefStatPattern = re.compile(b"(&#\\w+;)", re.IGNORECASE) @@ -201,10 +201,10 @@ def openGzip(self): log.error(f"file pointer empty: {bglFile}") return False buf = bglFile.read(6) - if len(buf) < 6 or buf[:4] not in ( + if len(buf) < 6 or buf[:4] not in { b"\x12\x34\x00\x01", b"\x12\x34\x00\x02", - ): + }: log.error(f"invalid header: {buf[:6]!r}") return False self.gzipOffset = gzipOffset = uintFromBytes(buf[4:6]) @@ -376,12 +376,12 @@ def dumpBlocks(self, dumpPath): break self.numBlocks += 1 - if block.type in (1, 7, 10, 11, 13): + if block.type in {1, 7, 10, 11, 13}: self.numEntries += 1 elif block.type == 2: # Embedded File (mostly Image or HTML) metaData.numFiles += 1 - if block.type in (1, 2, 7, 10, 11, 13): + if block.type in {1, 2, 7, 10, 11, 13}: if range_type == block.type: range_count += 1 else: diff --git a/pyglossary/plugins/cc_cedict/conv.py b/pyglossary/plugins/cc_cedict/conv.py index c00d11a7b..a32496bdc 100644 --- a/pyglossary/plugins/cc_cedict/conv.py +++ b/pyglossary/plugins/cc_cedict/conv.py @@ -90,7 +90,7 @@ def render_article( tones.append(tone) f = BytesIO() - with ET.htmlfile(f, encoding="utf-8") as hf: + with ET.htmlfile(f, encoding="utf-8") as hf: # noqa: PLR1702 with hf.element("div", style="border: 1px solid; padding: 5px"): with hf.element("div"): with hf.element("big"): diff --git a/pyglossary/plugins/cc_cedict/pinyin.py b/pyglossary/plugins/cc_cedict/pinyin.py index d5d3cf904..d9e10f488 100644 --- a/pyglossary/plugins/cc_cedict/pinyin.py +++ b/pyglossary/plugins/cc_cedict/pinyin.py @@ -41,7 +41,7 @@ def convert(word: str) -> "tuple[str, str]": if tone == "5": return pinyin, tone - if tone not in ("1", "2", "3", "4"): + if tone not in {"1", "2", "3", "4"}: return word, "" for vowel in VOWELS: diff --git a/pyglossary/plugins/cc_kedict.py b/pyglossary/plugins/cc_kedict.py index 8f8465719..af18f9dfc 100644 --- a/pyglossary/plugins/cc_kedict.py +++ b/pyglossary/plugins/cc_kedict.py @@ -86,14 +86,16 @@ def __init__( "pref": "prefix", } - def isInfoWord(self, _word: str) -> bool: + @classmethod + def isInfoWord(cls, _word: str) -> bool: return False - def fixInfoWord(self, _word: str) -> str: + @classmethod + def fixInfoWord(cls, _word: str) -> str: return "" + @staticmethod def _makeList( - self, hf: "lxml.etree.htmlfile", input_objects: "list[Any]", processor: "Callable", @@ -117,7 +119,7 @@ def _makeList( with hf.element("li"): processor(hf, el, len(input_objects)) - def _processExample( + def _processExample( # noqa: PLR6301 self, hf: "lxml.etree.htmlfile", exampleDict: "dict", @@ -170,7 +172,7 @@ def _processDef( skip_single=False, ) - def _processNote( + def _processNote( # noqa: PLR6301 self, hf: "lxml.etree.htmlfile", note: str, diff --git a/pyglossary/plugins/crawler_dir.py b/pyglossary/plugins/crawler_dir.py index 1b69c2160..e8c6a17df 100644 --- a/pyglossary/plugins/crawler_dir.py +++ b/pyglossary/plugins/crawler_dir.py @@ -68,7 +68,8 @@ def open(self, filename: str) -> None: if not isdir(filename): makedirs(filename) - def filePathFromWord(self, b_word: bytes) -> str: + @staticmethod + def filePathFromWord(b_word: bytes) -> str: bw = b_word.lower() if len(bw) <= 2: return bw.hex() @@ -168,7 +169,8 @@ def _fromFile(self, fpath: str) -> "EntryType": defi = _file.read() return self._glos.newEntry(words, defi) - def _listdirSortKey(self, name: str) -> str: + @staticmethod + def _listdirSortKey(name: str) -> str: name_nox, ext = splitext(name) if ext == ".d": return name diff --git a/pyglossary/plugins/dicformids.py b/pyglossary/plugins/dicformids.py index e71202867..a2b633bdb 100644 --- a/pyglossary/plugins/dicformids.py +++ b/pyglossary/plugins/dicformids.py @@ -175,7 +175,7 @@ def writeProbs(self) -> None: self._dirname, "DictionaryForMIDs.properties", ) - with open(probsPath, mode="w", newline="\n") as fileObj: + with open(probsPath, mode="w", newline="\n", encoding="utf-8") as fileObj: fileObj.write( PROP_TEMPLATE.format( name=glos.getInfo("name"), diff --git a/pyglossary/plugins/dict_cc.py b/pyglossary/plugins/dict_cc.py index 36ba9e281..651cc4629 100644 --- a/pyglossary/plugins/dict_cc.py +++ b/pyglossary/plugins/dict_cc.py @@ -71,8 +71,8 @@ def __len__(self) -> int: ) return self._cur.fetchone()[0] + @staticmethod def makeList( - self, hf: "T_htmlfile", input_elements: "list[Element]", processor: "Callable", @@ -93,8 +93,8 @@ def makeList( with hf.element("li"): processor(hf, el) + @staticmethod def makeGroupsList( - self, hf: "T_htmlfile", groups: "list[tuple[str, str]]", processor: "Callable[[T_htmlfile, tuple[str, str]], None]", @@ -115,7 +115,7 @@ def makeGroupsList( with hf.element("li"): processor(hf, el) - def writeSense( + def writeSense( # noqa: PLR6301 self, hf: "T_htmlfile", row: "tuple[str, str]", @@ -161,7 +161,7 @@ def iterRows( log.error(f"html.unescape({term2!r}) -> {e}") yield term1, term2, row[2] - def parseGender(self, headword: str) -> "tuple[str | None, str]": + def parseGender(self, headword: str) -> "tuple[str | None, str]": # noqa: PLR6301 # {m} masc masculine German: maskulin # {f} fem feminine German: feminin # {n} neut neutral German: neutral @@ -198,11 +198,11 @@ def _iterOneDirection( from lxml import etree as ET glos = self._glos - for headword, groupsOrig in groupby( + for headwordEscaped, groupsOrig in groupby( self.iterRows(column1, column2), key=itemgetter(0), ): - headword = html.unescape(headword) + headword = html.unescape(headwordEscaped) groups: "list[tuple[str, str]]" = [ (term2, entry_type) for _, term2, entry_type in groupsOrig ] diff --git a/pyglossary/plugins/dict_cc_split.py b/pyglossary/plugins/dict_cc_split.py index 8ac2cf0a0..b3cfaacf8 100644 --- a/pyglossary/plugins/dict_cc_split.py +++ b/pyglossary/plugins/dict_cc_split.py @@ -97,7 +97,7 @@ def _iterOneDirection( ) -> "Iterator[EntryType]": for word, defi, entry_type in self.iterRows(column1, column2): if entry_type: - word = f"{word} {{{entry_type}}}" + word = f"{word} {{{entry_type}}}" # noqa: PLW2901 yield self._glos.newEntry(word, defi, defiFormat="m") def __iter__(self) -> "Iterator[EntryType]": diff --git a/pyglossary/plugins/dictunformat.py b/pyglossary/plugins/dictunformat.py index 4cf08db2d..19e410183 100644 --- a/pyglossary/plugins/dictunformat.py +++ b/pyglossary/plugins/dictunformat.py @@ -46,10 +46,12 @@ class Reader(TextGlossaryReader): _headword_separator = "; " # https://github.com/cheusov/dictd/blob/master/dictfmt/dictunformat.in#L14 - def isInfoWord(self, word: str) -> bool: + @classmethod + def isInfoWord(cls, word: str) -> bool: return word.startswith("00-database-") - def fixInfoWord(self, word: str) -> str: + @classmethod + def fixInfoWord(cls, word: str) -> str: return word def setInfo(self, word: str, defi: str) -> None: diff --git a/pyglossary/plugins/dsl/__init__.py b/pyglossary/plugins/dsl/__init__.py index c56af7cc3..77fc91244 100644 --- a/pyglossary/plugins/dsl/__init__.py +++ b/pyglossary/plugins/dsl/__init__.py @@ -220,7 +220,7 @@ def open( # read header for line in self._file: - line = line.rstrip().lstrip("\ufeff") # noqa: B005 + line = line.rstrip().lstrip("\ufeff") # noqa: B005, PLW2901 # \ufeff -> https://github.com/ilius/pyglossary/issues/306 if not line: continue @@ -270,7 +270,8 @@ def _iterLines(self) -> "Iterator[str]": for line in self._file: yield line - def sub_title_line(self, m: "re.Match") -> str: + @staticmethod + def sub_title_line(m: "re.Match") -> str: line = m.group(0)[1:-1] line = line.replace("[']", "") # FIXME line = line.replace("[/']", "") diff --git a/pyglossary/plugins/dsl/lex.py b/pyglossary/plugins/dsl/lex.py index c6215978d..dedafee06 100644 --- a/pyglossary/plugins/dsl/lex.py +++ b/pyglossary/plugins/dsl/lex.py @@ -155,7 +155,7 @@ def processTagClose(tr: TransformerType, tag: str) -> tuple[LexType, ErrorType]: tr.output += "

" elif tag == "b": tr.output += "" - elif tag in ("u", "'"): + elif tag in {"u", "'"}: tr.output += "" elif tag == "i": tr.output += "" @@ -163,7 +163,7 @@ def processTagClose(tr: TransformerType, tag: str) -> tuple[LexType, ErrorType]: tr.output += "" elif tag == "sub": tr.output += "" - elif tag in ("c", "t"): + elif tag in {"c", "t"}: tr.output += "" elif tag == "p": tr.output += "" @@ -171,7 +171,7 @@ def processTagClose(tr: TransformerType, tag: str) -> tuple[LexType, ErrorType]: tr.output += "" elif tag == "ex": tr.output += "" - elif tag in ( + elif tag in { "ref", "url", "s", @@ -181,7 +181,7 @@ def processTagClose(tr: TransformerType, tag: str) -> tuple[LexType, ErrorType]: "!trs", "lang", "com", - ): + }: pass else: log.warning(f"unknown close tag {tag!r}") @@ -296,7 +296,7 @@ def lexS(tr: TransformerType) -> tuple[LexType, ErrorType]: _, ext = splitext(fname) ext = ext.lstrip(".") - if ext in ("wav", "mp3"): + if ext in {"wav", "mp3"}: if tr.audio: tr.output += ( rf' tuple[LexType, ErrorType]: '' "" ) - elif ext in ("jpg", "jpeg", "gif", "tif", "tiff", "png", "bmp"): + elif ext in {"jpg", "jpeg", "gif", "tif", "tiff", "png", "bmp"}: tr.output += rf'{fname}' else: log.warning(f"unknown file extension in {fname!r}") @@ -315,7 +315,7 @@ def lexS(tr: TransformerType) -> tuple[LexType, ErrorType]: return lexRoot, None -def processTagM(tr: TransformerType, tag: str): +def processTagM(tr: TransformerType, tag: str) -> None: padding = "0.3" if len(tag) > 1: padding = tag[1:] @@ -324,7 +324,7 @@ def processTagM(tr: TransformerType, tag: str): tr.output += f'

' -def processTagC(tr: TransformerType): +def processTagC(tr: TransformerType) -> None: color = "green" for key, value in tr.attrs.items(): if value is None: @@ -379,14 +379,14 @@ def processTag(tr: TransformerType, tag: str) -> tuple[LexType, ErrorType]: tr.output += "" elif tag == "sub": tr.output += "" - elif tag in ( + elif tag in { "trn", "!trn", "trs", "!trs", "lang", "com", - ): + }: pass else: log.warning(f"unknown tag {tag!r}") diff --git a/pyglossary/plugins/ebook_kobo.py b/pyglossary/plugins/ebook_kobo.py index c872f2ddb..1812081b1 100644 --- a/pyglossary/plugins/ebook_kobo.py +++ b/pyglossary/plugins/ebook_kobo.py @@ -99,7 +99,7 @@ def is_cyrillic_char(c: str) -> bool: # U+FE2E, U+FE2F: Combining Half Marks # U+1D2B, U+1D78: Phonetic Extensions - return c in ("\uFE2E", "\uFE2F", "\u1D2B", "\u1D78") + return c in {"\uFE2E", "\uFE2F", "\u1D2B", "\u1D78"} def fixFilename(fname: str) -> str: @@ -113,7 +113,8 @@ class Writer: "marisa_trie": "marisa-trie", } - def stripFullHtmlError(self, entry: "EntryType", error: str) -> None: + @staticmethod + def stripFullHtmlError(entry: "EntryType", error: str) -> None: log.error(f"error in stripFullHtml: {error}, words={entry.l_word!r}") def __init__(self, glos: "GlossaryType") -> None: @@ -127,7 +128,7 @@ def __init__(self, glos: "GlossaryType") -> None: # img tag has no closing glos.stripFullHtml(errorHandler=self.stripFullHtmlError) - def get_prefix(self, word: str) -> str: + def get_prefix(self, word: str) -> str: # noqa: PLR6301 if not word: return "11" wo = word[:2].strip().lower() diff --git a/pyglossary/plugins/ebook_kobo_dictfile.py b/pyglossary/plugins/ebook_kobo_dictfile.py index aa0c810bf..7d46512c3 100644 --- a/pyglossary/plugins/ebook_kobo_dictfile.py +++ b/pyglossary/plugins/ebook_kobo_dictfile.py @@ -104,10 +104,12 @@ def open(self, filename: str) -> None: TextGlossaryReader.open(self, filename) self._glos.setDefaultDefiFormat("h") - def isInfoWord(self, _word: str) -> bool: + @classmethod + def isInfoWord(cls, _word: str) -> bool: return False - def fixInfoWord(self, _word: str) -> str: + @classmethod + def fixInfoWord(cls, _word: str) -> str: raise NotImplementedError def fixDefi( @@ -181,7 +183,8 @@ def nextBlock( class Writer: _encoding: str = "utf-8" - def stripFullHtmlError(self, entry: "EntryType", error: str) -> None: + @staticmethod + def stripFullHtmlError(entry: "EntryType", error: str) -> None: log.error(f"error in stripFullHtml: {error}, words={entry.l_word!r}") def __init__(self, glos: GlossaryType) -> None: diff --git a/pyglossary/plugins/ebook_mobi.py b/pyglossary/plugins/ebook_mobi.py index 639f3168d..2c016a5a0 100644 --- a/pyglossary/plugins/ebook_mobi.py +++ b/pyglossary/plugins/ebook_mobi.py @@ -289,7 +289,8 @@ def format_group_content( infl=infl, ) - def getLangCode(self, lang: "Lang | None") -> str: + @staticmethod + def getLangCode(lang: "Lang | None") -> str: return lang.code if isinstance(lang, Lang) else "" def get_opf_contents( diff --git a/pyglossary/plugins/edlin.py b/pyglossary/plugins/edlin.py index 776fcdbc0..003673843 100644 --- a/pyglossary/plugins/edlin.py +++ b/pyglossary/plugins/edlin.py @@ -211,7 +211,8 @@ def _clear(self) -> None: self._hashSet: "set[str]" = set() # self._wordCount = None - def hashToPath(self, h: str) -> str: + @staticmethod + def hashToPath(h: str) -> str: return h[:2] + "/" + h[2:] def getEntryHash(self, entry: EntryType) -> str: diff --git a/pyglossary/plugins/freedict.py b/pyglossary/plugins/freedict.py index 41d3dc5e7..6bf496228 100644 --- a/pyglossary/plugins/freedict.py +++ b/pyglossary/plugins/freedict.py @@ -155,8 +155,8 @@ class Reader: } gramClass = "grammar" + @staticmethod def makeList( - self, hf: "T_htmlfile", input_objects: "list[Any]", processor: "Callable", @@ -184,13 +184,14 @@ def makeList( with hf.element("li"): processor(hf, el) - def getTitleTag(self, sample: str) -> str: + @staticmethod + def getTitleTag(sample: str) -> str: ws = getWritingSystemFromText(sample) if ws: return ws.titleTag return "b" - def writeRef( + def writeRef( # noqa: PLR6301 self, hf: "T_htmlfile", ref: "Element", @@ -223,7 +224,7 @@ def writeTransCit( sense = ET.Element(f"{tei}sense") for child in elem.xpath("child::node()"): if isinstance(child, str): - child = child.strip() + child = child.strip() # noqa: PLW2901 if child: hf.write(child) log.warning("text directly inside ") @@ -236,7 +237,7 @@ def writeTransCit( quotes.append(child) continue - if child.tag in (f"{tei}gramGrp", f"{tei}usg", f"{tei}note"): + if child.tag in {f"{tei}gramGrp", f"{tei}usg", f"{tei}note"}: sense.append(child) continue @@ -425,9 +426,9 @@ def writeSenseSense( _type = child.attrib.get("type") if not _type: noteList.append(child) - elif _type in ("pos", "gram"): + elif _type in {"pos", "gram"}: gramList.append(child) - elif _type in ( + elif _type in { "sense", "stagr", "stagk", @@ -440,7 +441,7 @@ def writeSenseSense( "infl", "obj", "lbl", - ): + }: noteList.append(child) else: log.warning(f"unknown note type {_type}") @@ -462,7 +463,7 @@ def writeSenseSense( self.writeLangTag(hf, child) continue - if child.tag in (f"{tei}sense", f"{tei}gramGrp"): + if child.tag in {f"{tei}sense", f"{tei}gramGrp"}: continue if child.tag == f"{tei}xr": @@ -649,7 +650,7 @@ def normalizeGramGrpChild(self, elem: "Element") -> str: return self.posMapping.get(text.lower(), text) if tag == f"{tei}gen": return self.genderMapping.get(text.lower(), text) - if tag in (f"{tei}num", f"{tei}number"): + if tag in {f"{tei}num", f"{tei}number"}: return self.numberMapping.get(text.lower(), text) if tag == f"{tei}subc": return self.subcMapping.get(text.lower(), text) @@ -660,7 +661,7 @@ def normalizeGramGrpChild(self, elem: "Element") -> str: return self.posMapping.get(text.lower(), text) if _type == "gen": return self.genderMapping.get(text.lower(), text) - if _type in ("num", "number"): + if _type in {"num", "number"}: return self.numberMapping.get(text.lower(), text) if _type == "subc": return self.subcMapping.get(text.lower(), text) @@ -776,7 +777,8 @@ def setWordCount(self, header: "Element") -> None: except Exception: log.exception(f"unexpected {extent=}") - def tostring(self, elem: "Element") -> str: + @staticmethod + def tostring(elem: "Element") -> str: from lxml import etree as ET return ( @@ -801,7 +803,7 @@ def stripParagList( lines = [] for elem in elems: for line in self.stripParag(elem).split("\n"): - line = line.strip() + line = line.strip() # noqa: PLW2901 if not line: continue lines.append(line) diff --git a/pyglossary/plugins/gettext_po.py b/pyglossary/plugins/gettext_po.py index b0e3e345c..2f3162f3b 100644 --- a/pyglossary/plugins/gettext_po.py +++ b/pyglossary/plugins/gettext_po.py @@ -68,7 +68,7 @@ def clear(self) -> None: def open(self, filename: str) -> None: self._filename = filename - self._file = open(filename) + self._file = open(filename, encoding="utf-8") self._resDir = filename + "_res" if isdir(self._resDir): self._resFileNames = os.listdir(self._resDir) @@ -106,7 +106,7 @@ def __iter__(self) -> "Iterator[EntryType]": msgstr = False wordCount = 0 for line in _file: - line = line.strip() + line = line.strip() # noqa: PLW2901 if not line: continue if line.startswith("#"): @@ -128,11 +128,10 @@ def __iter__(self) -> "Iterator[EntryType]": log.error("msgid omitted!") defi = po_unescape(line[7:]) msgstr = True + elif msgstr: + defi += po_unescape(line) else: - if msgstr: - defi += po_unescape(line) - else: - word += po_unescape(line) + word += po_unescape(line) if word: yield self._glos.newEntry(word, defi) wordCount += 1 diff --git a/pyglossary/plugins/html_dir.py b/pyglossary/plugins/html_dir.py index 289a4da23..33dd3359f 100644 --- a/pyglossary/plugins/html_dir.py +++ b/pyglossary/plugins/html_dir.py @@ -110,7 +110,8 @@ class Writer: _css: str = "" _word_title: bool = True - def stripFullHtmlError(self, entry: "EntryType", error: str) -> None: + @staticmethod + def stripFullHtmlError(entry: "EntryType", error: str) -> None: log.error(f"error in stripFullHtml: {error}, words={entry.l_word!r}") def __init__(self, glos: GlossaryType) -> None: @@ -174,7 +175,7 @@ def fixLinks(self, linkTargetSet: "set[str]") -> None: fileByWord: "dict[str, list[tuple[str, int]]]" = {} for line in open(join(dirn, "index.txt"), encoding="utf-8"): - line = line.rstrip("\n") + line = line.rstrip("\n") # noqa: PLW2901 if not line: continue entryIndexStr, wordEsc, filename, _ = line.split("\t") @@ -201,7 +202,7 @@ def getLinksByFile(fileIndex: int) -> "io.TextIOBase": log.info("") for line in open(join(dirn, "links.txt"), encoding="utf-8"): - line = line.rstrip("\n") + line = line.rstrip("\n") # noqa: PLW2901 if not line: continue target, fileIndexStr, x_start, x_size = line.split("\t") @@ -240,8 +241,11 @@ def getLinksByFile(fileIndex: int) -> "io.TextIOBase": with open(join(dirn, f"{filename}.new"), mode="wb") as outFile: for linkLine in open(join(dirn, f"links{fileIndex}"), "rb"): outFile.flush() - linkLine = linkLine.rstrip(b"\n") - b_x_start, b_x_size, b_target = linkLine.split(b"\t") + ( + b_x_start, + b_x_size, + b_target, + ) = linkLine.rstrip(b"\n").split(b"\t") outFile.write( inFile.read( int(b_x_start, 16) - inFile.tell(), @@ -314,7 +318,8 @@ def writeInfo(self, filename: str, header: str) -> None: ) _file.write("") - def _subResSrc(self, m: "re.Match") -> str: + @staticmethod + def _subResSrc(m: "re.Match") -> str: url = m.group(1) if "://" in url: return m.group(0) diff --git a/pyglossary/plugins/info_plugin.py b/pyglossary/plugins/info_plugin.py index 874ad74c4..af1be9a2d 100644 --- a/pyglossary/plugins/info_plugin.py +++ b/pyglossary/plugins/info_plugin.py @@ -119,11 +119,9 @@ def write(self) -> "Generator[None, EntryType, None]": elif defiFormat == "h": match = re_possible_html.search(defi) if match is not None: - tag = match.group().strip("< />").lower() - firstTagCounter[tag] += 1 + firstTagCounter[match.group().strip("< />").lower()] += 1 for tag in re_possible_html.findall(defi): - tag = tag.strip("< />").lower() - allTagsCounter[tag] += 1 + allTagsCounter[tag.strip("< />").lower()] += 1 elif defiFormat == "b": _filenameNoExt, ext = splitext(entry.s_word) ext = ext.lstrip(".") diff --git a/pyglossary/plugins/iupac_goldbook.py b/pyglossary/plugins/iupac_goldbook.py index fe02eaa63..da1e5f0aa 100644 --- a/pyglossary/plugins/iupac_goldbook.py +++ b/pyglossary/plugins/iupac_goldbook.py @@ -141,8 +141,8 @@ def setMetadata(self, header: "Element") -> None: if accessdate: self.setGlosInfo("creationTime", accessdate.text) + @staticmethod def tostring( - self, elem: "Element", ) -> str: from lxml import etree as ET @@ -157,7 +157,8 @@ def tostring( .strip() ) - def innerXML(self, elem: "Element") -> str: + @staticmethod + def innerXML(elem: "Element") -> str: from lxml import etree as ET elemName = elem.xpath("name(/*)") @@ -170,7 +171,7 @@ def innerXML(self, elem: "Element") -> str: return resultStr - def getTerm(self, termE: "Element") -> str: + def getTerm(self, termE: "Element") -> str: # noqa: PLR6301 from lxml import etree as ET term = ( @@ -200,7 +201,7 @@ def __iter__(self) -> "Iterator[EntryType]": events=("end",), tag="entry", ) - for _, elem in context: + for _, elem in context: # noqa: PLR1702 codeE = elem.find("./code") if codeE is None: continue diff --git a/pyglossary/plugins/jmdict.py b/pyglossary/plugins/jmdict.py index 3a79d85bf..918a85eea 100644 --- a/pyglossary/plugins/jmdict.py +++ b/pyglossary/plugins/jmdict.py @@ -97,8 +97,8 @@ class Reader: "word containing irregular kana usage": "irregular", } + @staticmethod def makeList( - self, hf: "T_htmlfile", input_objects: "list[Element]", processor: "Callable", @@ -205,7 +205,8 @@ def br() -> "Element": hf.write(br()) examples = sense.findall("example") - if examples: + # TODO: move to a method + if examples: # noqa: PLR1702 with hf.element( "div", attrib={ @@ -257,7 +258,7 @@ def getEntryByElem( def br() -> "Element": return ET.Element("br") - with ET.htmlfile(f, encoding="utf-8") as hf: + with ET.htmlfile(f, encoding="utf-8") as hf: # noqa: PLR1702 kebList: "list[str]" = [] rebList: "list[str]" = [] kebDisplayList: "list[str]" = [] @@ -365,10 +366,8 @@ def br() -> "Element": byteProgress=byteProgress, ) - def tostring( - self, - elem: "Element", - ) -> str: + @staticmethod + def tostring(elem: "Element") -> str: from lxml import etree as ET return ( diff --git a/pyglossary/plugins/jmnedict.py b/pyglossary/plugins/jmnedict.py index 3ebbeb2fa..167614a6e 100644 --- a/pyglossary/plugins/jmnedict.py +++ b/pyglossary/plugins/jmnedict.py @@ -75,8 +75,8 @@ class Reader: "word containing irregular kana usage": "irregular", } + @staticmethod def makeList( - self, hf: "T_htmlfile", input_objects: "list[Element]", processor: "Callable", @@ -153,7 +153,7 @@ def getEntryByElem( def br() -> "Element": return ET.Element("br") - with ET.htmlfile(f, encoding="utf-8") as hf: + with ET.htmlfile(f, encoding="utf-8") as hf: # noqa: PLR1702 kebList: "list[str]" = [] rebList: "list[tuple[str, list[str]]]" = [] with hf.element("div"): @@ -236,10 +236,8 @@ def br() -> "Element": byteProgress=byteProgress, ) - def tostring( - self, - elem: "Element", - ) -> str: + @staticmethod + def tostring(elem: "Element") -> str: from lxml import etree as ET return ( diff --git a/pyglossary/plugins/lingoes_ldf.py b/pyglossary/plugins/lingoes_ldf.py index a48efb4e8..0ced54e2a 100644 --- a/pyglossary/plugins/lingoes_ldf.py +++ b/pyglossary/plugins/lingoes_ldf.py @@ -69,13 +69,15 @@ def __len__(self) -> int: ) return self._wordCount - def isInfoWord(self, word: str) -> bool: + @classmethod + def isInfoWord(cls, word: str) -> bool: if isinstance(word, str): return word.startswith("#") return False - def fixInfoWord(self, word: str) -> str: + @classmethod + def fixInfoWord(cls, word: str) -> str: if isinstance(word, str): return word.lstrip("#").lower() diff --git a/pyglossary/plugins/octopus_mdict_new/__init__.py b/pyglossary/plugins/octopus_mdict_new/__init__.py index fd4bd7691..90d25324b 100644 --- a/pyglossary/plugins/octopus_mdict_new/__init__.py +++ b/pyglossary/plugins/octopus_mdict_new/__init__.py @@ -245,7 +245,7 @@ def __iter__(self) -> "Iterator[EntryType]": dirPath = dirname(self._filename) for fname in os.listdir(dirPath): ext = splitext(fname)[1].lower() - if ext in (".mdx", ".mdd"): + if ext in {".mdx", ".mdd"}: continue fpath = join(dirPath, fname) if not isfile(fpath): diff --git a/pyglossary/plugins/quickdic6.py b/pyglossary/plugins/quickdic6.py index 25f86c5ff..41fb49d93 100644 --- a/pyglossary/plugins/quickdic6.py +++ b/pyglossary/plugins/quickdic6.py @@ -452,7 +452,7 @@ def write_entry_indexentry( entry: IndexEntryType, ) -> None: token, start_index, count, token_norm, html_indices = entry - has_normalized = token_norm != "" + has_normalized = bool(token_norm) write_string(fp, token) write_int(fp, start_index) write_int(fp, count) @@ -509,7 +509,8 @@ def _compare_without_dash( s2 = self._without_dash(b) return self._comparator.compare(s1, s2) - def _without_dash(self, a: str) -> str: + @staticmethod + def _without_dash(a: str) -> str: return a.replace("-", "").replace("þ", "th").replace("Þ", "Th") @@ -603,7 +604,7 @@ def add_index( tokens = [ (t, comparator.normalize(t), ttype, tidx) for t, ttype, tidx in tokens1 - if t != "" + if t ] if len(synonyms) > 0: @@ -616,7 +617,7 @@ def add_index( for t in tokens if t[0] in synonyms for s in synonyms[t[0]] - if s != "" + if s ], ) @@ -631,26 +632,25 @@ def add_index( prev_token = "" if len(index_entries) == 0 else index_entries[-1][0] if prev_token == token: ( - token, + token, # noqa: PLW2901 index_start, count, - token_norm, + token_norm, # noqa: PLW2901 html_indices, ) = index_entries.pop() else: i_entry = len(index_entries) index_start = len(rows) count = 0 - token_norm = "" if token == token_norm else token_norm + token_norm = "" if token == token_norm else token_norm # noqa: PLW2901 html_indices = [] rows.append((1, i_entry)) if ttype == 4: if tidx not in html_indices: html_indices.append(tidx) - else: - if (ttype, tidx) not in rows[index_start + 1 :]: - rows.append((ttype, tidx)) - count += 1 + elif (ttype, tidx) not in rows[index_start + 1 :]: + rows.append((ttype, tidx)) + count += 1 index_entries.append( (token, index_start, count, token_norm, html_indices), ) @@ -723,7 +723,7 @@ def _extract_synonyms_from_indices(self) -> None: if entry_id not in self._synonyms: self._synonyms[entry_id] = set() self._synonyms[entry_id].add(token) - if token_norm != "": + if token_norm: self._synonyms[entry_id].add(token_norm) def _extract_rows_from_indexentry( @@ -737,11 +737,11 @@ def _extract_rows_from_indexentry( _, _, _, _, _, _, index_entries, _, rows = index token, start_index, count, _, html_indices = index_entries[i_entry] block_rows = rows[start_index : start_index + count + 1] - assert block_rows[0][0] in (1, 3) + assert block_rows[0][0] in {1, 3} assert block_rows[0][1] == i_entry e_rows = [] for entry_type, entry_idx in block_rows[1:]: - if entry_type in (1, 3): + if entry_type in {1, 3}: # avoid an endless recursion if entry_idx not in recurse: e_rows.extend( @@ -825,7 +825,7 @@ def write(self) -> "typing.Generator[None, EntryType, None]": continue entry.detectDefiFormat() - if entry.defiFormat not in ("h", "m"): + if entry.defiFormat not in {"h", "m"}: log.error(f"Unsupported defiFormat={entry.defiFormat}, assuming 'h'") words = entry.l_word @@ -841,17 +841,17 @@ def write(self) -> "typing.Generator[None, EntryType, None]": log.info("Collecting meta data ...") name = self._glos.getInfo("bookname") - if name == "": + if not name: name = self._glos.getInfo("description") sourceLang = ( self._glos.sourceLang - if self._source_lang == "" + if not self._source_lang else langDict[self._source_lang] ) targetLang = ( self._glos.targetLang - if self._target_lang == "" + if not self._target_lang else langDict[self._target_lang] ) if sourceLang and targetLang: @@ -872,7 +872,7 @@ def write(self) -> "typing.Generator[None, EntryType, None]": short_name = long_name = iso = sourceLang normalizer_rules = ( self._normalizer_rules - if self._normalizer_rules != "" + if self._normalizer_rules else ":: Lower; 'ae' > 'ä'; 'oe' > 'ö'; 'ue' > 'ü'; 'ß' > 'ss'; " if iso == "DE" else ":: Any-Latin; ' ' > ; :: Lower; :: NFD;" diff --git a/pyglossary/plugins/sql.py b/pyglossary/plugins/sql.py index 11ba5a112..852ed002b 100644 --- a/pyglossary/plugins/sql.py +++ b/pyglossary/plugins/sql.py @@ -120,11 +120,11 @@ def _writeInfo(self) -> None: if self._add_extra_info: extraInfo = glos.getExtraInfos(info_keys) for index, (key, value) in enumerate(extraInfo.items()): - key = key.replace("'", "''") - value = value.replace("'", "''") + key2 = key.replace("'", "''") + value2 = value.replace("'", "''") fileObj.write( f"INSERT INTO dbinfo_extra VALUES({index + 1}, " - f"'{key}', '{value}');\n", + f"'{key2}', '{value2}');\n", ) def _getInfoKeys(self) -> "list[str]": @@ -168,9 +168,8 @@ def fixStr(word: str) -> str: f"INSERT INTO word VALUES({_id}, '{word}', '{defi}');\n", ) for alt in words[1:]: - alt = fixStr(alt) fileObj.write( - f"INSERT INTO alt VALUES({_id}, '{alt}');\n", + f"INSERT INTO alt VALUES({_id}, '{fixStr(alt)}');\n", ) _id += 1 diff --git a/pyglossary/plugins/stardict.py b/pyglossary/plugins/stardict.py index 4122e3592..0c8145aef 100644 --- a/pyglossary/plugins/stardict.py +++ b/pyglossary/plugins/stardict.py @@ -167,11 +167,11 @@ def transformByInnerString(self, text: str) -> str: ... -T_SDListItem = TypeVar("T_SDListItem", contravariant=True) +T_SDListItem_contra = TypeVar("T_SDListItem_contra", contravariant=True) -class T_SdList(Protocol[T_SDListItem]): - def append(self, x: T_SDListItem) -> None: +class T_SdList(Protocol[T_SDListItem_contra]): + def append(self, x: T_SDListItem_contra) -> None: ... def __len__(self) -> int: @@ -197,7 +197,7 @@ def __len__(self) -> int: def __iter__(self) -> "Iterator[Any]": return iter(self._l) - def sortKey(self, item: "tuple[bytes, Any]") -> "tuple[bytes, bytes]": + def sortKey(self, item: "tuple[bytes, Any]") -> "tuple[bytes, bytes]": # noqa: PLR6301 return ( item[0].lower(), item[0], @@ -246,7 +246,8 @@ def __init__( ) self._con.commit() - def getExtraColumns(self) -> "list[tuple[str, str]]": + @classmethod + def getExtraColumns(cls) -> "list[tuple[str, str]]": # list[(columnName, dataType)] return [] @@ -293,7 +294,8 @@ def __iter__(self) -> "Iterator[EntryType]": class IdxSqList(BaseSqList): - def getExtraColumns(self) -> "list[tuple[str, str]]": + @classmethod + def getExtraColumns(cls) -> "list[tuple[str, str]]": # list[(columnName, dataType)] return [ ("idx_block", "BLOB"), @@ -301,7 +303,8 @@ def getExtraColumns(self) -> "list[tuple[str, str]]": class SynSqList(BaseSqList): - def getExtraColumns(self) -> "list[tuple[str, str]]": + @classmethod + def getExtraColumns(cls) -> "list[tuple[str, str]]": # list[(columnName, dataType)] return [ ("entry_index", "INTEGER"), @@ -407,7 +410,7 @@ def readIfoFile(self) -> None: mode="rb", ) as ifoFile: for line in ifoFile: - line = line.strip() + line = line.strip() # noqa: PLW2901 if not line: continue if line == b"StarDict's dict ifo file": @@ -682,8 +685,8 @@ def readSynFile(self) -> "dict[int, list[str]]": return synDict + @staticmethod def parseDefiBlockCompact( - self, b_block: bytes, sametypesequence: str, ) -> "list[tuple[bytes, int]] | None": @@ -733,8 +736,8 @@ def parseDefiBlockCompact( return res + @staticmethod def parseDefiBlockGeneral( - self, b_block: bytes, ) -> "list[tuple[bytes, int]] | None": """ @@ -858,11 +861,10 @@ def write(self) -> "Generator[None, EntryType, None]": yield from self.writeCompactMergeSyns(self._sametypesequence) else: yield from self.writeCompact(self._sametypesequence) + elif self._merge_syns: + yield from self.writeGeneralMergeSyns() else: - if self._merge_syns: - yield from self.writeGeneralMergeSyns() - else: - yield from self.writeGeneral() + yield from self.writeGeneral() if self._dictzip: runDictzip(f"{self._filename}.dict") syn_file = f"{self._filename}.syn" @@ -1018,7 +1020,7 @@ def writeGeneral(self) -> "Generator[None, EntryType, None]": entry.detectDefiFormat() # call no more than once defiFormat = entry.defiFormat defiFormatCounter[defiFormat] += 1 - if defiFormat not in ("h", "m", "x"): + if defiFormat not in {"h", "m", "x"}: log.error(f"invalid {defiFormat=}, using 'm'") defiFormat = "m" @@ -1199,7 +1201,7 @@ def writeGeneralMergeSyns(self) -> "Generator[None, EntryType, None]": entry.detectDefiFormat() # call no more than once defiFormat = entry.defiFormat defiFormatCounter[defiFormat] += 1 - if defiFormat not in ("h", "m", "x"): + if defiFormat not in {"h", "m", "x"}: log.error(f"invalid {defiFormat=}, using 'm'") defiFormat = "m" @@ -1301,13 +1303,13 @@ def writeIfoFile( desc = f"Publisher: {publisher}\n{desc}" for key in infoKeys: - if key in ( + if key in { "bookname", "description", - ): + }: continue value = glos.getInfo(key) - if value == "": + if not value: continue value = newlinesToSpace(value) ifo.append((key, value)) diff --git a/pyglossary/plugins/stardict_textual.py b/pyglossary/plugins/stardict_textual.py index eecfd6019..09a880675 100644 --- a/pyglossary/plugins/stardict_textual.py +++ b/pyglossary/plugins/stardict_textual.py @@ -212,7 +212,7 @@ def __iter__(self) -> "Iterator[EntryType]": for child in elem.getchildren(): if not child.text: continue - if child.tag in ("key", "synonym"): + if child.tag in {"key", "synonym"}: words.append(child.text) elif child.tag == "definition": _type = child.attrib.get("type", "") diff --git a/pyglossary/plugins/tabfile.py b/pyglossary/plugins/tabfile.py index ed919ab03..6bfccf858 100644 --- a/pyglossary/plugins/tabfile.py +++ b/pyglossary/plugins/tabfile.py @@ -75,10 +75,12 @@ def __iter__(self) -> "Iterator[EntryType | None]": _file.read(), ) - def isInfoWord(self, word: str) -> bool: + @classmethod + def isInfoWord(cls, word: str) -> bool: return word.startswith("#") - def fixInfoWord(self, word: str) -> str: + @classmethod + def fixInfoWord(cls, word: str) -> str: return word.lstrip("#") def nextBlock(self) -> "tuple[str | list[str], str, None] | None": diff --git a/pyglossary/plugins/wiktextract.py b/pyglossary/plugins/wiktextract.py index a3a846922..542f2831a 100644 --- a/pyglossary/plugins/wiktextract.py +++ b/pyglossary/plugins/wiktextract.py @@ -18,7 +18,6 @@ from pyglossary.core import log, pip from pyglossary.glossary_types import EntryType, GlossaryType from pyglossary.io_utils import nullBinaryIO -from pyglossary.langs.writing_system import getWritingSystemFromText from pyglossary.option import ( BoolOption, ListOption, @@ -317,14 +316,14 @@ def writeSenseList( self.writeSense, ) - def writeSenseGloss( + def writeSenseGloss( # noqa: PLR6301 self, hf: "T_htmlfile", text: "str | None", ) -> None: hf.write(text or "") - def writeSenseCategory( + def writeSenseCategory( # noqa: PLR6301 self, hf: "T_htmlfile", category: "dict[str, Any]", @@ -353,7 +352,7 @@ def writeSenseCategories( hf.write("Categories: ") self.makeList(hf, categories, self.writeSenseCategory) - def writeSenseExample( + def writeSenseExample( # noqa: PLR6301 self, hf: "T_htmlfile", example: "dict[str, str]", @@ -392,7 +391,7 @@ def writeSenseExamples( ): self.writeSenseExample(hf, example) - def writeSenseFormOf( + def writeSenseFormOf( # noqa: PLR6301 self, hf: "T_htmlfile", form_of: "dict[str, str]", @@ -458,7 +457,7 @@ def writeTopics( with hf.element("span", style=self.topicStyle): hf.write(topic) - def addWordLink( + def addWordLink( # noqa: PLR6301 self, hf: "T_htmlfile", word: str, @@ -624,8 +623,8 @@ def writeSense( hf.write(ET.Element("br")) + @staticmethod def makeList( - self, hf: "T_htmlfile", input_objects: "list[Any]", processor: "Callable", @@ -652,9 +651,3 @@ def makeList( for el in input_objects: with hf.element("li"): processor(hf, el) - - def getTitleTag(self, sample: str) -> str: - ws = getWritingSystemFromText(sample) - if ws: - return ws.titleTag - return "b" diff --git a/pyglossary/plugins/wordnet.py b/pyglossary/plugins/wordnet.py index 32774ffb4..3eb83bd71 100644 --- a/pyglossary/plugins/wordnet.py +++ b/pyglossary/plugins/wordnet.py @@ -216,11 +216,12 @@ def __init__(self, wordnetdir: str) -> None: self.wordnetdir = wordnetdir self.collector: "dict[str, list[str]]" = defaultdict(list) - def iterlines(self, dict_dir: str) -> Iterator[str]: + @staticmethod + def iterlines(dict_dir: str) -> Iterator[str]: for name in os.listdir(dict_dir): if not name.startswith("data."): continue - with open(os.path.join(dict_dir, name)) as f: + with open(os.path.join(dict_dir, name), encoding="utf-8") as f: for line in f: if not line.startswith(" "): yield line @@ -234,18 +235,18 @@ def prepare(self) -> None: files: dict[str, io.TextIOWrapper] = {} for name in os.listdir(dict_dir): if name.startswith("data.") and name in file2pos: - f = open(os.path.join(dict_dir, name)) # noqa: SIM115 + f = open(os.path.join(dict_dir, name), encoding="utf-8") # noqa: SIM115 for key in file2pos[name]: files[key] = f def a(word: str) -> str: return f'{word}' - for i, line in enumerate(self.iterlines(dict_dir)): - if i % 100 == 0 and i > 0: + for index, line in enumerate(self.iterlines(dict_dir)): + if index % 100 == 0 and index > 0: sys.stdout.write(".") sys.stdout.flush() - if i % 5000 == 0 and i > 0: + if index % 5000 == 0 and index > 0: sys.stdout.write("\n") sys.stdout.flush() if not line or not line.strip(): @@ -261,7 +262,7 @@ def a(word: str) -> str: ) words = synset.words - for i, word in enumerate(words): + for index2, word in enumerate(words): # TODO: move this block to a func synonyms = ", ".join(a(w) for w in words if w != word) synonyms_str = ( @@ -271,10 +272,14 @@ def a(word: str) -> str: ) pointers = defaultdict(list) for pointer in synset.pointers: - if pointer.source and pointer.target and pointer.source - 1 != i: + if ( + pointer.source and + pointer.target and + pointer.source - 1 != index2 + ): continue symbol = pointer.symbol - if symbol and symbol[:1] in (";", "-"): + if symbol and symbol[:1] in {";", "-"}: continue try: symbol_desc = getattr(PointerSymbols, synset.ss_type)[symbol] diff --git a/pyglossary/plugins/wordset.py b/pyglossary/plugins/wordset.py index 61df94a71..4d91d81c9 100644 --- a/pyglossary/plugins/wordset.py +++ b/pyglossary/plugins/wordset.py @@ -85,7 +85,8 @@ def open(self, filename: str) -> None: def __len__(self) -> int: return 0 - def fileNameSortKey(self, fname: str) -> str: + @staticmethod + def fileNameSortKey(fname: str) -> str: fname = splitext(fname)[0] if fname == "misc": return "\x80" diff --git a/pyglossary/plugins/xdxf/__init__.py b/pyglossary/plugins/xdxf/__init__.py index 761bc9949..eadd7f8ff 100644 --- a/pyglossary/plugins/xdxf/__init__.py +++ b/pyglossary/plugins/xdxf/__init__.py @@ -178,7 +178,7 @@ def open(self, filename: str) -> None: ) for _, _elem in context: elem = cast("Element", _elem) - if elem.tag in ("meta_info", "ar", "k", "abr", "dtrn"): + if elem.tag in {"meta_info", "ar", "k", "abr", "dtrn"}: break # every other tag before or is considered info if elem.tag == "abbr_def": @@ -262,8 +262,8 @@ def close(self) -> None: self._file.close() self._file = nullBinaryIO + @staticmethod def tostring( - self, elem: "Element", ) -> str: from lxml import etree as ET @@ -301,7 +301,7 @@ def titles(self, article: "Element") -> "list[str]": return titles - def _mktitle( + def _mktitle( # noqa: PLR6301 self, title_element: "Element", include_opts: "Sequence | None" = None, diff --git a/pyglossary/plugins/xdxf_lax.py b/pyglossary/plugins/xdxf_lax.py index bdee4826d..119ea74fa 100644 --- a/pyglossary/plugins/xdxf_lax.py +++ b/pyglossary/plugins/xdxf_lax.py @@ -219,8 +219,8 @@ def close(self) -> None: self._file.close() self._file = nullBinaryIO + @staticmethod def tostring( - self, elem: "Element", ) -> str: from lxml.html import tostring @@ -258,7 +258,7 @@ def titles(self, article: "Element") -> "list[str]": return titles - def _mktitle( + def _mktitle( # noqa: PLR6301 self, title_element: "Element", include_opts: "Sequence | None" = None, diff --git a/pyglossary/slob.py b/pyglossary/slob.py index 804da9873..b9b8fdd99 100644 --- a/pyglossary/slob.py +++ b/pyglossary/slob.py @@ -270,10 +270,10 @@ def close(self) -> None: def closed(self) -> bool: return len(self._ranges) == 0 - def isatty(self) -> bool: + def isatty(self) -> bool: # noqa: PLR6301 return False - def readable(self) -> bool: + def readable(self) -> bool: # noqa: PLR6301 return True def seek( @@ -291,13 +291,13 @@ def seek( raise ValueError(f"Invalid value for parameter whence: {whence!r}") return self._offset - def seekable(self) -> bool: + def seekable(self) -> bool: # noqa: PLR6301 return True def tell(self) -> int: return self._offset - def writable(self) -> bool: + def writable(self) -> bool: # noqa: PLR6301 return False def read(self, n: "int | None" = -1) -> bytes: @@ -1104,9 +1104,9 @@ def tag(self, name: str, value: str = "") -> None: self._tags[name] = value + @staticmethod def _split_key( - self, - key: "str | tuple[str, str]", + key: str | tuple[str, str], ) -> "tuple[str, str]": if isinstance(key, str): actual_key = key diff --git a/pyglossary/text_reader.py b/pyglossary/text_reader.py index 3143b7339..45373f1ea 100644 --- a/pyglossary/text_reader.py +++ b/pyglossary/text_reader.py @@ -194,10 +194,10 @@ def loadInfo(self) -> "Generator[tuple[int, int], None, None]": self._fileCount = int(fileCountStr) self._glos.setInfo("file_count", "") + @staticmethod def _genDataEntries( - self, - resList: "list[tuple[str, str]]", - resPathSet: "set[str]", + resList: list[tuple[str, str]], + resPathSet: set[str], ) -> "Iterator[DataEntry]": for relPath, fullPath in resList: if relPath in resPathSet: @@ -240,7 +240,8 @@ def __iter__(self) -> "Iterator[EntryType | None]": def __len__(self) -> int: return self._wordCount - def isInfoWord(self, word: str) -> bool: + @classmethod + def isInfoWord(cls, word: str) -> bool: raise NotImplementedError def isInfoWords(self, arg: "str | list[str]") -> bool: @@ -250,7 +251,8 @@ def isInfoWords(self, arg: "str | list[str]") -> bool: return self.isInfoWord(arg[0]) raise TypeError(f"bad argument {arg}") - def fixInfoWord(self, word: str) -> str: + @classmethod + def fixInfoWord(cls, word: str) -> str: raise NotImplementedError def nextBlock(self) -> nextBlockResultType: diff --git a/pyglossary/text_writer.py b/pyglossary/text_writer.py index dcee5fdcc..71f2b03c0 100644 --- a/pyglossary/text_writer.py +++ b/pyglossary/text_writer.py @@ -113,7 +113,7 @@ def _doWriteInfo(self, _file: "io.TextIOBase") -> None: if not (key and value): log.warning(f"skipping info {key=}, {value=}") continue - key = outInfoKeysAliasDict.get(key, key) + key = outInfoKeysAliasDict.get(key, key) # noqa: PLW2901 if not key: continue word = f"##{key}" @@ -122,7 +122,7 @@ def _doWriteInfo(self, _file: "io.TextIOBase") -> None: if not word: continue if defiEscapeFunc is not None: - value = defiEscapeFunc(value) + value = defiEscapeFunc(value) # noqa: PLW2901 if not value: continue _file.write( diff --git a/pyglossary/ui/gtk3_utils/about.py b/pyglossary/ui/gtk3_utils/about.py index 4f904271c..1ce1734fe 100644 --- a/pyglossary/ui/gtk3_utils/about.py +++ b/pyglossary/ui/gtk3_utils/about.py @@ -71,8 +71,8 @@ def __init__( self.show_all() # Something does not work with TextView + @staticmethod def newTabWidgetTextView( - self, text: str, wrap: bool = False, justification: "gtk.Justification | None" = None, @@ -97,8 +97,8 @@ def newTabWidgetTextView( swin.add(tv) return swin + @staticmethod def newTabLabelWidget( - self, text: str, # wrap: bool = False, # justification: "gtk.Justification | None" = None, @@ -124,7 +124,8 @@ def newTabLabelWidget( swin.add(box) return swin - def newTabTitle(self, title: str, icon: str): + @staticmethod + def newTabTitle(title: str, icon: str): box = gtk.Box(orientation=gtk.Orientation.VERTICAL) if icon: box.pack_start(imageFromFile(icon), False, False, 5) diff --git a/pyglossary/ui/gtk4_utils/about.py b/pyglossary/ui/gtk4_utils/about.py index 033cfedbe..3787052f6 100644 --- a/pyglossary/ui/gtk4_utils/about.py +++ b/pyglossary/ui/gtk4_utils/about.py @@ -97,8 +97,8 @@ def __init__( self.show() # Something does not work with TextView + @staticmethod def newTabWidgetTextView( - self, text: str, wrap: bool = False, justification: "gtk.Justification | None" = None, @@ -121,8 +121,8 @@ def newTabWidgetTextView( swin.set_child(tv) return swin + @staticmethod def newTabLabelWidget( - self, text: str, # wrap: bool = False, # justification: "gtk.Justification | None" = None, @@ -148,5 +148,6 @@ def newTabLabelWidget( swin.set_child(box) return swin - def newTabTitle(self, title: str, icon: str): + @staticmethod + def newTabTitle(title: str, icon: str): return AboutTabTitleBox(title, icon) diff --git a/pyglossary/ui/ui_cmd_interactive.py b/pyglossary/ui/ui_cmd_interactive.py index ec0f21170..86d25a06b 100644 --- a/pyglossary/ui/ui_cmd_interactive.py +++ b/pyglossary/ui/ui_cmd_interactive.py @@ -107,7 +107,7 @@ def formatMessage(self): # msg = ANSI(msg) # NOT SUPPORTED return msg # noqa: RET504 - def __pt_formatted_text__(self): + def __pt_formatted_text__(self): # noqa: PLW3201 return [("", self.formatMessage())] @@ -221,7 +221,8 @@ def __init__( fs_action_names = [] self.fs_action_names = fs_action_names - def file_filter(self, _filename: str) -> bool: + @staticmethod + def file_filter(_filename: str) -> bool: # filename is full/absolute file path return True @@ -325,13 +326,14 @@ def __init__( ], ) - def fs_pwd(self, args: "list[str]"): + @staticmethod + def fs_pwd(args: "list[str]"): if args: print(f"extra arguments: {args}") print(os.getcwd()) + @staticmethod def get_ls_l( - self, arg: str, st: "os.stat_result | None" = None, parentDir: str = "", @@ -373,8 +375,8 @@ def fs_ls(self, args: "list[str]"): showTitle = len(args) > 1 # Note: isdir and isfile funcs follow sym links, so no worry about links - for i, arg in enumerate(args): - if i > 0: + for argI, arg in enumerate(args): + if argI > 0: print() if not isdir(arg): @@ -395,17 +397,18 @@ def fs_ls(self, args: "list[str]"): statList = [os.lstat(join(arg, _path)) for _path in contents] maxFileSize = max(st.st_size for st in statList) sizeWidth = len(str(maxFileSize)) - for i, _path in enumerate(contents): + for pathI, path_ in enumerate(contents): print( self.get_ls_l( - _path, + path_, parentDir=arg, - st=statList[i], + st=statList[pathI], sizeWidth=sizeWidth, ), ) - def fs_cd_parent(self, args: "list[str]"): + @staticmethod + def fs_cd_parent(args: "list[str]"): if args: log.error("This command does not take arguments") return @@ -413,7 +416,8 @@ def fs_cd_parent(self, args: "list[str]"): os.chdir(newDir) print(f"Changed current directory to: {newDir}") - def fs_cd(self, args: "list[str]"): + @staticmethod + def fs_cd(args: "list[str]"): if len(args) != 1: log.error("This command takes exactly one argument") return @@ -512,7 +516,8 @@ def askOutputFile(self): False, ) - def pluginByNameOrDesc(self, value: str) -> "PluginProp | None": + @staticmethod + def pluginByNameOrDesc(value: str) -> "PluginProp | None": plugin = pluginByDesc.get(value) if plugin: return plugin @@ -577,7 +582,8 @@ def finish(self): # TODO: how to handle \r and \n in NewlineOption.values? - def getOptionValueSuggestValues(self, option: "Option"): + @staticmethod + def getOptionValueSuggestValues(option: "Option"): if option.values: return [str(x) for x in option.values] if option.typ == "bool": @@ -653,7 +659,7 @@ def askReadOptions(self): ) except (KeyboardInterrupt, EOFError): break - if value == "": + if value == "": # noqa: PLC1901 if optName in self._readOptions: print(f"Unset read-option {optName!r}") del self._readOptions[optName] @@ -728,7 +734,7 @@ def askWriteOptions(self): ) except (KeyboardInterrupt, EOFError): break - if value == "": + if value == "": # noqa: PLC1901 if optName in self._writeOptions: print(f"Unset write-option {optName!r}") del self._writeOptions[optName] @@ -801,7 +807,7 @@ def askConfig(self): value = self.askConfigValue(configKey, option) except (KeyboardInterrupt, EOFError): break - if value == "": + if value == "": # noqa: PLC1901 if configKey in self.config: print(f"Unset config {configKey!r}") del self.config[configKey] diff --git a/pyglossary/ui/ui_gtk.py b/pyglossary/ui/ui_gtk.py index 778c18f7d..a6c85dbf2 100644 --- a/pyglossary/ui/ui_gtk.py +++ b/pyglossary/ui/ui_gtk.py @@ -425,7 +425,7 @@ def valueEdited(self, _cell, path, rawValue): if not prop.customValue: return enable = True - if rawValue == "" and prop.typ != "str": + if rawValue == "" and prop.typ != "str": # noqa: PLC1901 enable = False elif not prop.validateRaw(rawValue): log.error(f"invalid {prop.typ} value: {optName} = {rawValue!r}") @@ -513,7 +513,7 @@ def valueCellClicked(self, path, forceMenu=False) -> bool: prop = self.optionsProp[optName] if prop.typ == "bool": rawValue = model.get_value(itr, self.valueCol) - if rawValue == "": + if rawValue == "": # noqa: PLC1901 value = False else: value, isValid = prop.evaluate(rawValue) diff --git a/pyglossary/ui/ui_gtk4.py b/pyglossary/ui/ui_gtk4.py index 752fde672..def15a974 100644 --- a/pyglossary/ui/ui_gtk4.py +++ b/pyglossary/ui/ui_gtk4.py @@ -428,7 +428,7 @@ def valueEdited(self, _cell, path, rawValue): if not prop.customValue: return enable = True - if rawValue == "" and prop.typ != "str": + if rawValue == "" and prop.typ != "str": # noqa: PLC1901 enable = False elif not prop.validateRaw(rawValue): log.error(f"invalid {prop.typ} value: {optName} = {rawValue!r}") @@ -532,7 +532,7 @@ def valueCellClicked(self, path, forceMenu=False) -> bool: prop = self.optionsProp[optName] if prop.typ == "bool": rawValue = model.get_value(itr, self.valueCol) - if rawValue == "": + if rawValue == "": # noqa: PLC1901 value = False else: value, isValid = prop.evaluate(rawValue) diff --git a/pyglossary/ui/ui_tk.py b/pyglossary/ui/ui_tk.py index 8a841d0f8..863dc01b6 100644 --- a/pyglossary/ui/ui_tk.py +++ b/pyglossary/ui/ui_tk.py @@ -474,9 +474,8 @@ def onDownPress(self, _event): nextDesc = treev.next(selection[0]) if nextDesc: self.setActiveRow(nextDesc) - else: - if self.items: - self.setActiveRow(self.items[0]) + elif self.items: + self.setActiveRow(self.items[0]) treev.focus() def onUpPress(self, _event): @@ -642,8 +641,8 @@ def createOptionsList(self): ] treev.insert("", "end", values=row, iid=optName) # iid should be rowId # adjust column's width if necessary to fit each value - for col_i, value in enumerate(row): - value = str(value) + for col_i, valueTmp in enumerate(row): + value = str(valueTmp) if col_i == 3: value = value.zfill(20) # to reserve window width, because it's hard to resize it later @@ -749,7 +748,7 @@ def valueCellClicked(self, event, optName): return if prop.typ == "bool": rawValue = treev.set(optName, self.valueCol) - if rawValue == "": + if rawValue == "": # noqa: PLC1901 value = False else: value, isValid = prop.evaluate(rawValue) @@ -804,8 +803,8 @@ def callback(): menu.add_cascade(label=groupName, menu=subMenu) maxItemW = max(maxItemW, tkFont.Font().measure(groupName)) else: - for value in propValues: - value = str(value) + for valueTmp in propValues: + value = str(valueTmp) menu.add_command( label=value, command=valueMenuItemSelectedCommand(value), @@ -944,7 +943,7 @@ def __init__( # rootWin.bind("", self.resized) ####################### defaultFont = tkFont.nametofont("TkDefaultFont") - if core.sysName in ("linux", "freebsd"): + if core.sysName in {"linux", "freebsd"}: defaultFont.configure(size=int(defaultFont.cget("size") * 1.4)) #### self.biggerFont = defaultFont.copy() diff --git a/pyglossary/xdxf/transform.py b/pyglossary/xdxf/transform.py index 0b832f5f7..d2123fa2d 100644 --- a/pyglossary/xdxf/transform.py +++ b/pyglossary/xdxf/transform.py @@ -55,7 +55,8 @@ def __init__(self, encoding: str = "utf-8") -> None: "etm": self._write_etm, } - def tostring(self, elem: "Element") -> str: + @staticmethod + def tostring(elem: "Element") -> str: from lxml import etree as ET return ( @@ -68,14 +69,15 @@ def tostring(self, elem: "Element") -> str: .strip() ) - def hasPrevText(self, prev: "None | str | Element") -> bool: + @staticmethod + def hasPrevText(prev: "None | str | Element") -> bool: if isinstance(prev, str): return True if prev is None: return False if prev.tag == "k": return False - if prev.tag in ( + if prev.tag in { "dtrn", "def", "span", @@ -87,7 +89,7 @@ def hasPrevText(self, prev: "None | str | Element") -> bool: "tt", "big", "small", - ): + }: return True if prev.text: return True @@ -112,7 +114,7 @@ def addSep() -> None: hasPrev = self.hasPrevText(prev) trail = False - if parent.tag in ("ar", "font"): + if parent.tag in {"ar", "font"}: if child.startswith("\n"): child = child.lstrip("\n") if hasPrev: @@ -157,7 +159,7 @@ def _write_example(self, hf: "T_htmlfile", elem: "Element") -> None: with hf.element("div"): self._write_iref(hf, child) # NESTED 5 continue - if child.tag in ("ex_orig", "ex_tran"): + if child.tag in {"ex_orig", "ex_tran"}: with hf.element("div"): self.writeChildrenOf(hf, child, stringSep=stringSep) # NESTED 5 continue @@ -207,7 +209,7 @@ def _write_k(self, hf: "T_htmlfile", child: "Element") -> None: with hf.element("b"): self.writeChildrenOf(hf, child) - def _write_mrkd(self, hf: "T_htmlfile", child: "Element") -> None: + def _write_mrkd(self, hf: "T_htmlfile", child: "Element") -> None: # noqa: PLR6301 if not child.text: return with hf.element("span", attrib={"class": child.tag}): @@ -308,22 +310,22 @@ def _write_categ(self, hf: "T_htmlfile", child: "Element") -> None: with hf.element("span", style="background-color: green;"): self.writeChildrenOf(hf, child, stringSep=" ") - def _write_opt(self, hf: "T_htmlfile", child: "Element") -> None: + def _write_opt(self, hf: "T_htmlfile", child: "Element") -> None: # noqa: PLR6301 if child.text: hf.write(" (") hf.write(child.text) hf.write(")") - def _write_img(self, hf: "T_htmlfile", child: "Element") -> None: + def _write_img(self, hf: "T_htmlfile", child: "Element") -> None: # noqa: PLR6301 with hf.element("img", attrib=dict(child.attrib)): pass - def _write_abbr(self, hf: "T_htmlfile", child: "Element") -> None: + def _write_abbr(self, hf: "T_htmlfile", child: "Element") -> None: # noqa: PLR6301 # FIXME: may need an space or newline before it with hf.element("i"): hf.write(f"{child.text}") - def _write_etm(self, hf: "T_htmlfile", child: "Element") -> None: + def _write_etm(self, hf: "T_htmlfile", child: "Element") -> None: # noqa: PLR6301 # Etymology (history and origin) # TODO: formatting? hf.write(f"{child.text}") @@ -374,7 +376,7 @@ def writeChild( stringSep=stringSep, ) - def shouldAddSep( + def shouldAddSep( # noqa: PLR6301 self, child: "str | Element", prev: "str | Element", @@ -384,12 +386,12 @@ def shouldAddSep( return False return True - if child.tag in ("sub", "sup"): + if child.tag in {"sub", "sup"}: return False if isinstance(prev, str): pass - elif prev.tag in ("sub", "sup"): + elif prev.tag in {"sub", "sup"}: return False return True diff --git a/pyglossary/xdxf/xsl_transform.py b/pyglossary/xdxf/xsl_transform.py index d9f7936ee..01915caaf 100644 --- a/pyglossary/xdxf/xsl_transform.py +++ b/pyglossary/xdxf/xsl_transform.py @@ -29,14 +29,18 @@ def __init__(self, encoding: str = "utf-8") -> None: e.msg += f", run `{core.pip} install lxml` to install" raise e - with open(join(rootDir, "pyglossary", "xdxf", "xdxf.xsl")) as f: + with open( + join(rootDir, "pyglossary", "xdxf", "xdxf.xsl"), + encoding="utf-8", + ) as f: xslt_txt = f.read() xslt = ET.XML(xslt_txt) self._transform = ET.XSLT(xslt) self._encoding = encoding - def tostring(self, elem: "_XSLTResultTree | Element") -> str: + @staticmethod + def tostring(elem: "_XSLTResultTree | Element") -> str: from lxml import etree as ET return ( diff --git a/pyproject.toml b/pyproject.toml index ce73f4ac0..f4a47c90c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,7 +54,7 @@ lint.select = [ # "ERA", # eradicate "PD", # pandas-vet "PGH", # pygrep-hooks - # "PL", # Pylint + "PL", # Pylint # "TRY", # tryceratops, they all sound BS # "FLY", # flynt "NPY", # NumPy-specific rules @@ -71,6 +71,16 @@ lint.ignore = [ "DTZ005", # The use of `datetime.datetime.now()` without `tz` argument is not allowed "PGH003", # Use specific rule codes when ignoring type issues + "PLR0904", # Too many public methods (? > 20) + "PLR0912", # Too many branches + "PLR0913", # Too many arguments in function definition + "PLR0914", # Too many local variables + "PLR0915", # Too many statements + "PLR0917", # Too many positional arguments + "PLR2004", # Magic value used in comparison, consider replacing `...` with a constant variable + + "PLC0415", # `import` should be at the top-level of a file + "PLW0603", # Using the global statement to update `mockLog` is discouraged "PT027", # Use `pytest.raises` instead of unittest-style `assertRaises`, why? "PD011", # Use `.to_numpy()` instead of `.values`, WTF? "ICN001", # `tkinter` should be imported as `tk`, WTF? @@ -119,37 +129,25 @@ lint.ignore = [ # since which Python is comma after **kwargs allowd? # Allow autofix for all enabled rules (when `--fix`) is provided. -lint.fixable = [ - "RUF", - "Q", - "UP010", # Unnecessary `__future__`" - "SIM108", # Use ternary operator {contents} instead of if-else-block - "C408", # Unnecessary `dict` call (rewrite as a literal) - "F401", - "E", "F", "W", - "RET", - "I", - "COM", - "TCH", - "ANN", - "W291", - "W293", - "D", - "UP004", - "UP006", - "UP008", - "UP015", - "UP024", - "UP028", - "UP030", - "UP031", - "UP032", - "UP033", - "UP034", - "UP039", - "UP035", # Import from `collections.abc` instead: `Generator|Iterator` - # "TCH003", Move standard library import `...` into a type-checking block -] +# lint.fixable = [ +# "ANN", +# "C408", # Unnecessary `dict` call (rewrite as a literal) +# "COM", +# "D", +# "E", +# "F", +# "W", +# "F401", +# "I", +# "Q", +# "PIE790", +# "RET", +# "RUF", +# # "SIM", +# "SIM108", # Use ternary operator {contents} instead of if-else-block +# "TCH", +# "UP", +# ] lint.unfixable = [] # Exclude a variety of commonly ignored directories. @@ -214,13 +212,21 @@ lint.dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" "PT018", # Assertion should be broken down into multiple parts "D", "RUF015", # Prefer `next(zip(*_list, strict=False))` over single element slice + "PLR2004", # Magic value used in comparison, consider replacing `...` with a constant variable ] "scripts/wiki-formats.py" = ["E501"] "pyglossary/io_utils.py" = ["ANN"] "pyglossary/plugins/babylon_bgl/bgl_reader_debug.py" = ["ANN", "FURB"] "pyglossary/ui/*.py" = ["ANN", "T201", "PERF203"] "pyglossary/ui/*/*.py" = ["ANN", "T201", "PERF203"] -"tests/*.py" = ["ANN", "T201"] +"pyglossary/ui/ui_*.py" = [ + "PLR6301", # Method `...` could be a function, class method, or static method +] +"tests/*.py" = [ + "ANN", + "T201", + "PLR6301", # Method `...` could be a function, class method, or static method +] "*_test.py" = [ "ANN", "T201", diff --git a/scripts/config-doc.py b/scripts/config-doc.py index 5308e03b5..3b6e90a2f 100755 --- a/scripts/config-doc.py +++ b/scripts/config-doc.py @@ -52,7 +52,7 @@ """, ) -with open(join(rootDir, "scripts/term-colors.json")) as _file: +with open(join(rootDir, "scripts/term-colors.json"), encoding="utf-8") as _file: termColors = json.load(_file) @@ -188,5 +188,5 @@ def defaultOptionValue(name, _opt, images): for image in images: text += "\n" + image -with open(join(rootDir, "doc", "config.rst"), mode="w") as _file: +with open(join(rootDir, "doc", "config.rst"), mode="w", encoding="utf-8") as _file: _file.write(text) diff --git a/scripts/entry-filters-doc.py b/scripts/entry-filters-doc.py index e5416649a..c4adec41b 100755 --- a/scripts/entry-filters-doc.py +++ b/scripts/entry-filters-doc.py @@ -93,5 +93,9 @@ def getCommandFlagsMD(name): text = template.render( entryFiltersTable=entryFiltersTable, ) -with open(join(rootDir, "doc", "entry-filters.md"), mode="w") as _file: +with open( + join(rootDir, "doc", "entry-filters.md"), + mode="w", + encoding="utf-8", +) as _file: _file.write(text) diff --git a/scripts/tools-py2toml.py b/scripts/tools-py2toml.py index 9d5291067..720ef83bb 100755 --- a/scripts/tools-py2toml.py +++ b/scripts/tools-py2toml.py @@ -37,5 +37,5 @@ # pprint(tools) - with open(join(toolsDir, f"{p.lname}.toml"), mode="w") as _file: + with open(join(toolsDir, f"{p.lname}.toml"), mode="w", encoding="utf-8") as _file: toml.dump(tools, _file) diff --git a/scripts/wiktextract/extract-schema.py b/scripts/wiktextract/extract-schema.py index 2ef834191..a7efe86ba 100644 --- a/scripts/wiktextract/extract-schema.py +++ b/scripts/wiktextract/extract-schema.py @@ -137,7 +137,7 @@ def parseDict(data: "dict[str, Any]", path: "list[str]", node: Node): with open(jsonl_path, encoding="utf-8") as _file: for line in _file: - line = line.strip() + line = line.strip() # noqa: PLW2901 if not line: continue try: diff --git a/scripts/wiktextract/sort-jsonl.py b/scripts/wiktextract/sort-jsonl.py index 2d36dc75d..9078a5ab0 100755 --- a/scripts/wiktextract/sort-jsonl.py +++ b/scripts/wiktextract/sort-jsonl.py @@ -10,7 +10,7 @@ data: "list[tuple[str, str]]" = [] for line in sys.stdin: - line = line.strip() + line = line.strip() # noqa: PLW2901 if not line: continue row = loads(line) diff --git a/tests/dictzip_test.py b/tests/dictzip_test.py index 5246f2f35..1af49e9cf 100644 --- a/tests/dictzip_test.py +++ b/tests/dictzip_test.py @@ -23,7 +23,7 @@ def setUp(self) -> None: self.test_file_path = Path(self.tempDir) / "test_file.txt" filename = self.test_file_path.name + ".dz" self.result_file_path = self.test_file_path.parent / filename - with open(self.test_file_path, "a") as tmp_file: + with open(self.test_file_path, "a", encoding="utf-8") as tmp_file: tmp_file.write(TEXT) def skip_on_dep(self, method: str) -> None: diff --git a/tests/g_stardict_test.py b/tests/g_stardict_test.py index 5578f191d..588a02afb 100644 --- a/tests/g_stardict_test.py +++ b/tests/g_stardict_test.py @@ -335,7 +335,7 @@ def test_convert_from_stardict_invalid_sametypesequence(self): inputFilename = self.newTempFilePath(f"{fname}.ifo") outputFilename = self.newTempFilePath(f"{fname}.txt") - with open(inputFilename, mode="w") as _file: + with open(inputFilename, mode="w", encoding="utf-8") as _file: _file.write( """StarDict's dict ifo file version=3.0.0 diff --git a/tests/glossary_errors_test.py b/tests/glossary_errors_test.py index 2b90704bf..181f88f42 100644 --- a/tests/glossary_errors_test.py +++ b/tests/glossary_errors_test.py @@ -384,7 +384,7 @@ def test_convert_sameFilename(self): def test_convert_dirExists(self): glos = Glossary() tempFilePath = self.newTempFilePath("test_convert_dirExists") - with open(tempFilePath, mode="w") as _file: + with open(tempFilePath, mode="w", encoding="utf-8") as _file: _file.write("") res = glos.convert( inputFilename="test5.txt", diff --git a/tests/glossary_v2_errors_test.py b/tests/glossary_v2_errors_test.py index da3644ef2..b91c5e29b 100644 --- a/tests/glossary_v2_errors_test.py +++ b/tests/glossary_v2_errors_test.py @@ -371,7 +371,7 @@ def test_convert_sameFilename(self): def test_convert_dirExists(self): glos = Glossary() tempFilePath = self.newTempFilePath("test_convert_dirExists") - with open(tempFilePath, mode="w") as _file: + with open(tempFilePath, mode="w", encoding="utf-8") as _file: _file.write("") res = glos.convert( ConvertArgs( diff --git a/tests/slob_test.py b/tests/slob_test.py index ac9822dfe..a6433a185 100644 --- a/tests/slob_test.py +++ b/tests/slob_test.py @@ -134,10 +134,10 @@ def setUp(self): for k, t, v in self.data: if isinstance(k, str): - k = (k,) + k = (k,) # noqa: PLW2901 for key in k: if isinstance(key, tuple): - key, fragment = key + key, fragment = key # noqa: PLW2901 else: fragment = "" self.all_keys.append(key)