Commit

fix type checking errors
ilius committed Dec 17, 2024
1 parent ea74d75 commit 0b785e2
Showing 18 changed files with 154 additions and 56 deletions.
5 changes: 3 additions & 2 deletions pyglossary/core.py
@@ -22,9 +22,10 @@

def exc_note(e: Exception, note: str) -> Exception:
try:
e.add_note(note) # # pyright: ignore[reportAttributeAccessIssue]
e.add_note(note) # pyright: ignore[reportAttributeAccessIssue]
except AttributeError:
e.msg += "\n" + note # # pyright: ignore[reportAttributeAccessIssue]
if hasattr(e, "msg"):
e.msg += "\n" + note # pyright: ignore[reportAttributeAccessIssue]
return e
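
Note on the hunk above: BaseException.add_note() only exists on Python 3.11+, so on older interpreters the AttributeError branch runs, and only some exception types (SyntaxError, for example) actually carry a .msg attribute, which is what the new hasattr guard checks before appending. A minimal standalone sketch of the same fallback, not taken from the commit:

import sys

def attach_note(e: Exception, note: str) -> Exception:
    if sys.version_info >= (3, 11):
        e.add_note(note)  # shown by the traceback machinery on 3.11+
    elif hasattr(e, "msg"):  # e.g. SyntaxError has .msg; ValueError does not
        e.msg += "\n" + note
    return e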


6 changes: 5 additions & 1 deletion pyglossary/entry.py
@@ -104,6 +104,10 @@ def s_word(self) -> str:
def l_word(self) -> list[str]:
return [self._fname]

@property
def lb_word(self) -> list[bytes]:
return [self._fname.encode("utf-8")]

@property
def defi(self) -> str:
return f"File: {self._fname}"
@@ -194,7 +198,7 @@ def isData(cls) -> bool:

@staticmethod
def getRawEntrySortKey(
key: Callable[[bytes], Any],
key: Callable[[list[str]], Any],
) -> Callable[[RawEntryType], Any]:
def newKey(x: RawEntryType) -> Any: # noqa: ANN401
# x is rawEntry, so x[2:] is list[bytes]: list of words in bytes
61 changes: 59 additions & 2 deletions pyglossary/entry_base.py
@@ -3,26 +3,51 @@
from __future__ import annotations

import typing
from typing import TYPE_CHECKING

# from typing import TYPE_CHECKING
if TYPE_CHECKING:
from collections.abc import Callable

__all__ = ["BaseEntry", "MultiStr"]

MultiStr: typing.TypeAlias = "str | list[str]"


class BaseEntry:
class BaseEntry: # noqa: PLR0904
__slots__: list[str] = [
"_word",
]

def __init__(self) -> None:
self._word: str | list[str]

def isData(self) -> bool: ...

def getFileName(self) -> str:
raise NotImplementedError

@property
def data(self) -> bytes:
raise NotImplementedError

def size(self) -> int:
raise NotImplementedError

def save(self, directory: str) -> str:
raise NotImplementedError

@property
def s_word(self) -> str:
raise NotImplementedError

@property
def l_word(self) -> list[str]:
raise NotImplementedError

@property
def lb_word(self) -> list[bytes]:
raise NotImplementedError

@property
def defi(self) -> str:
raise NotImplementedError
@@ -36,3 +61,35 @@ def b_word(self) -> bytes:
def b_defi(self) -> bytes:
"""Returns definition in bytes."""
return self.defi.encode("utf-8")

@property
def defiFormat(self) -> str:
# TODO: type: Literal["m", "h", "x", "b"]
...

@defiFormat.setter
def defiFormat(self, defiFormat: str) -> None:
# TODO: type: Literal["m", "h", "x", "b"]
...

def detectDefiFormat(self, default: str = "") -> str: ...

def addAlt(self, alt: str) -> None: ...

def editFuncWord(self, func: Callable[[str], str]) -> None: ...

def editFuncDefi(self, func: Callable[[str], str]) -> None: ...

def strip(self) -> None: ...

def replaceInWord(self, source: str, target: str) -> None: ...

def replaceInDefi(self, source: str, target: str) -> None: ...

def replace(self, source: str, target: str) -> None: ...

def byteProgress(self) -> tuple[int, int] | None: ...

def removeEmptyAndDuplicateAltWords(self) -> None: ...

def stripFullHtml(self) -> str | None: ...
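
For orientation, the members above that raise NotImplementedError look like the ones a concrete entry class has to override, while the bare "..." bodies appear to exist mainly so type checkers see the full interface. A hypothetical minimal subclass (a sketch, not part of pyglossary) covering just the word/definition side could look like this:

class PlainEntry(BaseEntry):
    __slots__ = ["_defi"]

    def __init__(self, word: str | list[str], defi: str) -> None:
        self._word = word
        self._defi = defi

    @property
    def s_word(self) -> str:
        # first headword as a plain string
        return self._word if isinstance(self._word, str) else self._word[0]

    @property
    def l_word(self) -> list[str]:
        return [self._word] if isinstance(self._word, str) else self._word

    @property
    def lb_word(self) -> list[bytes]:
        return [w.encode("utf-8") for w in self.l_word]

    @property
    def defi(self) -> str:
        return self._defi
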
12 changes: 10 additions & 2 deletions pyglossary/glossary.py
@@ -164,15 +164,23 @@ def sortWords(
self._iter = self._loadedEntryGen()

@classmethod
def detectInputFormat(cls, *args, **kwargs) -> DetectedFormat | None: # pyright: ignore[reportIncompatibleMethodOverride]
def detectInputFormat( # type: ignore # pyright: ignore[reportIncompatibleMethodOverride]
cls,
*args,
**kwargs,
) -> DetectedFormat | None:
try:
return GlossaryCommon.detectInputFormat(*args, **kwargs)
except Error as e:
log.critical(str(e))
return None

@classmethod
def detectOutputFormat(cls, *args, **kwargs) -> DetectedFormat | None: # pyright: ignore[reportIncompatibleMethodOverride]
def detectOutputFormat( # type: ignore # pyright: ignore[reportIncompatibleMethodOverride]
cls,
*args,
**kwargs,
) -> DetectedFormat | None:
try:
return GlossaryCommon.detectOutputFormat(*args, **kwargs)
except Error as e:
4 changes: 3 additions & 1 deletion pyglossary/glossary_types.py
@@ -38,7 +38,7 @@


class EntryType(typing.Protocol): # noqa: PLR0904
def __init__(self) -> None: ...
# def __init__(self) -> None: ...

def isData(self) -> bool: ...

@@ -225,6 +225,8 @@ def stripFullHtml(

def preventDuplicateWords(self) -> None: ...

def mergeEntriesWithSameHeadwordPlaintext(self) -> None: ...

def removeHtmlTagsAll(self) -> None: ...

def addCleanupPath(self, path: str) -> None: ...
6 changes: 3 additions & 3 deletions pyglossary/glossary_v2.py
@@ -273,13 +273,13 @@ def _entryFromRaw(self, rawEntry: RawEntryType) -> EntryType:
fname = rawEntry[2].decode("utf-8")
if isinstance(fname, list):
fname = fname[0] # NESTED 4
return DataEntry(fname, tmpPath=defi) # pyright: ignore[reportReturnType]
return DataEntry(fname, tmpPath=defi)

return Entry(
[b.decode("utf-8") for b in rawEntry[2:]],
defi,
defiFormat=defiFormat,
) # pyright: ignore[reportReturnType]
)

@property
def rawEntryCompress(self) -> bool:
@@ -430,7 +430,7 @@ def _loadedEntryGen(self) -> Iterator[EntryType]:
for _entry in self._data:
entry = _entry
for f in filters:
entry = f.run(entry) # pyright: ignore[reportArgumentType]
entry = f.run(entry) # type: ignore # pyright: ignore[reportArgumentType]
# assert entry # TODO: measure running time in non-optimized mode
yield entry # pyright: ignore[reportReturnType]
self.progressEnd()
7 changes: 4 additions & 3 deletions pyglossary/plugin_prop.py
@@ -331,10 +331,11 @@ def _getOptionAttrNamesFromClass(rwclass: type) -> list[str]:
return nameList

def _getOptionsFromClass(self, rwclass: type | None) -> dict[str, Any]:
optionsProp = self.optionsProp
options = odict()
if rwclass is None:
return options
return {}

optionsProp = self.optionsProp
options: dict[str, Any] = odict()

for attrName in self._getOptionAttrNamesFromClass(rwclass):
name = attrName[1:]
2 changes: 1 addition & 1 deletion pyglossary/plugins/appledict/_content.py
@@ -225,7 +225,7 @@ def prepare_content_with_soup( # noqa: PLR0912
tag.name = "del"

if title and "<h" not in body:
h1 = BeautifulSoup.Tag(name="h1")
h1 = BeautifulSoup.Tag(name="h1") # type: ignore
h1.string = title
soup.insert(0, h1)
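
Side note on the hunk above, as a standalone sketch rather than the plugin's code: bs4's documented way to create a new element is soup.new_tag(), which tends to be friendlier to type checkers than constructing Tag directly; whether it would remove the need for the ignore comment here is only an assumption.

from bs4 import BeautifulSoup

soup = BeautifulSoup("<body><p>hello</p></body>", "html.parser")
h1 = soup.new_tag("h1")  # create the element via the parent soup
h1.string = "Title"
soup.body.insert(0, h1)  # prepend it, similar to what the plugin does
print(soup)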

10 changes: 4 additions & 6 deletions pyglossary/plugins/appledict_bin/appledict_properties.py
@@ -55,12 +55,10 @@ def from_metadata(metadata: dict[str, Any]) -> AppleDictProperties:
dictionaryIndexes: list[dict[str, Any]] | None = metadata.get(
"IDXDictionaryIndexes",
)
if dictionaryIndexes:
key_text_metadata: dict[str, Any] = dictionaryIndexes[0]
body_metadata: dict[str, Any] = dictionaryIndexes[2]
else:
key_text_metadata: dict[str, Any] = {}
body_metadata: dict[str, Any] = {}
key_text_metadata: dict[str, Any] = (
dictionaryIndexes[0] if dictionaryIndexes else {}
)
body_metadata: dict[str, Any] = dictionaryIndexes[2] if dictionaryIndexes else {}

key_text_data_fields = key_text_metadata.get("IDXIndexDataFields", {})
key_text_variable_fields = [
3 changes: 2 additions & 1 deletion pyglossary/plugins/quickdic6/quickdic.py
@@ -94,6 +94,7 @@ def add_index( # noqa: PLR0913
index_entries: list[IndexEntryType] = []
for token, token_norm, ttype, tidx in tokens:
prev_token = index_entries[-1][0] if index_entries else ""
html_indices: list[int]
if prev_token == token:
(
token, # noqa: PLW2901
@@ -107,7 +108,7 @@
index_start = len(rows)
count = 0
token_norm = "" if token == token_norm else token_norm # noqa: PLW2901
html_indices: list[int] = []
html_indices = []
rows.append((1, i_entry))
if ttype == 4:
if tidx not in html_indices:
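
The appledict_properties hunk above and the quickdic hunk just shown appear to address the same kind of pyright complaint, a variable annotated in more than one place: the former collapses the branches into conditional expressions, the latter declares the type once before the if/else. A generic sketch of the second pattern, with illustrative names only:

def pick_values(reuse: bool, old_values: list[int]) -> list[int]:
    values: list[int]  # single declaration, visible to both branches
    if reuse:
        values = old_values
    else:
        values = []
    return values
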
16 changes: 7 additions & 9 deletions pyglossary/plugins/stardict_textual.py
@@ -180,22 +180,20 @@ def renderDefiList(
defiFormatSet.update(_type for _, _type in defisWithFormat)

if len(defiFormatSet) == 1:
defis = [_defi for _defi, _ in defisWithFormat]
format_ = defiFormatSet.pop()
if format_ == "h":
return "\n<hr>".join(defis), format_
return "\n".join(defis), format_
return "\n<hr>".join([defi for defi, _ in defisWithFormat]), format_
return "\n".join([defi for defi, _ in defisWithFormat]), format_

# convert plaintext or xdxf to html
defis: list[str] = []
for defi_, format_ in defisWithFormat:
defi = defi_
if format_ == "m":
defi = defi.replace("\n", "<br/>")
defi = f"<pre>{defi}</pre>"
defis.append("<pre>" + defi_.replace("\n", "<br/>") + "</pre>")
elif format_ == "x":
defi = self.xdxf_transform(defi)
defis.append(defi)
defis.append(self.xdxf_transform(defi_))
else:
defis.append(defi_)
return "\n<hr>\n".join(defis), "h"

def __iter__(self) -> Iterator[EntryType]:
Expand All @@ -213,7 +211,7 @@ def __iter__(self) -> Iterator[EntryType]:
elem = cast("Element", _elem)
words: list[str] = []
defisWithFormat: list[tuple[str, str]] = []
for child in elem.getchildren():
for child in elem.iterchildren():
if not child.text:
continue
if child.tag in {"key", "synonym"}:
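
The second hunk also moves off Element.getchildren(), which is deprecated in lxml and was removed from the standard-library ElementTree in Python 3.9; iterating with iterchildren() (or over the element itself) is the supported form. A tiny standalone illustration with made-up XML:

from lxml import etree

article = etree.fromstring(
    "<article><key>word</key><definition type='m'>plain text</definition></article>"
)
for child in article.iterchildren():
    print(child.tag, (child.text or "").strip())
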
18 changes: 10 additions & 8 deletions pyglossary/plugins/wiktextract.py
@@ -2,6 +2,7 @@
from __future__ import annotations

import collections
from collections import Counter
from io import BytesIO, IOBase
from json import loads as json_loads
from typing import TYPE_CHECKING, cast
@@ -140,7 +141,7 @@ def open(
self._glos.setInfo("definition_has_headwords", "True")

self._file = cfile
self._warnings = collections.Counter()
self._warnings: Counter[str] = collections.Counter()

def close(self) -> None:
self._file.close()
@@ -366,12 +367,13 @@
def writeSenseExample( # noqa: PLR6301, PLR0912
self,
hf: T_htmlfile,
example: dict[str, str],
example: dict[str, str | list],
) -> None:
# example keys: text, "english", "ref", "type"
textList: list[tuple[str, str]] = []
text_ = example.pop("example", "")
textList: list[tuple[str | None, str]] = []
text_: str | list = example.pop("example", "")
if text_:
assert isinstance(text_, str)
textList.append((None, text_))

example.pop("ref", "")
@@ -380,15 +382,15 @@ def writeSenseExample( # noqa: PLR6301, PLR0912
for key, value in example.items():
if not value:
continue
prefix = key
prefix: str | None = key
if prefix in ("text",): # noqa: PLR6201, FURB171
prefix = None
if isinstance(value, str):
textList.append((prefix, value))
elif isinstance(value, list):
for item in value:
if isinstance(item, str):
textList.append((prefix, value))
textList.append((prefix, item))
elif isinstance(item, list):
textList += [(prefix, item2) for item2 in item]
else:
Expand All @@ -397,7 +399,7 @@ def writeSenseExample( # noqa: PLR6301, PLR0912
if not textList:
return

def writePair(prefix: str, text: str) -> None:
def writePair(prefix: str | None, text: str) -> None:
if prefix:
with hf.element("b"):
hf.write(prefix)
@@ -417,7 +419,7 @@ def writePair(prefix: str, text: str) -> None:
def writeSenseExamples(
self,
hf: T_htmlfile,
examples: list[dict[str, str]] | None,
examples: list[dict[str, str | list]] | None,
) -> None:
from lxml import etree as ET
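
The writeSenseExample changes above widen the example values to str | list and then rely on isinstance checks (plus one assert) so the checker can narrow each branch. A generic sketch of that narrowing pattern, with assumed data:

def flatten_text(value: str | list[str]) -> list[str]:
    if isinstance(value, str):
        return [value]  # narrowed to str here
    return [v for v in value if v]  # narrowed to list[str] here

print(flatten_text("hello"))
print(flatten_text(["a", "", "b"]))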
