diff --git a/doc/p/wiktextract.md b/doc/p/wiktextract.md index b723ddbd4..17cc17ac0 100644 --- a/doc/p/wiktextract.md +++ b/doc/p/wiktextract.md @@ -27,6 +27,7 @@ | example_padding | `10px 20px` | str | Padding for examples (css value) | | audio | `True` | bool | Enable audio | | audio_formats | `['ogg', 'mp3']` | list | List of audio formats to use | +| categories | `False` | bool | Enable categories | ### Dependencies for reading diff --git a/plugins-meta/index.json b/plugins-meta/index.json index c77264a2c..145559aa2 100644 --- a/plugins-meta/index.json +++ b/plugins-meta/index.json @@ -1820,6 +1820,11 @@ "class": "ListOption", "type": "list", "comment": "List of audio formats to use" + }, + "categories": { + "class": "BoolOption", + "type": "bool", + "comment": "Enable categories" } }, "canRead": true, @@ -1833,7 +1838,8 @@ "audio_formats": [ "ogg", "mp3" - ] + ], + "categories": false }, "readDepends": { "lxml": "lxml" diff --git a/pyglossary/plugins/wiktextract.py b/pyglossary/plugins/wiktextract.py index 50f0eccef..742962078 100644 --- a/pyglossary/plugins/wiktextract.py +++ b/pyglossary/plugins/wiktextract.py @@ -78,6 +78,9 @@ "audio_formats": ListOption( comment="List of audio formats to use", ), + "categories": BoolOption( + comment="Enable categories", + ), } @@ -98,6 +101,8 @@ class Reader: _audio_formats: list[str] = ["ogg", "mp3"] + _categories: bool = False + topicStyle = ( "color:white;" "background:green;" @@ -167,7 +172,7 @@ def __iter__(self) -> Iterator[EntryType]: def warning(self, msg: str) -> None: self._warnings[msg] += 1 - def makeEntry(self, data: dict[str, Any]) -> EntryType: + def makeEntry(self, data: dict[str, Any]) -> EntryType: # noqa: PLR0912 from lxml import etree as ET glos = self._glos @@ -234,12 +239,13 @@ def br() -> Element: with hf.element("div"): hf.write(f"Etymology: {etymology}") - categories = [] - for sense in senses: - senseCats = sense.get("categories") - if senseCats: - categories += senseCats - self.writeSenseCategories(hf_, categories) + if self._categories: + categories = [] + for sense in senses: + senseCats = sense.get("categories") + if senseCats: + categories += senseCats + self.writeSenseCategories(hf_, categories) defi = f.getvalue().decode("utf-8") # defi = defi.replace("\xa0", " ") # do we need to do this?