From 19675337cc6b5209f5fc74e8aea5363dd4a0fa9b Mon Sep 17 00:00:00 2001 From: Saeed Rasooli Date: Sun, 14 Apr 2024 04:58:41 +0330 Subject: [PATCH] DSL: detect utf-16-le and utf-16-be encodings, #555 --- pyglossary/plugins/dsl/__init__.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/pyglossary/plugins/dsl/__init__.py b/pyglossary/plugins/dsl/__init__.py index 77fc91244..6e7e4c241 100644 --- a/pyglossary/plugins/dsl/__init__.py +++ b/pyglossary/plugins/dsl/__init__.py @@ -230,7 +230,12 @@ def open( self.processHeaderLine(line) def detectEncoding(self) -> str: - for testEncoding in ("utf-8", "utf-16"): + for testEncoding in ( + "utf-8", + "utf-16", + "utf-16-le", + "utf-16-be", + ): with compressionOpen( self._filename, dz=True, @@ -240,7 +245,7 @@ def detectEncoding(self) -> str: try: for _ in range(10): fileObj.readline() - except UnicodeDecodeError: + except (UnicodeDecodeError, UnicodeError): log.info(f"Encoding of DSL file is not {testEncoding}") continue else: