diff --git a/fetcher.log b/fetcher.log index 3b4759d..f2da7af 100644 --- a/fetcher.log +++ b/fetcher.log @@ -18468,3 +18468,39 @@ libarchive.exception.ArchiveError: Invalid string table (errno=22, retcode=-30, [2024-05-12 23:53:46] INFO root fetch - processPackages [ediri-scaleway]: Remove: archive file [/home/abi/fetcher/tmp/ediri_scaleway-2.39.0.tar.gz] [2024-05-12 23:53:46] INFO root fetch - main [MainThread]: End processing [ediri-scaleway]: success [2024-05-12 23:53:46] INFO root fetch - main [MainThread]: Store serial: [23174235] +[2024-05-12 23:55:50] INFO root fetch - main [MainThread]: Get Changelog since Serial: [23174235] +[2024-05-12 23:55:55] INFO root fetch - main [MainThread]: Found 5 changed packages +[2024-05-12 23:55:55] INFO root fetch - processPackages [ThreadPoolExecutor-0_0]: Start processing: aspreadz +[2024-05-12 23:55:55] INFO root fetch - processPackages [ThreadPoolExecutor-0_1]: Start processing: tghtml +[2024-05-12 23:55:55] INFO root fetch - processPackages [ThreadPoolExecutor-0_2]: Start processing: pyrepka +[2024-05-12 23:55:55] INFO root fetch - processPackages [ThreadPoolExecutor-0_3]: Start processing: marcel +[2024-05-12 23:55:55] INFO root fetch - processPackages [ThreadPoolExecutor-0_4]: Start processing: JsMacrosAC +[2024-05-12 23:55:55] INFO root fetch - processPackages [JsMacrosAC]: New version: [1.9.1.5495966] Old Version: [1.9.0.873835] +[2024-05-12 23:55:55] INFO root fetch - processPackages [marcel]: New version: [0.9.9] Old Version: [0.9.8] +[2024-05-12 23:55:55] INFO root fetch - processPackages [JsMacrosAC]: Downloading https://files.pythonhosted.org/packages/96/53/06b506599acb24833e20cf6a1b764657259c1150a2114ff6c6d2a3a32a9a/JsMacrosAC-1.9.0.873835.tar.gz +[2024-05-12 23:55:55] INFO root fetch - processPackages [marcel]: Downloading https://files.pythonhosted.org/packages/3a/07/c626f18aa14df89e1f194e115cbffaa2d99bcfc736f7251795e2f889c0d6/marcel-0.9.8-py3-none-any.whl +[2024-05-12 23:55:55] WARNING root fetch - processPackages [aspreadz]: Skipping, unable to determine old version +[2024-05-12 23:55:55] INFO root fetch - main [MainThread]: End processing [aspreadz] error +[2024-05-12 23:55:55] INFO root fetch - processPackages [tghtml]: New version: [1.1.5] Old Version: [1.1.4] +[2024-05-12 23:55:55] WARNING root fetch - processPackages [pyrepka]: Skipping, unable to determine old version +[2024-05-12 23:55:55] INFO root fetch - main [MainThread]: End processing [pyrepka] error +[2024-05-12 23:55:56] INFO root fetch - processPackages [tghtml]: Downloading https://files.pythonhosted.org/packages/6b/aa/bf8640618de515c5aaced63888c6174c73e3e0a0ec49eba26f2f369b2828/tghtml-1.1.4.tar.gz +[2024-05-12 23:55:56] INFO root fetch - processPackages [JsMacrosAC]: Downloading https://files.pythonhosted.org/packages/6d/53/09847bd056cd1e84da8d6af935709859ceb922708eb3bf1a50f82f93c16c/JsMacrosAC-1.9.1.5495966.tar.gz +[2024-05-12 23:55:56] INFO root fetch - processPackages [marcel]: Downloading https://files.pythonhosted.org/packages/4b/3d/fd4bc1e7906ea7d19b4ad1010b8dce76e96202ac6886bcab6162db5f992f/marcel-0.9.9-py3-none-any.whl +[2024-05-12 23:55:56] INFO root fetch - processPackages [JsMacrosAC]: executing diffoscope +[2024-05-12 23:55:56] INFO root fetch - processPackages [JsMacrosAC]: podman run --user 0:0 --rm -w /home/abi/fetcher -v /home/abi/fetcher/tmp:/home/abi/fetcher/tmp:ro -v /home/abi/fetcher/20240512:/home/abi/fetcher/20240512:rw registry.salsa.debian.org/reproducible-builds/diffoscope --no-progress tmp/JsMacrosAC-1.9.0.873835.tar.gz tmp/JsMacrosAC-1.9.1.5495966.tar.gz --markdown 20240512/J/JsMacrosAC/1.9.0.873835-1.9.1.5495966/README.md --exclude *.pyd +[2024-05-12 23:55:56] INFO root fetch - processPackages [marcel]: executing diffoscope +[2024-05-12 23:55:56] INFO root fetch - processPackages [marcel]: podman run --user 0:0 --rm -w /home/abi/fetcher -v /home/abi/fetcher/tmp:/home/abi/fetcher/tmp:ro -v /home/abi/fetcher/20240512:/home/abi/fetcher/20240512:rw registry.salsa.debian.org/reproducible-builds/diffoscope --no-progress tmp/marcel-0.9.8-py3-none-any.whl.zip tmp/marcel-0.9.9-py3-none-any.whl.zip --markdown 20240512/m/marcel/0.9.8-0.9.9/README.md --exclude *.pyd +[2024-05-12 23:55:56] INFO root fetch - processPackages [tghtml]: Downloading https://files.pythonhosted.org/packages/9a/cc/b570a5991ed3d819fcb95a982e65fc4773070291a1f7e90fbc0002c09c09/tghtml-1.1.5.tar.gz +[2024-05-12 23:55:56] INFO root fetch - processPackages [tghtml]: executing diffoscope +[2024-05-12 23:55:56] INFO root fetch - processPackages [tghtml]: podman run --user 0:0 --rm -w /home/abi/fetcher -v /home/abi/fetcher/tmp:/home/abi/fetcher/tmp:ro -v /home/abi/fetcher/20240512:/home/abi/fetcher/20240512:rw registry.salsa.debian.org/reproducible-builds/diffoscope --no-progress tmp/tghtml-1.1.4.tar.gz tmp/tghtml-1.1.5.tar.gz --markdown 20240512/t/tghtml/1.1.4-1.1.5/README.md --exclude *.pyd +[2024-05-12 23:55:56] INFO root fetch - processPackages [tghtml]: Remove: archive file [/home/abi/fetcher/tmp/tghtml-1.1.4.tar.gz] +[2024-05-12 23:55:56] INFO root fetch - processPackages [tghtml]: Remove: archive file [/home/abi/fetcher/tmp/tghtml-1.1.5.tar.gz] +[2024-05-12 23:55:56] INFO root fetch - main [MainThread]: End processing [tghtml]: success +[2024-05-12 23:55:57] INFO root fetch - processPackages [marcel]: Remove: archive file [/home/abi/fetcher/tmp/marcel-0.9.8-py3-none-any.whl.zip] +[2024-05-12 23:55:57] INFO root fetch - processPackages [marcel]: Remove: archive file [/home/abi/fetcher/tmp/marcel-0.9.9-py3-none-any.whl.zip] +[2024-05-12 23:55:57] INFO root fetch - main [MainThread]: End processing [marcel]: success +[2024-05-12 23:55:58] INFO root fetch - processPackages [JsMacrosAC]: Remove: archive file [/home/abi/fetcher/tmp/JsMacrosAC-1.9.0.873835.tar.gz] +[2024-05-12 23:55:58] INFO root fetch - processPackages [JsMacrosAC]: Remove: archive file [/home/abi/fetcher/tmp/JsMacrosAC-1.9.1.5495966.tar.gz] +[2024-05-12 23:55:58] INFO root fetch - main [MainThread]: End processing [JsMacrosAC]: success +[2024-05-12 23:55:58] INFO root fetch - main [MainThread]: Store serial: [23174287] diff --git a/serial b/serial index c06635e..8b3ad0b 100644 --- a/serial +++ b/serial @@ -1 +1 @@ -23174235 \ No newline at end of file +23174287 \ No newline at end of file diff --git a/t/tghtml/1.1.4-1.1.5/README.md b/t/tghtml/1.1.4-1.1.5/README.md new file mode 100644 index 0000000..619349a --- /dev/null +++ b/t/tghtml/1.1.4-1.1.5/README.md @@ -0,0 +1,135 @@ +# Comparing `tmp/tghtml-1.1.4.tar.gz` & `tmp/tghtml-1.1.5.tar.gz` + +## filetype from file(1) + +```diff +@@ -1 +1 @@ +-gzip compressed data, was "tghtml-1.1.4.tar", max compression ++gzip compressed data, was "tghtml-1.1.5.tar", max compression +``` + +## Comparing `tghtml-1.1.4.tar` & `tghtml-1.1.5.tar` + +### file list + +```diff +@@ -1,5 +1,5 @@ +--rw-r--r-- 0 0 0 1072 2023-12-31 10:33:23.146381 tghtml-1.1.4/LICENSE +--rw-r--r-- 0 0 0 424 2023-12-31 13:51:51.923667 tghtml-1.1.4/pyproject.toml +--rw-r--r-- 0 0 0 91 2023-12-31 13:51:56.130667 tghtml-1.1.4/tghtml/__init__.py +--rw-r--r-- 0 0 0 5228 2023-12-31 13:44:52.977692 tghtml-1.1.4/tghtml/core.py +--rw-r--r-- 0 0 0 507 1970-01-01 00:00:00.000000 tghtml-1.1.4/PKG-INFO ++-rw-r--r-- 0 0 0 1072 2024-05-12 21:01:53.124555 tghtml-1.1.5/LICENSE ++-rw-r--r-- 0 0 0 424 2024-05-12 21:46:47.537436 tghtml-1.1.5/pyproject.toml ++-rw-r--r-- 0 0 0 91 2024-05-12 21:01:53.124555 tghtml-1.1.5/tghtml/__init__.py ++-rw-r--r-- 0 0 0 5306 2024-05-12 21:46:29.272374 tghtml-1.1.5/tghtml/core.py ++-rw-r--r-- 0 0 0 558 1970-01-01 00:00:00.000000 tghtml-1.1.5/PKG-INFO +``` + +### Comparing `tghtml-1.1.4/LICENSE` & `tghtml-1.1.5/LICENSE` + + * *Files identical despite different names* + +### Comparing `tghtml-1.1.4/tghtml/core.py` & `tghtml-1.1.5/tghtml/core.py` + + * *Files 2% similar despite different names* + +```diff +@@ -1,7 +1,9 @@ ++import re ++ + from dataclasses import dataclass, field + from readability import Document + + from bs4 import BeautifulSoup, Tag + + + def get_tag_content(tag: Tag) -> str: +@@ -15,15 +17,15 @@ + "em", + "code", + "s", + "strike", + "del", + "u", + "pre", +- "blockquote" ++ "blockquote", + ] + + + @dataclass + class TgHTML: + html: str + blocklist: list | tuple = () +@@ -43,18 +45,22 @@ + return self.parsed + + def __str__(self) -> str: + return self.parsed + + @property + def parsed(self): +- print(self.soup) + self._filter() + self._clean() +- return self.html.strip().replace("\n", "\n\n") ++ ++ return ( ++ re.sub("\n{2,}", "\n", self.html.strip().replace("\ufeff", "\n")) ++ .replace("JDAN_EXTRA_SPACE", "\n") ++ .replace("\n", "\n\n") ++ ) + + def _filter(self): + for p in self.soup.findAll("p"): + if "Это статья о" in p.text or "Vide etiam paginam discretivam:" in p.text: + p.replace_with("") + + elif p.text.replace("\n", "") == "": +@@ -103,16 +109,15 @@ + tag.replace_with("") + except Exception: + pass + + for tag in self.soup.find_all("span"): + if ( + getattr(tag, "attrs", {}) +- or {} +- .get("style", "") ++ or {}.get("style", "") + .strip() + .replace(" ", "") + .find("font-style:italic") + != -1 + ): + tag.name = "i" + +@@ -129,23 +134,22 @@ + BeautifulSoup( + "

" + get_tag_content(tag) + "

", "html.parser" + ) + ) + + for tag in self.soup.find_all(["cite"]): + tag.replace_with( +- BeautifulSoup( +- " " + get_tag_content(tag) + "", "html.parser" +- ) ++ BeautifulSoup(" " + get_tag_content(tag) + "", "html.parser") + ) + + for tag in self.soup.find_all("div", {"class": "ts-Цитата"}): + child = tag.find("blockquote") + new_tag = BeautifulSoup( +- TgHTML(get_tag_content(child), allowed_tags=["b", "i"]).parsed, "html.parser" ++ TgHTML(get_tag_content(child), allowed_tags=["b", "i"]).parsed, ++ "html.parser", + ) + + tag.replace_with(new_tag) + + for tag in self.soup.find_all("blockquote"): + tag.wrap(Tag(name="p")) +``` +