diff --git a/examples/lp_parser.py b/examples/lp_parser.py deleted file mode 100644 index 573cdca..0000000 --- a/examples/lp_parser.py +++ /dev/null @@ -1,44 +0,0 @@ -# -# Outdated version -# - -import re - -from tuparser import TelegraphParser, YamlOutputFile - -EXCEPTIONS = ["dmca@telegram.org"] - -LOGIN_REGEX = re.compile(r"\S+@\S+\.\S+") -PASSWORD_REGEX = re.compile(r"\S*\d\S*") - - -class LPParser(TelegraphParser): - async def parse(self, url, soup) -> None: - website_text = list(soup.stripped_strings) - output_data = self.extract_credentials(website_text) - - if output_data[0] != "": - self.output_file.write_data(*output_data, url) - - def extract_credentials(self, website_text): - login = password = "" - for i, current in enumerate(website_text): - email_match = LOGIN_REGEX.search(current) - if email_match is None or email_match.group() in EXCEPTIONS: - continue - login = email_match.group() - if ":" in login: - data = login.split(":") - login, password = data[0], data[-1] - return login, password - for k in range(1, min(4, len(website_text) - i)): - password_match = PASSWORD_REGEX.search(website_text[i + k]) - if password_match is None: - continue - password = password_match.group() - break - - return login, password - - -LPParser().run(titles=["PLACEHOLDER"], output_file=YamlOutputFile(pattern={"login": {}, "password": {}, "url": {}})) diff --git a/examples/media_parser.py b/examples/media_parser.py index 076a912..8191e64 100644 --- a/examples/media_parser.py +++ b/examples/media_parser.py @@ -6,10 +6,10 @@ class MediaParser(TelegraphParser): - async def parse(self): - self.article = self.url.split("/")[-1] - images = self.get_urls(self.soup.find_all("img")) - videos = self.get_urls(self.soup.find_all("video")) + async def parse(self, url, soup): + self.article = url.split("/")[-1] + images = self.get_urls(soup.find_all("img")) + videos = self.get_urls(soup.find_all("video")) if images: await self.download_media(images, "images", "gif") diff --git a/setup.py b/setup.py index 1487234..32bbae5 100644 --- a/setup.py +++ b/setup.py @@ -15,9 +15,9 @@ def get_requirements(file_name: str = "requirements.txt") -> list[str]: author_email="zombyacoff@gmail.com", url="https://github.com/zombyacoff/tu-parser", description="A flexible module for building custom parsers for the Telegraph website", - long_description=dedent(f"""\ + long_description=dedent("""\ # Telegraph Universal Parser - **Telegraph Universal Parser (tu-parser)** is a flexible module designed for creating custom parsers for the [Telegraph]({tuparser.TELEGRAPH_URL}) website. + **Telegraph Universal Parser (tu-parser)** is a flexible module designed for creating custom parsers for the [Telegraph](https://telegra.ph) website. """), long_description_content_type="text/markdown", packages=["tuparser"],