# main.py - scraper for the news listing at https://www.playground.ru/misc/news
import requests
from bs4 import BeautifulSoup
from datetime import datetime
import time
import json
# Page that lists the latest news cards.
_NEWS_URL = "https://www.playground.ru/misc/news"
# JSON snapshot of every article seen so far, keyed by article id.
_NEWS_FILE = "news_dict.json"
# Seconds to wait for the site before giving up; requests has no default
# timeout and would otherwise block indefinitely on a stalled connection.
_REQUEST_TIMEOUT = 10


def _iter_articles():
    """Yield ``(article_id, record)`` for every well-formed card on the page.

    ``record`` is a dict with the keys ``article_date_timestamp``,
    ``title_article`` and ``url_article``.  Cards that lack a title link or
    a ``<time datetime=...>`` attribute are skipped instead of raising
    ``AttributeError``.

    Raises:
        requests.RequestException: the page could not be fetched, timed out,
            or came back with a 4xx/5xx status.
        ValueError: a card's ``datetime`` attribute is not ISO 8601 text
            that ``datetime.fromisoformat`` accepts.
    """
    response = requests.get(url=_NEWS_URL, timeout=_REQUEST_TIMEOUT)
    # Fail loudly on an error page: parsing it would yield zero cards and
    # get_news() would then overwrite the snapshot with an empty dict.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, "lxml")
    for card in soup.find_all("div", class_="post-content"):
        title_block = card.find("div", class_="post-title")
        link = title_block.find("a") if title_block is not None else None
        href = link.get("href") if link is not None else None
        if not href:
            continue

        metadata = card.find("div", class_="post-metadata")
        time_tag = metadata.find("time") if metadata is not None else None
        published = time_tag.get("datetime") if time_tag is not None else None
        if not published:
            continue

        published_at = datetime.fromisoformat(published)
        # NOTE(review): timetuple() discards any UTC offset, so the wall-clock
        # time is interpreted in this machine's local zone.  That matches the
        # values already stored in existing snapshots; switch to
        # published_at.timestamp() if true epoch seconds are wanted.
        article_date_timestamp = time.mktime(published_at.timetuple())

        # The id is the last path segment; strip a trailing "/" first so such
        # a URL does not produce an empty id.
        article_id = href.rstrip("/").split("/")[-1]

        yield article_id, {
            "article_date_timestamp": article_date_timestamp,
            "title_article": title_block.text.strip(),
            "url_article": href,
        }


def _save_news(news_dict):
    """Write *news_dict* to the snapshot file as UTF-8 JSON."""
    # ensure_ascii=False emits raw non-ASCII characters, so the encoding must
    # be pinned; the platform default may be unable to encode them.
    with open(_NEWS_FILE, "w", encoding="utf-8") as file:
        json.dump(news_dict, file, indent=4, ensure_ascii=False)


def get_news():
    """Scrape the news page and overwrite the snapshot file with the result.

    Every well-formed card currently on the page is stored under its article
    id in ``news_dict.json``; any previous content of the file is replaced.

    Raises:
        requests.RequestException: the page could not be fetched.
        ValueError: a card carries an unparsable ``datetime`` attribute.
    """
    news_dict = dict(_iter_articles())
    _save_news(news_dict)


def check_update():
    """Return the articles on the page that are not yet in the snapshot.

    Loads ``news_dict.json``, scrapes the page, adds every unseen article to
    the snapshot, rewrites the file, and returns only the newly added ones.

    Returns:
        dict: article id -> record (``article_date_timestamp``,
        ``title_article``, ``url_article``) for each new article; empty when
        nothing new was found.

    Raises:
        FileNotFoundError: the snapshot does not exist yet (run
            ``get_news()`` first).
        UnicodeDecodeError: the snapshot was written by an older version
            under a non-UTF-8 locale; regenerate it with ``get_news()``.
        requests.RequestException: the page could not be fetched.
        ValueError: a card carries an unparsable ``datetime`` attribute.
    """
    with open(_NEWS_FILE, encoding="utf-8") as file:
        news_dict = json.load(file)

    fresh_news = {}
    for article_id, record in _iter_articles():
        if article_id in news_dict:
            continue
        news_dict[article_id] = record
        fresh_news[article_id] = record

    _save_news(news_dict)
    return fresh_news
def main():
    """Script entry point: run a full scrape via ``get_news()``."""
    # To print only the articles not seen before, call this instead:
    # print(check_update())
    get_news()
# Run the scraper only when this file is executed directly, not on import.
if __name__ == "__main__":
    main()