-
Notifications
You must be signed in to change notification settings - Fork 0
/
scrape.py
143 lines (115 loc) · 4.81 KB
/
scrape.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
from models import Character, Style
import constants
import requests
import bs4
import datetime
def _parse_update_date(datestr: str) -> str:
    """
    Convert a date string taken from the wiki into ISO format
    Args:
        datestr (str): Date text such as "Oct 10, 2024", "October 10, 2024", "Oct 10 2024" or "2024-10-10"
    Returns:
        str: The same date formatted as YYYY-MM-DD
    Raises:
        ValueError: If the text matches none of the accepted formats
    """
    # Formats are tried in order; the first one that parses wins.
    for fmt in ("%b %d, %Y", "%B %d, %Y", "%b %d %Y", "%Y-%m-%d"):
        try:
            parsed = datetime.datetime.strptime(datestr, fmt)
        except ValueError:
            continue
        return parsed.strftime("%Y-%m-%d")
    raise ValueError(f"Unrecognized update date format: {datestr!r}")


def get_info_from_aewiki(character: Character, *, timeout: float = 30.0):
    """
    Scrape Another Eden Wiki page for character and return relevant information
    Args:
        character (Character): Character object to scrape
        timeout (float): Seconds to wait for the wiki server before giving up
    Returns:
        tuple: Tuple of (code, category, light_shadow, is_awaken, aewiki_url, update_date, personalities, japanese_name, korean_name, english_class_name)
    Raises:
        Exception: If the wiki page does not answer with HTTP 200
        ValueError: If the update date on the page is in an unknown format
        requests.exceptions.RequestException: If the request fails or times out
    """
    alter_suffix = constants.AEWIKI_ALTER_SUFFIX if character.is_alter else ""
    aewiki_url = (
        f'{constants.AEWIKI_BASE_URL}{character.english_name}'
        f'{constants.AEWIKI_STYLE_SUFFIXS[character.style]}{alter_suffix}'
    )

    # Without a timeout a stalled server would block the scraper forever.
    res = requests.get(aewiki_url, timeout=timeout)
    if res.status_code != 200:
        raise Exception("Wiki page not found", res.status_code)
    soup = bs4.BeautifulSoup(res.text, 'lxml')

    # The cell positions below are fixed offsets into the page's tables;
    # NOTE(review): they depend on the wiki's current layout - re-check if the page template changes.
    general_datas = soup.find("article", {"title": "General Data"}).find_all("td")
    is_awaken = "Stellar Awakened" in general_datas[0].text and not character.is_original_4star
    light_shadow = "light" if general_datas[5].text.lower().strip().startswith("light") else "shadow"

    obtain = general_datas[6].text.strip()
    if obtain == "Dreams":
        category = "ENCOUNTER"
    elif "Symphony" in obtain:
        category = "COLAB"
    else:
        category = "FREE"

    personalities = [link.text.strip() for link in general_datas[7].find_all("a")]

    other_datas = soup.find("article", {"title": "Other Data"}).find_all("td")
    code = int(other_datas[1].text.strip())
    # Names may be followed by a parenthesised part; keep only what precedes it.
    japanese_name = other_datas[2].text.split("(")[0].strip()
    korean_name = other_datas[3].text.split("(")[0].strip()

    # The date cell sits at a different index on alter pages.
    date_index = 9 if character.is_alter else 6
    # The cell holds two " / "-separated parts; the second is used.
    update_datestr = other_datas[date_index].text.split(" / ")[1].strip()  # Oct 10, 2024 or October 10, 2024
    update_date = _parse_update_date(update_datestr)

    character_classes = soup.find("div", {"class": "character-class"}).find_all("td")
    if character.style != Style.FOUR.value:
        english_class_name = character_classes[7].text.split(" ...▽ ")[0]
    else:
        english_class_name = None

    return (
        code,
        category,
        light_shadow,
        is_awaken,
        aewiki_url,
        update_date,
        personalities,
        japanese_name,
        korean_name,
        english_class_name,
    )
def get_dungeon_from_aewiki(style: Style, english_class_name: str, *, timeout: float = 30.0):
    """
    Scrape Another Eden Wiki page for character class and return its dungeon name
    Args:
        style (Style): Style of the character (compared against the Style members' values)
        english_class_name (str): English name of the character class
        timeout (float): Seconds to wait for the wiki server before giving up
    Returns:
        str | None: Name of the dungeon, "Treatise" / "Codex" for AS / ES styles,
            "Opus" when the tome page is unavailable, or None for the FOUR style
    Raises:
        Exception: If the tome page loads but no matching list item is found
        requests.exceptions.RequestException: If the request fails or times out
    """
    # These styles have a fixed answer and need no network access.
    if style == Style.AS.value:
        return "Treatise"
    if style == Style.ES.value:
        return "Codex"
    if style == Style.FOUR.value:
        return None

    aewiki_url = f'{constants.AEWIKI_BASE_URL}{english_class_name}_Tome'
    # Without a timeout a stalled server would block the scraper forever.
    res = requests.get(aewiki_url, timeout=timeout)
    if res.status_code != 200:
        # A missing tome page is treated as the "Opus" case rather than an error.
        return "Opus"

    soup = bs4.BeautifulSoup(res.text, 'lxml')
    for item in soup.find_all("li"):
        txt = item.text.strip()
        # Take the first "(VH)" entry that is not an "Obtained ..." line,
        # and keep only the text before the first parenthesis.
        if "(VH)" in txt and not txt.startswith("Obtained"):
            return txt.split("(")[0].strip()
    raise Exception("Dungeon not found")
def get_info_from_altema(character: Character, *, timeout: float = 30.0) -> "str | None":
    """
    Scrape Altema page for character and return its Japanese class name
    Args:
        character (Character): Character object to scrape
        timeout (float): Seconds to wait for the Altema server before giving up
    Returns:
        str | None: Japanese name of the character class, or None for the FOUR style
    Raises:
        Exception: If the page does not answer with HTTP 200, or no "(★5)" cell is found
        requests.exceptions.RequestException: If the request fails or times out
    """
    # The FOUR style returns None without any network access.
    if character.style == Style.FOUR.value:
        return None

    # Without a timeout a stalled server would block the scraper forever.
    res = requests.get(character.altema_url, timeout=timeout)
    if res.status_code != 200:
        raise Exception("Altema page not found")

    soup = bs4.BeautifulSoup(res.text, 'lxml')
    for cell in soup.find_all("td"):
        txt = cell.text.strip()
        # The first cell containing "(★5)" is used; the class name is the
        # text before the first parenthesis.
        if "(★5)" in txt:
            return txt.split("(")[0].strip()
    raise Exception("Japanese class name not found")