-
Notifications
You must be signed in to change notification settings - Fork 0
/
game_crawling.py
147 lines (115 loc) · 5.28 KB
/
game_crawling.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
from selenium import webdriver
import openpyxl
import json
#컬럼 순서 변경, 팀명을 idx로 바꾸기
wb = openpyxl.Workbook()
sheet = wb.active
sheet.append(['날짜','시간','팀1','팀2','장소','사유'])
options = webdriver.ChromeOptions()
options.add_argument("start-maximized")
options.add_argument("lang=ko_KR")
options.add_argument('headless')
options.add_argument('window-size=1920x1080')
options.add_argument("disable-gpu")
options.add_argument("--no-sandbox")
# 웹드라이버 켜기
driver = webdriver.Chrome('chromedriver', chrome_options=options )
# 야구 경기 사이트 접속하기
driver.get("https://www.koreabaseball.com/Schedule/Schedule.aspx?seriesId=0,9")
# 일정 검색 버튼 누르기
schedule = []
for input_year in range(2022,2023,1):
##년도 선택
year_select = driver.find_element_by_css_selector("select#ddlYear")
for option in year_select.find_elements_by_tag_name('option'):
if option.text == str(input_year): #년도 입력
option.click()
break
for input_month in ["04","05","06","07","08","09","10","11","12"]:
##월 선택
month_select = driver.find_element_by_css_selector("select#ddlMonth")
for option in month_select.find_elements_by_tag_name('option'):
print("0"+str(input_month))
if option.text == input_month: #월 입력
option.click()
break
# 검색 결과 수집하기
## 선택자 (컨테이너)
## 날짜, 시간, 경기 팀 2개, 점수, 구장
container = driver.find_elements_by_css_selector("tbody tr")
for c in container:
game = {}
try:
date = c.find_element_by_css_selector("td.day").text
except:
pass
try:
time = c.find_element_by_css_selector("td.time b").text
except:
continue
team1 = c.find_element_by_css_selector("td.play>span").text
team2 = c.find_elements_by_css_selector("td.play>span")[1].text
place = c.find_elements_by_css_selector("td")[-2].text
game = {'date' : date, 'time' : time, 'team1' : team1, 'team2' : team2, 'place' : place}
schedule.append(game)
with open('schedule.json', 'w', encoding="utf-8") as make_file:
json.dump(schedule, make_file, ensure_ascii=False, indent="\t")
print("%s %s %s vs %s , %s" %(date, time, team1, team2, place))
sheet.append([str(input_year)+"-"+date.split(".")[0]+"-"+date.split(".")[1].split("(")[0],time+":00",team1,team2,place])
wb.save('baseball2.csv')
'''
from selenium import webdriver
import openpyxl
wb = openpyxl.Workbook()
sheet = wb.active
sheet.append(['년','월','날짜','시간','팀1','팀2','점수1','점수2','장소','사유'])
# 웹드라이버 켜기
driver = webdriver.Chrome("./chromedriver")
# 야구 경기 사이트 접속하기
driver.get("https://www.koreabaseball.com/Schedule/Schedule.aspx")
# 일정 검색 버튼 누르기
for input_year in range(2020,2021,1):
##년도 선택
year_select = driver.find_element_by_css_selector("select#ddlYear")
for option in year_select.find_elements_by_tag_name('option'):
if option.text == str(input_year): #년도 입력
option.click()
break
for input_month in ["01","02","03","04","05","06","07","08","09","10","11","12"]:
##월 선택
month_select = driver.find_element_by_css_selector("select#ddlMonth")
for option in month_select.find_elements_by_tag_name('option'):
print("0"+str(input_month))
if option.text == input_month: #월 입력
option.click()
break
# 검색 결과 수집하기
## 선택자 (컨테이너)
## 날짜, 시간, 경기 팀 2개, 점수, 구장
container = driver.find_elements_by_css_selector("tbody tr")
for c in container:
try:
date = c.find_element_by_css_selector("td.day").text
except:
pass
try:
time = c.find_element_by_css_selector("td.time b").text
except:
continue
team1 = c.find_element_by_css_selector("td.play>span").text
team2 = c.find_elements_by_css_selector("td.play>span")[1].text
place = c.find_elements_by_css_selector("td")[-2].text
try:
score1 = c.find_elements_by_css_selector("td.play em span")[0].text
score2 = c.find_elements_by_css_selector("td.play em span")[2].text
except: #특정 사유로 경기 취소될 경우
score1 = "-"
score2 = "-"
reason = c.find_elements_by_css_selector("td")[-1].text
print("%s %s %s %s vs %s %s , %s %s" %(date, time, team1,score1,score2, team2, place, reason))
sheet.append([input_year,input_month,date,time,team1,team2,score1,score2,place,reason])
continue
print("%s %s %s %s vs %s %s , %s" %(date, time, team1,score1,score2, team2, place))
sheet.append([int(input_year),int(input_month.replace("0","")),date,time,team1,team2,int(score1),int(score2),place])
wb.save('baseball.csv')
'''