Skip to content

Commit

Permalink
Merge pull request #22 from dcstats/1.0.5
Browse files: browse the repository at this point in the history
1.0.5
  • Loading branch information
dcstats authored Sep 30, 2022
2 parents 73a511d + 02a545b commit a019155
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 20 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "CBBpy"
version = "1.0.4"
version = "1.0.5"
description = 'A Python-based web scraper for NCAA basketball.'
readme = "README.md"
authors = [{ name = "Daniel Cowan", email = "dnlcowan37@gmail.com" }]
Expand Down
48 changes: 29 additions & 19 deletions src/cbbpy/mens_scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,10 @@ def get_game_pbp(game_id: str) -> pd.DataFrame:
# GET PBP DATA
div = soup.find("div", {"id": "gamepackage-play-by-play"})
tables = div.find_all("table")

if '0th Half' in div.get_text():
tables = tables[1:]

num_halves = len(tables)
pbp_halves = []

Expand Down Expand Up @@ -392,27 +396,32 @@ def get_game_info(game_id: str) -> pd.DataFrame:
except:
game_network = np.nan

game_arena_pre = game_info_div.find(
'div', {'class': 'caption-wrapper'})

if not game_arena_pre:
div_loc = game_info_div.find(
'div', {'class': 'location-details'})
game_arena = div_loc.find('span', {'class': 'game-location'})

if game_arena:
game_arena = game_arena.get_text().strip()
game_loc = div_loc.find(
'div', {'class': 'game-location'}).get_text().strip()

try:
game_arena_pre = game_info_div.find(
'div', {'class': 'caption-wrapper'})

if not game_arena_pre:
div_loc = game_info_div.find(
'div', {'class': 'location-details'})
game_arena = div_loc.find(
'span', {'class': 'game-location'})

if game_arena:
game_arena = game_arena.get_text().strip()
game_loc = div_loc.find(
'div', {'class': 'game-location'}).get_text().strip()

else:
game_arena = game_info_div.find(
'div', {'class': 'game-location'}).get_text().strip()
game_loc = None
else:
game_arena = game_info_div.find(
game_arena = game_arena_pre.get_text().strip()
game_loc = game_info_div.find(
'div', {'class': 'game-location'}).get_text().strip()
game_loc = None
else:
game_arena = game_arena_pre.get_text().strip()
game_loc = game_info_div.find(
'div', {'class': 'game-location'}).get_text().strip()
except:
game_arena = None
game_loc = None

game_cap_pre = game_info_div.find_all(
"div", {"class": "game-info-note capacity"})
Expand Down Expand Up @@ -910,6 +919,7 @@ def _clean_pbp_table(table, info):
df = df.dropna(axis=1, how="all")
df.columns = [x.lower() for x in df.columns]
df = df.loc[:, ~df.columns.str.contains('unnamed')]
df = df.rename(columns={'time_of_day': 'time'})

# type handling
df.time = df.time.astype(str)
Expand Down

0 comments on commit a019155

Please sign in to comment.