-
Notifications
You must be signed in to change notification settings - Fork 49
/
porter.py
136 lines (129 loc) · 5.34 KB
/
porter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
import typing
import requests
from bs4 import BeautifulSoup
import zipfile
import os
import shutil
import json
import urllib.parse
# URL fetched by scrapeDataFromSpreadsheet(); its query string requests HTML
# output (tqx=out:html), which that function parses as an HTML table.
# NOTE(review): gid=1 presumably selects one tab of the spreadsheet — confirm.
SHEET = "https://docs.google.com/spreadsheets/d/13WWHcGiYJQD_rUqfGL17Lp0zn7MveU5xf7Fy-CNghzo/gviz/tq?tqx=out:html&tq&gid=1"
# When True, a "previews" directory is recreated next to "binaries" and each
# accepted audio link is downloaded into it.
DOWNLOAD_AUDIO = True
def scrapeDataFromSpreadsheet() -> typing.List[typing.List[str]]:
    """Download the page at SHEET and return its first table as rows of cell text.

    Returns:
        One list per ``<tr>`` of the first ``<table>`` in the response, each
        holding the text of that row's ``<td>`` cells in document order. A row
        without ``<td>`` cells yields an empty list.

    Raises:
        requests.RequestException: the request failed, timed out, or came
            back with an HTTP error status.
        ValueError: the response contains no ``<table>`` element.
    """
    # A timeout keeps the script from hanging forever on a stalled connection,
    # and raise_for_status() stops an error page from being parsed as data.
    response = requests.get(SHEET, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'lxml')
    table = soup.find('table')
    if table is None:
        raise ValueError(f"No <table> found in response from {SHEET}")
    return [
        [td.text for td in row.find_all("td")]
        for row in table.find_all('tr')
    ]
# Characters stripped from game, song and converter names before those names
# are used as path components.
invalid_chars = [
    ":", "/", "\'", "\"", "?", "#", "%", "&", "{", "}", "\\", "<", ">", "*", "$",
    "!", "@", "+", "`", "|", "=", "."
]
# Deletion table built once so every name is cleaned in a single pass instead
# of one str.replace() call per character.
_INVALID_CHAR_TABLE = str.maketrans("", "", "".join(invalid_chars))


def filterFilename(name: str) -> str:
    """Return *name* with every character listed in ``invalid_chars`` removed.

    All other characters, including spaces, are kept unchanged and in their
    original order.
    """
    return name.translate(_INVALID_CHAR_TABLE)
# Spreadsheet column names in column order: headers[i] is the key under which
# cell i of each data row is stored by the parsing loop.
headers = [
"Date",
"Game",
"Song",
"Category",
"Composers",
"Converters",
"Binary",
"Audio",
"Duration",
"Tracks",
"Tags",
"Notes",
]
# Parse Data
# Convert each spreadsheet row into a dict keyed by column header (empty cells
# are omitted) and record every distinct game name in first-seen order.
data = scrapeDataFromSpreadsheet()
parsed_data = []
game_list = []
# The first two rows of the table are skipped (presumably header rows).
for val in data[2:]:
    new_data = {}
    # zip() stops at the shorter sequence, so a row with fewer cells than
    # headers simply lacks the trailing keys instead of raising IndexError.
    for header, substring in zip(headers, val):
        # startswith() is safe on an empty cell, where indexing [0] would raise.
        if substring.startswith("|"):
            substring = substring[1:]
        # Drop non-breaking spaces so padding-only cells count as empty.
        substring = substring.replace("\xa0", "")
        if substring:
            new_data[header] = substring
    if not new_data:
        # Entirely blank row: there is nothing to record.
        continue
    parsed_data.append(new_data)
    # A row that has data but no "Game" value still fails loudly (KeyError).
    game_name = new_data["Game"]
    if game_name not in game_list:
        game_list.append(game_name)
# Clear Games
# Start every run from empty output directories: "binaries" always, and
# "previews" only when audio is being downloaded.
file_dirs = ["binaries", "previews"] if DOWNLOAD_AUDIO else ["binaries"]
for d in file_dirs:
    out_root = f"./{d}"
    if os.path.exists(out_root):
        shutil.rmtree(out_root)
    os.mkdir(out_root)
    # Remove any per-game folder still present under the output directory.
    for game in game_list:
        game_dir = f"./{d}/{filterFilename(game)}"
        if os.path.exists(game_dir):
            shutil.rmtree(game_dir)
# Generate files
# Walks parsed_data and, for rows that name a binary, copies "<Binary>.bin" out
# of "gdrive dump.zip" into binaries/<game>/, converts several fields, handles
# the audio preview link, and collects rows in new_json, which is written to
# mapping.json at the end. Game, song and converter names pass through
# filterFilename() before being used in paths.
# NOTE(review): this listing has lost its indentation, so how the statements
# below nest (for example, which conditions guard new_json.append and the
# "Audio" URL rewrite) cannot be determined here — confirm against the
# original file before relying on these notes.
new_json = []
# Keyed by the extension-less output path; the value is the REV number that the
# next file with the same path will receive (set to 1 on first use).
made_files = {}
with zipfile.ZipFile("gdrive dump.zip", 'r') as zip_ref:
for index, new_data in enumerate(parsed_data):
# NOTE(review): the parsing loop stores only non-empty cells, so a row whose
# Binary cell is empty has no "Binary" key and this lookup would raise KeyError
# rather than skip the row — confirm whether such rows can occur.
if len(new_data["Binary"].strip()) > 0:
game_name = new_data["Game"]
song_name = new_data["Song"]
# Record the index in new_json of an earlier entry with the same game and song;
# the loop does not break, so the last match wins.
for placement_index, entry in enumerate(new_json):
if entry["Game"] == game_name and entry["Song"] == song_name:
new_data["Update of"] = placement_index
converters = new_data["Converters"]
new_file_name_raw = f"binaries/{filterFilename(game_name)}/{filterFilename(song_name)} by {filterFilename(converters)}"
new_audio_name_raw = f"previews/{filterFilename(game_name)}/{filterFilename(song_name)} by {filterFilename(converters)}"
# The first file for a path gets no suffix; later ones get " (REV 1)",
# " (REV 2)", ... and the preview name receives the same suffix.
if new_file_name_raw in made_files:
new_file_name = f"{new_file_name_raw} (REV {made_files[new_file_name_raw]}).bin"
new_audio_name_raw = f"{new_audio_name_raw} (REV {made_files[new_file_name_raw]})"
made_files[new_file_name_raw] += 1
else:
new_file_name = f"{new_file_name_raw}.bin"
new_audio_name_raw = f"{new_audio_name_raw}"
made_files[new_file_name_raw] = 1
# Make sure the per-game folder exists in every output directory.
for d in file_dirs:
if not os.path.exists(f"./{d}/{filterFilename(game_name)}"):
os.mkdir(f"./{d}/{filterFilename(game_name)}")
bin_file_name = f"{new_data['Binary']}.bin"
# NOTE(review): namelist() is evaluated again for every row; it could be read
# once before the loop.
if bin_file_name in zip_ref.namelist():
with zip_ref.open(bin_file_name) as binary:
with open(new_file_name, "wb") as fh:
fh.write(binary.read())
# From here on "Binary" holds the output path instead of the spreadsheet value.
new_data["Binary"] = new_file_name
new_data["Verified"] = True
if "Tracks" in new_data:
new_data["Tracks"] = int(new_data["Tracks"])
if "Duration" in new_data:
new_data["Duration"] = float(new_data["Duration"])
if "Tags" in new_data:
new_data["Tags"] = new_data["Tags"].split(", ")
# "Notes" is split on "|": the first part becomes "Additional Notes", the second
# "Update Notes"; empty parts are dropped and "Notes" itself is removed.
if "Notes" in new_data:
note_keys = ("Additional Notes", "Update Notes")
for ki, k in enumerate(note_keys):
arr = new_data["Notes"].split("|")
new_data[k] = "" if len(arr) <= ki else arr[ki]
if len(new_data[k]) == 0:
del new_data[k]
del new_data["Notes"]
if "Audio" in new_data:
audio_f = new_data["Audio"]
# Only links containing one of these two host names are processed.
if "cdn.discordapp.com" in audio_f or "drive.google.com" in audio_f:
audio_ext = None
accepted_exts = [".mp3", ".wav"]
# The extension is detected by substring match anywhere in the URL; if both
# appear, the later list entry wins.
# NOTE(review): when neither appears, audio_ext stays None and the f-strings
# below produce a name ending in "None" — confirm whether that is intended.
for ext in accepted_exts:
if ext in audio_f:
audio_ext = ext
if DOWNLOAD_AUDIO:
# NOTE(review): no timeout or status check, so an error response body would be
# saved as if it were audio — confirm.
audio_response = requests.get(audio_f)
with open(f"./{new_audio_name_raw}{audio_ext}", "wb") as af:
af.write(audio_response.content)
# The path is percent-encoded and the scheme is prepended afterwards, so
# "https://" itself is not passed through quote().
new_data["Audio"] = "https://" + urllib.parse.quote(f"github.com/theballaam96/candys-shop/raw/main/{new_audio_name_raw}{audio_ext}")
new_json.append(new_data)
# Write all collected entries as indented JSON.
with open("mapping.json", "w", encoding="utf-8") as output_data:
output_data.write(json.dumps(new_json, indent=4))