-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathload.py
135 lines (118 loc) · 3.71 KB
/
load.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import csv
from pathlib import Path
import pyodbc
from rich.console import Console
from rich.progress import track
def open_conn():
server = "tcp:131.114.72.230" # lds.di.unipi.it ?
database = "Group_4_DB"
username = "Group_4"
password = "6VEKJ00D"
connectionString = (
"DRIVER={ODBC Driver 17 for SQL Server};SERVER="
+ server
+ ";DATABASE="
+ database
+ ";UID="
+ username
+ ";PWD="
+ password
)
cn = pyodbc.connect(connectionString)
return cn, cn.cursor()
def close_conn(cn, cursor):
cursor.close()
cn.close()
def get_header_types(name: str):
if name == "Date":
return ["int", "int", "int", "int", "int"]
elif name == "Geography":
return ["str", "str", "str", "str"]
elif name == "Player":
return ["int", "str", "str", "str", "str", "str"]
elif name == "Match":
return [
"int",
"str",
"int",
"int",
"int",
"str",
"str",
"str",
"int",
"int",
"int",
"int",
"int",
"int",
"int",
"int",
"int",
"int",
"int",
"int",
"int",
"int",
"int",
"int",
"int",
"int",
"int",
"float",
"float",
"float",
"float",
]
elif name == "Tournament":
return ["str", "str", "str", "int", "str", "int", "int", "float"]
else:
raise ValueError("get_header_type got a strange name.")
def load_table(name: str, my_path: Path, csv_len: int):
# 0. get header types
header = get_header_types(name)
# 1. open connection to server
cn, cursor = open_conn()
# 2. load table into memory
with open(my_path) as source:
reader = csv.DictReader(source)
# 3. write table onto server, row by row? Remember the data-types
commit_counter = 0
for row in track(reader, total=csv_len, description=f"{name}…"):
to_send = ""
for i, value in enumerate(row.values()):
if header[i] == "str":
# check if there's an apostrophe in the string and "escape" it
position = value.find("'")
if position != -1:
value = f"{value[:position]}'{value[position:]}"
to_send = f"{to_send},'{value}'"
else:
to_send = f"{to_send},{value}"
try:
query = f"INSERT INTO {name} VALUES ({to_send[1:]});"
cursor.execute(query)
except Exception as e:
print(f"Program failed on query {query}\nwith exception {e}")
close_conn(cn, cursor)
# commit once every 100 rows/queries
commit_counter += 1
if commit_counter == 100:
cn.commit()
commit_counter = 0
# if there are "leftover" rows, commit them too.
if commit_counter > 0:
cn.commit()
# 4. close connection
close_conn(cn, cursor)
console = Console()
tables = {}
# tables["Date"] = (Path("data/dates.csv"), 375)
# tables["Geography"] = (Path("data/countries.csv"), 124)
# tables["Player"] = (Path("data/players.csv"), 10074)
# tables["Tournament"] = (Path("data/tournaments.csv"), 4853)
tables["Match"] = (Path("data/matches.csv"), 186073)
console.log(f"Loading…")
for name, path_and_len in tables.items():
load_table(name, path_and_len[0], csv_len=path_and_len[1])
console.log(f"Loaded {len(tables)} tables onto server.")