-
Notifications
You must be signed in to change notification settings - Fork 0
/
import_data.py
53 lines (44 loc) · 2.43 KB
/
import_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import pymongo
import os
import pandas as pd
import json
# Positional rating columns. Their CSV values may be strings such as "89+3"
# or "64-1" (base rating plus/minus a modifier) and are collapsed to one int.
_RATING_COLUMNS = (
    "ls", "st", "rs", "lw", "lf", "cf", "rf", "rw",
    "lam", "cam", "ram", "lm", "lcm", "cm", "rcm", "rm",
    "lwb", "ldm", "cdm", "rdm", "rwb",
    "lb", "lcb", "cb", "rcb", "rb", "gk",
)

# Comma-separated text columns that are stored as lists of stripped strings.
_LIST_COLUMNS = ("player_positions", "player_tags", "player_traits")


def _resolve_rating(value):
    """Collapse a "base+mod" / "base-mod" rating string into a single int.

    Values that are not strings, or strings containing neither sign, are
    returned unchanged. Raises ValueError if either side of the sign is not
    an integer literal.
    """
    if not isinstance(value, str):
        return value
    if "+" in value:
        base, modifier = value.split("+")[:2]
        return int(base) + int(modifier)
    if "-" in value:
        base, modifier = value.split("-")[:2]
        return int(base) - int(modifier)
    return value


def _format_player(player):
    """Normalise one player record (a dict) in place before insertion."""
    for column in _RATING_COLUMNS:
        # Tolerate CSVs that lack a rating column instead of raising KeyError.
        if column in player:
            player[column] = _resolve_rating(player[column])
    for column in _LIST_COLUMNS:
        raw = player.get(column)
        # Missing values arrive as None after the JSON round trip; only real
        # strings are split.
        if isinstance(raw, str):
            player[column] = [item.strip() for item in raw.split(",")]


def main():
    """Load the player CSVs from ``data/players`` into the ``fifa`` database.

    Connects to the MongoDB server at ``mongodb://localhost:27017/``. If a
    database named ``fifa`` is already listed, nothing is imported. Otherwise
    every ``.csv`` file in ``data/players`` is read, tagged with a ``year``
    (``"20"`` + the last two characters of the file stem) and a ``gender``
    (``"F"`` when the file name contains ``"female"``, else ``"M"``),
    normalised, and inserted into the ``players`` collection. Finally the
    search indexes on ``players`` and ``ultimate_teams`` are created.

    NOTE: the existence of the ``fifa`` database is the only "already loaded"
    marker, so an import that failed part-way has to be dropped manually
    before this script will load anything again.
    """
    data_dir = "data/players"

    # The context manager closes the client's connections on every exit path.
    with pymongo.MongoClient("mongodb://localhost:27017/") as client:
        if "fifa" in client.list_database_names():
            print("Data has been previously loaded.")
            return

        db = client["fifa"]
        players_collection = db["players"]
        ultimate_team_collection = db["ultimate_teams"]

        # Sorted so the load order (and the log output) is deterministic.
        for file in sorted(os.listdir(data_dir)):
            # Skip stray non-CSV entries (e.g. .DS_Store, READMEs, folders).
            if not file.lower().endswith(".csv"):
                continue

            data = pd.read_csv(os.path.join(data_dir, file))
            data["year"] = "20" + file.split(".")[0][-2:]
            data["gender"] = "F" if "female" in file else "M"

            # Round-trip through JSON so NaN becomes None and every value is
            # a plain Python type.
            data_json = json.loads(data.to_json(orient='records'))
            if not data_json:
                # insert_many() rejects an empty document list.
                print("No rows found in", file)
                continue

            for player in data_json:
                _format_player(player)

            players_collection.insert_many(data_json)
            print("Successfully loaded data for", file)

        print("Creating Indices for Faster Searching")
        players_collection.create_index(
            [('year', pymongo.ASCENDING), ('gender', pymongo.ASCENDING)]
        )
        players_collection.create_index(
            [
                ('year', pymongo.ASCENDING),
                ('gender', pymongo.ASCENDING),
                ('short_name', pymongo.ASCENDING),
            ]
        )
        players_collection.create_index(
            [
                ('year', pymongo.ASCENDING),
                ('gender', pymongo.ASCENDING),
                ('overall', pymongo.DESCENDING),
            ]
        )
        ultimate_team_collection.create_index(
            [
                ('year', pymongo.ASCENDING),
                ('username', pymongo.ASCENDING),
                ('team_name', pymongo.ASCENDING),
            ]
        )
# Script entry point: run the import only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()