generated from Itheum/template-datastream-aws-s3-trailblazer
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
72 lines (56 loc) · 2.15 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import json
import os
from datetime import datetime, timezone

import pandas as pd
import yaml
# Load the data stream settings from config.yaml. The path is relative to the
# current working directory, exactly as before.
# The encoding is passed explicitly so non-ASCII values (e.g. a creator name)
# are decoded the same way on every platform instead of depending on the
# locale's default encoding.
with open("config.yaml", encoding="utf-8") as f:
    # NOTE(review): FullLoader is intended for trusted input only. This file
    # is presumably repository-owned; prefer yaml.safe_load if the config can
    # ever come from an untrusted source.
    cfg = yaml.load(f, Loader=yaml.FullLoader)

# Settings read from the config; a missing key raises KeyError here.
NAME = cfg["name"]
CREATOR = cfg["creator"]
INITIAL_CREATION_DATE = cfg["initial-creation-date"]
# Get the current script's directory
script_dir = os.path.dirname(os.path.abspath(__file__))

# Read the source CSV from the "input" folder next to this script
df = pd.read_csv(os.path.join(script_dir, "input", "trailblazer.csv"))

# Convert the 'Date' column (day/month/year text) to datetime format
df["Date"] = pd.to_datetime(df["Date"], format="%d/%m/%Y")

# Sort the dataframe by date in ascending order. A stable sort keeps rows that
# share the same date in their original CSV order.
df = df.sort_values("Date", ascending=True, kind="mergesort")


def _json_safe(value):
    """Return *value* unchanged, or None when it is a missing cell (NaN/NaT).

    Empty CSV cells are read as NaN. NaN is not valid JSON, and it never
    compares equal to itself, so leaving it in the entries would make any
    equality-based change detection report a difference on every run.
    """
    return None if pd.isna(value) else value


# One JSON entry per CSV row, in date order.
json_entries = [
    {
        "category": _json_safe(row["Content Pillar"]),
        # ISO 8601 timestamp with a "Z" suffix; None when the date is missing
        # (previously this produced the meaningless string "NaTZ").
        "date": None if pd.isna(row["Date"]) else row["Date"].isoformat() + "Z",
        "title": _json_safe(row["Event"]),
        "link": _json_safe(row["Social Media Link"]),
    }
    for _, row in df.iterrows()
]
# Timestamp of this run in UTC, e.g. "2024-01-31T12:00:00Z".
# datetime.utcnow() is deprecated; an aware "now" formats to the same string.
current_date = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

# Build the output path once so the existence check, the read and the write
# all refer to exactly the same file.
output_file = os.path.join(script_dir, "output", f"{NAME}.json")

# Load the previously generated file, if there is one
if os.path.exists(output_file):
    with open(output_file, encoding="utf-8") as file:
        current_trailblazer = json.load(file)
else:
    current_trailblazer = {"data": ""}

# Tolerate an existing file with an unexpected shape (not an object, or no
# "data" key): treat it as "different" so it gets regenerated instead of
# crashing with KeyError/TypeError.
if isinstance(current_trailblazer, dict):
    previous_entries = current_trailblazer.get("data")
else:
    previous_entries = None

# Only rewrite the file (and bump last_modified_on) when the entries changed
if previous_entries == json_entries:
    print("No changes detected.")
else:
    json_data = {
        "data_stream": {
            "name": NAME,
            "creator": CREATOR,
            "created_on": INITIAL_CREATION_DATE,
            "last_modified_on": current_date,
        },
        "data": json_entries,
    }

    # Serialise before opening the file: if a value is not JSON-serialisable
    # the error is raised here and the existing output is left untouched
    # rather than truncated.
    json_text = json.dumps(json_data, indent=4)

    # Make sure the output folder exists on a fresh checkout
    os.makedirs(os.path.dirname(output_file), exist_ok=True)

    # Write JSON data to file
    with open(output_file, "w", encoding="utf-8") as file:
        file.write(json_text)

    print(f"JSON file created: {output_file}")