-
Notifications
You must be signed in to change notification settings - Fork 1
/
protocol1.py
71 lines (47 loc) · 2.55 KB
/
protocol1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import os.path
from change import petitions, tags
import pandas
from dotenv import load_dotenv
# USEFUL LINKS
# https://towardsdatascience.com/how-to-import-google-sheets-data-into-a-pandas-dataframe-using-googles-api-v4-2020-f50e84ea4530
# Load variables from a local .env file into the process environment so the
# lookups below can see them.
load_dotenv()

# NOTE(review): the constant is spelled PETITIONS_... while the variable read
# is 'PETITION_SPREADSHEET_ID' (singular) -- confirm which spelling the .env
# file actually uses.
PETITIONS_SPREADSHEET_ID = os.getenv('PETITION_SPREADSHEET_ID')

# Module-level state; nothing in the visible code reads or updates these.
sheet_cleared = False
stored_petitions = list()

# Per-country tag table read from <ONEDRIVE_FOLDER_PATH>/json/chosen_tags.csv.
# The script body below reads its 'country', 'total_count' and 'normalized'
# columns.
chosen_tags = pandas.read_csv(
    os.path.join(os.getenv('ONEDRIVE_FOLDER_PATH'), 'json', 'chosen_tags.csv')
)
# How many of a country's chosen tags (ranked by 'total_count') are kept.
TOP_TAGS_PER_COUNTRY = 75

# Tags removed from every graph after the top-tag filter has been applied.
EXCLUDED_TAGS = (
    'coronavirus', 'covid', 'covid-19', 'covid-19 epidemic', 'covid-19 pandemic', 'pandemic')


def main():
    """Write one edge CSV and one node CSV per country.

    For every country group in the filtered petitions, the petition -> tag
    pairs are reduced to the country's top ``TOP_TAGS_PER_COUNTRY`` chosen
    tags, the tags in ``EXCLUDED_TAGS`` are dropped, and two files are
    written to ``<ONEDRIVE_FOLDER_PATH>/python``:

    * ``<country>-edges.csv`` with columns ``source`` and ``target``;
    * ``<country>-nodes.csv`` with columns ``id``, ``label`` and
      ``category`` (tag nodes first, then petition nodes).

    Raises:
        TypeError: if ``ONEDRIVE_FOLDER_PATH`` is not set, because
            ``os.path.join`` is then given ``None``.
    """
    # The destination folder is the same for every country: resolve it once
    # and make sure it exists before the first to_csv call.
    output_dir = os.path.join(os.environ.get('ONEDRIVE_FOLDER_PATH'), 'python')
    os.makedirs(output_dir, exist_ok=True)

    # save_list_to_sheets_tab(all_tags, 'tantissimitags')
    # all_petitions.drop_duplicates('id', inplace=True)
    # NOTE(review): petitions.get() is presumably a DataFrame with a
    # 'country' column (it is grouped by that column below) -- confirm.
    filtered_by_country = petitions.filter_only_for_chosen_countries(petitions.get())

    for country, pets in filtered_by_country.groupby('country'):
        # Chosen tags of this country, most frequent first.
        country_tags = chosen_tags.loc[chosen_tags['country'] == country]
        country_tags = country_tags.sort_values(by='total_count', ascending=False)
        top_tags = country_tags['normalized'].head(TOP_TAGS_PER_COUNTRY)

        # One row per (source, target) pair returned by the tags helper.
        edges = pandas.DataFrame(
            tags.from_petitions_get_list_of_tags(pets, normalized=True),
            columns=['source', 'target'])
        # Keep only the top tags, then drop the excluded ones. The exclusion
        # runs after the cut-off, so fewer than TOP_TAGS_PER_COUNTRY distinct
        # tags may remain.
        edges = edges.loc[edges['target'].isin(top_tags)]
        edges = edges.loc[~edges['target'].isin(EXCLUDED_TAGS)]

        # Petition nodes carry an empty label; tag nodes are labelled with
        # their own id.
        petition_nodes = pandas.DataFrame().assign(
            id=edges['source'].unique(), label='', category='petition')
        tag_nodes = pandas.DataFrame().assign(
            id=edges['target'].unique(), label=lambda frame: frame['id'], category='tag')

        edges.to_csv(
            path_or_buf=os.path.join(output_dir, f"{country}-edges.csv"),
            index=False)
        pandas.concat([tag_nodes, petition_nodes], ignore_index=True).to_csv(
            path_or_buf=os.path.join(output_dir, f"{country}-nodes.csv"),
            index=False)


if __name__ == '__main__':
    main()
# list_of_tags = from_petitions_get_list_of_tags(get_all_petitions(), with_id=True)
# Save to CSV
# pandas.DataFrame(list_of_tags).to_csv('taglist.csv', index=False)
# if os.environ.get('DOWNLOAD_IMAGES', False):
# download_images_from_petitions(petitions, os.path.join('keywords', lang, keyword))
# store_petitions(all_pets, '')
# save_list_to_sheets_tab(stored_petitions, 'petitions')