-
Notifications
You must be signed in to change notification settings - Fork 10
/
update_notion_db.py
92 lines (81 loc) · 4.26 KB
/
update_notion_db.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
"""Updates the target Notability database using a CSV exported from Paperpile
How it works:
- The CSV file must be extracted from Paperpile with the default export functionality.
- If a paper with the same title is already in the Notion database, we update the fields that have changed, except for the title.
"""
import csv
import yaml
import argparse
from typing import Dict, Any
from rich import print
from lib.notion import NotionDBInterface
from lib.preproc import format_entry
def hamming_distance(x, y):
    """Count the positions at which two sequences differ.

    Positions that exist in only one of the inputs (when the lengths differ)
    each count as one mismatch, so a string is never at distance 0 from a
    longer string it is merely a prefix of.

    Args:
        x: First sequence (typically a string).
        y: Second sequence.

    Returns:
        The number of differing aligned positions plus the length difference.
    """
    aligned_mismatches = sum(c1 != c2 for c1, c2 in zip(x, y))
    # zip() stops at the shorter input; the unpaired tail counts as mismatches
    return aligned_mismatches + abs(len(x) - len(y))
def check_identical(entry: Dict[str, Dict[str, Any]], page: Dict[str, Any]) -> bool:
    """Tell whether a Paperpile entry agrees with an existing Notion page.

    Only string and list properties of ``page`` are compared; the ``id``,
    ``Institutions``, ``Date`` and ``Code`` properties are ignored. List
    properties are compared one way: every item of the Paperpile value must
    be present in the Notion list, while extra Notion items are accepted.

    Args:
        entry: Paperpile entry mapping each property name to a dict that
            stores the property under its ``'value'`` key.
        page: Notion page mapping each property name to its plain value.

    Returns:
        ``False`` as soon as one compared property differs (the mismatch is
        printed), ``True`` when no difference is found.

    Raises:
        AttributeError: If a compared string or list property of ``page``
            has no corresponding value in ``entry``.
    """
    ignored_keys = ('id', 'Institutions', 'Date', 'Code')
    for key, val in page.items():
        if key in ignored_keys:
            continue
        # A page marked "Reading" in Notion is left alone when the Paperpile
        # entry carries no status at all
        if key == 'Status' and val == 'Reading' and key not in entry:
            continue
        if not isinstance(val, (str, list)):
            continue
        # Look the value up once, so a missing property is reported the same
        # way for string and list properties
        try:
            entry_val = entry[key]['value']
        except KeyError as err:
            raise AttributeError(f"Attribute {key} found with value {val} in Notion, but missing in Paperpile.") from err
        if isinstance(val, str):
            if entry_val != val:
                print(f"Found mismatching key '{key}' with values {entry_val} (Paperpile) and {val} (Notion)")
                return False
        elif any(x not in val for x in entry_val):
            print(f"[bright_magenta]Found[/bright_magenta] mismatching key '{key}' with values {entry_val} (Paperpile) and {val} (Notion)")
            return False
    return True
def main(args: argparse.Namespace) -> None:
    """Synchronise the Notion database with the rows of a Paperpile CSV export.

    Each CSV row is matched by title against the pages fetched from Notion.
    A row with no match is created as a new page, a row with exactly one
    match is updated when ``check_identical`` reports a difference, and a row
    with several matches is skipped.

    Args:
        args: Parsed command-line arguments providing ``config``, ``input``,
            ``database``, ``token`` and ``max_distance``.
    """
    # Load the configuration file
    with open(args.config, 'r') as ymlfile:
        cfg = yaml.safe_load(ymlfile)

    # Read every row once and close the file right away; DictReader consumes
    # the header line itself, so only data rows end up in the list
    with open(args.input, 'r', newline='') as csv_file:
        rows = list(csv.DictReader(csv_file))
    tot_input_values = len(rows)

    # argparse passes a string when the option is given without a type
    max_distance = int(args.max_distance)

    # Query Notion database
    notion = NotionDBInterface(args.database, args.token)
    notion.query_database()
    print(f'[italic dark_orange3]Found {len(notion.pages)} pages on Notion and {tot_input_values} in the input dataset.[/italic dark_orange3]')

    # Iterate over CSV rows
    for row in rows:
        # Remove BiBTeX capitalization
        row['Title'] = row['Title'].replace('{', '').replace('}', '')
        title = row['Title'].lower()
        matches_idxs = [
            idx for idx, page in enumerate(notion.pages)
            if hamming_distance(title, page['Title'].lower()) < max_distance
        ]

        if len(matches_idxs) > 1:
            print(f'[dark_orange3]Skipping[/dark_orange3] [dodger_blue1]"{row["Title"]}"[/dodger_blue1]: multiple matches found.')
            continue

        curr_entry = format_entry(row, cfg['journals'], cfg['conferences'])
        if not matches_idxs:
            print(f'[green]Adding[/green] [dodger_blue1]"{row["Title"]}"[/dodger_blue1]...')
            notion.create_page(curr_entry)
            continue

        match = notion.pages[matches_idxs[0]]
        if check_identical(curr_entry, match):
            print(f'[gold3]Skipping[/gold3] [dodger_blue1]"{row["Title"]}"[/dodger_blue1]: already in the Notion database.')
        else:
            print(f'[bright_magenta]Updating[/bright_magenta] [dodger_blue1]{match["Title"]}[/dodger_blue1]...')
            notion.update_page(match['id'], curr_entry)

    print('\nDone!', ":tada:")
if __name__ == "__main__":
    # Command-line entry point: parse the options and run the synchronisation
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--input", help="CSV file to read", required=True)
    parser.add_argument("-c", "--config", help="Config file", required=True)
    parser.add_argument("-d", "--database", help="Database to update", required=True)
    parser.add_argument("-t", "--token", help="Notion API token", required=True)
    # type=int: without it a value given on the command line arrives as a
    # string and cannot be compared with the integer Hamming distance
    parser.add_argument("-m", "--max_distance", help="Maximum accepted Hamming distance for not filtering", type=int, default=1)
    args = parser.parse_args()
    main(args)