-
Notifications
You must be signed in to change notification settings - Fork 0
/
nlp-csv2db.py
28 lines (23 loc) · 1.16 KB
/
nlp-csv2db.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# This is script 2 of 2 wrapping an NLP algorithm such as CheXpert. It reads the CSV generated by the
# algorithm's label.py script and stores the results in the database.
import psycopg2, psycopg2.extras, datetime, json, csv
def insert_row(cursor, table, data):
    """Insert a single row into ``table`` with a parameterized INSERT statement.

    Values are always passed to the driver as query parameters. The table and
    column names cannot be parameterized, so they are emitted as double-quoted
    identifiers with any embedded special characters escaped.

    Args:
        cursor: Database cursor whose ``execute(sql, params)`` uses ``%s``
            placeholders (e.g. a psycopg2 cursor).
        table: Name of the destination table.
        data: Mapping of column name to value. Column order follows the
            mapping's iteration order. It is expected to be non-empty.
    """
    def quote_identifier(name):
        # A double quote inside a quoted identifier is written as two double
        # quotes. A literal percent sign must be doubled as well, because
        # execute() treats '%' as a placeholder marker when parameters are given.
        return '"%s"' % str(name).replace('"', '""').replace('%', '%%')

    columns = ','.join(quote_identifier(column) for column in data)
    placeholders = ','.join(['%s'] * len(data))
    sql = 'insert into %s (%s) values (%s);' % (quote_identifier(table), columns, placeholders)
    cursor.execute(sql, list(data.values()))
def main():
    """Store the labeler's CSV output in the ``nlp_results`` table.

    Reads ``input.csv`` and ``labeled_reports.csv`` in lockstep: the n-th data
    row of the labeled output is paired with the n-th data row of the input,
    whose second column supplies the study_id. Every labeled column except
    'Report Impression' is serialized to JSON and inserted as one row.

    Raises:
        ValueError: If ``input.csv`` has fewer data rows than
            ``labeled_reports.csv``, so a labeled row has no study_id.
    """
    pgconn = psycopg2.connect("host='pghost' dbname='pgdb' user='pguser' password='pgpass' client_encoding='UTF8'")
    try:
        pgconn.autocommit = True
        pgsql = pgconn.cursor()
        # newline='' lets the csv module do its own newline handling, which is
        # needed for quoted fields that contain line breaks.
        with open("input.csv", newline='') as input_file:
            with open("labeled_reports.csv", newline='') as output_file:
                input_rows = csv.reader(input_file)
                output_rows = csv.reader(output_file)
                # Read/consume headers
                next(input_rows)
                headers = next(output_rows)
                for row in output_rows:
                    # Get the study_id from the input CSV
                    input_row = next(input_rows, None)
                    if input_row is None:
                        raise ValueError("input.csv has fewer data rows than labeled_reports.csv")
                    study_id = input_row[1]
                    scores = dict(zip(headers, row))
                    # The report text itself is not stored alongside the scores
                    del scores['Report Impression']
                    scores_str = json.dumps(scores)
                    data = {'study_id': study_id, 'algorithm_id': 6, 'results': scores_str, 'data': scores_str}
                    insert_row(pgsql, 'nlp_results', data)
    finally:
        # Release the connection even when a row fails part-way through
        pgconn.close()


if __name__ == "__main__":
    main()