-
Notifications
You must be signed in to change notification settings - Fork 0
/
driver.py
43 lines (31 loc) · 1.52 KB
/
driver.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
from data import DataManager
import time
def is_real_world(values):
    """Return True if at least one of the given category values is truthy.

    A question is considered to have a real-world reference when any of
    the category flags passed in is true.

    Args:
        values: Iterable of category values to test for truthiness.

    Returns:
        True if any element of ``values`` is truthy, False otherwise
        (including when ``values`` is empty).
    """
    return any(values)
if __name__ == "__main__":
    # Script entry point: configure a DataManager with the category headers
    # below, run it over the source file, and report the elapsed time.

    # Source file
    source = "Assistments 12-13 NSF.csv"
    # Output file
    target = "out.csv"
    # Column in the source data containing the problem body (the questions)
    DATA_COL = 32

    # Categories to be checked using ConceptNet
    conceptnet_categories = [
        'car', 'animal', 'sport', 'object', 'food', 'subject', 'place',
    ]
    # Categories to be checked using Stanford NER
    NER_categories = [
        'location', 'time', 'person', 'organization', 'money', 'percent',
        'date',
    ]
    # Categories based on given keywords: (category name, keyword list)
    keyword_categories = [
        ('geometry',
         ['square', 'circle', 'rectangle', 'triangle', 'angle', 'quadrant']),
    ]
    # Categories based on values from other columns (limited only to
    # categories in this program): (category name, predicate, input columns)
    columnval_categories = [
        ('real_world_reference',
         is_real_world,
         ['car', 'animal', 'sport', 'object', 'food', 'subject', 'place',
          'location', 'person', 'organization', 'money']),
    ]

    # NOTE(review): write_mode='a' presumably appends to an existing
    # out.csv rather than overwriting it -- confirm against DataManager.
    dm = DataManager(source, target, DATA_COL,
                     write_mode='a', batch=100, pool_size=8)

    # Add categories into header
    dm.add_header(conceptnet_categories, header_type="conceptnet")
    dm.add_header(NER_categories, header_type="NER")
    dm.add_header(keyword_categories, header_type="keyword")
    dm.add_header(columnval_categories, header_type="columnval")

    # Measure duration of running the program
    start_time = time.time()
    dm.process()
    # Single-argument print of a pre-formatted string prints the same text
    # under the Python 2 print statement and the Python 3 print function.
    print("%s seconds" % (time.time() - start_time))