-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathqType.py
59 lines (47 loc) · 1.61 KB
/
qType.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# Hardcoded word lists
import sys
import numpy
import nltk
import collections
import json
# Helper (auxiliary) verbs; processquestion strips one of these from the
# front of the question body when present.
yesnowords = [
    "can", "could", "would", "is", "does", "has", "was",
    "were", "had", "have", "did", "are", "will",
]

# Very frequent tokens. Not referenced in the visible part of this file --
# presumably used by callers to filter low-content words (verify).
commonwords = [
    "the", "a", "an",
    "is", "are", "were",
    ".",
]

# Interrogative words processquestion searches for to classify a question.
questionwords = [
    "who", "what", "where", "when", "why",
    "how", "whose", "which", "whom",
]
# Question words whose answer type is fixed by the word itself.  Question
# words missing from this table ("what", "why", "how") default to "MISC".
_QUESTION_TYPES = {
    "who": "PERSON",
    "whose": "PERSON",
    "whom": "PERSON",
    "where": "PLACE",
    "when": "TIME",
    "which": "ITEM",
}

# Tokens that refine a "how" question when they lead the remaining body.
_HOW_MODIFIER_TYPES = {
    "few": "QUANTITY",
    "little": "QUANTITY",
    "much": "QUANTITY",
    "many": "QUANTITY",
    "young": "TIME",
    "old": "TIME",
    "long": "TIME",
}


def processquestion(qwords):
    """Take in a tokenized question and return the question type and body.

    The first token that matches ``questionwords`` (case-insensitively)
    selects the type:

    * who / whose / whom            -> "PERSON"
    * where                         -> "PLACE"
    * when                          -> "TIME"
    * which                         -> "ITEM"
    * how + few/little/much/many    -> "QUANTITY"
    * how + young/old/long          -> "TIME"
    * any other question word       -> "MISC"

    Args:
        qwords: Sequence of string tokens making up the question.

    Returns:
        A ``(type, body)`` tuple.  When no question word is present the
        result is ``("YESNO", qwords[1:])``.  Otherwise ``body`` is
        ``qwords`` with the question word removed, then with a leading
        "how" modifier and a leading helper verb (see ``yesnowords``)
        removed if present.  ``body`` may be empty; an empty body no
        longer raises ``IndexError``.
    """
    # Single pass: position of the first question word, or -1 if none.
    qidx = next(
        (idx for idx, word in enumerate(qwords)
         if word.lower() in questionwords),
        -1,
    )
    if qidx < 0:
        # No question word: treat as yes/no and drop the leading token.
        return ("YESNO", qwords[1:])

    questionword = qwords[qidx].lower()
    # Body is everything except the question word itself.
    target = qwords[:qidx] + qwords[qidx + 1:]
    qtype = _QUESTION_TYPES.get(questionword, "MISC")

    # NOTE(review): the two checks below inspect the first remaining token,
    # which is the token right after the question word only when the
    # question word comes first, and they compare case-sensitively.  Kept
    # as-is to preserve existing output; confirm whether that is intended.
    if questionword == "how" and target:
        refined = _HOW_MODIFIER_TYPES.get(target[0])
        if refined is not None:
            qtype = refined
            target = target[1:]

    # Trim possible extra helper verb (guarded so an empty body is safe).
    if target and target[0] in yesnowords:
        target = target[1:]

    return (qtype, target)