-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathstanfordprocessor.py
executable file
·80 lines (64 loc) · 1.68 KB
/
stanfordprocessor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import hashlib
import pickle
import os
import json
import tempfile
# Directory that contains this module, with a trailing slash; settings.json
# is looked up here.
curdir = os.path.dirname(os.path.realpath(__file__)) + "/"

# Caching stays enabled unless settings.json explicitly turns it off.
USECACHE = True

try:
    # ``with`` closes the file even when the JSON turns out to be malformed.
    with open(curdir + 'settings.json') as f:
        data = json.load(f)

    # CACHEDIR is concatenated directly with file names elsewhere in this
    # file, so a configured value is expected to end with a path separator.
    if 'CACHEDIR' in data:
        CACHEDIR = data['CACHEDIR']
    else:
        CACHEDIR = tempfile.gettempdir() + "/"

    if 'USECACHE' in data:
        USECACHE = data['USECACHE']
        if hasattr(USECACHE, 'lower'):
            # String form: only "false" (in any letter case) disables caching.
            USECACHE = USECACHE.lower() != "false"
        else:
            # Real JSON values (true/false, numbers, null) have no .lower();
            # previously they aborted the import with a misleading message.
            USECACHE = bool(USECACHE)

    # Mandatory setting: a missing key raises KeyError and is reported below.
    CORENLP_JARS_DIR = data['corenlp_jars']
except Exception as exc:
    # Import-time boundary: any failure while loading the settings is
    # reported and turned into ImportError, as callers already expect.
    print("CRITICAL ERROR: NO CORENLP JARS DIR FOUND IN SETTINGS.\nEDIT *settings.json*")
    # Carry the underlying cause so the real problem (missing file, bad JSON,
    # missing key, ...) is visible in the traceback.
    raise ImportError("could not load settings.json: %r" % (exc,))
def emptyCache():
    """Delete the cached parse results stored in CACHEDIR.

    Only regular files named like the cache entries this file writes (the
    56-character SHA-224 hex digest of the text followed by ``.pickle``) are
    removed. CACHEDIR falls back to the system temp directory when it is not
    configured, so unrelated files and sub-directories must be left alone.

    Prints ``removed N files`` where N is the number of files actually
    deleted. Returns None.
    """
    import os
    import re

    # Matches exactly the names produced by hashlib.sha224(...).hexdigest()
    # plus the ".pickle" suffix.
    cacheName = re.compile(r'^[0-9a-f]{56}\.pickle\Z')

    removed = 0
    for name in os.listdir(CACHEDIR):
        path = os.path.join(CACHEDIR, name)
        # isfile() guards against a directory that happens to match the
        # pattern; os.remove() would raise on it.
        if cacheName.match(name) and os.path.isfile(path):
            os.remove(path)
            removed += 1

    # Single-argument form prints the same text on Python 2 and Python 3.
    print("removed %d files" % removed)
def getProcessor():
    """Create and return a CoreNLP "parse" processor with an on-disk cache.

    The returned object subclasses ``CoreNLP`` from
    ``stanford_corenlp_pywrapper`` and overrides ``parse_doc`` so that, when
    the module-level USECACHE flag is true, results are pickled into CACHEDIR
    under a name derived from the SHA-224 digest of the input text.

    Returns:
        A new ``Processor`` instance.
    """
    from stanford_corenlp_pywrapper import CoreNLP

    class Processor(CoreNLP, object):
        """CoreNLP parser whose ``parse_doc`` results are cached as pickles."""

        def __init__(self):
            # The jar location comes from settings.json; "*" is appended so
            # the wrapper receives a glob pattern.
            CoreNLP.__init__(self, "parse", corenlp_jars=[CORENLP_JARS_DIR + "*"])

        def _cachePath(self, st):
            """Return the cache file path used for the text ``st``."""
            # Byte strings are hashed as-is: calling .encode() on a non-ASCII
            # byte string raises instead of producing a digest.
            raw = st if isinstance(st, bytes) else st.encode('utf-8')
            return CACHEDIR + hashlib.sha224(raw).hexdigest() + ".pickle"

        def fetchFromCache(self, st):
            """Return the cached result for ``st``, or None when unavailable.

            A missing, truncated or otherwise unreadable cache file counts as
            a miss so that the caller simply parses the text again.
            """
            path = self._cachePath(st)
            if not os.path.isfile(path):
                return None
            # NOTE(review): unpickling can execute arbitrary code; CACHEDIR
            # should only be writable by trusted users.
            try:
                # Pickle data is binary; ``with`` closes the handle.
                with open(path, "rb") as fh:
                    return pickle.load(fh)
            except (IOError, OSError, EOFError, pickle.UnpicklingError,
                    AttributeError, ImportError, IndexError):
                return None

        def parse_doc(self, st):
            """Parse ``st``, reusing a cached result when caching is enabled.

            Returns:
                The parse result, or None when the underlying parser raises.
            """
            if USECACHE:
                cached = self.fetchFromCache(st)
                if cached is not None:
                    return cached

            try:
                rs = super(Processor, self).parse_doc(st)
            except Exception:
                # Best effort: parser failures are reported to the caller as
                # None. KeyboardInterrupt/SystemExit are no longer swallowed.
                return None

            if USECACHE:
                with open(self._cachePath(st), "wb") as fh:
                    pickle.dump(rs, fh)
            return rs

    proc = Processor()
    return proc
# Module-level processor instance, built once when this module is imported.
proc = getProcessor()