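"""Predict sentence specificity scores for tokenized text.

Pipeline (see the __main__ block): getFeatures() extracts features from the
input file, predict() scores them with the pickled model at MODELFILE, and
writeSpecificity() writes one score per line to the output file.

Command-line usage:
    python specificity.py --inputfile sentences.txt --outputfile scores.txt
"""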
import argparse
import pickle
import sys
import os

import pandas as pd

from createFeatures import *

# The model with the best performance; loaded by predict().
MODELFILE = "./model/best_model.pkl"

def getFeatures(fin):
    """Extract features for the sentences in `fin`.

    `fin` should be a raw text file with one tokenized sentence per line.
    """
    print("Start initializing word embeddings ...")
    embeddings = features.init_embeding()
    print("Finished loading word embeddings ...")
    a = ModelNewText(embeddings=embeddings)
    # Use loadFromCSV(fin) instead when the input is our CSV-formatted data.
    # a.loadFromCSV(fin)
    a.loadFromFile(fin)
    a.transLexical()
    a.transEmbedding()
    a.transEmotionFeature()
    a.transform_features()

def predict(model=MODELFILE):
    """Load the pickled model and score the extracted features."""
    with open(model, 'rb') as file:
        pickle_model = pickle.load(file)
    print("Successfully loaded the model.")
    # Read the feature table written by the feature-extraction step and
    # drop the first three (non-feature) columns before predicting.
    f = pd.read_csv("./output/test.csv", sep="\t")
    feature = f.iloc[:, 3:]
    output = pickle_model.predict(feature)
    return output

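# Example of programmatic use (mirrors the command-line flow in __main__;
# the file names are placeholders):
#     getFeatures("sentences.txt")
#     scores = predict()
#     writeSpecificity(scores, "scores.txt")
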
def writeSpecificity(preds, outf):
    """Write one specificity score per line to `outf`, then clean up."""
    with open(outf, "w") as f:
        for x in preds:
            f.write("%f\n" % x)
    print("Output to " + outf + " done")
    # Remove the intermediate files produced during feature extraction.
    clean()

def run(identifier, sentlist):
    """Run the parser on a list of sentences and return predictions.

    `sentlist` should be a list of tokenized sentence strings;
    `identifier` is a string serving as the header of this sentence list.
    """
    print("Start initializing word embeddings ...")
    embeddings = features.init_embeding()
    print("Finished loading word embeddings ...")
    a = ModelNewText(embeddings=embeddings)
    # ModelNewText loads sentences from a file, so write sentlist to a
    # temporary file named after the identifier before loading it.
    tmpfile = identifier + ".tmp"
    with open(tmpfile, "w") as f:
        f.write("\n".join(sentlist) + "\n")
    a.loadFromFile(tmpfile)
    a.transLexical()
    a.transEmbedding()
    a.transEmotionFeature()
    a.transform_features()
    os.remove(tmpfile)
    return predict(model=MODELFILE)

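# Example call to run(); the identifier and the sentences are hypothetical:
#     scores = run("sample", ["this is a tokenized sentence .",
#                             "another tokenized sentence ."])
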
def clean():
    """Remove the intermediate files created during feature extraction."""
    os.remove("NE_Concrete_Emo.csv")
    os.remove("sample-tagged.txt")
    os.remove("USEFUL_TAG.csv")

if __name__ == "__main__":
    argparser = argparse.ArgumentParser()
    argparser.add_argument("--inputfile",
                           help="input raw text file, one sentence per line, tokenized",
                           required=True)
    argparser.add_argument("--outputfile",
                           help="output file to save the specificity scores",
                           required=True)
    sys.stderr.write(
        "Predictor: please make sure that your input sentences are WORD-TOKENIZED for better prediction.\n")
    args = argparser.parse_args()

    getFeatures(args.inputfile)
    preds = predict(model=MODELFILE)
    writeSpecificity(preds, args.outputfile)