-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathinspect_reports.py
78 lines (64 loc) · 2.27 KB
/
inspect_reports.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#!/usr/bin/env python
# coding: utf-8
import os
import re
import sys
import nltk
from glob import glob
def read_as_list(l, encoding):
    """Return the lines of a text file as a list of strings.

    "l" is the path of the file to read.
    "encoding" is the text encoding used to decode the file.

    The whole file is read at once and split with str.splitlines(), so the
    returned strings carry no line terminators and an empty file yields [].
    """
    with open(l, mode="rt", encoding=encoding) as handle:
        content = handle.read()
    return content.splitlines()
def query_yes_no(question, default="yes"):
    """Ask a yes/no question via input() and return the user's answer.

    "question" is a string that is presented to the user.
    "default" is the presumed answer if the user just hits <Enter>.
        It must be "yes" (the default), "no" or None (meaning
        an answer is required of the user).

    The return value is True for "yes" or False for "no". The question is
    asked again until a recognised answer is given, so the function never
    returns anything but a bool.

    Raises ValueError if "default" is not "yes", "no" or None.
    """
    valid = {"yes": True, "y": True, "ye": True,
             "no": False, "n": False}
    if default is None:
        prompt = " [y/n] "
    elif default == "yes":
        prompt = " [Y/n] "
    elif default == "no":
        prompt = " [y/N] "
    else:
        raise ValueError("invalid default answer: '%s'" % default)

    while True:
        sys.stdout.write(question + prompt)
        # Strip surrounding whitespace so that answers such as " y " are
        # accepted and a whitespace-only line counts as just hitting <Enter>.
        choice = input().strip().lower()
        if default is not None and choice == '':
            return valid[default]
        if choice in valid:
            return valid[choice]
        # Unrecognised answer: explain the accepted forms and ask again.
        sys.stdout.write("Please respond with 'yes' or 'no' "
                         "(or 'y' or 'n').\n")
def return_positive(file, positive_words, output_path="report.sentences"):
    """Interactively collect sentences of a report that mention a keyword.

    "file" is the path of the text file to inspect; it is read as latin-1
        through read_as_list().
    "positive_words" is the string searched for; a sentence is a candidate
        when it contains this string as a case-sensitive substring.
    "output_path" is the file that confirmed sentences are appended to, one
        per line (defaults to "report.sentences").

    The lines of the file are joined with spaces, every non-ASCII character
    is replaced by a space, and runs of spaces are collapsed into one. The
    text is then split with nltk.sent_tokenize(); each candidate sentence is
    printed and the user is asked to confirm it via query_yes_no().

    Returns None.
    """
    joined = ' '.join(read_as_list(file, 'latin-1'))
    ascii_only = re.sub(r'[^\x00-\x7f]', ' ', joined)
    # NOTE(review): the original second substitution replaced a single space
    # with a single space (a no-op); collapsing runs of spaces is the
    # presumed intent, since the step above can introduce such runs.
    text = re.sub(r' +', ' ', ascii_only)

    for sentence in nltk.sent_tokenize(text):
        if positive_words not in sentence:
            continue
        print(sentence)
        print('\n')
        if query_yes_no("Is this text about impact?"):
            # The context manager closes the file even if a write fails.
            with open(output_path, "a+") as report:
                report.write(sentence)
                report.write("\n")
# Script entry point: walk ./data/ and interactively review every .txt file.
if __name__ == '__main__':
    print("Searching for evidences of impact on text...")
    positive_words = 'impact'

    # Gather the *.txt files of every directory below ./data/ before any
    # of them is analysed.
    files = []
    for directory, _subdirs, _names in os.walk('./data/'):
        files.extend(glob(os.path.join(directory, '*.txt')))

    for file in files:
        # Banner announcing which file is about to be reviewed.
        print('\n')
        print('Analyzing ' + str(file))
        print('--------------------------------')
        print('\n')
        return_positive(file, positive_words)