-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathwordFilter.py
52 lines (44 loc) · 1.37 KB
/
wordFilter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import math
import re
import urllib.request , urllib.parse , urllib.error
from bs4 import BeautifulSoup
import ssl
def wordfilter(user):
    """Print and return the distinct alphabetic words found in a text file.

    A whitespace-separated token counts as a word when every character in it
    is an ASCII letter (A-Z or a-z). In addition, a run of ASCII letters that
    is directly followed by a period or a comma is counted when that
    punctuation is followed by whitespace or ends the line, so "end." and
    "first," contribute "end" and "first". Matching is case-sensitive.

    The resulting set, and then its size, are printed to standard output.

    Args:
        user: Path of the text file to read; passed directly to ``open``.

    Returns:
        The set of distinct words that were found.

    Raises:
        OSError: If the file cannot be opened.
    """
    # Raw strings keep "\s" from being treated as an (invalid) string escape.
    plain_word = re.compile(r'[a-zA-Z]+')
    # A single pattern replaces the four separate searches: the variants that
    # demanded leading whitespace only ever matched a subset of the others.
    # The lookahead also accepts end-of-line, because trailing whitespace is
    # stripped from each line before matching and a sentence-final word would
    # otherwise never be recognised.
    punctuated_word = re.compile(r'([a-zA-Z]+)[.,](?=\s|$)')

    validword = set()
    # The context manager guarantees the file is closed even on error.
    with open(user) as fhand:
        for line in fhand:
            line = line.rstrip()
            validword.update(punctuated_word.findall(line))
            # Tokens from split() are never empty, so fullmatch() succeeds
            # exactly when every character is an ASCII letter.
            validword.update(
                token for token in line.split() if plain_word.fullmatch(token)
            )

    print(validword)
    print(len(validword))
    return validword