-
Notifications
You must be signed in to change notification settings - Fork 1
/
newextract.py
80 lines (73 loc) · 2.13 KB
/
newextract.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# New file for extracting data
import json
# Load the entire input text into memory and open the intermediate output
# in binary mode. Both handles are left open here and are closed once the
# tag scan further down has finished writing.
file = open('convertedFile.txt', mode='r', encoding='utf-8')
nfile = open('converted.txt', mode='wb')
tar = file.read()
# print(type(tar))
def hasNumbers(inputString):
    """Return True if at least one character of inputString is a digit.

    An empty input yields False.
    """
    for ch in inputString:
        if ch.isdigit():
            return True
    return False
# Scan `tar` for every '<' and walk forward to the matching '>'. Inside that
# span, copying to `nfile` starts at the first single quote (the quote itself
# is written) and stays on until the span ends. A backslash inside the copied
# region is replaced by a newline and the backslash plus the character after
# it are skipped (presumably a two-character escape such as \n -- confirm
# against the input format). Each span is followed by a blank line.
#
# The scan restarts from every '<' position, so a '<' nested inside another
# span is processed again on its own.
#
# The handles are closed in `finally` so they are released even if the scan
# fails part-way through.
try:
    newline = '\n'.encode("utf-8")
    span_separator = '\n\n'.encode("utf-8")
    text_len = len(tar)
    for start in range(text_len):
        if tar[start] != '<':
            continue
        pos = start
        copying = False
        # `pos < text_len` guards against a '<' that is never closed.
        while pos < text_len and tar[pos] != '>':
            if copying:
                if tar[pos] == "\\":
                    nfile.write(newline)
                    pos += 2
                    if pos >= text_len:
                        # Escape sequence ran off the end of the input.
                        break
                nfile.write(tar[pos].encode("utf-8"))
            pos += 1
            if pos < text_len and tar[pos] == "\'":
                copying = True
        nfile.write(span_separator)
finally:
    nfile.close()
    file.close()
# Second pass: turn the extracted lines into `search`, a dict of records.
# Each record is itself a dict, keyed by its heading line (trailing newline
# included, since readlines() keeps it) and always holding a 'name' entry.
with open('converted.txt', 'r', encoding="utf-8") as f:
    lines = f.readlines()

search = {}
d = ''

# Pick the first record heading: the first line from index 3 onwards that has
# upper-case text, no ':' and is not purely alphanumeric. (A purely
# alphanumeric line can never contain '-', so such lines are never chosen.)
# NOTE(review): this assumes the `else` pairs with the alphanumeric test, not
# with the '-' test -- confirm.
for line in lines[3:]:
    if line.isupper() and ':' not in line and not line.isalnum():
        d = line
        break

search[d] = {'name': d}
current_key = 'name'

for line in lines[4:]:
    if line.isupper() and ':' not in line and '/' not in line:
        # A heading that contains digits is only accepted as a new record
        # name when it also contains ',' or '-'; a heading without digits is
        # always accepted.
        # NOTE(review): this assumes the `else` pairs with the digit test,
        # not with the separator test -- confirm.
        if not hasNumbers(line) or ',' in line or '-' in line:
            d = line
        # The record is (re)created even when `d` was left unchanged above.
        search[d] = {'name': d}
        current_key = 'name'

    if ':' in line:
        # Split on the first colon only, so values that themselves contain a
        # colon (for example a time such as 10:30) are kept whole.
        current_key, value = line.split(":", 1)
        search[d][current_key] = value
    else:
        # Continuation text is appended to the most recent key.
        # NOTE(review): a heading line also reaches this branch, so its text
        # is appended to its own 'name' value -- confirm this is intended.
        search[d][current_key] += line
# Serialise the collected records as JSON and write them to search.txt.
# The `with` block closes the file even if the write fails.
jso = json.dumps(search)
with open('search.txt', 'w+') as target:
    target.write(jso)