-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathwande.py
219 lines (188 loc) · 7.11 KB
/
wande.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
import datetime
import math
import time
import certifi
import pyautogui
import pymongo
from cnocr import CnOcr
import mss
import mss.tools
import pyperclip
import os
from pymongo.server_api import ServerApi
# ret = w.start()
# print(ret)
#
# ret = w.isconnected()
# print(ret)
#
# # test WSD function
# ret = w.wsd("000001.SZ", "sec_name", "2022-05-08", "2022-06-08", "")
# print(ret)
# capture selected portion of the screen
def autogui(num, title):
    """Screenshot the fixed result-list region and save it as a PNG.

    The image is written to ``wande_images/curr_<num>.png`` relative to the
    current working directory.

    Args:
        num: Page number, used only to build the output file name.
        title: Search term being processed. Not used by this function; kept
            in the signature so existing callers keep working.
    """
    # The screen part to capture (hard-coded pixel coordinates).
    region = {'top': 290, 'left': 280, 'width': 1252, 'height': 604}

    # Build the path with os.path.join instead of a literal backslash:
    # '\c' is an invalid escape sequence, and on non-Windows systems a
    # backslash would become part of the file name instead of separating
    # the directory, so the file would not be found under wande_images/.
    image_dir = 'wande_images'
    os.makedirs(image_dir, exist_ok=True)
    output_path = os.path.join(image_dir, f'curr_{num}.png')

    with mss.mss() as sct:
        # Grab the data
        img = sct.grab(region)
        # Save to the picture file
        mss.tools.to_png(img.rgb, img.size, output=output_path)
# ocr
def _is_date_token(text):
    """Return True when an OCR token has the shape of a time or date stamp.

    Accepted shapes are 5 or 10 characters long with ':' or '-' at index 2
    (e.g. ``HH:MM`` / ``MM-DD``) or '-' at index 4 (e.g. ``YYYY-MM-DD``).
    """
    if len(text) not in (5, 10):
        return False
    return text[2] in ('-', ':') or text[4] == '-'


def _parse_date_token(text):
    """Convert a token accepted by ``_is_date_token`` to a midnight datetime.

    * ``xx:xx``  -> today's date (the token is a time of day).
    * ``MM-DD``  -> that month/day in the current year.
    * otherwise  -> ``YYYY-MM-DD`` taken from the first three '-' fields.

    Returns the string ``"error"`` (after printing ``"ocr issue"``) when the
    token cannot be turned into a valid date.
    """
    if text[2] == ':':
        return datetime.datetime.now().replace(
            hour=0, minute=0, second=0, microsecond=0)

    parts = text.split("-")
    try:
        if text[2] == '-':
            year, month, day = datetime.datetime.now().year, parts[0], parts[1]
        else:
            # A misread token such as "2022-" has fewer than three fields;
            # the IndexError is handled below instead of aborting the run.
            year, month, day = parts[0], parts[1], parts[2]
        return datetime.datetime.strptime(f"{year}-{month}-{day}", "%Y-%m-%d")
    except (ValueError, IndexError):
        print("ocr issue")
        return "error"


def ocr(path):
    """Run OCR on a screenshot and pair each date stamp with the next title.

    The OCR results are scanned in order. A date-like token starts a record;
    the next token longer than 10 characters is taken as its title. Tokens
    that are neither are skipped.

    Args:
        path: Path of the image file to recognise.

    Returns:
        A list of ``[date, title]`` lists. ``date`` is a
        ``datetime.datetime`` at midnight, or the string ``"error"`` when the
        date token could not be parsed.
    """
    curr_ocr = CnOcr()
    title_total = []
    time1 = time.perf_counter()
    res = curr_ocr.ocr(path)

    curr_total = []
    awaiting_title = False
    for each in res:
        text = each['text']
        if not awaiting_title:
            if _is_date_token(text):
                curr_total.append(_parse_date_token(text))
                awaiting_title = True
            # A date token is at most 10 characters, so it can never also
            # qualify as a title; move on to the next token.
            continue
        if len(text) > 10:
            curr_total.append(text)
            print(curr_total)
            title_total.append(curr_total)
            curr_total = []
            awaiting_title = False

    time2 = time.perf_counter()
    print('本次图片识别总共耗时%s s' % (time2 - time1))
    return title_total
def _ocr_first_int(ocr_result):
    """Return the first OCR token parsed as an int, or None if that fails."""
    try:
        return int(ocr_result[0]['text'])
    except (IndexError, KeyError, TypeError, ValueError):
        return None


def run(titles):
    """Search every title on screen, OCR the result pages and store them.

    For each search term the function:

    1. pastes the term into the search box via pyautogui and presses Enter;
    2. screenshots two small screen regions and OCRs them to estimate how
       many result pages there are (the larger of the page number read from
       the first region and ``ceil(result_count / 20)`` from the second;
       each falls back to 1 when OCR yields no integer);
    3. captures every page with ``autogui`` while clicking at a fixed
       position between captures;
    4. OCRs every capture with ``ocr``.

    After all terms are processed, every record whose date parsed
    successfully is inserted as ``{'time': ..., 'title': ...}`` into the
    ``paper`` collection of database ``db2``.

    All screen coordinates are hard-coded pixel positions.

    Args:
        titles: Iterable of search strings.
    """
    import urllib.parse

    # create new dir for storing images
    image_dir = "wande_images"
    os.makedirs(image_dir, exist_ok=True)
    # os.path.join avoids the invalid '\c' escape of a literal backslash and
    # keeps these paths consistent with the per-page paths built below.
    page_png = os.path.join(image_dir, "curr_page.png")
    count_png = os.path.join(image_dir, "curr_page2.png")

    # NOTE(security): the fallback credential is kept only for backward
    # compatibility; set WANDE_MONGO_PASSWORD instead, and rotate the
    # password that has been committed to source control.
    password = os.environ.get("WANDE_MONGO_PASSWORD", "huaxin12345")
    # Percent-escape the password so special characters cannot break the URI.
    uri = (
        f"mongodb+srv://stevenwyl:{urllib.parse.quote_plus(password)}"
        "@cluster0.ihlnkdr.mongodb.net/?retryWrites=true&w=majority"
    )
    client = pymongo.MongoClient(uri, server_api=ServerApi('1'),
                                 tlsCAFile=certifi.where())
    try:
        paper = client.db2.paper

        # Build the OCR engine once instead of once per search term.
        curr_ocr = CnOcr()
        # Screen regions holding the page number and the result count.
        page_region = {'top': 900, 'left': 1705, 'width': 30, 'height': 18}
        count_region = {'top': 260, 'left': 390, 'width': 50, 'height': 20}

        # automation
        pyautogui.PAUSE = 1

        total_data = []
        for title in titles:
            print(title)

            # move to search bar and search
            pyperclip.copy(title)
            pyautogui.moveTo(750, 150, duration=1)
            pyautogui.click()
            pyautogui.hotkey('ctrl', 'v')
            time.sleep(0.5)
            pyautogui.press("enter")
            time.sleep(1)

            # identify curr number of pages
            with mss.mss() as sct:
                for region, output in ((page_region, page_png),
                                       (count_region, count_png)):
                    img = sct.grab(region)
                    mss.tools.to_png(img.rgb, img.size, output=output)

            # ocr to find number of pages
            res1 = curr_ocr.ocr(page_png)
            res2 = curr_ocr.ocr(count_png)

            page_number = _ocr_first_int(res1)
            result1 = 1 if page_number is None else page_number

            pages = _ocr_first_int(res2)
            if pages is None:
                result2 = 1
            else:
                print(f"there are {pages} results")
                result2 = math.ceil(pages / 20)

            result = max(result1, result2)
            print(f"there are {result} pages")

            # start looping through pages
            pyautogui.moveTo(1755, 905, duration=1)
            for i in range(1, result + 1):
                autogui(i, title)
                pyautogui.click()
                time.sleep(1.5)

            # loop through all the screenshot
            for i in range(1, result + 1):
                print(f"image {i}")
                path = os.path.join(image_dir, f"curr_{i}.png")
                total_data.extend(ocr(path))

        # Store each successfully dated record exactly once.
        for item in total_data:
            if item[0] != "error":
                curr = {
                    'time': item[0],
                    'title': item[1],
                }
                paper.insert_one(curr)
    finally:
        # Release the connection pool even if the GUI automation fails.
        client.close()
if __name__ == '__main__':
    # Default search terms passed to run() when the file is executed as a
    # script.
    default_titles = [
        "长江证券情绪周报",
        "兴业证券期权水晶球预测日报",
        "中信建投市场情绪跟踪",
        "申万宏源证券量化择时周报",
        "光大证券金融工程市场跟踪",
        "海通证券周报",
        "中泰证券期权周报",
        "广发证券A股量化择时",
        "天风证券宏观点评",
    ]
    run(default_titles)