-
Notifications
You must be signed in to change notification settings - Fork 0
/
st_client.py
83 lines (70 loc) · 2.76 KB
/
st_client.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
from scrap_chord.util import save_files
from altair.vegalite.v4.api import value
import streamlit as st
from zmq.sugar.poll import Poller
from scrap_chord import ScrapChordClient
from threading import Thread
import zmq.sugar as zmq
from sortedcontainers import SortedSet
from utils.tools import zpipe
@st.cache(allow_output_mutation=True)
def create_chord_client(port, m):
    """Launch a ScrapChordClient on a daemon thread.

    The result is cached by Streamlit, so script reruns with the same
    ``port`` and ``m`` get the same thread and socket back.

    Args:
        port: First argument handed to ``ScrapChordClient``.
        m: Second argument handed to ``ScrapChordClient`` (the UI passes
            the sidebar "Bits" value here).

    Returns:
        A ``(thread, socket)`` pair: the started daemon thread running
        ``client.run`` and the second endpoint produced by ``zpipe``; the
        first endpoint is given to the client.
    """
    ctx = zmq.Context()
    endpoints = zpipe(ctx)
    worker = Thread(
        target=ScrapChordClient(port, m, endpoints[0]).run,
        daemon=True,
    )
    worker.start()
    return worker, endpoints[1]
if __name__ == "__main__":
    # Streamlit page for the scrap_chord client: options live in the sidebar,
    # the main area takes the urls to scrape and renders every message the
    # background client sends back through the pipe socket.
    st.title("ScrapKord Client")

    st.sidebar.markdown("# Options")
    port = st.sidebar.text_input(value=8000, label="Port")
    bits = st.sidebar.text_input(value=32, label="Bits")
    show_html = st.sidebar.checkbox(value=False, label="Show html from page")
    show_urls_found = st.sidebar.checkbox(value=False, label="Show urls found")
    show_search_tree = st.sidebar.checkbox(value=True, label="Show Search Tree")
    st.sidebar.markdown("""#### Developed by CoolCows""")

    urls_req = st.text_input("Enter urls for scraping")
    start = st.button("Start")
    saves = 0
    max_saves = 5

    client_args = None
    if start:
        # Port and Bits are free-text widgets, so reject non-numeric input
        # with a visible message instead of letting int() raise ValueError.
        try:
            client_args = int(port), int(bits)
        except ValueError:
            st.error("Port and Bits must be integers")
    else:
        st.text("")

    if client_args is not None:
        t, chord_sock = create_chord_client(*client_args)
        done = False
        chord_sock.send_pyobj(urls_req)
        # Receive timeout; an expired wait surfaces as zmq.error.Again below.
        chord_sock.rcvtimeo = 8000
        count = 0
        while t.is_alive():
            # Only the receive call is expected to time out, so keep the try
            # body limited to it.
            try:
                obj = chord_sock.recv_pyobj()
            except zmq.error.Again:
                if not done:
                    st.warning("No message from server")
                continue

            if len(obj) == 3:
                # Three-item message: one scraped page.
                url, html, url_list = obj
                if show_html or show_urls_found:
                    st.markdown(f"Scraped: {url}")
                else:
                    # Nothing detailed requested: show a running counter.
                    st.text(f"Scraping({count})")
                    count += 1
                if show_html:
                    st.text("HTML:")
                    st.text(html)
                if show_urls_found:
                    st.text("Links in page:")
                    st.text("\n".join(url_list))
            elif len(obj) == 2 and show_search_tree:
                # Two-item message: treated as the final result; it ends the
                # loop after the files are saved.
                # NOTE(review): with "Show Search Tree" unchecked this branch
                # is skipped, so nothing is saved and the loop never breaks -
                # confirm whether that is intended.
                done = True
                visual, url_html = obj
                st.markdown("Search Tree Completed")
                st.text(visual)
                st.text("Saving files ...")
                loc = save_files(url_html, max_saves, saves)
                saves = (saves + 1) % max_saves
                st.text(f"Save completed in {loc}")
                st.button("Clean")
                break