Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
Kuucheen committed Jan 27, 2023
1 parent 0258c9e commit f69f9ef
Show file tree
Hide file tree
Showing 7 changed files with 3,932 additions and 122 deletions.
160 changes: 80 additions & 80 deletions KC-Scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,7 @@
import httpx, yaml
from pystyle import Colors, Colorate, Center
except ImportError:
os.system('python -m pip install httpx[http2]')
os.system('python -m pip install pyyaml')
os.system('python -m pip install pystyle')
os.system('python -m pip install httpx[http2] pyyaml pystyle')
import httpx, yaml
from pystyle import Colors, Colorate, Center

Expand All @@ -27,6 +25,82 @@
def main():
    """Entry point: load settings, dispatch scraper threads over the site
    list, then deduplicate and write the collected proxies to proxies.txt.

    Side effects: mutates the module-level globals listed below, writes
    ``proxies.txt``, optionally rewrites the site-list config file, and
    blocks on ``input()`` before returning.
    """
    global threadcount, clearingcnt, clearingproxy, randomUseragent, timeout, sitelist, start

    printLogo()
    print()
    config, clearingcnt, clearingproxy, randomUseragent, threads, timeout = getSettings()

    terminal()

    start = time.time()

    # dispatcher: read the site list, then fan out one thread per site
    with open(config) as sites:
        sitelist = sites.readlines()
    threading.Thread(target=terminalthread).start()

    while sitelist:
        if threadcount < threads:
            threading.Thread(target=scrape, args=[sitelist[0]]).start()
            threadcount += 1
            sitelist.pop(0)
        else:
            # thread pool is full — yield the CPU instead of busy-spinning
            time.sleep(0.01)

    while threadcount > 0:
        terminal(f"| Waiting for threads to finish | active threads {threadcount} | Proxies {proxycount} | Time {time.time()-start:.2f}s")
        time.sleep(0.05)  # avoid pegging a core while waiting

    terminal()

    lenproxies = len(proxies)
    print(f"\n{white}[{color}^{white}] Removed {color}{proxycount-lenproxies} {white}Duplicates\n")
    print(f"{white}[{color}^{white}] Remaining Proxies: {color}{lenproxies}{white}\n")
    print(f"{white}[{color}^{white}] Writing {color}Proxies\n")

    with open("proxies.txt", "w") as output:
        for i in proxies:
            output.write(i.replace("\n", "") + "\n")

    # optionally rewrite the config so only sites that yielded proxies remain
    if clearingcnt or clearingproxy:
        print(f"{white}[{color}^{white}] Removing {color}bad Websites\n")

        with open(config, "w") as inp:
            for site in goodsites:
                inp.write(site + "\n")

    terminal()

    print(f"{white}[{color}^{white}] Finished in {color}{time.time()-start:.2f}s{white}!\n")
    print("You can now close the tab")

    input("")


def scrape(site: str):
    """Fetch *site* and harvest every ``ip:port`` pattern from the response.

    Runs on a worker thread. On success the site is recorded in ``goodsites``
    and any proxies found are merged into the global ``proxies`` set; on
    connection failure only a message is printed. Always decrements the
    global ``threadcount``, even on error.

    Args:
        site: one line from the site list; trailing newline is stripped.
    """
    global proxies, threadcount, proxycount
    uas=["Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36", "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:53.0) Gecko/20100101 Firefox/53.0", "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.0; Trident/5.0; Trident/5.0)", "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Trident/6.0; MDDCJS)", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36 Edge/14.14393", "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1)"]
    site = site.replace("\n", "")
    try:
        with httpx.Client(http2=True, headers={'accept-language': 'en', 'user-agent': random.choice(uas) if randomUseragent else uas[0]}, follow_redirects=True) as client:
            r = client.get(site, timeout=timeout).text
    except Exception:
        # was a bare `except:` — that also swallowed KeyboardInterrupt/SystemExit;
        # Exception still covers every httpx transport/protocol error
        print(f"{white}[{Colors.red}!{white}] Failed connecting to {color}{site}")
    else:
        goodsites.add(site)
        # some lists obfuscate the colon as an HTML entity
        r = r.replace("&colon", ":")
        locProxies = re.findall(r"\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\:\d{1,5}\b", r)
        length = len(locProxies)
        print(f"{white}[{Colors.green}+{white}] Scraped {color}{length}{white} from {color}{site}")
        proxycount += length
        # in-place update instead of `proxies = proxies | set(...)`: the
        # read-modify-rebind was a lost-update race between worker threads
        proxies.update(locProxies)
        if clearingproxy and length == 0:
            goodsites.remove(site)
    finally:
        threadcount -= 1

def getSettings() -> list:
with open("settings.yaml") as setting:
settings = yaml.safe_load(setting.read())

Expand All @@ -37,11 +111,6 @@ def main():
threads = settings["threads"]
timeout = settings["timeout"]

terminal()

printLogo()
print()

yes = ["yes", "y", "ye"]
no = ["no", "n", "nah"]

Expand Down Expand Up @@ -183,76 +252,8 @@ def main():
time.sleep(3)
main()
exit()

printLogo()
start = time.time()

with open(config) as sites:

sitelist = sites.readlines()
threading.Thread(target=terminalthread).start()

while len(sitelist) > 0:
if threadcount < threads:
threading.Thread(target=scrape, args=[sitelist[0]]).start()
threadcount += 1
sitelist.pop(0)

while threadcount > 0:
terminal(f"| Waiting for threads to finish | active threads {threadcount} | Proxies {proxycount} | Time {time.time()-start:.2f}s")

terminal()


lenproxies = len(proxies)
print(f"\n{white}[{color}^{white}] Removed {color}{proxycount-lenproxies} {white}Duplicates\n")
print(f"{white}[{color}^{white}] Remaining Proxies: {color}{lenproxies}{white}\n")
print(f"{white}[{color}^{white}] Writing {color}Proxies\n")

with open("proxies.txt", "w") as output:

for i in proxies:
output.write(i.replace("\n", "") + "\n")

if clearingcnt == True or clearingproxy == True:

print(f"{white}[{color}^{white}] Removing {color}bad Websites\n")

with open(config, "w") as inp:

for site in goodsites:
inp.write(site + "\n")

terminal()

print(f"{white}[{color}^{white}] Finished in {color}{time.time()-start:.2f}s{white}!\n")
print("You can now close the tab")

input("")



def scrape(site: str):
global proxies, threadcount, proxycount
uas=["Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36", "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:53.0) Gecko/20100101 Firefox/53.0", "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.0; Trident/5.0; Trident/5.0)", "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Trident/6.0; MDDCJS)", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36 Edge/14.14393", "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1)"]
site = site.replace("\n", "")
try:
with httpx.Client(http2=True,headers = {'accept-language': 'en','user-agent': random.choice(uas) if randomUseragent == True else uas[0]},follow_redirects=True) as client:
r = client.get(site, timeout=timeout).text
except:
print(f"{white}[{Colors.red}!{white}] Failed connecting to {color}{site}")
else:
goodsites.add(site)
r = r.replace("&colon", ":")
locProxies = re.findall(r"\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\:\d{1,5}\b", r)
length = len(locProxies)
print(f"{white}[{Colors.green}+{white}] Scraped {color}{length}{white} from {color}{site}")
proxycount += length
proxies = proxies | set(locProxies)
if clearingproxy == True and length == 0:
goodsites.remove(site)
finally:
threadcount -= 1
return config, clearingcnt, clearingproxy, randomUseragent, threads, timeout

def terminal(string:str = ""):
ctypes.windll.kernel32.SetConsoleTitleW("KC Scraper | github.com/Kuucheen " + string)
Expand All @@ -277,6 +278,5 @@ def printLogo():
print(Colorate.Diagonal(Colors.DynamicMIX((Colors.dark_gray, Colors.StaticMIX((Colors.purple, Colors.blue)))), Center.XCenter(logo)))




main()
# Run only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()
32 changes: 15 additions & 17 deletions premades/http.txt
Original file line number Diff line number Diff line change
@@ -1,23 +1,21 @@
https://api.proxyscrape.com/v2/?request=getproxies&protocol=http&timeout=10000&country=all&ssl=all&anonymity=all
https://www.proxyscan.io/download?type=https
https://openproxy.space/list/http
https://raw.githubusercontent.com/jetkai/proxy-list/main/online-proxies/txt/proxies-http.txt
https://raw.githubusercontent.com/monosans/proxy-list/main/proxies/http.txt
https://www.proxyscan.io/download?type=http
https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/http.txt
https://api.proxyscrape.com/v2/?request=getproxies&protocol=http&timeout=10000&country=all&ssl=all&anonymity=all
https://raw.githubusercontent.com/ShiftyTR/Proxy-List/master/https.txt
https://raw.githubusercontent.com/roosterkid/openproxylist/main/HTTPS_RAW.txt
https://raw.githubusercontent.com/proxy4parsing/proxy-list/main/http.txt
https://raw.githubusercontent.com/ShiftyTR/Proxy-List/master/http.txt
http://pubproxy.com/api/proxy
https://www.juproxy.com/free_api
https://raw.githubusercontent.com/monosans/proxy-list/main/proxies_anonymous/http.txt
https://raw.githubusercontent.com/saschazesiger/Free-Proxies/master/proxies/http.txt
https://www.proxy-list.download/api/v1/get?type=https
https://www.proxy-list.download/api/v1/get?type=http
https://raw.githubusercontent.com/mmpx12/proxy-list/master/http.txt
https://raw.githubusercontent.com/mmpx12/proxy-list/master/https.txt
https://api.openproxylist.xyz/http.txt
https://raw.githubusercontent.com/hyperbeats/proxy-list/main/http.txt
https://raw.githubusercontent.com/jetkai/proxy-list/main/archive/txt/proxies-http.txt
https://raw.githubusercontent.com/jetkai/proxy-list/main/archive/txt/proxies-https.txt
https://raw.githubusercontent.com/jetkai/proxy-list/main/online-proxies/txt/proxies-http.txt
http://pubproxy.com/api/proxy
https://raw.githubusercontent.com/HyperBeats/proxy-list/main/http.txt
https://raw.githubusercontent.com/jetkai/proxy-list/main/online-proxies/txt/proxies-https.txt
https://raw.githubusercontent.com/mmpx12/proxy-list/master/http.txt
https://raw.githubusercontent.com/monosans/proxy-list/main/proxies/http.txt
https://raw.githubusercontent.com/proxy4parsing/proxy-list/main/http.txt
https://raw.githubusercontent.com/roosterkid/openproxylist/main/https_raw.txt
https://raw.githubusercontent.com/shiftytr/proxy-list/master/http.txt
https://raw.githubusercontent.com/thespeedx/proxy-list/master/http.txt
https://www.proxy-list.download/api/v1/get?type=http
https://www.proxy-list.download/api/v1/get?type=https
https://www.proxyscan.io/download?type=http
https://www.proxyscan.io/download?type=https
25 changes: 12 additions & 13 deletions premades/socks4.txt
Original file line number Diff line number Diff line change
@@ -1,16 +1,15 @@
https://openproxy.space/list/socks4
https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks4.txt
https://raw.githubusercontent.com/monosans/proxy-list/main/proxies/socks4.txt
https://raw.githubusercontent.com/jetkai/proxy-list/main/online-proxies/txt/proxies-socks4.txt
https://www.socks-proxy.net/
https://api.proxyscrape.com/v2/?request=getproxies&protocol=socks4&timeout=10000&country=all
https://raw.githubusercontent.com/roosterkid/openproxylist/main/SOCKS4_RAW.txt
https://www.proxy-list.download/api/v1/get?type=socks4
https://www.proxyscan.io/download?type=socks4
https://api.openproxylist.xyz/socks4.txt
https://raw.githubusercontent.com/mmpx12/proxy-list/master/socks4.txt
https://www.proxy-list.download/api/v1/get?type=socks4
https://raw.githubusercontent.com/jetkai/proxy-list/main/online-proxies/txt/proxies-socks4.txt
https://raw.githubusercontent.com/ShiftyTR/Proxy-List/master/socks4.txt
https://raw.githubusercontent.com/UptimerBot/proxy-list/main/proxies/socks4.txt
https://raw.githubusercontent.com/rdavydov/proxy-list/main/proxies/socks4.txt
https://raw.githubusercontent.com/saschazesiger/Free-Proxies/master/proxies/socks4.txt
https://raw.githubusercontent.com/HyperBeats/proxy-list/main/socks4.txt
https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks4.txt
https://raw.githubusercontent.com/HyperBeats/proxy-list/main/socks4.txt
https://www.proxyscan.io/download?type=socks4
https://www.socks-proxy.net/
https://raw.githubusercontent.com/mmpx12/proxy-list/master/socks4.txt
https://raw.githubusercontent.com/monosans/proxy-list/main/proxies/socks4.txt
https://openproxy.space/list/socks4
https://raw.githubusercontent.com/monosans/proxy-list/main/proxies_anonymous/socks4.txt
https://raw.githubusercontent.com/roosterkid/openproxylist/main/SOCKS4_RAW.txt
https://api.proxyscrape.com/v2/?request=getproxies&protocol=socks4&timeout=10000&country=all
21 changes: 10 additions & 11 deletions premades/socks5.txt
Original file line number Diff line number Diff line change
@@ -1,16 +1,15 @@
https://openproxy.space/list/socks5
https://raw.githubusercontent.com/hookzof/socks5_list/master/proxy.txt
https://raw.githubusercontent.com/thespeedx/proxy-list/master/socks5.txt
https://api.openproxylist.xyz/socks5.txt
https://api.proxyscrape.com/v2/?request=getproxies&protocol=socks5&timeout=10000&country=all
https://www.proxyscan.io/download?type=socks5
https://raw.githubusercontent.com/roosterkid/openproxylist/main/SOCKS5_RAW.txt
https://raw.githubusercontent.com/hookzof/socks5_list/master/proxy.txt
https://www.proxy-list.download/api/v1/get?type=socks5
https://raw.githubusercontent.com/monosans/proxy-list/main/proxies/socks5.txt
https://raw.githubusercontent.com/saschazesiger/Free-Proxies/master/proxies/socks5.txt
https://raw.githubusercontent.com/hyperbeats/proxy-list/main/socks5.txt
https://raw.githubusercontent.com/jetkai/proxy-list/main/online-proxies/txt/proxies-socks5.txt
https://raw.githubusercontent.com/manugmg/proxy-365/main/socks5.txt
https://openproxy.space/list/socks5
https://raw.githubusercontent.com/mmpx12/proxy-list/master/socks5.txt
https://raw.githubusercontent.com/monosans/proxy-list/main/proxies/socks5.txt
https://raw.githubusercontent.com/roosterkid/openproxylist/main/socks5_raw.txt
https://raw.githubusercontent.com/saschazesiger/free-proxies/master/proxies/socks5.txt
https://api.proxyscrape.com/v2/?request=getproxies&protocol=socks5&timeout=10000&country=all
https://raw.githubusercontent.com/jetkai/proxy-list/main/online-proxies/txt/proxies-socks5.txt
https://raw.githubusercontent.com/monosans/proxy-list/main/proxies_anonymous/socks5.txt
https://raw.githubusercontent.com/shiftytr/proxy-list/master/socks5.txt
https://raw.githubusercontent.com/thespeedx/proxy-list/master/socks5.txt
https://www.proxy-list.download/api/v1/get?type=socks5
https://www.proxyscan.io/download?type=socks5
Loading

0 comments on commit f69f9ef

Please sign in to comment.