-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrequest.py
50 lines (43 loc) · 1.51 KB
/
request.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import urllib.request
from file import READ
from file import WRITE
# REQUEST(address)
# DETAILS: gets the HTML from any website address
from urllib.error import HTTPError
def REQUEST(address):
    """Download a web page and return its body as text.

    Builds a request for *address* with a fixed ``User-Agent`` header, opens
    it with ``urllib.request.urlopen`` and decodes the body as UTF-8.

    Args:
        address: URL of the page to fetch.

    Returns:
        The decoded HTML string when the response status is 200. ``None``
        when the status is anything else or when any error occurs; errors
        are printed rather than raised.
    """
    try:
        req = urllib.request.Request(address)
        req.add_header('User-Agent', 'RESEARCH (LINUX; Pacific North West, USA)')
        # The context manager closes the response (and its underlying
        # connection) on every path: success, non-200 status, and failures
        # while reading or decoding the body.
        with urllib.request.urlopen(req) as response:
            if response.getcode() != 200:  # Checking if page exists
                print("Page not found")
                return None
            return response.read().decode('utf-8')  # Decoding response
    except HTTPError as e:
        print(f"HTTP Error: {e.code} - {e.reason}")
        return None
    except Exception as e:
        # Best-effort fetch: malformed URLs, network failures and decode
        # errors are reported and turned into None for the caller.
        print(f"Error: {e}")
        return None
# REQUEST_FILE(address, filename=..., directory=...)
# DETAILS: gets the HTML from any website and saves it to a file and directory
# if the filename is None or "" then the address is used
# if the directory is None or "" then the default directory is "./"
def REQUEST_FILE(address, **kwargs):
    """Return the HTML for *address*, using a file as a cache.

    READ is tried first with the resolved filename and directory. If it
    returns something other than ``None`` that value is returned as-is.
    Otherwise the page is fetched with REQUEST and, when the fetch
    succeeded, stored with WRITE under the same filename and directory.

    Keyword Args:
        filename: Name of the cache file. When omitted, ``None`` or ``""``,
            the address with every ``/`` replaced by ``_`` is used.
        directory: Directory handed to READ and WRITE. When omitted,
            ``None`` or ``""``, ``"./"`` is used.

    Returns:
        The HTML string from READ or REQUEST, or ``None`` when READ returned
        ``None`` and the download failed.
    """
    # .get() makes an omitted keyword behave like None instead of raising
    # KeyError; `or` also maps "" onto the documented default.
    directory = kwargs.get("directory") or "./"
    # NOTE(review): the '/' replacement is applied only to the default name
    # derived from the address; an explicit filename is used unchanged.
    # Confirm this matches the original intent.
    filename = kwargs.get("filename") or address.replace('/', '_')

    # READ is presumed to return None when no cached copy exists (inferred
    # from the None check; its implementation is not visible here).
    html = READ(filename, directory)
    if html is not None:
        print("REQUEST (FILE): " + address)
        return html

    html = REQUEST(address)
    print("REQUEST (ONLINE): " + address)
    if html is not None:
        # Never cache a failed download: REQUEST returns None on error, and
        # writing that would store a failure in the cache file.
        WRITE(filename, directory, html)
    return html