-
Notifications
You must be signed in to change notification settings - Fork 7
/
down.py
65 lines (57 loc) · 1.69 KB
/
down.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import urllib.request, urllib.parse, urllib.error
import urllib.request, urllib.error, urllib.parse
import os
from urllib.parse import urlparse
from PyPDF2 import PdfFileWriter,PdfFileReader
from configuration import PDF_DOWNLOAD_DIRECTORY
def download_pdfs(list_of_pdfs, index):
    """Download the single PDF at ``list_of_pdfs[index]``.

    The file is saved as
    ``PDF_DOWNLOAD_DIRECTORY/<host>/<last path segment of the URL>``, where
    ``<host>`` is the URL's network location with ``~`` replaced by ``til``.
    Spaces in the URL are percent-encoded before the request is made.

    Parameters:
        list_of_pdfs - sequence of PDF URLs
        index - position in list_of_pdfs of the one URL to download

    Returns:
        None.  Progress and failure reasons are printed.  Nothing is
        downloaded when the index is invalid, the URL has no host part or no
        file name, a non-empty file of the same name already exists, or the
        connection cannot be opened.

    Raises:
        Errors that occur while the body is being streamed to disk are
        re-raised after the partial file has been removed.

    Note:
        The process working directory is left untouched; every path is built
        from PDF_DOWNLOAD_DIRECTORY instead of relying on os.chdir().
    """
    # The base directory is created up front, even if the index or URL later
    # turns out to be unusable (same order as the historic implementation).
    os.makedirs(PDF_DOWNLOAD_DIRECTORY, exist_ok=True)

    try:
        url = list_of_pdfs[index]
    except (IndexError, TypeError):
        print("Index out of range \n")
        return

    # One sub-directory per host; '~' is replaced in the directory name.
    host_dir = urlparse(url).netloc.replace('~', 'til')
    if host_dir == '':
        return
    url = url.replace(' ', '%20')

    target_dir = os.path.join(PDF_DOWNLOAD_DIRECTORY, host_dir)
    os.makedirs(target_dir, exist_ok=True)

    file_name = url.split('/')[-1]
    if not file_name:
        # URL ends with '/': there is nothing sensible to name the file.
        print("No file name in URL :Skipping: ")
        return

    target_path = os.path.join(target_dir, file_name)
    if os.path.exists(target_path):
        if os.path.getsize(target_path):
            print("File exists :Skipping: ")
            return
        # A zero-byte file is the leftover of a failed attempt: retry it.
        print("File exists and revoming: ")
        os.remove(target_path)

    try:
        response = urllib.request.urlopen(url, timeout=100)
    except Exception as err:
        # Best effort: any failure to connect (timeout, HTTP error, malformed
        # URL, ...) skips this link, but the real reason is reported.
        print("Download failed ({}) \n".format(err))
        return

    # Stream into a temporary name so that an interrupted transfer never
    # leaves a truncated file that later runs would skip as "already exists".
    partial_path = target_path + '.part'
    block_sz = 8192
    try:
        with response, open(partial_path, 'wb') as out_file:
            while True:
                chunk = response.read(block_sz)
                if not chunk:
                    break
                out_file.write(chunk)
    except BaseException:
        if os.path.exists(partial_path):
            os.remove(partial_path)
        raise
    os.replace(partial_path, target_path)
'''
Function: download_pdfs
Parameters:
list_of_pdfs - a list of PDF links to download
index - index in list_of_pdfs of the link to download
( Lets the caller resume from a given position in case some error happened in downloading and some files have already been downloaded )
Returns:
Nothing.
Just saves the downloaded file under PDF_DOWNLOAD_DIRECTORY (e.g. '/home/rishabh/downpdfs/'), in a sub-folder named after the link's host
Note to User:
None
'''