-
Notifications
You must be signed in to change notification settings - Fork 0
/
amazon_items.py
65 lines (57 loc) · 2.09 KB
/
amazon_items.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import copy
import requests
from bs4 import BeautifulSoup
import csv
from selenium import webdriver
import time
# Despite the name, this is the last results-page number visited by the
# scraping loop, not a count of items.
NUM_ITEM=30
# Base search URL (grocery listing); the page number is appended to it to
# form the address of each results page.
URL="https://www.amazon.com/s?i=grocery&rh=n%3A16310101&page="
def getInfo(html):
    """Extract product entries from the HTML of a search-results page.

    Args:
        html: Page source as a string.

    Returns:
        A list of dicts with the keys "Name", "Price" and "PicLink", one per
        result tile that has a link, a price and an image ``srcset``. Tiles
        missing any of these are reported on stdout and skipped. An empty
        list is returned when the results container is not in the page.
    """
    # Exact class string of a product tile, kept identical to the page markup
    # this scraper was written against.
    tile_classes = (
        "sg-col-4-of-24 sg-col-4-of-12 "
        "sg-col-4-of-36 s-result-item s-asin sg-col-4-of-28 sg-col-4-of-16 sg-col"
        " sg-col-4-of-20 sg-col-4-of-32"
    )
    dir_list = []
    soup = BeautifulSoup(html, features="html.parser")
    container = soup.find("div", {"class": "s-result-list s-search-results sg-row"})
    if container is None:
        # find() returns None when nothing matches; without this guard the
        # find_all() call below would raise AttributeError.
        print("cannot find result list")
        return dir_list

    for div in container.find_all("div", class_=tile_classes):
        anchors = div.find_all("a")
        href = anchors[0].get("href") if anchors else None
        if href is None:
            print("cannot find link")
            continue
        link = "www.amazon.com" + href
        # The name is the first path segment after the host part of the link.
        segments = link.split("/")
        if len(segments) < 2:
            print("cannot find name")
            continue
        name = segments[1]

        price_tag = div.find("span", class_="a-offscreen")
        if price_tag is None:
            print("cannot find span")
            continue
        price = price_tag.text

        img = div.find("img")
        srcset = img.get("srcset") if img is not None else None
        if srcset is None:
            print("cannot find image")
            continue
        # srcset lists several candidates; keep the first URL only.
        picLink = srcset.split(" ")[0]

        dir_list.append({
            "Name": name,
            "Price": price,
            "PicLink": picLink,
        })
    return dir_list
# Configure the Chrome session used to render the search pages.
options = webdriver.ChromeOptions()
options.add_argument("--ignore-certificate-errors")
options.add_argument("--test-type")
options.binary_location = "/usr/bin/chromium"
# Hand the options to the driver; otherwise the arguments and binary location
# configured above are never applied to the browser session.
driver = webdriver.Chrome(options=options)
#req_header={"Host":"www.ralphs.com","User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:67.0) Gecko/20100101 Firefox/67.0"}
# Visit result pages 2..NUM_ITEM (page 1 is not fetched) and append the items
# found on each page to amazon_items.csv. No header row is written.
try:
    for page in range(2, NUM_ITEM + 1):
        page_url = URL + str(page)
        driver.get(page_url)
        print(page_url)
        time.sleep(0.1)
        # Step the viewport through the page in thirds before reading the
        # source. NOTE(review): presumably to trigger lazily loaded content
        # such as image srcset attributes -- confirm.
        for i in range(0, 3):
            driver.execute_script(
                "window.scrollTo(0,{}*document.body.scrollHeight/3);".format(i)
            )
            time.sleep(0.1)
        time.sleep(0.1)
        items = getInfo(driver.page_source)
        # Append per page so that every page's rows reach the file, and a
        # failure on a later page does not lose earlier results.
        with open("amazon_items.csv", "a+", newline="", encoding="utf-8") as csvfile:
            writer = csv.DictWriter(csvfile, ["Name", "Price", "PicLink"])
            writer.writerows(items)
finally:
    # Always close the browser, even if a page fails to load or parse.
    driver.quit()