-
Notifications
You must be signed in to change notification settings - Fork 0
/
hummart.py
69 lines (65 loc) · 2.01 KB
/
hummart.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
import os
import sched
import time
import pandas as pd
import numpy as np
# NOTE(review): both of these look like leftovers from an 'ishopping'
# scraper. `data` is reassigned to a fresh DataFrame further down before
# this CSV's contents are ever used, and `site_name` is never referenced
# again in this file — confirm with the author before removing, since
# the read_csv call is still a side effect (it fails if the file is absent).
data = pd.read_csv('ishopping_categories.csv')
site_name = 'ishopping'
def getDriver():
    """Create and return a Chrome WebDriver configured for scraping.

    The driver runs headless, in incognito mode, and ignores TLS
    certificate errors.

    Returns:
        selenium.webdriver.Chrome: the configured driver instance.
    """
    options = webdriver.ChromeOptions()
    options.add_argument('--ignore-certificate-errors')
    options.add_argument('--incognito')
    options.add_argument('--headless')
    # options.add_argument("--kiosk")
    # Bug fix: `options` was built but never handed to Chrome, so the
    # browser launched with defaults (visible window, no incognito,
    # certificate errors fatal). Pass the options object explicitly.
    driver = webdriver.Chrome(options=options)
    return driver
def scroll_down(driver):
    """Scroll the page to its bottom until no further content loads.

    After each scroll the page gets 3 seconds to lazy-load more content;
    scrolling stops once the document height stops growing, or after a
    180-second overall budget.
    """
    previous_height = driver.execute_script("return document.body.scrollHeight")
    started_at = time.time()
    # Keep scrolling while we are inside the 180-second time budget.
    while (time.time() - started_at) <= 180:
        # Jump to the current bottom of the document.
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        # Give lazy-loaded content a moment to appear.
        time.sleep(3)
        current_height = driver.execute_script("return document.body.scrollHeight")
        if current_height == previous_height:
            # Height unchanged: nothing new loaded, we reached the real bottom.
            break
        previous_height = current_height
# --- Scrape Hum Mart's top-level category menu into a CSV ---------------
driver = getDriver()
url = 'https://hummart.com/'
driver.get(url)
time.sleep(5)  # let the landing page render the mega-menu before querying it
links = []
categories = []
try:
    # Top navigation bar; first <ul> holds the top-level category items.
    nav = driver.find_element_by_css_selector('.navigation.sw-megamenu')
    ul = nav.find_elements_by_tag_name('ul')[0]
    for item in ul.find_elements_by_tag_name('li'):
        # First <a> carries the category link, first <span> the label.
        anchor_tag = item.find_elements_by_tag_name('a')[0]
        span = item.find_elements_by_tag_name('span')[0]
        categories.append(span.text)
        links.append(anchor_tag.get_attribute('href'))
finally:
    # Bug fix: quit() was only reached on full success, leaking the
    # browser process whenever an element lookup raised (e.g. the menu
    # selector changing). Always release the driver.
    driver.quit()
print(links)
data = pd.DataFrame()
data['Category'] = np.array(categories)
data['Link'] = np.array(links)
# Every row belongs to the same site; no need for a manual append loop.
data['Site'] = ['Hum Mart'] * len(links)
# NOTE(review): row 6 is dropped by hard-coded position — presumably a
# non-category menu entry; confirm it is still the right row to drop.
data = data.drop([6], axis=0)
data.reset_index(inplace = True)
data.to_csv('./hummart/categories.csv')