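"""Scrape every quote from http://quotes.toscrape.com/ with Selenium.

The script logs in with credentials read from login_credentials.env,
walks through all pages of quotes, and writes the text, author, and
tags of each quote to quotes.csv.
"""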
import time, os, csv
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import NoSuchElementException
from dotenv import load_dotenv

# Set up Chrome options
chrome_options = Options()
chrome_options.add_argument("--headless")  # Ensure the GUI stays off
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--disable-dev-shm-usage")

def login(driver, username, password):
    website = "http://quotes.toscrape.com/"
    # Open the website
    driver.get(website)
    # Click the login link to reach the login page
    login_button = driver.find_element(By.XPATH, "/html/body/div/div[1]/div[2]/p/a")
    login_button.click()
    time.sleep(5)
    try:
        # Find the username field and enter the username
        username_field = driver.find_element(By.ID, "username")
        username_field.send_keys(username)
        # Find the password field and enter the password
        password_field = driver.find_element(By.ID, "password")
        password_field.send_keys(password)
        # Find the login button and click it
        login_button = driver.find_element(By.XPATH, "/html/body/div/form/input[2]")
        login_button.click()
        time.sleep(5)  # Wait for the login to complete
    except NoSuchElementException as exc:
        raise Exception("Login failed!") from exc

def scrape_data(driver):
    # Collect the quotes from every page
    data = []
    while True:
        # Scrape all the quotes on the current page
        quotes = driver.find_elements(By.CLASS_NAME, "quote")
        # Extract the available attributes within each quote
        for quote in quotes:
            text = quote.find_element(By.CLASS_NAME, "text").text
            author = quote.find_element(By.CLASS_NAME, "author").text
            tags = quote.find_element(By.CLASS_NAME, "tags").text
            text = text.removeprefix('“').removesuffix('”')
            data.append([text, author, tags])
        # Look for the next button; when it doesn't exist, this is the last
        # page of quotes, so stop
        try:
            next_button = driver.find_element(By.CSS_SELECTOR, "li.next a")
        except NoSuchElementException:
            break
        next_button.click()  # Move on to the next page
    return data

def main():
    driver = None
    try:
        # Load the .env file
        load_dotenv('login_credentials.env')
        # Get the login credentials from the .env file
        username = os.getenv("username", "")
        password = os.getenv("password", "")
        # Set the path to the Chrome driver
        webdriver_service = Service('../../chromedriver.exe')
        # Create a new instance of the Chrome driver
        driver = webdriver.Chrome(service=webdriver_service, options=chrome_options)
        login(driver, username, password)
        data = scrape_data(driver)
        # Save the data to a CSV file
        with open("quotes.csv", "w", newline="", encoding="utf-8") as file:
            writer = csv.writer(file)
            writer.writerow(["Text", "Author", "Tags"])
            writer.writerows(data)
    finally:
        if driver is not None:
            driver.quit()  # Shut down the web driver and end the session


if __name__ == '__main__':
    main()
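
# Usage sketch, under the paths assumed above: place chromedriver.exe two
# directories up and login_credentials.env next to this script, then run
#   python source_code.py
# The scraped quotes land in quotes.csv in the current working directory.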