-
Notifications
You must be signed in to change notification settings - Fork 0
/
agent.py
93 lines (76 loc) · 2.72 KB
/
agent.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# Import required libraries
from dotenv import load_dotenv
import agentql
from agentql.sync_api import ScrollDirection
import csv
import time
# Load environment variables
load_dotenv()
# AgentQL query to fetch product details from the search-results page.
# (AgentQL query syntax, not GraphQL — fields are located semantically on the page.)
PRODUCT_QUERY = """
{
results {
products[] {
product_name
product_price
num_reviews
rating
}
}
}
"""
# AgentQL query to check pagination status: locates the next-page button in
# either its enabled or its disabled state so the loop can tell them apart.
PAGINATION_QUERY = """
{
next_page_button_enabled
next_page_button_disabled
}
"""
def start_scraping_session(url):
    """Open an AgentQL session on *url* and return it.

    The page is scrolled to the bottom once before returning so that
    content further down the results page is brought into view before
    the first query runs.
    """
    new_session = agentql.start_session(url)
    new_session.driver.scroll_to_bottom()
    return new_session
def write_product_to_csv(writer, product):
    """Append one product record to the CSV and log its name.

    Args:
        writer: an open csv.DictWriter whose fieldnames match the
            product mapping's keys.
        product: mapping of field name -> value for a single product.
    """
    name = product["product_name"]
    writer.writerow(product)
    print(f"Product written to CSV: {name}")
def main():
    """Scrape Amazon search results page-by-page into Products.csv.

    Starts an AgentQL session, walks the paginated results, writing each
    product row as it is scraped, and always closes the session on exit.
    """
    # Amazon.sa search-results URL for "macbook pro m3".
    url = "https://www.amazon.sa/s?k=macbook+pro+m3&crid=3OVD0YRB3FI6A&sprefix=macbook+pro+%2Caps%2C410&ref=nb_sb_ss_pltr-xclick_1_12"
    session = start_scraping_session(url)
    try:
        with open("Products.csv", "w", newline="", encoding="utf-8") as csv_file:
            fieldnames = ["product_name", "product_price", "num_reviews", "rating"]
            csv_writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
            csv_writer.writeheader()
            page_number = 1
            while True:
                print(f"\nScraping page {page_number}")
                # Pull every product tile on the current page.
                response = session.query(PRODUCT_QUERY)
                product_data = response.to_data()["results"]["products"]
                print(f"Scraped {len(product_data)} products successfully")
                # Stream rows to disk as they arrive rather than batching.
                for item in product_data:
                    write_product_to_csv(csv_writer, item)
                # Inspect the pagination controls before attempting to advance.
                pagination = session.query(PAGINATION_QUERY)
                pagination_data = pagination.to_data()
                enabled = pagination_data["next_page_button_enabled"]
                disabled = pagination_data["next_page_button_disabled"]
                if disabled or not enabled:
                    print("Reached the last page. Scraping complete.")
                    break
                # Advance to the next results page and bring it fully into view.
                pagination.next_page_button_enabled.click()
                print("Navigated to the next page")
                session.driver.scroll_to_bottom()
                time.sleep(2)  # Wait for the page to load
                page_number += 1
    except Exception as e:
        # Top-level boundary for this script: report and fall through to cleanup.
        print(f"An error occurred: {e}")
    finally:
        session.stop()
        print("Scraping session ended")
# Script entry point: run the scraper only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    main()