Skip to content

Commit

Permalink
Make the total page variable and dynamic
Browse files Browse the repository at this point in the history
  • Loading branch information
SiddharthaAnand committed Jun 29, 2020
1 parent 89e7a15 commit 0e3e063
Showing 1 changed file with 8 additions and 1 deletion.
9 changes: 8 additions & 1 deletion ncbi_sars2_crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@
# logging.basicConfig(level=logging.DEBUG)


def get_total_pages_to_scrape(driver):
    """
    Read the total number of result pages from the currently loaded page.

    The page's HTML is taken from ``driver.page_source``, the first ``div``
    with class ``ncbi-pagination-page`` is located, and the third
    whitespace-separated token of its text is converted to an int.

    :param driver: object exposing the loaded page's HTML as ``page_source``
                   (presumably a selenium webdriver -- confirm against caller).
    :return: total page count as an int.
    :raises AttributeError: if no matching pagination ``div`` is found.
    :raises IndexError: if the pagination text has fewer than three tokens.
    :raises ValueError: if the third token is not an integer.
    """
    # Name the parser explicitly. Without it BeautifulSoup picks whichever
    # parser happens to be installed and emits GuessedAtParserWarning, so the
    # parse tree could differ from one machine to another.
    page = BeautifulSoup(driver.page_source, 'html.parser')
    pagination = page.find('div', attrs={'class': 'ncbi-pagination-page'})
    # NOTE(review): index 2 assumes the widget text looks like "Page of <N>"
    # -- confirm against the live markup.
    return int(pagination.text.split()[2])


def crawl_nucleotide_relative_url(url=None, chromepath=None):
"""
This method is used to store the relative urls for visiting those pages
Expand All @@ -35,9 +40,11 @@ def crawl_nucleotide_relative_url(url=None, chromepath=None):
driver.get(url)
url_count = 0
c = 0
total_pages_to_crawl = get_total_pages_to_scrape(driver=driver)

nucleotide_details_dict = {}
try:
for page in range(29):
for page in range(total_pages_to_crawl):
########################################################
# Get the page first #
########################################################
Expand Down

0 comments on commit 0e3e063

Please sign in to comment.