From 37d3b6b132f579f6a1c44fb5a535350f6910c873 Mon Sep 17 00:00:00 2001 From: madereddy <49539048+madereddy@users.noreply.github.com> Date: Thu, 7 Sep 2023 10:15:10 -0400 Subject: [PATCH] Update to use FakeUserAgent (#8) * Remove Python2 and Add FakeUserAgent * Update config.yml Allowing building test build * Update noisy.py Fix fake useragent requiring float * Update noisy.py Fix urllib parse import --- .circleci/config.yml | 8 +------- noisy.py | 17 +++++------------ requirements.txt | 1 + 3 files changed, 7 insertions(+), 19 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 7ed907f1..b73ea1ef 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -70,13 +70,7 @@ workflows: version: 2 build-master: jobs: - - build: - filters: - branches: - only: master + - build - publish-latest: requires: - build - filters: - branches: - only: master diff --git a/noisy.py b/noisy.py index b3cd337a..8345ca88 100644 --- a/noisy.py +++ b/noisy.py @@ -4,23 +4,16 @@ import logging import random import re -import sys import time import requests from urllib3.exceptions import LocationParseError -try: # Python 2 - from urllib.parse import urljoin, urlparse -except ImportError: # Python 3 - from urlparse import urljoin, urlparse - -try: # Python 2 - reload(sys) - sys.setdefaultencoding('latin-1') -except NameError: # Python 3 - pass +import fake_useragent +from fake_useragent import UserAgent +ua = UserAgent(min_percentage=15.1) +from urllib.parse import urljoin, urlparse class Crawler(object): def __init__(self): @@ -43,7 +36,7 @@ def _request(self, url): :param url: the url to visit :return: the response Requests object """ - random_user_agent = random.choice(self._config["user_agents"]) + random_user_agent = ua.random headers = {'user-agent': random_user_agent} response = requests.get(url, headers=headers, timeout=5) diff --git a/requirements.txt b/requirements.txt index f2293605..25b59a11 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,2 @@ requests +fake-useragent \ No newline at end of file