Skip to content

Commit

Permalink
fix(requests): Fixed issue [BUG] Responses with no headers break some…
Browse files Browse the repository at this point in the history
… of the internal code #635
  • Loading branch information
AndyTheFactory committed May 11, 2024
1 parent 7ed25e9 commit 802ae11
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 6 deletions.
11 changes: 7 additions & 4 deletions newspaper/article.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ class Article:
successful, ArticleDownloadState.FAILED_RESPONSE if `download()` failed,
`ArticleDownloadState.NOT_STARTED` if `download()` was not called.
download_exception_msg (str): The exception message if download() failed.
history (List[str]): Redirection history from the requests.get call.
history (List[str]): Redirection history from the ``requests.get`` call.
meta_description (str): The description extracted from the meta data.
meta_lang (str): The language extracted from the meta data.
If config.language is not set, this value will be used
Expand Down Expand Up @@ -168,9 +168,11 @@ def __init__(
**kwargs: Any Configuration class property can be overwritten
through init keyword params.
Additionally, you can specify any of the following
requests parameters:
``requests.get`` parameters:
headers, cookies, auth, timeout, allow_redirects,
proxies, verify, cert
For other ``requests`` parameters, you can use the
``Configuration.requests_params`` dictionary.
Raises:
ArticleException: Error parsing and preparing the article
Expand All @@ -182,7 +184,8 @@ def __init__(
)

self.config: Configuration = config or Configuration()
# Set requests parameters. These are passed directly to requests.get
# Set ``requests`` library parameters.
# These are passed directly to ``requests.get``
for k in available_requests_params:
if k in kwargs:
self.config.requests_params[k] = kwargs[k]
Expand Down Expand Up @@ -260,7 +263,7 @@ def __init__(
self.download_state = ArticleDownloadState.NOT_STARTED
self.download_exception_msg: Optional[str] = None

# Redirection history from the requests.get call
# Redirection history from the ``requests.get`` call
self.history: Optional[List[str]] = []

# Meta description field in the HTML source
Expand Down
5 changes: 3 additions & 2 deletions newspaper/network.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@

def get_session() -> requests.Session:
"""
Get an HTTP requests session for making requests.
Get a ``requests`` HTTP session for making requests.
This function returns an HTTP session object that can be used to make HTTP requests.
If the `cloudscraper` library is available, it will be used to create the session.
Expand Down Expand Up @@ -205,7 +205,8 @@ def do_request(url: str, config: Configuration) -> Response:
requests.Response: The response object containing the server's response
to the request.
"""
session.headers.update(config.requests_params["headers"])
if "headers" in config.requests_params:
session.headers.update(config.requests_params["headers"])

if not config.allow_binary_content:
if is_binary_url(url):
Expand Down

1 comment on commit 802ae11

@AndyTheFactory
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fix for #635

Please sign in to comment.