diff --git a/README.md b/README.md
index 1d75baa..b05834c 100755
--- a/README.md
+++ b/README.md
@@ -4,12 +4,15 @@
 [![Coverage status](https://coveralls.io/repos/github/AndyTheFactory/newspaper4k/badge.svg?branch=master)](https://coveralls.io/github/AndyTheFactory/newspaper4k)
 [![Documentation Status](https://readthedocs.org/projects/newspaper4k/badge/?version=latest)](https://newspaper4k.readthedocs.io/en/latest/)
 
-At the moment the Newspaper4k Project is a fork of the well known newspaper3k by [codelucas](https://github.com/codelucas/newspaper) which was not updated since Sept 2020. The initial goal of this fork is to keep the project alive and to add new features and fix bugs.
+At the moment the Newspaper4k Project is a fork of the well-known newspaper3k by [codelucas](https://github.com/codelucas/newspaper), which has not been updated since September 2020. The initial goal of this fork is to keep the project alive, add new features, and fix bugs.
 
 I have duplicated all issues on the original project and will try to fix them. If you have any issues or feature requests please open an issue here.
 
-**Experimental ChatGPT helper bot for Newspaper4k:**
-[![ChatGPT helper](docs/user_guide/assets/chatgpt_chat.png)](https://chat.openai.com/g/g-OxSqyKAhi-newspaper-4k-gpt)
+| | |
+|-------------|-------------|
+| **Experimental ChatGPT helper bot for Newspaper4k:** | [![ChatGPT helper](docs/user_guide/assets/chatgpt_chat200x75.png)](https://chat.openai.com/g/g-OxSqyKAhi-newspaper-4k-gpt)|
+
+
 
 ## Python compatibility
 - Recommended: Python 3.8+
@@ -29,10 +32,10 @@
 You can start directly from the command line, using the included CLI:
 python -m newspaper --url="https://edition.cnn.com/2023/11/17/success/job-seekers-use-ai/index.html" --language=en --output-format=json --output-file=article.json
 ```
-
+More information about the CLI can be found in the [CLI documentation](https://newspaper4k.readthedocs.io/en/latest/user_guide/cli_reference.html).
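+
+For example, the JSON file written by the command above (`article.json`) can be inspected with a few lines of Python. This is only a quick sketch: the exact field names come from the CLI's JSON output schema, so the snippet just lists whatever keys are present instead of assuming them.
+
+```python
+import json
+
+# Load the file produced by the CLI call above
+with open("article.json", encoding="utf-8") as f:
+    data = json.load(f)
+
+# The output may be a single JSON object or a list of them, depending on how
+# many URLs were processed; normalize to one record and show its fields.
+record = data[0] if isinstance(data, list) else data
+print(sorted(record.keys()))
+```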
 ## Using the Python API
 
-Alternatively, you can use the Python API:
+Alternatively, you can use Newspaper4k in Python:
 
 ### Processing one article / url at a time
 
@@ -82,22 +85,22 @@
 import newspaper
 
 cnn_paper = newspaper.build('http://cnn.com', number_threads=3)
 print(cnn_paper.category_urls())
-> ['https://cnn.com', 'https://money.cnn.com', 'https://arabic.cnn.com',
-> 'https://cnnespanol.cnn.com', 'http://edition.cnn.com',
-> 'https://edition.cnn.com', 'https://us.cnn.com', 'https://www.cnn.com']
+>> ['https://cnn.com', 'https://money.cnn.com', 'https://arabic.cnn.com',
+>> 'https://cnnespanol.cnn.com', 'http://edition.cnn.com',
+>> 'https://edition.cnn.com', 'https://us.cnn.com', 'https://www.cnn.com']
 
 article_urls = [article.url for article in cnn_paper.articles]
 print(article_urls[:3])
-> ['https://arabic.cnn.com/middle-east/article/2023/10/30/number-of-hostages-held-in-gaza-now-up-to-239-idf-spokesperson',
-> 'https://arabic.cnn.com/middle-east/video/2023/10/30/v146619-sotu-sullivan-hostage-negotiations',
-> 'https://arabic.cnn.com/middle-east/article/2023/10/29/norwegian-pm-israel-gaza']
+>> ['https://arabic.cnn.com/middle-east/article/2023/10/30/number-of-hostages-held-in-gaza-now-up-to-239-idf-spokesperson',
+>> 'https://arabic.cnn.com/middle-east/video/2023/10/30/v146619-sotu-sullivan-hostage-negotiations',
+>> 'https://arabic.cnn.com/middle-east/article/2023/10/29/norwegian-pm-israel-gaza']
 
 article = cnn_paper.articles[0]
 article.download()
 article.parse()
 print(article.title)
-> المتحدث باسم الجيش الإسرائيلي: عدد الرهائن المحتجزين في غزة يصل إلى
+>> المتحدث باسم الجيش الإسرائيلي: عدد الرهائن المحتجزين في غزة يصل إلى
 ```
 
 Or if you want to get bulk articles from the website (keep in mind that this could take a long time and could get your IP blocked by the news site):
@@ -130,7 +133,7 @@
 article.download()
 article.parse()
 print(article.title)
-> 晶片大战:台湾厂商助攻华为突破美国封锁?
+>> 晶片大战:台湾厂商助攻华为突破美国封锁?
 
 if article.config.use_meta_language:
     # If we use the autodetected language, this config attribute will be true
@@ -138,7 +141,7 @@ if article.config.use_meta_language:
     print("Meta language detected and used")
 else:
     print(article.config.language)
 
-> zh
+>> zh
 ```
 
 # Docs
@@ -158,8 +161,25 @@ detailed guides using newspaper.
 
 - Automatic article text summarization
 - Author extraction from text
 - Easy to use Command Line Interface (`python -m newspaper....`)
+- Output in various formats (json, csv, text)
 - Works in 10+ languages (English, Chinese, German, Arabic, \...)
 
+# Evaluation
+
+## Evaluation Results
+
+
+Using the dataset from [ScrapingHub](https://github.com/scrapinghub/article-extraction-benchmark), I created an [evaluator script](tests/evaluation/evaluate.py) that compares the performance of newspaper against its previous versions. This way we can see whether newspaper updates improve or worsen the performance of the library.
+
+| Version | Corpus BLEU Score | Corpus Precision Score | Corpus Recall Score | Corpus F1 Score |
+|--------------------|-------------------|------------------------|---------------------|-----------------|
+| Newspaper3k 0.2.8 | 0.8660 | 0.9128 | 0.9071 | 0.9100 |
+| Newspaper4k 0.9.0 | 0.9212 | 0.8992 | 0.9336 | 0.9161 |
+| Newspaper4k 0.9.1 | 0.9224 | 0.8895 | 0.9242 | 0.9065 |
+| Newspaper4k 0.9.2 | 0.9426 | 0.9070 | 0.9087 | 0.9078 |
+
+Precision, Recall and F1 are computed using the overlap of shingles with n-grams of size 4. The corpus BLEU score is computed using [nltk's bleu_score](https://www.nltk.org/api/nltk.translate.bleu).
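+
+To make the scoring concrete, the shingle overlap can be sketched roughly as follows. This is a simplified illustration of 4-gram precision/recall/F1 (the helper names are made up for the example), not the exact logic of the evaluator script:
+
+```python
+from collections import Counter
+
+def shingles(text, n=4):
+    # Multiset of word n-grams ("shingles") of size n
+    tokens = text.split()
+    return Counter(tuple(tokens[i:i + n]) for i in range(len(tokens) - n + 1))
+
+def overlap_scores(extracted, reference, n=4):
+    pred, gold = shingles(extracted, n), shingles(reference, n)
+    common = sum((pred & gold).values())  # shared 4-grams (multiset intersection)
+    precision = common / max(sum(pred.values()), 1)
+    recall = common / max(sum(gold.values()), 1)
+    f1 = 2 * precision * recall / max(precision + recall, 1e-9)
+    return precision, recall, f1
+
+print(overlap_scores("the quick brown fox jumps over the lazy dog",
+                     "a quick brown fox jumps over a lazy dog"))
+```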
+
 # Requirements and dependencies
 
 The following system packages are required:
diff --git a/docs/user_guide/advanced.rst b/docs/user_guide/advanced.rst
index bbc2a83..d224332 100755
--- a/docs/user_guide/advanced.rst
+++ b/docs/user_guide/advanced.rst
@@ -11,7 +11,7 @@ Multi-threading article downloads
 **Downloading articles one at a time is slow.** But spamming a single news
 source like cnn.com with tons of threads or with ASYNC-IO will cause rate limiting
-and also doing that is very mean.
+and can also lead to your IP being blocked by the site.
 
 We solve this problem by allocating 1-2 threads per news source to both greatly
 speed up the download time while being respectful.
@@ -19,22 +19,50 @@ speed up the download time while being respectful.
 
 .. code-block:: python
 
     import newspaper
-    from newspaper import news_pool
+    from newspaper.mthreading import fetch_news
 
     slate_paper = newspaper.build('http://slate.com')
     tc_paper = newspaper.build('http://techcrunch.com')
     espn_paper = newspaper.build('http://espn.com')
 
     papers = [slate_paper, tc_paper, espn_paper]
-    news_pool.set(papers, threads_per_source=2) # (3*2) = 6 threads total
-    news_pool.join()
+    results = fetch_news(papers, threads=4)
+
     #At this point, you can safely assume that download() has been
     #called on every single article for all 3 sources.
 
-    print(slate_paper.articles[10].html)
+    print(slate_paper.articles[10].title) #' ...'
+
+In addition to :any:`Source` objects, :any:`fetch_news` also accepts :any:`Article` objects or simple URLs.
+
+.. code-block:: python
+
+    article_urls = [f'https://abcnews.go.com/US/x/story?id={i}' for i in range(106379500, 106379520)]
+    articles = [Article(url=u) for u in article_urls]
+
+    results = fetch_news(articles, threads=4)
+
+    urls = [
+        "https://www.foxnews.com/media/homeowner-new-florida-bill-close-squatting-loophole-return-some-fairness",
+        "https://edition.cnn.com/2023/12/27/middleeast/dutch-diplomat-humanitarian-aid-gaza-sigrid-kaag-intl/index.html",
+    ]
+
+    results = fetch_news(urls, threads=4)
+
+    # or everything at once
+    papers = [slate_paper, tc_paper, espn_paper]
+    papers.extend(articles)
+    papers.extend(urls)
+
+    results = fetch_news(papers, threads=4)
+
+
+**Note:** In previous versions of newspaper, this could be done with the ``news_pool`` call, but it was not very robust
+and was replaced with a ``ThreadPoolExecutor``-based implementation.
+
 Keeping just the Html of the main body article
 ------------------------------------------------
@@ -191,12 +219,84 @@ The full available options are available under the :any:`Configuration` section
 
 Caching
 -------
 
-TODO
+The Newspaper4k library provides a simple caching mechanism that can be used to avoid repeatedly downloading the same article. Additionally, when building a :any:`Source` object, the category URL detection is cached for 24 hours.
+
+Both mechanisms are enabled by default. Article caching is controlled by the ``memoize_articles`` parameter of the :any:`newspaper.build()` function or, when creating a :any:`Source` object directly, by the ``memoize_articles`` parameter of its constructor. Setting it to ``False`` disables the caching mechanism.
+
+The category detection caching is controlled by the ``utils.cache_disk.enabled`` setting. Setting it to ``False`` disables the caching decorator on the ``Source._get_category_urls(..)`` method.
+
+For example:
+
+..
code-block:: python + + import newspaper + from newspaper import utils + + cbs_paper = newspaper.build('http://cbs.com') + + # Disable article caching + utils.cache_disk.enabled = False + + cbs_paper2 = newspaper.build('http://cbs.com') # The categories will be re-detected + + # Enable article caching + utils.cache_disk.enabled = True + + cbs_paper3 = newspaper.build('http://cbs.com') # The cached category urls will be loaded + + Proxy Usage -------------- -TODO +Often times websites block repeated access from a single IP address. Or, some websites might limit access from certain geographic locations (due to legal reasons, etc.). To bypass these restrictions, you can use a proxy. Newspaper supports using a proxy by passing the ``proxies`` parameter to the :any:`Article` object's constructor or :any:`Source` object's constructor. The ``proxies`` parameter should be a dictionary, as required by the ``requests library``, with the following format: + +.. code-block:: python + + from newspaper import Article + + # Define your proxy + proxies = { + 'http': 'http://your_http_proxy:port', + 'https': 'https://your_https_proxy:port' + } + + # URL of the article you want to scrape + url = 'https://abcnews.go.com/Technology/wireStory/indonesias-mount-marapi-erupts-leading-evacuations-reported-casualties-106358667' + + # Create an Article object, passing the proxies parameter + article = Article(url, proxies=proxies) + + # Download and parse the article + article.download() + article.parse() + + # Access the article's text, keywords, and summary + print("Title:", article.title) + print("Text:", article.text) + +or the shorter version: + +.. code-block:: python + + from newspaper import article + + # Define your proxy + proxies = { + 'http': 'http://your_http_proxy:port', + 'https': 'https://your_https_proxy:port' + } + + # URL of the article you want to scrape + url = 'https://abcnews.go.com/Technology/wireStory/indonesias-mount-marapi-erupts-leading-evacuations-reported-casualties-106358667' + + # Create an Article object, + article = article(url, proxies=proxies) + + # Access the article's text, keywords, and summary + print("Title:", article.title) + print("Text:", article.text) + Cookie Usage (simulate logged in user) -------------------------------------- diff --git a/docs/user_guide/api_reference.rst b/docs/user_guide/api_reference.rst index 4f7d7da..13b39f0 100755 --- a/docs/user_guide/api_reference.rst +++ b/docs/user_guide/api_reference.rst @@ -6,6 +6,20 @@ Newspaper API .. autosummary:: :toctree: generated +Function calls +-------------- + +.. autofunction:: newspaper.article + +.. autofunction:: newspaper.build + +.. autofunction:: newspaper.mthreading.fetch_news + +.. autofunction:: newspaper.hot + +.. autofunction:: newspaper.languages + + Configuration ------------- @@ -44,7 +58,9 @@ Source .. automethod:: newspaper.Source.purge_articles() .. automethod:: newspaper.Source.feeds_to_articles() .. automethod:: newspaper.Source.categories_to_articles() +.. automethod:: newspaper.Source.generate_articles() .. automethod:: newspaper.Source.download_articles() +.. automethod:: newspaper.Source.download() .. automethod:: newspaper.Source.size() Category @@ -55,3 +71,10 @@ Category Feed ---- .. autoclass:: newspaper.source.Feed + + +Exceptions +---------- +.. autoclass:: newspaper.ArticleException + +.. 
autoclass:: newspaper.ArticleBinaryDataException
diff --git a/docs/user_guide/assets/chatgpt_chat200x75.png b/docs/user_guide/assets/chatgpt_chat200x75.png
new file mode 100644
index 0000000..3eac304
Binary files /dev/null and b/docs/user_guide/assets/chatgpt_chat200x75.png differ
diff --git a/docs/user_guide/assets/chatgpt_chat.png b/docs/user_guide/assets/chatgpt_chat75x75.png
similarity index 100%
rename from docs/user_guide/assets/chatgpt_chat.png
rename to docs/user_guide/assets/chatgpt_chat75x75.png
diff --git a/docs/user_guide/examples.rst b/docs/user_guide/examples.rst
index 90343f0..68aa4f5 100755
--- a/docs/user_guide/examples.rst
+++ b/docs/user_guide/examples.rst
@@ -3,20 +3,214 @@
 Examples and Tutorials
 ======================
 
-Building and Crawling a News Source
------------------------------------
+1. Building and Crawling News Sources Using a Multithreaded Approach
+----------------------------------------------------------------------
+Building and crawling news websites can require handling multiple sources simultaneously and processing a large volume of articles. You can significantly improve the performance of this process by using multiple threads when crawling. Even though Python is not truly multithreaded (due to the GIL), I/O requests can be handled in parallel.
 
-Getting Articles with Scrapy
----------------------------- 
 
 .. code-block:: python
 
+    from newspaper import Source
+    from newspaper.mthreading import fetch_news
+    import threading
 
-Using Playwright to Scrape Websites built with Javascript
---------------------------------------------------------- 
 
+    class NewsCrawler:
+        def __init__(self, source_urls, config=None):
+            self.sources = [Source(url, config=config) for url in source_urls]
+            self.articles = []
+
+        def build_sources(self):
+            # Multithreaded source building
+            threads = [threading.Thread(target=source.build) for source in self.sources]
+            for thread in threads:
+                thread.start()
+            for thread in threads:
+                thread.join()
 
-Using Playwright to Scrape Websites that require login
------------------------------------------------------- 
+        def crawl_articles(self):
+            # Multithreaded article downloading
+            self.articles = fetch_news(self.sources, threads=4)
+
+        def extract_information(self):
+            # Extract information from each article
+            for source in self.sources:
+                print(f"Source {source.url}")
+                for article in source.articles[:10]:
+                    article.parse()
+                    print(f"Title: {article.title}")
+                    print(f"Authors: {article.authors}")
+                    print(f"Text: {article.text[:150]}...")  # Printing first 150 characters of text
+                    print("-------------------------------")
+
+    if __name__ == "__main__":
+        source_urls = ['https://slate.com', 'https://time.com']  # Add your news source URLs here
+        crawler = NewsCrawler(source_urls)
+        crawler.build_sources()
+        crawler.crawl_articles()
+        crawler.extract_information()
+
+
+2. Getting Articles with Scrapy
+--------------------------------
+
+Install Necessary Packages
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: bash
+
+    pip install scrapy
+    pip install newspaper4k
+
+Create the Scrapy project:
+
+.. code-block:: bash
+
+    scrapy startproject news_scraper
+
+This command creates a new folder ``news_scraper`` with the necessary Scrapy files.
+
+
+Code the Scrapy Spider
+^^^^^^^^^^^^^^^^^^^^^^
+Navigate to the news_scraper/spiders folder and create a new spider. For example, news_spider.py:
+
 ..
code-block:: python + + import scrapy + import newspaper + + class NewsSpider(scrapy.Spider): + name = 'news' + start_urls = ['https://abcnews.go.com/elections'] # Replace with your target URLs + + def parse(self, response): + # Extract URLs from the response and yield Scrapy Requests + for href in response.css('a::attr(href)'): + yield response.follow(href, self.parse_article) + + def parse_article(self, response): + # Use Newspaper4k to parse the article + article = newspaper.article(response.url, language='en', input_html=response.text) + article.parse() + article.nlp() + + # Extracted information + yield { + 'url': response.url, + 'title': article.title, + 'authors': article.authors, + 'text': article.text, + 'publish_date': article.publish_date, + 'keywords': article.keywords, + 'summary': article.summary, + } + + +Run the Spider +^^^^^^^^^^^^^^ + +.. code-block:: bash + + scrapy crawl news -o output.json + + +3. Using Playwright to Scrape Websites built with Javascript +------------------------------------------------------------- + +Install Necessary Packages +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: python + + pip install newspaper4k + pip install playwright + playwright install + +Scrape with Playwright +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: python + + from playwright.sync_api import sync_playwright + import newspaper + import time + + def scrape_with_playwright(url): + # Using Playwright to render JavaScript + with sync_playwright() as p: + browser = p.chromium.launch() + page = browser.new_page() + page.goto(url) + time.sleep(1) # Allow the javascript to render + content = page.content() + browser.close() + + # Using Newspaper4k to parse the page content + article = newspaper.article(url, input_html=content, language='en') + + return article + + # Example URL + url = 'https://ec.europa.eu/commission/presscorner/detail/en/ac_24_84' # Replace with the URL of your choice + + # Scrape and process the article + article = scrape_with_playwright(url) + article.nlp() + + print(f"Title: {article.title}") + print(f"Authors: {article.authors}") + print(f"Publication Date: {article.publish_date}") + print(f"Summary: {article.summary}") + print(f"Keywords: {article.keywords}") + + +4. Using Playwright to Scrape Websites that require login +---------------------------------------------------------- + + +.. 
code-block:: python + + from playwright.sync_api import sync_playwright + import newspaper + + def login_and_fetch_article(url, login_url, username, password): + # Using Playwright to handle login and fetch article + with sync_playwright() as p: + browser = p.chromium.launch(headless=True) # Set headless=False to watch the browser actions + page = browser.new_page() + + # Automating login + page.goto(login_url) + page.fill('input[name="log"]', username) # Adjust the selector as per the site's HTML + page.fill('input[name="pwd"]', password) # Adjust the selector as per the site's HTML + page.click('input[type="submit"][value="Login"]') # Adjust the selector as per the site's HTML + + # Wait for navigation after login + page.wait_for_url('/') + # Navigating to the article + page.goto(url) + content = page.content() + browser.close() + + # Using Newspaper4k to parse the page content + article = newspaper.article(url, input_html=content, language='en') + + return article + + # Example URLs and credentials + login_url = 'https://www.undercurrentnews.com/login/' # Replace with the actual login URL + article_url = 'https://www.undercurrentnews.com/2024/01/08/editors-choice-farmed-shrimp-output-to-drop-in-2024-fallout-from-us-expanded-russia-ban/' # Replace with the URL of the article you want to scrape + username = 'tester_news' # Replace with your username + password = 'test' # Replace with your password + + # Fetch and process the article + article = login_and_fetch_article(article_url, login_url, username, password) + article.nlp() + print(f"Title: {article.title}") + print(f"Authors: {article.authors}") + print(f"Publication Date: {article.publish_date}") + print(f"Summary: {article.summary}") + print(f"Keywords: {article.keywords}") diff --git a/newspaper/api.py b/newspaper/api.py index 5aaadc5..4e16eb6 100755 --- a/newspaper/api.py +++ b/newspaper/api.py @@ -3,6 +3,7 @@ # Copyright (c) Lucas Ou-Yang (codelucas) +from typing import List import feedparser from .article import Article @@ -14,8 +15,22 @@ def build(url="", dry=False, config=None, **kwargs) -> Source: - """Returns a constructed source object without + """Returns a constructed :any:`Source` object without downloading or parsing the articles + + Args: + url (str): The url of the source (news website) to build. For example, + `https://www.cnn.com`. + dry (bool): If true, the source object will be constructed but not + downloaded or parsed. + config (Configuration): A configuration object to use for the source. + kwargs: Any other keyword arguments to pass to the Source constructor. + If you omit the config object, you can add any configuration + options here. + + Returns: + Source: The constructed :any:`Source` object. 
+ """ config = config or Configuration() config.update(**kwargs) @@ -40,11 +55,11 @@ def build_article(url="", config=None, **kwargs) -> Article: def languages(): - """Returns a list of the supported languages""" + """Prints a list of the supported languages""" print_available_languages() -def popular_urls(): +def popular_urls() -> List[str]: """Returns a list of pre-extracted popular source urls""" with open(POPULAR_URLS, encoding="utf-8") as f: urls = ["http://" + u.strip() for u in f.readlines()] diff --git a/newspaper/article.py b/newspaper/article.py index 22da870..89f7e86 100755 --- a/newspaper/article.py +++ b/newspaper/article.py @@ -140,9 +140,9 @@ class Article: def __init__( self, url: str, - title: str = "", - source_url: str = "", - read_more_link: str = "", + title: Optional[str] = "", + source_url: Optional[str] = "", + read_more_link: Optional[str] = "", config: Optional[Configuration] = None, **kwargs: Dict[str, Any], ): diff --git a/newspaper/configuration.py b/newspaper/configuration.py index c882d57..2dba63e 100755 --- a/newspaper/configuration.py +++ b/newspaper/configuration.py @@ -31,7 +31,9 @@ class Configuration: """Modifies Article / Source properties. + Attributes: + min_word_count (int): minimum number of word tokens in an article text min_sent_count (int): minimum number of sentences in an article text max_title (int): :any:`Article.title` max number of chars. ``title`` @@ -60,9 +62,9 @@ class Configuration: memorize_articles (bool): If True, it will cache and save articles run between runs. The articles are *NOT* cached. It will save the parsed article urls between different - `Source`.`generate_articles()` runs. default True. - disable_category_cache (bool): If True, it will not cache the `Source` - category urls. default False. + :any:`Source.generate_articles()` runs. default True. + disable_category_cache (bool): If True, it will not cache + the :any:`Source` category urls. default False. fetch_images (bool): If False, it will not download images to verify if they obide by the settings in top_image_settings. default True. @@ -72,7 +74,7 @@ class Configuration: from the article body html. Affected property is :any:`Article.article_html`. Default True. - http_success_only (bool): if True, it will raise an ``ArticleException`` + http_success_only (bool): if True, it will raise an :any:`ArticleException` if the html status_code is >= 400 (e.g. 404 page). default True. stopwords_class (obj): unique stopword classes for oriental languages, don't toggle @@ -88,13 +90,13 @@ class Configuration: and could hang the process due to huge binary files (such as movies) default False. ignored_content_types_defaults (dict): dictionary of content-types - and a default stub content. - These content type will not be downloaded. - **Note:** - If `allow_binary_content` is False, - binary content will lead to `ArticleBinaryDataException` for - `Article.download()` and will be skipped in `Source.build()`. This - will override the defaults in :any:`ignored_content_types_defaults` + and a default stub content. These content type will not be downloaded. + + **Note:** If :any:`allow_binary_content` is False, + binary content will lead to :any:`ArticleBinaryDataException` for + :any:`Article.download()` and will be skipped in + :any:`Source.build()`. This will override the defaults + in :any:`ignored_content_types_defaults` if these match binary files. 
         use_cached_categories (bool): if set to False, the cached categories
             will be ignored and the :any:`Source` will recompute the category
@@ -206,8 +208,9 @@ def __init__(self):
     def update(self, **kwargs):
         """Update the configuration object with the given keyword arguments.
+
         Arguments:
-            **kwargs: The keyword arguments to update.
+            **kwargs: The keyword arguments to update.
         """
 
         for key, value in kwargs.items():
@@ -292,6 +295,7 @@ def language(self, value: str):
     def use_meta_language(self):
         """Read-only property that indicates whether the meta language read
         from the website was used or the language was explicitly set.
+
         Returns:
             bool: True if the meta language was used, False if the language
                 was explicitly set.
diff --git a/newspaper/exceptions.py b/newspaper/exceptions.py
index 1893fce..66e57d0 100755
--- a/newspaper/exceptions.py
+++ b/newspaper/exceptions.py
@@ -4,7 +4,7 @@
 class ArticleBinaryDataException(Exception):
     """Exception raised for binary data in urls.
 
-    will be raised if allow_binary_content is False.
+    will be raised if :any:`Configuration.allow_binary_content` is False.
     """
 
diff --git a/newspaper/mthreading.py b/newspaper/mthreading.py
index e846e8b..db7fb33 100755
--- a/newspaper/mthreading.py
+++ b/newspaper/mthreading.py
@@ -21,17 +21,19 @@ def fetch_news(
     If there is a problem in detecting the language of the urls, then
     instantiate the `Article` object yourself with the language parameter
     and pass it in.
-    Arguments:
-        news_list {List[Union[str, Article, Source]]} -- List of sources,
-            articles, urls or a mix of them.
-
-        threads {int} -- Number of threads to use for fetching. This affects
-            how many items from the news_list are fetched at once. In order to control
-            how many threads are used in a `Source` object, use the
-            `Configuration`.`number_threads` setting. This could result in
-            a high number of threads. Maximum number of threads would be
-            `threads` * `Configuration`.`number_threads`.
-
+    Args:
+        news_list (List[Union[str, Article, Source]]): List of sources,
+            articles, urls or a mix of them.
+
+        threads (int): Number of threads to use for fetching. This affects
+            how many items from the news_list are fetched at once. In order to
+            control how many threads are used in a `Source` object, use the
+            ``Configuration.number_threads`` setting. This could result in
+            a high number of threads. Maximum number of threads would be
+            ``threads`` * ``Configuration.number_threads``.
+
+    Returns:
+        List[Union[Article, Source]]: List of articles or sources.
     """
 
     def get_item(item: Union[str, Article, Source]) -> Union[Article, Source]:
diff --git a/newspaper/source.py b/newspaper/source.py
index c76386c..10080cb 100755
--- a/newspaper/source.py
+++ b/newspaper/source.py
@@ -244,7 +244,7 @@ def set_description(self):
         self.description = metadata["description"]
 
     def download(self):
-        """Downloads html of source"""
+        """Downloads html of source, i.e., the news site homepage"""
         self.html = network.get_html(self.url, self.config)
 
     def download_categories(self):
@@ -408,14 +408,23 @@ def _generate_articles(self):
         return list(uniq.values())
 
     def generate_articles(self, limit=5000):
-        """Saves all current articles of news source, filter out bad urls"""
+        """Creates the :any:`Source.articles` list of :any:`Article` objects.
+        It gets the URLs from all detected categories and RSS feeds, checks
+        them for plausibility based on their URL (using some heuristics defined
+        in the ``urls.valid_url`` function).
These can be further + downloaded using :any:`Source.download_articles()` + + Args: + limit (int, optional): The maximum number of articles to generate. + Defaults to 5000. + """ articles = self._generate_articles() self.articles = articles[:limit] log.debug("%d articles generated and cutoff at %d", len(articles), limit) def download_articles(self) -> List[Article]: """Starts the ``download()`` for all :any:`Article` objects - from the ``articles`` property. It can run single threaded or + in the :any:`Source.articles` property. It can run single threaded or multi-threaded. Returns: List[:any:`Article`]: A list of downloaded articles. diff --git a/tests/test_source.py b/tests/test_source.py index 3d2a75a..c7affff 100755 --- a/tests/test_source.py +++ b/tests/test_source.py @@ -110,6 +110,8 @@ def test_empty_url_source(self): with pytest.raises(ValueError): Source(url=None) + # Skip if GITHUB_ACTIONS. It can fail because of internet access + @pytest.mark.skipif("GITHUB_ACTIONS" in os.environ, reason="Skip if GITHUB_ACTIONS") def test_build_source(self, cnn_source): source = Source(cnn_source["url"], verbose=False, memorize_articles=False) source.clean_memo_cache() @@ -130,6 +132,8 @@ def test_build_source(self, cnn_source): # assert sorted(source.category_urls()) == sorted(cnn_source["category_urls"]) # assert sorted(source.feed_urls()) == sorted(cnn_source["feeds"]) + # Skip if GITHUB_ACTIONS. It can fail because of internet access + @pytest.mark.skipif("GITHUB_ACTIONS" in os.environ, reason="Skip if GITHUB_ACTIONS") def test_memorize_articles(self, cnn_source): source = Source(cnn_source["url"], verbose=False, memorize_articles=True) source.clean_memo_cache() @@ -184,6 +188,8 @@ def stub_func(_, domain): with pytest.raises(Exception): stub_func(None, source.domain) + # Skip if GITHUB_ACTIONS. It can fail because of internet access + @pytest.mark.skipif("GITHUB_ACTIONS" in os.environ, reason="Skip if GITHUB_ACTIONS") def test_get_feeds(self, feed_sources): for feed_source in feed_sources: source = Source(feed_source["url"])
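
As a side note on the test changes above: the `GITHUB_ACTIONS` skip condition is repeated before each network-dependent test. It could also be defined once and reused; the following is a minimal sketch with an illustrative marker name and a placeholder test, assuming the same environment-variable convention:

```python
import os

import pytest

# Reusable skip marker mirroring the condition added in tests/test_source.py:
# network-dependent tests are skipped when running under GitHub Actions.
requires_network = pytest.mark.skipif(
    "GITHUB_ACTIONS" in os.environ, reason="Skip if GITHUB_ACTIONS"
)

@requires_network
def test_build_source_smoke():
    # Placeholder body; a real test would build a Source and assert on it.
    assert True
```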