diff --git a/autollm/utils/document_reading.py b/autollm/utils/document_reading.py
index 304fb507..7dee190d 100644
--- a/autollm/utils/document_reading.py
+++ b/autollm/utils/document_reading.py
@@ -11,8 +11,8 @@
 from autollm.utils.logging import logger
 from autollm.utils.markdown_reader import MarkdownReader
 from autollm.utils.pdf_reader import LangchainPDFReader
-from autollm.utils.web_docs_reader import WebDocsReader
-from autollm.utils.web_page_reader import WebPageReader
+from autollm.utils.webpage_reader import WebPageReader
+from autollm.utils.website_reader import WebSiteReader


 def read_files_as_documents(
@@ -118,18 +118,41 @@ def read_github_repo_as_documents(
     return documents


-def read_website_as_documents(url: str) -> List[Document]:
+def read_website_as_documents(
+        parent_url: Optional[str] = None,
+        sitemap_url: Optional[str] = None,
+        include_filter_str: Optional[str] = None,
+        exclude_filter_str: Optional[str] = None) -> List[Document]:
     """
-    Read documents from a website with all its child pages using the WebDocsReader.
+    Read documents from a website or a sitemap.

     Parameters:
-        url (str): The starting URL from which to scrape documents.
+        parent_url (str, optional): The starting URL from which to scrape documents.
+        sitemap_url (str, optional): The URL of the sitemap to process.
+        include_filter_str (str, optional): Filter string to include certain URLs.
+        exclude_filter_str (str, optional): Filter string to exclude certain URLs.

     Returns:
-        List[Document]: A list of Document objects containing content and metadata from the web pages.
+        List[Document]: A list of Document objects containing content and metadata.
+
+    Raises:
+        ValueError: If neither parent_url nor sitemap_url is provided, or if both are provided.
     """
-    reader = WebDocsReader()
-    documents = reader.load_data(url)
+    if (parent_url is None and sitemap_url is None) or (parent_url is not None and sitemap_url is not None):
+        raise ValueError("Please provide either parent_url or sitemap_url, not both or none.")
+
+    reader = WebSiteReader()
+    if parent_url:
+        documents = reader.load_data(
+            parent_url=parent_url,
+            include_filter_str=include_filter_str,
+            exclude_filter_str=exclude_filter_str)
+    else:
+        documents = reader.load_data(
+            sitemap_url=sitemap_url,
+            include_filter_str=include_filter_str,
+            exclude_filter_str=exclude_filter_str)
+
     return documents
diff --git a/autollm/utils/web_docs_reader.py b/autollm/utils/web_docs_reader.py
deleted file mode 100644
index 65a4123e..00000000
--- a/autollm/utils/web_docs_reader.py
+++ /dev/null
@@ -1,66 +0,0 @@
-from typing import List
-from urllib.parse import urljoin, urlparse
-
-import requests
-from bs4 import BeautifulSoup
-from llama_index.schema import Document
-from tqdm import tqdm
-
-from autollm.utils.constants import WEBPAGE_READER_TIMEOUT
-from autollm.utils.logging import logger
-from autollm.utils.web_page_reader import WebPageReader
-
-
-class WebDocsReader:
-
-    def __init__(self):
-        self.visited_links = set()
-
-    def _get_child_links_recursive(self, url):
-        parsed_url = urlparse(url)
-        base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"
-        current_path = parsed_url.path
-
-        response = requests.get(url, timeout=WEBPAGE_READER_TIMEOUT)  # timeout in seconds
-        if response.status_code != 200:
-            logger.warning(f"Failed to fetch the website: {response.status_code}")
-            return
-
-        soup = BeautifulSoup(response.text, "html.parser")
-        all_links = [link.get("href") for link in soup.find_all("a")]
-
-        # Normalize links and filter out external links and anchors
-        child_links = set()
-        for link in all_links:
-            # Skip any None or empty hrefs, and anchors
-            if not link or link.startswith('#') or link == current_path:
-                continue
-            # Convert relative links to absolute
-            full_link = urljoin(base_url, link)
-            # Add to set if the link is internal
-            if urlparse(full_link).netloc == parsed_url.netloc:
-                child_links.add(full_link)
-
-        # Process each child link
-        for link in child_links:
-            if link not in self.visited_links:
-                self.visited_links.add(link)
-                self._get_child_links_recursive(link)
-
-    def _get_all_urls(self, url):
-        self.visited_links = set()
-        self._get_child_links_recursive(url)
-        urls = [link for link in self.visited_links if urlparse(link).netloc == urlparse(url).netloc]
-        return urls
-
-    def load_data(self, url: str) -> List[Document]:
-        logger.info(f"Listing child pages of {url}..")
-        all_urls = self._get_all_urls(url)
-        logger.info(f"Total URLs to process: {len(all_urls)}")
-
-        web_reader = WebPageReader()
-        documents = []
-        for u in tqdm(all_urls, desc="Processing URLs"):
-            documents.extend(web_reader.load_data(u))
-
-        return documents
diff --git a/autollm/utils/web_page_reader.py b/autollm/utils/webpage_reader.py
similarity index 100%
rename from autollm/utils/web_page_reader.py
rename to autollm/utils/webpage_reader.py
diff --git a/autollm/utils/website_reader.py b/autollm/utils/website_reader.py
new file mode 100644
index 00000000..4d21a439
--- /dev/null
+++ b/autollm/utils/website_reader.py
@@ -0,0 +1,116 @@
+import xml.etree.ElementTree as ET
+from typing import List, Optional
+from urllib.parse import urljoin, urlparse
+
+import requests
+from bs4 import BeautifulSoup
+from llama_index.schema import Document
+from tqdm import tqdm
+
+from autollm.utils.constants import WEBPAGE_READER_TIMEOUT
+from autollm.utils.logging import logger
+from autollm.utils.webpage_reader import WebPageReader
+
+
+class WebSiteReader:
+
+    def __init__(self):
+        self.visited_links = set()
+
+    def _fetch_and_parse_sitemap(
+            self,
+            sitemap_url: str,
+            include_filter_str: Optional[str] = None,
+            exclude_filter_str: Optional[str] = None) -> List[str]:
+        """Fetches and parses the sitemap, returning URLs."""
+        try:
+            response = requests.get(sitemap_url)
+            response.raise_for_status()
+            sitemap_content = response.text
+        except requests.RequestException as e:
+            logger.error(f"Error fetching sitemap: {e}")
+            return []
+
+        return self._extract_urls_from_sitemap(sitemap_content, include_filter_str, exclude_filter_str)
+
+    def _extract_urls_from_sitemap(
+            self, sitemap_content: str, include_filter_str: Optional[str],
+            exclude_filter_str: Optional[str]) -> List[str]:
+        """Extracts URLs from sitemap content."""
+        sitemap = ET.fromstring(sitemap_content)
+        urls = []
+
+        for url in sitemap.findall(".//{http://www.sitemaps.org/schemas/sitemap/0.9}url"):
+            location = url.find("{http://www.sitemaps.org/schemas/sitemap/0.9}loc").text
+            if (include_filter_str and
+                    include_filter_str not in location) or (exclude_filter_str and
+                                                            exclude_filter_str in location):
+                continue
+            urls.append(location)
+
+        return urls
+
+    def _get_child_links_recursive(self, url):
+        parsed_url = urlparse(url)
+        base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"
+        current_path = parsed_url.path
+
+        response = requests.get(url, timeout=WEBPAGE_READER_TIMEOUT)  # timeout in seconds
+        if response.status_code != 200:
+            logger.warning(f"Failed to fetch the website: {response.status_code}")
+            return
+
+        soup = BeautifulSoup(response.text, "html.parser")
+        all_links = [link.get("href") for link in soup.find_all("a")]
+
+        # Normalize links and filter out external links and anchors
+        child_links = set()
+        for link in all_links:
+            # Skip any None or empty hrefs, and anchors
+            if not link or link.startswith('#') or link == current_path:
+                continue
+            # Convert relative links to absolute
+            full_link = urljoin(base_url, link)
+            # Add to set if the link is internal
+            if urlparse(full_link).netloc == parsed_url.netloc:
+                child_links.add(full_link)
+
+        # Process each child link
+        for link in child_links:
+            if link not in self.visited_links:
+                self.visited_links.add(link)
+                self._get_child_links_recursive(link)
+
+    def _get_all_urls(self, url):
+        self.visited_links = set()
+        self._get_child_links_recursive(url)
+        urls = [link for link in self.visited_links if urlparse(link).netloc == urlparse(url).netloc]
+        return urls
+
+    def load_data(
+            self,
+            parent_url: Optional[str] = None,
+            sitemap_url: Optional[str] = None,
+            include_filter_str: Optional[str] = None,
+            exclude_filter_str: Optional[str] = None) -> List[Document]:
+        """Loads data from either a parent URL (crawled recursively) or a sitemap URL."""
+        if sitemap_url:
+            logger.info(f"Fetching and parsing sitemap {sitemap_url}..")
+            all_urls = self._fetch_and_parse_sitemap(sitemap_url, include_filter_str, exclude_filter_str)
+        elif parent_url:
+            logger.info(f"Parsing child pages of {parent_url}..")
+            # Crawl the site and collect the internal URLs (fills self.visited_links).
+            all_urls = self._get_all_urls(parent_url)
+        else:
+            raise ValueError("Either sitemap_url or parent_url must be provided.")
+
+        web_reader = WebPageReader()
+        documents = []
+
+        logger.info(f"Total URLs to process: {len(all_urls)}")
+
+        # Track processed URLs in a local set; self.visited_links already holds the
+        # crawled links in the parent_url case and would skip every page here.
+        processed_urls = set()
+        for u in tqdm(all_urls, desc="Processing URLs"):
+            if u not in processed_urls:
+                processed_urls.add(u)
+                documents.extend(web_reader.load_data(u))
+
+        return documents
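Below is a brief usage sketch of the reader API introduced by this diff. The example.com URLs and the "blog" filter string are placeholder values, not part of the change; the function name, keyword arguments, and ValueError behaviour come from the diff above.

    from autollm.utils.document_reading import read_website_as_documents

    # Crawl a site starting from a parent URL and read every internal child page.
    site_documents = read_website_as_documents(parent_url="https://example.com/docs/")

    # Or read only the pages listed in a sitemap, keeping URLs that contain "blog".
    sitemap_documents = read_website_as_documents(
        sitemap_url="https://example.com/sitemap.xml",
        include_filter_str="blog")

    # Exactly one of parent_url and sitemap_url must be given; passing both or
    # neither raises ValueError.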