diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000..a744c0e
Binary files /dev/null and b/.DS_Store differ
diff --git a/.gitignore b/.gitignore
index 6769e21..2ac7656 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,6 +8,9 @@ __pycache__/
 
 # Distribution / packaging
 .Python
+_build/
+_static/
+_templates/
 build/
 develop-eggs/
 dist/
diff --git a/.readthedocs.yaml b/.readthedocs.yaml
new file mode 100644
index 0000000..124910a
--- /dev/null
+++ b/.readthedocs.yaml
@@ -0,0 +1,35 @@
+# Read the Docs configuration file for Sphinx projects
+# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
+
+# Required
+version: 2
+
+# Set the OS, Python version and other tools you might need
+build:
+  os: ubuntu-22.04
+  tools:
+    python: "3.12"
+    # You can also specify other tool versions:
+    # nodejs: "20"
+    # rust: "1.70"
+    # golang: "1.20"
+
+# Build documentation in the "docs/" directory with Sphinx
+sphinx:
+  configuration: docs/conf.py
+  # You can configure Sphinx to use a different builder, for instance use the dirhtml builder for simpler URLs
+  # builder: "dirhtml"
+  # Fail on all warnings to avoid broken references
+  # fail_on_warning: true
+
+# Optionally build your docs in additional formats such as PDF and ePub
+# formats:
+#   - pdf
+#   - epub
+
+# Optional but recommended, declare the Python requirements required
+# to build your documentation
+# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html
+# python:
+#   install:
+#     - requirements: docs/requirements.txt
\ No newline at end of file
diff --git a/README.md b/README.md
index 974cbde..979c832 100644
--- a/README.md
+++ b/README.md
@@ -28,7 +28,9 @@ You can experiment with the library directly in Google Colab [here](https://cola
 
 Here's a [writeup](https://open.substack.com/pub/ambikasukla/p/efficient-rag-with-document-layout?r=ft8uc&utm_campaign=post&utm_medium=web) explaining the problem and our approach. 
 
-Here'a another [blog](https://medium.com/@kirankurup/mastering-pdfs-extracting-sections-headings-paragraphs-and-tables-with-cutting-edge-parser-faea18870125) explaining the solution. 
+Here's a LlamaIndex [blog](https://medium.com/@kirankurup/mastering-pdfs-extracting-sections-headings-paragraphs-and-tables-with-cutting-edge-parser-faea18870125) explaining the need for smart chunking.
+
+API Reference: [https://llmsherpa.readthedocs.io/](https://llmsherpa.readthedocs.io/)
 
 ### Installation
 
@@ -64,6 +66,53 @@ pip install llama-index
 import openai
 openai.api_key = #<Insert API Key>
 ```
+
+### Vector search and Retrieval Augmented Generation with Smart Chunking
+
+LayoutPDFReader does smart chunking, keeping text that is related by document structure together:
+
+* All list items are together including the paragraph that precedes the list.
+* Items in a table are chunked together
+* Contextual information from section headers and nested section headers is included
+
+The following code creates a LlamaIndex query engine from LayoutPDFReader document chunks
+
+```python
+from llama_index.readers.schema.base import Document
+from llama_index import VectorStoreIndex
+
+index = VectorStoreIndex([])
+for chunk in doc.chunks():
+    index.insert(Document(text=chunk.to_context_text(), extra_info={}))
+query_engine = index.as_query_engine()
+```
+
+Let's run one query:
+
+```python
+response = query_engine.query("list all the tasks that work with bart")
+print(response)
+```
+
+We get the following response:
+
+```
+BART works well for text generation, comprehension tasks, abstractive dialogue, question answering, and summarization tasks.
+```
+
+Let's try another query that needs an answer from a table:
+
+```python
+response = query_engine.query("what is the bart performance score on squad")
+print(response)
+```
+
+Here's the response we get:
+
+```
+The BART performance score on SQuAD is 88.8 for EM and 94.6 for F1.
+```
+
 ### Summarize a Section using prompts
 
 LayoutPDFReader offers powerful ways to pick sections and subsections from a large document and use LLMs to extract insights from a section.
@@ -179,51 +228,6 @@ R1 of BART for different datasets:
 - For the XSum dataset, the R1 score of BART is 45.14.
 ```
 
-### Vector search and Retrieval Augmented Generation with Smart Chunking
-
-LayoutPDFReader does smart chunking keeping the integrity of related text together:
-
-* All list items are together including the paragraph that precedes the list.
-* Items in a table are chuncked together
-* Contextual information from section headers and nested section headers is included
-
-The following code creates a LlamaIndex query engine from LayoutPDFReader document chunks
-
-```python
-from llama_index.readers.schema.base import Document
-from llama_index import VectorStoreIndex
-
-index = VectorStoreIndex([])
-for chunk in doc.chunks():
-    index.insert(Document(text=chunk.to_context_text(), extra_info={}))
-query_engine = index.as_query_engine()
-```
-
-Let's run one query:
-
-```python
-response = query_engine.query("list all the tasks that work with bart")
-print(response)
-```
-
-We get the following response:
-
-```
-BART works well for text generation, comprehension tasks, abstractive dialogue, question answering, and summarization tasks.
-```
-
-Let's try another query that needs answer from a table:
-
-```python
-response = query_engine.query("what is the bart performance score on squad")
-print(response)
-```
-
-Here's the response we get:
-
-```
-The BART performance score on SQuAD is 88.8 for EM and 94.6 for F1.
-```
 
 ### Get the Raw JSON
 
diff --git a/docs/conf.py b/docs/conf.py
new file mode 100644
index 0000000..474d92a
--- /dev/null
+++ b/docs/conf.py
@@ -0,0 +1,36 @@
+import os
+import sys
+sys.path.insert(0, os.path.abspath('../'))
+# Configuration file for the Sphinx documentation builder.
+#
+# For the full list of built-in configuration values, see the documentation:
+# https://www.sphinx-doc.org/en/master/usage/configuration.html
+
+# -- Project information -----------------------------------------------------
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
+
+project = 'LLM Sherpa'
+copyright = '2023, Ambika Sukla'
+author = 'Ambika Sukla'
+release = '0.1.3'
+
+# -- General configuration ---------------------------------------------------
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
+
+extensions = [
+   'sphinx.ext.doctest',
+   'sphinx.ext.autodoc',
+   'sphinx.ext.autosummary',
+   'sphinx.ext.napoleon',    
+]
+
+templates_path = ['_templates']
+exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
+
+
+
+# -- Options for HTML output -------------------------------------------------
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
+
+html_theme = 'alabaster'
+html_static_path = ['_static']
diff --git a/docs/index.rst b/docs/index.rst
new file mode 100644
index 0000000..60b6467
--- /dev/null
+++ b/docs/index.rst
@@ -0,0 +1,20 @@
+.. LLM Sherpa documentation master file, created by
+   sphinx-quickstart on Wed Nov  1 09:09:16 2023.
+   You can adapt this file completely to your liking, but it should at least
+   contain the root `toctree` directive.
+
+Welcome to LLM Sherpa's documentation!
+======================================
+
+.. toctree::
+   :maxdepth: 2
+   :caption: Contents:
+
+   API reference <modules>
+
+Indices and tables
+==================
+
+* :ref:`genindex`
+* :ref:`modindex`
+* :ref:`search`
diff --git a/docs/llmsherpa.readers.rst b/docs/llmsherpa.readers.rst
new file mode 100644
index 0000000..3825dfb
--- /dev/null
+++ b/docs/llmsherpa.readers.rst
@@ -0,0 +1,29 @@
+llmsherpa.readers package
+=========================
+
+Submodules
+----------
+
+llmsherpa.readers.file\_reader module
+-------------------------------------
+
+.. automodule:: llmsherpa.readers.file_reader
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+llmsherpa.readers.layout\_reader module
+---------------------------------------
+
+.. automodule:: llmsherpa.readers.layout_reader
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+Module contents
+---------------
+
+.. automodule:: llmsherpa.readers
+   :members:
+   :undoc-members:
+   :show-inheritance:
diff --git a/docs/llmsherpa.rst b/docs/llmsherpa.rst
new file mode 100644
index 0000000..a812297
--- /dev/null
+++ b/docs/llmsherpa.rst
@@ -0,0 +1,18 @@
+llmsherpa package
+=================
+
+Subpackages
+-----------
+
+.. toctree::
+   :maxdepth: 4
+
+   llmsherpa.readers
+
+Module contents
+---------------
+
+.. automodule:: llmsherpa
+   :members:
+   :undoc-members:
+   :show-inheritance:
diff --git a/docs/modules.rst b/docs/modules.rst
new file mode 100644
index 0000000..f91605d
--- /dev/null
+++ b/docs/modules.rst
@@ -0,0 +1,7 @@
+llmsherpa
+=========
+
+.. toctree::
+   :maxdepth: 4
+
+   llmsherpa
diff --git a/llmsherpa/.DS_Store b/llmsherpa/.DS_Store
new file mode 100644
index 0000000..06a5668
Binary files /dev/null and b/llmsherpa/.DS_Store differ
diff --git a/llmsherpa/__init__.py b/llmsherpa/__init__.py
index 3fa7b91..ecc2de2 100644
--- a/llmsherpa/__init__.py
+++ b/llmsherpa/__init__.py
@@ -4,6 +4,6 @@
 APIs to accelerate LLM use cases.
 """
 
-__version__ = "0.1.2"
+__version__ = "0.1.3"
 __author__ = 'Ambika Sukla'
 __credits__ = 'NLMATICS CORP.'
\ No newline at end of file
diff --git a/llmsherpa/readers/file_reader.py b/llmsherpa/readers/file_reader.py
index 5abc97f..ff18347 100644
--- a/llmsherpa/readers/file_reader.py
+++ b/llmsherpa/readers/file_reader.py
@@ -5,12 +5,30 @@
 from llmsherpa.readers import Document
 
 class LayoutPDFReader:
+    """
+    Reads PDF content and understands hierarchical layout of the document sections and structural components such as paragraphs, sentences, tables, lists, sublists
+
+    Parameters
+    ----------
+    parser_api_url: str
+        API url for LLM Sherpa. Use a custom url for your private instance here
+    
+    """
     def __init__(self, parser_api_url):
+        """
+            Constructs a LayoutPDFReader from a parser endpoint.
+
+            Parameters
+            ----------
+            parser_api_url: str
+                API url for LLM Sherpa. Use a custom url for your private instance here
+        """
         self.parser_api_url = parser_api_url
         self.download_connection = urllib3.PoolManager()
         self.api_connection = urllib3.PoolManager()
 
     def _download_pdf(self, pdf_url):
+        
         # some servers only allow browers user_agent to download
         user_agent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36"
         # add authorization headers if using external API (see upload_pdf for an example)
@@ -28,6 +46,16 @@ def _parse_pdf(self, pdf_file):
         return parser_response
 
     def read_pdf(self, path_or_url, contents=None):
+        """
+        Reads pdf from a url or path
+
+        Parameters
+        ----------
+        path_or_url: str
+            path or url to the pdf file e.g. https://someexample.com/myfile.pdf or /home/user/myfile.pdf
+        contents: bytes
+            contents of the pdf file. If contents is given, path_or_url is ignored. This is useful when you already have the pdf file contents in memory such as if you are using streamlit or flask.
+        """
         # file contents were given
         if contents is not None:
             pdf_file = (path_or_url, contents, 'application/pdf')
@@ -43,18 +71,4 @@ def read_pdf(self, path_or_url, contents=None):
         parser_response = self._parse_pdf(pdf_file)
         response_json = json.loads(parser_response.data.decode("utf-8"))
         blocks = response_json['return_dict']['result']['blocks']
-        return Document(blocks)
-    # def read_file(file_path):
-
-def main(): 
-    llmsherpa_api_url = "https://readers.llmsherpa.com/api/document/developer/parseDocument?renderFormat=all"
-    pdf_url = "https://arxiv.org/pdf/1910.13461.pdf"
-    pdf_url = "/Users/ambikasukla/Documents/1910.13461.pdf"
-    pdf_reader = LayoutPDFReader(llmsherpa_api_url)
-    doc = pdf_reader.read_pdf(pdf_url)
-    print(doc.sections()[5].to_html(include_children=True, recurse=True))
-    
-# Using the special variable  
-# __name__ 
-if __name__=="__main__": 
-    main()
+        return Document(blocks)
\ No newline at end of file
diff --git a/llmsherpa/readers/layout_reader.py b/llmsherpa/readers/layout_reader.py
index 82c82da..82e64a3 100644
--- a/llmsherpa/readers/layout_reader.py
+++ b/llmsherpa/readers/layout_reader.py
@@ -1,23 +1,67 @@
 class Block:
+    """
+    A block is a node in the layout tree. It can be a paragraph, a list item, a table, or a section header. 
+    This is the base class for all blocks such as Paragraph, ListItem, Table, Section.
+
+    Attributes
+    ----------
+    tag: str
+        tag of the block e.g. para, list_item, table, header
+    level: int
+        level of the block in the layout tree
+    page_idx: int
+        page index of the block in the document. It starts from 0 and is -1 if the page number is not available
+    block_idx: int
+        id of the block as returned from the server. It starts from 0 and is -1 if the id is not available
+    top: float
+        top position of the block in the page and it is -1 if the position is not available
+    left: float
+        left position of the block in the page and it is -1 if the position is not available
+    sentences: list
+        list of sentences in the block
+    children: list
+        list of immediate child blocks, but not the children of the children
+    parent: Block
+        parent of the block
+    block_json: dict
+        json returned by the parser API for the block
+    """
     tag: str
     def __init__(self, block_json=None):
         self.tag = block_json['tag'] if block_json and 'tag' in block_json else None
         self.level = block_json['level'] if block_json and 'level' in block_json else -1
+        self.page_idx = block_json['page_idx'] if block_json and 'page_idx' in block_json else -1
+        self.block_idx = block_json['block_idx'] if block_json and 'block_idx' in block_json else -1
+        self.top = block_json['top'] if block_json and 'top' in block_json else -1
+        self.left = block_json['left'] if block_json and 'left' in block_json else -1
         self.sentences = block_json['sentences'] if block_json and 'sentences' in block_json else []
         self.children = []
         self.parent = None
+        self.block_json = block_json
 
     def add_child(self, node):
+        """
+        Adds a child to the block. Sets the parent of the child to self.
+        """
         self.children.append(node)
         node.parent = self
 
     def to_html(self, include_children=False, recurse=False):
+        """
+        Converts the block to html. This is a virtual method and should be implemented by the derived classes.
+        """
         pass
 
     def to_text(self, include_children=False, recurse=False):
+        """
+        Converts the block to text. This is a virtual method and should be implemented by the derived classes.
+        """
         pass
 
     def parent_chain(self):
+        """
+        Returns the parent chain of the block consisting of all the parents of the block until the root.
+        """
         chain = []
         parent = self.parent
         while parent:
@@ -27,6 +71,9 @@ def parent_chain(self):
         return chain
 
     def parent_text(self):
+        """
+        Returns the text of the parent chain of the block. This is useful for adding section information to the text.
+        """
         parent_chain = self.parent_chain()
         header_texts = []
         para_texts = []
@@ -41,6 +88,9 @@ def parent_text(self):
         return text                
 
     def to_context_text(self, include_section_info=True):
+        """
+        Returns the text of the block with section information. This provides context to the text.
+        """
         text = ""
         if include_section_info:
             text += self.parent_text() + "\n"
@@ -51,6 +101,9 @@ def to_context_text(self, include_section_info=True):
         return text
     
     def iter_children(self, node, level, node_visitor):
+        """
+        Iterates over all the children of the node and calls the node_visitor function on each child.
+        """
         for child in node.children:
             node_visitor(child)
             # print("-"*level, child.tag, f"({len(child.children)})", child.to_text())
@@ -58,6 +111,9 @@ def iter_children(self, node, level, node_visitor):
                 self.iter_children(child, level + 1, node_visitor)
 
     def paragraphs(self):
+        """
+        Returns all the paragraphs in the block. This is useful for getting all the paragraphs in a section.
+        """
         paragraphs = []
         def para_collector(node):
             if node.tag == 'para':
@@ -66,6 +122,9 @@ def para_collector(node):
         return paragraphs
        
     def chunks(self):
+        """
+        Returns all the chunks in the block. Chunking automatically splits the document into paragraphs, lists, and tables without any prior knowledge of the document structure.
+        """
         chunks = []
         def chunk_collector(node):
             if node.tag in ['para', 'list_item', 'table']:
@@ -74,14 +133,20 @@ def chunk_collector(node):
         return chunks
     
     def tables(self):
+        """
+        Returns all the tables in the block. This is useful for getting all the tables in a section.
+        """
         tables = []
         def chunk_collector(node):
             if node.tag in ['table']:
                 tables.append(node)
-        self.iter_children(self, 0, chunk_collector)
+        self.iter_children(self, 0, chunk_collector)        
         return tables
 
     def sections(self):
+        """
+        Returns all the sections in the block. This is useful for getting all the sections in a document.
+        """
         sections = []
         def chunk_collector(node):
             if node.tag in ['header']:
@@ -90,15 +155,38 @@ def chunk_collector(node):
         return sections
 
 class Paragraph(Block):
+    """
+    A paragraph is a block of text. It can have children such as lists. A paragraph has tag 'para'.
+    """
     def __init__(self, para_json):
         super().__init__(para_json)
     def to_text(self, include_children=False, recurse=False):
+        """
+        Converts the paragraph to text. If include_children is True, then the text of the children is also included. If recurse is True, then the text of the children's children are also included.
+        
+        Parameters
+        ----------
+        include_children: bool
+            If True, then the text of the children are also included
+        recurse: bool
+            If True, then the text of the children's children are also included
+        """
         para_text = "\n".join(self.sentences)
         if include_children:
             for child in self.children:
                 para_text += "\n" + child.to_text(include_children=recurse, recurse=recurse)
         return para_text    
     def to_html(self, include_children=False, recurse=False):
+        """
+        Converts the paragraph to html. If include_children is True, then the html of the children is also included. If recurse is True, then the html of the children's children are also included.
+
+        Parameters
+        ----------
+        include_children: bool
+            If True, then the html of the children are also included
+        recurse: bool
+            If True, then the html of the children's children are also included
+        """
         html_str = "<p>"
         html_str = html_str + "\n".join(self.sentences)
         if include_children:
@@ -111,10 +199,28 @@ def to_html(self, include_children=False, recurse=False):
         return html_str
     
 class Section(Block):
+    """
+    A section is a block of text. It can have children such as paragraphs, lists, and tables. A section has tag 'header'.
+
+    Attributes
+    ----------
+    title: str
+        title of the section
+    """
     def __init__(self, section_json):
         super().__init__(section_json)
         self.title = "\n".join(self.sentences)
     def to_text(self, include_children=False, recurse=False):
+        """
+        Converts the section to text. If include_children is True, then the text of the children is also included. If recurse is True, then the text of the children's children are also included.
+
+        Parameters
+        ----------
+        include_children: bool
+            If True, then the text of the children are also included
+        recurse: bool
+            If True, then the text of the children's children are also included
+        """
         text = self.title
         if include_children:
             for child in self.children:
@@ -122,6 +228,16 @@ def to_text(self, include_children=False, recurse=False):
         return text    
 
     def to_html(self, include_children=False, recurse=False):
+        """
+        Converts the section to html. If include_children is True, then the html of the children is also included. If recurse is True, then the html of the children's children are also included.
+
+        Parameters
+        ----------
+        include_children: bool
+            If True, then the html of the children are also included
+        recurse: bool
+            If True, then the html of the children's children are also included
+        """
         html_str = f"<h{self.level + 1}>"
         html_str = html_str + self.title
         html_str = html_str + f"</h{self.level + 1}>"
@@ -131,10 +247,23 @@ def to_html(self, include_children=False, recurse=False):
         return html_str
 
 class ListItem(Block):
+    """
+    A list item is a block of text. It can have child list items. A list item has tag 'list_item'.
+    """
     def __init__(self, list_json):
         super().__init__(list_json)
 
     def to_text(self, include_children=False, recurse=False):
+        """
+        Converts the list item to text. If include_children is True, then the text of the children is also included. If recurse is True, then the text of the children's children are also included.
+        
+        Parameters
+        ----------
+        include_children: bool
+            If True, then the text of the children are also included
+        recurse: bool
+            If True, then the text of the children's children are also included
+        """
         text = "\n".join(self.sentences)
         if include_children:
             for child in self.children:
@@ -142,6 +271,16 @@ def to_text(self, include_children=False, recurse=False):
         return text    
 
     def to_html(self, include_children=False, recurse=False):
+        """
+        Converts the list item to html. If include_children is True, then the html of the children is also included. If recurse is True, then the html of the children's children are also included.
+        
+        Parameters
+        ----------
+        include_children: bool
+            If True, then the html of the children are also included
+        recurse: bool
+            If True, then the html of the children's children are also included
+        """
         html_str = f"<li>"
         html_str = html_str + "\n".join(self.sentences)
         if include_children:
@@ -153,11 +292,12 @@ def to_html(self, include_children=False, recurse=False):
         html_str = html_str + f"</li>"
         return html_str
 
-class List(Block):
-    def __init__(self, list_json):
-        self.x = 0
     
 class TableCell(Block):
+    """
+    A table cell is a block of text. It can have child paragraphs. A table cell has tag 'table_cell'.
+    A table cell is contained within table rows.
+    """
     def __init__(self, cell_json):
         super().__init__(cell_json)
         self.col_span = cell_json['col_span'] if 'col_span' in cell_json else 1
@@ -167,11 +307,17 @@ def __init__(self, cell_json):
         else:
             self.cell_node = None
     def to_text(self):
+        """
+        Returns the cell value as text. If the cell value is a paragraph node, then the text of the node is returned.
+        """
         cell_text = self.cell_value
         if self.cell_node:
             cell_text = self.cell_node.to_text()
         return cell_text
     def to_html(self):
+        """
+        Returns the cell value as html. If the cell value is a paragraph node, then the html of the node is returned.
+        """
         cell_html = self.cell_value
         if self.cell_node:
             cell_html = self.cell_node.to_html()
@@ -182,6 +328,9 @@ def to_html(self):
         return html_str
             
 class TableRow(Block):
+    """
+    A table row is a block of text. It can have child table cells.
+    """
     def __init__(self, row_json):
         self.cells = []
         if row_json['type'] == 'full_row':
@@ -192,11 +341,17 @@ def __init__(self, row_json):
                 cell = TableCell(cell_json)
                 self.cells.append(cell)
     def to_text(self, include_children=False, recurse=False):
+        """
+        Returns text of a row with text from all the cells in the row delimited by '|'
+        """
         cell_text = ""
         for cell in self.cells:
             cell_text = cell_text + " | " + cell.to_text()
         return cell_text
     def to_html(self, include_children=False, recurse=False):
+        """
+        Returns html for a <tr> with html from all the cells in the row as <td>
+        """
         html_str = "<tr>"
         for cell in self.cells:
             html_str = html_str + cell.to_html()
@@ -204,6 +359,9 @@ def to_html(self, include_children=False, recurse=False):
         return html_str
 
 class TableHeader(Block):
+    """
+    A table header is a block of text. It can have child table cells.
+    """
     def __init__(self, row_json):
         super().__init__(row_json)
         self.cells = []
@@ -211,6 +369,10 @@ def __init__(self, row_json):
             cell = TableCell(cell_json)
             self.cells.append(cell)
     def to_text(self, include_children=False, recurse=False):
+        """
+        Returns text of a row with text from all the cells in the row delimited by '|' and the header row is delimited by '---'
+        Text is returned in markdown format.
+        """
         cell_text = ""
         for cell in self.cells:
             cell_text = cell_text + " | " + cell.to_text()
@@ -219,13 +381,19 @@ def to_text(self, include_children=False, recurse=False):
             cell_text = cell_text + " | " + "---"           
         return cell_text
     def to_html(self, include_children=False, recurse=False):
-            html_str = "<th>"
-            for cell in self.cells:
-                html_str = html_str + cell.to_html()
-            html_str = html_str + "</th>"
-            return html_str
+        """
+        Returns html for a <th> with html from all the cells in the row as <td>
+        """
+        html_str = "<th>"
+        for cell in self.cells:
+            html_str = html_str + cell.to_html()
+        html_str = html_str + "</th>"
+        return html_str
         
 class Table(Block):
+    """
+    A table is a block of text. It can have child table rows. A table has tag 'table'.
+    """
     def __init__(self, table_json, parent):
         # self.title = parent.name
         super().__init__(table_json)
@@ -241,6 +409,9 @@ def __init__(self, table_json, parent):
                     row = TableRow(row_json)
                     self.rows.append(row)
     def to_text(self, include_children=False, recurse=False):
+        """
+        Returns text of a table with text from all the rows in the table delimited by newlines
+        """
         text = ""
         for header in self.headers:
             text = text + header.to_text() + "\n"
@@ -249,6 +420,9 @@ def to_text(self, include_children=False, recurse=False):
         return text
                    
     def to_html(self, include_children=False, recurse=False):
+        """
+        Returns html for a <table> with html from all the rows in the table as <tr>
+        """
         html_str = "<table>"
         for header in self.headers:
             html_str = html_str + header.to_html()
@@ -258,6 +432,9 @@ def to_html(self, include_children=False, recurse=False):
         return html_str
 
 class LayoutReader:
+    """
+    Reads the layout tree from the json returned by the parser API.
+    """
     def debug(self, pdf_root):
         def iter_children(node, level):
             for child in node.children:
@@ -266,12 +443,11 @@ def iter_children(node, level):
         iter_children(pdf_root, 0)
 
     def read(self, blocks_json):
+        """
+        Reads the layout tree from the json returned by the parser API. Constructs a tree of Block objects.
+        """
         root = Block()
         parent = None
-        # table_node = None
-        table_nodes = []
-        sections = []
-        # prev_list = None
         parent_stack = [root]
         prev_node = root
         parent = root
@@ -320,13 +496,25 @@ def read(self, blocks_json):
         return root
 
 class Document:
+    """
+    A document is a tree of blocks. It is the root node of the layout tree.
+    """
     def __init__(self, blocks_json):
         self.reader = LayoutReader()
         self.root_node = self.reader.read(blocks_json)
         self.json = blocks_json
     def chunks(self):
+        """
+        Returns all the chunks in the document. Chunking automatically splits the document into paragraphs, lists, and tables without any prior knowledge of the document structure.
+        """
         return self.root_node.chunks()
     def tables(self):
+        """
+        Returns all the tables in the document. This is useful for getting all the tables in a document.
+        """
         return self.root_node.tables()
     def sections(self):
+        """
+        Returns all the sections in the document. This is useful for getting all the sections in a document.
+        """
         return self.root_node.sections()
diff --git a/llmsherpa/readers/tests/test_layout_reader.py b/llmsherpa/readers/tests/test_layout_reader.py
index 92f959d..e2f34f5 100644
--- a/llmsherpa/readers/tests/test_layout_reader.py
+++ b/llmsherpa/readers/tests/test_layout_reader.py
@@ -119,6 +119,14 @@ def test_chunk_iterator(self):
         correct_text = self.clean_text(correct_text)
         self.assertEqual(chunks[3].to_context_text(), correct_text)
 
+    def test_meta_data(self):
+        doc = self.read_layout("table_test.json")
+        chunks = doc.chunks()
+        
+        self.assertEqual(chunks[0].page_idx, 5)
+        self.assertEqual(chunks[0].block_idx, 112)
+        self.assertEqual(chunks[0].top, 64.8)
+        self.assertEqual(chunks[0].left, 130.05)
 
 if __name__ == '__main__':
     unittest.main()
\ No newline at end of file
diff --git a/setup.py b/setup.py
index d7c3ac5..e86089f 100644
--- a/setup.py
+++ b/setup.py
@@ -2,7 +2,7 @@
 
 setup(
     name='llmsherpa',
-    version='0.1.2',    
+    version='0.1.3',    
     description='Strategic APIs to Accelerate LLM Use Cases',
     long_description=open('README.md').read(),
     long_description_content_type='text/markdown',