diff --git a/.gitignore b/.gitignore
index 735c1ed..ceb9aba 100755
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,6 @@
 ## SQLite3 DB ignore
 /**/*.db
+/**/*.db-journal
 
 ## .xlsx ignore
 /**/*.xlsx
@@ -98,7 +99,6 @@ ENV/
 ### VirtualEnv template
 # Virtualenv
 # http://iamzed.com/2009/05/07/a-primer-on-virtualenv/
-.Python
 [Bb]in
 [Ii]nclude
 [Ll]ib
diff --git a/README.md b/README.md
index eae54da..44498b9 100755
--- a/README.md
+++ b/README.md
@@ -1,5 +1,118 @@
-# PatentsView API mining
+# PatentsView API wrapper
 
-This project calls the PatentsView API.
+This project is a wrapper for the PatentsView API.
 
-* [PatentsView Glossary](http://www.patentsview.org/api/glossary.html) provides a description of the variables.
\ No newline at end of file
+* [PatentsView Glossary](http://www.patentsview.org/api/glossary.html) provides a description of the variables.
+
+## Important Notes:
+
+* Use <https://dev.patentsview.org> and not <https://www.patentsview.org/>; the former is laxer in terms of accepting input
+* Naming is finicky (more so on the second one); even spacing and other characters affect the search results (see below).
+
+## Remarks about the data
+
+* Patent Numbers are alphanumeric (they can include letters)
+* PatentsView only includes information about the patent as it was at the time of issue. It does not include changes to patent information after the patent has been issued.
+    * This means that if the company changes name, it won't be reflected in the patent. Example: if "International Business Machines" renames itself to "IBM", patents issued to "International Business Machines" will still be issued to "International Business Machines" (and not "IBM").
+    * As an example: `NETFLIX, INC.` has an `assignee_key_id` of `17594` and an `assignee_id` of `org_2lAuxOpAtNMvtTxhuLmX`; `NETFLIX.COM, INC.`, on the other hand, has an `assignee_key_id` of `363028` and an `assignee_id` of `org_UNHkzir8tY7NlQrOJKT4`. (This of course assumes `NETFLIX, INC.` and `NETFLIX.COM, INC.` are the same company, which is highly probable).
+    * The same applies for acquisitions. Example: Company A has patent *X*; once company B acquires company A, patent *X* would still show that it is assigned to company *A*.
+    * Probably the same thing holds if a company acquires certain patents of another company.
+* The patents can be assigned to organizations (as opposed to individuals). This is indicated by the 'assignee organization' field returned by the API.
+* The assignee organizations (i.e. companies) are distinguished by name. Each organization name is a 'separate' company.
+    * This means that a patent can be assigned to "IBM", "IBM Inc.", "International Business Machines".
+    * Different organization names have different `assignee_id`s and `assignee_key_id`s (see `NETFLIX` example above).
+* **Different endpoints behave differently**: particularly <https://www.patentsview.org/> and <https://dev.patentsview.org>
+    * **Naming is finicky on the first one**: If you search for `Abbott Laboratories` or for `ABBOTT LABORATORIES`, 
+    you will get the same results. If you search for `ABBOTT Laboratories`, `Abbott LABORATORIES`, 
+    or `abbott laboratories`, you will get nothing.
+    * The second one seems to work better, but you still have to replace the carriage return and line break characters.
+
+## Adding companies
+
+Create a Microsoft Excel spreadsheet (`.xlsx` file) with the following structure:
+
+<table>
+    <thead>
+        <tr>
+            <th>Firm ID</th>
+            <th>Firm Name</th>
+            <th>Alternative names </th>
+            <th></th>
+            <th></th>
+            <th></th>
+            <th></th>
+        </tr>
+       <tr>
+            <th>ID</th>
+            <th>Name 1</th>
+            <th>Name 2</th>
+            <th>Name 3</th>
+            <th>Name 4</th>
+            <th>...</th>
+            <th>Name X</th>
+        </tr>
+    </thead>
+    <tbody>
+        <tr>
+            <td>ID2</td>
+            <td>Company 2 Primary Name / Name 1</td>
+            <td>Name 2</td>
+            <td>Name 3</td>
+            <td>Name 4</td>
+            <td>...</td>
+            <td>Name X</td>
+        </tr>
+        <tr>
+            <td>ID1</td>
+            <td>Company 1 Primary Name / Name 1</td>
+            <td>Name 2</td>
+            <td>Name 3</td>
+            <td>Name 4</td>
+            <td>...</td>
+            <td>Name X</td>
+        </tr>
+    </tbody>
+</table>
+
+
+## Database Structure
+
+Here is an Entity Relationship Diagram (ERD) of the database structure.
+
+![Entity Relationship Diagram (ERD) of the database structure](images/patents_view_table.png)
+
+## Using SQL to Select Patents
+
+The `sql` folder has some SQL scripts that might come in handy. 
+As an example, here is a SQL query that selects patents between two dates:
+
+```
+SELECT
+	p.patent_number as "Patent Number",
+	p.patent_title as "Patent Title",
+	-- p.company_id as "Company ID",
+	c.name as "Company Name",
+	-- p.company_alternate_name_id as "Alternate Name ID",
+	an.name as "Company Name Listed on Patent",
+	p.year,
+	p.grant_date as "Grant Date",
+	p.uspc_class as "USPC Classes"
+FROM 
+	patents as p
+JOIN 
+	companies as c
+ON
+	p.company_id = c.id
+LEFT JOIN 
+	alternate_company_names as an
+ON
+	p.company_alternate_name_id = an.id
+WHERE
+	p.grant_date > DATE("2006-01-03") AND
+	p.grant_date < DATE("2010-06-13");
+```
+
+## Software Applications
+
+* [DbVisualizer](https://www.dbvis.com/) was used to generate the graphs
+* [DB Browser for SQLite](https://sqlitebrowser.org/) was used to look at the data and execute SQL queries
diff --git a/images/patents_view_table.png b/images/patents_view_table.png
new file mode 100755
index 0000000..29cff1d
Binary files /dev/null and b/images/patents_view_table.png differ
diff --git a/sql/select_cited_patent_numbers_not_populated.sql b/sql/select_cited_patent_numbers_not_populated.sql
new file mode 100755
index 0000000..d53a92a
--- /dev/null
+++ b/sql/select_cited_patent_numbers_not_populated.sql
@@ -0,0 +1,9 @@
+/*
+ * Use this script to find which cited patents need to be added to the Patents table.
+ */
+SELECT
+    cited_patent_number
+FROM
+    cited_patents
+WHERE
+    cited_patent_number NOT IN (SELECT DISTINCT patent_number FROM patents);
\ No newline at end of file
diff --git a/sql/select_cited_patents.sql b/sql/select_cited_patents.sql
new file mode 100755
index 0000000..ac48725
--- /dev/null
+++ b/sql/select_cited_patents.sql
@@ -0,0 +1,28 @@
+SELECT
+	DISTINCT
+	p.patent_number as "Citing Patent Number"
+	,co.name as "Citing Company"
+	,cp.cited_patent_number as "Cited Patent Number"
+	,pp.patent_title as "Cited Patent Title"
+	,pp.year as "Year"
+	,pp.grant_date as "Grant Date"
+	,pp.uspc_class as "USPC Class"
+FROM
+	patents as p
+LEFT JOIN
+	companies as co
+ON
+	co.id = p.company_id
+JOIN
+	cited_patents as cp
+ON
+	p.patent_number = cp.citing_patent_number
+LEFT JOIN
+	patents as pp
+ON
+	cp.cited_patent_number = pp.patent_number
+-- Uncomment the following 2 lines if you want to filter by patent_number (or something else of your choosing)
+--WHERE
+--	p.patent_number = "10001497"
+ORDER BY 
+	p.patent_number ASC
\ No newline at end of file
diff --git a/sql/select_patents_between_two_dates.sql b/sql/select_patents_between_two_dates.sql
new file mode 100755
index 0000000..bdd12e7
--- /dev/null
+++ b/sql/select_patents_between_two_dates.sql
@@ -0,0 +1,27 @@
+/*
+ * Use this SQL query to select patents between two dates.
+ * Uncomment the lines to retrieve the company ID and the alternate name ID.
+ */
+SELECT
+    p.patent_number as "Patent Number",
+    p.patent_title as "Patent Title",
+    -- p.company_id as "Company ID",
+    c.name as "Company Name",
+    -- p.company_alternate_name_id as "Alternate Name ID",
+    an.name as "Company Name Listed on Patent",
+    p.year,
+    p.grant_date as "Grant Date",
+    p.uspc_class as "USPC Classes"
+FROM 
+    patents as p
+JOIN 
+    companies as c
+ON
+    p.company_id = c.id
+LEFT JOIN 
+    alternate_company_names as an
+ON
+    p.company_alternate_name_id = an.id
+WHERE
+    p.grant_date > DATE("2006-01-03") AND
+    p.grant_date < DATE("2010-06-13");
\ No newline at end of file
diff --git a/src/main.py b/src/main.py
index 4c07a1d..7b0b7bb 100755
--- a/src/main.py
+++ b/src/main.py
@@ -1,25 +1,30 @@
 from query_formatting import PatentsViewQueryFormatting as PVQF
 import requests
 import json
-from pprint import pprint
 from sqlalchemy import *
 from sqlalchemy.orm import *
 from sqlalchemy.ext.declarative import declarative_base
 from os import path
 import pandas
-import html
+from urllib.parse import quote
 import argparse
+from datetime import datetime
+import re
+from typing import List
 
 Base = declarative_base()
-engine = create_engine('sqlite:///patensview.db')
+engine = create_engine('sqlite:///patentsview.db')
 
 
+# Move Base classes to different file: https://stackoverflow.com/a/7479122/6288413
 class AlternateName(Base):
     __tablename__ = "alternate_company_names"
 
     id = Column(Integer, primary_key=True)
     company_id = Column(Integer, ForeignKey('companies.id'))
     name = Column(String, nullable=False, unique=True)
+    assignee_id = Column(String, nullable=True, unique=True)
+    assignee_key_id = Column(String, nullable=True, unique=True)
 
     def __init__(self, company_id, name):
         self.company_id = company_id
@@ -30,6 +35,8 @@ class Company(Base):
     __tablename__ = 'companies'
     id = Column(Integer, primary_key=True)
     name = Column(String, nullable=False, unique=True)
+    assignee_id = Column(String, nullable=True, unique=True)
+    assignee_key_id = Column(String, nullable=True, unique=True)
 
     def __init__(self, name):
         self.name = name
@@ -49,13 +56,17 @@ def add_companies(companies):
 
 class Patent(Base):
     __tablename__ = 'patents'
-    # id/patent_id
-    patent_number = Column(String, primary_key=True, autoincrement=False)
+    # the combination of Patent Number, Company Name ID, and Alternate Name ID should be unique
+    # Source: https://stackoverflow.com/a/10061143/6288413
+    __table_args__ = (UniqueConstraint('patent_number', 'company_id', 'company_alternate_name_id',
+                                       name='_patents_uc'),)
+    id = Column(Integer, primary_key=True)
+    patent_number = Column(String)
     patent_title = Column(String)
-    company_id = Column(Integer, ForeignKey('companies.id'))
+    company_id = Column(Integer, ForeignKey('companies.id'), nullable=True)
     company_alternate_name_id = Column(Integer, ForeignKey('alternate_company_names.id'), nullable=True)
-    year = Column(String)
-    grant_date = Column(String)
+    year = Column(Integer)
+    grant_date = Column(DateTime)
     uspc_class = Column(String)
     assignee_first_name = Column(String)
     assignee_last_name = Column(String)
@@ -67,7 +78,7 @@ def __init__(self, patent_number, patent_title, company_id, year, grant_date, us
         self.company_id = company_id
         self.company_alternate_name_id = company_alternate_name_id
         self.year = year
-        self.grant_date = grant_date
+        self.grant_date = datetime.strptime(grant_date, '%Y-%m-%d')
         self.uspc_class = uspc_class
         self.assignee_first_name = assignee_first_name
         self.assignee_last_name = assignee_last_name
@@ -75,9 +86,19 @@ def __init__(self, patent_number, patent_title, company_id, year, grant_date, us
 
 class CitedPatent(Base):
     __tablename__ = 'cited_patents'
-    # id/patent_id
-    citing_patent_number = Column(String, ForeignKey('patents.patent_number'), primary_key=True)
-    cited_patent_number = Column(String, ForeignKey('patents.patent_number'))
+    __table_args__ = (
+        # PrimaryKeyConstraint('citing_patent_number', 'cited_patent_number'),
+        UniqueConstraint('citing_patent_number', 'cited_patent_number',
+                         name='_citing_patents_uc'),
+    )
+
+    id = Column(Integer, primary_key=True)
+    citing_patent_number = Column(String) #, ForeignKey('patents.patent_number'))
+    cited_patent_number = Column(String) #, ForeignKey('patents.patent_number'))
+
+    def __init__(self, patent_number, cited_patent_number):
+        self.citing_patent_number = patent_number
+        self.cited_patent_number = cited_patent_number
 
 
 Base.metadata.create_all(engine)
@@ -85,49 +106,57 @@ class CitedPatent(Base):
 dbSession = sessionmaker(bind=engine)
 session = dbSession()
 
-
-
 # setting for searching for company name
 # e.g.:     "_eq", "_begins", etc.
 COMPANY_SEARCH_CRITERIA = '_eq'
 
-
 # Application Variables
-search_base_url = "http://www.patentsview.org/"
+search_base_url = "https://dev.patentsview.org/"
 patent_search_endpoint = search_base_url + "api/patents/query"
 assignee_search_endpoint = search_base_url + "api/assignees/query"
 
 
+def get_patent(patent_number, fields=None):
+    patent_query = '{"patent_number":"%s"}' % patent_number
+    fields = ('["patent_number","patent_title","patent_abstract","patent_date","patent_year",'
+              '"patent_kind","patent_type","patent_processing_time","app_number","assignee_country","assignee_id",'
+              '"assignee_organization","nber_category_title","nber_subcategory_title",'
+              '"wipo_sector_title","wipo_field_title"]')
+    return patentsview_get_request(patent_search_endpoint, patent_query, fields)
+
+
 def get_all_company_patents(company, beginning_year=None, end_year=None, verbose=False):
     first_page = get_one_page_of_company_patents(company, beginning_year, end_year, verbose=verbose)
     patents = first_page["patents"]
     number_of_pages = 1
     if first_page["total_patent_count"] > first_page["count"]:
-        number_of_pages = first_page["total_patent_count"] // 25 + 1
+        number_of_pages = first_page["total_patent_count"] // 25
+        if first_page["total_patent_count"] % 25:
+            number_of_pages += 1
     for page_number in range(2, number_of_pages + 1):
         page_results = get_one_page_of_company_patents(company, beginning_year, end_year, page_number, verbose=verbose)
-        patents += page_results["patents"]
+        if page_results["patents"]:
+            patents += page_results["patents"]
     # TODO see if it is better to yield instead of to return
     return patents
 
 
 def get_one_page_of_company_patents(company, beginning_year=None, end_year=None, page=1, perpage=25, verbose=False):
     print("Requesting PatentsView: %s, page %d" % (company, page))
-    company = html.escape(company).replace("&#x27;", "'")
-    company_query = '{"%s":{"assignee_organization":"%s"} }' % (COMPANY_SEARCH_CRITERIA, company)
+    company_query = '{"%s":{"assignee_organization":"%s"}}' % (COMPANY_SEARCH_CRITERIA, company)
     date_range = None
 
     if beginning_year is not None and end_year is not None:
         date_range = PVQF.format_year_range(str(beginning_year) + "-01-01", str(end_year) + "-12-31")
     if date_range is not None:
-        search_query = PVQF.pv_and_or("_and", company_query + date_range)
+        search_query = PVQF.pv_and_or("_and", [company_query] + date_range)
     else:
         search_query = company_query
 
-    results_format = ('["patent_number","patent_date","patent_year","assignee_organization", "app_date",'
+    results_format = ('["patent_number","patent_date","patent_year","assignee_organization","app_date",'
                       '"patent_title","uspc_mainclass_id","assignee_first_name","assignee_last_name"]'
                       )
-    sorting_format = '{"page": %d,"per_page": %d}' % (page, perpage)
+    sorting_format = '{"page":%d,"per_page":%d}' % (page, perpage)
     response_in_json = patentsview_get_request(patent_search_endpoint, search_query,
                                                results_format, sorting_format, verbose=verbose)
     response = json.loads(response_in_json)
@@ -136,19 +165,24 @@ def get_one_page_of_company_patents(company, beginning_year=None, end_year=None,
     return response
 
 
-# https://stackoverflow.com/questions/41686536/querying-patentsview-for-patents-of-multiple-assignee-organization
+# https://stackoverflow.com/a/41837318/6288413
 def patentsview_get_request(endpoint, query_param, format_param=None, options_param=None, sort_param=None,
                             verbose=False):
-    if endpoint == "" and query_param == "":
-        return False
-
-    endpoint_query = endpoint + "?q=" + query_param
+    if not endpoint:
+        raise ValueError("Endpoint is empty or None.")
+    if not query_param:
+        raise ValueError("query_param is empty or None.")
+
+    # Use urllib.parse's quote to escape JSON strings. See:
+    # - https://stackoverflow.com/a/45758514/6288413
+    # - https://stackoverflow.com/a/18723973/6288413
+    endpoint_query = endpoint + "?q=" + quote(re.sub("(\r?\n)", " ", query_param))
     if format_param is not None:
-        endpoint_query = endpoint_query + "&f=" + format_param
+        endpoint_query = endpoint_query + "&f=" + quote(format_param)
     if options_param is not None:
-        endpoint_query = endpoint_query + "&o=" + options_param
+        endpoint_query = endpoint_query + "&o=" + quote(options_param)
     if sort_param is not None:
-        endpoint_query = endpoint_query + "&so=" + sort_param
+        endpoint_query = endpoint_query + "&so=" + quote(sort_param)
     if verbose:
         print(endpoint_query)
     r = requests.get(endpoint_query)
@@ -190,7 +224,7 @@ def insert_names(file_path):
             index = df.columns.get_loc("Name 1")
             primary_name = row[index]
             primary_id = session.query(Company.id).filter_by(name=primary_name).scalar()
-            alternate_names = [name for name in row[index+1:] if type(name) == str]
+            alternate_names = [name for name in row[index + 1:] if type(name) == str]
             insert_alternate_names(primary_id, alternate_names, False)
         session.commit()
 
@@ -205,64 +239,279 @@ def get_company_primary_id(name):
     return None
 
 
-def add_patents(patents, company_id, company_alternate_name_id, company_name):
+def fetch_all_cited_patent_numbers_for_all_patents_in_db(verbose=False):
+    l = []
+    for number in session.query(Patent.patent_number).distinct().all():
+        l.append(number.patent_number)
+    add_cited_patent_numbers(l, verbose=verbose)
+
+
+def add_cited_patents(limit=25, verbose=False):
+    # This function populates the patents table with the missing information for the
+    # patent numbers found in the cited_patents table
+    # TODO refactor this function to accept a list of patents
+    results_format = ('["patent_number","patent_date","patent_year","assignee_organization","app_date",'
+                      '"patent_title","uspc_mainclass_id","assignee_first_name","assignee_last_name"]'
+                      )
+    patents_in_db = session.query(Patent.patent_number)
+    cited_patents_to_add = [x.cited_patent_number for x in session.query(CitedPatent.cited_patent_number)\
+        .filter(~CitedPatent.cited_patent_number.in_(patents_in_db)).all()]
+    for patents in fetch_patents_by_number(cited_patents_to_add, results_format, limit=limit, verbose=verbose):
+        add_patents(patents)
+
+
+def add_cited_patent_numbers(patents_list, limit=25, verbose=False):
+    results_format = '["patent_number","cited_patent_number"]'
+    for patents in fetch_patents_by_number(patents_list, results_format, limit=limit, verbose=verbose):
+        add_cited_patent_numbers_to_db(patents)
+
+
+def fetch_patents_by_number(patents_list, results_format, limit=25, verbose=False):
+    q_list = ['"%s"' % patent_number for patent_number in patents_list]
+    q_str = '{"patent_number":[%s]}' % ",".join(q_list)
+
+    # PatentsView only accepts GET requests; the endpoints for GET requests have a max length of 2000 characters.
+    # As such if the length of the endpoint exceeds the maximum allowed length, a '414 URI Too Long' error is returned.
+    # (for an explanation see: https://stackoverflow.com/a/50018203/6288413)
+    # To circumvent the issue, we have to break up the query into chunks
+    patents = []
+    endpoint_length = len(patent_search_endpoint) + len('&q=') + len(q_str) + len('&f=') + len(results_format)
+
+    # The PatentsView API apparently only allows 25 patents to be looked up at a time, hence the need for limit
+    # TODO: investigate why this is and if there is a way to change it
+    if endpoint_length < 2000 and not limit:
+        response = patentsview_get_request(patent_search_endpoint, q_str, results_format, verbose=verbose)
+        results = json.loads(response)
+        patents = results['patents']
+        yield patents
+    else:
+        if limit and ((endpoint_length // 2000) < (endpoint_length // limit)):
+            number_of_chunks = endpoint_length // limit + 1
+        else:
+            number_of_chunks = endpoint_length // 2000 + 1
+
+        interval = max(len(q_list) // number_of_chunks, limit)
+        num_intervals = range(len(q_list) // interval + 2)
+        for i in num_intervals:
+            start_index = i * interval
+            end_index = (i + 1) * interval
+            q_str = '{"patent_number":[%s]}' % ",".join(q_list[start_index:end_index])
+            response = patentsview_get_request(patent_search_endpoint, q_str, results_format, verbose=verbose)
+            results = json.loads(response)
+            if verbose:
+                print(results)
+            if results['patents']:
+                patents += results['patents']
+
+            # This is to potentially avoid a "Segmentation Fault (core dumped)" error
+
+            # TODO change this to an implementation that is more programmatic
+            if len(patents) >= 1000:
+                yield patents
+                patents = []
+        yield patents
+
+
+def add_cited_patent_numbers_to_db(citing_patent_numbers: List) -> None:
+    print("Adding cited patent numbers to db.")
+    # Patents that are already in the db
+    cited_patents_in_db = [(x.citing_patent_number, x.cited_patent_number) for x in
+                           session.query(CitedPatent).all()]
+    # Patents fetched
+    cited_patent_objects = []
+    # Add ALL cited patents to cited_patent_objects list
+    for patent in citing_patent_numbers:
+        patent_number = patent["patent_number"]
+        for cited_patent_number in patent["cited_patents"]:
+            # Check if there are cited patents in the results and if they are already in the database
+            cited_patent_number = cited_patent_number["cited_patent_number"]
+            if cited_patent_number:
+                cited_patent_objects.append((patent_number, cited_patent_number))
+
+    # Remove the patents that are already in the database
+    cited_patent_objects = list(set(cited_patent_objects) - set(cited_patents_in_db))
+
+    # Add the cited patents to the database
+    for i in range(len(cited_patent_objects)):
+        patent_number, cited_patent_number = cited_patent_objects[i]
+        cited_patent_objects[i] = CitedPatent(patent_number, cited_patent_number)
+
+    session.bulk_save_objects(cited_patent_objects)
+    session.commit()
+    # TODO: add patents not in Patents table
+
+
+def add_patents(patents):
     patent_objects = []
     for p in patents:
         uspc_main_classes = ""
 
+        # Concatenate the USPC Main class codes into the 'uspc_class' field
+        # Example entry: '250; 376; 976; '
         for mainclass in p["uspcs"]:
             if mainclass["uspc_mainclass_id"]:
                 uspc_main_classes += mainclass["uspc_mainclass_id"] + "; "
-
-        p_obj = Patent(patent_number=p["patent_number"],
-                       patent_title=p["patent_title"],
-                       company_id=company_id,
-                       year=p["patent_year"],
-                       grant_date=p["patent_date"],
-                       uspc_class=uspc_main_classes,
-                       # TODO: fix this.
-                       assignee_first_name=None,    # p["assignees"]["assignee_first_name"],
-                       assignee_last_name=None,      # p["assignees"]["assignee_last_name"],
-                       company_alternate_name_id=company_alternate_name_id
-                       )
-        # TODO: handle case where a patent is assigned to more than 1 company
-        if session.query(Patent.patent_number).filter_by(patent_number=p["patent_number"]).scalar() is None:
-            patent_objects.append(p_obj)
+        if not uspc_main_classes:
+            uspc_main_classes = None
+
+        # A patent can have multiple assignees. If the assignee organization is in one of our tables (e.g. Companies,
+        # AlternateNames), add an entry in the patents table for each name
+        for assignee in p["assignees"]:
+            # There is also an "assignee_key_id" field, which is currently unused
+            assignee_organization = assignee["assignee_organization"]
+            if assignee_organization:
+                assignee_organization = assignee_organization.lower()
+            assignee_first_name = assignee["assignee_first_name"]
+            assignee_last_name = assignee["assignee_last_name"]
+
+            # Check if the assignee is in one of the tables: companies, alternate_names
+            assignee_id = session.query(Company.id).filter(
+                func.lower(Company.name) == assignee_organization).first()
+            assignee_alternate_id = None
+            if assignee_id:
+                assignee_id = assignee_id.id
+            else:
+                # TODO find a company/patent that satisfies this path so that this can be tested
+                # TODO handle case where there is no assignee organization, just an individual's first & last name
+                result = session.query(AlternateName.id, AlternateName.company_id) \
+                    .filter(func.lower(AlternateName.name) == assignee_organization).first()
+                if result:
+                    assignee_id = result.company_id
+                    assignee_alternate_id = result.id
+
+            p_obj = Patent(patent_number=p["patent_number"],
+                           patent_title=p["patent_title"],
+                           company_id=assignee_id,
+                           year=p["patent_year"],
+                           grant_date=p["patent_date"],
+                           uspc_class=uspc_main_classes,
+                           assignee_first_name=assignee_first_name,
+                           assignee_last_name=assignee_last_name,
+                           company_alternate_name_id=assignee_alternate_id
+                           )
+
+            # Check if the patent is already in the database; add it if it is not
+            # TODO: change this so that the database is not read so frequently from disk
+            if session.query(Patent).filter_by(patent_number=p["patent_number"],
+                                               company_id=assignee_id,
+                                               company_alternate_name_id=assignee_alternate_id,
+                                               assignee_first_name=assignee_first_name,
+                                               assignee_last_name=assignee_last_name,).first() is None:
+                patent_objects.append(p_obj)
+
+    # Save the patents
     session.bulk_save_objects(patent_objects)
     session.commit()
 
 
-def main():
-    options = get_options()
-    # Insert company names
-    insert_names(options.path[0])
+def fetch_patents_for_all_companies_in_db(resume_from_company_id=None, verbose=False):
+    if resume_from_company_id and type(resume_from_company_id) == int:
+        company_query = session.query(Company.id).filter(Company.id >= resume_from_company_id).order_by(
+            Company.id.asc()).all()
+    else:
+        company_query = session.query(Company.id).order_by(Company.id.asc()).all()
 
-    max_company_id = session.query(func.max(Patent.company_id)).scalar()
     # Insert patents
-    for company_id in session.query(Company.id).all():
+    for company_id in company_query:
         company_id = company_id[0]
-        companies = session.query(Company.name).filter_by(id=company_id).all()
-        companies += session.query(AlternateName.name).filter_by(company_id=company_id).all()
+        primary_names = session.query(Company.name).filter_by(id=company_id).all()
+        alternate_names = session.query(AlternateName.name, AlternateName.id).filter_by(company_id=company_id).all()
+
+        for org in primary_names:
+            patents = get_all_company_patents(org[0], verbose=verbose)
+            if patents:
+                add_patents(patents)
 
-        for org in companies:
-            patents = get_all_company_patents(org[0], verbose=True)
+        for org, alternate_name_id in alternate_names:
+            patents = get_all_company_patents(org, verbose=verbose)
             if patents:
-                add_patents(patents, company_id, org)
+                add_patents(patents)
+
+
+def main():
+    options = get_options()
+
+    # Insert company names
+    if options.path:
+        try:
+            insert_names(options.path)
+        except Exception as e:
+            print("Error Occurred: %s" % str(e))
+
+    start_date = None
+    if options.start_date:
+        start_date = options.start_date[0]
+
+    end_date = None
+    if options.start_date:
+        end_date = options.end_date[0]
+
+    # TODO: implement functionality that uses the Start and End dates
+    if options.fetch_patents_for_all_companies:
+        company_id = options.resume_from_company_id
+        if company_id:
+            print("Fetching patents for all companies in the database, starting with company id %s." % company_id)
+            fetch_patents_for_all_companies_in_db(company_id)
+        else:
+            print("Fetching patents for all companies in the database.")
+            fetch_patents_for_all_companies_in_db()
+
+    fetch_all_cited_patent_numbers_for_all_patents_in_db()
+    add_cited_patents(verbose=True)
 
 
 def get_options():
-    parser = argparse.ArgumentParser(
-        description="A script that calls the PatentsView API.",
-        formatter_class=argparse.RawDescriptionHelpFormatter
-    )
+    parser = argparse.ArgumentParser(description="A script that calls the PatentsView API.",
+                                     # formatter_class=argparse.RawDescriptionHelpFormatter
+                                     )
+
     parser.add_argument(
-        'path', type=str, metavar="path", nargs=1,
+        '-p', '--path', type=str, metavar="path",
         help="The path of the spreadsheet that has the list of names and alternate names."
     )
 
+    parser.add_argument(
+        '--fetch-patents-for-all-companies', action='store_true',
+        help="If passed, fetch patents for all companies in the database."
+    )
+
+    parser.add_argument(
+        '--fetch-cited-patent-numbers', action='store_true',
+        help="If passed, fetch the patent numbers for all patents in the database"
+    )
+
+    parser.add_argument(
+        '--fetch-all-cited-patents', action='store_true',
+        help="If passed, fetch patents for all companies in the database."
+    )
+
+    parser.add_argument(
+        '-r', '--resume-from-company-id', type=int,
+        help="Resume fetching patent from this company id."
+    )
+
+    parser.add_argument(
+        '--start-date', type=int, metavar="start_date", nargs=1,
+        help="The patents will have a grant date greater than or equal to the start date."
+    )
+
+    parser.add_argument(
+        '--end-date', type=int, metavar="end_date", nargs=1,
+        help="The patents will have a grant date less than or equal to the end date."
+    )
+
+    parser.add_argument(
+        '-c', '--companies', type=str, metavar="companies", nargs='+',
+        help="The companies whose patents you want to retrieve."
+    )
+
+    parser.add_argument(
+        '--verbose', action="store_true",
+        help="Enable verbose."
+    )
+
     options = parser.parse_args()
-    if not options.path:
-        parser.error("Please submit a path.")
+
     return options
 
 
diff --git a/src/query_formatting.py b/src/query_formatting.py
index 93f4dc3..62fcd9a 100755
--- a/src/query_formatting.py
+++ b/src/query_formatting.py
@@ -3,26 +3,26 @@
 
 class PatentsViewQueryFormatting:
     @staticmethod
-    def pv_and_or(self, criterion, pair_list):
+    def pv_and_or(criterion, pair_list):
         if len(pair_list):
-            return ""
-        else:
             return '{"%s":[%s]}' % (criterion, ",".join(pair_list))
+        else:
+            return ""
 
     @staticmethod
     # ISO date is of format: YYYY-MM-DD
-    def iso_date_string_to_datetime(self, iso_date):
+    def iso_date_string_to_datetime(iso_date):
         return datetime.strptime(iso_date, '%Y-%m-%d')
 
     @staticmethod
-    def get_beginning_of_year(self, iso_date):
+    def get_beginning_of_year(iso_date):
         return datetime.strptime(iso_date[:4], '%Y')
 
     @staticmethod
-    def get_end_of_year(self, iso_date):
+    def get_end_of_year(iso_date):
         d = datetime.strptime(iso_date[:4], '%Y')
-        d = d.replace(year=d.year + 1);
-        d = d - timedelta(microseconds=1);
+        d = d.replace(year=d.year + 1)
+        d = d - timedelta(microseconds=1)
         return d
 
     @staticmethod
@@ -30,25 +30,30 @@ def datetime_to_iso_date(date_time):
         return date_time.strftime('%Y-%m-%d')
 
     @staticmethod
-    def format_year_range(self, beginning_date, end_date):
-        start = self.get_beginning_of_year(beginning_date)
-        start = self.datetime_to_iso_date(start)
-        end = self.get_end_of_year(end_date)
-        end = self.datetime_to_iso_date(end)
-        return ["{\"_gte\":{\"patent_date\":\"" + str(start) + "\"}}",
-                "{\"_lte\":{\"patent_date\":\"" + str(end) + "\"}}"
-                ]
-
-    @staticmethod
-    def get_date_difference(self, iso_date_one, iso_date_two):
-        date_one = self.iso_date_string_to_datetime(iso_date_one)
-        date_two = self.iso_date_string_to_datetime(iso_date_two)
+    def format_year_range(beginning_date, end_date):
+        if not beginning_date and not end_date:
+            raise ValueError("Must provide a beginning date, an end_date, or both.")
+        lst = []
+        if beginning_date:
+            start = PatentsViewQueryFormatting.get_beginning_of_year(beginning_date)
+            start = PatentsViewQueryFormatting.datetime_to_iso_date(start)
+            lst.append('{"_gte":{"patent_date":"%s"}}' % str(start))
+        if end_date:
+            end = PatentsViewQueryFormatting.get_end_of_year(end_date)
+            end = PatentsViewQueryFormatting.datetime_to_iso_date(end)
+            lst.append('{"_lte":{"patent_date":"%s"}}' % str(end))
+        return lst
+
+    @staticmethod
+    def get_date_difference(iso_date_one, iso_date_two):
+        date_one = PatentsViewQueryFormatting.iso_date_string_to_datetime(iso_date_one)
+        date_two = PatentsViewQueryFormatting.iso_date_string_to_datetime(iso_date_two)
         difference = abs((date_one - date_two).days) / 365.25
         return difference
 
     @staticmethod
-    def subtract_x_years(self, iso_date, years):
-        d = self.iso_date_string_to_datetime(iso_date)
+    def subtract_x_years(iso_date, years):
+        d = PatentsViewQueryFormatting.iso_date_string_to_datetime(iso_date)
         d = d.replace(year=d.year + years)
 
-        return self.datetime_to_iso_date(d)
+        return PatentsViewQueryFormatting.datetime_to_iso_date(d)

Firm ID	Firm Name	Alternative names
ID	Name 1	Name 2	Name 3	Name 4	...	Name X
ID2	Company 2 Primary Name / Name 1	Name 2	Name 3	Name 4	...	Name X
ID1	Company 1 Primary Name / Name 1	Name 2	Name 3	Name 4	...	Name X