diff --git a/mdagent/agent/agent.py b/mdagent/agent/agent.py
index a5ce7092..760e8a8c 100644
--- a/mdagent/agent/agent.py
+++ b/mdagent/agent/agent.py
@@ -40,7 +40,6 @@ def __init__(
         model="gpt-4-1106-preview",  # current name for gpt-4 turbo
         tools_model="gpt-4-1106-preview",
         temp=0.1,
-        max_iterations=40,
         verbose=True,
         ckpt_dir="ckpt",
         top_k_tools=20,  # set "all" if you want to use all tools
diff --git a/mdagent/tools/base_tools/__init__.py b/mdagent/tools/base_tools/__init__.py
index a2a6c6df..36fe856b 100644
--- a/mdagent/tools/base_tools/__init__.py
+++ b/mdagent/tools/base_tools/__init__.py
@@ -27,6 +27,26 @@
 from .preprocess_tools.clean_tools import CleaningToolFunction
 from .preprocess_tools.packing import PackMolTool
 from .preprocess_tools.pdb_get import ProteinName2PDBTool, SmallMolPDB, get_pdb
+from .preprocess_tools.uniprot import (
+    GetActiveSites,
+    GetAllKnownSites,
+    GetAllSequences,
+    GetBindingSites,
+    GetGeneNames,
+    GetInteractions,
+    GetKineticProperties,
+    GetPDB3DInfo,
+    GetPDBProcessingInfo,
+    GetProteinAssociatedKeywords,
+    GetProteinFunction,
+    GetRelevantSites,
+    GetSequenceInfo,
+    GetSubunitStructure,
+    GetTurnsBetaSheetsHelices,
+    GetUniprotID,
+    MapProteinRepresentation,
+    UniprotID2Name,
+)
 from .simulation_tools.create_simulation import ModifyBaseSimulationScriptTool
 from .simulation_tools.setup_and_run import (
     SetUpandRunFunction,
@@ -39,6 +59,22 @@
 
 __all__ = [
     "CleaningToolFunction",
+    "GetActiveSites",
+    "GetAllKnownSites",
+    "GetAllSequences",
+    "GetBindingSites",
+    "GetGeneNames",
+    "GetInteractions",
+    "GetKineticProperties",
+    "GetPDB3DInfo",
+    "GetPDBProcessingInfo",
+    "GetProteinAssociatedKeywords",
+    "GetProteinFunction",
+    "GetRelevantSites",
+    "GetSequenceInfo",
+    "GetSubunitStructure",
+    "GetTurnsBetaSheetsHelices",
+    "GetUniprotID",
     "ComputeAngles",
     "ComputeChi1",
     "ComputeChi2",
@@ -58,6 +94,7 @@
     "DistanceMatrixTool",
     "ListRegistryPaths",
     "MapPath2Name",
+    "MapProteinRepresentation",
     "ModifyBaseSimulationScriptTool",
     "MomentOfInertia",
     "PackMolTool",
@@ -72,11 +109,12 @@
     "RMSDCalculator",
     "Scholar2ResultLLM",
     "SerpGitTool",
-    "SetUpandRunFunction",
     "SetUpAndRunTool",
+    "SetUpandRunFunction",
     "SimulationFunctions",
     "SimulationOutputFigures",
     "SmallMolPDB",
+    "UniprotID2Name",
     "SolventAccessibleSurfaceArea",
     "VisFunctions",
     "VisualizeProtein",
diff --git a/mdagent/tools/base_tools/preprocess_tools/pdb_get.py b/mdagent/tools/base_tools/preprocess_tools/pdb_get.py
index 031e6885..eac925ec 100644
--- a/mdagent/tools/base_tools/preprocess_tools/pdb_get.py
+++ b/mdagent/tools/base_tools/preprocess_tools/pdb_get.py
@@ -33,7 +33,8 @@ def get_pdb(query_string: str, path_registry: PathRegistry):
     else:
         filetype = "pdb"
     if "result_set" in r.json() and len(r.json()["result_set"]) > 0:
-        pdbid = r.json()["result_set"][0]["identifier"]
+        results = r.json()["result_set"]
+        pdbid = max(results, key=lambda x: x["score"])["identifier"]
         print(f"PDB file found with this ID: {pdbid}")
         st.markdown(f"PDB file found with this ID: {pdbid}", unsafe_allow_html=True)
         url = f"https://files.rcsb.org/download/{pdbid}.{filetype}"
diff --git a/mdagent/tools/base_tools/preprocess_tools/uniprot.py b/mdagent/tools/base_tools/preprocess_tools/uniprot.py
new file mode 100644
index 00000000..3d6c03e0
--- /dev/null
+++ b/mdagent/tools/base_tools/preprocess_tools/uniprot.py
@@ -0,0 +1,1273 @@
+import time
+from enum import Enum
+
+import requests
+from langchain.tools import BaseTool
+from requests.adapters import HTTPAdapter
+from urllib3.util import Retry
+
+
+class SiteType(Enum):  # value = (UniProt field to request, feature type to match)
+    ACTIVE = ("ft_act_site", "active site")
+    BINDING = ("ft_binding", "binding site")
+    SITES = ("ft_site", "site")
+
+
+class PTMType(Enum):  # value = (UniProt field to request, feature type to match)
+    CHAIN = ("ft_chain", "Chain")
+    CROSSLINK = ("ft_crosslnk", "Cross-link")
+    DISULFIDE_BOND = ("ft_disulfid", "Disulfide bond")
+    GLYCOSYLATION = ("ft_carbohyd", "Glycosylation")
+    INITIATOR_METHIONINE = ("ft_init_met", "Initiator methionine")
+    LIPIDATION = ("ft_lipid", "Lipidation")
+    MODIFIED_RESIDUE = ("ft_mod_res", "Modified residue")
+    PEPTIDE = ("ft_peptide", "Peptide")
+    PROPEPTIDE = ("ft_propep", "Propeptide")
+    SIGNAL_PEPTIDE = ("ft_signal", "Signal peptide")
+    TRANSIT_PEPTIDE = ("ft_transit", "Transit peptide")
+
+
+class StructureMap(Enum):  # value = (UniProt field to request, feature type to match)
+    BETA = ("ft_strand", "Beta strand")
+    HELIX = ("ft_helix", "Helix")
+    TURN = ("ft_turn", "Turn")
+
+
+class QueryUniprot:
+    API_URL = "https://rest.uniprot.org"
+
+    def get_sequence_mapping(
+        self,
+        query: str,
+        from_db: str = "UniProtKB_AC-ID",
+        to_db: str = "PDB",
+        polling_interval: int = 3,
+    ) -> list:
+        """
+        Fetch specific ID mapping from UniProt and extract the 'to' field
+        from results.
+
+        Args:
+            query: The UniProt ID to map (e.g. 'P05067')
+            from_db: The source database to map from.
+                Defaults to 'UniProtKB_AC-ID'.
+            to_db: The target database to map to. Defaults to 'PDB'.
+            polling_interval: The interval to poll the API for results.
+                Defaults to 3 seconds.
+
+        Returns:
+            A list of mapped database entries from the 'to' field if
+                successful, otherwise an empty list.
+        """
+        with requests.Session() as session:
+            session.mount(
+                "https://",
+                HTTPAdapter(
+                    max_retries=Retry(
+                        total=5,
+                        backoff_factor=0.25,
+                        status_forcelist=[500, 502, 503, 504],
+                    )
+                ),
+            )
+            try:
+                response = session.post(
+                    f"{self.API_URL}/idmapping/run",
+                    data={"from": from_db, "to": to_db, "ids": query},
+                )
+                response.raise_for_status()
+                job_id = response.json()["jobId"]
+                # Jobs may report "NEW" while queued (per UniProt's ID-mapping example).
+                while True:
+                    response = session.get(f"{self.API_URL}/idmapping/status/{job_id}")
+                    response.raise_for_status()
+                    status_data = response.json()
+                    if status_data.get("jobStatus") in ("NEW", "RUNNING"):
+                        print(f"Job is running. Retrying in {polling_interval}s.")
+                        time.sleep(polling_interval)
+                    else:
+                        break
+
+                response = session.get(f"{self.API_URL}/idmapping/details/{job_id}")
+                response.raise_for_status()
+                results_link = response.json().get("redirectURL")
+
+                response = session.get(results_link)
+                response.raise_for_status()
+                if "application/json" not in response.headers.get("Content-Type", ""):
+                    raise ValueError(
+                        "Expected JSON response but got a different format."
+                    )
+
+                results_json = response.json()
+                results = results_json.get("results", [])
+                return [r["to"] for r in results]
+            except requests.HTTPError as http_err:
+                print(f"HTTP error occurred: {http_err}")
+                return []
+            except Exception as err:
+                print(f"An error occurred: {err}")
+                return []
+
+    def get_data(
+        self, query: str, desired_field: str, format_type: str = "json"
+    ) -> list | None:
+        """
+        Search UniProtKB and return the matching entries for one return field.
+
+        Args:
+            query: The query string to search (e.g. 'hemoglobin')
+            desired_field: The desired field to retrieve from the API
+            format_type: The format of the data to retrieve. Defaults to 'json'.
+
+        Returns:
+            Matching entries, or None on HTTP error. Raises ValueError if none match.
+        """
+        not_found = (
+            "Requested query not found, "
+            "please try again with a valid protein identifier."
+        )
+        with requests.Session() as session:
+            session.mount(
+                "https://",
+                HTTPAdapter(
+                    max_retries=Retry(
+                        total=5,
+                        backoff_factor=0.25,
+                        status_forcelist=[500, 502, 503, 504],
+                    )
+                ),
+            )
+            # Let requests URL-encode the query instead of splicing it into the URL.
+            url = f"{self.API_URL}/uniprotkb/search"
+            params = {"fields": desired_field, "format": format_type, "query": query}
+            try:
+                response = session.get(url, params=params)
+                response.raise_for_status()
+                data = response.json()
+            except requests.HTTPError:
+                print(not_found)
+                return None
+            if "results" not in data or not data["results"]:
+                raise ValueError(not_found)
+            return data["results"]
+
+    def _match_primary_accession(self, data: list, primary_accession: str = "") -> list:
+        """
+        Helper function to match the primary accession number with the data.
+
+        Args:
+            data: The data to search through
+            primary_accession: The primary accession number to match
+
+        Returns:
+            A one-element list holding the matching entry, or the first entry
+                if no match is found. Without an accession, data is unchanged.
+        """
+        if primary_accession:
+            matched_data = next(
+                (
+                    entry
+                    for entry in data
+                    if entry["primaryAccession"] == primary_accession
+                ),
+                None,
+            )
+            if matched_data:
+                return [matched_data]
+            print(
+                "The primary accession number provided does not "
+                "match any entry in the data, using the first entry instead."
+            )
+            return [data[0]]
+        return data
+
+    def get_protein_name(
+        self,
+        query: str,
+        primary_accession: str | None = None,
+        short_names: bool = True,
+        alternative_names: bool = True,
+    ) -> list:
+        """
+        Get the protein name for a specific protein, with the option to
+        filter by primary accession number and to include alternative
+        and shortened names.
+
+        Args:
+            query: The query string to search
+            primary_accession: The primary accession number of the protein.
+                Defaults to None.
+            short_names: Whether to include short names in the results. Defaults
+                to True.
+            alternative_names: Whether to include alternative names in the
+                results. Defaults to True.
+
+        Returns:
+            The protein name for the protein if found, otherwise an empty list.
+                If primary_accession is provided, returns the protein name
+                associated with that primary accession number, otherwise returns
+                all the protein names associated with the protein.
+        """
+        data = self.get_data(query, desired_field="protein_name")
+        if not data:
+            return []
+        if primary_accession:
+            data = [
+                entry
+                for entry in data
+                if entry["primaryAccession"] == primary_accession
+            ]
+
+        def _parse_names(name_block: dict, short_names: bool = True) -> list:
+            """Return the comma-split full name plus any short names."""
+            full_name = name_block["fullName"]["value"].split(",")
+            if not short_names:
+                return full_name
+            short = [name["value"] for name in name_block.get("shortNames", [])]
+            return full_name + short
+
+        names = []
+        for d in data:
+            protein_description = d["proteinDescription"]
+            # Entries may lack "recommendedName" (presumably unreviewed ones,
+            # verify against the UniProt schema); skip it rather than KeyError.
+            recommended_names = protein_description.get("recommendedName")
+            if recommended_names:
+                names.extend(_parse_names(recommended_names, short_names=short_names))
+            if alternative_names:
+                # Only the first alternative name is reported.
+                for alt in protein_description.get("alternativeNames", [])[:1]:
+                    names.extend(_parse_names(alt, short_names=short_names))
+        return names
+
+    def _site_key(self, site_type: str) -> tuple[str, str]:
+        """
+        Resolve a site type name to its SiteType value.
+
+        Args:
+            site_type: Name of a SiteType member, in any letter case
+
+        Returns:
+            The member's value: the field to request and the feature type
+                label used to pick matching features
+
+        Raises:
+            ValueError: If site_type does not name a SiteType member
+        """
+        try:
+            return SiteType[site_type.upper()].value
+        except KeyError as err:
+            # Report the accepted member names alongside the rejected input.
+            names = (f"'{member.name}'" for member in SiteType)
+            valid_types = ", ".join(names)
+            raise ValueError(
+                f"Invalid site type '{site_type}'. Valid types are: {valid_types}."
+            ) from err
+
+    def get_relevant_sites(
+        self,
+        query: str,
+        primary_accession: str,
+        site_type: str,
+    ) -> list[dict]:
+        """
+        Get the relevant sites, active sites, or binding sites for a
+        specific protein, given the primary accession number.
+
+        Args:
+            query: The query string to search (e.g. 'hemoglobin')
+            primary_accession: The primary accession number of the protein
+            site_type: The type of site to retrieve
+
+        Returns:
+            The relevant sites for the protein with the given primary accession number
+                The list contains a dict for each site with the following keys:
+                - 'start': The start position of the site
+                - 'start_modifier': The start position modifier of the site
+                - 'end': The end position of the site
+                - 'end_modifier': The end position modifier of the site
+                - 'description': The description of the site ('' if absent)
+                - 'evidences': The evidences for the site
+
+        Raises:
+            ValueError: If site_type is not a valid SiteType name, or if
+                the query returns no results.
+        """
+        # _site_key raises ValueError for an unknown site_type.
+        desired_field, associated_key = self._site_key(site_type)
+        data = self.get_data(query, desired_field=desired_field)
+        if not data:
+            return []
+        data = self._match_primary_accession(data, primary_accession)
+
+        # SiteType labels are lowercase, so compare case-insensitively.
+        relevant_sites = [
+            feature
+            for feature in data[0]["features"]
+            if feature["type"].lower() == associated_key
+        ]
+
+        sites = []
+        for site in relevant_sites:
+            start = site["location"]["start"]["value"]
+            start_modifier = site["location"]["start"].get("modifier", "")
+            end = site["location"]["end"]["value"]
+            end_modifier = site["location"]["end"].get("modifier", "")
+            # Tolerate a missing description, as get_ptm_processing_info
+            # does, instead of raising KeyError.
+            description = site.get("description", "")
+            evidences = site.get("evidences", [])
+            sites.append(
+                {
+                    "start": start,
+                    "start_modifier": start_modifier,
+                    "end": end,
+                    "end_modifier": end_modifier,
+                    "description": description,
+                    "evidences": evidences,
+                }
+            )
+        return sites
+
+    def get_protein_function(
+        self, query: str, primary_accession: str | None = None
+    ) -> list:
+        """
+        Get the protein function for a specific protein, with the option to
+        filter by primary accession number.
+
+        Args:
+            query: The query string to search (e.g. 'hemoglobin')
+            primary_accession: The primary accession number of the protein.
+                Defaults to None.
+
+        Returns:
+            The 'comments' value of each entry: only the entry selected for
+                primary_accession when one is given, otherwise every entry.
+                NOTE(review): if 'comments' is a list of dicts, the
+                "commentType" membership test never excludes anything - confirm.
+        """
+        data = self.get_data(query, desired_field="cc_function")
+        if not data:
+            return []
+        if primary_accession:
+            data = self._match_primary_accession(data, primary_accession)
+        return [
+            entry["comments"]
+            for entry in data
+            if "commentType" not in entry["comments"]
+        ]
+
+    def get_keywords(self, query: str, primary_accession: str | None = None) -> list:
+        """
+        List a protein's keywords as 'category: name' strings, optionally
+        limited to one primary accession number.
+
+        Args:
+            query: The search string (e.g. 'hemoglobin')
+            primary_accession: Accession of the entry to report on.
+                Defaults to None, which reports on every entry found.
+
+        Returns:
+            A list of 'category: name' strings.
+                With primary_accession, only the keywords of the entry
+                selected for that accession; without it, the keywords of
+                all entries. Empty when the lookup returns nothing.
+        """
+        rows = self.get_data(query, desired_field="keyword")
+        if not rows:
+            return []
+
+        if primary_accession:
+            # Keep only the entry selected for the requested accession.
+            rows = self._match_primary_accession(rows, primary_accession)[:1]
+
+        # Flatten every remaining entry's keywords into labelled strings.
+        labelled = []
+        for row in rows:
+            for kw in row["keywords"]:
+                labelled.append(f"{kw['category']}: {kw['name']}")
+        return labelled
+
+    def get_all_sequences(self, query: str) -> list:
+        """
+        Collect the sequence string of every entry matching the query.
+
+        Args:
+            query: The search string (e.g. 'hemoglobin')
+
+        Returns:
+            One sequence per entry found, or an empty list if there are none
+        """
+        entries = self.get_data(query, desired_field="sequence") or []
+        return [item["sequence"]["value"] for item in entries]
+
+    def get_interactions(self, query: str, primary_accession: str) -> list:
+        """
+        Get the interactions for a specific protein, given the primary accession
+        number.
+
+        Args:
+            query: The query string to search (e.g. 'hemoglobin')
+            primary_accession: The primary accession number of the protein
+                (required)
+
+        Returns:
+            The interactions of the first comment found for the selected
+                entry, or an empty list when there are no comments
+        """
+        data = self.get_data(query, desired_field="cc_interaction")
+        if not data:
+            return []
+        data = self._match_primary_accession(data, primary_accession)
+        comments = (c for entry in data for c in entry["comments"])
+        # next() needs a default here: with no comments at all the bare
+        # call would leak StopIteration to the caller instead of returning
+        # an empty result.
+        return next((c["interactions"] for c in comments), [])
+
+    def get_subunit_structure(self, query: str, primary_accession: str) -> list:
+        """
+        Report the subunit structure texts recorded for one protein entry,
+        selected by its primary accession number.
+
+        Args:
+            query: The search string (e.g. 'hemoglobin')
+            primary_accession: Accession of the entry to report on
+
+        Returns:
+            One dict per text with the keys 'subunit structure' and
+                'evidence'; an empty list when nothing is found
+        """
+        results = self.get_data(query, desired_field="cc_subunit")
+        if not results:
+            return []
+        entry = self._match_primary_accession(results, primary_accession)[0]
+        text_groups = [comment["texts"] for comment in entry["comments"]]
+        if not text_groups:
+            print("No subunit structure information found.")
+            return []
+        subunits = []
+        for group in text_groups:
+            for text in group:
+                evidence = text.get("evidences", "No evidence provided")
+                subunits.append(
+                    {"subunit structure": text["value"], "evidence": evidence}
+                )
+        return subunits
+
+    def get_sequence_info(self, query: str, primary_accession: str) -> dict:
+        """
+        Get the sequence information for a specific protein, given the primary
+        accession number.
+
+        Args:
+            query: The query string to search (e.g. 'hemoglobin')
+            primary_accession: The primary accession number of the protein
+
+        Returns:
+            The sequence information for the protein with the given accession
+                The dictionary contains the following keys:
+                - 'sequence': The sequence of the protein
+                - 'length': The length of the protein sequence
+                - 'molWeight': The molecular weight of the protein
+                - 'crc64': The CRC64 hash of the protein sequence (probably not useful)
+                - 'md5': The MD5 hash of the protein sequence (probably not useful)
+        """
+        seq_info = self.get_data(query, desired_field="sequence")
+        if not seq_info:
+            return {}
+        entry = self._match_primary_accession(seq_info, primary_accession)[0]
+        # Copy so the fetched entry is not mutated while renaming the key.
+        seq_info_specific = dict(entry["sequence"])
+        seq_info_specific["sequence"] = seq_info_specific.pop("value")
+        return seq_info_specific
+
+    def _ptm_key(self, ptm_key: str) -> tuple[str, str]:
+        """
+        Resolve a PTM/Processing key (e.g. chain, disulfide-bond) to its value.
+
+        Args:
+            ptm_key: PTMType member name; spaces and hyphens count as '_'
+
+        Returns:
+            The member's value: the field to request and the feature type
+                label used to pick matching features
+
+        Raises:
+            ValueError: If ptm_key does not name a PTMType member
+        """
+        lookup_name = ptm_key.replace(" ", "_").replace("-", "_").lower().upper()
+        try:
+            return PTMType[lookup_name].value
+        except KeyError as err:
+            readable = (
+                f"'{member.name.replace('_', ' ').lower()}'" for member in PTMType
+            )
+            valid_keys = ", ".join(readable)
+            raise ValueError(
+                "Invalid PTM/Processing key, "
+                f"please use one of the following: {valid_keys}."
+            ) from err
+
+    def get_ptm_processing_info(
+        self,
+        query: str,
+        primary_accession: str,
+        ptm_key: str,
+    ) -> list[dict]:
+        """
+        Report one kind of PTM/processing feature for a protein entry,
+        selected by its primary accession number.
+
+        Args:
+            query: The search string (e.g. 'hemoglobin')
+            primary_accession: Accession of the entry to report on
+            ptm_key: Which PTM/Processing feature to report
+
+        Returns:
+            A list with one dictionary per matching feature, empty when
+                nothing is found.
+                Each dictionary has the
+                following keys:
+                - 'start': The start position
+                - 'start_modifier': The start position modifier
+                - 'end': The end position
+                - 'end_modifier': The end position modifier
+                - 'description': The description
+                - 'featureId': The feature ID
+
+        Raises:
+            ValueError: If ptm_key is not a valid PTM/Processing key, or if
+                the query returns no results.
+        """
+        desired_field, associated_key = self._ptm_key(ptm_key)
+        if not desired_field:
+            return []
+
+        entries = self.get_data(query, desired_field=desired_field)
+        if not entries:
+            return []
+
+        entry = self._match_primary_accession(entries, primary_accession)[0]
+
+        processing_info = []
+        for feature in entry["features"]:
+            # PTM feature types are matched exactly (case-sensitive).
+            if feature["type"] != associated_key:
+                continue
+            location = feature["location"]
+            begin, finish = location["start"], location["end"]
+            processing_info.append(
+                {
+                    "start": begin["value"],
+                    "start_modifier": begin.get("modifier", ""),
+                    "end": finish["value"],
+                    "end_modifier": finish.get("modifier", ""),
+                    "description": feature.get("description", ""),
+                    "featureId": feature.get("featureId", ""),
+                }
+            )
+        return processing_info
+
+    def _structure_key(self, structure_key: str) -> tuple[str, str]:
+        """
+        Resolve a structure key (beta, helix, turn) to its StructureMap value.
+
+        Args:
+            structure_key: Name of a StructureMap member, in any letter case
+
+        Returns:
+            The member's value: the field to request and the feature type
+                label used to pick matching features
+
+        Raises:
+            ValueError: If structure_key does not name a StructureMap member
+        """
+        try:
+            return StructureMap[structure_key.upper()].value
+        except KeyError as err:
+            names = (f"'{member.name}'" for member in StructureMap)
+            valid_keys = ", ".join(names)
+            raise ValueError(
+                f"Invalid structure key '{structure_key}'. "
+                f"Valid keys are: {valid_keys}."
+            ) from err
+
+    def get_3d_info(self, query: str, primary_accession: str) -> list:
+        """
+        Return the 3D structure cross-references of one protein entry,
+        selected by its primary accession number.
+
+        Args:
+            query: The search string (e.g. 'hemoglobin')
+            primary_accession: Accession of the entry to report on
+
+        Returns:
+            The entry's 'uniProtKBCrossReferences' value, or an empty list
+                when the lookup returns nothing
+        """
+        entries = self.get_data(query, desired_field="structure_3d")
+        if entries:
+            selected = self._match_primary_accession(entries, primary_accession)
+            return selected[0]["uniProtKBCrossReferences"]
+        return []
+
+    def get_structure_info(
+        self,
+        query: str,
+        primary_accession: str,
+        structure_key: str,
+    ) -> list[dict]:
+        """
+        Report the beta strands, helices, or turns of one protein entry,
+        selected by its primary accession number.
+
+        Args:
+            query: The search string (e.g. 'hemoglobin')
+            primary_accession: Accession of the entry to report on
+            structure_key: Which kind of structure to report
+
+        Returns:
+            A list with one dictionary per matching feature, empty when
+                nothing is found.
+                Each dictionary has the
+                following keys:
+                - 'start': The start position
+                - 'start_modifier': The start position modifier
+                - 'end': The end position
+                - 'end_modifier': The end position modifier
+                - 'evidences': The evidences for the structure
+
+        Raises:
+            ValueError: If structure_key is not a valid structure key, or if
+                the query returns no results.
+        """
+        desired_field, associated_key = self._structure_key(structure_key)
+        if not desired_field:
+            return []
+        entries = self.get_data(query, desired_field=desired_field)
+        if not entries:
+            return []
+        entry = self._match_primary_accession(entries, primary_accession)[0]
+
+        def _summarise(feature: dict) -> dict:
+            """Reduce one feature to its location bounds and evidences."""
+            begin = feature["location"]["start"]
+            finish = feature["location"]["end"]
+            return {
+                "start": begin["value"],
+                "start_modifier": begin.get("modifier", ""),
+                "end": finish["value"],
+                "end_modifier": finish.get("modifier", ""),
+                "evidences": feature.get("evidences", []),
+            }
+
+        # Feature types are compared exactly (case-sensitive) against the
+        # label stored in the StructureMap value.
+        return [
+            _summarise(feature)
+            for feature in entry["features"]
+            if feature["type"] == associated_key
+        ]
+
+    def get_ids(
+        self, query: str, single_id: bool = False, include_uniprotkbids=False
+    ) -> list | str:
+        """
+        Get the IDs for a specific protein.
+
+        Args:
+            query: The query string to search (e.g. 'hemoglobin')
+            single_id: Whether to return a single ID or all IDs. Defaults to
+                False.
+            include_uniprotkbids: Whether to include UniProtKB IDs in the
+                results. Defaults to False.
+
+        Returns:
+            The unique IDs found, or one ID string when single_id is True
+        """
+        ids_ = self.get_data(query, desired_field="id") or []
+        all_ids = [entry["primaryAccession"] for entry in ids_]
+        if include_uniprotkbids:
+            all_ids.extend(entry["uniProtkbId"] for entry in ids_)
+        accession = self.get_data(query, desired_field="accession") or []
+        all_ids.extend(entry["primaryAccession"] for entry in accession)
+        if single_id:
+            # pop() keeps returning the last collected ID; an empty lookup
+            # yields [] instead of raising IndexError.
+            return all_ids.pop() if all_ids else []
+        return list(set(all_ids))
+
+    def get_gene_names(self, query: str, primary_accession: str | None = None) -> list:
+        """
+        Get the gene names for a specific protein, with the option to filter by
+        primary accession number.
+
+        Args:
+            query: The query string to search (e.g. 'hemoglobin')
+            primary_accession: The primary accession number of the protein.
+                Defaults to None.
+
+        Returns:
+            The gene names, synonyms, ORF names and ordered locus names found,
+                or an empty list if there are none. Genes lacking a given
+                name type are skipped for that type. If primary_accession is
+                provided, only the entry with that primary accession number
+                is used; otherwise all entries returned by the query are.
+        """
+        data = self.get_data(query, desired_field="gene_names")
+        if not data:
+            return []
+        if primary_accession:
+            data = [
+                entry
+                for entry in data
+                if entry["primaryAccession"] == primary_accession
+            ]
+        all_genes = []
+        for entry in data:
+            gene_info = entry.get("genes", [])
+            gene_name = [
+                gene["geneName"]["value"] for gene in gene_info if "geneName" in gene
+            ]
+            synonyms = [
+                value["value"]
+                for gene in gene_info
+                if "synonyms" in gene
+                for value in gene["synonyms"]
+            ]
+            orf_names = [
+                value["value"]
+                for gene in gene_info
+                if "orfNames" in gene
+                for value in gene["orfNames"]
+            ]
+            ordered_locus = [
+                value["value"]
+                for gene in gene_info
+                if "orderedLocusNames" in gene
+                for value in gene["orderedLocusNames"]
+            ]
+            all_genes.extend(gene_name + synonyms + orf_names + ordered_locus)
+        return all_genes
+
+    def get_kinetics(self, query: str, primary_accession: str | None = None) -> list:
+        """
+        Get the kinetics information for a protein, optionally restricted to
+        one primary accession number.
+
+        Args:
+            query: The query string to search (e.g. 'hemoglobin')
+            primary_accession: If given, only entries matching this primary
+                accession number are kept. Defaults to None (keep all).
+
+        Returns:
+            The non-empty "comments" value of each remaining entry; an empty
+                list if the query returns no data.
+        """
+        data = self.get_data(query, desired_field="kinetics")
+        if not data:
+            return []
+        if primary_accession:
+            data = self._match_primary_accession(data, primary_accession)
+        # .get(): entries without a "comments" key are skipped, not a KeyError.
+        return [entry["comments"] for entry in data if entry.get("comments")]
+
+
+class MapProteinRepresentation(BaseTool):
+    name = "MapProteinRepresentation"
+    description = (
+        "Fetch specific ID mapping from UniProt. You must specify "
+        "the database to map from and to, as well as the "
+        "representation of the protein. The defaults are "
+        "'UniProtKB_AC-ID' and 'PDB', respectively."
+    )
+    uniprot = QueryUniprot()
+
+    def _run(
+        self, query: str, src_db: str = "UniProtKB_AC-ID", dst_db: str = "PDB"
+    ) -> str:
+        """Map `query` from `src_db` to `dst_db` and return the result as text."""
+        try:
+            lookup = self.uniprot.get_sequence_mapping
+            return str(lookup(query, from_db=src_db, to_db=dst_db))
+        except Exception as err:
+            # Broad catch: whatever goes wrong is handed back to the caller
+            # as text rather than raised.
+            return str(err)
+
+    async def _arun(self, query: str, src_db: str | None, dst_db: str | None) -> str:
+        """Async entry point; not supported, so it always raises."""
+        raise NotImplementedError("This tool does not support asynchronous execution.")
+
+
+class UniprotID2Name(BaseTool):
+    name = "UniprotID2Name"
+    description = (
+        "Get the protein name for a specific protein, "
+        "with the option to filter by primary accession "
+        "number. If you have the primary accession "
+        "number, you can use it to filter the results. "
+        "Otherwise, all names associated with the "
+        "protein will be returned. Input the uniprot ID "
+        "of the protein."
+    )
+    uniprot = QueryUniprot()
+    # Field declaration: BaseTool (presumably pydantic-based) rejects unknown attrs.
+    all_names: bool = True
+
+    def __init__(self, all_names: bool = True):
+        super().__init__()
+        self.all_names = all_names
+
+    def _run(self, query: str, primary_accession: str = "") -> str:
+        """Return the protein names joined by ", ", or the error message."""
+        try:
+            flag = self.all_names  # one switch for short and alternative names
+            names = self.uniprot.get_protein_name(
+                query, primary_accession, short_names=flag, alternative_names=flag
+            )
+            return ", ".join(names)
+        except Exception as e:
+            return str(e)
+
+    async def _arun(self, query: str, primary_accession: str) -> str:
+        """Async entry point; not supported, so it always raises."""
+        raise NotImplementedError("This tool does not support asynchronous execution.")
+
+
+class GetBindingSites(BaseTool):
+    name = "GetBindingSites"
+    description = (
+        "Get the binding sites known for a specific protein, "
+        "given the primary accession number. "
+        "Both the query string and primary accession number "
+        "are required. "
+    )
+    uniprot = QueryUniprot()
+
+    def _run(self, query: str, primary_accession: str = "") -> str:
+        """Return the "binding" sites found for the query as text."""
+        try:
+            lookup = self.uniprot.get_relevant_sites
+            return str(lookup(query, primary_accession, "binding"))
+        except Exception as err:  # any failure is returned as its message
+            return str(err)
+
+    async def _arun(self, query: str, primary_accession: str) -> str:
+        """Async entry point; not supported, so it always raises."""
+        raise NotImplementedError("This tool does not support asynchronous execution.")
+
+
+class GetActiveSites(BaseTool):
+    name = "GetActiveSites"
+    description = (
+        "Get the active sites known for a specific protein, "
+        "given the primary accession number. "
+        "Both the query string and primary accession number "
+        "are required. "
+    )
+    uniprot = QueryUniprot()
+
+    def _run(self, query: str, primary_accession: str = "") -> str:
+        """Return the "active" sites found for the query as text."""
+        try:
+            lookup = self.uniprot.get_relevant_sites
+            return str(lookup(query, primary_accession, "active"))
+        except Exception as err:  # any failure is returned as its message
+            return str(err)
+
+    async def _arun(self, query: str, primary_accession: str) -> str:
+        """Async entry point; not supported, so it always raises."""
+        raise NotImplementedError("This tool does not support asynchronous execution.")
+
+
+class GetRelevantSites(BaseTool):
+    name = "GetRelevantSites"
+    description = (
+        "Get the relevant sites for a specific protein, given the "
+        "primary accession number. "
+        "You must provide the query string and primary accession number. "
+        "The relevant sites are sites that are known to be important "
+        "for the protein's function, "
+        "but are not necessarily active or binding sites."
+    )
+    uniprot = QueryUniprot()
+
+    def _run(self, query: str, primary_accession: str = "") -> str:
+        """Return the "sites" entries found for the query as text."""
+        try:
+            lookup = self.uniprot.get_relevant_sites
+            return str(lookup(query, primary_accession, "sites"))
+        except Exception as err:  # any failure is returned as its message
+            return str(err)
+
+    async def _arun(self, query: str, primary_accession: str) -> str:
+        """Async entry point; not supported, so it always raises."""
+        raise NotImplementedError("This tool does not support asynchronous execution.")
+
+
+class GetAllKnownSites(BaseTool):
+    name = "GetAllKnownSites"
+    description = (
+        "Get all known sites for a specific protein, given the "
+        "primary accession number. "
+        "You must provide the query string and primary accession number. "
+        "This tool is a one-stop shop to get all known sites for the protein, "
+        "including active sites, binding sites, "
+        "and other relevant sites."
+    )
+    uniprot = QueryUniprot()
+
+    def _run(self, query: str, primary_accession: str = "") -> str:
+        """Summarise the active, binding and other relevant sites as text.
+
+        Args:
+            query: The query string to search.
+            primary_accession: Primary accession number, forwarded unchanged
+                to each site lookup.
+
+        Returns:
+            Three newline-separated parts, one per site kind: the label
+            followed by the sites found, or a fixed sentence when the lookup
+            result is empty. If anything raises, the exception message is
+            returned instead.
+        """
+        # (site kind, label used when sites exist, sentence used otherwise)
+        report_plan = (
+            ("active", "Active Sites: ", "No known active sites."),
+            ("binding", "Binding Sites: ", "No known binding sites."),
+            ("sites", "Other Relevant Sites: ", "No other relevant sites."),
+        )
+        try:
+            report = []
+            for kind, label, fallback in report_plan:
+                found = self.uniprot.get_relevant_sites(
+                    query, primary_accession, kind
+                )
+                # Falsy (empty) results get the fixed sentence.
+                report.append(f"{label}{found}" if found else fallback)
+            return "\n".join(report)
+        except Exception as err:
+            return str(err)
+
+    async def _arun(self, query: str, primary_accession: str) -> str:
+        """Async entry point; not supported, so it always raises."""
+        raise NotImplementedError("This tool does not support asynchronous execution.")
+
+
+class GetProteinFunction(BaseTool):
+    name = "GetProteinFunction"
+    description = (
+        "Get the protein function for a specific protein, "
+        "with the option to filter by primary accession number. "
+        "If you have the primary accession number, you can use "
+        "it to filter the results. Otherwise, all functions "
+        "associated with the protein will be returned. "
+        "Input the uniprot ID of the protein."
+    )
+    uniprot = QueryUniprot()
+
+    def _run(self, query: str, primary_accession: str = "") -> str:
+        """Return the function annotations joined by ", ", or the error message."""
+        try:
+            lookup = self.uniprot.get_protein_function
+            functions = lookup(query, primary_accession=primary_accession)
+            # Entries may be nested data rather than strings; str() keeps join() safe.
+            return ", ".join(map(str, functions))
+        except Exception as e:
+            return str(e)
+
+    async def _arun(self, query: str, primary_accession: str) -> str:
+        """Async entry point; not supported, so it always raises."""
+        raise NotImplementedError("This tool does not support asynchronous execution.")
+
+
+class GetProteinAssociatedKeywords(BaseTool):
+    name = "GetProteinAssociatedKeywords"
+    description = (
+        "Get the keywords associated with a specific protein, "
+        "with the option to filter by primary accession number. "
+        "If you have the primary accession number, "
+        "you can use it to filter the results. "
+        "Otherwise, all keywords associated with the protein will be returned. "
+        "Input the uniprot ID of the protein."
+    )
+    uniprot = QueryUniprot()
+
+    def _run(self, query: str, primary_accession: str = "") -> str:
+        """Return the keywords joined by ", ", or the error message."""
+        try:
+            lookup = self.uniprot.get_keywords
+            found = lookup(query, primary_accession=primary_accession)
+            return ", ".join(found)
+        except Exception as err:
+            # Failures are reported as text rather than raised.
+            return str(err)
+
+    async def _arun(self, query: str, primary_accession: str) -> str:
+        """Async entry point; not supported, so it always raises."""
+        raise NotImplementedError("This tool does not support asynchronous execution.")
+
+
+class GetAllSequences(BaseTool):
+    name = "GetAllSequences"
+    description = (
+        "Get all the sequences for a specific protein. "
+        "Input the uniprot ID of the protein. "
+        "This tool will return all sequences associated with the protein."
+    )
+    uniprot = QueryUniprot()
+
+    def _run(self, query: str) -> str:
+        """Return every sequence found for `query`, joined by ", "."""
+        try:
+            sequences = self.uniprot.get_all_sequences(query)
+            return ", ".join(sequences)
+        except Exception as e:  # any failure is returned as its message
+            return str(e)
+
+    async def _arun(self, query: str) -> str:
+        """Async entry point; not supported, so it always raises."""
+        raise NotImplementedError("This tool does not support asynchronous execution.")
+
+
+class GetInteractions(BaseTool):
+    name = "GetInteractions"
+    description = (
+        "Get the interactions for a specific protein, "
+        "given the primary accession number. "
+        "Both the query string and primary accession number are required. "
+        "This tool will return the interactions for the protein."
+    )
+    uniprot = QueryUniprot()
+
+    def _run(self, query: str, primary_accession: str = "") -> str:
+        """Return the interactions found for the query as text."""
+        try:
+            lookup = self.uniprot.get_interactions
+            return str(lookup(query, primary_accession))
+        except Exception as err:  # any failure is returned as its message
+            return str(err)
+
+    async def _arun(self, query: str, primary_accession: str) -> str:
+        """Async entry point; not supported, so it always raises."""
+        raise NotImplementedError("This tool does not support asynchronous execution.")
+
+
+class GetSubunitStructure(BaseTool):
+    name = "GetSubunitStructure"
+    description = (
+        "Get the subunit structure information for a specific protein, given "
+        "the primary accession number. "
+        "Both the query string and primary accession number are required. "
+        "This tool will return the subunit structure information for the protein."
+    )
+    uniprot = QueryUniprot()
+
+    def _run(self, query: str, primary_accession: str = "") -> str:
+        """Return the subunit structure information for the query as text."""
+        try:
+            lookup = self.uniprot.get_subunit_structure
+            return str(lookup(query, primary_accession))
+        except Exception as err:
+            # Broad catch: whatever goes wrong is handed back to the caller
+            # as text rather than raised.
+            return str(err)
+
+    async def _arun(self, query: str, primary_accession: str) -> str:
+        """Async entry point; not supported, so it always raises."""
+        raise NotImplementedError("This tool does not support asynchronous execution.")
+
+
+class GetSequenceInfo(BaseTool):
+    name = "GetSequenceInfo"
+    description = (
+        "Get the sequence information for a specific protein, given the "
+        "primary accession number. "
+        "Both the query string and primary accession number are required. "
+        "This tool will return the sequence, length, and molecular weight. "
+    )
+    uniprot = QueryUniprot()
+
+    def _run(self, query: str, primary_accession: str = "") -> str:
+        """Return the sequence record as text, minus its checksum entries."""
+        try:
+            record = self.uniprot.get_sequence_info(query, primary_accession)
+            # The crc64 and md5 checksums are not useful to the agent; drop them.
+            for checksum_key in ("crc64", "md5"):
+                record.pop(checksum_key, None)
+            return str(record)
+        except Exception as err:
+            return str(err)
+
+    async def _arun(self, query: str, primary_accession: str) -> str:
+        """Async entry point; not supported, so it always raises."""
+        raise NotImplementedError("This tool does not support asynchronous execution.")
+
+
+class GetPDBProcessingInfo(BaseTool):
+    name = "GetPDBProcessingInfo"
+    description = (
+        "Get the processing information for a specific protein, "
+        "given the primary accession number. Both the query string "
+        "and primary accession number are required. Input the query, accession "
+        "number, and the type of processing information to retrieve (e.g., "
+        "chain, crosslink, disulfide-bond, etc.). Here is a list of the "
+        "processing types you can retrieve: chain, crosslink, disulfide-bond, "
+        "glycosylation, initiator-methionine, lipidation, modified-residue, "
+        "peptide, propeptide, signal-peptide, transit-peptide"
+    )
+    uniprot = QueryUniprot()
+
+    def _run(
+        self, query: str, processing_type: str, primary_accession: str = ""
+    ) -> str:
+        """Return the `processing_type` features found for the query as text."""
+        try:
+            lookup = self.uniprot.get_ptm_processing_info
+            return str(lookup(query, primary_accession, processing_type))
+        except Exception as e:
+            return str(e)
+
+    async def _arun(
+        self, query: str, processing_type: str = "", primary_accession: str = ""
+    ) -> str:
+        """Async entry point; not supported, so it always raises."""
+        raise NotImplementedError("This tool does not support asynchronous execution.")
+
+
+class GetPDB3DInfo(BaseTool):
+    name = "GetPDB3DInfo"
+    description = (
+        "Get the 3D structure information for a specific protein, given the "
+        "primary accession number. "
+        "Both the query string and primary accession number are required. "
+        "This tool will return information from the PDB database "
+        "for the protein, including the PDB ID, chain, and resolution."
+    )
+    uniprot = QueryUniprot()
+
+    def _run(self, query: str, primary_accession: str = "") -> str:
+        """Return the 3D structure information for the query as text."""
+        try:
+            lookup = self.uniprot.get_3d_info
+            return str(lookup(query, primary_accession))
+        except Exception as err:  # any failure is returned as its message
+            return str(err)
+
+    async def _arun(self, query: str, primary_accession: str) -> str:
+        """Async entry point; not supported, so it always raises."""
+        raise NotImplementedError("This tool does not support asynchronous execution.")
+
+
+class GetTurnsBetaSheetsHelices(BaseTool):
+    name = "GetTurnsBetaSheetsHelices"
+    description = (
+        "Get the number and location of turns, beta sheets, and helices for a "
+        "specific protein, given the primary accession number. "
+        "Both the query string and primary accession number are required. "
+        "This tool will return the number and location of turns, "
+        "beta sheets, and helices for the protein. "
+    )
+    uniprot = QueryUniprot()
+
+    def _run(self, query: str, primary_accession: str = "") -> str:
+        """Report the turns, beta sheets and helices for the query as text."""
+        try:
+            lookup = self.uniprot.get_structure_info
+            turns, beta_sheets, helices = (
+                lookup(query, primary_accession, kind)
+                for kind in ("turn", "beta", "helix")
+            )
+            return f"Turns: {turns}\nBeta sheets: {beta_sheets}\nHelices: {helices}"
+        except Exception as err:
+            return str(err)
+
+    async def _arun(self, query: str, primary_accession: str) -> str:
+        """Async entry point; not supported, so it always raises."""
+        raise NotImplementedError("This tool does not support asynchronous execution.")
+
+
+class GetUniprotID(BaseTool):
+    name = "GetUniprotID"
+    description = (
+        "Get the UniProt ID for a specific protein. "
+        "Input the query string of the protein. "
+        "This tool will return the UniProt ID of the protein. "
+        "You can optionally specify whether you want to return "
+        "all IDs or just one ID. By default, a single ID will be "
+        "returned."
+    )
+    uniprot = QueryUniprot()
+    include_uniprotkbids: bool = False  # declared so __init__ may assign it
+
+    def __init__(self, include_uniprotkbids: bool = False):
+        super().__init__()
+        self.include_uniprotkbids = include_uniprotkbids
+
+    def _run(self, query: str, all_ids: bool = False) -> str:
+        """Return one UniProt ID, or all of them joined by ", " when `all_ids`."""
+        try:
+            lookup = self.uniprot.get_ids
+            kb = self.include_uniprotkbids
+            ids = lookup(query, single_id=not all_ids, include_uniprotkbids=kb)
+            # A single ID arrives as one string; join() would split its characters.
+            return ids if isinstance(ids, str) else ", ".join(ids)
+        except Exception as e:
+            return str(e)
+
+    async def _arun(self, query: str, all_ids: bool) -> str:
+        """Async entry point; not supported, so it always raises."""
+        raise NotImplementedError("This tool does not support asynchronous execution.")
+
+
+class GetGeneNames(BaseTool):
+    name = "GetGeneNames"
+    description = (
+        "Get the gene names associated with a specific protein, with the "
+        "option to filter by primary accession number. "
+        "If you have the primary accession number, "
+        "you can use it to filter the results. "
+        "Otherwise, all gene names associated with the protein will be returned. "
+        "Input the uniprot ID of the protein."
+    )
+    uniprot = QueryUniprot()
+
+    def _run(self, query: str, primary_accession: str = "") -> str:
+        """Return the gene names joined by ", ", or the error message."""
+        try:
+            lookup = self.uniprot.get_gene_names
+            found = lookup(query, primary_accession=primary_accession)
+            return ", ".join(found)
+        except Exception as err:
+            # Failures are reported as text rather than raised.
+            return str(err)
+
+    async def _arun(self, query: str, primary_accession: str) -> str:
+        """Async entry point; not supported, so it always raises."""
+        raise NotImplementedError("This tool does not support asynchronous execution.")
+
+
+class GetKineticProperties(BaseTool):
+    name = "GetKineticProperties"
+    description = (
+        "Get the kinetics information for a specific protein, given the "
+        "primary accession number. Both the query string and primary "
+        "accession number are required. "
+    )
+    uniprot = QueryUniprot()
+
+    def _run(self, query: str, primary_accession: str = "") -> str:
+        """Return the kinetics information found for the query as text."""
+        try:
+            lookup = self.uniprot.get_kinetics
+            return str(lookup(query, primary_accession))
+        except Exception as err:  # any failure is returned as its message
+            return str(err)
+
+    async def _arun(self, query: str, dependency: str, primary_accession: str) -> str:
+        """Always raises (unsupported). NOTE(review): _run has no `dependency` arg."""
+        raise NotImplementedError("This tool does not support asynchronous execution.")
diff --git a/mdagent/tools/maketools.py b/mdagent/tools/maketools.py
index df5b597b..a5482cbc 100644
--- a/mdagent/tools/maketools.py
+++ b/mdagent/tools/maketools.py
@@ -23,7 +23,24 @@
     ComputeRMSF,
     ContactsTool,
     DistanceMatrixTool,
+    GetActiveSites,
+    GetAllKnownSites,
+    GetAllSequences,
+    GetBindingSites,
+    GetGeneNames,
+    GetInteractions,
+    GetKineticProperties,
+    GetPDB3DInfo,
+    GetPDBProcessingInfo,
+    GetProteinAssociatedKeywords,
+    GetProteinFunction,
+    GetRelevantSites,
+    GetSequenceInfo,
+    GetSubunitStructure,
+    GetTurnsBetaSheetsHelices,
+    GetUniprotID,
     ListRegistryPaths,
+    MapProteinRepresentation,
     ModifyBaseSimulationScriptTool,
     MomentOfInertia,
     PackMolTool,
@@ -40,6 +57,7 @@
     SimulationOutputFigures,
     SmallMolPDB,
     SolventAccessibleSurfaceArea,
+    UniprotID2Name,
     VisualizeProtein,
 )
 
@@ -94,6 +112,24 @@ def make_all_tools(
         SmallMolPDB(path_registry=path_instance),
         SolventAccessibleSurfaceArea(path_registry=path_instance),
         VisualizeProtein(path_registry=path_instance),
+        MapProteinRepresentation(),
+        UniprotID2Name(),
+        GetBindingSites(),
+        GetActiveSites(),
+        GetRelevantSites(),
+        GetAllKnownSites(),
+        GetProteinFunction(),
+        GetProteinAssociatedKeywords(),
+        GetAllSequences(),
+        GetInteractions(),
+        GetSubunitStructure(),
+        GetSequenceInfo(),
+        GetPDBProcessingInfo(),
+        GetPDB3DInfo(),
+        GetTurnsBetaSheetsHelices(),
+        GetUniprotID(),
+        GetGeneNames(),
+        GetKineticProperties(),
     ]
 
     all_tools += base_tools
diff --git a/tests/test_preprocess/test_uniprot.py b/tests/test_preprocess/test_uniprot.py
new file mode 100644
index 00000000..e4b118bc
--- /dev/null
+++ b/tests/test_preprocess/test_uniprot.py
@@ -0,0 +1,564 @@
+import pytest
+
+from mdagent.tools.base_tools.preprocess_tools.uniprot import (
+    GetAllKnownSites,
+    QueryUniprot,
+)
+
+
+@pytest.fixture()
+def query_uniprot() -> QueryUniprot:  # client handed to tests that request it
+    return QueryUniprot()
+
+
+def test_match_primary_accession(query_uniprot):
+    """Only the entry whose primaryAccession equals the target is kept."""
+    entry_type = "UniProtKB reviewed (Swiss-Prot)"
+    entries = [
+        {"entryType": entry_type, "primaryAccession": accession}
+        for accession in ("P68871", "P69905")
+    ]
+    wanted = {"entryType": entry_type, "primaryAccession": "P69905"}
+
+    matched = query_uniprot._match_primary_accession(entries, "P69905")
+    assert matched == [wanted]
+
+
+def test_get_protein_name_accession(query_uniprot):
+    """Names of P00390 for each short/alternative flag combination."""
+    full = ["Glutathione reductase", " mitochondrial"]
+    with_short = [*full, "GR", "GRase"]
+    def names(short, alternative):
+        return query_uniprot.get_protein_name(
+            "gsr", "P00390", short_names=short, alternative_names=alternative
+        )
+
+    assert full == names(False, False)
+    assert with_short == names(True, False)
+    assert with_short == names(True, True)
+
+
+def test_get_protein_name_no_accession(query_uniprot):
+    """Name lookup for "gsr" without an accession filter.
+
+    For every (short_names, alternative_names) setting the result has to
+    contain the listed names and reach at least the given size.
+    """
+    full = ["Glutathione reductase", " mitochondrial"]
+    with_short = [*full, "GR", "GRase"]
+
+    # (short_names, alternative_names, names that must appear, minimum size)
+    scenarios = (
+        (False, False, full, 29),
+        (True, False, with_short, 46),
+        (True, True, with_short, 58),
+    )
+
+    for short, alternative, must_have, min_size in scenarios:
+        found = query_uniprot.get_protein_name(
+            "gsr", short_names=short, alternative_names=alternative
+        )
+        assert all(name in found for name in must_have)
+        # The size is a lower bound only: more names than this may come
+        # back without failing the test.
+        assert len(found) >= min_size
+
+
+def test_get_relevant_sites(query_uniprot):
+    """For P70619 one known active site is found; no binding or other sites."""
+    active_sites = query_uniprot.get_relevant_sites("gsr", "P70619", "active")
+    binding_sites = query_uniprot.get_relevant_sites("gsr", "P70619", "binding")
+    other_sites = query_uniprot.get_relevant_sites("gsr", "P70619", "sites")
+    expected_active_site = {
+        "start": 413,
+        "start_modifier": "EXACT",
+        "end": 413,
+        "end_modifier": "EXACT",
+        "description": "Proton acceptor",
+        "evidences": [{"evidenceCode": "ECO:0000250"}],
+    }
+    assert expected_active_site in active_sites
+    assert not binding_sites
+    assert not other_sites
+
+
+def test_get_all_known_sites():
+    """P69905 report: no active sites, but binding and other sites."""
+    binding_prefix = (
+        "Binding Sites: [{'start': 59, "
+        "'start_modifier': 'EXACT', 'end': 59, "
+        "'end_modifier': 'EXACT', 'description': "
+        "'', 'evidences': [{'evidenceCode': "
+        "'ECO:0000255', 'source': 'PROSITE-ProRule', "
+        "'id': 'PRU00238'}]},"
+    )
+    other_prefix = (
+        "Other Relevant Sites: [{'start': 9, "
+        "'start_modifier': 'EXACT', 'end': 10, "
+        "'end_modifier': 'EXACT', 'description': "
+        "'(Microbial infection) Cleavage; by "
+        "N.americanus apr-2', 'evidences': "
+        "[{'evidenceCode': 'ECO:0000269', 'source': "
+        "'PubMed', 'id': '12552433'}]}"
+    )
+    report = GetAllKnownSites()._run("hemoglobin", "P69905")
+    assert "No known active sites." in report
+    assert binding_prefix in report
+    assert other_prefix in report
+
+
+def test_get_protein_function(query_uniprot):
+    """Both expected function texts appear in the first P69905 result."""
+    expected = (
+        "Involved in oxygen transport from the lung to the various peripheral tissues",
+        "Hemopressin acts as an antagonist "
+        "peptide of the cannabinoid receptor "
+        "CNR1 (PubMed:18077343). "
+        "Hemopressin-binding efficiently blocks "
+        "cannabinoid receptor CNR1 and "
+        "subsequent signaling (PubMed:18077343)",
+    )
+    # Only the first returned element is inspected.
+    comments = query_uniprot.get_protein_function("hemoglobin", "P69905")[0]
+    found = [comment["texts"][0]["value"] for comment in comments]
+    assert all(text in found for text in expected)
+
+
+def test_get_keywords(query_uniprot):
+    """P70619 keywords include the expected ones and recur unfiltered."""
+    expected = (
+        "PTM: Disulfide bond",
+        "Ligand: FAD",
+        "Ligand: Flavoprotein",
+        "Ligand: NADP",
+        "Molecular function: Oxidoreductase",
+        "Domain: Redox-active center",
+        "Technical term: Reference proteome",
+    )
+    filtered = query_uniprot.get_keywords("gsr", primary_accession="P70619")
+    assert all(keyword in filtered for keyword in expected)
+    unfiltered = query_uniprot.get_keywords("gsr")
+    assert len(unfiltered) >= len(filtered)
+    assert all(keyword in unfiltered for keyword in filtered)
+
+
+def test_get_all_sequences(query_uniprot):
+    """The "gfp" query returns at least 25 sequences, one of them known."""
+    known_gfp = "MSKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKQHDFFKSAMPEGYVQERTIFFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYIMADKQKNGIKVNFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK"  # noqa: E501
+    sequences = query_uniprot.get_all_sequences("gfp")
+    assert known_gfp in sequences
+    assert len(sequences) >= 25
+
+
+def test_get_interactions(query_uniprot):
+    """P69905 is always interactant one; 13 listed partners are all present."""
+    expected_partners = (
+        "Q9NZD4",
+        "Q2TAC2",
+        "Q15323",
+        "O76011",
+        "P02042",
+        "P00387",
+        "P02100",
+        "P29474",
+        "Q6A162",
+        "P0DPK4",
+        "P09105",
+        "P69892",
+        "P68871",
+    )
+    interactions = query_uniprot.get_interactions("hemoglobin", "P69905")
+    assert len(interactions) >= 13
+
+    # Every record must name the queried protein as its first interactant.
+    firsts = {
+        record["interactantOne"]["uniProtKBAccession"] for record in interactions
+    }
+    assert firsts == {"P69905"}
+
+    # The partners are looked up among the second interactants.
+    seconds = [
+        record["interactantTwo"]["uniProtKBAccession"] for record in interactions
+    ]
+    assert all(partner in seconds for partner in expected_partners)
+
+
+def test_get_subunit_structure(query_uniprot):
+    """Both expected subunit descriptions are reported for P69905."""
+    expected = (
+        "Heterotetramer of two alpha chains and two beta chains in adult hemoglobin A (HbA); two alpha chains and two delta chains in adult hemoglobin A2 (HbA2); two alpha chains and two epsilon chains in early embryonic hemoglobin Gower-2; two alpha chains and two gamma chains in fetal hemoglobin F (HbF)",  # noqa: E501
+        "(Microbial infection) Interacts with Staphylococcus aureus protein isdB",
+    )
+    records = query_uniprot.get_subunit_structure("hemoglobin", "P69905")
+    described = [record["subunit structure"] for record in records]
+    for text in expected:
+        assert text in described
+
+
+def test_get_sequence_info(query_uniprot):
+    """The five checked fields of the P70619 record match the known values."""
+    expected = {
+        "length": 424,
+        "molWeight": 46301,
+        "crc64": "0714FF531F90BEBA",  # pragma: allowlist secret
+        "md5": "B3EF8C2F41BE8D44040346F274687F49",  # pragma: allowlist secret
+        "sequence": "VNVGCVPKKVMWNTAVHSEFIHDHVDYGFQNCKSKFNWHVIKEKRDAYVSRLNNIYQNNLTKSHIEVIHGYATFRDGPQPTAEVNGKKFTAPHILIATGGVPTVPHENQIPGASLGITSDGFFQLEDLPSRSVIVGAGYIAVEIAGILSALGSKTSLMIRHDKVLRSFDSLISSNCTEELENAGGVEVLTVKKFSQVKEVKKTSSGLELHVVTALPGRKPTVTTIPDVDCLLWAIGRDPNSKGLNLNKLGIQTDDKGHILVDEFQNTNVKGVYAVGDVCGKALLTPVAIAAGRKLAHRLFEGKEDSRLDYDNIPTVVFSHPPIGTVGLTEDEAVHKYGKDNVKIYSTAFTPMYHAVTTRKTKCVMKMVCANKEEKVVGIHMQGIGCDEMLQGFAVAVKMGATKADFDNRVAIHPTSSEELVTLR",  # pragma: allowlist secret # noqa: E501
+    }
+    seq_info = query_uniprot.get_sequence_info("gsr", "P70619")
+    # Fields are compared one by one, in the order listed above.
+    for field, value in expected.items():
+        assert seq_info[field] == value
+
+
+def test_get_ptm_processing_info(query_uniprot):
+    """Check PTM/processing features reported for hemoglobin entry P69905.
+
+    Every feature type is requested in turn through
+    ``query_uniprot.get_ptm_processing_info``. A type with expected features
+    must report each of them; a type with none listed must come back falsy.
+    """
+
+    def feature(start, end, description, feature_id=""):
+        # Every expected feature uses EXACT position modifiers.
+        return {
+            "start": start,
+            "start_modifier": "EXACT",
+            "end": end,
+            "end_modifier": "EXACT",
+            "description": description,
+            "featureId": feature_id,
+        }
+
+    def site(position, description):
+        # Single-residue feature: start and end coincide.
+        return feature(position, position, description)
+
+    # Listed in the order the lookups are issued; an empty list means the
+    # feature type is expected to be absent for this entry.
+    expectations = [
+        ("chain", [feature(2, 142, "Hemoglobin subunit alpha", "PRO_0000052653")]),
+        ("crosslink", []),
+        ("disulfide-bond", []),
+        (
+            "glycosylation",
+            [
+                site(8, "N-linked (Glc) (glycation) lysine; alternate"),
+                site(17, "N-linked (Glc) (glycation) lysine; alternate"),
+                site(41, "N-linked (Glc) (glycation) lysine; alternate"),
+                site(62, "N-linked (Glc) (glycation) lysine"),
+            ],
+        ),
+        ("initiator-methionine", [site(1, "Removed")]),
+        ("lipidation", []),
+        (
+            "modified-residue",
+            [
+                site(4, "Phosphoserine"),
+                site(8, "N6-succinyllysine; alternate"),
+                site(9, "Phosphothreonine"),
+                site(12, "N6-succinyllysine"),
+                site(17, "N6-acetyllysine; alternate"),
+                site(17, "N6-succinyllysine; alternate"),
+                site(25, "Phosphotyrosine"),
+            ],
+        ),
+        ("peptide", [feature(96, 104, "Hemopressin", "PRO_0000455882")]),
+        ("propeptide", []),
+        ("signal-peptide", []),
+        ("transit-peptide", []),
+    ]
+
+    for ptm_type, expected in expectations:
+        reported = query_uniprot.get_ptm_processing_info(
+            "hemoglobin", "P69905", ptm_type
+        )
+        if not expected:
+            assert not reported
+            continue
+        for item in expected:
+            assert item in reported
+
+
+def test_get_3d_info(query_uniprot):
+    """Check that two known PDB entries are reported for gsr / P00390."""
+
+    def pdb_entry(pdb_id, method, resolution, chains):
+        # Builds one expected cross-reference record.
+        return {
+            "database": "PDB",
+            "id": pdb_id,
+            "properties": [
+                {"key": "Method", "value": method},
+                {"key": "Resolution", "value": resolution},
+                {"key": "Chains", "value": chains},
+            ],
+        }
+
+    reported = query_uniprot.get_3d_info("gsr", "P00390")
+    expected_entries = (
+        pdb_entry("1ALG", "NMR", "-", "A=480-503"),
+        pdb_entry("1BWC", "X-ray", "2.10 A", "A=45-522"),
+    )
+    for entry in expected_entries:
+        assert entry in reported
+
+
+def test_get_structure_info(query_uniprot):
+    """Check beta, helix and turn features reported for hemoglobin / P69905.
+
+    For each structure type, two known regions must be present in the result
+    of ``query_uniprot.get_structure_info``.
+    """
+
+    def region(start, end, pdb_id):
+        # Every expected region has EXACT bounds and a single PDB evidence.
+        return {
+            "start": start,
+            "start_modifier": "EXACT",
+            "end": end,
+            "end_modifier": "EXACT",
+            "evidences": [
+                {"evidenceCode": "ECO:0007829", "source": "PDB", "id": pdb_id}
+            ],
+        }
+
+    # Listed in the order the lookups are issued.
+    expectations = (
+        ("beta", (region(45, 47, "1M9P"), region(50, 52, "6XDT"))),
+        ("helix", (region(5, 18, "2W72"), region(19, 21, "2W72"))),
+        ("turn", (region(73, 75, "2W72"), region(91, 93, "2M6Z"))),
+    )
+
+    for structure_type, expected in expectations:
+        reported = query_uniprot.get_structure_info(
+            "hemoglobin", "P69905", structure_type
+        )
+        for item in expected:
+            assert item in reported
+
+
+def test_get_ids(query_uniprot):
+    """Check ID lookup for the query "hemoglobin" in list and single-ID mode.
+
+    The function was previously named ``get_ids``; without the ``test_``
+    prefix pytest's default discovery never collected it, so these
+    assertions were silently skipped.
+
+    Asserts that the value returned with ``single_id=True`` is one of the
+    known IDs, and that every known ID appears in the full result.
+    """
+    hg_ids = [
+        "P84792",
+        "P02042",
+        "P69891",
+        "P69892",
+        "P68871",
+        "P02089",
+        "P02070",
+        "O13163",
+        "Q10733",
+        "P02008",
+        "B3EWR7",
+        "Q90487",
+        "P04244",
+        "P02094",
+        "P83479",
+        "P01966",
+        "O93349",
+        "P68872",
+        "P02110",
+        "P69905",
+        "P02088",
+        "P02100",
+        "P09105",
+        "P11517",
+        "P02091",
+    ]
+    all_ids = query_uniprot.get_ids("hemoglobin")
+    single_id = query_uniprot.get_ids("hemoglobin", single_id=True)
+    assert single_id in hg_ids
+    assert all(i in all_ids for i in hg_ids)
+
+
+# Backward-compatible alias for the pre-rename name; pytest does not collect
+# it because it lacks the ``test_`` prefix.
+get_ids = test_get_ids
+
+
+def test_get_gene_names(query_uniprot):
+    """Check gene-name lookup for the query gsr, with and without P00390."""
+    accession_genes = query_uniprot.get_gene_names("gsr", "P00390")
+    for expected in ("GSR", "GLUR", "GRD1"):
+        assert expected in accession_genes
+
+    unrestricted_genes = query_uniprot.get_gene_names("gsr")
+    # The lookup without an accession must return at least as many names and
+    # contain every name found for the specific accession.
+    assert len(unrestricted_genes) >= len(accession_genes)
+    for name in accession_genes:
+        assert name in unrestricted_genes
+
+
+def test_get_sequence_mapping(query_uniprot):
+    """Check that the mapping for P69905 contains a set of known identifiers."""
+    expected_identifiers = (
+        "1A00",
+        "1A01",
+        "1A0U",
+        "1A0Z",
+        "1A3N",
+        "1A3O",
+        "1A9W",
+        "1ABW",
+        "1ABY",
+        "1AJ9",
+        "1B86",
+        "1BAB",
+        "1BBB",
+        "1BIJ",
+        "1BUW",
+        "1BZ0",
+        "1BZ1",
+        "1BZZ",
+        "1C7B",
+        "1C7C",
+        "1C7D",
+        "1CLS",
+        "1CMY",
+        "1COH",
+        "1DKE",
+    )
+    reported = query_uniprot.get_sequence_mapping("P69905")
+    for identifier in expected_identifiers:
+        assert identifier in reported
+
+
+def test_get_kinetics(query_uniprot):
+    """Check ``get_kinetics`` for "rubisco" with and without accession O85040."""
+    with_a_t = query_uniprot.get_kinetics("rubisco", primary_accession="O85040")
+    no_a_t = query_uniprot.get_kinetics("rubisco")
+
+    # Restricting the query to one accession must give exactly one result,
+    # and the unrestricted query must give strictly more.
+    assert len(with_a_t) == 1
+    assert len(no_a_t) > len(with_a_t)
+
+    # First maximum-velocity value listed in the accession-restricted result.
+    assert (
+        with_a_t[0][0]["kineticParameters"]["maximumVelocities"][0]["velocity"] == 2.9
+    )