Clean up notebooks and tests. Debugged and fixed a few tools.

ur-whitelab · Dec 5, 2023 · 0e19f1e · 0e19f1e
1 parent 581e3e8
commit 0e19f1e
Show file tree

Hide file tree

Showing 35 changed files with 88 additions and 2,971 deletions.
diff --git a/.gitignore b/.gitignore
@@ -140,6 +140,7 @@ dmypy.json
 *.csv
 *.png
 ckpt/
+Visualization.ipynb
 
 # path registry files
 *registry.json

diff --git a/.secrets.baseline b/.secrets.baseline
@@ -7,4 +7,4 @@ OpenAI API Key: \b[secrets]{3}_[a-zA-Z0-9]{32}\b
 PQA API Key: "pqa[a-zA-Z0-9-._]+"
 
 # Rule for detecting serp API keys
-Serp API Key: "serp_[a-zA-Z0-9]{32}"
+# Serp API Key: "[a-zA-Z0-9]{64}"
diff --git a/mdagent/mainagent/agent.py b/mdagent/mainagent/agent.py
@@ -85,13 +85,13 @@ def run(self, user_prompt):
         )
         outputs = self.agent_executor(main_prompt.format(input=user_prompt))
         # Parse long output (with intermediate steps)
-        intermed = outputs["intermediate_steps"]
+        outputs["intermediate_steps"]
         final = ""
-        for step in intermed:
-            final += (
-                f"Action: {step[0].tool}\n"
-                f"Action Input: {step[0].tool_input}\n"
-                f"Observation: {step[1]}\n"
-            )
+        # for step in intermed:
+        #     final += (
+        #         f"Action: {step[0].tool}\n"
+        #         f"Action Input: {step[0].tool_input}\n"
+        #         f"Observation: {step[1]}\n"
+        #     )
         final += f"Final Answer: {outputs['output']}"
         return final
diff --git a/mdagent/subagents/__init__.py b/mdagent/subagents/__init__.py
@@ -1,11 +1,18 @@
+from .agents import (
+    ActionAgent,
+    CodeCriticAgent,
+    CurriculumAgent,
+    SkillManager,
+    TaskCriticAgent,
+)
 from .subagent_fxns import Iterator
 from .subagent_setup import SubAgentInitializer, SubAgentSettings
 
 __all__ = [
     "ActionAgent",
     "CodeCriticAgent",
     "CurriculumAgent",
-    "SkillAgent",
+    "SkillManager",
     "TaskCriticAgent",
     "Iterator",
     "SubAgentInitializer",

diff --git a/mdagent/subagents/agents/curriculum.py b/mdagent/subagents/agents/curriculum.py
@@ -1,3 +1,4 @@
+import json
 from typing import Optional
 
 from dotenv import load_dotenv
@@ -31,5 +32,12 @@ def run(self, task, curr_tools, files, failed_tasks=""):
                 "failed_tasks": failed_tasks,
             }
         )["text"]
-        # TODO: parse a list of subtasks from the message
-        return message
+
+        if message.startswith("```json"):
+            # Remove the triple backticks and 'json'
+            message = message.strip("`").replace("json\n", "")
+
+        parsed_message = json.loads(message)
+        rationale = parsed_message.get("Rationale", "")
+        subtasks = parsed_message.get("Plan", [])
+        return rationale, subtasks
diff --git a/mdagent/subagents/prompts/curriculum_prompts.py b/mdagent/subagents/prompts/curriculum_prompts.py
@@ -7,20 +7,26 @@
 determining what kind of preprocessing PDB files are needed,
  settings the simulations should be run at, analysis of the simulation results, etc.
 
+
 You must follow the following criteria:
 1) Return your plan as a Python list of subtasks that can be completed
 in order to complete the specified task.
 2) Ensure the response can be parsed by Python `json.loads`, e.g.:
-no trailing commas,no single quotes, etc.
+no trailing commas, no single quotes, etc. Don't start with ```json.
 3) briefly explain your rationale why you choose this curriculum of subtasks
-4) if we need to obtain any new tool or get any new files, include that
-as a separate subtask in the relevant order
-5) Only if you're asked to explore or find novel things: you should be able to
-offer creative and interesting subtasks. You should be looking for
-opportunities to discover as many diverse things as possible, accomplish as many
-diverse tasks as possible to be expert at running molecular dynamics.
-6) If you're asked to refine because the task failed, you should be able to offer
-subtasks that can help the user complete the task.
+4) For each subtask, specify which tool you should use. If and only if
+there is no suitable tool, mention that we need to obtain a new tool.
+5) Each subtask should be very specific and essential to the completion
+of the task. In other words, fewer steps are preferred over more wasteful steps.
+Don't suggest preprocessing PDB files or running simulations unless it's
+absolutely necessary or requested so.
+6) REFINE: If you're asked to make a plan because some task failed, you
+should be able to refine and help complete the task.
+7) EXPLORE: If and only if you're asked to explore or find novel things:
+you should be able to offer creative and interesting subtasks. You should
+be looking for opportunities to discover as many diverse things as possible,
+accomplish as many diverse tasks as possible to be expert at running
+molecular dynamics.
 
 You should only respond in JSON format as described below:
 {{

diff --git a/mdagent/subagents/subagent_fxns.py b/mdagent/subagents/subagent_fxns.py
@@ -247,7 +247,6 @@ def _pull_information(self):
         current_tools_string = ""
         if self.current_tools:
             current_tools_string = json.dumps(self.current_tools)
-        # TODO: include a list of packages we currently have/support
 
         info = {
             "recent_history": recent_history_string,

diff --git a/mdagent/subagents/subagent_setup.py b/mdagent/subagents/subagent_setup.py
@@ -1,4 +1,3 @@
-import warnings
 from typing import Optional
 
 from mdagent.subagents.agents import (
@@ -36,14 +35,15 @@ def __init__(
 
 
 class SubAgentInitializer:
-    def __init__(self, settings: Optional[SubAgentSettings]):
+    def __init__(self, settings: Optional[SubAgentSettings] = None):
         if settings is None:
-            raise ValueError("Settings cannot be None")
+            settings = SubAgentSettings()
         if settings.path_registry is None:
-            warnings.warn(
-                "path_registry is None, some agents may fail to be created.",
-                UserWarning,
-            )
+            # warnings.warn(
+            #     "'path_registry' isn't specified. Use current directory by default.",
+            #     UserWarning,
+            # )
+            settings.path_registry = PathRegistry.get_instance()
         self.path_registry = settings.path_registry
         self.subagents_model = settings.subagents_model
         self.temp = settings.temp

diff --git a/mdagent/tools/base_tools/preprocess_tools/pdb_tools.py b/mdagent/tools/base_tools/preprocess_tools/pdb_tools.py
@@ -13,11 +13,13 @@
 from mdagent.utils import PathRegistry
 
 
-def get_pdb(query_string, PathRegistry):
+def get_pdb(query_string, path_registry=None):
     """
     Search RSCB's protein data bank using the given query string
     and return the path to pdb file in either CIF or PDB format
     """
+    if path_registry is None:
+        path_registry = PathRegistry.get_instance()
     url = "https://search.rcsb.org/rcsbsearch/v2/query?json={search-request}"
     query = {
         "query": {
@@ -44,7 +46,7 @@ def get_pdb(query_string, PathRegistry):
             file.write(pdb.text)
         print(f"{filename} is created.")
         file_description = f"PDB file downloaded from RSCB, PDB ID: {pdbid}"
-        PathRegistry.map_path(filename, filename, file_description)
+        path_registry.map_path(filename, filename, file_description)
         return filename
     return None
 
@@ -1427,7 +1429,7 @@ def validate_input(cls, values: Union[str, Dict[str, Any]]) -> Dict:
 
 
 class FixPDBFile(BaseTool):
-    name: str = "PDB File Fixer"
+    name: str = "PDBFileFixer"
     description: str = "Fixes PDB files columns if needed"
     args_schema: Type[BaseModel] = PDBFilesFixInp
 

diff --git a/mdagent/tools/base_tools/simulation_tools/setup_and_run.py b/mdagent/tools/base_tools/simulation_tools/setup_and_run.py
@@ -351,7 +351,7 @@ async def _arun(self, query: str) -> str:
 
 
 class InstructionSummary(BaseTool):
-    name = "Instruction Summary"
+    name = "InstructionSummary"
     description = """This tool will summarize the instructions
      given by the human. This is the first tool you will
        use, unless you dont have a .cif or .pdb file in

diff --git a/mdagent/tools/base_tools/util_tools/git_issues_tool.py b/mdagent/tools/base_tools/util_tools/git_issues_tool.py
@@ -65,7 +65,7 @@ def make_encoding(self):
 
 
 class SerpGitTool(BaseTool):
-    name = "Openmm Github Issues Search"
+    name = "Openmm_Github_Issues_Search"
     description = """ Tool that searches inside
                     github issues in openmm. Make
                     your query as if you were googling something.
@@ -90,8 +90,12 @@ def _run(self, query: str):
         encoding = fxns.make_encoding()
         search = GoogleSearch(params)
         results = search.get_dict()
-
-        organic_results = results["organic_results"]
+        organic_results = results.get("organic_results")
+        if organic_results is None:
+            if results.get("error"):
+                return "error: " + results.get("error")
+            else:
+                return "Error: No 'organic_results' found"
         issues_numbers: List = (
             []
         )  # list that will contain issue id numbers retrieved from the google search

diff --git a/mdagent/tools/maketools.py b/mdagent/tools/maketools.py
@@ -102,18 +102,10 @@ def make_all_tools(
     subagents_tools = []
     if not skip_subagents:
         subagents_tools = [
-            CreateNewTool(
-                path_registry=path_instance, subagent_settings=subagent_settings
-            ),
-            ExecuteSkill(
-                path_registry=path_instance, subagent_settings=subagent_settings
-            ),
-            SkillRetrieval(
-                path_registry=path_instance, subagent_settings=subagent_settings
-            ),
-            WorkflowPlan(
-                path_registry=path_instance, subagent_settings=subagent_settings
-            ),
+            CreateNewTool(subagent_settings=subagent_settings),
+            ExecuteSkill(subagent_settings=subagent_settings),
+            SkillRetrieval(subagent_settings=subagent_settings),
+            WorkflowPlan(subagent_settings=subagent_settings),
         ]
 
     # add 'learned' tools here
@@ -146,17 +138,12 @@ def get_tools(
     retrieved_tools = []
     if subagents_required:
         # add subagents-related tools by default
-        path_instance = PathRegistry.get_instance()
+        PathRegistry.get_instance()
         retrieved_tools = [
-            CreateNewTool(
-                path_registry=path_instance, subagent_settings=subagent_settings
-            ),
-            ExecuteSkill(
-                path_registry=path_instance, subagent_settings=subagent_settings
-            ),
-            SkillRetrieval(
-                path_registry=path_instance, subagent_settings=subagent_settings
-            ),
+            CreateNewTool(subagent_settings=subagent_settings),
+            ExecuteSkill(subagent_settings=subagent_settings),
+            SkillRetrieval(subagent_settings=subagent_settings),
+            WorkflowPlan(subagent_settings=subagent_settings),
         ]
         retrieval_top_k -= len(retrieved_tools)
         all_tools = make_all_tools(
@@ -210,21 +197,15 @@ class CreateNewToolInputSchema(BaseModel):
 class CreateNewTool(BaseTool):
     name: str = "CreateNewTool"
     description: str = """
-        This tool is used to create a new tool.
+        Only use if you don't have right tools for sure and need a different tool.
         If succeeded, it will return the name of the tool.
         You can then use the tool in subsequent steps.
     """
     args_schema: Type[BaseModel] = CreateNewToolInputSchema
-    path_registry: Optional[PathRegistry]
     subagent_settings: Optional[SubAgentSettings]
 
-    def __init__(
-        self,
-        path_registry: Optional[PathRegistry],
-        subagent_settings: Optional[SubAgentSettings],
-    ):
+    def __init__(self, subagent_settings: Optional[SubAgentSettings] = None):
         super().__init__()
-        self.path_registry = path_registry
         self.subagent_settings = subagent_settings
 
     def get_all_tools_string(self):
@@ -238,16 +219,13 @@ def get_all_tools_string(self):
     def _run(self, task, orig_prompt, curr_tools):
         # def _run(self, task, orig_prompt):
         try:
-            if self.path_registry is None:
-                return "Path registry not initialized"
-            if self.subagent_settings is None:
-                return "Settings for subagents yet to be defined"
             # run iterator
+            path_registry = self.subagent_settings.path_registry
             print("getting all tools info")
             all_tools_string = self.get_all_tools_string()
             print("setting up iterator")
             newcode_iterator = Iterator(
-                self.path_registry,
+                path_registry,
                 self.subagent_settings,
                 all_tools_string=all_tools_string,
                 current_tools=curr_tools,