Skip to content

Commit

Permalink
clean up notebooks and tests. debugged and fixed few tools
Browse files Browse the repository at this point in the history
  • Loading branch information
qcampbel committed Dec 5, 2023
1 parent 581e3e8 commit 0e19f1e
Show file tree
Hide file tree
Showing 35 changed files with 88 additions and 2,971 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ dmypy.json
*.csv
*.png
ckpt/
Visualization.ipynb

# path registry files
*registry.json
Expand Down
2 changes: 1 addition & 1 deletion .secrets.baseline
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,4 @@ OpenAI API Key: \b[secrets]{3}_[a-zA-Z0-9]{32}\b
PQA API Key: "pqa[a-zA-Z0-9-._]+"

# Rule for detecting serp API keys
Serp API Key: "serp_[a-zA-Z0-9]{32}"
# Serp API Key: "[a-zA-Z0-9]{64}"
14 changes: 7 additions & 7 deletions mdagent/mainagent/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,13 +85,13 @@ def run(self, user_prompt):
)
outputs = self.agent_executor(main_prompt.format(input=user_prompt))
# Parse long output (with intermediate steps)
intermed = outputs["intermediate_steps"]
outputs["intermediate_steps"]
final = ""
for step in intermed:
final += (
f"Action: {step[0].tool}\n"
f"Action Input: {step[0].tool_input}\n"
f"Observation: {step[1]}\n"
)
# for step in intermed:
# final += (
# f"Action: {step[0].tool}\n"
# f"Action Input: {step[0].tool_input}\n"
# f"Observation: {step[1]}\n"
# )
final += f"Final Answer: {outputs['output']}"
return final
9 changes: 8 additions & 1 deletion mdagent/subagents/__init__.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,18 @@
from .agents import (
ActionAgent,
CodeCriticAgent,
CurriculumAgent,
SkillManager,
TaskCriticAgent,
)
from .subagent_fxns import Iterator
from .subagent_setup import SubAgentInitializer, SubAgentSettings

__all__ = [
"ActionAgent",
"CodeCriticAgent",
"CurriculumAgent",
"SkillAgent",
"SkillManager",
"TaskCriticAgent",
"Iterator",
"SubAgentInitializer",
Expand Down
12 changes: 10 additions & 2 deletions mdagent/subagents/agents/curriculum.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import json
from typing import Optional

from dotenv import load_dotenv
Expand Down Expand Up @@ -31,5 +32,12 @@ def run(self, task, curr_tools, files, failed_tasks=""):
"failed_tasks": failed_tasks,
}
)["text"]
# TODO: parse a list of subtasks from the message
return message

if message.startswith("```json"):
# Remove the triple backticks and 'json'
message = message.strip("`").replace("json\n", "")

parsed_message = json.loads(message)
rationale = parsed_message.get("Rationale", "")
subtasks = parsed_message.get("Plan", [])
return rationale, subtasks
24 changes: 15 additions & 9 deletions mdagent/subagents/prompts/curriculum_prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,20 +7,26 @@
determining what kind of preprocessing PDB files are needed,
settings the simulations should be run at, analysis of the simulation results, etc.
You must follow the following criteria:
1) Return your plan as a Python list of subtasks that can be completed
in order to complete the specified task.
2) Ensure the response can be parsed by Python `json.loads`, e.g.:
no trailing commas,no single quotes, etc.
no trailing commas, no single quotes, etc. Don't start with ```json.
3) briefly explain your rationale why you choose this curriculum of subtasks
4) if we need to obtain any new tool or get any new files, include that
as a separate subtask in the relevant order
5) Only if you're asked to explore or find novel things: you should be able to
offer creative and interesting subtasks. You should be looking for
opportunities to discover as many diverse things as possible, accomplish as many
diverse tasks as possible to be expert at running molecular dynamics.
6) If you're asked to refine because the task failed, you should be able to offer
subtasks that can help the user complete the task.
4) For each subtask, specify which tool you should use. If and only if
there is no suitable tool, mention that we need to obtain a new tool.
5) Each subtask should be very specific and essential to the completion
of the task. In other words, fewer steps are preferred over more wasteful steps.
Don't suggest preprocessing PDB files or running simulations unless it's
absolutely necessary or requested so.
6) REFINE: If you're asked to make a plan because some task failed, you
should be able to refine and help complete the task.
7) EXPLORE: If and only if you're asked to explore or find novel things:
you should be able to offer creative and interesting subtasks. You should
be looking for opportunities to discover as many diverse things as possible,
accomplish as many diverse tasks as possible to be expert at running
molecular dynamics.
You should only respond in JSON format as described below:
{{
Expand Down
1 change: 0 additions & 1 deletion mdagent/subagents/subagent_fxns.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,6 @@ def _pull_information(self):
current_tools_string = ""
if self.current_tools:
current_tools_string = json.dumps(self.current_tools)
# TODO: include a list of packages we currently have/support

info = {
"recent_history": recent_history_string,
Expand Down
14 changes: 7 additions & 7 deletions mdagent/subagents/subagent_setup.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import warnings
from typing import Optional

from mdagent.subagents.agents import (
Expand Down Expand Up @@ -36,14 +35,15 @@ def __init__(


class SubAgentInitializer:
def __init__(self, settings: Optional[SubAgentSettings]):
def __init__(self, settings: Optional[SubAgentSettings] = None):
if settings is None:
raise ValueError("Settings cannot be None")
settings = SubAgentSettings()
if settings.path_registry is None:
warnings.warn(
"path_registry is None, some agents may fail to be created.",
UserWarning,
)
# warnings.warn(
# "'path_registry' isn't specified. Use current directory by default.",
# UserWarning,
# )
settings.path_registry = PathRegistry.get_instance()
self.path_registry = settings.path_registry
self.subagents_model = settings.subagents_model
self.temp = settings.temp
Expand Down
8 changes: 5 additions & 3 deletions mdagent/tools/base_tools/preprocess_tools/pdb_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,13 @@
from mdagent.utils import PathRegistry


def get_pdb(query_string, PathRegistry):
def get_pdb(query_string, path_registry=None):
"""
Search RSCB's protein data bank using the given query string
and return the path to pdb file in either CIF or PDB format
"""
if path_registry is None:
path_registry = PathRegistry.get_instance()
url = "https://search.rcsb.org/rcsbsearch/v2/query?json={search-request}"
query = {
"query": {
Expand All @@ -44,7 +46,7 @@ def get_pdb(query_string, PathRegistry):
file.write(pdb.text)
print(f"{filename} is created.")
file_description = f"PDB file downloaded from RSCB, PDB ID: {pdbid}"
PathRegistry.map_path(filename, filename, file_description)
path_registry.map_path(filename, filename, file_description)
return filename
return None

Expand Down Expand Up @@ -1427,7 +1429,7 @@ def validate_input(cls, values: Union[str, Dict[str, Any]]) -> Dict:


class FixPDBFile(BaseTool):
name: str = "PDB File Fixer"
name: str = "PDBFileFixer"
description: str = "Fixes PDB files columns if needed"
args_schema: Type[BaseModel] = PDBFilesFixInp

Expand Down
2 changes: 1 addition & 1 deletion mdagent/tools/base_tools/simulation_tools/setup_and_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -351,7 +351,7 @@ async def _arun(self, query: str) -> str:


class InstructionSummary(BaseTool):
name = "Instruction Summary"
name = "InstructionSummary"
description = """This tool will summarize the instructions
given by the human. This is the first tool you will
use, unless you dont have a .cif or .pdb file in
Expand Down
10 changes: 7 additions & 3 deletions mdagent/tools/base_tools/util_tools/git_issues_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def make_encoding(self):


class SerpGitTool(BaseTool):
name = "Openmm Github Issues Search"
name = "Openmm_Github_Issues_Search"
description = """ Tool that searches inside
github issues in openmm. Make
your query as if you were googling something.
Expand All @@ -90,8 +90,12 @@ def _run(self, query: str):
encoding = fxns.make_encoding()
search = GoogleSearch(params)
results = search.get_dict()

organic_results = results["organic_results"]
organic_results = results.get("organic_results")
if organic_results is None:
if results.get("error"):
return "error: " + results.get("error")
else:
return "Error: No 'organic_results' found"
issues_numbers: List = (
[]
) # list that will contain issue id numbers retrieved from the google search
Expand Down
48 changes: 13 additions & 35 deletions mdagent/tools/maketools.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,18 +102,10 @@ def make_all_tools(
subagents_tools = []
if not skip_subagents:
subagents_tools = [
CreateNewTool(
path_registry=path_instance, subagent_settings=subagent_settings
),
ExecuteSkill(
path_registry=path_instance, subagent_settings=subagent_settings
),
SkillRetrieval(
path_registry=path_instance, subagent_settings=subagent_settings
),
WorkflowPlan(
path_registry=path_instance, subagent_settings=subagent_settings
),
CreateNewTool(subagent_settings=subagent_settings),
ExecuteSkill(subagent_settings=subagent_settings),
SkillRetrieval(subagent_settings=subagent_settings),
WorkflowPlan(subagent_settings=subagent_settings),
]

# add 'learned' tools here
Expand Down Expand Up @@ -146,17 +138,12 @@ def get_tools(
retrieved_tools = []
if subagents_required:
# add subagents-related tools by default
path_instance = PathRegistry.get_instance()
PathRegistry.get_instance()
retrieved_tools = [
CreateNewTool(
path_registry=path_instance, subagent_settings=subagent_settings
),
ExecuteSkill(
path_registry=path_instance, subagent_settings=subagent_settings
),
SkillRetrieval(
path_registry=path_instance, subagent_settings=subagent_settings
),
CreateNewTool(subagent_settings=subagent_settings),
ExecuteSkill(subagent_settings=subagent_settings),
SkillRetrieval(subagent_settings=subagent_settings),
WorkflowPlan(subagent_settings=subagent_settings),
]
retrieval_top_k -= len(retrieved_tools)
all_tools = make_all_tools(
Expand Down Expand Up @@ -210,21 +197,15 @@ class CreateNewToolInputSchema(BaseModel):
class CreateNewTool(BaseTool):
name: str = "CreateNewTool"
description: str = """
This tool is used to create a new tool.
Only use if you don't have right tools for sure and need a different tool.
If succeeded, it will return the name of the tool.
You can then use the tool in subsequent steps.
"""
args_schema: Type[BaseModel] = CreateNewToolInputSchema
path_registry: Optional[PathRegistry]
subagent_settings: Optional[SubAgentSettings]

def __init__(
self,
path_registry: Optional[PathRegistry],
subagent_settings: Optional[SubAgentSettings],
):
def __init__(self, subagent_settings: Optional[SubAgentSettings] = None):
super().__init__()
self.path_registry = path_registry
self.subagent_settings = subagent_settings

def get_all_tools_string(self):
Expand All @@ -238,16 +219,13 @@ def get_all_tools_string(self):
def _run(self, task, orig_prompt, curr_tools):
# def _run(self, task, orig_prompt):
try:
if self.path_registry is None:
return "Path registry not initialized"
if self.subagent_settings is None:
return "Settings for subagents yet to be defined"
# run iterator
path_registry = self.subagent_settings.path_registry
print("getting all tools info")
all_tools_string = self.get_all_tools_string()
print("setting up iterator")
newcode_iterator = Iterator(
self.path_registry,
path_registry,
self.subagent_settings,
all_tools_string=all_tools_string,
current_tools=curr_tools,
Expand Down
Loading

0 comments on commit 0e19f1e

Please sign in to comment.