From c5f4873b4fe7af71656bb623a0aa29afa5e41f49 Mon Sep 17 00:00:00 2001 From: Jorge Date: Wed, 10 Jul 2024 23:18:03 -0400 Subject: [PATCH] exp_24 --- .../gpt-4o-2024-05-13/exp_24.ipynb | 291 ++++++++++++++++++ 1 file changed, 291 insertions(+) create mode 100644 notebooks/experiments/gpt-4o-2024-05-13/exp_24.ipynb diff --git a/notebooks/experiments/gpt-4o-2024-05-13/exp_24.ipynb b/notebooks/experiments/gpt-4o-2024-05-13/exp_24.ipynb new file mode 100644 index 00000000..758fe1c4 --- /dev/null +++ b/notebooks/experiments/gpt-4o-2024-05-13/exp_24.ipynb @@ -0,0 +1,291 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import datetime\n", + "import os\n", + "from mdagent import MDAgent\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "date and time: 2024-07-10\n", + "time: 23:04:46\n", + "LLM: gpt-4o-2024-05-13 \n", + "Temperature: 0.1\n" + ] + } + ], + "source": [ + "prompt24 = \"Analyze the rdf of the simulation of 1A3N solvated in water\"\n", + "llm_var = \"gpt-4o-2024-05-13\"\n", + "tools = \"all\"\n", + "agent = MDAgent(agent_type=\"Structured\", model=llm_var, top_k_tools=tools)\n", + "now = datetime.datetime.now()\n", + "date = now.strftime(\"%Y-%m-%d\")\n", + "print(\"date and time:\",date)\n", + "time = now.strftime(\"%H:%M:%S\")\n", + "print(\"time:\",time)\n", + "print(\"LLM: \",agent.llm.model_name,\"\\nTemperature: \",agent.llm.temperature)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Thought: To analyze the radial distribution function (RDF) of the simulation of the protein 1A3N solvated in water, I need to ensure that I have the necessary trajectory and topology files. 
I will use the RDFTool to perform this analysis.\n", + "\n", + "Action:\n", + "```\n", + "{\n", + " \"action\": \"RDFTool\",\n", + " \"action_input\": {\n", + " \"input\": {\n", + " \"pdb_id\": \"1A3N\"\n", + " }\n", + " }\n", + "}\n", + "```Error in Inputs in RDF tool: Incorrect Inputs: Trajectory file ID ('trajectory_fileid')is required\n", + "To analyze the radial distribution function (RDF) of the simulation of the protein 1A3N solvated in water, I need to provide the correct trajectory file ID along with the topology file ID. Let's list the available paths to identify the correct file IDs.\n", + "\n", + "Action:\n", + "```\n", + "{\n", + " \"action\": \"ListRegistryPaths\",\n", + " \"action_input\": {\n", + " \"paths\": \"paths\"\n", + " }\n", + "}\n", + "```To analyze the radial distribution function (RDF) of the simulation of the protein 1A3N solvated in water, I need to first download the necessary PDB file and then set up and run a short simulation to generate the required trajectory and topology files. After that, I can use the RDFTool to perform the analysis.\n", + "\n", + "Action:\n", + "```\n", + "{\n", + " \"action\": \"PDBFileDownloader\",\n", + " \"action_input\": {\n", + " \"query\": \"1A3N\"\n", + " }\n", + "}\n", + "```PDB file found with this ID: 1A3N\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-07-10 23:04:54.205 \n", + " \u001b[33m\u001b[1mWarning:\u001b[0m to view this Streamlit app on a browser, run it with the following\n", + " command:\n", + "\n", + " streamlit run /Users/jorgemedina/miniforge3/envs/mdagent2/lib/python3.11/site-packages/ipykernel_launcher.py [ARGUMENTS]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "To proceed with the analysis of the radial distribution function (RDF) for the protein 1A3N solvated in water, I need to set up and run a short simulation to generate the required trajectory and topology files. 
After that, I can use the RDFTool to perform the analysis.\n", + "\n", + "Action:\n", + "```\n", + "{\n", + " \"action\": \"SetUpandRunFunction\",\n", + " \"action_input\": {\n", + " \"pdb_id\": \"1A3N_230454\",\n", + " \"forcefield_files\": [],\n", + " \"save\": true,\n", + " \"system_params\": {\n", + " \"nonbondedMethod\": \"PME\",\n", + " \"nonbondedCutoff\": \"1 * nanometers\",\n", + " \"constraints\": \"HBonds\",\n", + " \"rigidWater\": true,\n", + " \"constraintTolerance\": 0.00001,\n", + " \"solvate\": true\n", + " },\n", + " \"integrator_params\": {\n", + " \"integrator_type\": \"LangevinMiddle\",\n", + " \"Temperature\": \"300 * kelvin\",\n", + " \"Friction\": \"1.0 / picoseconds\",\n", + " \"Timestep\": \"0.002 * picoseconds\",\n", + " \"Pressure\": \"1.0 * bar\"\n", + " },\n", + " \"simulation_params\": {\n", + " \"Ensemble\": \"NPT\",\n", + " \"Number of Steps\": 5000,\n", + " \"record_interval_steps\": 100,\n", + " \"record_params\": [\"step\", \"potentialEnergy\", \"temperature\"]\n", + " }\n", + " }\n", + "}\n", + "```\n", + "Setting default ewaldErrorTolerance: 0.0005 \n", + "Setting default forcefields v2\n", + "Building system...\n", + "About to create system...\n", + "Attempts at creating system: 0/3\n", + "Error adding solvent ValueError – No template found for residue 1 (VAL). The set of atoms is similar to PRO, but it is missing 7 hydrogen atoms. For more information, see https://github.com/openmm/openmm/wiki/Frequently-Asked-Questions#template\n", + "Residue code: VAL\n", + "Residue code not in solvent list. Adding forcefield not supported.\n", + "To proceed with the analysis of the radial distribution function (RDF) for the protein 1A3N solvated in water, I need to address the issue with the missing hydrogen atoms in the VAL residue. I will use the CleaningToolFunction to add the missing atoms and hydrogens to the PDB file. 
After that, I will set up and run the simulation again.\n", + "\n", + "Action:\n", + "```\n", + "{\n", + " \"action\": \"CleaningToolFunction\",\n", + " \"action_input\": {\n", + " \"pdb_id\": \"1A3N_230454\",\n", + " \"replace_nonstandard_residues\": true,\n", + " \"add_missing_atoms\": true,\n", + " \"remove_heterogens\": true,\n", + " \"remove_water\": false,\n", + " \"add_hydrogens\": true,\n", + " \"add_hydrogens_ph\": 7.0\n", + " }\n", + "}\n", + "```file path: /Users/jorgemedina/Desktop/md-agent/ckpt/ckpt_107/pdb/1A3N_raw_20240710_230454.pdb\n", + "To proceed with the analysis of the radial distribution function (RDF) for the protein 1A3N solvated in water, I will now set up and run a short simulation using the cleaned PDB file. This will generate the required trajectory and topology files. After that, I can use the RDFTool to perform the analysis.\n", + "\n", + "Action:\n", + "```\n", + "{\n", + " \"action\": \"SetUpandRunFunction\",\n", + " \"action_input\": {\n", + " \"pdb_id\": \"1A3N_230519\",\n", + " \"forcefield_files\": [],\n", + " \"save\": true,\n", + " \"system_params\": {\n", + " \"nonbondedMethod\": \"PME\",\n", + " \"nonbondedCutoff\": \"1 * nanometers\",\n", + " \"constraints\": \"HBonds\",\n", + " \"rigidWater\": true,\n", + " \"constraintTolerance\": 0.00001,\n", + " \"solvate\": true\n", + " },\n", + " \"integrator_params\": {\n", + " \"integrator_type\": \"LangevinMiddle\",\n", + " \"Temperature\": \"300 * kelvin\",\n", + " \"Friction\": \"1.0 / picoseconds\",\n", + " \"Timestep\": \"0.002 * picoseconds\",\n", + " \"Pressure\": \"1.0 * bar\"\n", + " },\n", + " \"simulation_params\": {\n", + " \"Ensemble\": \"NPT\",\n", + " \"Number of Steps\": 5000,\n", + " \"record_interval_steps\": 100,\n", + " \"record_params\": [\"step\", \"potentialEnergy\", \"temperature\"]\n", + " }\n", + " }\n", + "}\n", + "```Setting default ewaldErrorTolerance: 0.0005 \n", + "Setting default forcefields v2\n", + "Building system...\n", + "About to create 
system...\n", + "Attempts at creating system: 0/3\n", + "System built successfully\n", + " >\n", + "Setting up integrator...\n", + "Creating simulation...\n", + "simulation set!\n", + "Performing energy minimization...\n", + "Minimization complete!\n", + "Initial Positions saved to initial_positions.pdb\n", + "Equilibrating...\n", + "Simulating...\n" + ] + } + ], + "source": [ + "agent.run(prompt24)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "now = datetime.datetime.now()\n", + "date = now.strftime(\"%Y-%m-%d\")\n", + "print(\"date and time:\",date)\n", + "time = now.strftime(\"%H:%M:%S\")\n", + "print(\"time:\",time)\n", + "registry = agent.path_registry\n", + "paths_and_descriptions = registry.list_path_names_and_descriptions()\n", + "print(\"\\n\".join(paths_and_descriptions.split(\",\")))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "path_before = registry.get_mapped_path(\"1A3N_230454\")\n", + "path_after = registry.get_mapped_path(\"1A3N_230519\")\n", + "assert os.path.exists(path_before)\n", + "assert os.path.exists(path_after)\n", + "assert path_before != path_after" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import mdtraj as md\n", + "raw_file = md.load(path_before)\n", + "clean_file = md.load(path_after)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Experiment Result:\n", + "### Completed without Exception or TimeOut Errors ✅\n", + "### Attempted all necessary steps ✅\n", + "### Logic makes sense ✅\n", + "### Correct Answer ✅" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "mdagent2", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", +
"nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}