From 02583490e98b39bcbc3ff20758f974006f3c3611 Mon Sep 17 00:00:00 2001
From: antonkulaga <antonkulaga@gmail.com>
Date: Wed, 23 Oct 2024 18:26:01 +0200
Subject: [PATCH] refactoring of coding examples

---
 examples/__init__.py                          |  0
 examples/bioinformatic_prompt.txt             | 27 --------
 examples/code_agent.py                        | 64 ++++++++++++++++++
 examples/code_agent.yaml                      | 13 ++--
 examples/input/.gitignore                     |  3 +-
 examples/output/.gitignore                    |  2 +-
 examples/run_code_with_output.py              | 48 --------------
 examples/run_simple_code.py                   |  9 ---
 examples/simple_code_agent.py                 | 18 +++++
 examples/simple_code_agent.yaml               |  3 +-
 .../containers/biosandbox/Dockerfile          |  5 ++
 .../containers/sandbox/Dockerfile             |  5 ++
 just_agents_coding/micromamba_session.py      | 65 -------------------
 just_agents_coding/tools.py                   |  4 +-
 14 files changed, 107 insertions(+), 159 deletions(-)
 create mode 100644 examples/__init__.py
 delete mode 100644 examples/bioinformatic_prompt.txt
 create mode 100644 examples/code_agent.py
 delete mode 100644 examples/run_code_with_output.py
 delete mode 100644 examples/run_simple_code.py
 create mode 100644 examples/simple_code_agent.py
 delete mode 100644 just_agents_coding/micromamba_session.py

diff --git a/examples/__init__.py b/examples/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/examples/bioinformatic_prompt.txt b/examples/bioinformatic_prompt.txt
deleted file mode 100644
index 48311da..0000000
--- a/examples/bioinformatic_prompt.txt
+++ /dev/null
@@ -1,27 +0,0 @@
-You are a bioinformatician AI assistant. Your role is to help with bioinformatics tasks and generate plans or code as needed. Please adhere to the following guidelines strictly:
-
-1. Always maintain your role as a bioinformatician.
-2. Respond only in the specified JSON format, enclosed in double quotes.
-3. Do not write anything outside the JSON response.
-4. Make your answers as detailed as possible when planning, and as simple as possible when coding.
-5. Use information provided in the input to write detailed plans or bash code to accomplish the given goal or task.
-6. Do not include loading data as a separate step in your plans.
-7. When writing code:
-   - Use full absolute paths for all files.
-   - Install dependencies and software using micromamba or pip with the -y flag.
-   - Use default values for unspecified parameters.
-   - Only use software directly installed with micromamba or pip.
-   - Do not repeat steps already completed in the history.
-8. Pay attention to the number of input files and do not miss any.
-9. Do not create or activate the micromamba environment 'base'.
-10. If RAG (Retrieval-Augmented Generation) information is provided, use it as a template to write code. Replace <...> with correct values and file paths based on the history information.
-11. Be aware of file name changes or outputs from previous steps when provided with history.
-12. Do not use software specified in the blacklist (which will be provided separately).
-13. If execution errors occur, fix the code based on the error information provided.
-
-System constraints:
-- You are working on an Ubuntu 24.04 system.
-- You have a micromamba environment named 'base'.
-- No other software is installed by default.
-
-Remember to adapt your response based on whether you're creating an initial plan or writing code for a specific task. Your goal is to provide accurate, efficient, and executable bioinformatics solutions.
\ No newline at end of file
diff --git a/examples/code_agent.py b/examples/code_agent.py
new file mode 100644
index 0000000..b7c7d3d
--- /dev/null
+++ b/examples/code_agent.py
@@ -0,0 +1,64 @@
+from pathlib import Path
+from dotenv import load_dotenv
+from just_agents.interfaces.IAgent import build_agent, IAgent
+from just_agents.llm_session import LLMSession
+from llm_sandbox.micromamba import MicromambaSession
+from llm_sandbox.docker import SandboxDockerSession
+from docker.types import Mount
+import os
+
+load_dotenv(override=True)
+
+"""
+This example shows how to use a Chain of Thought code agent to run Python code and bash commands.
+It bind-mounts examples/input and examples/output into the sandbox and is configured by code_agent.yaml.
+"""
+
+# Sandbox image used by both tools; kept in one place so the two tools cannot drift apart.
+SANDBOX_IMAGE = "ghcr.io/longevity-genie/just-agents/biosandbox:main"
+
+
+def make_mounts():
+    """Return bind mounts exposing examples/input as /input and examples/output as /output."""
+    examples_dir = Path(__file__).parent.absolute()
+    input_dir = examples_dir / "input"
+    output_dir = examples_dir / "output"
+    # A bind mount needs an existing host directory, so create the two folders when they are missing.
+    for host_dir in (input_dir, output_dir):
+        host_dir.mkdir(exist_ok=True)
+    return [
+        Mount(target="/input", source=str(input_dir), type="bind"),
+        Mount(target="/output", source=str(output_dir), type="bind")
+    ]
+
+
+def _open_session():
+    """Build the MicromambaSession (image, language and mounts) shared by both tools."""
+    return MicromambaSession(image=SANDBOX_IMAGE,
+                             lang="python",
+                             keep_template=True,
+                             verbose=True,
+                             mounts=make_mounts())
+
+
+def run_bash_command(command: str):
+    """
+    command: str # command to run in bash, for example install software inside micromamba environment
+    """
+    with _open_session() as session:
+        return session.execute_command(command=command)
+
+
+def run_python_code(code: str):
+    """
+    code: str # python code to run in micromamba environment
+    """
+    with _open_session() as session:
+        return session.run(code)
+
+if __name__ == "__main__":
+    examples_dir = Path(__file__).parent.absolute()
+    assistant: IAgent = build_agent(examples_dir / "code_agent.yaml")
+    result, thoughts = assistant.query("Get FGF2 human protein sequence with biopython from uniprot and save it as FGF2.fasta")
+    print("Thoughts: ", thoughts)
+    print("Result: ", result)
\ No newline at end of file
diff --git a/examples/code_agent.yaml b/examples/code_agent.yaml
index 72387b7..567ebbc 100644
--- a/examples/code_agent.yaml
+++ b/examples/code_agent.yaml
@@ -1,23 +1,26 @@
-class: "ChainOfThoughtAgent" #TODO: still work in progress
+class: "ChainOfThoughtAgent"
 system_prompt: "You are a bioinformatician AI assistant. 
 Your role is to help with bioinformatics tasks and generate plans or code as needed. 
 Please adhere to the following guidelines strictly:
 1. Always maintain your role as a bioinformatician.
-2. You are working on an Ubuntu 24.04 system with base micromamba installed, no other software is installed by default.
+2. You are working on an Ubuntu 24.04 system with a base micromamba environment which includes the biopython, requests, polars, genomepy, pyensembl, plotly and GEOparse libraries. 
+However, no other software is installed by default.
 3. You use run_bash_command tool to install new dependencies. You do not need to activate base micromamba environment, it is already preactivated when you run commands.
 4. Use run_python_code tool to run python code. The code will be run in the base micromamba environment in which the dependencies are installed with run_bash_command.
 5. Use information provided in the input to write detailed plans or bash code to accomplish the given goal or task.
-6. Do not include loading data as a separate step in your plans.
+6. Do not include loading data as a separate step in your plans. If input data is provided it will be in the /input directory.
 7. When writing code:
    - Use full absolute paths for all files.
    - Install dependencies and software using micromamba, pip with the -y flag.
    - Use default values for unspecified parameters.
    - Only use software directly installed with micromamba or pip.
    - Do not repeat steps already completed in the history.
+   - If you create files or folders with results, save them inside the /output directory unless explicitly specified otherwise.
 8. Pay attention to the number of input files and do not miss any.
 9. Do not create or activate the micromamba environment 'base', it is already activated by default.
 10. Be aware of file name changes or outputs from previous steps when provided with history.
 11. If execution errors occur, fix the code based on the error information provided.
+12. When you are ready to give the final answer, explain the results obtained and the files and folders created in the /output directory (if any).
 System constraints:
 - You are working on an Ubuntu 24.04 system.
 - You have a micromamba environment named 'base'.
@@ -56,9 +59,9 @@ thought_max_tokes: 500
 max_steps: 25
 final_max_tokens: 1500
 tools:
-  - package: "just_agents_coding.tools"
+  - package: "examples.code_agent"
     function: "run_bash_command"
-  - package: "just_agents_coding.tools"
+  - package: "examples.code_agent"
     function: "run_python_code"
 options:
   model: "gpt-4o-mini"
diff --git a/examples/input/.gitignore b/examples/input/.gitignore
index b722e9e..73edf03 100644
--- a/examples/input/.gitignore
+++ b/examples/input/.gitignore
@@ -1 +1,2 @@
-!.gitignore
\ No newline at end of file
+*
+!.gitignore
\ No newline at end of file
diff --git a/examples/output/.gitignore b/examples/output/.gitignore
index 5aca4bd..73edf03 100644
--- a/examples/output/.gitignore
+++ b/examples/output/.gitignore
@@ -1,2 +1,2 @@
 !.gitignore
-./*
\ No newline at end of file
+*
\ No newline at end of file
diff --git a/examples/run_code_with_output.py b/examples/run_code_with_output.py
deleted file mode 100644
index 11a8ecc..0000000
--- a/examples/run_code_with_output.py
+++ /dev/null
@@ -1,48 +0,0 @@
-from pathlib import Path
-from dotenv import load_dotenv
-from just_agents.interfaces.IAgent import build_agent, IAgent
-from just_agents.llm_session import LLMSession
-from just_agents_coding.micromamba_session import MicromambaSession
-from docker.types import Mount
-import os
-
-load_dotenv(override=True)
-
-output_dir =  Path.cwd().absolute() / "examples" / "output"
-
-
-def run_bash_command_with_output(command: str):
-    """
-    command: str # command to run in bash, for example install software inside micromamba environment
-    """
-    assert output_dir.exists(), "Output directory does not exist"
-
-    with  MicromambaSession(image="ghcr.io/longevity-genie/just-agents/biosandbox:main", 
-                            lang="python", 
-                            keep_template=True, 
-                            verbose=True,
-                            mounts=[Mount(target="/tmp", source=str(output_dir))]
-                            ) as session:
-        result = session.execute_command(command=command)
-        return result
-        
-
-def run_python_code_with_output(code: str):
-    """
-    code: str # python code to run in micromamba environment
-    """
-    assert output_dir.exists(), "Output directory does not exist"
-
-    with  MicromambaSession(image="ghcr.io/longevity-genie/just-agents/biosandbox:main", 
-                            lang="python", 
-                            keep_template=True, 
-                            verbose=True,
-                            mounts=[Mount(target="/tmp", source=str(output_dir))]
-                            ) as session:
-        result = session.run(code)
-        return result
-      
-
-assistant: LLMSession= build_agent("examples/simple_code_agent.yaml")
-assistant._prepare_tools( [run_bash_command_with_output, run_python_code_with_output] )
-assistant.query("Get FGF2 human protein sequence with biopython from uniprot and save it as output/FGF2.fasta")
\ No newline at end of file
diff --git a/examples/run_simple_code.py b/examples/run_simple_code.py
deleted file mode 100644
index 0933a08..0000000
--- a/examples/run_simple_code.py
+++ /dev/null
@@ -1,9 +0,0 @@
-from dotenv import load_dotenv
-
-from just_agents.interfaces.IAgent import build_agent, IAgent
-load_dotenv(override=True)
-
-assistant: IAgent = build_agent("code_agent.yaml")
-result, thoughts = assistant.query("Get FGF2 human protein sequence with biopython from uniprot")
-print("Thoughts: ", thoughts)
-print("Result: ", result)
\ No newline at end of file
diff --git a/examples/simple_code_agent.py b/examples/simple_code_agent.py
new file mode 100644
index 0000000..1529da9
--- /dev/null
+++ b/examples/simple_code_agent.py
@@ -0,0 +1,18 @@
+from pathlib import Path
+from dotenv import load_dotenv
+
+from just_agents.interfaces.IAgent import build_agent, IAgent
+load_dotenv(override=True)
+
+"""
+This example shows how to use a simple code agent to run Python code and bash commands. It does not use volumes and is based on the basic LLMSession class.
+"""
+
+
+if __name__ == "__main__":
+
+    examples_dir = Path(__file__).parent.absolute()
+    assert examples_dir.exists(), f"Examples directory {str(examples_dir)} does not exist, check the current working directory"
+
+    assistant: IAgent = build_agent( examples_dir / "simple_code_agent.yaml")
+    assistant.query("Get FGF2 human protein sequence with biopython from uniprot")
\ No newline at end of file
diff --git a/examples/simple_code_agent.yaml b/examples/simple_code_agent.yaml
index 8cd1d9d..992106c 100644
--- a/examples/simple_code_agent.yaml
+++ b/examples/simple_code_agent.yaml
@@ -4,7 +4,8 @@ system_prompt: "You are a bioinformatician AI assistant.
 Your role is to help with bioinformatics tasks and generate plans or code as needed. 
 Please adhere to the following guidelines strictly:
 1. Always maintain your role as a bioinformatician.
-2. You are working on an Ubuntu 24.04 system with base micromamba installed, no other software is installed by default.
+2. You are working on an Ubuntu 24.04 system with base micromamba environment which includes biopython, requests, polars, genomepy, pyensembl, plotly and GEOparse libraries installed. 
+However, no other software is installed by default.
 3. You use run_bash_command tool to install new dependencies. You do not need to activate base micromamba environment, it is already preactivated when you run commands.
 4. Use run_python_code tool to run python code. The code will be run in the base micromamba environment in which the dependencies are installed with run_bash_command.
 5. Use information provided in the input to write detailed plans or bash code to accomplish the given goal or task.
diff --git a/just_agents_coding/containers/biosandbox/Dockerfile b/just_agents_coding/containers/biosandbox/Dockerfile
index dde1a4d..980c184 100644
--- a/just_agents_coding/containers/biosandbox/Dockerfile
+++ b/just_agents_coding/containers/biosandbox/Dockerfile
@@ -5,6 +5,11 @@ RUN apt update && \
     apt upgrade -y && \
     apt install -y tar gzip libz-dev software-properties-common python3-software-properties automake nano cmake zip wget gcc git build-essential curl gosu libbz2-dev zlib1g-dev gawk libxml2-dev
 
+RUN mkdir /input && \
+    chown -R $MAMBA_USER:$MAMBA_USER /input
+RUN mkdir /output && \
+    chown -R $MAMBA_USER:$MAMBA_USER /output
+
 USER $MAMBA_USER
 COPY --chown=$MAMBA_USER:$MAMBA_USER env.yaml /tmp/env.yaml
 RUN micromamba install -y -n base -f /tmp/env.yaml && \
diff --git a/just_agents_coding/containers/sandbox/Dockerfile b/just_agents_coding/containers/sandbox/Dockerfile
index 65fb294..09ab09e 100644
--- a/just_agents_coding/containers/sandbox/Dockerfile
+++ b/just_agents_coding/containers/sandbox/Dockerfile
@@ -5,6 +5,11 @@ RUN apt update && \
     apt upgrade -y && \
     apt install -y tar gzip libz-dev software-properties-common python3-software-properties automake nano cmake zip wget gcc git build-essential curl gosu libbz2-dev zlib1g-dev gawk libxml2-dev
 
+RUN mkdir /input && \
+    chown -R $MAMBA_USER:$MAMBA_USER /input
+RUN mkdir /output && \
+    chown -R $MAMBA_USER:$MAMBA_USER /output
+
 USER $MAMBA_USER
 COPY --chown=$MAMBA_USER:$MAMBA_USER env.yaml /tmp/env.yaml
 RUN micromamba install -y -n base -f /tmp/env.yaml && \
diff --git a/just_agents_coding/micromamba_session.py b/just_agents_coding/micromamba_session.py
deleted file mode 100644
index 1e392e6..0000000
--- a/just_agents_coding/micromamba_session.py
+++ /dev/null
@@ -1,65 +0,0 @@
-from typing import Optional
-import docker
-from llm_sandbox.session import SandboxDockerSession
-from llm_sandbox.docker import ConsoleOutput
-from llm_sandbox.const import SupportedLanguage
-from docker.types import Mount
-
-class MicromambaSession(SandboxDockerSession):
-    """
-    Docker session for running micromamba in docker container
-    """
-    def __init__(self, client: Optional[docker.DockerClient] = None,
-        image: Optional[str] = None,
-        dockerfile: Optional[str] = None,
-        lang: str = SupportedLanguage.PYTHON,
-        keep_template: bool = False,
-        verbose: bool = False, 
-        environment: str = "base",
-        mounts: Optional[list[Mount]] = None,
-        ):
-        super().__init__(client=client,
-                         image=image,
-                         dockerfile=dockerfile,
-                         lang=lang,
-                         keep_template=keep_template,
-                         verbose=verbose, 
-                         mounts=mounts)
-        self.environment = environment
-
-
-    def execute_command(
-        self, command: Optional[str], workdir: Optional[str] = None
-    ) -> ConsoleOutput:
-        if not command:
-            raise ValueError("Command cannot be empty")
-
-        if not self.container:
-            raise RuntimeError(
-                "Session is not open. Please call open() method before executing commands."
-            )
-        command = f"micromamba run -n {self.environment} {command}"
-
-        if self.verbose:
-            print(f"Executing command: {command}")
-
-        if workdir:
-            exit_code, exec_log = self.container.exec_run(
-                command, stream=True, tty=True, workdir=workdir
-            )
-        else:
-            exit_code, exec_log = self.container.exec_run(
-                command, stream=True, tty=True
-            )
-
-        output = ""
-        if self.verbose:
-            print("Output:", end=" ")
-
-        for chunk in exec_log:
-            chunk_str = chunk.decode("utf-8")
-            output += chunk_str
-            if self.verbose:
-                print(chunk_str, end="")
-
-        return ConsoleOutput(output)
diff --git a/just_agents_coding/tools.py b/just_agents_coding/tools.py
index 61f0bbc..087e14d 100644
--- a/just_agents_coding/tools.py
+++ b/just_agents_coding/tools.py
@@ -1,4 +1,4 @@
-from just_agents_coding.micromamba_session import MicromambaSession
+from llm_sandbox.micromamba import MicromambaSession
 from llm_sandbox.docker import ConsoleOutput
 
 def run_bash_command(command: str):
@@ -18,7 +18,7 @@ def run_python_code(code: str):
         result: ConsoleOutput = session.run(code)
         return result
     
-def copy_files_from_runtime(src: str, dest: str):
+def copy_from_container(src: str, dest: str):
     """
     src: str # path to file in runtime
     dest: str # path to file in host