Skip to content

Commit

Permalink
Remove experimental TDD step configs (AntonOsika#737)
Browse files Browse the repository at this point in the history
* Remove respec

* Benchmark

* Small bugfix

* Small bugfix

* Cleanup

* precommit

* Fix tests

---------

Co-authored-by: Lukas Petersson <lukas.petersson.1999@gmail.com>
  • Loading branch information
AntonOsika and lukaspetersson authored Sep 24, 2023
1 parent 4c77f62 commit 745517e
Show file tree
Hide file tree
Showing 5 changed files with 5 additions and 117 deletions.
5 changes: 0 additions & 5 deletions gpt_engineer/preprompts/fix_code

This file was deleted.

10 changes: 0 additions & 10 deletions gpt_engineer/preprompts/spec

This file was deleted.

3 changes: 0 additions & 3 deletions gpt_engineer/preprompts/unit_tests

This file was deleted.

92 changes: 1 addition & 91 deletions gpt_engineer/steps.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,6 @@ def curr_fn() -> str:
return inspect.stack()[1].function


# All steps below have the Step signature


def lite_gen(ai: AI, dbs: DBs) -> List[Message]:
"""Run the AI on only the main prompt and save the results"""
messages = ai.start(
Expand Down Expand Up @@ -118,41 +115,6 @@ def clarify(ai: AI, dbs: DBs) -> List[Message]:
return messages


def gen_spec(ai: AI, dbs: DBs) -> List[Message]:
    """
    Ask the AI for a specification and store it in ``dbs.memory``.

    The conversation is seeded with two system messages (the setup system
    prompt and the user's main prompt prefixed with ``Instructions:``) and
    continued with the ``spec`` preprompt. The stripped content of the last
    returned message is saved under ``dbs.memory["specification"]``.

    Returns the message list produced by ``ai.next``.
    """
    system_setup = ai.fsystem(setup_sys_prompt(dbs))
    instructions = ai.fsystem(f"Instructions: {dbs.input['prompt']}")

    # curr_fn() is called directly in this function body on purpose: it
    # reports the name of its immediate caller, which is used as the step name.
    conversation = ai.next(
        [system_setup, instructions],
        dbs.preprompts["spec"],
        step_name=curr_fn(),
    )

    specification = conversation[-1].content.strip()
    dbs.memory["specification"] = specification
    return conversation


def gen_unit_tests(ai: AI, dbs: DBs) -> List[Message]:
    """
    Ask the AI for unit tests derived from the stored specification.

    The conversation holds the setup system prompt, the main prompt and the
    text saved under ``dbs.memory["specification"]``; it is continued with the
    ``unit_tests`` preprompt. The stripped reply is stored under
    ``dbs.memory["unit_tests"]`` and then passed to ``to_files`` together with
    ``dbs.workspace``.

    Returns the message list produced by ``ai.next``.
    """
    system_setup = ai.fsystem(setup_sys_prompt(dbs))
    instructions = ai.fuser(f"Instructions: {dbs.input['prompt']}")
    specification = ai.fuser(f"Specification:\n\n{dbs.memory['specification']}")

    # curr_fn() must stay in this frame so the step name is this function's.
    conversation = ai.next(
        [system_setup, instructions, specification],
        dbs.preprompts["unit_tests"],
        step_name=curr_fn(),
    )

    dbs.memory["unit_tests"] = conversation[-1].content.strip()
    # Read the value back from the store rather than reusing the local string,
    # exactly as the original does.
    stored_tests = dbs.memory["unit_tests"]
    to_files(stored_tests, dbs.workspace)
    return conversation


def gen_clarified_code(ai: AI, dbs: DBs) -> List[dict]:
"""Takes clarification and generates code"""
messages = AI.deserialize_messages(dbs.logs[clarify.__name__])
Expand All @@ -172,23 +134,6 @@ def gen_clarified_code(ai: AI, dbs: DBs) -> List[dict]:
return messages


def gen_code_after_unit_tests(ai: AI, dbs: DBs) -> List[dict]:
    """
    Generate the project code once a specification and unit tests exist.

    The conversation holds the setup system prompt, the main prompt, the
    stored specification and the stored unit tests. It is continued with the
    ``generate`` preprompt, in which every ``FILE_FORMAT`` placeholder is
    replaced by the ``file_format`` preprompt. The stripped reply is passed to
    ``to_files`` together with ``dbs.workspace``.

    Returns the message list produced by ``ai.next``.
    """
    system_setup = ai.fsystem(setup_sys_prompt(dbs))
    instructions = ai.fuser(f"Instructions: {dbs.input['prompt']}")
    specification = ai.fuser(f"Specification:\n\n{dbs.memory['specification']}")
    unit_tests = ai.fuser(f"Unit tests:\n\n{dbs.memory['unit_tests']}")

    generate_prompt = dbs.preprompts["generate"].replace(
        "FILE_FORMAT", dbs.preprompts["file_format"]
    )

    # curr_fn() must stay in this frame so the step name is this function's.
    conversation = ai.next(
        [system_setup, instructions, specification, unit_tests],
        generate_prompt,
        step_name=curr_fn(),
    )

    generated = conversation[-1].content.strip()
    to_files(generated, dbs.workspace)
    return conversation


def execute_entrypoint(ai: AI, dbs: DBs) -> List[dict]:
command = dbs.workspace["run.sh"]

Expand Down Expand Up @@ -351,22 +296,6 @@ def improve_existing_code(ai: AI, dbs: DBs):
return messages


def fix_code(ai: AI, dbs: DBs):
    """
    Ask the AI to fix errors in previously generated code.

    The last message logged under the ``gen_code_after_unit_tests`` step is
    taken as the code to fix. A new conversation is built from the setup
    system prompt, the main prompt, that code and the ``fix_code`` preprompt,
    then continued with a fixed request to fix any errors. The stripped reply
    is passed to ``to_files`` together with ``dbs.workspace``.

    Returns the message list produced by ``ai.next``.
    """
    logged = AI.deserialize_messages(dbs.logs[gen_code_after_unit_tests.__name__])
    previous_code = logged[-1].content.strip()

    system_setup = ai.fsystem(setup_sys_prompt(dbs))
    instructions = ai.fuser(f"Instructions: {dbs.input['prompt']}")
    code_message = ai.fuser(previous_code)
    fix_instructions = ai.fsystem(dbs.preprompts["fix_code"])

    # curr_fn() must stay in this frame so the step name is this function's.
    conversation = ai.next(
        [system_setup, instructions, code_message, fix_instructions],
        "Please fix any errors in the code above.",
        step_name=curr_fn(),
    )

    fixed_code = conversation[-1].content.strip()
    to_files(fixed_code, dbs.workspace)
    return conversation


def human_review(ai: AI, dbs: DBs):
"""Collects and stores human review of the code"""
review = human_review_input()
Expand All @@ -380,8 +309,6 @@ class Config(str, Enum):
BENCHMARK = "benchmark"
SIMPLE = "simple"
LITE = "lite"
TDD = "tdd"
TDD_PLUS = "tdd+"
CLARIFY = "clarify"
RESPEC = "respec"
EXECUTE_ONLY = "execute_only"
Expand All @@ -392,7 +319,6 @@ class Config(str, Enum):
EVAL_NEW_CODE = "eval_new_code"


# Define the steps to run for different configs
STEPS = {
Config.DEFAULT: [
simple_gen,
Expand All @@ -419,23 +345,6 @@ class Config(str, Enum):
gen_entrypoint,
execute_entrypoint,
],
Config.TDD: [
gen_spec,
gen_unit_tests,
gen_code_after_unit_tests,
gen_entrypoint,
execute_entrypoint,
human_review,
],
Config.TDD_PLUS: [
gen_spec,
gen_unit_tests,
gen_code_after_unit_tests,
fix_code,
gen_entrypoint,
execute_entrypoint,
human_review,
],
Config.USE_FEEDBACK: [use_feedback, gen_entrypoint, execute_entrypoint, human_review],
Config.EXECUTE_ONLY: [execute_entrypoint],
Config.EVALUATE: [execute_entrypoint, human_review],
Expand All @@ -448,6 +357,7 @@ class Config(str, Enum):
Config.EVAL_NEW_CODE: [simple_gen],
}


# Future steps that can be added:
# run_tests_and_fix_files
# execute_entrypoint_and_fix_files_if_it_results_in_error
12 changes: 4 additions & 8 deletions tests/test_collect.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@

from gpt_engineer.collect import collect_learnings, steps_file_hash
from gpt_engineer.db import DB, DBs
from gpt_engineer.learning import extract_learning
from gpt_engineer.steps import gen_code_after_unit_tests
from gpt_engineer.learning import collect_consent, extract_learning
from gpt_engineer.steps import simple_gen


def test_collect_learnings(monkeypatch):
Expand All @@ -18,7 +18,7 @@ def test_collect_learnings(monkeypatch):

model = "test_model"
temperature = 0.5
steps = [gen_code_after_unit_tests]
steps = [simple_gen]
dbs = DBs(
DB("/tmp"), DB("/tmp"), DB("/tmp"), DB("/tmp"), DB("/tmp"), DB("/tmp"), DB("/tmp")
)
Expand All @@ -27,11 +27,7 @@ def test_collect_learnings(monkeypatch):
"feedback": "test feedback",
}
code = "this is output\n\nit contains code"
dbs.logs = {
gen_code_after_unit_tests.__name__: json.dumps(
[{"role": "system", "content": code}]
)
}
dbs.logs = {steps[0].__name__: json.dumps([{"role": "system", "content": code}])}
dbs.workspace = {"all_output.txt": "test workspace\n" + code}

collect_learnings(model, temperature, steps, dbs)
Expand Down

0 comments on commit 745517e

Please sign in to comment.