Skip to content

Commit

Permalink
Improve form extraction (#290)
Browse files Browse the repository at this point in the history
- allow form sections to have sub-sections
- increase response token limit to 16K, to support longer forms with
more fields
  • Loading branch information
markwaddle authored Dec 13, 2024
1 parent 59d2e59 commit aa60990
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 23 deletions.
4 changes: 2 additions & 2 deletions assistants/prospector-assistant/assistant/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,12 +102,12 @@ class RequestConfig(BaseModel):
title="Response Tokens",
description=(
"The number of tokens to use for the response, will reduce the number of tokens available for the"
" prompt. Current max supported by OpenAI is 4096 tokens [https://platform.openai.com/docs/models]"
" prompt. Current max supported by OpenAI is 16k tokens for gpt-4o, and 4098 for all others [https://platform.openai.com/docs/models]"
"(https://platform.openai.com/docs/models)."
),
),
UISchema(enable_markdown_in_description=True),
] = 4_048
] = 16_000

openai_model: Annotated[
str,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,10 @@ class Section(BaseModel):
description: str = Field(description="The description of the section if one is provided on the form.")
instructions: str = Field(description="The instructions for the section if they are provided on the form.")
fields: list[FormField] = Field(description="The fields of the section.")
sections: list["Section"] = Field(description="The sub-sections of the section, if any.")


class Form(BaseModel):
class Form(Section):
title: str = Field(description="The title of the form.")
description: str = Field(description="The description of the form if one is provided on the form.")
instructions: str = Field(description="The instructions for the form if they are provided on the form.")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -131,9 +131,13 @@ async def execute(
Approach: Guided conversation / direct chat-completion (for document extraction)
"""

form_fields = form.fields.copy()
for section in form.sections:
form_fields.extend(section.fields)
def fields_for(section: state.Section) -> list[state.FormField]:
    """Gather the fields of a section together with those of its sub-sections.

    The section's own fields come first, followed by the fields of each
    sub-section in order; sub-sections are walked recursively, so fields at
    any nesting depth are included. A new list is returned and the
    section's own ``fields`` list is left unmodified.
    """
    # Start from a shallow copy so that extending never mutates the section.
    collected = section.fields.copy()
    for child in section.sections:
        collected += fields_for(child)
    return collected

form_fields = fields_for(form)

debug = {}

Expand Down Expand Up @@ -177,14 +181,14 @@ async def execute(
populated_fields=fill_form_gc_artifact,
)

async with step_state(step_context.context) as state:
state.populated_form_markdown = populated_form_markdown
async with step_state(step_context.context) as current_state:
current_state.populated_form_markdown = populated_form_markdown

if result.is_conversation_over:
return CompleteResult(
message=state.populated_form_markdown,
message=current_state.populated_form_markdown,
artifact=fill_form_gc_artifact,
populated_form_markdown=state.populated_form_markdown,
populated_form_markdown=current_state.populated_form_markdown,
debug=debug,
)

Expand Down Expand Up @@ -387,22 +391,19 @@ def field_values(fields: list[state.FormField]) -> str:

return "\n\n".join(markdown_fields)

top_level_fields = field_values(form.fields)

sections = (
f"## {section.title}\n{section.description}\n{section.instructions}\n{field_values(section.fields)}"
for section in form.sections
)
def for_section(level: int, section: state.Section) -> str:
    """Render a section, and recursively its sub-sections, as markdown text.

    The output starts with a heading made of ``level`` ``#`` characters and
    the section title, followed by the section description, its
    instructions, and its fields as rendered by ``field_values``. Each
    sub-section is then rendered the same way one heading level deeper.
    All parts are joined with single newlines.
    """
    parts = [
        f"{'#' * level} {section.title}",
        section.description,
        section.instructions,
        field_values(section.fields),
    ]
    # Sub-sections follow the section's own content, one heading level down.
    for sub_section in section.sections:
        parts.append(for_section(level + 1, sub_section))
    return "\n".join(parts)

return "\n".join((
"```markdown",
f"# {form.title}",
form.description,
form.instructions,
"",
top_level_fields,
"",
*sections,
for_section(1, form),
"```",
))

Expand Down

0 comments on commit aa60990

Please sign in to comment.