Skip to content

Commit

Permalink
Merge pull request #358 from aitomatic/examples/FinanceBench
Browse files: browse the repository at this point in the history.
update examples/FinanceBench
  • Loading branch information
TheVinhLuong102 committed Sep 23, 2024
2 parents 6a4977d + f790300 commit 577c3de
Show file tree
Hide file tree
Showing 5 changed files with 29 additions and 14 deletions.
8 changes: 5 additions & 3 deletions examples/FinanceBench/dana.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ def solve_with_knowledge_and_program_store_with_llama3(fb_id: FbId) -> Answer:
if __name__ == '__main__':
arg_parser = ArgumentParser()
arg_parser.add_argument('fb_id')
arg_parser.add_argument('--from-id', action='store_true')
arg_parser.add_argument('--knowledge', action='store_true')
arg_parser.add_argument('--prog-store', action='store_true')
arg_parser.add_argument('--llama3', action='store_true')
Expand Down Expand Up @@ -148,6 +149,7 @@ def solve_with_knowledge_and_program_store_with_llama3(fb_id: FbId) -> Answer:
case (True, True, True):
solve_func: QAFunc = solve_with_knowledge_and_program_store_with_llama3

solve_func(fb_id
if (fb_id := args.fb_id).startswith(FB_ID_COL_NAME)
else f'{FB_ID_COL_NAME}_{fb_id}')
if not (fb_id := args.fb_id).startswith(FB_ID_COL_NAME):
fb_id: FbId = f'{FB_ID_COL_NAME}_{fb_id}'

solve_func(f'from:{fb_id}' if args.from_id else fb_id)
8 changes: 5 additions & 3 deletions examples/FinanceBench/langchain_react.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,10 @@ def solve(fb_id: FbId) -> Answer:
if __name__ == '__main__':
arg_parser = ArgumentParser()
arg_parser.add_argument('fb_id')
arg_parser.add_argument('--from-id', action='store_true')
args = arg_parser.parse_args()

solve(fb_id
if (fb_id := args.fb_id).startswith(FB_ID_COL_NAME)
else f'{FB_ID_COL_NAME}_{fb_id}')
if not (fb_id := args.fb_id).startswith(FB_ID_COL_NAME):
fb_id: FbId = f'{FB_ID_COL_NAME}_{fb_id}'

solve(f'from:{fb_id}' if args.from_id else fb_id)
8 changes: 5 additions & 3 deletions examples/FinanceBench/openai_assist.py
Original file line number Diff line number Diff line change
Expand Up @@ -418,8 +418,10 @@ def answer(fb_id: FbId) -> Answer:
if __name__ == '__main__':
arg_parser = ArgumentParser()
arg_parser.add_argument('fb_id')
arg_parser.add_argument('--from-id', action='store_true')
args = arg_parser.parse_args()

answer(fb_id
if (fb_id := args.fb_id).startswith(FB_ID_COL_NAME)
else f'{FB_ID_COL_NAME}_{fb_id}')
if not (fb_id := args.fb_id).startswith(FB_ID_COL_NAME):
fb_id: FbId = f'{FB_ID_COL_NAME}_{fb_id}'

answer(f'from:{fb_id}' if args.from_id else fb_id)
10 changes: 5 additions & 5 deletions examples/FinanceBench/rag.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,11 @@ def answer_with_gpt4o_lm(fb_id: FbId) -> Answer:
if __name__ == '__main__':
arg_parser = ArgumentParser()
arg_parser.add_argument('fb_id')
arg_parser.add_argument('--from-id', action='store_true')
arg_parser.add_argument('--gpt4o', action='store_true')
args = arg_parser.parse_args()

(answer_with_gpt4o_lm
if args.gpt4o
else answer_with_default_lm)(fb_id
if (fb_id := args.fb_id).startswith(FB_ID_COL_NAME)
else f'{FB_ID_COL_NAME}_{fb_id}')
if not (fb_id := args.fb_id).startswith(FB_ID_COL_NAME):
fb_id: FbId = f'{FB_ID_COL_NAME}_{fb_id}'

(answer_with_gpt4o_lm if args.gpt4o else answer_with_default_lm)(f'from:{fb_id}' if args.from_id else fb_id)
9 changes: 9 additions & 0 deletions examples/FinanceBench/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,15 @@ def decorated_qa_func(fb_id: FbId) -> Answer | None:
eval_all(output_name=self.output_name, refresh=True)
return None

if 'from:' in fb_id.lower():
for _fb_id in tqdm(FB_IDS[FB_IDS.index(fb_id[5:]):]):
# run inferencing and preliminarily evaluate
eval_correctness(fb_id=_fb_id, answer=qa_func(_fb_id), output_name=self.output_name, human=False)

# rigorously evaluate again, including human evaluation for difficult cases
eval_all(output_name=self.output_name, refresh=True)
return None

# run inferencing and evaluate
eval_correctness(fb_id=fb_id, answer=(answer := qa_func(fb_id)), output_name=self.output_name, human=True)
return answer
Expand Down

0 comments on commit 577c3de

Please sign in to comment.