Commit

fix bleu
Wenshansilvia committed Oct 2, 2024
1 parent d035642 commit 019a541
Showing 2 changed files with 4 additions and 3 deletions.
5 changes: 3 additions & 2 deletions rageval/metrics/answer_correctness/_answer_bleu.py
@@ -55,7 +55,7 @@
     'AnswerCorrectness'
     >>> score, results = metric.compute(dataset["answers"], dataset["gt_answers"], 1)
     >>> score
-    0.27008629492975705
+    0.3450835085970013
     >>> results[0]
     0.5401725898595141
     """
@@ -118,7 +118,8 @@ def compute(
         """Compute the bleu score on both corpus level and instance level."""
         bleu = evaluate.load("bleu")
         # corpus level
-        score = bleu.compute(predictions=pred_answers, references=ref_answers)
+        bleu_result = bleu.compute(predictions=pred_answers, references=ref_answers)
+        score = bleu_result['bleu']
         # instance level
         scores = []
         for pred_answer, ref_answer in tqdm(zip(pred_answers, ref_answers),
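
Why the fix works: Hugging Face's evaluate library returns a dictionary of BLEU statistics from compute(), not a bare float, so the corpus-level score has to be pulled out by its 'bleu' key. A minimal standalone sketch of that behavior (the example sentences are made up, not taken from the repository's test data):

    import evaluate

    bleu = evaluate.load("bleu")
    result = bleu.compute(
        predictions=["the cat sat on the mat"],
        references=[["the cat sat on the mat"]],
    )
    # result is a dict, e.g. {'bleu': 1.0, 'precisions': [...], 'brevity_penalty': ..., ...}
    print(result["bleu"])  # the scalar corpus-level score the metric should report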
2 changes: 1 addition & 1 deletion tests/units/test_answer_bleu.py
@@ -40,5 +40,5 @@ def test_case_on_answer_bleu(testset):
     assert metric.mtype == 'AnswerCorrectness'
     assert repr(metric) == "answer_bleu"
     score, results = metric.compute(testset['answers'], testset['gt_answers'], 1)
-    assert score == 0.27008629492975705
+    assert score == 0.3450835085970013
     assert results[0] == 0.5401725898595141
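
The updated assertion pins the corpus-level score to the 'bleu' value of the returned dict, while the instance-level result is unchanged. A minimal sketch of the corpus- vs instance-level distinction the test covers (pred_answers and ref_answers are made-up stand-ins for the testset fixture's data):

    import evaluate

    bleu = evaluate.load("bleu")
    pred_answers = ["a prediction", "another prediction"]
    ref_answers = [["a reference"], ["another reference"]]

    # corpus level: one BLEU score computed over all pairs jointly
    corpus_score = bleu.compute(predictions=pred_answers, references=ref_answers)["bleu"]

    # instance level: one BLEU score per (prediction, references) pair,
    # mirroring the metric's per-example loop
    instance_scores = [
        bleu.compute(predictions=[pred], references=[refs])["bleu"]
        for pred, refs in zip(pred_answers, ref_answers)
    ]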
