update benchmark
zhtmike committed Sep 27, 2024
1 parent 1675636 commit b45f17f
Showing 2 changed files with 15 additions and 1 deletion.
14 changes: 14 additions & 0 deletions examples/opensora_hpcai/tools/caption/llava_next/README.md
@@ -37,3 +37,17 @@ starting from the same point. This particular radar chart is showing the perform
The axes represent different metrics or benchmarks, such as MM-Vet, …
```

## Benchmark

### Inference

To run the benchmark and measure throughput, execute `python predict.py --benchmark`.

| Model                 | Context       | Batch Size | Throughput (tokens/second) |
|-----------------------|---------------|------------|----------------------------|
| llava-v1.6-mistral-7b | D910*x1-MS2.3 | 1          | 21.2                       |

> Context: {Ascend chip}-{number of NPUs}-{MindSpore version}.\
> Throughput (tokens/second): number of generated tokens per second.\
> We take the second round of inference as the benchmark result, since the first round includes warm-up overhead.
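The measurement scheme described above (time the generate call, divide the generated token count by the elapsed time, and report the second round so warm-up overhead is excluded) can be sketched as follows; `measure_throughput` and `fake_generate` are hypothetical illustrations, with `fake_generate` standing in for the real `pipeline.generate`:

```python
import time


def measure_throughput(generate, inputs, rounds=2):
    """Time `generate` for several rounds and return the tokens/second of
    the last round; earlier rounds serve as warm-up (e.g. graph compilation)."""
    tps = 0.0
    for _ in range(rounds):
        start = time.time()
        output = generate(**inputs)  # a batch of generated token sequences
        tps = len(output[0]) / (time.time() - start)
    return tps


def fake_generate(n_tokens):
    """Stand-in for pipeline.generate: one sequence of n_tokens tokens."""
    time.sleep(0.01)  # pretend generation takes some time
    return [list(range(n_tokens))]


if __name__ == "__main__":
    tps = measure_throughput(fake_generate, {"n_tokens": 64})
    print(f"Throughput: {tps:.1f} tokens/s")
```

In the real script the first round would trigger one-time costs, so only the final round's figure is representative.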
@@ -82,7 +82,7 @@ def main():
     start = time.time()
     output = pipeline.generate(**inputs)
     end = time.time()
-    logging.info(f"Time Taken: {end-start:.3f}")
+    logging.info(f"Time Taken: {end-start:.3f}, Tokens/Second: {len(output[0]) / (end - start):.1f}")

print(processor.decode(output[0], skip_special_tokens=True))

