diff --git a/tools/python/model_validation/README.md b/tools/python/model_validation/README.md
new file mode 100644
index 000000000..c449984a8
--- /dev/null
+++ b/tools/python/model_validation/README.md
@@ -0,0 +1,33 @@
+# ONNX Runtime GenAI Model Validation Example
+
+## Setup
+
+Clone this repository and navigate to the `tools/python/model_validation` folder.
+
+```bash
+git clone https://github.com/microsoft/onnxruntime-genai.git
+cd onnxruntime-genai/tools/python/model_validation
+```
+
+The `model_validation` folder contains the `validation_tool.py` script, the `validation_config.json` file, and this README.
+
+### Currently Supported Models
+* Gemma
+* Llama
+* Mistral
+* Phi
+* Qwen
+
+### Usage - Build the Model
+This step creates optimized and quantized ONNX models that run with ONNX Runtime GenAI.
+
+1. In `validation_config.json`, enter the name of a supported Hugging Face model. Model names can be found on [Hugging Face](https://huggingface.co/models).
+2. Set the path to the output folder, the precision, and the execution provider.
+
+Once a model is built, you can find it under `path_to_output_folder/{model_name}`, which should contain the ONNX model data and the tokenizer.
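+
+For reference, here is a trimmed version of the `validation_config.json` provided in this folder (the full file also sets `min_length`, `top_p`, and `repetition_penalty` under `search_options`, and uses longer prompts):
+
+```json
+{
+    "models": ["Qwen/Qwen2-0.5B"],
+    "inputs": ["Explain the process of photosynthesis in plants."],
+    "output_directory": "../../../models_outputs/",
+    "cache_directory": "../../../cache_models",
+    "precision": "int4",
+    "execution_provider": "cpu",
+    "verbose": false,
+    "search_options": {
+        "max_length": 512,
+        "do_sample": false,
+        "top_k": 1,
+        "temperature": 1.0
+    }
+}
+```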
+
+### Run the Model Validation Script
+```bash
+python validation_tool.py -j validation_config.json
+```
+
+The script appends each model's generated text to `output.txt` in that model's output folder and writes a summary of the results to `models.csv`.
diff --git a/tools/python/model_validation/validation_config.json b/tools/python/model_validation/validation_config.json
new file mode 100644
index 000000000..a8261c772
--- /dev/null
+++ b/tools/python/model_validation/validation_config.json
@@ -0,0 +1,23 @@
+{
+    "models": [
+        "Qwen/Qwen2-0.5B"
+    ],
+    "inputs": [
+        "Provide a detailed analysis of the causes and consequences of the French Revolution, including key events, figures, and social changes.",
+        "Explain the process of photosynthesis in plants, detailing the chemical reactions involved, the role of chlorophyll, and the importance of sunlight."
+    ],
+    "output_directory": "../../../models_outputs/",
+    "cache_directory": "../../../cache_models",
+    "precision": "int4",
+    "execution_provider": "cpu",
+    "verbose": false,
+    "search_options": {
+        "max_length": 512,
+        "min_length": 0,
+        "do_sample": false,
+        "top_p": 0.0,
+        "top_k": 1,
+        "temperature": 1.0,
+        "repetition_penalty": 1.0
+    }
+}
\ No newline at end of file
diff --git a/tools/python/model_validation/validation_tool.py b/tools/python/model_validation/validation_tool.py
new file mode 100644
index 000000000..824e45c07
--- /dev/null
+++ b/tools/python/model_validation/validation_tool.py
@@ -0,0 +1,126 @@
+import argparse
+import json
+import os
+
+import onnxruntime_genai as og
+import pandas as pd
+from onnxruntime_genai.models.builder import create_model
+
+
+def create_table(output):
+    return pd.DataFrame(output, columns=['Model Name', 'Validation Completed', 'Exceptions / Failures'])
+
+
+def get_chat_template(output_directory):
+    # The built model's output directory contains tokenizer_config.json, which holds the chat template.
+    tokenizer_json = os.path.join(output_directory, 'tokenizer_config.json')
+    with open(tokenizer_json, 'r') as file:
+        config = json.load(file)
+    return config["chat_template"]
+
+
+def validate_model(config, model_directory):
+    if config["verbose"]: print("Loading model...")
+    model = og.Model(model_directory)
+    if config["verbose"]: print("Model loaded")
+
+    tokenizer = og.Tokenizer(model)
+    tokenizer_stream = tokenizer.create_stream()
+    if config["verbose"]: print("Tokenizer created")
+
+    chat_template = get_chat_template(model_directory)
+
+    # A failure on any input marks the whole model as failed.
+    validation_successful = True
+
+    for text in config["inputs"]:
+        complete_text = ''
+        prompt = chat_template.format(input=text)
+        input_tokens = tokenizer.encode(prompt)
+
+        params = og.GeneratorParams(model)
+        # Apply the search options (max_length, top_k, etc.) from the config file.
+        params.set_search_options(**config["search_options"])
+        params.input_ids = input_tokens
+
+        generator = og.Generator(model, params)
+        if config["verbose"]: print("Generator created")
+        if config["verbose"]: print("Running generation loop ...")
+
+        print()
+        print("Output: ", end='', flush=True)
+
+        try:
+            while not generator.is_done():
+                generator.compute_logits()
+                generator.generate_next_token()
+
+                new_token = generator.get_next_tokens()[0]
+                # Decode each token exactly once: the tokenizer stream is stateful.
+                decoded_token = tokenizer_stream.decode(new_token)
+                complete_text += decoded_token
+                print(decoded_token, end='', flush=True)
+        except KeyboardInterrupt:
+            print(" --control+c pressed, aborting generation--")
+            validation_successful = False
+        except Exception as e:
+            print(f"An error occurred: {e}")
+            validation_successful = False
+
+        with open(os.path.join(model_directory, 'output.txt'), 'a') as file:
+            file.write(complete_text)
+
+        # Delete the generator to free the captured graph for the next generator, if graph capture is enabled.
+        del generator
+
+    return validation_successful
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Model validation tool for ONNX Runtime GenAI")
+    parser.add_argument('-j', '--json', type=str, required=True, help='Path to the JSON file containing the arguments')
+    args = parser.parse_args()
+
+    with open(args.json, 'r') as file:
+        config = json.load(file)
+
+    os.makedirs(config["output_directory"], exist_ok=True)
+    os.makedirs(config["cache_directory"], exist_ok=True)
+
+    output = []
+
+    for model in config["models"]:
+        print(f"Validating {model}")
+        adjusted_model = model.replace("/", "_")
+        output_path = os.path.join(config["output_directory"], adjusted_model)
+        cache_path = os.path.join(config["cache_directory"], adjusted_model)
+
+        validation_complete = False
+
+        try:
+            create_model(model, '', output_path, config["precision"], config["execution_provider"], cache_path)
+        except Exception as e:
+            print(f'Model creation failed: {e}')
+            output.append([model, validation_complete, e])
+            continue
+
+        try:
+            validation_complete = validate_model(config, output_path)
+        except Exception as e:
+            print(f'Validation failed: {e}')
+            output.append([model, validation_complete, e])
+        else:
+            output.append([model, validation_complete, ''])
+
+    df = create_table(output)
+    df.to_csv("models.csv")
+    print(df)
\ No newline at end of file