vllm install and test added. #160

Closed · wants to merge 5 commits
37 changes: 36 additions & 1 deletion scripts/Jenkinsfile
@@ -48,6 +48,41 @@ pipeline
}
}
}


stage('Install vLLM')
{
steps
{
sh '''
. preflight_qeff/bin/activate
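# Build vLLM v0.6.0 from source with Qualcomm's QAIC patch applied (qaic backend)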
git clone https://github.com/vllm-project/vllm.git
cd vllm
git checkout v0.6.0
git apply /opt/qti-aic/integrations/vllm/qaic_vllm.patch
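# Build for the qaic target device and install in editable mode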
export VLLM_TARGET_DEVICE="qaic"
pip install -e .
'''
}
}


stage('vLLM Test')
{
steps
{

timeout(time: 660, unit: 'MINUTES') {
sh '''
. preflight_qeff/bin/activate
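# Run the vLLM tests, then merge their junit report with the reports from earlier stages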
pytest --disable-warnings -s -v tests/vllm --junitxml=tests/tests_log4.xml
junitparser merge tests/tests_log1.xml tests/tests_log2.xml tests/tests_log3.xml tests/tests_log4.xml tests/tests_log.xml
deactivate
exit
'''
}
}
}
}
post
{
@@ -59,4 +94,4 @@ pipeline
}
}

}
}
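Note on the junit merge in the vLLM Test stage: the new tests/tests_log4.xml report is folded into the reports from the earlier stages so Jenkins publishes a single result file. Below is a minimal Python sketch of the equivalent merge using junitparser's API; the report file names are taken from the Jenkinsfile above, and the workspace-relative paths are an assumption about the Jenkins workspace layout. The single junitparser CLI call in the stage does the same thing in one line.

from junitparser import JUnitXml

# Reports produced by the earlier pipeline stages plus the new vLLM run.
report_files = [
    "tests/tests_log1.xml",
    "tests/tests_log2.xml",
    "tests/tests_log3.xml",
    "tests/tests_log4.xml",  # written by the vLLM pytest invocation above
]

merged = JUnitXml()
for path in report_files:
    merged += JUnitXml.fromfile(path)  # append that file's test suites

merged.write("tests/tests_log.xml")  # combined report picked up by Jenkins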
102 changes: 102 additions & 0 deletions tests/vllm/test_qaic_output_consistency.py
@@ -0,0 +1,102 @@
# -----------------------------------------------------------------------------
#
# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
#
# -----------------------------------------------------------------------------

import random

import pytest
from vllm import LLM, SamplingParams

# Models to test
test_models = [
"TinyLlama/TinyLlama-1.1B-Chat-v1.0",
]

# Constants for configuration
SEQ_LEN = 128
CTX_LEN = 256
DECODE_BSZ = 4  # decode batch size
DTYPE = "mxfp6"
KV_DTYPE = "mxint8"
DEVICE_GROUP = [0]


@pytest.mark.parametrize("model_name", test_models)
def test_output_consistency(model_name):
"""This pytest function is used to check the consistency of vLLM.
1) Single prompt test to check if the output generated in 5 different
runs yields the same results
2) Multiple prompt check to test if multiple prompts yield same results
if run in different slots.

Parameters
----------
model_name : string
Huggingface model card name.
"""
sampling_params = SamplingParams(temperature=0.0, max_tokens=None)

ochougul marked this conversation as resolved.
Show resolved Hide resolved
# Creating LLM Object
qllm = LLM(
model=model_name,
device_group=DEVICE_GROUP,
max_num_seqs=DECOE_BSZ,
max_model_len=CTX_LEN,
max_seq_len_to_capture=SEQ_LEN,
quantization=DTYPE,
kv_cache_dtype=KV_DTYPE,
device="qaic",
)

# Single prompt test
prompt1 = ["My name is"]

output1 = qllm.generate(prompt1 * 5, sampling_params)

check_output1 = []
for i, op in enumerate(output1):
check_output1.append(op.outputs[0].text)


# Multiple prompt test
outputDict = dict()
prompt2 = [
"My name is",
"How to eat mangosteen?",
"How many people died in World War II",
"Hello ",
"Who is the president of United States",
"Who is the president of India",
"When it snowfalls in San Diego",
"In which country yamana river flows",
"How many people died in World War II",
"Thy youth is proud livery, so gazed on now",
"Will be a tattered weed, of small worth held:" "Then being asked where all thy beauty lies",
"Where all the treasure of thy lusty days",
"To say, within thine own deep-sunken eyes",
"Where is Statue of Liberty located?",
]

for p in prompt2:
outputDict[p] = []

for _ in range(5):
random.shuffle(prompt2)
output2 = qllm.generate(prompt2, sampling_params)
for i, op in enumerate(output2):
generated_text = op.outputs[0].text
outputDict[prompt2[i]].append(str(prompt2[i] + generated_text))


# Assertion to check the consistency of single prompt.
assert len(set(check_output1)) == 1, "Outputs from different slots for same prompt does not match!!"

# Assertion to check multiple prompts.
for key in outputDict.keys():
assert len(set(outputDict[key])) == 1, "Outputs from different slots for same prompt does not match!!"

# Assertion to check if any prompts are missed.
assert len(prompt2) == len(output2), "Number of Generated Tokens do not match the number of valid inputs!!"
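For a quick manual check outside Jenkins, the single-prompt determinism test above can be exercised as a standalone script. This is a minimal sketch, not part of the PR; it reuses the model name and QAIC-specific LLM arguments from the test and assumes the patched vLLM build from the Install vLLM stage is on the active environment.

from vllm import LLM, SamplingParams

# Same model and QAIC-specific arguments as in the test above.
llm = LLM(
    model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    device_group=[0],
    max_num_seqs=4,
    max_model_len=256,
    max_seq_len_to_capture=128,
    quantization="mxfp6",
    kv_cache_dtype="mxint8",
    device="qaic",
)
params = SamplingParams(temperature=0.0, max_tokens=32)

# Greedy decoding: five runs of the same prompt should produce identical text.
outputs = llm.generate(["My name is"] * 5, params)
texts = {o.outputs[0].text for o in outputs}
assert len(texts) == 1, "Greedy outputs differ across batch slots"
print(texts.pop())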