android-perf #17
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: android-perf | |
on: | |
schedule: | |
- cron: 0 0 * * * | |
# Note: GitHub has an upper limit of 10 inputs | |
workflow_dispatch: | |
inputs: | |
models: | |
description: Models to be benchmarked | |
required: false | |
type: string | |
default: stories110M | |
devices: | |
description: Target devices to run benchmark | |
required: false | |
type: string | |
default: samsung_galaxy_s2x | |
delegates: | |
description: Backend delegates | |
required: false | |
type: string | |
default: xnnpack | |
threadpool: | |
description: Run with threadpool? | |
required: false | |
type: boolean | |
default: false | |
benchmark_configs: | |
description: The list of configs used the benchmark | |
required: false | |
type: string | |
test_spec: | |
description: The test spec to drive the test on AWS devices | |
required: false | |
type: string | |
workflow_call: | |
inputs: | |
models: | |
description: Models to be benchmarked | |
required: false | |
type: string | |
default: stories110M | |
devices: | |
description: Target devices to run benchmark | |
required: false | |
type: string | |
default: samsung_galaxy_s2x | |
delegates: | |
description: Backend delegates | |
required: false | |
type: string | |
default: xnnpack | |
threadpool: | |
description: Run with threadpool? | |
required: false | |
type: boolean | |
default: false | |
benchmark_configs: | |
description: The list of configs used the benchmark | |
required: false | |
type: string | |
test_spec: | |
description: The test spec to drive the test on AWS devices | |
required: false | |
type: string | |
concurrency: | |
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }} | |
cancel-in-progress: true | |
jobs: | |
set-parameters: | |
runs-on: linux.2xlarge | |
outputs: | |
models: ${{ steps.set-parameters.outputs.models }} | |
devices: ${{ steps.set-parameters.outputs.devices }} | |
delegates: ${{ steps.set-parameters.outputs.delegates }} | |
steps: | |
- name: Set parameters | |
id: set-parameters | |
shell: bash | |
env: | |
# Separate default values from the workflow dispatch. To ensure defaults are accessible | |
# during scheduled runs and to provide flexibility for different defaults between | |
# on-demand and periodic benchmarking. | |
CRON_DEFAULT_MODELS: "stories110M" | |
CRON_DEFAULT_DEVICES: "samsung_galaxy_s2x" | |
CRON_DEFAULT_DELEGATES: "xnnpack" | |
run: | | |
set -ex | |
MODELS="${{ inputs.models }}" | |
if [ -z "$MODELS" ]; then | |
MODELS="$CRON_DEFAULT_MODELS" | |
fi | |
DEVICES="${{ inputs.devices }}" | |
if [ -z "$DEVICES" ]; then | |
DEVICES="$CRON_DEFAULT_DEVICES" | |
fi | |
DELEGATES="${{ inputs.delegates }}" | |
if [ -z "$DELEGATES" ]; then | |
DELEGATES="$CRON_DEFAULT_DELEGATES" | |
fi | |
# Mapping devices to their corresponding device-pool-arn | |
declare -A DEVICE_POOL_ARNS | |
DEVICE_POOL_ARNS[samsung_galaxy_s2x]="arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/e59f866a-30aa-4aa1-87b7-4510e5820dfa" | |
# Resolve device names with their corresponding ARNs | |
if [[ ! $(echo "$DEVICES" | jq empty 2>/dev/null) ]]; then | |
DEVICES=$(echo "$DEVICES" | jq -Rc 'split(",")') | |
fi | |
declare -a MAPPED_ARNS=() | |
for DEVICE in $(echo "$DEVICES" | jq -r '.[]'); do | |
if [[ -z "${DEVICE_POOL_ARNS[$DEVICE]}" ]]; then | |
echo "Error: No ARN found for device '$DEVICE'. Abort." >&2 | |
exit 1 | |
fi | |
MAPPED_ARNS+=("${DEVICE_POOL_ARNS[$DEVICE]}") | |
done | |
echo "models=$(echo $MODELS | jq -Rc 'split(",")')" >> $GITHUB_OUTPUT | |
MAPPED_ARNS_JSON=$(printf '%s\n' "${MAPPED_ARNS[@]}" | jq -R . | jq -s .) | |
echo "devices=$(echo "$MAPPED_ARNS_JSON" | jq -c .)" >> $GITHUB_OUTPUT | |
echo "delegates=$(echo $DELEGATES | jq -Rc 'split(",")')" >> $GITHUB_OUTPUT | |
export-models: | |
name: export-models | |
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main | |
needs: set-parameters | |
strategy: | |
matrix: | |
model: ${{ fromJson(needs.set-parameters.outputs.models) }} | |
delegate: ${{ fromJson(needs.set-parameters.outputs.delegates) }} | |
fail-fast: false | |
with: | |
runner: linux.2xlarge | |
docker-image: executorch-ubuntu-22.04-clang12 | |
submodules: 'true' | |
timeout: 60 | |
upload-artifact: android-models | |
script: | | |
# The generic Linux job chooses to use base env, not the one setup by the image | |
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") | |
conda activate "${CONDA_ENV}" | |
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake" | |
echo "Exporting model: ${{ matrix.model }}" | |
export ARTIFACTS_DIR_NAME=artifacts-to-be-uploaded/${{ matrix.model }}_${{ matrix.delegate }} | |
# TODO(T197546696): Note that the following scripts/steps only work for llama. It's expected to fail for other models+delegates. | |
# Install requirements for export_llama | |
PYTHON_EXECUTABLE=python bash examples/models/llama2/install_requirements.sh | |
# Test llama2 | |
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh "${{ matrix.model }}.pt" "cmake" "fp32" "xnnpack+custom+qe" "${ARTIFACTS_DIR_NAME}"\ | |
# Upload models to S3. The artifacts are needed not only by the device farm but also TorchChat | |
upload-models: | |
needs: export-models | |
runs-on: linux.2xlarge | |
steps: | |
- name: Download the models from GitHub | |
uses: actions/download-artifact@v3 | |
with: | |
# The name here needs to match the name of the upload-artifact parameter | |
name: android-models | |
path: ${{ runner.temp }}/artifacts/ | |
- name: Verify the models | |
shell: bash | |
working-directory: ${{ runner.temp }}/artifacts/ | |
run: | | |
ls -lah ./ | |
- name: Upload the models to S3 | |
uses: seemethere/upload-artifact-s3@v5 | |
with: | |
s3-bucket: gha-artifacts | |
s3-prefix: | | |
${{ github.repository }}/${{ github.run_id }}/artifact | |
retention-days: 1 | |
if-no-files-found: ignore | |
path: ${{ runner.temp }}/artifacts/ | |
build-llm-demo: | |
name: build-llm-demo | |
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main | |
needs: set-parameters | |
strategy: | |
matrix: | |
tokenizer: [bpe] | |
with: | |
runner: linux.2xlarge | |
docker-image: executorch-ubuntu-22.04-clang12-android | |
submodules: 'true' | |
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} | |
timeout: 90 | |
upload-artifact: android-apps | |
script: | | |
set -eux | |
# The generic Linux job chooses to use base env, not the one setup by the image | |
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") | |
conda activate "${CONDA_ENV}" | |
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh cmake | |
export ARTIFACTS_DIR_NAME=artifacts-to-be-uploaded | |
# TODO: This needs to be replaced with a generic loader .apk | |
# Build LLM Demo for Android | |
bash build/build_android_llm_demo.sh ${{ matrix.tokenizer }} ${ARTIFACTS_DIR_NAME} | |
# Upload artifacts to S3. The artifacts are needed not only by the device farm but also TorchChat | |
upload-android-apps: | |
needs: build-llm-demo | |
runs-on: linux.2xlarge | |
steps: | |
- name: Download the apps from GitHub | |
uses: actions/download-artifact@v3 | |
with: | |
# The name here needs to match the name of the upload-artifact parameter | |
name: android-apps | |
path: ${{ runner.temp }}/artifacts/ | |
- name: Verify the apps | |
shell: bash | |
working-directory: ${{ runner.temp }}/artifacts/ | |
run: | | |
ls -lah ./ | |
- name: Upload the apps to S3 | |
uses: seemethere/upload-artifact-s3@v5 | |
with: | |
s3-bucket: gha-artifacts | |
s3-prefix: | | |
${{ github.repository }}/${{ github.run_id }}/artifact | |
retention-days: 14 | |
if-no-files-found: ignore | |
path: ${{ runner.temp }}/artifacts/ | |
# Let's see how expensive this job is, we might want to tone it down by running it periodically | |
benchmark-on-device: | |
permissions: | |
id-token: write | |
contents: read | |
uses: pytorch/test-infra/.github/workflows/mobile_job.yml@main | |
needs: | |
- set-parameters | |
- upload-models | |
- upload-android-apps | |
strategy: | |
matrix: | |
model: ${{ fromJson(needs.set-parameters.outputs.models) }} | |
delegate: ${{ fromJson(needs.set-parameters.outputs.delegates) }} | |
device: ${{ fromJson(needs.set-parameters.outputs.devices) }} | |
with: | |
device-type: android | |
runner: linux.2xlarge | |
test-infra-ref: '' | |
# This is the ARN of ExecuTorch project on AWS | |
project-arn: arn:aws:devicefarm:us-west-2:308535385114:project:02a2cf0f-6d9b-45ee-ba1a-a086587469e6 | |
device-pool-arn: ${{ matrix.device }} | |
# Uploaded to S3 from the previous job, the name of the app comes from the project itself. | |
# Unlike models there are limited numbers of build flavor for apps, and the model controls whether it should build with bpe/tiktoken tokenizer. | |
# It's okay to build all possible apps with all possible flavors in job "build-llm-demo". However, in this job, once a model is given, there is only | |
# one app+flavor that could load and run the model. | |
# TODO: Hard code llm_demo_bpe for now in this job. | |
android-app-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/llm_demo_bpe/app-debug.apk | |
android-test-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/llm_demo_bpe/app-debug-androidTest.apk | |
# NB: Need to set the default spec here so that it works for periodic too | |
test-spec: ${{ inputs.test_spec || 'https://ossci-android.s3.amazonaws.com/executorch/android-llm-device-farm-test-spec.yml' }} | |
# Uploaded to S3 from the previous job | |
extra-data: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/${{ matrix.model }}_${{ matrix.delegate }}/model.zip |