# Add LLaMA perf benchmark workflow for Apple iOS #2
#
# Workflow file for this run

# Benchmark workflow for Apple devices. It can be started by a pull request,
# by the nightly schedule, by hand (workflow_dispatch), or from another
# workflow (workflow_call).
name: apple-perf

on:
  pull_request:

  schedule:
    - cron: "0 1 * * *"

  # Note: GitHub has an upper limit of 10 inputs
  workflow_dispatch:
    inputs:
      models:
        type: string
        required: false
        default: stories110M
        description: Models to be benchmarked
      devices:
        type: string
        required: false
        default: apple_iphone_15
        description: Target devices to run benchmark
      delegates:
        type: string
        required: false
        default: xnnpack
        description: Backend delegates
      benchmark_configs:
        type: string
        required: false
        description: The list of configs used the benchmark
      test_spec:
        type: string
        required: false
        description: The test spec to drive the test on AWS devices

  # Same inputs as workflow_dispatch, so callers and manual runs behave alike.
  workflow_call:
    inputs:
      models:
        type: string
        required: false
        default: stories110M
        description: Models to be benchmarked
      devices:
        type: string
        required: false
        default: apple_iphone_15
        description: Target devices to run benchmark
      delegates:
        type: string
        required: false
        default: xnnpack
        description: Backend delegates
      benchmark_configs:
        type: string
        required: false
        description: The list of configs used the benchmark
      test_spec:
        type: string
        required: false
        description: The test spec to drive the test on AWS devices

# The group key is built from the workflow name, the PR number (or ref name),
# the commit SHA on branch refs, and whether the trigger was a manual dispatch
# or the schedule. A new run cancels any in-progress run in the same group.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
  cancel-in-progress: true
jobs:
  # Turns the comma-separated (or JSON-array) workflow inputs into JSON arrays
  # that downstream jobs consume via fromJson(). Device names are mapped to
  # AWS Device Farm device-pool ARNs.
  set-parameters:
    runs-on: linux.2xlarge
    outputs:
      models: ${{ steps.set-parameters.outputs.models }}
      devices: ${{ steps.set-parameters.outputs.devices }}
      delegates: ${{ steps.set-parameters.outputs.delegates }}
    steps:
      - name: Set parameters
        id: set-parameters
        shell: bash
        env:
          # Inputs are handed to the script through the environment instead of
          # being interpolated into the script text, so their content is never
          # parsed as shell code.
          INPUT_MODELS: ${{ inputs.models }}
          INPUT_DEVICES: ${{ inputs.devices }}
          INPUT_DELEGATES: ${{ inputs.delegates }}
          # Separate default values from the workflow dispatch. To ensure defaults are accessible
          # during scheduled runs and to provide flexibility for different defaults between
          # on-demand and periodic benchmarking.
          CRON_DEFAULT_MODELS: "stories110M"
          CRON_DEFAULT_DEVICES: "apple_iphone_15"
          CRON_DEFAULT_DELEGATES: "xnnpack"
        run: |
          set -ex

          # Fall back to the cron defaults when an input is empty or unset
          # (e.g. on schedule and pull_request triggers).
          MODELS="${INPUT_MODELS:-$CRON_DEFAULT_MODELS}"
          DEVICES="${INPUT_DEVICES:-$CRON_DEFAULT_DEVICES}"
          DELEGATES="${INPUT_DELEGATES:-$CRON_DEFAULT_DELEGATES}"

          # Mapping devices to their corresponding device-pool-arn
          declare -A DEVICE_POOL_ARNS
          DEVICE_POOL_ARNS[apple_iphone_15]="arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/3b5acd2e-92e2-4778-b651-7726bafe129d"

          # Resolve device names with their corresponding ARNs.
          # Accept either a JSON array or a comma-separated list: anything that
          # is not already a JSON array is split on commas. The check relies on
          # jq's exit status (jq prints nothing useful to capture here).
          if ! echo "$DEVICES" | jq -e 'type == "array"' >/dev/null 2>&1; then
            DEVICES=$(echo "$DEVICES" | jq -Rc 'split(",")')
          fi

          declare -a MAPPED_ARNS=()
          for DEVICE in $(echo "$DEVICES" | jq -r '.[]'); do
            if [[ -z "${DEVICE_POOL_ARNS[$DEVICE]}" ]]; then
              echo "Error: No ARN found for device '$DEVICE'. Abort." >&2
              exit 1
            fi
            MAPPED_ARNS+=("${DEVICE_POOL_ARNS[$DEVICE]}")
          done

          # An empty list would otherwise be emitted as [""] by the printf below.
          if [[ ${#MAPPED_ARNS[@]} -eq 0 ]]; then
            echo "Error: No devices were specified. Abort." >&2
            exit 1
          fi

          echo "models=$(echo "$MODELS" | jq -Rc 'split(",")')" >> "$GITHUB_OUTPUT"
          MAPPED_ARNS_JSON=$(printf '%s\n' "${MAPPED_ARNS[@]}" | jq -R . | jq -s .)
          echo "devices=$(echo "$MAPPED_ARNS_JSON" | jq -c .)" >> "$GITHUB_OUTPUT"
          echo "delegates=$(echo "$DELEGATES" | jq -Rc 'split(",")')" >> "$GITHUB_OUTPUT"

  # Exports one model per (model, delegate) matrix entry on a macOS runner and
  # publishes the result as the "ios-models" artifact.
  export-models:
    name: export-models
    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
    needs: set-parameters
    strategy:
      matrix:
        model: ${{ fromJson(needs.set-parameters.outputs.models) }}
        delegate: ${{ fromJson(needs.set-parameters.outputs.delegates) }}
      fail-fast: false
    with:
      runner: macos-latest-xlarge
      python-version: '3.11'
      submodules: 'true'
      # Same ref expression as build-llm-demo, so models and app are built
      # from the same commit.
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      timeout: 60
      upload-artifact: ios-models
      script: |
        set -eux

        # Capture the matrix values once, quoted, so that an empty value cannot
        # turn the [[ ... ]] tests below into syntax errors.
        MODEL="${{ matrix.model }}"
        DELEGATE="${{ matrix.delegate }}"

        .ci/scripts/setup-conda.sh

        BUILD_TOOL=cmake
        # Setup MacOS dependencies as there is no Docker support on MacOS atm
        GITHUB_RUNNER=1 PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
          .ci/scripts/setup-macos.sh "${BUILD_TOOL}"

        if [[ "${DELEGATE}" == "qnn" ]]; then
          PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output bash \
            .ci/scripts/setup-qnn-deps.sh
          PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output bash \
            .ci/scripts/build-qnn-sdk.sh
        fi

        if [[ "${DELEGATE}" == "coreml" ]]; then
          PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
            backends/apple/coreml/scripts/install_requirements.sh
        fi

        if [[ "${DELEGATE}" == "mps" ]]; then
          PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
            backends/apple/mps/install_requirements.sh
        fi

        ARTIFACTS_DIR_NAME="artifacts-to-be-uploaded/${MODEL}_${DELEGATE}"
        echo "::group::Exporting ${DELEGATE} model: ${MODEL}"
        BUILD_MODE="cmake"
        DTYPE="fp32"

        # Model names starting with "stories" go through the llama export path.
        if [[ "${MODEL}" =~ ^stories ]]; then
          # Install requirements for export_llama
          PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
            bash examples/models/llama2/install_requirements.sh

          # Test llama2
          # Default to the delegate name so the variable is always bound under
          # `set -u`. NOTE(review): confirm test_llama.sh accepts every
          # delegate name as a mode.
          DELEGATE_CONFIG="${DELEGATE}"
          if [[ "${DELEGATE}" == "xnnpack" ]]; then
            DELEGATE_CONFIG="xnnpack+custom+qe"
          fi
          PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
            bash .ci/scripts/test_llama.sh "${MODEL}" "${BUILD_MODE}" "${DTYPE}" "${DELEGATE_CONFIG}" "${ARTIFACTS_DIR_NAME}"
        else
          PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
            bash .ci/scripts/test.sh "${MODEL}" "${BUILD_MODE}" "${DELEGATE}" "${ARTIFACTS_DIR_NAME}"
        fi
        echo "::endgroup::"

  # Upload models to S3. The artifacts are needed not only by the device farm but also TorchChat
  upload-models:
    needs: export-models
    runs-on: linux.2xlarge
    steps:
      - name: Download the models from GitHub
        uses: actions/download-artifact@v3
        with:
          # The name here needs to match the name of the upload-artifact parameter
          name: ios-models
          path: ${{ runner.temp }}/artifacts/

      - name: Verify the models
        shell: bash
        working-directory: ${{ runner.temp }}/artifacts/
        run: |
          ls -lah ./

      - name: Upload the models to S3
        uses: seemethere/upload-artifact-s3@v5
        with:
          s3-bucket: gha-artifacts
          s3-prefix: |
            ${{ github.repository }}/${{ github.run_id }}/artifact
          retention-days: 1
          if-no-files-found: ignore
          path: ${{ runner.temp }}/artifacts/

  # Builds the LLM demo app on a macOS runner and publishes it as the
  # "ios-apps" artifact.
  build-llm-demo:
    name: build-llm-demo
    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
    needs: set-parameters
    secrets: inherit
    strategy:
      matrix:
        tokenizer: [bpe]
    with:
      runner: macos-latest-xlarge
      python-version: '3.11'
      submodules: 'true'
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      upload-artifact: ios-apps
      # The script below reads each of these as SECRET_<NAME>; every name it
      # reads must be listed here, otherwise the variable is unbound under
      # `set -u`. NOTE(review): confirm the provisioning-profile secret name
      # matches the secret configured for the repository.
      secrets-env: BUILD_CERTIFICATE_BASE64 EXECUTORCH_DEMO_BUILD_PROVISION_PROFILE_BASE64 KEYCHAIN_PASSWORD
      timeout: 90
      script: |
        set -eux

        .ci/scripts/setup-conda.sh

        BUILD_TOOL=cmake
        # Setup MacOS dependencies as there is no Docker support on MacOS atm
        GITHUB_RUNNER=1 PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
          .ci/scripts/setup-macos.sh "${BUILD_TOOL}"

        # Setup Apple certificate for iOS development
        BUILD_PROVISION_PROFILE_BASE64="${SECRET_EXECUTORCH_DEMO_BUILD_PROVISION_PROFILE_BASE64}" \
        BUILD_CERTIFICATE_BASE64="${SECRET_BUILD_CERTIFICATE_BASE64}" \
        KEYCHAIN_PASSWORD="${SECRET_KEYCHAIN_PASSWORD}" \
          .ci/scripts/setup-ios.sh

        # Install CoreML Backend Requirements
        PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
          backends/apple/coreml/scripts/install_requirements.sh

        # Install MPS Backend Requirements
        PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
          backends/apple/mps/install_requirements.sh

        export ARTIFACTS_DIR_NAME=artifacts-to-be-uploaded
        bash build/build_apple_llm_demo.sh ${{ matrix.tokenizer }} ${ARTIFACTS_DIR_NAME}

  # Copies the "ios-apps" artifact produced by build-llm-demo to S3.
  upload-ios-apps:
    needs: build-llm-demo
    runs-on: linux.2xlarge
    steps:
      - name: Download the apps from GitHub
        uses: actions/download-artifact@v3
        with:
          # The name here needs to match the name of the upload-artifact parameter
          name: ios-apps
          path: ${{ runner.temp }}/artifacts/

      - name: Verify the apps
        shell: bash
        working-directory: ${{ runner.temp }}/artifacts/
        run: |
          ls -lah ./

      - name: Upload the apps to S3
        uses: seemethere/upload-artifact-s3@v5
        with:
          s3-bucket: gha-artifacts
          s3-prefix: |
            ${{ github.repository }}/${{ github.run_id }}/artifact
          retention-days: 14
          if-no-files-found: ignore
          path: ${{ runner.temp }}/artifacts/