'File partition' option and 'document' directory specification #1020
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Continuous-integration workflow: reports run context, runs style/lint/type
# checks and Python tests, builds wheels/sdist, and publishes on tag pushes.
name: CI

# Triggers: pushes to main/master and to any tag, pull requests targeting
# main/master, and manual runs from the Actions tab.
on:
  push:
    branches:
      - main
      - master
    tags:
      - "*"
  pull_request:
    branches:
      - main
      - master
  workflow_dispatch:

# The default GITHUB_TOKEN only gets read access to repository contents.
permissions:
  contents: read

# Workflow-level environment, visible to every job and step.
env:
  # Evaluates to the string 'true' when the AWS_ACCESS_KEY_ID secret is empty
  # or unavailable, and to 'false' otherwise.
  DOLMA_TESTS_SKIP_AWS: ${{ secrets.AWS_ACCESS_KEY_ID == '' && 'true' || 'false' }}
  DOLMA_TEST_S3_PREFIX: s3://dolma-tests
  # Toolchain channel passed to `rustup` in the prepare-venv job.
  RUST_CHANNEL: stable
jobs:
  # Prints the run context to the log; has no outputs and nothing depends on it.
  info:
    name: Run info
    runs-on: ubuntu-latest
    steps:
      - name: Echo environment variables
        # Context values are handed to the script through environment
        # variables rather than being expanded into the script text: the PR
        # head ref/repo are chosen by the PR author, and inline `${{ }}`
        # expansion of such values allows shell injection.
        env:
          CI_REF: ${{ github.ref }}
          CI_EVENT_NAME: ${{ github.event_name }}
          CI_RUN_TESTS: ${{ github.event_name == 'pull_request' || github.event_name == 'push' }}
          CI_IS_MAIN: ${{ github.ref == 'refs/heads/main' || github.ref == 'refs/heads/master' }}
          CI_IS_RELEASE: ${{ startsWith(github.ref, 'refs/tags/') }}
          CI_COMMIT: ${{ github.sha }}
          CI_PR_BASE: ${{ github.event.pull_request.base.repo.full_name }}/tree/${{ github.event.pull_request.base.ref }}
          CI_PR_HEAD: ${{ github.event.pull_request.head.repo.full_name }}/tree/${{ github.event.pull_request.head.ref }}
        run: |
          echo "reference: ${CI_REF}"
          echo "event name: ${CI_EVENT_NAME}"
          echo "run tests: ${CI_RUN_TESTS}"
          echo "is main: ${CI_IS_MAIN}"
          echo "is release: ${CI_IS_RELEASE}"
          echo "skip AWS: ${DOLMA_TESTS_SKIP_AWS}"
          echo "commit: ${CI_COMMIT}"
          echo "PR base repo: ${CI_PR_BASE}"
          echo "PR head repo: ${CI_PR_HEAD}"

  # Decides whether the wheel/sdist build jobs should run. The result is
  # exposed as the `should_build` job output ('true' or 'false').
  should_build:
    name: "Check if build"
    runs-on: ubuntu-latest
    steps:
      - name: checkout
        uses: actions/checkout@v3
        with:
          # Full history so the diff against the PR base commit can be computed.
          fetch-depth: 0
          ref: ${{ github.ref }}
      - name: List branches and tags
        run: |
          git branch -a
          git tag -l
          git log | head -n 1000
      - id: check_version
        env:
          # Empty on events that are not pull requests.
          BASE_SHA: ${{ github.event.pull_request.base.sha }}
        # `set +e` is required: grep exits non-zero when nothing matches, and
        # that must not fail the step. The diff is skipped when there is no PR
        # base commit instead of running `git diff` on an empty revision.
        run: |
          set +e
          has_updated=''
          if [[ -n "${BASE_SHA}" ]]; then
            has_updated=$(git diff --name-only "${BASE_SHA}" | grep -E '(pyproject|Cargo)\.toml')
          fi
          is_main_or_release='${{ github.ref == 'refs/heads/main' || github.ref == 'refs/heads/master' || startsWith(github.ref, 'refs/tags/') }}'
          if [[ -n "${has_updated}" ]] || [[ "${is_main_or_release}" == 'true' ]]; then
            echo "should_build=true" >> "$GITHUB_OUTPUT"
          else
            echo "should_build=false" >> "$GITHUB_OUTPUT"
          fi
        shell: bash
    outputs:
      should_build: ${{ steps.check_version.outputs.should_build }}

  # Builds the project into ./.venv and stores it in the Actions cache.
  # Every build step is skipped when the cache key already exists.
  prepare-venv:
    runs-on: ubuntu-latest
    name: "Prepare Virtual Env"
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
      - name: Cache Virtual Env
        uses: actions/cache@v3
        # name for referring later
        id: cache-venv
        with:
          # what we cache: the virtualenv
          path: ./.venv/
          # The cache key depends on pyproject.toml, Cargo.toml and Cargo.lock,
          # plus the Python and Rust sources. Each glob must be its own quoted
          # argument to hashFiles. This key must stay identical to the one in
          # the `tests` job, which restores this cache.
          key: ${{ runner.os }}-venv-${{ hashFiles('**/pyproject.toml', '**/Cargo.toml', '**/Cargo.lock') }}--${{ hashFiles('python/**', 'src/**') }}
      - name: Setup system libraries
        if: steps.cache-venv.outputs.cache-hit != 'true'
        run: |
          sudo apt-get update
          sudo apt-get install --yes --upgrade build-essential cmake protobuf-compiler libssl-dev glibc-source musl-tools
      - name: Install Rust toolchain
        if: steps.cache-venv.outputs.cache-hit != 'true'
        run: |
          rustup update ${{ env.RUST_CHANNEL }}
          rustup component add --toolchain ${{ env.RUST_CHANNEL }} rustfmt rust-src
          rustup default ${{ env.RUST_CHANNEL }}
      - name: Install Python
        if: steps.cache-venv.outputs.cache-hit != 'true'
        uses: actions/setup-python@v4
        with:
          python-version: "3.8"
          architecture: "x64"
      - name: Create a new Python environment & install maturin
        if: steps.cache-venv.outputs.cache-hit != 'true'
        run: |
          python -m venv .venv
          source .venv/bin/activate
          pip install -U pip
          pip install maturin==1.7.1
      - name: Install dolma wheels
        if: steps.cache-venv.outputs.cache-hit != 'true'
        run: |
          source .venv/bin/activate
          maturin build --release -i $(which python) --out dist
          wheel_path=$(ls dist/*.whl)
          pip install "${wheel_path}[all]"

  # Runs each check in the matrix inside the virtualenv cached by prepare-venv.
  # Only runs for pull_request and push events.
  tests:
    runs-on: ubuntu-latest
    needs: prepare-venv
    env:
      AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
      AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
    if: ${{ github.event_name == 'pull_request' || github.event_name == 'push' }}
    name: "${{ matrix.task.name }}"
    strategy:
      fail-fast: true
      matrix:
        # Each task supplies a display name and the shell script to execute.
        task:
          - name: Check Python style
            run: |
              set -e
              isort --check --verbose .
              black --check --verbose .
          - name: Check Rust style
            run: |
              rustfmt --edition 2021 src/*.rs --check
          - name: Lint Python
            run: |
              flake8 tests/python/ && flake8 python/
          - name: Types Python
            run: |
              mypy tests/python/ && mypy python/
          - name: Run Python tests
            run: |
              pytest -vs --color=yes tests/python/
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
      - name: Cache Virtual Env
        uses: actions/cache@v3
        # name for referring later
        id: cache-venv
        with:
          # what we cache: the virtualenv
          path: ./.venv/
          # Must stay identical to the key in the `prepare-venv` job so the
          # virtualenv saved there is the one restored here.
          key: ${{ runner.os }}-venv-${{ hashFiles('**/pyproject.toml', '**/Cargo.toml', '**/Cargo.lock') }}--${{ hashFiles('python/**', 'src/**') }}
      - name: ${{ matrix.task.name }}
        run: |
          source .venv/bin/activate
          ${{ matrix.task.run }}

  # Builds Linux wheels for each target when should_build reported 'true'.
  build-linux:
    needs: should_build
    if: ${{ needs.should_build.outputs.should_build == 'true' }}
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # fails to build on x86, so removing for now; low use anyway.
        # target: [x86_64, x86, aarch64, armv7]
        target: [x86_64, aarch64, armv7]
    name: "Build Linux (${{ matrix.target }})"
    steps:
      - uses: actions/checkout@v3
      - uses: actions/setup-python@v4
        with:
          python-version: "3.10"
      # No target in the current matrix satisfies this condition; the step is
      # kept for when the x86 target is re-enabled.
      - name: Install 32bit version of libc
        if: ${{ matrix.target == 'x86' || contains(matrix.target, 'i686') }}
        run: |
          sudo apt-get update
          sudo apt-get install --yes --upgrade libc6-dev-i386
      - name: Build wheels
        uses: PyO3/maturin-action@v1
        with:
          target: ${{ matrix.target }}
          args: '--release --out dist --find-interpreter'
          manylinux: manylinux_2_28
          container: "ghcr.io/rust-cross/manylinux_2_28-cross:${{ matrix.target }}"
          before-script-linux: |
            sudo apt-get update
            sudo apt-get install --yes --upgrade build-essential cmake protobuf-compiler libssl-dev glibc-source musl-tools
      - name: Upload wheels
        # v3 of the artifact actions is retired. v4 does not allow several
        # jobs to upload to the same artifact name, so each matrix leg uses
        # its own `wheels-*` name; the release job merges them on download.
        uses: actions/upload-artifact@v4
        with:
          name: wheels-linux-${{ matrix.target }}
          path: dist

  # Builds Windows wheels for each architecture when should_build reported 'true'.
  build-windows:
    needs: should_build
    if: ${{ needs.should_build.outputs.should_build == 'true' }}
    runs-on: windows-latest
    name: "Build Windows (${{ matrix.target }})"
    strategy:
      matrix:
        target: [x64, x86]
    steps:
      - uses: actions/checkout@v3
      - uses: actions/setup-python@v4
        with:
          python-version: "3.10"
          # The interpreter architecture follows the matrix target.
          architecture: ${{ matrix.target }}
      - name: Build wheels
        uses: PyO3/maturin-action@v1
        with:
          target: ${{ matrix.target }}
          args: '--release --out dist --find-interpreter'
      - name: Upload wheels
        # Unique per-leg artifact name, as required by upload-artifact v4.
        uses: actions/upload-artifact@v4
        with:
          name: wheels-windows-${{ matrix.target }}
          path: dist

  # Builds macOS wheels for each target when should_build reported 'true'.
  build-macos:
    needs: should_build
    if: ${{ needs.should_build.outputs.should_build == 'true' }}
    runs-on: macos-latest
    name: "Build macOS (${{ matrix.target }})"
    strategy:
      matrix:
        target: [x86_64, aarch64]
    steps:
      - uses: actions/checkout@v3
      - uses: actions/setup-python@v4
        with:
          python-version: "3.10"
      - name: Build wheels
        uses: PyO3/maturin-action@v1
        with:
          target: ${{ matrix.target }}
          args: '--release --out dist --find-interpreter'
      - name: Upload wheels
        # Unique per-leg artifact name, as required by upload-artifact v4.
        uses: actions/upload-artifact@v4
        with:
          name: wheels-macos-${{ matrix.target }}
          path: dist

  # Builds the source distribution when should_build reported 'true'.
  sdist:
    runs-on: ubuntu-latest
    needs: should_build
    if: ${{ needs.should_build.outputs.should_build == 'true' }}
    steps:
      - uses: actions/checkout@v3
      - name: Build sdist
        uses: PyO3/maturin-action@v1
        with:
          command: sdist
          args: '--out dist'
      - name: Upload sdist
        # Shares the `wheels-` prefix so the release job's download pattern
        # picks it up together with the platform wheels.
        uses: actions/upload-artifact@v4
        with:
          name: wheels-sdist
          path: dist

  # Publishes every built distribution to PyPI; only runs for tag refs and
  # only after all build jobs have finished.
  release:
    name: Release
    runs-on: ubuntu-latest
    if: "startsWith(github.ref, 'refs/tags/')"
    needs: [build-linux, build-windows, build-macos, sdist]
    steps:
      - uses: actions/download-artifact@v4
        with:
          # Fetch every `wheels-*` artifact and place all files directly in
          # the working directory (no per-artifact subfolders), so the `*`
          # glob passed to the upload command below matches them.
          pattern: "wheels-*"
          merge-multiple: true
      - name: Publish to PyPI
        uses: PyO3/maturin-action@v1
        env:
          MATURIN_PYPI_TOKEN: ${{ secrets.PYPI_API_TOKEN }}
        with:
          command: upload
          maturin-version: "1.7.1"
          args: '--skip-existing *'