forked from Dao-AILab/causal-conv1d
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit a40ff74
Showing
15 changed files
with
2,233 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,212 @@ | ||
# This workflow will: | ||
# - Create a new GitHub release
# - Build wheels for supported architectures
# - Deploy the wheels to the GitHub release
# - Release the static code to PyPI
# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries | ||
|
||
name: Build wheels and deploy | ||
|
||
# Run only when a tag matching v* is pushed.
# The `create` event ignores `tags:` filters, so it would also fire on every
# new branch; `push` supports tag filtering.
on:
  push:
    tags:
      - 'v*'
|
||
jobs: | ||
|
||
# Creates the GitHub release that the wheel-building job later uploads into.
setup_release:
  name: Create Release
  runs-on: ubuntu-latest
  steps:
    # Strip the "refs/tags/" prefix from GITHUB_REF and expose the result as
    # the step output `branch`. Written to $GITHUB_OUTPUT because the
    # `::set-output` workflow command is deprecated.
    - name: Get the tag version
      id: extract_branch
      run: echo "branch=${GITHUB_REF#refs/tags/}" >> "$GITHUB_OUTPUT"
      shell: bash

    # The release is both tagged and named after the extracted tag.
    - name: Create Release
      id: create_release
      uses: actions/create-release@v1
      env:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      with:
        tag_name: ${{ steps.extract_branch.outputs.branch }}
        release_name: ${{ steps.extract_branch.outputs.branch }}
|
||
# Builds one wheel per (python, torch, cuda, cxx11_abi) combination and
# uploads each wheel as an asset of the release created by setup_release.
build_wheels:
  name: Build Wheel
  needs: setup_release
  runs-on: ${{ matrix.os }}

  strategy:
    # Let the remaining combinations keep building when one of them fails.
    fail-fast: false
    matrix:
      # Using ubuntu-20.04 instead of 22.04 for more compatibility (glibc). Ideally we'd use the
      # manylinux docker image, but I haven't figured out how to install CUDA on manylinux.
      os: [ubuntu-20.04]
      python-version: ['3.7', '3.8', '3.9', '3.10', '3.11']
      torch-version: ['1.12.1', '1.13.1', '2.0.1', '2.1.1', '2.2.0.dev20231127']
      cuda-version: ['11.8.0', '12.2.0']
      # We need separate wheels that either use the C++11 ABI (-D_GLIBCXX_USE_CXX11_ABI) or not.
      # Pytorch wheels currently don't use it, but nvcr images have Pytorch compiled with C++11 ABI.
      # Without this we get import error (undefined symbol: _ZN3c105ErrorC2ENS_14SourceLocationESs)
      # when building without C++11 ABI and using it on nvcr images.
      cxx11_abi: ['FALSE', 'TRUE']
      exclude:
        # Pytorch <= 1.12 does not support Python 3.11
        - torch-version: '1.12.1'
          python-version: '3.11'
        # Pytorch >= 2.0 only supports Python >= 3.8
        - torch-version: '2.0.1'
          python-version: '3.7'
        - torch-version: '2.1.1'
          python-version: '3.7'
        - torch-version: '2.2.0.dev20231127'
          python-version: '3.7'
        # Pytorch <= 2.0 only supports CUDA <= 11.8
        - torch-version: '1.12.1'
          cuda-version: '12.2.0'
        - torch-version: '1.13.1'
          cuda-version: '12.2.0'
        - torch-version: '2.0.1'
          cuda-version: '12.2.0'

  steps:
    - name: Checkout
      uses: actions/checkout@v3

    - name: Set up Python
      uses: actions/setup-python@v4
      with:
        python-version: ${{ matrix.python-version }}

    # Export shortened versions for later steps:
    #   MATRIX_CUDA_VERSION  = major and minor concatenated (e.g. 11.8.0 -> 118)
    #   MATRIX_TORCH_VERSION = major.minor                  (e.g. 2.0.1 -> 2.0)
    - name: Set CUDA and PyTorch versions
      run: |
        echo "MATRIX_CUDA_VERSION=$(echo ${{ matrix.cuda-version }} | awk -F . '{print $1 $2}')" >> $GITHUB_ENV
        echo "MATRIX_TORCH_VERSION=$(echo ${{ matrix.torch-version }} | awk -F . '{print $1 "." $2}')" >> $GITHUB_ENV

    - name: Free up disk space
      if: ${{ runner.os == 'Linux' }}
      # https://github.com/easimon/maximize-build-space/blob/master/action.yml
      # https://github.com/easimon/maximize-build-space/tree/test-report
      run: |
        sudo rm -rf /usr/share/dotnet
        sudo rm -rf /opt/ghc
        sudo rm -rf /opt/hostedtoolcache/CodeQL

    - name: Set up swap space
      if: runner.os == 'Linux'
      uses: pierotofy/set-swap-space@v1.0
      with:
        swap-size-gb: 10

    - name: Install CUDA ${{ matrix.cuda-version }}
      if: ${{ matrix.cuda-version != 'cpu' }}
      uses: Jimver/cuda-toolkit@v0.2.11
      id: cuda-toolkit
      with:
        cuda: ${{ matrix.cuda-version }}
        linux-local-args: '["--toolkit"]'
        # default method is "local", and we're hitting some error with caching for CUDA 11.8 and 12.1
        # method: ${{ (matrix.cuda-version == '11.8.0' || matrix.cuda-version == '12.1.0') && 'network' || 'local' }}
        method: 'network'
        # We need the cuda libraries (e.g. cuSparse, cuSolver) for compiling PyTorch extensions,
        # not just nvcc
        # sub-packages: '["nvcc"]'

    - name: Install PyTorch ${{ matrix.torch-version }}+cu${{ matrix.cuda-version }}
      run: |
        pip install --upgrade pip
        # If we don't install lit before installing Pytorch, we get an error for torch 2.0.1:
        # ERROR: Could not find a version that satisfies the requirement setuptools>=40.8.0 (from versions: none)
        pip install lit
        # We want to figure out the CUDA version to download pytorch
        # e.g. we can have system CUDA version being 11.7 but if torch==1.12 then we need to download the wheel from cu116
        # The one-liner clamps MATRIX_CUDA_VERSION into the [minv, maxv] range listed for this torch version.
        # This code is ugly, maybe there's a better way to do this.
        export TORCH_CUDA_VERSION=$(python -c "import os; minv = {'1.12': 113, '1.13': 116, '2.0': 117, '2.1': 118, '2.2': 118}[os.environ['MATRIX_TORCH_VERSION']]; maxv = {'1.12': 116, '1.13': 117, '2.0': 118, '2.1': 121, '2.2': 121}[os.environ['MATRIX_TORCH_VERSION']]; print(max(min(int(os.environ['MATRIX_CUDA_VERSION']), maxv), minv))")
        # Dev (nightly) builds come from the nightly index and need --pre.
        if [[ ${{ matrix.torch-version }} == *"dev"* ]]; then
          pip install --no-cache-dir --pre torch==${{ matrix.torch-version }} --index-url https://download.pytorch.org/whl/nightly/cu${TORCH_CUDA_VERSION}
        else
          pip install --no-cache-dir torch==${{ matrix.torch-version }} --index-url https://download.pytorch.org/whl/cu${TORCH_CUDA_VERSION}
        fi
        nvcc --version
        python --version
        python -c "import torch; print('PyTorch:', torch.__version__)"
        python -c "import torch; print('CUDA:', torch.version.cuda)"
        python -c "from torch.utils import cpp_extension; print (cpp_extension.CUDA_HOME)"
      shell: bash

    - name: Build wheel
      run: |
        # We want setuptools >= 49.6.0 otherwise we can't compile the extension if system CUDA version is 11.7 and pytorch cuda version is 11.6
        # https://github.com/pytorch/pytorch/blob/664058fa83f1d8eede5d66418abff6e20bd76ca8/torch/utils/cpp_extension.py#L810
        # However this still fails so I'm using a newer version of setuptools
        pip install setuptools==68.0.0
        pip install ninja packaging wheel
        export PATH=/usr/local/nvidia/bin:/usr/local/nvidia/lib64:$PATH
        export LD_LIBRARY_PATH=/usr/local/nvidia/lib64:/usr/local/cuda/lib64:$LD_LIBRARY_PATH
        # Limit MAX_JOBS otherwise the github runner goes OOM
        MAX_JOBS=2 CAUSAL_CONV1D_FORCE_BUILD="TRUE" CAUSAL_CONV1D_FORCE_CXX11_ABI=${{ matrix.cxx11_abi}} python setup.py bdist_wheel --dist-dir=dist
        # Rename the wheel: insert "+cu<cuda>torch<torch>cxx11abi<abi>" before the second "-" of its file name.
        tmpname=cu${MATRIX_CUDA_VERSION}torch${MATRIX_TORCH_VERSION}cxx11abi${{ matrix.cxx11_abi }}
        wheel_name=$(ls dist/*whl | xargs -n 1 basename | sed "s/-/+$tmpname-/2")
        ls dist/*whl |xargs -I {} mv {} dist/${wheel_name}
        # Make the final file name available to the upload step.
        echo "wheel_name=${wheel_name}" >> $GITHUB_ENV

    - name: Log Built Wheels
      run: |
        ls dist

    # Strip the "refs/tags/" prefix from GITHUB_REF and expose the result as
    # the step output `branch`. Written to $GITHUB_OUTPUT because the
    # `::set-output` workflow command is deprecated.
    - name: Get the tag version
      id: extract_branch
      run: echo "branch=${GITHUB_REF#refs/tags/}" >> "$GITHUB_OUTPUT"

    # Look up the release for this tag to obtain its upload URL.
    - name: Get Release with tag
      id: get_current_release
      uses: joutvhu/get-release@v1
      with:
        tag_name: ${{ steps.extract_branch.outputs.branch }}
      env:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

    - name: Upload Release Asset
      id: upload_release_asset
      uses: actions/upload-release-asset@v1
      env:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      with:
        upload_url: ${{ steps.get_current_release.outputs.upload_url }}
        asset_path: ./dist/${{env.wheel_name}}
        asset_name: ${{env.wheel_name}}
        asset_content_type: application/*
|
||
# Builds the source distribution (CUDA build skipped) and uploads it to PyPI
# once the wheel-building job has finished.
publish_package:
  name: Publish package
  runs-on: ubuntu-latest
  needs: build_wheels

  steps:
    - uses: actions/checkout@v3

    - uses: actions/setup-python@v4
      with:
        python-version: '3.10'

    - name: Install dependencies
      run: |
        pip install ninja packaging setuptools wheel twine
        # We don't want to download anything CUDA-related here
        pip install torch --index-url https://download.pytorch.org/whl/cpu

    # CAUSAL_CONV1D_SKIP_CUDA_BUILD is set so that only the sdist is produced.
    - name: Build core package
      run: python setup.py sdist --dist-dir=dist
      env:
        CAUSAL_CONV1D_SKIP_CUDA_BUILD: "TRUE"

    # Token-based upload: the user name is the literal "__token__".
    - name: Deploy
      run: python -m twine upload dist/*
      env:
        TWINE_USERNAME: "__token__"
        TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Tri Dao, tri@tridao.me |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
BSD 3-Clause License | ||
|
||
Copyright (c) 2022, the respective contributors, as shown by the AUTHORS file. | ||
All rights reserved. | ||
|
||
Redistribution and use in source and binary forms, with or without | ||
modification, are permitted provided that the following conditions are met: | ||
|
||
* Redistributions of source code must retain the above copyright notice, this | ||
list of conditions and the following disclaimer. | ||
|
||
* Redistributions in binary form must reproduce the above copyright notice, | ||
this list of conditions and the following disclaimer in the documentation | ||
and/or other materials provided with the distribution. | ||
|
||
* Neither the name of the copyright holder nor the names of its | ||
contributors may be used to endorse or promote products derived from | ||
this software without specific prior written permission. | ||
|
||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | ||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE | ||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | ||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | ||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | ||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
# Causal depthwise conv1d in CUDA with a PyTorch interface |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
__version__ = "1.0.0" | ||
|
||
from causal_conv1d.causal_conv1d_interface import causal_conv1d_fn, causal_conv1d_update |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
# Copyright (c) 2023, Tri Dao. | ||
|
||
import torch | ||
import torch.nn.functional as F | ||
|
||
|
||
import causal_conv1d_cuda | ||
|
||
|
||
class CausalConv1dFn(torch.autograd.Function):
    """Autograd function that dispatches to the ``causal_conv1d_cuda`` extension."""

    @staticmethod
    def forward(ctx, x, weight, bias=None, activation=None):
        """
        Run ``causal_conv1d_cuda.causal_conv1d_fwd`` and stash the inputs for backward.

        x: input tensor; copied to a contiguous layout unless dim 1 or dim 2 has stride 1
        weight: passed through to the kernel unchanged
        bias: optional; made contiguous when given
        activation: either None or "silu" or "swish"
        Raises NotImplementedError for any other activation value.
        """
        if activation not in [None, "silu", "swish"]:
            raise NotImplementedError("activation must be None, silu, or swish")
        # Keep x as-is when either of its last two dims already has unit stride.
        if not (x.stride(2) == 1 or x.stride(1) == 1):
            x = x.contiguous()
        if bias is not None:
            bias = bias.contiguous()
        ctx.save_for_backward(x, weight, bias)
        # The extension receives the activation as a boolean flag.
        ctx.activation = activation in ["silu", "swish"]
        return causal_conv1d_cuda.causal_conv1d_fwd(x, weight, bias, ctx.activation)

    @staticmethod
    def backward(ctx, dout):
        """
        Run ``causal_conv1d_cuda.causal_conv1d_bwd`` on the saved inputs.

        Returns one gradient per forward argument: (dx, dweight, dbias, None).
        dbias is None when forward was called without a bias; the trailing None
        corresponds to ``activation``.
        """
        x, weight, bias = ctx.saved_tensors
        if not (dout.stride(2) == 1 or dout.stride(1) == 1):
            dout = dout.contiguous()
        # The kernel supports passing in a pre-allocated dx (e.g., in case we want to fuse the
        # backward of conv1d with the backward of chunk).
        # Here we just pass in None and dx will be allocated in the C++ code.
        grads = causal_conv1d_cuda.causal_conv1d_bwd(x, weight, bias, dout, None, ctx.activation)
        dx, dweight, dbias = grads
        if bias is None:
            dbias = None
        return dx, dweight, dbias, None
|
||
|
||
def causal_conv1d_fn(x, weight, bias=None, activation=None):
    """
    Differentiable causal conv1d; forwards its arguments to CausalConv1dFn.

    x: (batch, dim, seqlen)
    weight: (dim, width)
    bias: (dim,)
    activation: either None or "silu" or "swish"

    out: (batch, dim, seqlen)
    """
    # autograd.Function.apply takes positional arguments only.
    call_args = (x, weight, bias, activation)
    return CausalConv1dFn.apply(*call_args)
|
||
|
||
def causal_conv1d_ref(x, weight, bias=None, activation=None):
    """
    Pure-PyTorch causal depthwise conv1d built on F.conv1d.

    x: (batch, dim, seqlen)
    weight: (dim, width)
    bias: (dim,)
    activation: either None or "silu" or "swish"

    out: (batch, dim, seqlen), cast back to the dtype x had on entry
    """
    if activation not in [None, "silu", "swish"]:
        raise NotImplementedError("activation must be None, silu, or swish")
    orig_dtype = x.dtype
    # Compute in the weight's dtype; the result is cast back at the end.
    x_cast = x.to(weight.dtype)
    length = x_cast.shape[-1]
    channels, kernel_width = weight.shape
    # groups=channels gives each channel its own filter; padding by
    # kernel_width - 1 and keeping the first `length` outputs makes it causal.
    y = F.conv1d(
        x_cast,
        weight.unsqueeze(1),
        bias,
        padding=kernel_width - 1,
        groups=channels,
    )[..., :length]
    if activation is not None:
        y = F.silu(y)
    return y.to(dtype=orig_dtype)
|
||
|
||
def causal_conv1d_update(x, conv_state, weight, bias=None, activation=None):
    """
    Single-step update; forwards to causal_conv1d_cuda.causal_conv1d_update.

    x: (batch, dim)
    conv_state: (batch, dim, width)
    weight: (dim, width)
    bias: (dim,)
    activation: either None or "silu" or "swish"

    out: (batch, dim)
    """
    if activation not in [None, "silu", "swish"]:
        raise NotImplementedError("activation must be None, silu, or swish")
    # The extension takes the activation as a boolean flag.
    use_silu = activation in ["silu", "swish"]
    return causal_conv1d_cuda.causal_conv1d_update(x, conv_state, weight, bias, use_silu)
|
||
|
||
def causal_conv1d_update_ref(x, conv_state, weight, bias=None, activation=None):
    """
    Pure-PyTorch single-step update. conv_state is modified in place.

    x: (batch, dim)
    conv_state: (batch, dim, width)
    weight: (dim, width)
    bias: (dim,)
    activation: either None or "silu" or "swish"

    out: (batch, dim), cast to the dtype of x
    """
    if activation not in [None, "silu", "swish"]:
        raise NotImplementedError("activation must be None, silu, or swish")
    orig_dtype = x.dtype
    batch, dim = x.shape
    width = weight.shape[1]
    assert conv_state.shape == (batch, dim, width)
    assert weight.shape == (dim, width)
    # Shift the window one slot to the left, then write x into the last slot.
    shifted = torch.roll(conv_state, shifts=-1, dims=-1)
    conv_state.copy_(shifted)  # Update state (B D W)
    conv_state[:, :, -1] = x
    # Weighted sum over the window gives one value per (batch, dim).
    out = (conv_state * weight).sum(dim=-1)  # (B D)
    if bias is not None:
        out.add_(bias)
    if activation is not None:
        out = F.silu(out)
    return out.to(dtype=orig_dtype)
Oops, something went wrong.