From 93b495f9568aa60312ef55ef1c0b6eb533b782ba Mon Sep 17 00:00:00 2001 From: ksimpson Date: Wed, 27 Nov 2024 12:07:00 -0800 Subject: [PATCH 1/2] update nvjitlink test --- cuda_bindings/tests/test_nvjitlink.py | 112 +++++++++++++++++--------- 1 file changed, 76 insertions(+), 36 deletions(-) diff --git a/cuda_bindings/tests/test_nvjitlink.py b/cuda_bindings/tests/test_nvjitlink.py index aaf32df3..d92a3ca7 100644 --- a/cuda_bindings/tests/test_nvjitlink.py +++ b/cuda_bindings/tests/test_nvjitlink.py @@ -4,13 +4,22 @@ import pytest -from cuda.bindings import nvjitlink +from cuda.bindings import nvjitlink, nvrtc -ptx_kernel = """ -.version 8.5 -.target sm_90 +# Establish a handful of compatible architectures and PTX versions to test with +ARCHITECTURES = ["sm_60", "sm_75", "sm_80", "sm_90"] +PTX_VERSIONS = ["5.0", "6.4", "7.0", "8.5"] + + +def ptx_header(version, arch): + return f""" +.version {version} +.target {arch} .address_size 64 +""" + +ptx_kernel = """ .visible .entry _Z6kernelPi( .param .u64 _Z6kernelPi_param_0 ) @@ -28,18 +37,40 @@ """ minimal_ptx_kernel = """ -.version 8.5 -.target sm_90 -.address_size 64 - .func _MinimalKernel() { ret; } """ -ptx_kernel_bytes = ptx_kernel.encode("utf-8") -minimal_ptx_kernel_bytes = minimal_ptx_kernel.encode("utf-8") +ptx_kernel_bytes = [ + (ptx_header(version, arch) + ptx_kernel).encode("utf-8") for version, arch in zip(PTX_VERSIONS, ARCHITECTURES) +] +minimal_ptx_kernel_bytes = [ + (ptx_header(version, arch) + minimal_ptx_kernel).encode("utf-8") + for version, arch in zip(PTX_VERSIONS, ARCHITECTURES) +] + + +# create a valid LTOIR input for testing +@pytest.fixture +def get_dummy_ltoir(): + def CHECK_NVRTC(err): + if err != nvrtc.nvrtcResult.NVRTC_SUCCESS: + raise RuntimeError(f"Nvrtc Error: {err}") + + empty_cplusplus_kernel = "__global__ void A() {}" + err, program_handle = nvrtc.nvrtcCreateProgram(empty_cplusplus_kernel.encode(), b"", 0, [], []) + CHECK_NVRTC(err) + nvrtc.nvrtcCompileProgram(program_handle, 1, 
[b"-dlto"]) + err, size = nvrtc.nvrtcGetLTOIRSize(program_handle) + CHECK_NVRTC(err) + empty_kernel_ltoir = b" " * size + (err,) = nvrtc.nvrtcGetLTOIR(program_handle, empty_kernel_ltoir) + CHECK_NVRTC(err) + (err,) = nvrtc.nvrtcDestroyProgram(program_handle) + CHECK_NVRTC(err) + return empty_kernel_ltoir def test_unrecognized_option_error(): @@ -52,39 +83,41 @@ def test_invalid_arch_error(): nvjitlink.create(1, ["-arch=sm_XX"]) -def test_create_and_destroy(): - handle = nvjitlink.create(1, ["-arch=sm_53"]) +@pytest.mark.parametrize("option", ARCHITECTURES) +def test_create_and_destroy(option): + handle = nvjitlink.create(1, [f"-arch={option}"]) assert handle != 0 nvjitlink.destroy(handle) -def test_complete_empty(): - handle = nvjitlink.create(1, ["-arch=sm_90"]) +@pytest.mark.parametrize("option", ARCHITECTURES) +def test_complete_empty(option): + handle = nvjitlink.create(1, [f"-arch={option}"]) nvjitlink.complete(handle) nvjitlink.destroy(handle) -def test_add_data(): - handle = nvjitlink.create(1, ["-arch=sm_90"]) - nvjitlink.add_data(handle, nvjitlink.InputType.ANY, ptx_kernel_bytes, len(ptx_kernel_bytes), "test_data") - nvjitlink.add_data( - handle, nvjitlink.InputType.ANY, minimal_ptx_kernel_bytes, len(minimal_ptx_kernel_bytes), "minimal_test_data" - ) +@pytest.mark.parametrize("option, ptx_bytes", zip(ARCHITECTURES, ptx_kernel_bytes)) +def test_add_data(option, ptx_bytes): + handle = nvjitlink.create(1, [f"-arch={option}"]) + nvjitlink.add_data(handle, nvjitlink.InputType.ANY, ptx_bytes, len(ptx_bytes), "test_data") nvjitlink.complete(handle) nvjitlink.destroy(handle) -def test_add_file(tmp_path): - handle = nvjitlink.create(1, ["-arch=sm_90"]) +@pytest.mark.parametrize("option, ptx_bytes", zip(ARCHITECTURES, ptx_kernel_bytes)) +def test_add_file(option, ptx_bytes, tmp_path): + handle = nvjitlink.create(1, [f"-arch={option}"]) file_path = tmp_path / "test_file.cubin" - file_path.write_bytes(ptx_kernel_bytes) + file_path.write_bytes(ptx_bytes) 
nvjitlink.add_file(handle, nvjitlink.InputType.ANY, str(file_path)) nvjitlink.complete(handle) nvjitlink.destroy(handle) -def test_get_error_log(): - handle = nvjitlink.create(1, ["-arch=sm_90"]) +@pytest.mark.parametrize("option", ARCHITECTURES) +def test_get_error_log(option): + handle = nvjitlink.create(1, [f"-arch={option}"]) nvjitlink.complete(handle) log_size = nvjitlink.get_error_log_size(handle) log = bytearray(log_size) @@ -93,9 +126,10 @@ def test_get_error_log(): nvjitlink.destroy(handle) -def test_get_info_log(): - handle = nvjitlink.create(1, ["-arch=sm_90"]) - nvjitlink.add_data(handle, nvjitlink.InputType.ANY, ptx_kernel_bytes, len(ptx_kernel_bytes), "test_data") +@pytest.mark.parametrize("option, ptx_bytes", zip(ARCHITECTURES, ptx_kernel_bytes)) +def test_get_info_log(option, ptx_bytes): + handle = nvjitlink.create(1, [f"-arch={option}"]) + nvjitlink.add_data(handle, nvjitlink.InputType.ANY, ptx_bytes, len(ptx_bytes), "test_data") nvjitlink.complete(handle) log_size = nvjitlink.get_info_log_size(handle) log = bytearray(log_size) @@ -104,9 +138,10 @@ def test_get_info_log(): nvjitlink.destroy(handle) -def test_get_linked_cubin(): - handle = nvjitlink.create(1, ["-arch=sm_90"]) - nvjitlink.add_data(handle, nvjitlink.InputType.ANY, ptx_kernel_bytes, len(ptx_kernel_bytes), "test_data") +@pytest.mark.parametrize("option, ptx_bytes", zip(ARCHITECTURES, ptx_kernel_bytes)) +def test_get_linked_cubin(option, ptx_bytes): + handle = nvjitlink.create(1, [f"-arch={option}"]) + nvjitlink.add_data(handle, nvjitlink.InputType.ANY, ptx_bytes, len(ptx_bytes), "test_data") nvjitlink.complete(handle) cubin_size = nvjitlink.get_linked_cubin_size(handle) cubin = bytearray(cubin_size) @@ -115,11 +150,16 @@ def test_get_linked_cubin(): nvjitlink.destroy(handle) -def test_get_linked_ptx(): - # TODO improve this test to call get_linked_ptx without this error - handle = nvjitlink.create(2, ["-arch=sm_90", "-lto"]) - with pytest.raises(nvjitlink.nvJitLinkError, 
match="ERROR_NVVM_COMPILE"): - nvjitlink.complete(handle) +@pytest.mark.parametrize("option", ARCHITECTURES) +def test_get_linked_ptx(option, get_dummy_ltoir): + handle = nvjitlink.create(3, [f"-arch={option}", "-lto", "-ptx"]) + nvjitlink.add_data(handle, nvjitlink.InputType.LTOIR, get_dummy_ltoir, len(get_dummy_ltoir), "test_data") + nvjitlink.complete(handle) + ptx_size = nvjitlink.get_linked_ptx_size(handle) + ptx = bytearray(ptx_size) + nvjitlink.get_linked_ptx(handle, ptx) + assert len(ptx) == ptx_size + nvjitlink.destroy(handle) def test_package_version(): From 30d65ed1ceb45a901742626587e1211c018d55b4 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Thu, 28 Nov 2024 03:04:14 +0000 Subject: [PATCH 2/2] re-run pre-commit; update docs to reflect recent changes --- README.md | 10 +++--- cuda_bindings/README.md | 32 ++++++++++++++++-- cuda_core/README.md | 36 ++++++++++++++++++++- cuda_core/cuda/core/experimental/_stream.py | 4 +-- cuda_python/docs/source/index.rst | 2 +- 5 files changed, 71 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index d2b5241b..3b7338a3 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# CUDA-Python +# cuda-python CUDA Python is the home for accessing NVIDIA’s CUDA platform from Python. It consists of multiple components: @@ -7,13 +7,13 @@ CUDA Python is the home for accessing NVIDIA’s CUDA platform from Python. It c * [cuda.cooperative](https://nvidia.github.io/cccl/cuda_cooperative/): Pythonic exposure of CUB cooperative algorithms * [cuda.parallel](https://nvidia.github.io/cccl/cuda_parallel/): Pythonic exposure of Thrust parallel algorithms -For access to NVIDIA Math Libraries, please refer to [nvmath-python](https://docs.nvidia.com/cuda/nvmath-python/latest). +For access to NVIDIA CPU & GPU Math Libraries, please refer to [nvmath-python](https://docs.nvidia.com/cuda/nvmath-python/latest). CUDA Python is currently undergoing an overhaul to improve existing and bring up new components. 
All of the previously available functionalities from the cuda-python package will continue to be available, please refer to the [cuda.bindings](https://nvidia.github.io/cuda-python/cuda-bindings/latest) documentation for installation guide and further detail. -## CUDA-Python as a metapackage +## cuda-python as a metapackage -CUDA-Python is structured to become a metapackage that contains a collection of subpackages. Each subpackage is versioned independently, allowing installation of each component as needed. +`cuda-python` is being re-structured to become a metapackage that contains a collection of subpackages. Each subpackage is versioned independently, allowing installation of each component as needed. ### Subpackage: `cuda.core` @@ -21,7 +21,7 @@ The `cuda.core` package offers idiomatic, pythonic access to CUDA Runtime and ot The goals are to -1. Provide **idiomatic (“pythonic”)** access to CUDA Driver/Runtime +1. Provide **idiomatic ("pythonic")** access to CUDA Driver, Runtime, and JIT compiler toolchain 2. Focus on **developer productivity** by ensuring end-to-end CUDA development can be performed quickly and entirely in Python 3. **Avoid homegrown** Python abstractions for CUDA for new Python GPU libraries starting from scratch 4. **Ease** developer **burden of maintaining** and catching up with latest CUDA features diff --git a/cuda_bindings/README.md b/cuda_bindings/README.md index 85852068..7a2d8fc8 100644 --- a/cuda_bindings/README.md +++ b/cuda_bindings/README.md @@ -1,13 +1,15 @@ # `cuda.bindings`: Low-level CUDA interfaces -CUDA Python is a standard set of low-level interfaces, providing full coverage of and access to the CUDA host APIs from Python. Checkout the [Overview](https://nvidia.github.io/cuda-python/cuda-bindings/latest/overview.html) for the workflow and performance results. +`cuda.bindings` is a standard set of low-level interfaces, providing full coverage of and access to the CUDA host APIs from Python. 
Check out the [Overview](https://nvidia.github.io/cuda-python/cuda-bindings/latest/overview.html) for the workflow and performance results. + +`cuda.bindings` is a subpackage of `cuda-python`. ## Installing CUDA Python can be installed from: -* PYPI -* Conda (nvidia channel) +* PyPI +* Conda (conda-forge/nvidia channels) * Source builds Differences between these options are described in [Installation](https://nvidia.github.io/cuda-python/cuda-bindings/latest/install.html) documentation. Each package guarantees minor version compatibility. @@ -31,6 +33,30 @@ Source builds work for multiple Python versions, however pre-build PyPI and Cond * Python 3.9 to 3.12 +## Developing + +We use `pre-commit` to manage various tools to help development and ensure consistency. +```shell +pip install pre-commit +``` + +### Code linting + +Run this command before checking in the code changes +```shell +pre-commit run -a --show-diff-on-failure +``` +to ensure the code formatting is in line with the requirements (as listed in [`pyproject.toml`](./pyproject.toml)). + +### Code signing + +This repository implements a security check to prevent the CI system from running untrusted code. A part of the +security check consists of checking if the git commits are signed. See +[here](https://docs.gha-runners.nvidia.com/apps/copy-pr-bot/faqs/#why-did-i-receive-a-comment-that-my-pull-request-requires-additional-validation) +and +[here](https://docs.github.com/en/authentication/managing-commit-signature-verification/about-commit-signature-verification) +for more details, including how to sign your commits. + ## Testing Latest dependencies can be found in [requirements.txt](https://github.com/NVIDIA/cuda-python/blob/main/cuda_bindings/requirements.txt). diff --git a/cuda_core/README.md b/cuda_core/README.md index e979fb73..6545eef7 100644 --- a/cuda_core/README.md +++ b/cuda_core/README.md @@ -1,9 +1,43 @@ # `cuda.core`: (experimental) pythonic CUDA module -Currently under active development. 
To build from source, just do: +Currently under active development; see [the documentation](https://nvidia.github.io/cuda-python/cuda-core/latest/) for more details. + +## Installing + +To build from source, just do: ```shell $ git clone https://github.com/NVIDIA/cuda-python $ cd cuda-python/cuda_core # move to the directory where this README locates $ pip install . ``` For now `cuda-python` is a required dependency. + +## Developing + +We use `pre-commit` to manage various tools to help development and ensure consistency. +```shell +pip install pre-commit +``` + +### Code linting + +Run this command before checking in the code changes +```shell +pre-commit run -a --show-diff-on-failure +``` +to ensure the code formatting is in line with the requirements (as listed in [`pyproject.toml`](./pyproject.toml)). + +### Code signing + +This repository implements a security check to prevent the CI system from running untrusted code. A part of the +security check consists of checking if the git commits are signed. See +[here](https://docs.gha-runners.nvidia.com/apps/copy-pr-bot/faqs/#why-did-i-receive-a-comment-that-my-pull-request-requires-additional-validation) +and +[here](https://docs.github.com/en/authentication/managing-commit-signature-verification/about-commit-signature-verification) +for more details, including how to sign your commits. 
+ +## Testing + +To run these tests: +* `python -m pytest tests/` against local builds +* `pytest tests/` against installed packages diff --git a/cuda_core/cuda/core/experimental/_stream.py b/cuda_core/cuda/core/experimental/_stream.py index 6a68d175..5756dd34 100644 --- a/cuda_core/cuda/core/experimental/_stream.py +++ b/cuda_core/cuda/core/experimental/_stream.py @@ -211,9 +211,7 @@ def wait(self, event_or_stream: Union[Event, Stream]): try: stream = Stream._init(event_or_stream) except Exception as e: - raise ValueError( - "only an Event, Stream, or object supporting __cuda_stream__ can be waited" - ) from e + raise ValueError("only an Event, Stream, or object supporting __cuda_stream__ can be waited") from e else: stream = event_or_stream event = handle_return(cuda.cuEventCreate(cuda.CUevent_flags.CU_EVENT_DISABLE_TIMING)) diff --git a/cuda_python/docs/source/index.rst b/cuda_python/docs/source/index.rst index 4b17a1e9..330268b4 100644 --- a/cuda_python/docs/source/index.rst +++ b/cuda_python/docs/source/index.rst @@ -9,7 +9,7 @@ multiple components: - `cuda.cooperative`_: Pythonic exposure of CUB cooperative algorithms - `cuda.parallel`_: Pythonic exposure of Thrust parallel algorithms -For access to NVIDIA Math Libraries, please refer to `nvmath-python`_. +For access to NVIDIA CPU & GPU Math Libraries, please refer to `nvmath-python`_. .. _nvmath-python: https://docs.nvidia.com/cuda/nvmath-python/latest