From 555f88f718579114ace33401de637833eef80ebb Mon Sep 17 00:00:00 2001 From: Gary Miguel Date: Wed, 12 Jul 2023 18:37:32 -0700 Subject: [PATCH] Have cuda_library output RDC (#125) This allows a `cuda_library` that was built with `rdc=True` to be depended upon by another such library. This is convenient as such a library can be consumed by either another `cuda_library` OR a `cc_library`. Change-Id: I1014d28a0ab3a9c76b788821211b13c4a9956d2a --- .github/workflows/build-tests.yaml | 8 +++--- cuda/private/rules/cuda_library.bzl | 22 ++++++++++++++-- examples/rdc/BUILD.bazel | 39 +++++++++++++++++++++++------ 3 files changed, 55 insertions(+), 14 deletions(-) diff --git a/.github/workflows/build-tests.yaml b/.github/workflows/build-tests.yaml index 5703a417..43015ba3 100644 --- a/.github/workflows/build-tests.yaml +++ b/.github/workflows/build-tests.yaml @@ -74,13 +74,13 @@ jobs: if: ${{ startsWith(matrix.cases.os, 'windows') }} run: .github/workflows/Set-VSEnv.ps1 2019 - - run: bazelisk build @rules_cuda_examples//basic:main - - run: bazelisk build @rules_cuda_examples//rdc:main + - run: bazelisk build @rules_cuda_examples//basic:all + - run: bazelisk build @rules_cuda_examples//rdc:all - run: bazelisk build @rules_cuda_examples//if_cuda:main - run: bazelisk build @rules_cuda_examples//if_cuda:main --enable_cuda=False - - run: cd examples && bazelisk build //basic:main --config=bzlmod - - run: cd examples && bazelisk build //rdc:main --config=bzlmod + - run: cd examples && bazelisk build //basic:all --config=bzlmod + - run: cd examples && bazelisk build //rdc:all --config=bzlmod - run: cd examples && bazelisk build //if_cuda:main --config=bzlmod - run: cd examples && bazelisk build //if_cuda:main --enable_cuda=False --config=bzlmod diff --git a/cuda/private/rules/cuda_library.bzl b/cuda/private/rules/cuda_library.bzl index 75c2360c..bdb40127 100644 --- a/cuda/private/rules/cuda_library.bzl +++ b/cuda/private/rules/cuda_library.bzl @@ -31,13 +31,17 @@ def _cuda_library_impl(ctx): # outputs objects = depset(compile(ctx, cuda_toolchain, cc_toolchain, src_files, common, pic = False, rdc = use_rdc)) pic_objects = depset(compile(ctx, cuda_toolchain, cc_toolchain, src_files, common, pic = True, rdc = use_rdc)) + rdc_objects = depset([]) + rdc_pic_objects = depset([]) # if rdc is enabled for this cuda_library, then we need futher do a pass of device link if use_rdc: transitive_objects = depset(transitive = [dep[CudaInfo].rdc_objects for dep in attr.deps if CudaInfo in dep]) transitive_pic_objects = depset(transitive = [dep[CudaInfo].rdc_pic_objects for dep in attr.deps if CudaInfo in dep]) objects = depset(transitive = [objects, transitive_objects]) + rdc_objects = objects pic_objects = depset(transitive = [pic_objects, transitive_pic_objects]) + rdc_pic_objects = pic_objects dlink_object = depset([device_link(ctx, cuda_toolchain, cc_toolchain, objects, common, pic = False, rdc = use_rdc)]) dlink_pic_object = depset([device_link(ctx, cuda_toolchain, cc_toolchain, pic_objects, common, pic = True, rdc = use_rdc)]) objects = depset(transitive = [objects, dlink_object]) @@ -87,12 +91,20 @@ def _cuda_library_impl(ctx): pic_lib = pic_libs, objects = objects, pic_objects = pic_objects, + rdc_objects = rdc_objects, + rdc_pic_objects = rdc_pic_objects, ), CcInfo( compilation_context = cc_info.compilation_context, linking_context = cc_info.linking_context, ), - cuda_helper.create_cuda_info(defines = depset(common.defines)), + cuda_helper.create_cuda_info( + defines = depset(common.defines), + objects = objects, + pic_objects = pic_objects, + rdc_objects = rdc_objects, + rdc_pic_objects = rdc_pic_objects, + ), ] cuda_library = rule( @@ -104,7 +116,13 @@ cuda_library = rule( "hdrs": attr.label_list(allow_files = ALLOW_CUDA_HDRS), "deps": attr.label_list(providers = [[CcInfo], [CudaInfo]]), "alwayslink": attr.bool(default = False), - "rdc": attr.bool(default = False, doc = "whether to perform relocateable device code linking, otherwise, normal device link."), + "rdc": attr.bool( + default = False, + doc = ("Whether to produce and consume relocateable device code. " + + "Transitive deps that contain device code must all either be cuda_objects or cuda_library(rdc = True). " + + "If False, all device code must be in the same translation unit. May have performance implications. " + + "See https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#using-separate-compilation-in-cuda."), + ), "includes": attr.string_list(doc = "List of include dirs to be added to the compile line."), "host_copts": attr.string_list(doc = "Add these options to the CUDA host compilation command."), "host_defines": attr.string_list(doc = "List of defines to add to the compile line."), diff --git a/examples/rdc/BUILD.bazel b/examples/rdc/BUILD.bazel index 2775d7f6..a4153b88 100644 --- a/examples/rdc/BUILD.bazel +++ b/examples/rdc/BUILD.bazel @@ -1,29 +1,52 @@ load("@rules_cuda//cuda:defs.bzl", "cuda_library", "cuda_objects") cuda_objects( - name = "a", + name = "a_objects", srcs = ["a.cu"], deps = [":b"], ) cuda_objects( - name = "b", + name = "b_objects", srcs = ["b.cu"], hdrs = ["b.cuh"], ) cuda_library( - name = "librdc", - rdc = 1, + name = "lib_from_objects", + rdc = True, deps = [ - ":a", - ":b", + ":a_objects", + ":b_objects", ], ) cc_binary( - name = "main", + name = "main_from_objects", deps = [ - ":librdc", + ":lib_from_objects", + ], +) + +# Another way of doing it is to just use cuda_library +cuda_library( + name = "a", + srcs = ["a.cu"], + rdc = True, + deps = [":b"], +) + +cuda_library( + name = "b", + srcs = ["b.cu"], + hdrs = ["b.cuh"], + rdc = True, +) + +cc_binary( + name = "main_from_library", + deps = [ + ":a", + ":b", ], )