Merge branch 'branch-25.02' into random-state-everywhere

rapidsai · Jan 14, 2025 · b388d0a · b388d0a
2 parents 7092d59 + 47bac70
commit b388d0a
Show file tree

Hide file tree

Showing 109 changed files with 1,152 additions and 1,124 deletions.
diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
@@ -5,11 +5,9 @@ cpp/               @rapidsai/cuml-cpp-codeowners
 python/            @rapidsai/cuml-python-codeowners
 
 #cmake code owners
-**/CMakeLists.txt          @rapidsai/cuml-cmake-codeowners
-**/cmake/                  @rapidsai/cuml-cmake-codeowners
-python/cuml/pyproject.toml @rapidsai/cuml-cmake-codeowners
-build.sh                   @rapidsai/cuml-cmake-codeowners
-**/build.sh                @rapidsai/cuml-cmake-codeowners
+CMakeLists.txt @rapidsai/cuml-cmake-codeowners
+*.cmake        @rapidsai/cuml-cmake-codeowners
+**/cmake/      @rapidsai/cuml-cmake-codeowners
 
 #CI code owners
 /.github/                @rapidsai/ci-codeowners

diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
@@ -91,3 +91,4 @@ jobs:
       sha: ${{ inputs.sha }}
       date: ${{ inputs.date }}
       package-name: cuml
+      package-type: python
diff --git a/.gitignore b/.gitignore
@@ -68,3 +68,8 @@ cpp/Doxyfile
 # clang tooling
 compile_commands.json
 .clangd/
+
+# generally prefer 'pyproject.toml' to 'pytest.ini' for pytest options
+# ref: https://github.com/rapidsai/cuml/pull/6201
+pytest.ini
+!python/cuml/cuml/benchmark/automated/pytest.ini
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -69,15 +69,14 @@ repos:
                 CMakeLists[.]txt$|
                 CMakeLists_standalone[.]txt$|
                 [.]flake8[.]cython$|
-                meta[.]yaml$|
-                setup[.]cfg$
+                meta[.]yaml$
           exclude: |
             (?x)
                 cpp/src/tsne/cannylab/bh[.]cu$|
                 python/cuml/cuml/_thirdparty
         - id: verify-alpha-spec
     - repo: https://github.com/rapidsai/dependency-file-generator
-      rev: v1.16.0
+      rev: v1.17.0
       hooks:
           - id: rapids-dependency-file-generator
             args: ["--clean"]

diff --git a/BUILD.md b/BUILD.md
@@ -234,7 +234,6 @@ cuML's cmake has the following configurable flags available:
 | BUILD_PRIMS_TESTS | [ON, OFF]  | ON  | Enable/disable building cuML algorithm test executable `prims_test`.  |
 | BUILD_CUML_EXAMPLES | [ON, OFF]  | ON  | Enable/disable building cuML C++ API usage examples.  |
 | BUILD_CUML_BENCH | [ON, OFF] | ON | Enable/disable building of cuML C++ benchmark.  |
-| BUILD_CUML_PRIMS_BENCH | [ON, OFF] | ON | Enable/disable building of ml-prims C++ benchark.  |
 | CMAKE_CXX11_ABI | [ON, OFF]  | ON  | Enable/disable the GLIBCXX11 ABI  |
 | DETECT_CONDA_ENV | [ON, OFF] | ON | Use detection of conda environment for dependencies. If set to ON, and no value for CMAKE_INSTALL_PREFIX is passed, then it'll assign it to $CONDA_PREFIX (to install in the active environment).  |
 | DISABLE_OPENMP | [ON, OFF]  | OFF  | Set to `ON` to disable OpenMP  |

diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh
@@ -10,10 +10,6 @@ source rapids-date-string
 
 RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
 
-# This is the version of the suffix with a preceding hyphen. It's used
-# everywhere except in the final wheel name.
-PACKAGE_CUDA_SUFFIX="-${RAPIDS_PY_CUDA_SUFFIX}"
-
 rapids-generate-version > ./VERSION
 
 cd ${package_dir}

diff --git a/ci/test_wheel.sh b/ci/test_wheel.sh
@@ -1,19 +1,15 @@
 #!/bin/bash
-# Copyright (c) 2023-2024, NVIDIA CORPORATION.
+# Copyright (c) 2023-2025, NVIDIA CORPORATION.
 
 set -euo pipefail
 
 mkdir -p ./dist
 RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
-RAPIDS_PY_WHEEL_NAME="cuml_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist
-
-# On arm also need to install CMake because treelite needs to be compiled (no wheels available for arm).
-if [[ "$(arch)" == "aarch64" ]]; then
-    python -m pip install cmake
-fi
+RAPIDS_PY_WHEEL_NAME="cuml_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist
 
 # echo to expand wildcard before adding `[extra]` requires for pip
-python -m pip install $(echo ./dist/cuml*.whl)[test]
+python -m pip install \
+  "$(echo ./dist/cuml*.whl)[test]"
 
 RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${PWD}/test-results"}
 mkdir -p "${RAPIDS_TESTS_DIR}"

diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -71,9 +71,9 @@ dependencies:
 - scipy>=1.8.0
 - seaborn
 - spdlog>=1.14.1,<1.15
+- sphinx
 - sphinx-copybutton
 - sphinx-markdown-tables
-- sphinx<6
 - statsmodels
 - sysroot_linux-64==2.17
 - treelite==4.3.0

diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml
@@ -67,9 +67,9 @@ dependencies:
 - scipy>=1.8.0
 - seaborn
 - spdlog>=1.14.1,<1.15
+- sphinx
 - sphinx-copybutton
 - sphinx-markdown-tables
-- sphinx<6
 - statsmodels
 - sysroot_linux-64==2.17
 - treelite==4.3.0

diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
@@ -1,5 +1,5 @@
 #=============================================================================
-# Copyright (c) 2018-2024, NVIDIA CORPORATION.
+# Copyright (c) 2018-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -65,6 +65,7 @@ option(USE_CCACHE "Cache build artifacts with ccache" OFF)
 option(CUDA_STATIC_RUNTIME "Statically link the CUDA runtime" OFF)
 option(CUDA_STATIC_MATH_LIBRARIES "Statically link the CUDA math libraries" OFF)
 option(CUML_USE_CUVS_STATIC "Build and statically link the CUVS library" OFF)
+option(CUML_USE_RAFT_STATIC "Build and statically link the RAFT library" OFF)
 option(CUML_USE_TREELITE_STATIC "Build and statically link the treelite library" OFF)
 option(CUML_EXPORT_TREELITE_LINKAGE "Whether to publicly or privately link treelite to libcuml++" OFF)
 option(CUML_USE_CUMLPRIMS_MG_STATIC "Build and statically link the cumlprims_mg library" OFF)
@@ -99,6 +100,7 @@ message(VERBOSE "CUML_CPP: Cache build artifacts with ccache: ${USE_CCACHE}")
 message(VERBOSE "CUML_CPP: Statically link the CUDA runtime: ${CUDA_STATIC_RUNTIME}")
 message(VERBOSE "CUML_CPP: Statically link the CUDA math libraries: ${CUDA_STATIC_MATH_LIBRARIES}")
 message(VERBOSE "CUML_CPP: Build and statically link CUVS libraries: ${CUML_USE_CUVS_STATIC}")
+message(VERBOSE "CUML_CPP: Build and statically link RAFT library: ${CUML_USE_RAFT_STATIC}")
 message(VERBOSE "CUML_CPP: Build and statically link Treelite library: ${CUML_USE_TREELITE_STATIC}")
 
 set(CUML_ALGORITHMS "ALL" CACHE STRING "Experimental: Choose which algorithms are built into libcuml++.so. Can specify individual algorithms or groups in a semicolon-separated list.")
@@ -109,6 +111,17 @@ set(RMM_LOGGING_LEVEL "INFO" CACHE STRING "Choose the logging level.")
 set_property(CACHE RMM_LOGGING_LEVEL PROPERTY STRINGS "TRACE" "DEBUG" "INFO" "WARN" "ERROR" "CRITICAL" "OFF")
 message(VERBOSE "CUML_CPP: RMM_LOGGING_LEVEL = '${RMM_LOGGING_LEVEL}'.")
 
+# Compile-time libcuml logging level; becomes CUML_LOG_ACTIVE_LEVEL=CUML_LOG_LEVEL_<value>
+set(LIBCUML_LOGGING_LEVEL
+    "DEBUG"
+    CACHE STRING "Choose the libcuml logging level."
+)
+set_property(
+  CACHE LIBCUML_LOGGING_LEVEL PROPERTY STRINGS "TRACE" "DEBUG" "INFO" "WARN" "ERROR" "CRITICAL"
+                                       "OFF"
+)
+message(VERBOSE "CUML_CPP: LIBCUML_LOGGING_LEVEL = '${LIBCUML_LOGGING_LEVEL}'.")
+
 if(BUILD_CUML_TESTS OR BUILD_PRIMS_TESTS)
   # Needed because GoogleBenchmark changes the state of FindThreads.cmake, causing subsequent runs to
   # have different values for the `Threads::Threads` target. Setting this flag ensures
@@ -220,6 +233,12 @@ endif()
 rapids_cpm_init()
 rapids_cmake_install_lib_dir(lib_dir)
 
+include(${rapids-cmake-dir}/cpm/rapids_logger.cmake)
+rapids_cpm_rapids_logger()
+rapids_make_logger(
+  ML EXPORT_SET cuml-exports LOGGER_HEADER_DIR include/cuml/common/ LOGGER_MACRO_PREFIX CUML LOGGER_TARGET cuml_logger
+)
+
 if(BUILD_CUML_TESTS OR BUILD_PRIMS_TESTS)
   find_package(Threads)
 endif()
@@ -291,8 +310,7 @@ if(BUILD_CUML_CPP_LIBRARY)
 
   # single GPU components
   # common components
-  add_library(${CUML_CPP_TARGET}
-              src/common/logger.cpp)
+  add_library(${CUML_CPP_TARGET})
   if (CUML_ENABLE_GPU)
     target_compile_definitions(${CUML_CPP_TARGET} PUBLIC CUML_ENABLE_GPU)
   endif()
@@ -564,6 +582,7 @@ if(BUILD_CUML_CPP_LIBRARY)
         PRIVATE "$<$<COMPILE_LANGUAGE:CXX>:${CUML_CXX_FLAGS}>"
                 "$<$<COMPILE_LANGUAGE:CUDA>:${CUML_CUDA_FLAGS}>"
   )
+  target_compile_definitions(${CUML_CPP_TARGET} PUBLIC "CUML_LOG_ACTIVE_LEVEL=CUML_LOG_LEVEL_${LIBCUML_LOGGING_LEVEL}")
 
   target_include_directories(${CUML_CPP_TARGET}
     PUBLIC
@@ -602,6 +621,9 @@ if(BUILD_CUML_CPP_LIBRARY)
   # These are always private:
   list(APPEND _cuml_cpp_private_libs
     raft::raft
+    rmm::rmm_logger_impl
+    raft::raft_logger_impl
+    cuml_logger_impl
     $<TARGET_NAME_IF_EXISTS:GPUTreeShap::GPUTreeShap>
     $<$<BOOL:${LINK_CUFFT}>:CUDA::cufft${_ctk_fft_static_suffix}>
     ${TREELITE_LIBS}
@@ -628,8 +650,8 @@ if(BUILD_CUML_CPP_LIBRARY)
   target_link_libraries(${CUML_CPP_TARGET}
     PUBLIC  rmm::rmm rmm::rmm_logger ${CUVS_LIB}
             ${_cuml_cpp_public_libs}
+            cuml_logger
     PRIVATE ${_cuml_cpp_private_libs}
-                   rmm::rmm_logger_impl
   )
 
   # If we export the libdmlc symbols, they can lead to weird crashes with other

diff --git a/cpp/README.md b/cpp/README.md
@@ -37,7 +37,6 @@ Current cmake offers the following configuration options:
 | BUILD_PRIMS_TESTS | [ON, OFF]  | ON  | Enable/disable building cuML algorithm test executable `prims_test`.  |
 | BUILD_CUML_EXAMPLES | [ON, OFF]  | ON  | Enable/disable building cuML C++ API usage examples.  |
 | BUILD_CUML_BENCH | [ON, OFF]  | ON  | Enable/disable building of cuML C++ benchmark. |
-| BUILD_CUML_PRIMS_BENCH | [ON, OFF]  | ON  | Enable/disable building of ml-prims C++ benchark. |
 | BUILD_CUML_STD_COMMS | [ON, OFF] | ON | Enable/disable building cuML NCCL+UCX communicator for running multi-node multi-GPU algorithms. Note that UCX support can also be enabled/disabled (see below). The standard communicator and MPI communicator are not mutually exclusive and can both be installed at the same time. |
 | WITH_UCX | [ON, OFF] | OFF | Enable/disable UCX support in the standard cuML communicator. Algorithms requiring point-to-point messaging will not work when this is disabled. This flag is ignored if BUILD_CUML_STD_COMMS is set to OFF. |
 | BUILD_CUML_MPI_COMMS | [ON, OFF] | OFF | Enable/disable building cuML MPI+NCCL communicator for running multi-node multi-GPU C++ tests. MPI communicator and STD communicator may both be installed at the same time. If OFF, it overrides BUILD_CUML_MG_TESTS to be OFF as well. |

diff --git a/cpp/bench/sg/kmeans.cu b/cpp/bench/sg/kmeans.cu
@@ -92,7 +92,7 @@ std::vector<Params> getInputs()
   p.kmeans.init                            = ML::kmeans::KMeansParams::InitMethod(0);
   p.kmeans.max_iter                        = 300;
   p.kmeans.tol                             = 1e-4;
-  p.kmeans.verbosity                       = RAFT_LEVEL_INFO;
+  p.kmeans.verbosity                       = raft::level_enum::info;
   p.kmeans.metric                          = cuvs::distance::DistanceType::L2Expanded;
   p.kmeans.rng_state                       = raft::random::RngState(p.blobs.seed);
   p.kmeans.inertia_check                   = true;

diff --git a/cpp/bench/sg/svc.cu b/cpp/bench/sg/svc.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -100,8 +100,9 @@ std::vector<SvcParams<D>> getInputs()
   p.blobs.seed           = 12345ULL;
 
   // SvmParameter{C, cache_size, max_iter, nochange_steps, tol, verbosity})
-  p.svm_param = ML::SVM::SvmParameter{1, 200, 100, 100, 1e-3, CUML_LEVEL_INFO, 0, ML::SVM::C_SVC};
-  p.model     = ML::SVM::SvmModel<D>{0, 0, 0, nullptr, {}, nullptr, 0, nullptr};
+  p.svm_param =
+    ML::SVM::SvmParameter{1, 200, 100, 100, 1e-3, ML::level_enum::info, 0, ML::SVM::C_SVC};
+  p.model = ML::SVM::SvmModel<D>{0, 0, 0, nullptr, {}, nullptr, 0, nullptr};
 
   std::vector<Triplets> rowcols = {{50000, 2, 2}, {2048, 100000, 2}, {50000, 1000, 2}};
 

diff --git a/cpp/bench/sg/svr.cu b/cpp/bench/sg/svr.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -102,7 +102,7 @@ std::vector<SvrParams<D>> getInputs()
   // SvmParameter{C, cache_size, max_iter, nochange_steps, tol, verbosity,
   //              epsilon, svmType})
   p.svm_param =
-    ML::SVM::SvmParameter{1, 200, 200, 100, 1e-3, CUML_LEVEL_INFO, 0.1, ML::SVM::EPSILON_SVR};
+    ML::SVM::SvmParameter{1, 200, 200, 100, 1e-3, ML::level_enum::info, 0.1, ML::SVM::EPSILON_SVR};
   p.model = new ML::SVM::SvmModel<D>{0, 0, 0, 0};
 
   std::vector<Triplets> rowcols = {{50000, 2, 2}, {1024, 10000, 10}, {3000, 200, 200}};

diff --git a/cpp/cmake/thirdparty/get_cuvs.cmake b/cpp/cmake/thirdparty/get_cuvs.cmake
@@ -1,5 +1,5 @@
 #=============================================================================
-# Copyright (c) 2024, NVIDIA CORPORATION.
+# Copyright (c) 2024-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -55,6 +55,7 @@ function(find_and_configure_cuvs)
         OPTIONS
           "BUILD_TESTS OFF"
           "BUILD_BENCH OFF"
+          "BUILD_CAGRA_HNSWLIB OFF"
           "BUILD_MG_ALGOS ${CUVS_BUILD_MG_ALGOS}"
 
     )

diff --git a/cpp/cmake/thirdparty/get_raft.cmake b/cpp/cmake/thirdparty/get_raft.cmake
@@ -1,5 +1,5 @@
 #=============================================================================
-# Copyright (c) 2021-2024, NVIDIA CORPORATION.
+# Copyright (c) 2021-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -43,7 +43,7 @@ function(find_and_configure_raft)
     message(VERBOSE "CUML: raft FIND_PACKAGE_ARGUMENTS COMPONENTS ${RAFT_COMPONENTS}")
 
     rapids_cpm_find(raft ${PKG_VERSION}
-      GLOBAL_TARGETS      raft::raft
+      GLOBAL_TARGETS      raft::raft raft::raft_logger raft::raft_logger_impl
       BUILD_EXPORT_SET    cuml-exports
       INSTALL_EXPORT_SET  cuml-exports
       COMPONENTS          ${RAFT_COMPONENTS}
@@ -54,7 +54,7 @@ function(find_and_configure_raft)
         EXCLUDE_FROM_ALL       ${PKG_EXCLUDE_FROM_ALL}
         OPTIONS
           "BUILD_TESTS OFF"
-          "BUILD_BENCH OFF"
+          "BUILD_PRIMS_BENCH OFF"
           "BUILD_CAGRA_HNSWLIB OFF"
           "RAFT_COMPILE_LIBRARY OFF"
     )

diff --git a/cpp/examples/dbscan/dbscan_example.cpp b/cpp/examples/dbscan/dbscan_example.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -209,7 +209,7 @@ int main(int argc, char* argv[])
                   nullptr,
                   max_bytes_per_batch,
                   ML::Dbscan::EpsNnMethod::BRUTE_FORCE,
-                  false);
+                  ML::level_enum::off);
   CUDA_RT_CALL(cudaMemcpyAsync(
     h_labels.data(), d_labels, nRows * sizeof(int), cudaMemcpyDeviceToHost, stream));
   CUDA_RT_CALL(cudaStreamSynchronize(stream));

diff --git a/cpp/include/cuml/cluster/dbscan.hpp b/cpp/include/cuml/cluster/dbscan.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2018-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -16,7 +16,7 @@
 
 #pragma once
 
-#include <cuml/common/log_levels.hpp>
+#include <cuml/common/logger.hpp>
 
 #include <cuvs/distance/distance.hpp>
 
@@ -73,7 +73,7 @@ void fit(const raft::handle_t& handle,
          float* sample_weight       = nullptr,
          size_t max_bytes_per_batch = 0,
          EpsNnMethod eps_nn_method  = BRUTE_FORCE,
-         int verbosity              = CUML_LEVEL_INFO,
+         level_enum verbosity       = ML::level_enum::info,
          bool opg                   = false);
 void fit(const raft::handle_t& handle,
          double* input,
@@ -87,7 +87,7 @@ void fit(const raft::handle_t& handle,
          double* sample_weight      = nullptr,
          size_t max_bytes_per_batch = 0,
          EpsNnMethod eps_nn_method  = BRUTE_FORCE,
-         int verbosity              = CUML_LEVEL_INFO,
+         level_enum verbosity       = ML::level_enum::info,
          bool opg                   = false);
 
 void fit(const raft::handle_t& handle,
@@ -102,7 +102,7 @@ void fit(const raft::handle_t& handle,
          float* sample_weight         = nullptr,
          size_t max_bytes_per_batch   = 0,
          EpsNnMethod eps_nn_method    = BRUTE_FORCE,
-         int verbosity                = CUML_LEVEL_INFO,
+         level_enum verbosity         = ML::level_enum::info,
          bool opg                     = false);
 void fit(const raft::handle_t& handle,
          double* input,
@@ -116,7 +116,7 @@ void fit(const raft::handle_t& handle,
          double* sample_weight        = nullptr,
          size_t max_bytes_per_batch   = 0,
          EpsNnMethod eps_nn_method    = BRUTE_FORCE,
-         int verbosity                = CUML_LEVEL_INFO,
+         level_enum verbosity         = ML::level_enum::info,
          bool opg                     = false);
 
 /** @} */

diff --git a/cpp/include/cuml/cluster/kmeans.hpp b/cpp/include/cuml/cluster/kmeans.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -16,8 +16,6 @@
 
 #pragma once
 
-#include <cuml/common/log_levels.hpp>
-
 #include <cuvs/cluster/kmeans.hpp>
 
 namespace raft {