Enabling C++20 on Linux #17816

Status: Open. Wants to merge 52 commits into main from Cjian/linux_c++20.

Commits (52)
81fbc65  Trying out c++20 on linux  (jchen351, Oct 6, 2023)
c72c40e  Enabling "set_property(TARGET onnxruntime_test_all APPEND_STRING PROP…  (jchen351, Oct 6, 2023)
512cd4d  Revert "Enabling "set_property(TARGET onnxruntime_test_all APPEND_STR…  (jchen351, Oct 6, 2023)
e9fd1ef  Merge branch 'main' into Cjian/linux_c++20  (jchen351, Oct 13, 2023)
a3a69d0  ignore "-Werror=deprecated" for Eigen CXX11 Tensor  (jchen351, Oct 13, 2023)
2e795b4  -Wdeprecated-declarations  (jchen351, Oct 13, 2023)
1a7efec  Merge branch 'refs/heads/main' into Cjian/linux_c++20  (jchen351, May 24, 2024)
c46ab4a  Merge remote-tracking branch 'origin/main' into Cjian/linux_c++20  (snnn, Jun 3, 2024)
11f8d5b  Merge branch 'refs/heads/main' into Cjian/linux_c++20  (jchen351, Jun 3, 2024)
107788f  #pragma GCC diagnostic ignored "-Wdeprecated"  (jchen351, Jun 3, 2024)
7646254  #pragma GCC diagnostic ignored "-Wdeprecated"  (jchen351, Jun 3, 2024)
661a465  TreeAggregatorMin  (jchen351, Jun 3, 2024)
450d994  #pragma GCC diagnostic ignored "-Wdeprecated"  (jchen351, Jun 4, 2024)
168beb8  [=  (jchen351, Jun 6, 2024)
3736658  qembed_layer_norm.cc:97:50  (jchen351, Jun 6, 2024)
89d8e59  Merge branch 'refs/heads/main' into Cjian/linux_c++20  (jchen351, Jun 17, 2024)
2d6abd5  is_trivial  (jchen351, Jun 17, 2024)
c66de02  Merge branch 'refs/heads/main' into Cjian/linux_c++20  (jchen351, Jun 19, 2024)
31cb371  Merge remote-tracking branch 'refs/remotes/origin/main' into Cjian/li…  (jchen351, Jun 20, 2024)
d3a385a  Merge remote-tracking branch 'origin/main' into Cjian/linux_c++20  (jchen351, Jun 20, 2024)
41c821e  onnxruntime/core/session/inference_session.cc lintrunner  (jchen351, Jun 20, 2024)
cc3f411  #pragma GCC diagnostic ignored "-Wdeprecated-declarations"  (jchen351, Jun 20, 2024)
84122a5  #if __cplusplus >= 202002L  (jchen351, Jun 20, 2024)
5244cc4  std::function<void()> run_fn = [=, this]() {  (jchen351, Jun 20, 2024)
0e10f73  std::function<void()> run_fn = [this]() {  (jchen351, Jun 20, 2024)
c669ead  std::function<void()> run_fn = [this]() {  (jchen351, Jun 20, 2024)
55edc2d  Merge branch 'refs/heads/main' into Cjian/linux_c++20  (jchen351, Jul 19, 2024)
33d08f3  #pragma GCC diagnostic ignored "-Wdeprecated"  (jchen351, Jul 19, 2024)
2452708  volatile size_t volatile_x = 0;  (jchen351, Jul 19, 2024)
0a2d22b  implicit capture of ‘this’ via ‘[=]’ is deprecated in C++20  (jchen351, Jul 19, 2024)
c5a3e64  #ifdef __GNUC__  (jchen351, Jul 22, 2024)
6c105be  compute_info.create_state_func = [=](ComputeContext* context, Functio…  (jchen351, Jul 22, 2024)
f356538  Special case where c++20 is defined by was undefined again  (jchen351, Jul 22, 2024)
6177120  #pragma GCC diagnostic push  (jchen351, Jul 22, 2024)
254c8c1  #pragma GCC diagnostic push  (jchen351, Jul 22, 2024)
7386423  if(UNIX)  (jchen351, Jul 22, 2024)
9750089  if(UNIX)  (jchen351, Jul 22, 2024)
42ee12f  -Wdeprecated-pragma  (jchen351, Jul 22, 2024)
d837bfa  -Wdeprecated-pragma  (jchen351, Jul 22, 2024)
a6b56f3  google nsync  (jchen351, Jul 23, 2024)
78c7b4a  #if defined(  (jchen351, Jul 23, 2024)
4f2ebed  #if defined(__clang__) && __cplusplus >= 202002L  (jchen351, Jul 23, 2024)
7de09f7  [=, this]  (jchen351, Jul 23, 2024)
524decf  maxThreadsPerBlock  (jchen351, Jul 23, 2024)
0fe8720  #ifdef __GNUC__  (jchen351, Jul 23, 2024)
6eb70d5  deprecated  (jchen351, Jul 24, 2024)
fa8a8cc  Merge branch 'refs/heads/main' into Cjian/linux_c++20  (jchen351, Aug 27, 2024)
1471383  Changing ostringstream to wostringstream  (jchen351, Aug 28, 2024)
fb7ee24  Using wostringstream only on Windows  (jchen351, Aug 30, 2024)
fc63b36  change ifndef to ifdef  (jchen351, Aug 30, 2024)
3c31842  Merge branch 'refs/heads/main' into Cjian/linux_c++20  (jchen351, Sep 10, 2024)
9e30f73  Merge branch 'refs/heads/main' into Cjian/linux_c++20  (jchen351, Sep 17, 2024)
cmake/CMakeLists.txt (2 changes: 1 addition & 1 deletion)

@@ -42,7 +42,7 @@ include(CheckFunctionExists)
 include(GNUInstallDirs) # onnxruntime_providers_* require CMAKE_INSTALL_* variables

 # TODO: update this once all system adapt c++20
-if(CMAKE_SYSTEM_NAME STREQUAL "Darwin")
+if(UNIX)
   set(CMAKE_CXX_STANDARD 20)
 else()
   set(CMAKE_CXX_STANDARD 17)
include/onnxruntime/core/common/eigen_common_wrapper.h (1 change: 1 addition & 0 deletions)

@@ -10,6 +10,7 @@
 // error: ignoring attributes on template argument "Eigen::PacketType<const float, Eigen::DefaultDevice>::type {aka __vector(4) float}" [-Werror=ignored-attributes]
 #if defined(__GNUC__)
 #pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated"
 #if __GNUC__ >= 6
 #pragma GCC diagnostic ignored "-Wignored-attributes"
 #endif
include/onnxruntime/core/platform/ort_mutex.h (15 changes: 14 additions & 1 deletion)

@@ -108,7 +108,14 @@ std::cv_status OrtCondVar::wait_for(std::unique_lock<OrtMutex>& cond_mutex,
 namespace onnxruntime {

 class OrtMutex {
+#if defined(__clang__) && __cplusplus >= 202002L
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wdeprecated-pragma"

[Review comment, Contributor] I think this can be fixed - see this patch: e34c91c

+#endif
   nsync::nsync_mu data_ = NSYNC_MU_INIT;
+#if defined(__clang__) && __cplusplus >= 202002L
+#pragma clang diagnostic pop
+#endif

 public:
  constexpr OrtMutex() = default;

@@ -125,8 +132,14 @@
 };

 class OrtCondVar {
+#if defined(__clang__) && __cplusplus >= 202002L
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wdeprecated-pragma"
+#endif
   nsync::nsync_cv native_cv_object = NSYNC_CV_INIT;

+#if defined(__clang__) && __cplusplus >= 202002L
+#pragma clang diagnostic pop
+#endif
 public:
  constexpr OrtCondVar() noexcept = default;
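For context on the suppression above, a minimal, self-contained sketch of the same idiom. The assumed trigger (not confirmed in this thread) is that NSYNC_MU_INIT / NSYNC_CV_INIT expand to a macro such as ATOMIC_VAR_INIT, which libc++ deprecates in C++20 via "#pragma clang deprecated", so each use site fires clang's -Wdeprecated-pragma. The reviewer's patch e34c91c suggests the warning can be fixed at the source rather than suppressed.

#include <atomic>

class Guarded {
#if defined(__clang__) && __cplusplus >= 202002L
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wdeprecated-pragma"
#endif
  // ATOMIC_VAR_INIT is deprecated in C++20; without the guard, clang
  // reports the macro's deprecation pragma at this use site.
  std::atomic<int> state_ = ATOMIC_VAR_INIT(0);
#if defined(__clang__) && __cplusplus >= 202002L
#pragma clang diagnostic pop
#endif

 public:
  int load() const { return state_.load(); }
};

int main() { return Guarded{}.load(); }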
onnxruntime/contrib_ops/cpu/bert/embed_layer_norm.cc (11 changes: 11 additions & 0 deletions)

@@ -87,7 +87,18 @@ Status EmbedLayerNorm<T>::Compute(OpKernelContext* context) const {

   int n = batch_size * sequence_length;
   concurrency::ThreadPool::TryBatchParallelFor(
+#if __cplusplus >= 202002L
+      context->GetOperatorThreadPool(), n, [=, this, &failed](ptrdiff_t index) {

[Review comment, Contributor] See this core guideline: https://isocpp.github.io/CppCoreGuidelines/CppCoreGuidelines#f54-when-writing-a-lambda-that-captures-this-or-any-class-data-member-dont-use--default-capture. I think they have a point. At the least, we should consider how to make the code easier to understand.

+#else
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated"
+#endif
       context->GetOperatorThreadPool(), n, [=, &failed](ptrdiff_t index) {
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
+#endif
         int word_col_index = input_ids_data[index];
         if (word_col_index < 0 || word_col_index >= word_embedding_length) {
           failed.store(true, std::memory_order_release);
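To make the reviewer's point concrete, here is a minimal sketch (the names are illustrative, not from this file) of both the C++20 deprecation and the F.54 style. Under -std=c++20, GCC and clang warn that implicit capture of 'this' via [=] is deprecated; writing [=, this] silences that, while F.54 recommends naming the captures outright.

#include <cstdio>

struct Kernel {
  int scale = 2;

  void Run(int n) {
    // C++17: [=] silently captures `this` too, so `scale` resolves to
    // this->scale. C++20 deprecates that implicit capture.
    // F.54: when `this` or members are used, prefer explicit captures
    // over any default capture, e.g. [n, this] rather than [=, this].
    auto body = [n, this](int i) { std::printf("%d\n", i * scale); };
    for (int i = 0; i < n; ++i) body(i);
  }
};

int main() {
  Kernel k;
  k.Run(3);
  return 0;
}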
onnxruntime/core/common/logging/sinks/ostream_sink.cc (4 changes: 4 additions & 0 deletions)

@@ -33,7 +33,11 @@ void OStreamSink::SendImpl(const Timestamp& timestamp, const std::string& logger
   // Going with #2 as it should scale better at the cost of creating the message in memory first
   // before sending to the stream.

+#ifdef _WIN32
+  std::wostringstream msg;
+#else
   std::ostringstream msg;
+#endif

 #ifndef ORT_MINIMAL_BUILD
   if (message.Severity() == Severity::kWARNING) {
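One plausible motivation for making the message buffer wide only on Windows (an assumption; the thread does not state it): C++20, via P1423R3, deletes the operator<< overloads that previously formatted wchar_t and const wchar_t* into narrow streams, so wide data (e.g. Windows paths) streamed into std::ostringstream stops compiling. A minimal sketch:

#include <sstream>

int main() {
  const wchar_t* path = L"C:\\models\\model.onnx";  // hypothetical wide input
#ifdef _WIN32
  std::wostringstream msg;
  msg << path;  // fine: wide data into a wide stream
#else
  std::ostringstream msg;
  // msg << path;  // ill-formed in C++20: deleted overload (P1423R3)
  msg << "narrow-only path";  // non-Windows code paths keep narrow data
#endif
  return msg.str().empty() ? 1 : 0;
}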
onnxruntime/core/providers/cpu/text/string_normalizer.cc (7 changes: 7 additions & 0 deletions)

@@ -220,7 +220,14 @@ class Utf8ConverterGeneric {
   }

  private:
+#if __cplusplus >= 202002L
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+#endif
   std::codecvt_utf8<wchar_t> converter_;
+#if __cplusplus >= 202002L
+#pragma GCC diagnostic pop
+#endif
 };

 // We need to specialize for MS as there is
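Background on why the declaration itself needs the guard: std::codecvt_utf8 (together with std::wstring_convert) has been deprecated since C++17 by P0618R0 and has no drop-in standard replacement, so a -Werror build that names the type must suppress -Wdeprecated-declarations around it. A self-contained sketch of the same idiom (clang also accepts the GCC-style pragmas):

#include <codecvt>
#include <locale>
#include <string>

#if defined(__GNUC__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
#endif
// Both names are deprecated since C++17 but still provided.
static std::wstring_convert<std::codecvt_utf8<wchar_t>> g_utf8_to_wide;
#if defined(__GNUC__)
#pragma GCC diagnostic pop
#endif

int main() {
  std::wstring w = g_utf8_to_wide.from_bytes("abc");  // UTF-8 -> wchar_t
  return w.size() == 3 ? 0 : 1;
}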
onnxruntime/core/providers/cuda/math/topk_impl.cuh (6 changes: 3 additions & 3 deletions)

@@ -421,7 +421,7 @@
     });
   }

-  auto XPT = static_cast<int64_t>(ceil(static_cast<double>(dimension) / GridDim::maxThreadsPerBlock));
+  auto XPT = static_cast<int64_t>(ceil(static_cast<double>(dimension) / static_cast<double>(GridDim::maxThreadsPerBlock)));

[cpplint, line 424] Lines should be <= 120 characters long [whitespace/line_length] [2]

   if (BT * 2 >= K || 0 == sorted) {
     RadixTopK<CudaT, BT, 2><<<N, BT, 256 * sizeof(uint32_t), stream>>>(
         input_x_ptr, output_v_ptr, output_i, elem_nums, size, axis, K, largest, sorted, dimension, XPT,

@@ -452,8 +452,8 @@
   CUDA_RETURN_IF_ERROR(cub::DeviceRadixSort::SortPairs(nullptr, temp_bytes, input_key, output_key, input_value, output_value, dimension, 0, sizeof(T) * 8, stream));
   auto temp_storage_buffer = kernel->GetScratchBuffer<char>(temp_bytes, ort_stream);
   auto* temp_storage = temp_storage_buffer.get();
-  auto blocks_per_grid_D = (int)(ceil(static_cast<float>(dimension) / BT));
-  auto blocks_per_grid_K = (int)(ceil(static_cast<float>(K) / BT));
+  auto blocks_per_grid_D = (int)(ceil(static_cast<float>(dimension) / static_cast<float>(BT)));

[cpplint, line 455] Using C-style cast. Use static_cast<int>(...) instead [readability/casting] [4]

+  auto blocks_per_grid_K = (int)(ceil(static_cast<float>(K) / static_cast<float>(BT)));

[cpplint, line 456] Using C-style cast. Use static_cast<int>(...) instead [readability/casting] [4]

   for (int64_t i = 0; i < N; i++) {
     FillInput<CudaT><<<blocks_per_grid_D, BT, 0, stream>>>(input_x_ptr, input_key, input_value, elem_nums, size, axis, K, i, dimension);
     CUDA_RETURN_IF_ERROR(1 == largest ? cub::DeviceRadixSort::SortPairsDescending(temp_storage, temp_bytes, input_key, output_key, input_value, output_value, dimension, 0, sizeof(T) * 8, stream)
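The extra casts look redundant at first glance, but there is a plausible C++20 reason, assuming GridDim::maxThreadsPerBlock (and BT) are enumerator constants, which the cast-only fix suggests: P1120R0 deprecates implicit arithmetic between enumeration and floating-point operands, so dividing a double by an enum now warns (GCC/clang: -Wdeprecated-enum-float-conversion) and fails under -Werror. A minimal sketch:

#include <cmath>
#include <cstdio>

struct GridDim {
  // Hypothetical stand-in assumed to mirror the real enumerator.
  enum : int { maxThreadsPerBlock = 256 };
};

int main() {
  int dimension = 1000;
  // Deprecated in C++20 (P1120R0): mixing a floating-point operand
  // with an enumeration operand in arithmetic.
  // double xpt = static_cast<double>(dimension) / GridDim::maxThreadsPerBlock;
  double xpt = static_cast<double>(dimension) /
               static_cast<double>(GridDim::maxThreadsPerBlock);  // OK
  std::printf("%lld\n", static_cast<long long>(std::ceil(xpt)));
  return 0;
}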
onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc

@@ -1210,7 +1210,7 @@
   map_input_index_[fused_node.Name()] = input_name_index;
   map_no_input_shape_[fused_node.Name()] = no_input_shape;
   NodeComputeInfo compute_info;
-  compute_info.create_state_func = [=](ComputeContext* context, FunctionState* state) {
+  compute_info.create_state_func = [=,this](ComputeContext* context, FunctionState* state) {

[cpplint, line 1213] Missing space after , [whitespace/comma] [3]

     std::unique_ptr<MIGraphXFuncState> p = std::make_unique<MIGraphXFuncState>();
     *p = {context->allocate_func, context->release_func, context->allocator_handle, map_progs_[context->node_name],
           map_onnx_string_[context->node_name], options, t_, map_input_index_[context->node_name], &mgx_mu_,
@@ -3381,7 +3381,7 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphView
   // Create function state
   // TODO: remove default capture
   NodeComputeInfo compute_info;
-  compute_info.create_state_func = [=](ComputeContext* context, FunctionState* state) {
+  compute_info.create_state_func = [=, this](ComputeContext* context, FunctionState* state) {
     std::unique_ptr<TensorrtFuncState> p = std::make_unique<TensorrtFuncState>();
     // translate tactic sources string to nvinfer1::TacticSources
     nvinfer1::TacticSources tactics = 0;

@@ -3408,7 +3408,7 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphView
   };

   // Create compute function
-  compute_info.compute_func = [this](FunctionState state, const OrtApi* api, OrtKernelContext* context) {
+  compute_info.compute_func = [=, this](FunctionState state, const OrtApi* api, OrtKernelContext* context) {
     Ort::KernelContext ctx(context);

     TensorrtFuncState* trt_state = reinterpret_cast<TensorrtFuncState*>(state);

@@ -4056,7 +4056,7 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromPrecompiledEngine(con
   // Create function state
   // TODO: remove default capture
   NodeComputeInfo compute_info;
-  compute_info.create_state_func = [=](ComputeContext* context, FunctionState* state) {
+  compute_info.create_state_func = [=, this](ComputeContext* context, FunctionState* state) {
     std::unique_ptr<TensorrtShortFuncState> p = std::make_unique<TensorrtShortFuncState>();
     *p = {context->allocate_func,
           context->release_func,

@@ -4079,7 +4079,7 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromPrecompiledEngine(con
   };

   // Create compute function
-  compute_info.compute_func = [this](FunctionState state, const OrtApi* api, OrtKernelContext* context) {
+  compute_info.compute_func = [=, this](FunctionState state, const OrtApi* api, OrtKernelContext* context) {
     Ort::KernelContext ctx(context);

     TensorrtShortFuncState* trt_state = reinterpret_cast<TensorrtShortFuncState*>(state);
onnxruntime/core/session/inference_session.cc (11 changes: 11 additions & 0 deletions)

@@ -2770,7 +2770,18 @@ common::Status InferenceSession::RunAsync(const RunOptions* run_options,
   if (!tp || concurrency::ThreadPool::DegreeOfParallelism(tp) < 2) {
     return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "intra op thread pool must have at least one thread for RunAsync");
   }
+#if __cplusplus >= 202002L
+  std::function<void()> run_fn = [=, this]() {
+#else
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated"
+#endif
   std::function<void()> run_fn = [=]() {
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
+#endif
   Status status = Status::OK();
   ORT_TRY {
     if (run_options) {
onnxruntime/test/onnx/microbenchmark/eigen.cc (1 change: 1 addition & 0 deletions)

@@ -8,6 +8,7 @@
 #pragma GCC diagnostic ignored "-Wunused-parameter"
 #pragma GCC diagnostic ignored "-Wunused-result"
 #pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
+#pragma GCC diagnostic ignored "-Wdeprecated"

 // _deps/eigen-src/unsupported/Eigen/CXX11/../../../Eigen/src/Core/arch/NEON/PacketMath.h:1671:9:
 // error: ‘void* memcpy(void*, const void*, size_t)’ copying an object of non-trivial type ‘Eigen::internal::Packet4c’
onnxruntime/test/onnx/microbenchmark/tptest.cc (7 changes: 7 additions & 0 deletions)

@@ -102,7 +102,14 @@ static void BM_ThreadPoolSimpleParallelFor(benchmark::State& state) {
   for (auto _ : state) {
     for (int j = 0; j < 100; j++) {
       ThreadPool::TrySimpleParallelFor(tp.get(), len, [&](size_t) {
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvolatile"
+        for (volatile size_t x = 0; x < body; x++) {
+#pragma GCC diagnostic pop
+#else
         for (volatile size_t x = 0; x < body; x++) {
+#endif
         }
       });
     }
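For context, a sketch of the deprecation being silenced here: C++20 (P1152R4) deprecates ++/-- and compound assignment on volatile-qualified objects, so the benchmark's volatile loop counter trips GCC's -Wvolatile (clang spells it -Wdeprecated-volatile). Plain assignment is left intact by P1152, so a warning-free rewrite that preserves the volatile busy-work would be:

#include <cstddef>

int main() {
  const std::size_t body = 1000;

  // Deprecated in C++20: x++ on a volatile object.
  // for (volatile std::size_t x = 0; x < body; x++) {}

  // Still well-formed: a simple assignment to a volatile object keeps
  // the per-iteration volatile read and write the benchmark relies on.
  for (volatile std::size_t x = 0; x < body; x = x + 1) {
  }
  return 0;
}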