From c66f8f6a0fa5a8d03379979366f71c29607966df Mon Sep 17 00:00:00 2001 From: Christian Sigg Date: Tue, 10 Oct 2023 01:26:21 -0700 Subject: [PATCH] [TSL] Remove `TSL_STATIC_THREAD_LOCAL_POD` macro. PiperOrigin-RevId: 572168960 --- .../xla/third_party/tsl/tsl/platform/BUILD | 8 ---- .../tsl/tsl/platform/static_threadlocal.h | 42 ------------------- .../xla/xla/stream_executor/cuda/BUILD | 1 - .../xla/stream_executor/cuda/cuda_driver.cc | 17 ++++---- .../xla/xla/stream_executor/rocm/BUILD | 1 - .../xla/stream_executor/rocm/rocm_driver.cc | 17 ++++---- 6 files changed, 14 insertions(+), 72 deletions(-) delete mode 100644 third_party/xla/third_party/tsl/tsl/platform/static_threadlocal.h diff --git a/third_party/xla/third_party/tsl/tsl/platform/BUILD b/third_party/xla/third_party/tsl/tsl/platform/BUILD index 3708a5f4cec794..03c5ac0d339f22 100644 --- a/third_party/xla/third_party/tsl/tsl/platform/BUILD +++ b/third_party/xla/third_party/tsl/tsl/platform/BUILD @@ -1905,11 +1905,3 @@ tsl_cc_test( "//tsl/lib/core:status_test_util", ], ) - -cc_library( - name = "static_threadlocal", - hdrs = [ - "static_threadlocal.h", - ], - visibility = ["//visibility:public"], -) diff --git a/third_party/xla/third_party/tsl/tsl/platform/static_threadlocal.h b/third_party/xla/third_party/tsl/tsl/platform/static_threadlocal.h deleted file mode 100644 index f8535f1a7234b9..00000000000000 --- a/third_party/xla/third_party/tsl/tsl/platform/static_threadlocal.h +++ /dev/null @@ -1,42 +0,0 @@ -/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_TSL_PLATFORM_STATIC_THREADLOCAL_H_ -#define TENSORFLOW_TSL_PLATFORM_STATIC_THREADLOCAL_H_ - -#ifdef _MSC_VER -#define __thread __declspec(thread) -#endif - -// For POD types in TLS mode, s_obj_VAR is the thread-local variable. -#define TSL_STATIC_THREAD_LOCAL_POD(_Type_, _var_) \ - static __thread _Type_ s_obj_##_var_; \ - namespace { \ - class ThreadLocal_##_var_ { \ - public: \ - ThreadLocal_##_var_() {} \ - void Init() {} \ - inline _Type_ *pointer() const { return &s_obj_##_var_; } \ - inline _Type_ *safe_pointer() const { return &s_obj_##_var_; } \ - _Type_ &get() const { return s_obj_##_var_; } \ - bool is_native_tls() const { return true; } \ - \ - private: \ - ThreadLocal_##_var_(const ThreadLocal_##_var_ &) = delete; \ - void operator=(const ThreadLocal_##_var_ &) = delete; \ - } _var_; \ - } - -#endif // TENSORFLOW_TSL_PLATFORM_STATIC_THREADLOCAL_H_ diff --git a/third_party/xla/xla/stream_executor/cuda/BUILD b/third_party/xla/xla/stream_executor/cuda/BUILD index 9fbf47173cd856..02ae37a4a31c8b 100644 --- a/third_party/xla/xla/stream_executor/cuda/BUILD +++ b/third_party/xla/xla/stream_executor/cuda/BUILD @@ -129,7 +129,6 @@ cc_library( "@local_tsl//tsl/cuda", "@local_tsl//tsl/cuda:cudart", "@local_tsl//tsl/platform:env", - "@local_tsl//tsl/platform:static_threadlocal", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/memory", "@com_google_absl//absl/strings:str_format", diff --git a/third_party/xla/xla/stream_executor/cuda/cuda_driver.cc b/third_party/xla/xla/stream_executor/cuda/cuda_driver.cc index 35c8da8fad1358..8cec30041bf2a9 100644 --- a/third_party/xla/xla/stream_executor/cuda/cuda_driver.cc +++ b/third_party/xla/xla/stream_executor/cuda/cuda_driver.cc @@ -46,13 +46,12 @@ limitations under the License. #include "tsl/platform/errors.h" #include "tsl/platform/logging.h" #include "tsl/platform/stacktrace.h" -#include "tsl/platform/static_threadlocal.h" #include "tsl/platform/status.h" #include "tsl/platform/threadpool.h" -bool FLAGS_gpuexec_cuda_driver_inject_init_error = false; -bool FLAGS_gpuexec_cuda_sync_around_driver_calls = false; -bool FLAGS_gpuexec_cuda_device_0_only = false; +static constexpr bool FLAGS_gpuexec_cuda_driver_inject_init_error = false; +static constexpr bool FLAGS_gpuexec_cuda_sync_around_driver_calls = false; +static constexpr bool FLAGS_gpuexec_cuda_device_0_only = false; #define RETURN_IF_CUDA_RES_ERROR(expr, ...) \ do { \ @@ -135,20 +134,18 @@ void SynchronizeOrDie() { "Synchronize fail: ", tsl::CurrentStackTrace()); } -struct ThreadLocalData { +thread_local struct ThreadLocalData { int64_t id; GpuContext* context; // Only valid if id == a known good context. int depth; -}; - -TSL_STATIC_THREAD_LOCAL_POD(ThreadLocalData, tls_data); +} tls_data = {}; } // namespace ScopedActivateContext::ScopedActivateContext(GpuContext* cuda_context) { if (FLAGS_gpuexec_cuda_sync_around_driver_calls) SynchronizeOrDie(); - auto* tls = &tls_data.get(); + auto* tls = &tls_data; // If this is an outermost scope, we must not assume that the CUDA context has // been left in the same state we left it. Other code may have run on this @@ -187,7 +184,7 @@ ScopedActivateContext::ScopedActivateContext(GpuContext* cuda_context) { ScopedActivateContext::~ScopedActivateContext() { if (FLAGS_gpuexec_cuda_sync_around_driver_calls) SynchronizeOrDie(); - auto* tls = &tls_data.get(); + auto* tls = &tls_data; if (kVerifyGpuContext) { // Note that if kVerifyGpuContext is used, and contexts are deleted, it's diff --git a/third_party/xla/xla/stream_executor/rocm/BUILD b/third_party/xla/xla/stream_executor/rocm/BUILD index 98ff3e58f5037e..c5a16ffbdf002f 100644 --- a/third_party/xla/xla/stream_executor/rocm/BUILD +++ b/third_party/xla/xla/stream_executor/rocm/BUILD @@ -63,7 +63,6 @@ cc_library( "@local_tsl//tsl/platform:env", "@local_tsl//tsl/platform:numbers", "@local_tsl//tsl/platform:stacktrace", - "@local_tsl//tsl/platform:static_threadlocal", ]), ) diff --git a/third_party/xla/xla/stream_executor/rocm/rocm_driver.cc b/third_party/xla/xla/stream_executor/rocm/rocm_driver.cc index d48ed7e3818d92..c50be50e64f008 100644 --- a/third_party/xla/xla/stream_executor/rocm/rocm_driver.cc +++ b/third_party/xla/xla/stream_executor/rocm/rocm_driver.cc @@ -38,12 +38,11 @@ limitations under the License. #include "tsl/platform/logging.h" #include "tsl/platform/numbers.h" #include "tsl/platform/stacktrace.h" -#include "tsl/platform/static_threadlocal.h" #include "tsl/platform/threadpool.h" -bool FLAGS_gpuexec_rocm_driver_inject_init_error = false; -bool FLAGS_gpuexec_rocm_sync_around_driver_calls = false; -bool FLAGS_gpuexec_rocm_device_0_only = false; +static constexpr bool FLAGS_gpuexec_rocm_driver_inject_init_error = false; +static constexpr bool FLAGS_gpuexec_rocm_sync_around_driver_calls = false; +static constexpr bool FLAGS_gpuexec_rocm_device_0_only = false; #define RETURN_IF_ROCM_ERROR(expr, ...) \ do { \ @@ -128,20 +127,18 @@ void SynchronizeOrDie() { } } -struct ThreadLocalData { +thread_local struct ThreadLocalData { int current_device_ordinal; GpuContext* context; // Only valid if id == a known good context. int depth; -}; - -TSL_STATIC_THREAD_LOCAL_POD(ThreadLocalData, tls_data); +} tls_data = {}; } // namespace ScopedActivateContext::ScopedActivateContext(GpuContext* hip_context) { if (FLAGS_gpuexec_rocm_sync_around_driver_calls) SynchronizeOrDie(); - auto* tls = &tls_data.get(); + auto* tls = &tls_data; if (tls->depth == 0) { VLOG(3) << "ScopedActivateContext switching to " << hip_context->device_ordinal(); @@ -177,7 +174,7 @@ ScopedActivateContext::ScopedActivateContext(GpuContext* hip_context) { ScopedActivateContext::~ScopedActivateContext() { if (FLAGS_gpuexec_rocm_sync_around_driver_calls) SynchronizeOrDie(); - auto* tls = &tls_data.get(); + auto* tls = &tls_data; if (kVerifyGpuContext) { CHECK_EQ(CurrentContext(),