From c66f8f6a0fa5a8d03379979366f71c29607966df Mon Sep 17 00:00:00 2001
From: Christian Sigg <csigg@google.com>
Date: Tue, 10 Oct 2023 01:26:21 -0700
Subject: [PATCH] [TSL] Remove `TSL_STATIC_THREAD_LOCAL_POD` macro.

PiperOrigin-RevId: 572168960
---
 .../xla/third_party/tsl/tsl/platform/BUILD    |  8 ----
 .../tsl/tsl/platform/static_threadlocal.h     | 42 -------------------
 .../xla/xla/stream_executor/cuda/BUILD        |  1 -
 .../xla/stream_executor/cuda/cuda_driver.cc   | 17 ++++----
 .../xla/xla/stream_executor/rocm/BUILD        |  1 -
 .../xla/stream_executor/rocm/rocm_driver.cc   | 17 ++++----
 6 files changed, 14 insertions(+), 72 deletions(-)
 delete mode 100644 third_party/xla/third_party/tsl/tsl/platform/static_threadlocal.h

diff --git a/third_party/xla/third_party/tsl/tsl/platform/BUILD b/third_party/xla/third_party/tsl/tsl/platform/BUILD
index 3708a5f4cec794..03c5ac0d339f22 100644
--- a/third_party/xla/third_party/tsl/tsl/platform/BUILD
+++ b/third_party/xla/third_party/tsl/tsl/platform/BUILD
@@ -1905,11 +1905,3 @@ tsl_cc_test(
         "//tsl/lib/core:status_test_util",
     ],
 )
-
-cc_library(
-    name = "static_threadlocal",
-    hdrs = [
-        "static_threadlocal.h",
-    ],
-    visibility = ["//visibility:public"],
-)
diff --git a/third_party/xla/third_party/tsl/tsl/platform/static_threadlocal.h b/third_party/xla/third_party/tsl/tsl/platform/static_threadlocal.h
deleted file mode 100644
index f8535f1a7234b9..00000000000000
--- a/third_party/xla/third_party/tsl/tsl/platform/static_threadlocal.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_TSL_PLATFORM_STATIC_THREADLOCAL_H_
-#define TENSORFLOW_TSL_PLATFORM_STATIC_THREADLOCAL_H_
-
-#ifdef _MSC_VER
-#define __thread __declspec(thread)
-#endif
-
-// For POD types in TLS mode, s_obj_VAR is the thread-local variable.
-#define TSL_STATIC_THREAD_LOCAL_POD(_Type_, _var_)                 \
-  static __thread _Type_ s_obj_##_var_;                            \
-  namespace {                                                      \
-  class ThreadLocal_##_var_ {                                      \
-   public:                                                         \
-    ThreadLocal_##_var_() {}                                       \
-    void Init() {}                                                 \
-    inline _Type_ *pointer() const { return &s_obj_##_var_; }      \
-    inline _Type_ *safe_pointer() const { return &s_obj_##_var_; } \
-    _Type_ &get() const { return s_obj_##_var_; }                  \
-    bool is_native_tls() const { return true; }                    \
-                                                                   \
-   private:                                                        \
-    ThreadLocal_##_var_(const ThreadLocal_##_var_ &) = delete;     \
-    void operator=(const ThreadLocal_##_var_ &) = delete;          \
-  } _var_;                                                         \
-  }
-
-#endif  // TENSORFLOW_TSL_PLATFORM_STATIC_THREADLOCAL_H_
diff --git a/third_party/xla/xla/stream_executor/cuda/BUILD b/third_party/xla/xla/stream_executor/cuda/BUILD
index 9fbf47173cd856..02ae37a4a31c8b 100644
--- a/third_party/xla/xla/stream_executor/cuda/BUILD
+++ b/third_party/xla/xla/stream_executor/cuda/BUILD
@@ -129,7 +129,6 @@ cc_library(
         "@local_tsl//tsl/cuda",
         "@local_tsl//tsl/cuda:cudart",
         "@local_tsl//tsl/platform:env",
-        "@local_tsl//tsl/platform:static_threadlocal",
         "@com_google_absl//absl/base:core_headers",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings:str_format",
diff --git a/third_party/xla/xla/stream_executor/cuda/cuda_driver.cc b/third_party/xla/xla/stream_executor/cuda/cuda_driver.cc
index 35c8da8fad1358..8cec30041bf2a9 100644
--- a/third_party/xla/xla/stream_executor/cuda/cuda_driver.cc
+++ b/third_party/xla/xla/stream_executor/cuda/cuda_driver.cc
@@ -46,13 +46,12 @@ limitations under the License.
 #include "tsl/platform/errors.h"
 #include "tsl/platform/logging.h"
 #include "tsl/platform/stacktrace.h"
-#include "tsl/platform/static_threadlocal.h"
 #include "tsl/platform/status.h"
 #include "tsl/platform/threadpool.h"
 
-bool FLAGS_gpuexec_cuda_driver_inject_init_error = false;
-bool FLAGS_gpuexec_cuda_sync_around_driver_calls = false;
-bool FLAGS_gpuexec_cuda_device_0_only = false;
+static constexpr bool FLAGS_gpuexec_cuda_driver_inject_init_error = false;
+static constexpr bool FLAGS_gpuexec_cuda_sync_around_driver_calls = false;
+static constexpr bool FLAGS_gpuexec_cuda_device_0_only = false;
 
 #define RETURN_IF_CUDA_RES_ERROR(expr, ...)                                   \
   do {                                                                        \
@@ -135,20 +134,18 @@ void SynchronizeOrDie() {
                          "Synchronize fail: ", tsl::CurrentStackTrace());
 }
 
-struct ThreadLocalData {
+thread_local struct ThreadLocalData {
   int64_t id;
   GpuContext* context;  // Only valid if id == a known good context.
   int depth;
-};
-
-TSL_STATIC_THREAD_LOCAL_POD(ThreadLocalData, tls_data);
+} tls_data = {};  // One zero-initialized instance per thread.
 
 }  // namespace
 
 ScopedActivateContext::ScopedActivateContext(GpuContext* cuda_context) {
   if (FLAGS_gpuexec_cuda_sync_around_driver_calls) SynchronizeOrDie();
 
-  auto* tls = &tls_data.get();
+  auto* tls = &tls_data;
 
   // If this is an outermost scope, we must not assume that the CUDA context has
   // been left in the same state we left it. Other code may have run on this
@@ -187,7 +184,7 @@ ScopedActivateContext::ScopedActivateContext(GpuContext* cuda_context) {
 ScopedActivateContext::~ScopedActivateContext() {
   if (FLAGS_gpuexec_cuda_sync_around_driver_calls) SynchronizeOrDie();
 
-  auto* tls = &tls_data.get();
+  auto* tls = &tls_data;
 
   if (kVerifyGpuContext) {
     // Note that if kVerifyGpuContext is used, and contexts are deleted, it's
diff --git a/third_party/xla/xla/stream_executor/rocm/BUILD b/third_party/xla/xla/stream_executor/rocm/BUILD
index 98ff3e58f5037e..c5a16ffbdf002f 100644
--- a/third_party/xla/xla/stream_executor/rocm/BUILD
+++ b/third_party/xla/xla/stream_executor/rocm/BUILD
@@ -63,7 +63,6 @@ cc_library(
         "@local_tsl//tsl/platform:env",
         "@local_tsl//tsl/platform:numbers",
         "@local_tsl//tsl/platform:stacktrace",
-        "@local_tsl//tsl/platform:static_threadlocal",
     ]),
 )
 
diff --git a/third_party/xla/xla/stream_executor/rocm/rocm_driver.cc b/third_party/xla/xla/stream_executor/rocm/rocm_driver.cc
index d48ed7e3818d92..c50be50e64f008 100644
--- a/third_party/xla/xla/stream_executor/rocm/rocm_driver.cc
+++ b/third_party/xla/xla/stream_executor/rocm/rocm_driver.cc
@@ -38,12 +38,11 @@ limitations under the License.
 #include "tsl/platform/logging.h"
 #include "tsl/platform/numbers.h"
 #include "tsl/platform/stacktrace.h"
-#include "tsl/platform/static_threadlocal.h"
 #include "tsl/platform/threadpool.h"
 
-bool FLAGS_gpuexec_rocm_driver_inject_init_error = false;
-bool FLAGS_gpuexec_rocm_sync_around_driver_calls = false;
-bool FLAGS_gpuexec_rocm_device_0_only = false;
+static constexpr bool FLAGS_gpuexec_rocm_driver_inject_init_error = false;
+static constexpr bool FLAGS_gpuexec_rocm_sync_around_driver_calls = false;
+static constexpr bool FLAGS_gpuexec_rocm_device_0_only = false;
 
 #define RETURN_IF_ROCM_ERROR(expr, ...)                                       \
   do {                                                                        \
@@ -128,20 +127,18 @@ void SynchronizeOrDie() {
   }
 }
 
-struct ThreadLocalData {
+thread_local struct ThreadLocalData {
   int current_device_ordinal;
   GpuContext* context;  // Only valid if id == a known good context.
   int depth;
-};
-
-TSL_STATIC_THREAD_LOCAL_POD(ThreadLocalData, tls_data);
+} tls_data = {};  // Zeroed per thread. NOTE(review): no `id` field here.
 
 }  // namespace
 
 ScopedActivateContext::ScopedActivateContext(GpuContext* hip_context) {
   if (FLAGS_gpuexec_rocm_sync_around_driver_calls) SynchronizeOrDie();
 
-  auto* tls = &tls_data.get();
+  auto* tls = &tls_data;
   if (tls->depth == 0) {
     VLOG(3) << "ScopedActivateContext switching to "
             << hip_context->device_ordinal();
@@ -177,7 +174,7 @@ ScopedActivateContext::ScopedActivateContext(GpuContext* hip_context) {
 ScopedActivateContext::~ScopedActivateContext() {
   if (FLAGS_gpuexec_rocm_sync_around_driver_calls) SynchronizeOrDie();
 
-  auto* tls = &tls_data.get();
+  auto* tls = &tls_data;
 
   if (kVerifyGpuContext) {
     CHECK_EQ(CurrentContext(),