Enable Address Sanitizer in CI (microsoft#19073)

### Description 1. Add two build jobs for enabling Address Sanitizer in CI. One for Windows CPU, One for Linux CPU. 2. Set default compiler flags/linker flags in build.py for normal Windows/Linux/MacOS build. This can help control compiler flags in a more centralized way. 3. All Windows binaries in our official packages will be built with "/PROFILE" flag. Symbols of onnxruntime.dll can be found at [Microsoft public symbol server](https://learn.microsoft.com/en-us/windows-hardware/drivers/debugger/microsoft-public-symbols). Limitations: 1. On Linux Address Sanitizer ignores RPATH settings in ELF binaries. Therefore once Address Sanitizer is enabled, before running tests we need to manually set LD_LIBRARY_PATH properly otherwise libonnxruntime.so may not be able to find custom ops and shared EPs. 4. On Linux we also need to set LD_PRELOAD before running some tests(if the main executable, like python, is not built with address sanitizer. On Windows we do not need to. 5. On Windows before running python tests we should manually copy address sanitizer DLL to the onnxruntime/capi directory, because python 3.8 and above has enabled "Safe DLL Search Mode" that wouldn't use the information provided by PATH env. 6. On Linux Address Sanitizer found a lot of memory leaks from our python binding code. Therefore right now we cannot enable Address Sanitizer when building ONNX Runtime with python binding. 7. Address Sanitizer itself uses a lot of memory address space and delays memory deallocations, which is easy to cause OOM issues in 32-bit applications. We cannot run all the tests in onnxruntime_test_all in 32-bit mode with Address Sanitizer due to this reason. However, we still can run individual tests in such a way. We just cannot run all of them in one process. ### Motivation and Context To catch memory issues.
intel · Jan 12, 2024 · 0e8d4c3 · 0e8d4c3
1 parent 2856061
commit 0e8d4c3
Show file tree

Hide file tree

Showing 32 changed files with 386 additions and 213 deletions.
diff --git a/.pipelines/windowsai-steps.yml b/.pipelines/windowsai-steps.yml
@@ -84,7 +84,7 @@ jobs:
         7z x cmake-3.26.3-windows-x86_64.zip
         set PYTHONHOME=$(Build.BinariesDirectory)\${{ parameters.PythonPackageName }}.3.9.7\tools
         set PYTHONPATH=$(Build.BinariesDirectory)\${{ parameters.PythonPackageName }}.3.9.7\tools
-        $(Build.BinariesDirectory)\${{ parameters.PythonPackageName }}.3.9.7\tools\python.exe "$(Build.SourcesDirectory)\tools\ci_build\build.py" --build_dir $(Build.BinariesDirectory) --build_shared_lib --enable_onnx_tests --ms_experimental --use_dml --use_winml --cmake_generator "Visual Studio 17 2022" --update --config RelWithDebInfo --enable_lto --use_telemetry --disable_rtti --enable_wcos $(BuildFlags) --cmake_extra_defines "CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO=/PROFILE" "CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO=/PROFILE" CMAKE_SYSTEM_VERSION=10.0.19041.0 --cmake_path $(Build.BinariesDirectory)\cmake-3.26.3-windows-x86_64\bin\cmake.exe --ctest_path $(Build.BinariesDirectory)\cmake-3.26.3-windows-x86_64\bin\ctest.exe
+        $(Build.BinariesDirectory)\${{ parameters.PythonPackageName }}.3.9.7\tools\python.exe "$(Build.SourcesDirectory)\tools\ci_build\build.py" --build_dir $(Build.BinariesDirectory) --build_shared_lib --enable_onnx_tests --ms_experimental --use_dml --use_winml --cmake_generator "Visual Studio 17 2022" --update --config RelWithDebInfo --enable_qspectre --enable_lto --use_telemetry --disable_rtti --enable_wcos $(BuildFlags) --cmake_extra_defines "CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO=/PROFILE" "CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO=/PROFILE" CMAKE_SYSTEM_VERSION=10.0.19041.0 --cmake_path $(Build.BinariesDirectory)\cmake-3.26.3-windows-x86_64\bin\cmake.exe --ctest_path $(Build.BinariesDirectory)\cmake-3.26.3-windows-x86_64\bin\ctest.exe
       workingDirectory: '$(Build.BinariesDirectory)'
       displayName: 'Generate cmake config'
 

diff --git a/cmake/adjust_global_compile_flags.cmake b/cmake/adjust_global_compile_flags.cmake
@@ -74,11 +74,6 @@ if (onnxruntime_MINIMAL_BUILD)
   endif()
 
   if (MSVC)
-    # turn on LTO (which adds some compiler flags and turns on LTCG) unless it's a Debug build to minimize binary size
-    if (NOT CMAKE_BUILD_TYPE STREQUAL "Debug")
-      set(onnxruntime_ENABLE_LTO ON)
-    endif()
-
     # undocumented internal flag to allow analysis of a minimal build binary size
     if (ADD_DEBUG_INFO_TO_MINIMAL_BUILD)
       string(APPEND CMAKE_CXX_FLAGS " /Zi")
@@ -267,37 +262,11 @@ if (MSVC)
     string(APPEND CMAKE_C_FLAGS " /arch:AVX512")
   endif()
 
-  if (NOT GDK_PLATFORM)
-    add_compile_definitions(WINAPI_FAMILY=100) # Desktop app
-    message("Building ONNX Runtime for Windows 10 and newer")
-    add_compile_definitions(WINVER=0x0A00 _WIN32_WINNT=0x0A00 NTDDI_VERSION=0x0A000000)
-  endif()
   if (onnxruntime_ENABLE_LTO AND NOT onnxruntime_USE_CUDA)
     set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Gw /GL")
     set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} /Gw /GL")
     set(CMAKE_CXX_FLAGS_MINSIZEREL "${CMAKE_CXX_FLAGS_MINSIZEREL} /Gw /GL")
   endif()
-
-  # The WinML build tool chain builds ARM/ARM64, and the internal tool chain does not have folders for spectre mitigation libs.
-  # WinML performs spectre mitigation differently.
-  if (NOT DEFINED onnxruntime_DISABLE_QSPECTRE_CHECK)
-    check_cxx_compiler_flag(-Qspectre HAS_QSPECTRE)
-    if (HAS_QSPECTRE)
-      set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /Qspectre")
-      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Qspectre")
-    endif()
-  endif()
-  set(CMAKE_EXE_LINKER_FLAGS  "${CMAKE_EXE_LINKER_FLAGS} /DYNAMICBASE")
-  check_cxx_compiler_flag(-guard:cf HAS_GUARD_CF)
-  if (HAS_GUARD_CF)
-    set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /guard:cf")
-    set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /guard:cf")
-    set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} /guard:cf")
-    set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} /guard:cf")
-    set(CMAKE_C_FLAGS_MINSIZEREL "${CMAKE_C_FLAGS_MINSIZEREL} /guard:cf")
-    set(CMAKE_CXX_FLAGS_MINSIZEREL "${CMAKE_CXX_FLAGS_MINSIZEREL} /guard:cf")
-    set(CMAKE_EXE_LINKER_FLAGS  "${CMAKE_EXE_LINKER_FLAGS} /guard:cf")
-  endif()
 else()
   if (NOT APPLE)
     #XXX: Sometimes the value of CMAKE_SYSTEM_PROCESSOR is set but it's wrong. For example, if you run an armv7 docker
@@ -378,16 +347,9 @@ else()
 
 endif()
 
-if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
-    #For Mac compliance
-    message("Adding flags for Mac builds")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fstack-protector-strong")
-elseif (WIN32)
-    # parallel build
-    # These compiler opitions cannot be forwarded to NVCC, so cannot use add_compiler_options
-    string(APPEND CMAKE_CXX_FLAGS " /MP")
+if (WIN32)
     # required to be set explicitly to enable Eigen-Unsupported SpecialFunctions
     string(APPEND CMAKE_CXX_FLAGS " -DEIGEN_HAS_C99_MATH")
-else()
+elseif(LINUX)
     add_compile_definitions("_GNU_SOURCE")
 endif()
diff --git a/onnxruntime/test/framework/bfc_arena_test.cc b/onnxruntime/test/framework/bfc_arena_test.cc
@@ -1,6 +1,7 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
+#include <absl/base/config.h>
 #include "core/framework/bfc_arena.h"
 #include "core/framework/allocator_utils.h"
 #include "gtest/gtest.h"
@@ -164,6 +165,8 @@ void TestCustomMemoryLimit_ProcessException(const OnnxRuntimeException& ex) {
 #endif  // #ifdef GTEST_USES_POSIX_RE
 }
 
+// Address Sanitizer would report allocation-size-too-big if we don't disable this test.
+#ifndef ABSL_HAVE_ADDRESS_SANITIZER
 TEST(BFCArenaTest, TestCustomMemoryLimit) {
   {
     // Configure a 1MiB byte limit
@@ -214,6 +217,7 @@ TEST(BFCArenaTest, TestCustomMemoryLimit) {
     b.Free(first_ptr);
   }
 }
+#endif
 
 TEST(BFCArenaTest, AllocationsAndDeallocationsWithGrowth) {
   // Max of 2GiB, but starts out small.

diff --git a/onnxruntime/test/framework/tunable_op_test.cc b/onnxruntime/test/framework/tunable_op_test.cc
@@ -459,6 +459,8 @@ class TunableVecAddSelectFastestIfSupported : public TunableOp<VecAddParamsRecor
   }
 };
 
+// We run Android tests in a simulator so the result might be different
+#if defined(__ANDROID__) && defined(NDEBUG)
 TEST(TunableOp, SelectFastestIfSupported) {
 #ifdef ORT_NO_RTTI
   GTEST_SKIP() << "TunableOp needs RTTI to work correctly";
@@ -483,6 +485,7 @@ TEST(TunableOp, SelectFastestIfSupported) {
   ASSERT_EQ(last_run, "FastestNarrow");
 #endif
 }
+#endif
 
 TEST(TunableOp, DisabledWithManualSelection) {
 #ifdef ORT_NO_RTTI

diff --git a/onnxruntime/test/logging_apis/test_logging_apis.cc b/onnxruntime/test/logging_apis/test_logging_apis.cc
@@ -12,7 +12,7 @@
 #pragma GCC diagnostic pop
 #endif
 #endif
-
+#include <absl/base/config.h>
 #include "gtest/gtest.h"
 
 // Manually initialize the Ort API object for every test.
@@ -167,7 +167,13 @@ TEST_F(RealCAPITestsFixture, CApiLoggerLogMessage) {
                                                     ORT_FILE, line_num, static_cast<const char*>(__FUNCTION__)));
 }
 
+// The code below where it tests for formatting error generates an out-of-bound memory access. Therefore we disable it
+// when memory sanitizer is enabled.
+#ifdef ABSL_HAVE_ADDRESS_SANITIZER
+TEST_F(RealCAPITestsFixture, DISABLED_CppApiORTCXXLOG) {
+#else
 TEST_F(RealCAPITestsFixture, CppApiORTCXXLOG) {
+#endif
   // Tests the output and filtering of the ORT_CXX_LOG and ORT_CXX_LOG_NOEXCEPT macros in the C++ API.
   // The first two calls go through, but the last two calls are filtered out due to an insufficient severity.
 
@@ -203,7 +209,11 @@ TEST_F(RealCAPITestsFixture, CppApiORTCXXLOG) {
   ORT_CXX_LOG_NOEXCEPT(cpp_ort_logger, OrtLoggingLevel::ORT_LOGGING_LEVEL_INFO, "Ignored2");
 }
 
+#ifdef ABSL_HAVE_ADDRESS_SANITIZER
+TEST_F(RealCAPITestsFixture, DISABLED_CppApiORTCXXLOGF) {
+#else
 TEST_F(RealCAPITestsFixture, CppApiORTCXXLOGF) {
+#endif
   // Tests the output and filtering of the ORT_CXX_LOGF and ORT_CXX_LOGF_NOEXCEPT macros in the C++ API.
   // The first set of calls go through. The next set of calls are filtered out due to an insufficient severity.
   // The last calls have a formatting error and we expect an exception depending on which macro is used.

diff --git a/onnxruntime/test/shared_lib/test_inference.cc b/onnxruntime/test/shared_lib/test_inference.cc
@@ -11,6 +11,7 @@
 #include <algorithm>
 #include <thread>
 
+#include <absl/base/config.h>
 #include "gtest/gtest.h"
 #include "gmock/gmock.h"
 
@@ -402,6 +403,8 @@ TEST(CApiTest, SparseInputModel) {
 #endif  // DISABLE_CONTRIB_OPS
 #endif  // !defined(DISABLE_SPARSE_TENSORS)
 
+// Memory leak
+#ifndef ABSL_HAVE_ADDRESS_SANITIZER
 TEST(CApiTest, custom_op_handler) {
   std::cout << "Running custom op inference" << std::endl;
 
@@ -435,6 +438,7 @@ TEST(CApiTest, custom_op_handler) {
                        custom_op_domain, nullptr);
 #endif
 }
+#endif
 
 #ifdef USE_CUDA
 TEST(CApiTest, custom_op_set_input_memory_type) {
@@ -1452,7 +1456,8 @@ TEST(CApiTest, test_custom_op_library) {
 #endif
 }
 
-#if defined(__ANDROID__)
+// Has memory leak
+#if defined(__ANDROID__) || defined(ABSL_HAVE_ADDRESS_SANITIZER)
 TEST(CApiTest, DISABLED_test_custom_op_shape_infer_attr) {
 // To accomodate a reduced op build pipeline
 #elif defined(REDUCED_OPS_BUILD) && defined(USE_CUDA)

diff --git a/onnxruntime/test/shared_lib/test_ort_format_models.cc b/onnxruntime/test/shared_lib/test_ort_format_models.cc
@@ -3,7 +3,7 @@
 
 // custom ops are only supported in a minimal build if explicitly enabled
 #if !defined(ORT_MINIMAL_BUILD) || defined(ORT_MINIMAL_BUILD_CUSTOM_OPS)
-
+#include <absl/base/config.h>
 #include "core/common/common.h"
 #include "core/graph/constants.h"
 #include "core/session/onnxruntime_cxx_api.h"
@@ -16,17 +16,18 @@
 
 extern std::unique_ptr<Ort::Env> ort_env;
 
-static void TestInference(Ort::Env& env, const std::basic_string<ORTCHAR_T>& model_uri,
-                          const std::vector<Input>& inputs, const char* output_name,
-                          const std::vector<int64_t>& expected_dims_y, const std::vector<float>& expected_values_y,
-                          Ort::CustomOpDomain& custom_op_domain, void* cuda_compute_stream = nullptr) {
+[[maybe_unused]] static void TestInference(Ort::Env& env, const std::basic_string<ORTCHAR_T>& model_uri,
+                                           const std::vector<Input>& inputs, const char* output_name,
+                                           const std::vector<int64_t>& expected_dims_y, const std::vector<float>& expected_values_y,
+                                           Ort::CustomOpDomain& custom_op_domain, void* cuda_compute_stream = nullptr) {
   Ort::SessionOptions session_options;
   session_options.Add(custom_op_domain);
 
 #ifdef USE_CUDA
   auto cuda_options = CreateDefaultOrtCudaProviderOptionsWithCustomStream(cuda_compute_stream);
   session_options.AppendExecutionProvider_CUDA(cuda_options);
 #else
+  session_options.DisableCpuMemArena();
   ORT_UNUSED_PARAMETER(cuda_compute_stream);
 #endif
   Ort::Session session(env, model_uri.c_str(), session_options);
@@ -65,7 +66,7 @@ static void TestInference(Ort::Env& env, const std::basic_string<ORTCHAR_T>& mod
   }
 }
 
-#if !defined(ORT_MINIMAL_BUILD)
+#if !defined(ORT_MINIMAL_BUILD) && !defined(ABSL_HAVE_ADDRESS_SANITIZER)
 TEST(OrtFormatCustomOpTests, ConvertOnnxModelToOrt) {
   const std::basic_string<ORTCHAR_T> onnx_file = ORT_TSTR("testdata/foo_1.onnx");
   const std::basic_string<ORTCHAR_T> ort_file = ORT_TSTR("testdata/foo_1.onnx.test_output.ort");
@@ -120,7 +121,7 @@ TEST(OrtFormatCustomOpTests, ConvertOnnxModelToOrt) {
 
 // the saved ORT format model has the CPU EP assigned to the custom op node, so we only test if we're not using the
 // CUDA EP for the test.
-#ifndef USE_CUDA
+#if !defined(USE_CUDA) && !defined(ABSL_HAVE_ADDRESS_SANITIZER)
 TEST(OrtFormatCustomOpTests, LoadOrtModel) {
   const std::basic_string<ORTCHAR_T> ort_file = ORT_TSTR("testdata/foo_1.onnx.ort");