Skip to content

Commit

Permalink
[core] Convert reference use intrinsic implementation for ChromeOS (#…
Browse files Browse the repository at this point in the history
…26870)

### Details:
 - Add intrinsic implementation for convert reference

### Tickets:
 - CVS-152654
  • Loading branch information
praasz authored Oct 4, 2024
1 parent 36c9a43 commit 817bc1a
Show file tree
Hide file tree
Showing 8 changed files with 406 additions and 183 deletions.
12 changes: 12 additions & 0 deletions src/core/reference/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,18 @@ add_library(${TARGET_NAME} STATIC ${LIBRARY_SRC} ${PUBLIC_HEADERS})
add_library(openvino::reference ALIAS ${TARGET_NAME})
set_target_properties(${TARGET_NAME} PROPERTIES EXPORT_NAME reference)

# Configure the AVX2 intrinsic implementation of convert; only done when ENABLE_AVX2 is set.
if(ENABLE_AVX2)
# avx2_flags is used below as the per-file COMPILE_OPTIONS value
# (presumably filled with the compiler's AVX2 switches by this helper - confirm in its definition).
ov_avx2_optimization_flags(avx2_flags)

# Sources that must be compiled with the AVX2 options.
set(OV_REFERENCE_X86_AVX2_SRC
${CMAKE_CURRENT_SOURCE_DIR}/src/op/convert_x86_intrinsics.cpp
)
# Apply the options to the listed sources only, and keep them out of unity builds and
# precompiled headers (presumably so the per-file options are not shared with other sources).
set_source_files_properties(${OV_REFERENCE_X86_AVX2_SRC} PROPERTIES COMPILE_OPTIONS "${avx2_flags}"
SKIP_UNITY_BUILD_INCLUSION ON
SKIP_PRECOMPILE_HEADERS ON)
# PRIVATE: HAVE_AVX2 is defined only while compiling this target, not for its consumers.
target_compile_definitions(${TARGET_NAME} PRIVATE HAVE_AVX2)
endif()

ov_build_target_faster(${TARGET_NAME}
UNITY
PCH PRIVATE "src/precomp.hpp")
Expand Down
16 changes: 7 additions & 9 deletions src/core/reference/include/openvino/reference/convert.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,11 @@
#include "openvino/core/type/nf4.hpp"

#if !defined(OS_CHROMEOS) && (defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64))
# define OV_CORE_USE_XBYAK_JIT 1
#else
# define OV_CORE_USE_XBYAK_JIT 0
# define OV_CORE_USE_XBYAK_JIT
#endif

#if defined(OS_CHROMEOS) && defined(OPENVINO_ARCH_X86_64) && defined(HAVE_AVX2)
# define OV_CORE_USE_INTRINSICS
#endif

namespace ov {
Expand All @@ -33,12 +35,12 @@ namespace reference {
namespace detail {

/// @brief Converts a single value from TI to TO with a plain static_cast.
///
/// Takes part in overload resolution only when TO is not `char`; the `char`
/// destination is served by the sibling overload.
///
/// @tparam TI Source type.
/// @tparam TO Destination type (anything except `char`).
/// @param v   Value to convert.
/// @return    `static_cast<TO>(v)`.
template <typename TI, typename TO>
constexpr typename std::enable_if<!std::is_same<TO, char>::value, TO>::type convert(const TI v) {
    return static_cast<TO>(v);
}

/// @brief Converts a single value to `char`, treating the destination as a boolean.
///
/// Takes part in overload resolution only when TO is `char`. The value is first
/// converted to `bool`, so every non-zero input yields 1 and zero yields 0.
///
/// @tparam TI Source type.
/// @tparam TO Destination type (must be `char`).
/// @param v   Value to convert.
/// @return    `char` holding 1 for a truthy input and 0 otherwise.
template <typename TI, typename TO>
constexpr typename std::enable_if<std::is_same<TO, char>::value, TO>::type convert(const TI v) {
    return static_cast<char>(static_cast<bool>(v));
}
} // namespace detail
Expand All @@ -62,8 +64,6 @@ void convert(const TI* arg, TO* out, const size_t count) {
std::transform(arg, arg + count, out, detail::convert<TI, TO>);
}

#if OV_CORE_USE_XBYAK_JIT

template <>
void convert<uint8_t, float16>(const uint8_t* arg, float16* out, size_t count);
template <>
Expand All @@ -79,8 +79,6 @@ void convert<bfloat16, float16>(const bfloat16* arg, float16* out, size_t count)
template <>
void convert<bfloat16, float>(const bfloat16* arg, float* out, size_t count);

#endif // OV_CORE_USE_XBYAK_JIT

template <>
void convert<int32_t, float16>(const int32_t* arg, float16* out, size_t count);

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <functional>
#include <type_traits>

#include "openvino/reference/convert.hpp"

namespace ov {

// forward declare from inference dev API (cannot be included)
extern bool with_cpu_x86_avx2();

namespace reference {

/// Clamp policy that performs no range limiting.
struct NoClamp {
    /// Policy flag: this mode does not clamp.
    static constexpr bool enabled = false;

    /// Scalar form: hands the argument back unchanged.
    template <class Value>
    static constexpr Value apply(const Value value) {
        return value;
    }

    /// Two-type form, declared only; bodies are supplied through explicit
    /// specializations for optimized (e.g. vector register) types.
    template <class Src, class Dst>
    static Dst apply(const Src value);
};

/// Clamp policy that saturates a TI value to the representable range of TO.
///
/// @tparam TI Type of the incoming value.
/// @tparam TO Type whose numeric limits bound the result.
template <class TI, class TO>
struct Clamp {
    /// Policy flag: this mode clamps.
    static constexpr bool enabled = true;

    /// Scalar form. Returns TO's lowest value when `value` is below it, TO's max
    /// when `value` is above it, and the plain conversion of `value` otherwise.
    static constexpr TO apply(const TI value) {
        return (value < Limits::lowest()) ? Limits::lowest()
               : (value > Limits::max())  ? Limits::max()
                                          : detail::convert<TI, TO>(value);
    }

    /// Two-type form, declared only; bodies are supplied through explicit
    /// specializations for optimized (e.g. vector register) types.
    template <class Src, class Dst>
    static Dst apply(const Src value);

private:
    using Limits = std::numeric_limits<TO>;
};

/// Element-wise converter from TI to TO with an optional optimized fast path.
///
/// @tparam TI Element type of the input buffer.
/// @tparam TO Element type of the output buffer.
template <class TI, class TO>
struct Converter {
    /// Number of elements handled by one optimized step: 32 bytes worth of float.
    static constexpr size_t vec_f32_size = 32 / sizeof(float);

    /// Scalar conversion of `n` elements; also used for whatever the optimized
    /// loop leaves over. Each element goes through ClampMode::apply and is then
    /// converted to TO.
    template <class ClampMode>
    static void tail(const TI* in, TO* out, size_t n) {
        const auto convert_one = [](const TI v) {
            return detail::convert<decltype(ClampMode::apply(v)), TO>(ClampMode::apply(v));
        };
        std::transform(in, in + n, out, convert_one);
    }

    /// Customization point for an optimized conversion. The primary template is
    /// disabled and its run() does nothing; explicit specializations set
    /// `enabled = true` and provide a real run().
    template <class ClampMode>
    struct Optimized {
        static constexpr bool enabled = false;
        static void run(const TI* in, TO* out) {}
    };

    /// Conversion entry point chosen when no optimized version exists for
    /// ClampMode: everything is processed by the scalar path.
    template <class ClampMode, typename std::enable_if<!Optimized<ClampMode>::enabled>::type* = nullptr>
    static void apply(const TI* in, TO* out, size_t n) {
        tail<ClampMode>(in, out, n);
    }

    /// Conversion entry point chosen when Optimized<ClampMode> is enabled.
    /// If the CPU reports AVX2 support, full groups of vec_f32_size elements go
    /// through Optimized<ClampMode>::run; the remainder (or everything, without
    /// AVX2) is handled by the scalar path.
    template <class ClampMode, typename std::enable_if<Optimized<ClampMode>::enabled>::type* = nullptr>
    static void apply(const TI* in, TO* out, size_t n) {
        if (with_cpu_x86_avx2()) {
            while (n >= vec_f32_size) {
                Optimized<ClampMode>::run(in, out);
                in += vec_f32_size;
                out += vec_f32_size;
                n -= vec_f32_size;
            }
        }
        tail<ClampMode>(in, out, n);
    }
};

} // namespace reference
} // namespace ov
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#ifdef OV_CORE_USE_INTRINSICS
# include <immintrin.h>

# include "openvino/reference/utils/convert_util.hpp"

namespace ov {
namespace reference {
# ifdef HAVE_AVX2

// Clamp optimized specializations
// Declares the explicit specialization of NoClamp's two-type apply() that takes a
// __m256i and returns a __m128i. Only the declaration lives here; the definition is
// not part of this header.
// NOTE(review): the parameter name suggests 32-bit integer lanes being narrowed - confirm
// against the definition.
template <>
__m128i NoClamp::apply<__m256i, __m128i>(const __m256i vec_i32);

// Declares the explicit specialization of Clamp<float, float16>'s two-type apply() that
// takes and returns a __m256. Only the declaration lives here; the definition is not part
// of this header.
// NOTE(review): presumably the vector counterpart of the scalar Clamp::apply (limit each
// float lane to the float16 range) - confirm against the definition.
template <>
template <>
__m256 Clamp<float, float16>::apply<__m256, __m256>(const __m256 vec_f32);

// Conversion optimized specializations
// --- f32 -> other
// Enables the optimized float -> float16 path for the NoClamp mode.
// With `enabled = true`, Converter<float, float16>::apply<NoClamp> resolves to the overload
// that calls run() once per Converter::vec_f32_size elements when with_cpu_x86_avx2() is true.
// run() is only declared here; its definition is not part of this header.
template <>
template <>
struct Converter<float, float16>::Optimized<NoClamp> {
static constexpr bool enabled = true;
static void run(const float* in, float16* out);
};

// Enables the optimized float -> float16 path for the Clamp<float, float16> mode.
// With `enabled = true`, Converter<float, float16>::apply<Clamp<float, float16>> resolves to
// the overload that calls run() once per Converter::vec_f32_size elements when
// with_cpu_x86_avx2() is true.
// run() is only declared here; its definition is not part of this header.
template <>
template <>
struct Converter<float, float16>::Optimized<Clamp<float, float16>> {
static constexpr bool enabled = true;
static void run(const float* in, float16* out);
};

// Enables the optimized float -> int8_t path for the NoClamp mode.
// With `enabled = true`, Converter<float, int8_t>::apply<NoClamp> resolves to the overload
// that calls run() once per Converter::vec_f32_size elements when with_cpu_x86_avx2() is true.
// run() is only declared here; its definition is not part of this header.
template <>
template <>
struct Converter<float, int8_t>::Optimized<NoClamp> {
static constexpr bool enabled = true;
static void run(const float* in, int8_t* out);
};

// --- f16 -> other
// Enables the optimized float16 -> float path for the NoClamp mode.
// With `enabled = true`, Converter<float16, float>::apply<NoClamp> resolves to the overload
// that calls run() once per Converter::vec_f32_size elements when with_cpu_x86_avx2() is true.
// run() is only declared here; its definition is not part of this header.
template <>
template <>
struct Converter<float16, float>::Optimized<NoClamp> {
static constexpr bool enabled = true;
static void run(const float16* in, float* out);
};

// Enables the optimized float16 -> int8_t path for the NoClamp mode.
// With `enabled = true`, Converter<float16, int8_t>::apply<NoClamp> resolves to the overload
// that calls run() once per Converter::vec_f32_size elements when with_cpu_x86_avx2() is true.
// run() is only declared here; its definition is not part of this header.
template <>
template <>
struct Converter<float16, int8_t>::Optimized<NoClamp> {
static constexpr bool enabled = true;
static void run(const float16* in, int8_t* out);
};

// --- bf16 -> other
// Enables the optimized bfloat16 -> float16 path for the Clamp<float, float16> mode.
// With `enabled = true`, Converter<bfloat16, float16>::apply<Clamp<float, float16>> resolves
// to the overload that calls run() once per Converter::vec_f32_size elements when
// with_cpu_x86_avx2() is true.
// Note the mode is Clamp<float, float16>, not a bfloat16-based Clamp.
// NOTE(review): presumably the input is widened to float before clamping - confirm in the
// definition.
// run() is only declared here; its definition is not part of this header.
template <>
template <>
struct Converter<bfloat16, float16>::Optimized<Clamp<float, float16>> {
static constexpr bool enabled = true;
static void run(const bfloat16* in, float16* out);
};

// Enables the optimized bfloat16 -> float path for the NoClamp mode.
// With `enabled = true`, Converter<bfloat16, float>::apply<NoClamp> resolves to the overload
// that calls run() once per Converter::vec_f32_size elements when with_cpu_x86_avx2() is true.
// run() is only declared here; its definition is not part of this header.
template <>
template <>
struct Converter<bfloat16, float>::Optimized<NoClamp> {
static constexpr bool enabled = true;
static void run(const bfloat16* in, float* out);
};

// --- u8 -> other
// Enables the optimized uint8_t -> float16 path for the NoClamp mode.
// With `enabled = true`, Converter<uint8_t, float16>::apply<NoClamp> resolves to the overload
// that calls run() once per Converter::vec_f32_size elements when with_cpu_x86_avx2() is true.
// run() is only declared here; its definition is not part of this header.
template <>
template <>
struct Converter<uint8_t, float16>::Optimized<NoClamp> {
static constexpr bool enabled = true;
static void run(const uint8_t* in, float16* out);
};
# endif // HAVE_AVX2
} // namespace reference
} // namespace ov
#endif
Loading

0 comments on commit 817bc1a

Please sign in to comment.