From 48f31a3b5e4c4ffa31ce10cd16be3fa2c386ea53 Mon Sep 17 00:00:00 2001 From: Fahri Ali Rahman Date: Sat, 21 Oct 2023 18:37:04 +0700 Subject: [PATCH] add tests for simd reduction on 2d array --- tests/simd/CMakeLists.txt | 2 + tests/simd/index/simd_reduction.cpp | 609 ++++++++++++++++++++++++++++ tests/simd/x86/reduction_2d_sse.cpp | 214 ++++++++++ 3 files changed, 825 insertions(+) create mode 100644 tests/simd/index/simd_reduction.cpp create mode 100644 tests/simd/x86/reduction_2d_sse.cpp diff --git a/tests/simd/CMakeLists.txt b/tests/simd/CMakeLists.txt index a64018719..b4050acf8 100644 --- a/tests/simd/CMakeLists.txt +++ b/tests/simd/CMakeLists.txt @@ -41,6 +41,7 @@ if (NMTOOLS_SIMD_TEST_SSE) if (NMTOOLS_SIMD_TEST_REDUCTION) set(NMTOOLS_SIMD_TEST_SOURCES ${NMTOOLS_SIMD_TEST_SOURCES} x86/reduction_sse.cpp + x86/reduction_2d_sse.cpp ) endif (NMTOOLS_SIMD_TEST_REDUCTION) endif (NMTOOLS_SIMD_TEST_SSE) @@ -63,6 +64,7 @@ if (NMTOOLS_SIMD_TEST_INDEX) set(NMTOOLS_SIMD_TEST_SOURCES ${NMTOOLS_SIMD_TEST_SOURCES} index/simd_index.cpp index/simd_matmul.cpp + index/simd_reduction.cpp ) endif (NMTOOLS_SIMD_TEST_INDEX) add_executable(${PROJECT_NAME}-doctest tests.cpp diff --git a/tests/simd/index/simd_reduction.cpp b/tests/simd/index/simd_reduction.cpp new file mode 100644 index 000000000..dcc49f247 --- /dev/null +++ b/tests/simd/index/simd_reduction.cpp @@ -0,0 +1,609 @@ +#include "nmtools/array/eval/simd/index.hpp" +#include "nmtools/array/index/product.hpp" +#include "nmtools/testing/doctest.hpp" + +namespace nm = nmtools; +namespace ix = nm::index; +namespace meta = nm::meta; + +using ix::ReductionKind; +using ix::SIMD; + +TEST_CASE("reduction_2d_shape(case1a)" * doctest::test_suite("simd::index")) +{ + auto reduction_kind = meta::as_type_v; + auto n_elem_pack = meta::as_type_v<4>; + auto inp_shape = nmtools_array{3,4}; + auto out_shape = nmtools_array{1,4}; + + auto result = ix::reduction_2d_shape(reduction_kind,n_elem_pack,inp_shape,out_shape); + auto expect = nmtools_array{3,1}; + NMTOOLS_ASSERT_EQUAL( result, expect ); +} + +TEST_CASE("reduction_2d_shape(case1b)" * doctest::test_suite("simd::index")) +{ + auto reduction_kind = meta::as_type_v; + auto n_elem_pack = meta::as_type_v<4>; + auto inp_shape = nmtools_array{3,4}; + auto out_shape = nmtools_array{1,4}; + + auto result = ix::reduction_2d_shape(reduction_kind,n_elem_pack,inp_shape,out_shape); + auto expect = nmtools_array{3,1}; + NMTOOLS_ASSERT_EQUAL( result, expect ); +} + +TEST_CASE("reduction_2d_shape(case2a)" * doctest::test_suite("simd::index")) +{ + auto reduction_kind = meta::as_type_v; + auto n_elem_pack = meta::as_type_v<4>; + auto inp_shape = nmtools_array{4,10}; + auto out_shape = nmtools_array{1,10}; + + auto result = ix::reduction_2d_shape(reduction_kind,n_elem_pack,inp_shape,out_shape); + auto expect = nmtools_array{4,4}; + NMTOOLS_ASSERT_EQUAL( result, expect ); +} + +TEST_CASE("reduction_2d_shape(case2b)" * doctest::test_suite("simd::index")) +{ + auto reduction_kind = meta::as_type_v; + auto n_elem_pack = meta::as_type_v<4>; + auto inp_shape = nmtools_array{4,10}; + auto out_shape = nmtools_array{1,10}; + + auto result = ix::reduction_2d_shape(reduction_kind,n_elem_pack,inp_shape,out_shape); + auto expect = nmtools_array{4,3}; + NMTOOLS_ASSERT_EQUAL( result, expect ); +} + +//////////////////////////////////////////////////////////////////////////////// + +// VERTICAL REDUCTION +/** +>>> a +array([[ 0, 1, 2, 3], + [ 4, 5, 6, 7], + [ 8, 9, 10, 11]]) +>>> np.add.reduce(a,axis=0,keepdims=True) +array([[12, 15, 18, 21]]) +>>> np.add.reduce(a,axis=0,keepdims=True).shape +(1, 4) +*/ + +TEST_CASE("reduction_2d(case1a)" * doctest::test_suite("simd::index")) +{ + auto reduction_kind = meta::as_type_v; + auto n_elem_pack = meta::as_type_v<4>; + auto inp_shape = nmtools_array{3,4}; + auto out_shape = nmtools_array{1,4}; + auto simd_shape = nmtools_array{3,1}; + + auto simd_index = nmtools_array{0,0}; + + auto result = ix::reduction_2d(reduction_kind,n_elem_pack,simd_index,simd_shape,out_shape,inp_shape); + auto [out_index,inp_index] = result; + { + auto [out_tag,out_offset] = out_index; + CHECK( out_tag == SIMD::ACCUMULATE_PACKED ); + NMTOOLS_ASSERT_EQUAL( out_offset, 0 ); + } + { + auto [inp_tag,inp_offset] = inp_index; + CHECK( inp_tag == SIMD::PACKED ); + NMTOOLS_ASSERT_EQUAL( inp_offset, 0 ); + } +} + +TEST_CASE("reduction_2d(case1b)" * doctest::test_suite("simd::index")) +{ + auto reduction_kind = meta::as_type_v; + auto n_elem_pack = meta::as_type_v<4>; + auto inp_shape = nmtools_array{3,4}; + auto out_shape = nmtools_array{1,4}; + auto simd_shape = nmtools_array{3,1}; + + auto simd_index = nmtools_array{1,0}; + + auto result = ix::reduction_2d(reduction_kind,n_elem_pack,simd_index,simd_shape,out_shape,inp_shape); + auto [out_index,inp_index] = result; + { + auto [out_tag,out_offset] = out_index; + CHECK( out_tag == SIMD::ACCUMULATE_PACKED ); + NMTOOLS_ASSERT_EQUAL( out_offset, 0 ); + } + { + auto [inp_tag,inp_offset] = inp_index; + CHECK( inp_tag == SIMD::PACKED ); + NMTOOLS_ASSERT_EQUAL( inp_offset, 4 ); + } +} + +TEST_CASE("reduction_2d(case1c)" * doctest::test_suite("simd::index")) +{ + auto reduction_kind = meta::as_type_v; + auto n_elem_pack = meta::as_type_v<4>; + auto inp_shape = nmtools_array{3,4}; + auto out_shape = nmtools_array{1,4}; + auto simd_shape = nmtools_array{3,1}; + + auto simd_index = nmtools_array{2,0}; + + auto result = ix::reduction_2d(reduction_kind,n_elem_pack,simd_index,simd_shape,out_shape,inp_shape); + auto [out_index,inp_index] = result; + { + auto [out_tag,out_offset] = out_index; + CHECK( out_tag == SIMD::ACCUMULATE_PACKED ); + NMTOOLS_ASSERT_EQUAL( out_offset, 0 ); + } + { + auto [inp_tag,inp_offset] = inp_index; + CHECK( inp_tag == SIMD::PACKED ); + NMTOOLS_ASSERT_EQUAL( inp_offset, 8 ); + } +} + +// HORIZONTAL REDUCTION +/** +>>> a +array([[ 0, 1, 2, 3], + [ 4, 5, 6, 7], + [ 8, 9, 10, 11]]) +>>> np.add.reduce(a,axis=1,keepdims=True) +array([[ 6], + [22], + [38]]) +>>> np.add.reduce(a,axis=1,keepdims=True).shape +(3, 1) +*/ + +TEST_CASE("reduction_2d(case2a)" * doctest::test_suite("simd::index")) +{ + auto reduction_kind = meta::as_type_v; + auto n_elem_pack = meta::as_type_v<4>; + auto inp_shape = nmtools_array{3,4}; + auto out_shape = nmtools_array{3,1}; + auto simd_shape = nmtools_array{3,1}; + + auto simd_index = nmtools_array{0,0}; + + auto result = ix::reduction_2d(reduction_kind,n_elem_pack,simd_index,simd_shape,out_shape,inp_shape); + auto [out_index,inp_index] = result; + { + auto [out_tag,out_offset] = out_index; + CHECK( out_tag == SIMD::ACCUMULATE ); + NMTOOLS_ASSERT_EQUAL( out_offset, 0 ); + } + { + auto [inp_tag,inp_offset] = inp_index; + CHECK( inp_tag == SIMD::PACKED ); + NMTOOLS_ASSERT_EQUAL( inp_offset, 0 ); + } +} + +TEST_CASE("reduction_2d(case2b)" * doctest::test_suite("simd::index")) +{ + auto reduction_kind = meta::as_type_v; + auto n_elem_pack = meta::as_type_v<4>; + auto inp_shape = nmtools_array{3,4}; + auto out_shape = nmtools_array{3,1}; + auto simd_shape = nmtools_array{3,1}; + + auto simd_index = nmtools_array{1,0}; + + auto result = ix::reduction_2d(reduction_kind,n_elem_pack,simd_index,simd_shape,out_shape,inp_shape); + auto [out_index,inp_index] = result; + { + auto [out_tag,out_offset] = out_index; + CHECK( out_tag == SIMD::ACCUMULATE ); + NMTOOLS_ASSERT_EQUAL( out_offset, 1 ); + } + { + auto [inp_tag,inp_offset] = inp_index; + CHECK( inp_tag == SIMD::PACKED ); + NMTOOLS_ASSERT_EQUAL( inp_offset, 4 ); + } +} + +TEST_CASE("reduction_2d(case2c)" * doctest::test_suite("simd::index")) +{ + auto reduction_kind = meta::as_type_v; + auto n_elem_pack = meta::as_type_v<4>; + auto inp_shape = nmtools_array{3,4}; + auto out_shape = nmtools_array{3,1}; + auto simd_shape = nmtools_array{3,1}; + + auto simd_index = nmtools_array{2,0}; + + auto result = ix::reduction_2d(reduction_kind,n_elem_pack,simd_index,simd_shape,out_shape,inp_shape); + auto [out_index,inp_index] = result; + { + auto [out_tag,out_offset] = out_index; + CHECK( out_tag == SIMD::ACCUMULATE ); + NMTOOLS_ASSERT_EQUAL( out_offset, 2 ); + } + { + auto [inp_tag,inp_offset] = inp_index; + CHECK( inp_tag == SIMD::PACKED ); + NMTOOLS_ASSERT_EQUAL( inp_offset, 8 ); + } +} + +TEST_CASE("reduction_2d(case3a)" * doctest::test_suite("simd::index")) +{ + auto reduction_kind = meta::as_type_v; + auto n_elem_pack = meta::as_type_v<4>; + auto inp_shape = nmtools_array{4,10}; + auto out_shape = nmtools_array{1,10}; + auto simd_shape = nmtools_array{4,4}; + + auto simd_index = nmtools_array{0,0}; + + auto result = ix::reduction_2d(reduction_kind,n_elem_pack,simd_index,simd_shape,out_shape,inp_shape); + auto [out_index,inp_index] = result; + { + auto [out_tag,out_offset] = out_index; + CHECK( out_tag == SIMD::ACCUMULATE_PACKED ); + NMTOOLS_ASSERT_EQUAL( out_offset, 0 ); + } + { + auto [inp_tag,inp_offset] = inp_index; + CHECK( inp_tag == SIMD::PACKED ); + NMTOOLS_ASSERT_EQUAL( inp_offset, 0 ); + } +} + +TEST_CASE("reduction_2d(case3b)" * doctest::test_suite("simd::index")) +{ + auto reduction_kind = meta::as_type_v; + auto n_elem_pack = meta::as_type_v<4>; + auto inp_shape = nmtools_array{4,10}; + auto out_shape = nmtools_array{1,10}; + auto simd_shape = nmtools_array{4,4}; + + auto simd_index = nmtools_array{1,0}; + + auto result = ix::reduction_2d(reduction_kind,n_elem_pack,simd_index,simd_shape,out_shape,inp_shape); + auto [out_index,inp_index] = result; + { + auto [out_tag,out_offset] = out_index; + CHECK( out_tag == SIMD::ACCUMULATE_PACKED ); + NMTOOLS_ASSERT_EQUAL( out_offset, 0 ); + } + { + auto [inp_tag,inp_offset] = inp_index; + CHECK( inp_tag == SIMD::PACKED ); + NMTOOLS_ASSERT_EQUAL( inp_offset, 10 ); + } +} + +TEST_CASE("reduction_2d(case3c)" * doctest::test_suite("simd::index")) +{ + auto reduction_kind = meta::as_type_v; + auto n_elem_pack = meta::as_type_v<4>; + auto inp_shape = nmtools_array{4,10}; + auto out_shape = nmtools_array{1,10}; + auto simd_shape = nmtools_array{4,4}; + + auto simd_index = nmtools_array{2,0}; + + auto result = ix::reduction_2d(reduction_kind,n_elem_pack,simd_index,simd_shape,out_shape,inp_shape); + auto [out_index,inp_index] = result; + { + auto [out_tag,out_offset] = out_index; + CHECK( out_tag == SIMD::ACCUMULATE_PACKED ); + NMTOOLS_ASSERT_EQUAL( out_offset, 0 ); + } + { + auto [inp_tag,inp_offset] = inp_index; + CHECK( inp_tag == SIMD::PACKED ); + NMTOOLS_ASSERT_EQUAL( inp_offset, 20 ); + } +} + +TEST_CASE("reduction_2d(case3d)" * doctest::test_suite("simd::index")) +{ + auto reduction_kind = meta::as_type_v; + auto n_elem_pack = meta::as_type_v<4>; + auto inp_shape = nmtools_array{4,10}; + auto out_shape = nmtools_array{1,10}; + auto simd_shape = nmtools_array{4,4}; + + auto simd_index = nmtools_array{3,0}; + + auto result = ix::reduction_2d(reduction_kind,n_elem_pack,simd_index,simd_shape,out_shape,inp_shape); + auto [out_index,inp_index] = result; + { + auto [out_tag,out_offset] = out_index; + CHECK( out_tag == SIMD::ACCUMULATE_PACKED ); + NMTOOLS_ASSERT_EQUAL( out_offset, 0 ); + } + { + auto [inp_tag,inp_offset] = inp_index; + CHECK( inp_tag == SIMD::PACKED ); + NMTOOLS_ASSERT_EQUAL( inp_offset, 30 ); + } +} + +TEST_CASE("reduction_2d(case3e)" * doctest::test_suite("simd::index")) +{ + auto reduction_kind = meta::as_type_v; + auto n_elem_pack = meta::as_type_v<4>; + auto inp_shape = nmtools_array{4,10}; + auto out_shape = nmtools_array{1,10}; + auto simd_shape = nmtools_array{4,4}; + + auto simd_index = nmtools_array{0,1}; + + auto result = ix::reduction_2d(reduction_kind,n_elem_pack,simd_index,simd_shape,out_shape,inp_shape); + auto [out_index,inp_index] = result; + { + auto [out_tag,out_offset] = out_index; + CHECK( out_tag == SIMD::ACCUMULATE_PACKED ); + NMTOOLS_ASSERT_EQUAL( out_offset, 4 ); + } + { + auto [inp_tag,inp_offset] = inp_index; + CHECK( inp_tag == SIMD::PACKED ); + NMTOOLS_ASSERT_EQUAL( inp_offset, 4 ); + } +} + +TEST_CASE("reduction_2d(case3f)" * doctest::test_suite("simd::index")) +{ + auto reduction_kind = meta::as_type_v; + auto n_elem_pack = meta::as_type_v<4>; + auto inp_shape = nmtools_array{4,10}; + auto out_shape = nmtools_array{1,10}; + auto simd_shape = nmtools_array{4,4}; + + auto simd_index = nmtools_array{0,2}; + + auto result = ix::reduction_2d(reduction_kind,n_elem_pack,simd_index,simd_shape,out_shape,inp_shape); + auto [out_index,inp_index] = result; + { + auto [out_tag,out_offset] = out_index; + CHECK( out_tag == SIMD::ACCUMULATE ); + NMTOOLS_ASSERT_EQUAL( out_offset, 8 ); + } + { + auto [inp_tag,inp_offset] = inp_index; + CHECK( inp_tag == SIMD::SCALAR ); + NMTOOLS_ASSERT_EQUAL( inp_offset, 8 ); + } +} + +TEST_CASE("reduction_2d(case3g)" * doctest::test_suite("simd::index")) +{ + auto reduction_kind = meta::as_type_v; + auto n_elem_pack = meta::as_type_v<4>; + auto inp_shape = nmtools_array{4,10}; + auto out_shape = nmtools_array{1,10}; + auto simd_shape = nmtools_array{4,4}; + + auto simd_index = nmtools_array{0,3}; + + auto result = ix::reduction_2d(reduction_kind,n_elem_pack,simd_index,simd_shape,out_shape,inp_shape); + auto [out_index,inp_index] = result; + { + auto [out_tag,out_offset] = out_index; + CHECK( out_tag == SIMD::ACCUMULATE ); + NMTOOLS_ASSERT_EQUAL( out_offset, 9 ); + } + { + auto [inp_tag,inp_offset] = inp_index; + CHECK( inp_tag == SIMD::SCALAR ); + NMTOOLS_ASSERT_EQUAL( inp_offset, 9 ); + } +} + +TEST_CASE("reduction_2d(case3h)" * doctest::test_suite("simd::index")) +{ + auto reduction_kind = meta::as_type_v; + auto n_elem_pack = meta::as_type_v<4>; + auto inp_shape = nmtools_array{4,10}; + auto out_shape = nmtools_array{1,10}; + auto simd_shape = nmtools_array{4,4}; + + auto simd_index = nmtools_array{1,1}; + + auto result = ix::reduction_2d(reduction_kind,n_elem_pack,simd_index,simd_shape,out_shape,inp_shape); + auto [out_index,inp_index] = result; + { + auto [out_tag,out_offset] = out_index; + CHECK( out_tag == SIMD::ACCUMULATE_PACKED ); + NMTOOLS_ASSERT_EQUAL( out_offset, 4 ); + } + { + auto [inp_tag,inp_offset] = inp_index; + CHECK( inp_tag == SIMD::PACKED ); + NMTOOLS_ASSERT_EQUAL( inp_offset, 14 ); + } +} + +TEST_CASE("reduction_2d(case3i)" * doctest::test_suite("simd::index")) +{ + auto reduction_kind = meta::as_type_v; + auto n_elem_pack = meta::as_type_v<4>; + auto inp_shape = nmtools_array{4,10}; + auto out_shape = nmtools_array{1,10}; + auto simd_shape = nmtools_array{4,4}; + + auto simd_index = nmtools_array{2,1}; + + auto result = ix::reduction_2d(reduction_kind,n_elem_pack,simd_index,simd_shape,out_shape,inp_shape); + auto [out_index,inp_index] = result; + { + auto [out_tag,out_offset] = out_index; + CHECK( out_tag == SIMD::ACCUMULATE_PACKED ); + NMTOOLS_ASSERT_EQUAL( out_offset, 4 ); + } + { + auto [inp_tag,inp_offset] = inp_index; + CHECK( inp_tag == SIMD::PACKED ); + NMTOOLS_ASSERT_EQUAL( inp_offset, 24 ); + } +} + +TEST_CASE("reduction_2d(case3j)" * doctest::test_suite("simd::index")) +{ + auto reduction_kind = meta::as_type_v; + auto n_elem_pack = meta::as_type_v<4>; + auto inp_shape = nmtools_array{4,10}; + auto out_shape = nmtools_array{1,10}; + auto simd_shape = nmtools_array{4,4}; + + auto simd_index = nmtools_array{2,2}; + + auto result = ix::reduction_2d(reduction_kind,n_elem_pack,simd_index,simd_shape,out_shape,inp_shape); + auto [out_index,inp_index] = result; + { + auto [out_tag,out_offset] = out_index; + CHECK( out_tag == SIMD::ACCUMULATE ); + NMTOOLS_ASSERT_EQUAL( out_offset, 8 ); + } + { + auto [inp_tag,inp_offset] = inp_index; + CHECK( inp_tag == SIMD::SCALAR ); + NMTOOLS_ASSERT_EQUAL( inp_offset, 28 ); + } +} + +///////////////////////////////////////////////////////////////////////// + +TEST_CASE("reduction_2d_enumerator(case1)" * doctest::test_suite("simd::index")) +{ + auto reduction_kind = meta::as_type_v; + auto n_elem_pack = meta::as_type_v<4>; + auto inp_shape = nmtools_array{3,4}; + auto out_shape = nmtools_array{1,4}; + auto simd_shape = nmtools_array{3,1}; + + auto enumerator = ix::reduction_2d_enumerator(reduction_kind,n_elem_pack,inp_shape,out_shape); + { + NMTOOLS_ASSERT_EQUAL( enumerator.size(), 3 ); + } + { + auto [out_index,inp_index] = enumerator[0]; + { + auto [out_tag,out_offset] = out_index; + CHECK( out_tag == SIMD::ACCUMULATE_PACKED ); + NMTOOLS_ASSERT_EQUAL( out_offset, 0 ); + } + { + auto [inp_tag,inp_offset] = inp_index; + CHECK( inp_tag == SIMD::PACKED ); + NMTOOLS_ASSERT_EQUAL( inp_offset, 0 ); + } + } + { + auto [out_index,inp_index] = enumerator[1]; + { + auto [out_tag,out_offset] = out_index; + CHECK( out_tag == SIMD::ACCUMULATE_PACKED ); + NMTOOLS_ASSERT_EQUAL( out_offset, 0 ); + } + { + auto [inp_tag,inp_offset] = inp_index; + CHECK( inp_tag == SIMD::PACKED ); + NMTOOLS_ASSERT_EQUAL( inp_offset, 4 ); + } + } + { + auto [out_index,inp_index] = enumerator[2]; + { + auto [out_tag,out_offset] = out_index; + CHECK( out_tag == SIMD::ACCUMULATE_PACKED ); + NMTOOLS_ASSERT_EQUAL( out_offset, 0 ); + } + { + auto [inp_tag,inp_offset] = inp_index; + CHECK( inp_tag == SIMD::PACKED ); + NMTOOLS_ASSERT_EQUAL( inp_offset, 8 ); + } + } +} + +TEST_CASE("reduction_2d_enumerator(case2)" * doctest::test_suite("simd::index")) +{ + auto reduction_kind = meta::as_type_v; + auto n_elem_pack = meta::as_type_v<4>; + auto inp_shape = nmtools_array{3,4}; + auto out_shape = nmtools_array{1,4}; + auto simd_shape = nmtools_array{3,1}; + + auto enumerator = ix::reduction_2d_enumerator(reduction_kind,n_elem_pack,inp_shape,out_shape); + auto inp_tags = nmtools_array{SIMD::PACKED,SIMD::PACKED,SIMD::PACKED}; + auto out_tags = nmtools_array{SIMD::ACCUMULATE,SIMD::ACCUMULATE,SIMD::ACCUMULATE}; + auto inp_offsets = nmtools_array{0,4,8}; + auto out_offsets = nmtools_array{0,1,2}; + + for (size_t i=0; i; + auto n_elem_pack = meta::as_type_v<4>; + auto inp_shape = nmtools_array{4,10}; + auto out_shape = nmtools_array{1,10}; + auto simd_shape = nmtools_array{4,4}; + + auto enumerator = ix::reduction_2d_enumerator(reduction_kind,n_elem_pack,inp_shape,out_shape); + auto out_tags = nmtools_array{ + SIMD::ACCUMULATE_PACKED, + SIMD::ACCUMULATE_PACKED, + SIMD::ACCUMULATE, + SIMD::ACCUMULATE, + SIMD::ACCUMULATE_PACKED, + SIMD::ACCUMULATE_PACKED, + SIMD::ACCUMULATE, + SIMD::ACCUMULATE, + SIMD::ACCUMULATE_PACKED, + SIMD::ACCUMULATE_PACKED, + SIMD::ACCUMULATE, + SIMD::ACCUMULATE, + SIMD::ACCUMULATE_PACKED, + SIMD::ACCUMULATE_PACKED, + SIMD::ACCUMULATE, + SIMD::ACCUMULATE, + }; + auto inp_tags = nmtools_array{ + SIMD::PACKED, + SIMD::PACKED, + SIMD::SCALAR, + SIMD::SCALAR, + SIMD::PACKED, + SIMD::PACKED, + SIMD::SCALAR, + SIMD::SCALAR, + SIMD::PACKED, + SIMD::PACKED, + SIMD::SCALAR, + SIMD::SCALAR, + SIMD::PACKED, + SIMD::PACKED, + SIMD::SCALAR, + SIMD::SCALAR, + }; + auto inp_offsets = nmtools_array{0,4,8,9,10,14,18,19,20,24,28,29,30,34,38,39}; + auto out_offsets = nmtools_array{0,4,8,9,0,4,8,9,0,4,8,9,0,4,8,9}; + + for (size_t i=0; i; + auto new_shape = nmtools_array{M,N}; + auto input = na::reshape(na::arange(M*N),new_shape); + auto dtype = nm::None; + auto initial = nm::None; + auto keepdims = nm::True; + X86_SSE_TEST(add.reduce,input,axis,dtype,initial,keepdims); +} + +TEST_CASE("add.reduce_2d(case1b)" * doctest::test_suite("simd::x86_SSE")) +{ + auto M = 3; + auto N = 4; + auto axis = 0; + auto new_shape = nmtools_array{M,N}; + auto input = na::reshape(na::arange(M*N),new_shape); + auto dtype = nm::None; + auto initial = nm::None; + auto keepdims = nm::True; + X86_SSE_TEST(add.reduce,input,axis,dtype,initial,keepdims); +} + +TEST_CASE("add.reduce_2d(case2a)" * doctest::test_suite("simd::x86_SSE")) +{ + auto M = 3; + auto N = 4; + auto axis = meta::ct_v<1>; + auto new_shape = nmtools_array{M,N}; + auto input = na::reshape(na::arange(M*N),new_shape); + auto dtype = nm::None; + auto initial = nm::None; + auto keepdims = nm::True; + X86_SSE_TEST(add.reduce,input,axis,dtype,initial,keepdims); +} + +TEST_CASE("add.reduce_2d(case2b)" * doctest::test_suite("simd::x86_SSE")) +{ + auto M = 3; + auto N = 4; + auto axis = 1; + auto new_shape = nmtools_array{M,N}; + auto input = na::reshape(na::arange(M*N),new_shape); + auto dtype = nm::None; + auto initial = nm::None; + auto keepdims = nm::True; + X86_SSE_TEST(add.reduce,input,axis,dtype,initial,keepdims); +} + +TEST_CASE("add.reduce_2d(case3a)" * doctest::test_suite("simd::x86_SSE")) +{ + auto M = 4; + auto N = 10; + auto axis = meta::ct_v<0>; + auto new_shape = nmtools_array{M,N}; + auto input = na::reshape(na::arange(M*N),new_shape); + auto dtype = nm::None; + auto initial = nm::None; + auto keepdims = nm::True; + X86_SSE_TEST(add.reduce,input,axis,dtype,initial,keepdims); +} + +TEST_CASE("add.reduce_2d(case3b)" * doctest::test_suite("simd::x86_SSE")) +{ + auto M = 4; + auto N = 10; + auto axis = 0; + auto new_shape = nmtools_array{M,N}; + auto input = na::reshape(na::arange(M*N),new_shape); + auto dtype = nm::None; + auto initial = nm::None; + auto keepdims = nm::True; + X86_SSE_TEST(add.reduce,input,axis,dtype,initial,keepdims); +} + +#if 0 +TEST_CASE("add.reduce_2d(case3c)" * doctest::test_suite("simd::x86_SSE")) +{ + auto M = 4; + auto N = 10; + auto axis = meta::ct_v<1>; + auto new_shape = nmtools_array{M,N}; + auto input = na::reshape(na::arange(M*N),new_shape); + auto dtype = nm::None; + auto initial = nm::None; + auto keepdims = nm::True; + X86_SSE_TEST(add.reduce,input,axis,dtype,initial,keepdims); +} + +TEST_CASE("add.reduce_2d(case3d)" * doctest::test_suite("simd::x86_SSE")) +{ + auto M = 4; + auto N = 10; + auto axis = 1; + auto new_shape = nmtools_array{M,N}; + auto input = na::reshape(na::arange(M*N),new_shape); + auto dtype = nm::None; + auto initial = nm::None; + auto keepdims = nm::True; + X86_SSE_TEST(add.reduce,input,axis,dtype,initial,keepdims); +} +#endif + + +TEST_CASE("add.reduce_2d(case4a)" * doctest::test_suite("simd::x86_SSE")) +{ + auto M = 8; + auto N = 20; + auto axis = meta::ct_v<0>; + auto new_shape = nmtools_array{M,N}; + auto input = na::reshape(na::arange(M*N),new_shape); + auto dtype = nm::None; + auto initial = nm::None; + auto keepdims = nm::True; + X86_SSE_TEST(add.reduce,input,axis,dtype,initial,keepdims); +} + +TEST_CASE("add.reduce_2d(case4b)" * doctest::test_suite("simd::x86_SSE")) +{ + auto M = 8; + auto N = 20; + auto axis = 0; + auto new_shape = nmtools_array{M,N}; + auto input = na::reshape(na::arange(M*N),new_shape); + auto dtype = nm::None; + auto initial = nm::None; + auto keepdims = nm::True; + X86_SSE_TEST(add.reduce,input,axis,dtype,initial,keepdims); +} + + +TEST_CASE("add.reduce_2d(case5a)" * doctest::test_suite("simd::x86_SSE")) +{ + auto M = 12; + auto N = 26; + auto axis = meta::ct_v<0>; + auto new_shape = nmtools_array{M,N}; + auto input = na::reshape(na::arange(M*N),new_shape); + auto dtype = nm::None; + auto initial = nm::None; + auto keepdims = nm::True; + X86_SSE_TEST(add.reduce,input,axis,dtype,initial,keepdims); +} + +TEST_CASE("add.reduce_2d(case5b)" * doctest::test_suite("simd::x86_SSE")) +{ + auto M = 12; + auto N = 26; + auto axis = 0; + auto new_shape = nmtools_array{M,N}; + auto input = na::reshape(na::arange(M*N),new_shape); + auto dtype = nm::None; + auto initial = nm::None; + auto keepdims = nm::True; + X86_SSE_TEST(add.reduce,input,axis,dtype,initial,keepdims); +} + +TEST_CASE("add.reduce_2d(case6a)" * doctest::test_suite("simd::x86_SSE")) +{ + auto M = 13; + auto N = 29; + auto axis = meta::ct_v<0>; + auto new_shape = nmtools_array{M,N}; + auto input = na::reshape(na::arange(M*N),new_shape); + auto dtype = nm::None; + auto initial = nm::None; + auto keepdims = nm::True; + X86_SSE_TEST(add.reduce,input,axis,dtype,initial,keepdims); +} + +TEST_CASE("add.reduce_2d(case6b)" * doctest::test_suite("simd::x86_SSE")) +{ + auto M = 13; + auto N = 29; + auto axis = 0; + auto new_shape = nmtools_array{M,N}; + auto input = na::reshape(na::arange(M*N),new_shape); + auto dtype = nm::None; + auto initial = nm::None; + auto keepdims = nm::True; + X86_SSE_TEST(add.reduce,input,axis,dtype,initial,keepdims); +} \ No newline at end of file