Skip to content

Commit

Permalink
Merge branch 'SYCL-2020' into various-fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
bader authored Jun 19, 2024
2 parents 004bcbf + 7225b94 commit 7d63aba
Show file tree
Hide file tree
Showing 8 changed files with 357 additions and 55 deletions.
219 changes: 219 additions & 0 deletions test_plans/sycl_ext_oneapi_enqueue_functions.asciidoc
Original file line number Diff line number Diff line change
@@ -0,0 +1,219 @@
:sectnums:
:xrefstyle: short

= Test plan for sycl_ext_oneapi_enqueue_functions

This is a test plan for the API described in
https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/experimental/sycl_ext_oneapi_enqueue_functions.asciidoc[sycl_ext_oneapi_enqueue_functions].


== Testing scope

=== Device coverage

All of the tests described below are performed only on the default device that
is selected on the CTS command line.

=== Feature test macro

All of the tests should use `#ifdef SYCL_EXT_ONEAPI_ENQUEUE_FUNCTIONS` so they can be skipped
if feature is not supported.

== Tests

* All following tests run with either a queue or handler.
* Tests that require a handler should create one as follows:
```C++
using syclex = sycl::ext::oneapi::experimental;

syclex::submit(q, [&](sycl::handler& h) {
// ...
}
```

=== Single Task

Define a simple task kernel to compute a value. For each `single_task` overload, launch this kernel using the free-function and the equivalent member function. Assert that the outputs computed from the two launches are the same. The `single_task` overloads are the following:

```C++
namespace sycl::ext::oneapi::experimental {

template <typename KernelName, typename KernelType>
void single_task(sycl::queue q, const KernelType& k);

template <typename KernelName, typename KernelType>
void single_task(sycl::handler h, const KernelType& k);

template <typename Args...>
void single_task(sycl::queue q, const sycl::kernel& k, Args&&... args);

template <typename Args...>
void single_task(sycl::handler h, const sycl::kernel& k, Args&&... args);

}
```

=== Basic Kernel

Define a basic kernel that computes a set of values. Launch this kernel using each `parallel_for` overload and the equivalent `parallel_for` member function. Assert that the output for both kernel launches are the same. The `parallel_for` overloads are the following:

```C++
namespace sycl::ext::oneapi::experimental {

template <typename KernelName, int Dimensions,
typename KernelType, typename... Reductions>
void parallel_for(sycl::queue q, sycl::range<Dimensions> r,
const KernelType& k, Reductions&&... reductions);

template <typename KernelName, int Dimensions,
typename KernelType, typename... Reductions>
void parallel_for(sycl::handler h, sycl::range<Dimensions> r,
const KernelType& k, Reductions&&... reductions);

template <typename KernelName, int Dimensions,
typename Properties,
typename KernelType, typename... Reductions>
void parallel_for(sycl::queue q,
launch_config<sycl::range<Dimensions>, Properties> c,
const KernelType& k, Reductions&&... reductions);

template <typename KernelName, int Dimensions,
typename Properties, typename KernelType, typename... Reductions>
void parallel_for(sycl::handler h,
launch_config<sycl::range<Dimensions>, Properties> c,
const KernelType& k, Reductions&&... reductions);

template <typename KernelName, int Dimensions, typename... Args>
void parallel_for(sycl::queue q, sycl::range<Dimensions> r,
const sycl::kernel& k, Args&&... args);

template <typename KernelName, int Dimensions, typename... Args>
void parallel_for(sycl::handler h, sycl::range<Dimensions> r,
const sycl::kernel& k, Args&&... args);

template <typename KernelName, int Dimensions,
typename Properties, typename... Args>
void parallel_for(sycl::queue q,
launch_config<sycl::range<Dimensions>, Properties> c,
const sycl::kernel& k, Args&& args...);

template <typename KernelName, int Dimensions,
typename Properties, typename... Args>
void parallel_for(sycl::handler h,
launch_config<sycl::range<Dimensions>, Properties> c,
const sycl::kernel& k, Args&& args...);

}
```

=== ND-range Kernel

Define an ND-range kernel that computes a set of values. Launch this kernel using each `nd_launch` overload and the equivalent `parallel_for` member function. Assert that the output for both kernel launches are the same. The `nd_launch` overloads are the following:

```C++
namespace sycl::ext::oneapi::experimental {

template <typename KernelName, int Dimensions,
typename KernelType, typename... Reductions>
void nd_launch(sycl::queue q, sycl::nd_range<Dimensions> r,
const KernelType& k, Reductions&&... reductions);

template <typename KernelName, int Dimensions,
typename KernelType, typename... Reductions>
void nd_launch(sycl::handler h, sycl::nd_range<Dimensions> r,
const KernelType& k, Reductions&&... reductions);

template <typename KernelName, int Dimensions,
typename Properties,
typename KernelType, typename... Reductions>
void nd_launch(sycl::queue q,
launch_config<sycl::nd_range<Dimensions>, Properties> c,
const KernelType& k, Reductions&&... reductions);

template <typename KernelName, int Dimensions,
typename Properties,
typename KernelType, typename... Reductions>
void nd_launch(sycl::handler h,
launch_config<sycl::nd_range<Dimensions>, Properties> c,
const KernelType& k, Reductions&&... reductions);

template <typename KernelName, int Dimensions, typename... Args>
void nd_launch(sycl::queue q, sycl::nd_range<Dimensions> r,
const sycl::kernel& k, Args&&... args);

template <typename KernelName, int Dimensions, typename... Args>
void nd_launch(sycl::handler h, sycl::nd_range<Dimensions> r,
const sycl::kernel& k, Args&&... args);

template <typename KernelName, int Dimensions,
typename Properties, typename... Args>
void nd_launch(sycl::queue q,
launch_config<sycl::nd_range<Dimensions>, Properties> c,
const sycl::kernel& k, Args&& args...);

template <typename KernelName, int Dimensions,
typename Properties, typename... Args>
void nd_launch(sycl::handler h,
launch_config<sycl::nd_range<Dimensions>, Properties> c,
const sycl::kernel& k, Args&& args...);

}
```

=== Memory Operations

For the `memcpy`, `copy`, `memset`, and `fill` memory operations, create one or more test buffers and assert that they have the correct values after the operation completes. For the `prefetch` and `mem_advise` operations, assert that they can be called without throwing an exception. The list of memory operations to test are the following:

```C++
namespace sycl::ext::oneapi::experimental {

void memcpy(sycl::queue q, void* dest, const void* src, size_t numBytes);

void memcpy(sycl::handler h, void* dest, const void* src, size_t numBytes);

template <typename T>
void copy(sycl::queue q, const T* src, T* dest, size_t count);

template <typename T>
void copy(sycl::handler h, const T* src, T* dest, size_t count);

void memset(sycl::queue q, void* ptr, int value, size_t numBytes);

void memset(sycl::handler h, void* ptr, int value, size_t numBytes);

template <typename T>
void fill(sycl::queue q, T* ptr, const T& pattern, size_t count);

template <typename T>
void fill(sycl::handler h, T* ptr, const T& pattern, size_t count);

void prefetch(sycl::queue q, void* ptr, size_t numBytes);

void prefetch(sycl::handler h, void* ptr, size_t numBytes);

void mem_advise(sycl::queue q, void* ptr, size_t numBytes, int advice);

void mem_advise(sycl::handler h, void* ptr, size_t numBytes, int advice);

}
```

=== Command Barriers

These tests should use `#ifdef SYCL_EXT_ONEAPI_ENQUEUE_BARRIER` so they can be skipped
if feature is not supported. For each barrier function, enqueue a some commands before and after enqueuing the barrier. Assert that the commands enqueued after the barrier do not execute until those enqueued before the barrier have completed. The barrier functions are the following:

```C++
namespace sycl::ext::oneapi::experimental {

void barrier(sycl::queue q);

void barrier(sycl::handler h);

void partial_barrier(sycl::queue q, const std::vector<sycl::event>& events);

void partial_barrier(sycl::handler h, const std::vector<sycl::event>& events);

}
```

73 changes: 59 additions & 14 deletions tests/common/common_python_vec.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,9 @@
# ************************************************************************

from collections import defaultdict
from string import Template
from itertools import product
from math import ceil, floor
from string import Template

class Data:
signs = [True, False]
Expand Down Expand Up @@ -384,6 +385,8 @@ def get_space_count(line):
def add_spaces_to_lines(count, string):
"""Adds a number of spaces to the start of each line"""
all_lines = string.splitlines(True)
if not all_lines:
return ''
new_string = all_lines[0]
for i in range(1, len(all_lines)):
new_string += ' ' * count + all_lines[i]
Expand Down Expand Up @@ -619,7 +622,7 @@ def substitute_swizzles_templates(type_str, size, index_subset, value_subset, co
test_string)
return string

def gen_swizzle_test(type_str, convert_type_str, as_type_str, size):
def gen_swizzle_test(type_str, convert_type_str, as_type_str, size, num_batches, batch_index):
string = ''
if size > 4:
test_string = SwizzleData.swizzle_full_test_template.substitute(
Expand Down Expand Up @@ -654,26 +657,68 @@ def gen_swizzle_test(type_str, convert_type_str, as_type_str, size):
', '.join(Data.swizzle_elem_list_dict[size][:size]) + '>',
test_string)
return string
# size <=4

# Case when size <=4
# The test files generated for swizzles of vectors of size <= 4 are enormous and are hurting
# compilation times of the suite so we batch the tests according to two command line arguments
# in num_batches and batch_index that will dictate how many tests we can put in a single test file.
# Specifically, the test cases are to be split in num_batches different groups aka batches
# and the batch_index tells the script which batch in particular we want to output to a test file during this run.
# Both of these arguments, num_batches and batch_index, are controlled by the cmake test generation script.

total_tests = 0
for length in range(size, size + 1):
for index_subset, value_subset in zip(
product(
Data.swizzle_xyzw_list_dict[size][:size],
repeat=length),
product(Data.vals_list_dict[size][:size], repeat=length)):
total_tests += 1
batch_size = ceil(total_tests / num_batches)
cur_index = 0
cur_batch = 0
for length in range(size, size + 1):
for index_subset, value_subset in zip(
product(
Data.swizzle_xyzw_list_dict[size][:size],
repeat=length),
product(Data.vals_list_dict[size][:size], repeat=length)):
string += substitute_swizzles_templates(type_str, size,
index_subset, value_subset, convert_type_str, as_type_str)
cur_batch = floor(cur_index / batch_size)
if cur_batch > batch_index:
break
if cur_batch == batch_index:
string += substitute_swizzles_templates(type_str, size,
index_subset, value_subset, convert_type_str, as_type_str)
cur_index += 1

# Same logic as above repeated for the case when size == 4
if size == 4:
total_tests = 0
for length in range(size, size + 1):
for index_subset, value_subset in zip(
product(
Data.swizzle_rgba_list_dict[size][:size],
repeat=length),
product(
Data.vals_list_dict[size][:size], repeat=length)):
string += substitute_swizzles_templates(type_str, size,
index_subset, value_subset, convert_type_str, as_type_str)
total_tests += 1
batch_size = ceil(total_tests / num_batches)
cur_index = 0
cur_batch = 0
for length in range(size, size + 1):
for index_subset, value_subset in zip(
product(
Data.swizzle_rgba_list_dict[size][:size],
repeat=length),
product(
Data.vals_list_dict[size][:size], repeat=length)):
cur_batch = floor(cur_index / batch_size)
if cur_batch > batch_index:
break
if cur_batch == batch_index:
string += substitute_swizzles_templates(type_str, size,
index_subset, value_subset, convert_type_str, as_type_str)
cur_index += 1
return string


Expand Down Expand Up @@ -724,7 +769,7 @@ def get_reverse_type(type_str):
# Reason for the TODO above is that this function and several more it calls are
# not really common and only used to generate vector_swizzles test.
# FIXME: The test (main template and others) should be updated to use Catch2
def make_swizzles_tests(type_str, input_file, output_file):
def make_swizzles_tests(type_str, input_file, output_file, num_batches, batch_index):
if type_str == 'bool':
Data.vals_list_dict = cast_to_bool(Data.vals_list_dict)

Expand All @@ -733,15 +778,15 @@ def make_swizzles_tests(type_str, input_file, output_file):
convert_type_str = get_reverse_type(type_str)
as_type_str = get_reverse_type(type_str)
swizzles[0] = gen_swizzle_test(type_str, convert_type_str,
as_type_str, 1)
as_type_str, 1, num_batches, batch_index)
swizzles[1] = gen_swizzle_test(type_str, convert_type_str,
as_type_str, 2)
as_type_str, 2, num_batches, batch_index)
swizzles[2] = gen_swizzle_test(type_str, convert_type_str,
as_type_str, 3)
as_type_str, 3, num_batches, batch_index)
swizzles[3] = gen_swizzle_test(type_str, convert_type_str,
as_type_str, 4)
as_type_str, 4, num_batches, batch_index)
swizzles[4] = gen_swizzle_test(type_str, convert_type_str,
as_type_str, 8)
as_type_str, 8, num_batches, batch_index)
swizzles[5] = gen_swizzle_test(type_str, convert_type_str,
as_type_str, 16)
as_type_str, 16, num_batches, batch_index)
write_swizzle_source_file(swizzles, input_file, output_file, type_str)
2 changes: 1 addition & 1 deletion tests/common/vector_swizzles.template
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ class TEST_NAME : public util::test_base {
}
};

util::test_proxy<TEST_NAME> proxy;
inline util::test_proxy<TEST_NAME> proxy;

} /* namespace vector_swizzles_$TYPE_NAME__ */
$ENDIF
7 changes: 6 additions & 1 deletion tests/event/event.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,13 @@ class delayed_host_event : public resolvable_host_event {
: resolvable_host_event() {
future = std::async(std::launch::async, [this, delay] {
std::this_thread::sleep_for(delay);
resolve();
// For the purpose of the tests it's important that `resolved` will be
// true whenever SYCL event is completed. As such, we have to set this
// flag before actually resolving the `future` because otherwise the
// current thread can go to sleep before the flag is set and the checks
// would be failing.
resolved = true;
resolve();
});
}

Expand Down
Loading

0 comments on commit 7d63aba

Please sign in to comment.