Local execution tests (#1418)
* pr for debugging kernel driver issues

* Commit flake files

* current kernel tests

* softmax, flat, transpose kernel tests

* clang formatting kernel tests

* reverse, split, full dropout kernels

* rest of kernel-tests

* minor cleanup

* Restore .proj.toml

* Delete misadded directory

* merge fix

* more merge fixes

* resolved merge conflicts with repo-refactor

* code review changes

* allocator updates

* allocation util updates

* test clean up and review fixes

* fixed forward/backward pass inconsistencies, added filler tests for all tests, other review changes

* unnested test subcases and more review changes

* Add == in OpTaskBinding

* Add single operator test example

* Finish multi operator test

* added managed_stream and handle classes, other minor clean up

* fix accessor and corresponding shape clarity, other clean up

* merge error fixes

* More aggressive subcasing

* Remove comment

* managed handle and stream fixes, removed datatype dispatch from cuda_helper, other clean up

* managed handle and stream updates

* Refactoring and split tests

* Fix build

* Fix build

* Add cuda test suite

* Remove mock

* Pass task registry

* Pass slots backing and task arg acc

* Pass cost estimator test

* Fix

* PR fixes

* Fixes

* Add test to ci

* Fix test libs

* Fix build, add more fmt placeholders

* Fixes

* Fixes

* Delete file

* Fixes

* Fixes

* Fixes

* Fix includes

* Fix includes

---------

Co-authored-by: Dylan Lim <dylaneverettlim@gmail.com>
Co-authored-by: Dylan Lim <72822184+oOTigger@users.noreply.github.com>
Co-authored-by: Colin Unger <unger@stanford.edu>
4 people authored Aug 22, 2024
1 parent 8f9082f commit 5e1f349
Showing 118 changed files with 3,055 additions and 882 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/helpers/test_libs.sh
@@ -7,7 +7,7 @@ DIR="$(realpath -- "$(dirname "${BASH_SOURCE[0]}")")"
 REPO="$(realpath -- "$DIR/../../../")"
 
 TEST_LIBS=("${@/%/-tests}")
-REGEX="^$(IFS='|'; echo "${TEST_LIBS[*]}")\$"
+REGEX="^($(IFS='|'; echo "${TEST_LIBS[*]}"))\$"
 
 cd "$REPO/build-ci"
 make -j $(( $(nproc) < 2 ? 1 : $(nproc)-1 )) "${TEST_LIBS[@]}"
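The one-character change above fixes a regex precedence bug: `|` binds more loosely than the `^` and `$` anchors, so the old pattern anchored only the first and last alternatives and could match test names by mere prefix or suffix. A minimal sketch of the difference, using C++ `std::regex` purely for illustration (the precedence rule is the same in the dialect the test runner consumes; the target name below is hypothetical):

```cpp
#include <cassert>
#include <regex>
#include <string>

int main() {
  std::string name = "utils-tests-runner"; // hypothetical, should NOT match

  // Old pattern "^a|b$" parses as "(^a)|(b$)", so "utils-tests" matches as
  // a prefix of a longer name.
  assert(std::regex_search(name, std::regex("^utils-tests|pcg-tests$")));

  // Fixed pattern "^(a|b)$": the whole name must equal one alternative.
  assert(!std::regex_search(name, std::regex("^(utils-tests|pcg-tests)$")));
}
```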
4 changes: 4 additions & 0 deletions .github/workflows/per-lib-check.yml
@@ -116,6 +116,10 @@ jobs:
         run: |
           test_libs.sh substitution-generator
+      - name: Test local-execution
+        run: |
+          test_libs.sh local-execution
       - name: Generate code coverage
         run: |
           echo "gitwork: $GITHUB_WORKSPACE"
4 changes: 3 additions & 1 deletion .proj.toml
@@ -11,16 +11,18 @@ build_targets = [
   "substitutions",
   "compiler",
   "substitution-generator",
-  "local-execution",
+  "local-execution",
 ]
 
 test_targets = [
+  # "kernels-tests",
   "utils-tests",
   "op-attrs-tests",
   "pcg-tests",
   "substitutions-tests",
   "compiler-tests",
   "substitution-generator-tests",
+  "local-execution-tests"
 ]
 
 [cmake_flags_extra]
2 changes: 1 addition & 1 deletion cmake/flexflow-utils.cmake
@@ -118,7 +118,7 @@ function(ff_add_test_executable)
     ${FF_TEST_EXEC_NAME}
     ${FF_TEST_EXEC_DEPS})
 
-  target_compile_definitions(${FF_TEST_EXEC_NAME} PRIVATE FF_TEST_SUITE="${FF_TEST_EXEC_NAME}")
+  target_compile_definitions(${FF_TEST_EXEC_NAME} PRIVATE FF_TEST_SUITE="${FF_TEST_EXEC_NAME}" FF_CUDA_TEST_SUITE="cuda-${FF_TEST_EXEC_NAME}")
 
   define_ff_vars(${FF_TEST_EXEC_NAME})
   ff_set_cxx_properties(${FF_TEST_EXEC_NAME})
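With this change every test executable gets two suite names: its own (`FF_TEST_SUITE`) and a `cuda-`-prefixed one (`FF_CUDA_TEST_SUITE`), matching the "Add cuda test suite" step in the commit log. A hedged sketch of how a test file might use the two macros, assuming the doctest framework the repo's tests are written in:

```cpp
#include <doctest/doctest.h>

// FF_TEST_SUITE and FF_CUDA_TEST_SUITE expand to string literals supplied by
// the build (e.g. "kernels-tests" and "cuda-kernels-tests").

TEST_SUITE(FF_TEST_SUITE) {
  TEST_CASE("pure-CPU logic") {
    CHECK(2 + 2 == 4);
  }
}

TEST_SUITE(FF_CUDA_TEST_SUITE) {
  TEST_CASE("kernel launch (needs a CUDA device)") {
    // GPU-only checks live here, so CI can filter them out by suite name
    // on hosts without a GPU.
  }
}
```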
2 changes: 1 addition & 1 deletion lib/kernels/CMakeLists.txt
@@ -40,4 +40,4 @@ set_target_properties(
     CUDA_STANDARD 17
 )
 
-add_subdirectory(test)
\ No newline at end of file
+add_subdirectory(test)
17 changes: 17 additions & 0 deletions lib/kernels/include/kernels/accessor.h
@@ -145,13 +145,30 @@ std::vector<real_type<DT> const *>
 GenericTensorAccessorR read_only_accessor_from_write_accessor(
     GenericTensorAccessorW const &write_accessor);
 
+bool is_shape_and_dtype_equal(GenericTensorAccessorW const &acc1,
+                              GenericTensorAccessorW const &acc2);
+
+bool shape_and_dtype_matches(GenericTensorAccessorW const &accessor,
+                             ArrayShape const &expected_shape,
+                             DataType const &expected_dtype);
+
+bool shape_and_dtype_matches(GenericTensorAccessorR const &accessor,
+                             ArrayShape const &expected_shape,
+                             DataType const &expected_dtype);
+
+std::pair<ArrayShape, DataType>
+    get_shape_and_datatype(GenericTensorAccessorR const &accessor);
+std::pair<ArrayShape, DataType>
+    get_shape_and_datatype(GenericTensorAccessorW const &accessor);
+
 } // namespace FlexFlow
 
 namespace FlexFlow {
 static_assert(is_well_behaved_value_type_no_hash<GenericTensorAccessorR>::value,
               "");
 static_assert(is_well_behaved_value_type_no_hash<GenericTensorAccessorW>::value,
               "");
 
 } // namespace FlexFlow
 
 #endif
6 changes: 5 additions & 1 deletion lib/kernels/include/kernels/array_shape.h
@@ -39,7 +39,8 @@ struct ArrayShape {
   legion_dim_t last_idx() const;
   legion_dim_t neg_idx(int) const;
 
-  std::optional<std::size_t> at_maybe(std::size_t) const;
+  std::optional<std::size_t> at_maybe(legion_dim_t) const;
+  std::optional<std::size_t> at_maybe(ff_dim_t) const;
 
   ArrayShape
       sub_shape(std::optional<std::variant<ff_dim_t, legion_dim_t>> start,
@@ -54,6 +55,9 @@ size_t get_volume(ArrayShape const &);
 
 TensorShape get_tensor_shape(ArrayShape const &, DataType);
 
+std::string format_as(ArrayShape const &);
+std::ostream &operator<<(std::ostream &, ArrayShape const &);
+
 } // namespace FlexFlow
 
 #endif
22 changes: 22 additions & 0 deletions lib/kernels/include/kernels/attention_kernels.h
@@ -25,6 +25,25 @@ struct MHAPerDeviceState {
   int *hiWinIdx;
   void *reserveSpace;
   Allocator allocator;
+
+  bool operator==(MHAPerDeviceState const &other) const;
+  bool operator!=(MHAPerDeviceState const &other) const;
+
+private:
+  std::tuple<decltype(handle) const &,
+             decltype(weightSize) const &,
+             decltype(reserveSpaceSize) const &,
+             decltype(attnDesc) const &,
+             decltype(qDesc) const &,
+             decltype(kDesc) const &,
+             decltype(vDesc) const &,
+             decltype(oDesc) const &,
+             decltype(devQoSeqArray) const &,
+             decltype(devKvSeqArray) const &,
+             decltype(loWinIdx) const &,
+             decltype(hiWinIdx) const &,
+             decltype(reserveSpace) const &>
+      tie() const;
 };
 
 FF_VISITABLE_STRUCT_NO_EQ(MHAPerDeviceState,
@@ -43,6 +62,9 @@ FF_VISITABLE_STRUCT_NO_EQ(MHAPerDeviceState,
                           reserveSpace,
                           allocator);
 
+std::string format_as(MHAPerDeviceState const &x);
+std::ostream &operator<<(std::ostream &s, MHAPerDeviceState const &x);
+
 namespace Kernels {
 namespace MultiHeadAttention {
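Note that the `tie()` tuple deliberately stops at `reserveSpace`: the `Allocator` member is excluded from comparison even though it appears in `FF_VISITABLE_STRUCT_NO_EQ`. A plausible shape for the out-of-line definitions, assuming the usual tuple-of-references idiom (the actual .cc file is not shown on this page):

```cpp
// Sketch only; the real definitions live in the corresponding source file.
bool MHAPerDeviceState::operator==(MHAPerDeviceState const &other) const {
  return this->tie() == other.tie(); // element-wise tuple comparison
}

bool MHAPerDeviceState::operator!=(MHAPerDeviceState const &other) const {
  return this->tie() != other.tie();
}
```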
4 changes: 3 additions & 1 deletion lib/kernels/include/kernels/device.h
@@ -95,11 +95,13 @@ using coord_t = long long;
     exit(1);                                                                   \
   } while (0)
 
+char const *getCudaErrorString(cudaError_t status);
+
 #define checkCUDA(status)                                                      \
   do {                                                                         \
     std::stringstream _error;                                                  \
     if (status != 0) {                                                         \
-      _error << "CUDA failure: " << cudaGetErrorString(status) << " ("         \
+      _error << "CUDA failure: " << getCudaErrorString(status) << " ("         \
              << status << ")";                                                 \
       FatalError(_error.str());                                                \
     }                                                                          \
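Routing the macro through a declared function rather than calling `cudaGetErrorString` directly keeps the raw CUDA runtime call behind one definition. A minimal sketch of the definition this declaration implies (an assumption; the actual body lives in a source file not shown here):

```cpp
// Assumed definition: a thin wrapper over the CUDA runtime's error-string API.
char const *getCudaErrorString(cudaError_t status) {
  return cudaGetErrorString(status);
}
```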
3 changes: 3 additions & 0 deletions lib/kernels/include/kernels/ff_handle.h
@@ -40,6 +40,9 @@ FF_VISITABLE_STRUCT_NONSTANDARD_CONSTRUCTION(PerDeviceFFHandle,
                                              allowTensorOpMathConversion);
 #endif
 
+std::string format_as(PerDeviceFFHandle const &x);
+std::ostream &operator<<(std::ostream &s, PerDeviceFFHandle const &x);
+
 } // namespace FlexFlow
 
 #endif
17 changes: 17 additions & 0 deletions lib/kernels/include/kernels/legion_dim.h
@@ -15,6 +15,23 @@ using LegionOrdered = DimOrdered<legion_dim_t, T>;
 
 using LegionTensorDims = LegionOrdered<size_t>;
 
+template <typename T>
+FFOrdered<T>
+    ff_ordered_from_legion_ordered(LegionOrdered<T> const &legion_ordered) {
+  return FFOrdered<T>(legion_ordered.rbegin(), legion_ordered.rend());
+}
+
+template <typename T>
+std::string format_as(LegionOrdered<T> const &v) {
+  std::vector<T> as_vec(v.cbegin(), v.cend());
+  return fmt::format("<legion_ordered {}>", as_vec);
+}
+
+template <typename T>
+std::ostream &operator<<(std::ostream &s, LegionOrdered<T> const &v) {
+  return (s << fmt::to_string(v));
+}
+
 } // namespace FlexFlow
 
 #endif
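`ff_ordered_from_legion_ordered` makes the dimension-order convention explicit: iterating the legion-ordered container in reverse yields FF order, so `legion_dim_t{0}` names the dimension that FF ordering puts last. A small worked example (the values and the initializer-list constructor are assumptions):

```cpp
LegionOrdered<size_t> legion_dims = {4, 32, 8};
FFOrdered<size_t> ff_dims = ff_ordered_from_legion_ordered(legion_dims);
// ff_dims is {8, 32, 4}: the sequence is simply reversed, and
// fmt::to_string(legion_dims) renders as "<legion_ordered [4, 32, 8]>"
// (assuming fmt's range formatting for std::vector).
```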
14 changes: 1 addition & 13 deletions lib/kernels/include/kernels/profiling.h
@@ -2,20 +2,11 @@
 #define _FLEXFLOW_KERNELS_PROFILING_H
 
 #include "device.h"
+#include "kernels/profiling_settings.dtg.h"
 #include "utils/visitable.h"
 
 namespace FlexFlow {
 
-struct ProfilingSettings : public use_visitable_cmp<ProfilingSettings> {
-public:
-  ProfilingSettings() = delete;
-  ProfilingSettings(int warmup_iters, int measure_iters);
-
-public:
-  int warmup_iters;
-  int measure_iters;
-};
-
 template <typename F, typename... Ts>
 std::optional<float>
     profiling_wrapper(F const &f, bool enable_profiling, Ts &&...ts) {
@@ -59,7 +50,4 @@ std::optional<float> profiling_wrapper(F const &f,
 
 } // namespace FlexFlow
 
-VISITABLE_STRUCT(::FlexFlow::ProfilingSettings, warmup_iters, measure_iters);
-MAKE_VISIT_HASHABLE(::FlexFlow::ProfilingSettings);
-
 #endif
18 changes: 18 additions & 0 deletions lib/kernels/include/kernels/profiling_settings.struct.toml
@@ -0,0 +1,18 @@
+namespace = "FlexFlow"
+name = "ProfilingSettings"
+
+features = [
+  "eq",
+  "ord",
+  "hash",
+  "json",
+  "fmt",
+]
+
+[[fields]]
+name = "warmup_iters"
+type = "int"
+
+[[fields]]
+name = "measure_iters"
+type = "int"
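This TOML replaces the handwritten `ProfilingSettings` struct deleted from `profiling.h` above: the repo's struct generator consumes it and emits `profiling_settings.dtg.h` (the new include), with the listed features providing comparison, ordering, hashing, JSON, and fmt support. A rough sketch of what the generated type should permit, with exact spellings assumed:

```cpp
#include "kernels/profiling_settings.dtg.h"
#include <fmt/format.h>

void example() {
  FlexFlow::ProfilingSettings a{/*warmup_iters=*/5, /*measure_iters=*/100};
  FlexFlow::ProfilingSettings b{5, 100};

  bool same = (a == b);                  // from the "eq" feature
  std::string shown = fmt::to_string(a); // from the "fmt" feature
  (void)same;
  (void)shown;
}
```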
29 changes: 29 additions & 0 deletions lib/kernels/src/accessor.cc
@@ -138,4 +138,33 @@ GenericTensorAccessorR read_only_accessor_from_write_accessor(
       writable.data_type, writable.shape, req<void const *>(writable.ptr)};
 }
 
+bool is_shape_and_dtype_equal(GenericTensorAccessorW const &acc1,
+                              GenericTensorAccessorW const &acc2) {
+  return acc1.shape == acc2.shape && acc1.data_type == acc2.data_type;
+}
+
+bool shape_and_dtype_matches(GenericTensorAccessorW const &accessor,
+                             ArrayShape const &expected_shape,
+                             DataType const &expected_dtype) {
+  return accessor.shape == expected_shape &&
+         accessor.data_type == expected_dtype;
+}
+
+bool shape_and_dtype_matches(GenericTensorAccessorR const &accessor,
+                             ArrayShape const &expected_shape,
+                             DataType const &expected_dtype) {
+  return accessor.shape == expected_shape &&
+         accessor.data_type == expected_dtype;
+}
+
+std::pair<ArrayShape, DataType>
+    get_shape_and_datatype(GenericTensorAccessorR const &accessor) {
+  return std::make_pair(accessor.shape, accessor.data_type);
+}
+
+std::pair<ArrayShape, DataType>
+    get_shape_and_datatype(GenericTensorAccessorW const &accessor) {
+  return std::make_pair(accessor.shape, accessor.data_type);
+}
+
 } // namespace FlexFlow
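These helpers let kernel test code validate an output accessor against an input in one call rather than comparing fields piecewise. A hypothetical call site (function name assumed):

```cpp
#include <cassert>

void require_matching_shapes(FlexFlow::GenericTensorAccessorR const &input,
                             FlexFlow::GenericTensorAccessorW const &output) {
  auto [shape, dtype] = FlexFlow::get_shape_and_datatype(input);
  assert(FlexFlow::shape_and_dtype_matches(output, shape, dtype));
}
```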
37 changes: 33 additions & 4 deletions lib/kernels/src/array_shape.cc
@@ -39,7 +39,15 @@ std::size_t ArrayShape::num_elements() const {
 }
 
 std::size_t ArrayShape::operator[](legion_dim_t idx) const {
-  return dims[idx];
+  return dims.at(idx);
+}
+
+std::size_t ArrayShape::at(legion_dim_t idx) const {
+  return dims.at(idx);
+}
+
+std::size_t ArrayShape::at(ff_dim_t idx) const {
+  return dims.at(legion_dim_from_ff_dim(idx, this->num_dims()));
 }
 
 ArrayShape ArrayShape::sub_shape(
@@ -48,16 +56,37 @@ ArrayShape ArrayShape::sub_shape(
   NOT_IMPLEMENTED();
 }
 
-std::optional<std::size_t> ArrayShape::at_maybe(std::size_t index) const {
-  if (index < dims.size()) {
-    return dims.at(legion_dim_t(index));
+std::optional<std::size_t> ArrayShape::at_maybe(legion_dim_t index) const {
+  if (index.value < dims.size()) {
+    return dims.at(index);
   } else {
     return std::nullopt;
   }
 }
 
+std::optional<std::size_t> ArrayShape::at_maybe(ff_dim_t index) const {
+  return this->at_maybe(legion_dim_from_ff_dim(index, this->num_dims()));
+}
+
 size_t get_volume(ArrayShape const &shape) {
   return shape.get_volume();
 }
 
 TensorShape get_tensor_shape(ArrayShape const &shape, DataType dtype) {
   return TensorShape{TensorDims{ff_ordered_from_legion_ordered(shape.dims)},
                      dtype};
 }
 
+std::string format_as(ArrayShape const &x) {
+  std::ostringstream oss;
+  oss << "<ArrayShape";
+  oss << " dims=" << x.dims;
+  oss << ">";
+  return oss.str();
+}
+
+std::ostream &operator<<(std::ostream &s, ArrayShape const &x) {
+  return (s << fmt::to_string(x));
+}
+
 } // namespace FlexFlow
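The new `at_maybe` overloads make the indexing convention part of the signature instead of a raw `std::size_t`. A worked example (values assumed; `legion_dim_from_ff_dim` is taken to map ff dim `i` in an `n`-dim shape to legion dim `n - 1 - i`, consistent with `ff_ordered_from_legion_ordered`):

```cpp
ArrayShape shape{std::vector<std::size_t>{8, 32, 4}}; // legion order

assert(shape.at_maybe(legion_dim_t{0}) == 8);
assert(shape.at_maybe(ff_dim_t{0}) == 4); // ff dim 0 -> legion dim 2
assert(shape.at_maybe(legion_dim_t{3}) == std::nullopt); // out of range: empty, no throw
```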
27 changes: 8 additions & 19 deletions lib/kernels/src/cuda/cuda_helper.cu
@@ -220,25 +220,14 @@ __host__ void
 ffStatus_t
     cudnnSetTensorDescriptorFromArrayShape(cudnnTensorDescriptor_t tensor,
                                            ArrayShape const &shape) {
-  std::vector<std::size_t> reversed_dims(shape.dims.begin(), shape.dims.end());
-  reversed(reversed_dims);
-  ArrayShape flipped(reversed_dims);
-
-  if (flipped.get_dim() == 5) {
-    assert(flipped[legion_dim_t(0)] == 1);
-    flipped = flipped.sub_shape(legion_dim_t(1), std::nullopt);
-  }
-
-  assert(flipped.get_dim() > 0);
-  assert(flipped.get_dim() < 4);
-
-  return cudnnSetTensor4dDescriptor(tensor,
-                                    CUDNN_TENSOR_NCHW,
-                                    CUDNN_DATA_FLOAT,
-                                    flipped.at_maybe(0).value_or(1),
-                                    flipped.at_maybe(1).value_or(2),
-                                    flipped.at_maybe(2).value_or(3),
-                                    flipped.at_maybe(3).value_or(3));
+  return cudnnSetTensor4dDescriptor(
+      tensor,
+      CUDNN_TENSOR_NCHW,
+      CUDNN_DATA_FLOAT,
+      shape.at_maybe(legion_dim_t{0}).value_or(1),
+      shape.at_maybe(legion_dim_t{1}).value_or(1),
+      shape.at_maybe(legion_dim_t{2}).value_or(1),
+      shape.at_maybe(legion_dim_t{3}).value_or(1));
 }
 
 cudnnDataType_t ff_to_cudnn_datatype(DataType type) {
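The rewrite drops the reverse-copy, the 5-d special case, and the rank asserts: the descriptor is now built straight from the legion-ordered dims, and every missing dimension defaults to extent 1 rather than the old placeholder values 2 and 3. For example (shape values assumed):

```cpp
ArrayShape shape{std::vector<std::size_t>{8, 32}}; // 2-d, legion order
// The call above reduces to:
// cudnnSetTensor4dDescriptor(tensor, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT,
//                            /*n=*/8, /*c=*/32, /*h=*/1, /*w=*/1);
```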