CPU Kernel Tests #1439

Open · wants to merge 20 commits into base: repo-refactor
1 change: 1 addition & 0 deletions lib/kernels/CMakeLists.txt
@@ -30,6 +30,7 @@ target_link_libraries(
cudnn
nccl
utils
pcg
)

define_ff_vars(${project_target})
127 changes: 117 additions & 10 deletions lib/kernels/include/kernels/accessor.h
@@ -5,11 +5,14 @@
#include "device.h"
#include "kernels/ff_handle.h"
#include "op-attrs/datatype.h"
#include "pcg/device_type.dtg.h"
#include "utils/exception.h"
#include "utils/required.h"

namespace FlexFlow {

struct Allocator;

class GenericTensorAccessorW {
public:
template <DataType DT>
@@ -28,15 +31,68 @@
double *get_double_ptr() const;
half *get_half_ptr() const;

GenericTensorAccessorW() = delete;

GenericTensorAccessorW(DataType data_type,
ArrayShape const &shape,
void *ptr,
DeviceType device_type);

bool operator==(GenericTensorAccessorW const &) const;
bool operator!=(GenericTensorAccessorW const &) const;

template <DataType DT, typename... Indices>
real_type_t<DT> &at(Indices... indices) {
if (this->device_type != DeviceType::CPU) {
throw mk_runtime_error("Calling at() on non-CPU allocated tensor");
}
if (this->data_type != DT) {
throw mk_runtime_error(fmt::format(
"Invalid access data type ({} != {})", this->data_type, DT));

Check warning on line 51 in lib/kernels/include/kernels/accessor.h

View check run for this annotation

Codecov / codecov/patch

lib/kernels/include/kernels/accessor.h#L50-L51

Added lines #L50 - L51 were not covered by tests
}

using T = real_type_t<DT>;

T *data_ptr = static_cast<T *>(this->ptr);
size_t offset = calculate_index_offset({static_cast<size_t>(indices)...});

return data_ptr[offset];
}

template <DataType DT, typename... Indices>
real_type_t<DT> const &at(Indices... indices) const {
if (this->device_type != DeviceType::CPU) {
throw mk_runtime_error("Calling at() on non-CPU allocated tensor");
}
if (this->data_type != DT) {
throw mk_runtime_error(fmt::format(
"Invalid access data type ({} != {})", this->data_type, DT));
}

using T = real_type_t<DT>;

T const *data_ptr = static_cast<T const *>(this->ptr);
size_t offset = calculate_index_offset({static_cast<size_t>(indices)...});

return data_ptr[offset];
}

public:
DataType data_type;
ArrayShape shape;
req<void *> ptr;
void *ptr;
DeviceType device_type;

private:
std::tuple<decltype(data_type) const &,
decltype(shape) const &,
decltype(ptr) const &,
decltype(device_type) const &>
tie() const;

size_t calculate_index_offset(
std::initializer_list<size_t> const &indices) const;
};
FF_VISITABLE_STRUCT_NONSTANDARD_CONSTRUCTION(GenericTensorAccessorW,
data_type,
shape,
ptr);

std::string format_as(GenericTensorAccessorW const &);
std::ostream &operator<<(std::ostream &, GenericTensorAccessorW const &);
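
Note: a minimal usage sketch of the new at<DT>() accessor added above (hypothetical example; the DataType::FLOAT element type, the 2-D indexing, and the fill_first_row helper are illustrative assumptions, not part of this diff):

#include "kernels/accessor.h"

using namespace FlexFlow;

// Fill the first row of a CPU-resident float accessor. at<DT>() throws if
// the accessor is not CPU-allocated or if DT does not match data_type.
void fill_first_row(GenericTensorAccessorW &accessor, size_t num_cols) {
  for (size_t col = 0; col < num_cols; col++) {
    accessor.at<DataType::FLOAT>(0, col) = static_cast<float>(col);
  }
}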
@@ -59,15 +115,50 @@
double const *get_double_ptr() const;
half const *get_half_ptr() const;

GenericTensorAccessorR() = delete;

GenericTensorAccessorR(DataType data_type,
ArrayShape const &shape,
void const *ptr,
DeviceType device_type);

bool operator==(GenericTensorAccessorR const &) const;
bool operator!=(GenericTensorAccessorR const &) const;

template <DataType DT, typename... Indices>
real_type_t<DT> const &at(Indices... indices) const {
if (this->device_type != DeviceType::CPU) {
throw mk_runtime_error("Calling at() on non-CPU allocated tensor");
}
if (this->data_type != DT) {
throw mk_runtime_error(fmt::format(
"Invalid access data type ({} != {})", this->data_type, DT));

Check warning on line 135 in lib/kernels/include/kernels/accessor.h

View check run for this annotation

Codecov / codecov/patch

lib/kernels/include/kernels/accessor.h#L134-L135

Added lines #L134 - L135 were not covered by tests
}

using T = real_type_t<DT>;

T const *data_ptr = static_cast<T const *>(this->ptr);
size_t offset = calculate_index_offset({static_cast<size_t>(indices)...});

return data_ptr[offset];
}

public:
DataType data_type;
ArrayShape shape;
req<void const *> ptr;
void const *ptr;
DeviceType device_type;

private:
std::tuple<decltype(data_type) const &,
decltype(shape) const &,
decltype(ptr) const &,
decltype(device_type) const &>
tie() const;

size_t calculate_index_offset(
std::initializer_list<size_t> const &indices) const;
};
FF_VISITABLE_STRUCT_NONSTANDARD_CONSTRUCTION(GenericTensorAccessorR,
data_type,
shape,
ptr);

std::string format_as(GenericTensorAccessorR const &);
std::ostream &operator<<(std::ostream &, GenericTensorAccessorR const &);
@@ -166,6 +257,22 @@
std::pair<ArrayShape, DataType>
get_shape_and_datatype(GenericTensorAccessorW const &accessor);

void transfer_data_between_accessors(
GenericTensorAccessorW &dst_accessor,
GenericTensorAccessorR const &src_accessor);

void transfer_data_between_accessors(
GenericTensorAccessorW &dst_accessor,
GenericTensorAccessorW const &src_accessor);

GenericTensorAccessorR
copy_tensor_accessor_r(GenericTensorAccessorR const &src_accessor,
Allocator &allocator);

GenericTensorAccessorW
copy_tensor_accessor_w(GenericTensorAccessorW const &src_accessor,
Allocator &allocator);

} // namespace FlexFlow

namespace FlexFlow {
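Note: a sketch of how the new copy helper composes with at<>() (hypothetical; assumes cpu_allocator is any Allocator whose get_allocation_device_type() returns DeviceType::CPU, and a float element type):

// Pull a device-resident tensor into host memory so its elements can be
// inspected with at<>(), which only works on CPU-allocated accessors.
float read_first_element(GenericTensorAccessorR const &device_accessor,
                         Allocator &cpu_allocator) {
  GenericTensorAccessorR host_accessor =
      copy_tensor_accessor_r(device_accessor, cpu_allocator);
  return host_accessor.at<DataType::FLOAT>(0, 0);
}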
7 changes: 6 additions & 1 deletion lib/kernels/include/kernels/allocation.h
@@ -1,7 +1,7 @@
#ifndef _FLEXFLOW_KERNELS_ALLOCATION_H
#define _FLEXFLOW_KERNELS_ALLOCATION_H

#include "accessor.h"
#include "kernels/accessor.h"
#include <cstddef>
#include <memory>

@@ -11,16 +11,21 @@ struct IAllocator {
virtual void *allocate(size_t) = 0;
virtual void deallocate(void *) = 0;

virtual DeviceType get_allocation_device_type() const = 0;

virtual ~IAllocator() = default;
};

struct Allocator {
Allocator() = delete;

GenericTensorAccessorW allocate_tensor(TensorShape const &tensor_shape);

void *allocate(size_t mem_size);
void deallocate(void *ptr);

DeviceType get_allocation_device_type() const;

template <typename T, typename... Args>
static typename std::enable_if<std::is_base_of<IAllocator, T>::value,
Allocator>::type
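Note: a minimal sketch of the extended Allocator surface (hypothetical; assumes a 1-D float TensorShape, whose construction is outside this diff):

#include "kernels/allocation.h"
#include <cassert>

using namespace FlexFlow;

// Query where allocations will live before relying on CPU-only APIs
// such as at<>().
GenericTensorAccessorW make_host_tensor(Allocator &allocator,
                                        TensorShape const &shape) {
  assert(allocator.get_allocation_device_type() == DeviceType::CPU);
  GenericTensorAccessorW accessor = allocator.allocate_tensor(shape);
  accessor.at<DataType::FLOAT>(0) = 1.0f; // safe: CPU-resident
  return accessor;
}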
6 changes: 2 additions & 4 deletions lib/kernels/include/kernels/attention_kernels.h
@@ -64,8 +64,7 @@ FF_VISITABLE_STRUCT_NO_EQ(MHAPerDeviceState,
std::string format_as(MHAPerDeviceState const &x);
std::ostream &operator<<(std::ostream &s, MHAPerDeviceState const &x);

namespace Kernels {
namespace MultiHeadAttention {
namespace Kernels::MultiHeadAttention {

MHAPerDeviceState init_kernel(PerDeviceFFHandle const &,
Allocator &,
@@ -105,8 +104,7 @@ void backward_kernel(ffStream_t stream,
void cleanup_kernel(Allocator &allocator,
MHAPerDeviceState const &device_state);

} // namespace MultiHeadAttention
} // namespace Kernels
} // namespace Kernels::MultiHeadAttention
} // namespace FlexFlow

#endif
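
Note: this PR consistently collapses nested namespace blocks into C++17 nested namespace definitions, as here and in the headers below; the two spellings declare the same entities:

// Before: three nested blocks.
namespace FlexFlow { namespace Kernels { namespace MultiHeadAttention {
void f();
}}} // namespace FlexFlow::Kernels::MultiHeadAttention

// After: C++17 nested namespace definition, identical meaning.
namespace FlexFlow::Kernels::MultiHeadAttention {
void g();
}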
8 changes: 2 additions & 6 deletions lib/kernels/include/kernels/batch_matmul_kernels.h
@@ -5,9 +5,7 @@
#include "kernels/allocation.h"
#include "kernels/ff_handle.h"

namespace FlexFlow {
namespace Kernels {
namespace BatchMatmul {
namespace FlexFlow::Kernels::BatchMatmul {

void forward_kernel(ffStream_t stream,
PerDeviceFFHandle const &handle,
@@ -35,8 +33,6 @@ void backward_kernel(ffStream_t stream,
int k,
int batch);

} // namespace BatchMatmul
} // namespace Kernels
} // namespace FlexFlow
} // namespace FlexFlow::Kernels::BatchMatmul

#endif
6 changes: 2 additions & 4 deletions lib/kernels/include/kernels/batch_norm_kernels.h
@@ -43,8 +43,7 @@ FF_VISITABLE_STRUCT_NONSTANDARD_CONSTRUCTION(BatchNormPerDeviceState,
output_w,
relu);

namespace Kernels {
namespace BatchNorm {
namespace Kernels::BatchNorm {

BatchNormPerDeviceState init_kernel(PerDeviceFFHandle handle,
Allocator allocator,
@@ -81,8 +80,7 @@ void cleanup_kernel(Allocator allocator,
bool relu,
float *runningMean);

} // namespace BatchNorm
} // namespace Kernels
} // namespace Kernels::BatchNorm
} // namespace FlexFlow

#endif
10 changes: 2 additions & 8 deletions lib/kernels/include/kernels/cast_kernels.h
@@ -3,12 +3,8 @@

#include "device.h"
#include "kernels/accessor.h"
#include "kernels/ff_handle.h"
#include "op-attrs/activation.dtg.h"

namespace FlexFlow {
namespace Kernels {
namespace Cast {
namespace FlexFlow::Kernels::Cast {

void forward_kernel(ffStream_t stream,
GenericTensorAccessorR const &input,
@@ -22,8 +18,6 @@ void backward_kernel(ffStream_t stream,
DataType input_type,
DataType output_type);

} // namespace Cast
} // namespace Kernels
} // namespace FlexFlow
} // namespace FlexFlow::Kernels::Cast

#endif
21 changes: 21 additions & 0 deletions lib/kernels/include/kernels/cast_kernels_cpu.h
@@ -0,0 +1,21 @@
#ifndef _FLEXFLOW_OPS_KERNELS_CAST_KERNELS_CPU_H
#define _FLEXFLOW_OPS_KERNELS_CAST_KERNELS_CPU_H

#include "device.h"
#include "kernels/accessor.h"

namespace FlexFlow::Kernels::Cast {

void cpu_forward_kernel(GenericTensorAccessorR const &input,
GenericTensorAccessorW const &output,
DataType input_type,
DataType output_type);

void cpu_backward_kernel(GenericTensorAccessorR const &input,
GenericTensorAccessorW const &output,
DataType input_type,
DataType output_type);

} // namespace FlexFlow::Kernels::Cast

#endif
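
Note: a hypothetical usage sketch of the new CPU cast kernel (the float-to-int32 pairing and the helper name are illustrative assumptions):

#include "kernels/cast_kernels_cpu.h"

// Cast a CPU-resident float tensor to int32, element-wise.
void cast_on_host(FlexFlow::GenericTensorAccessorR const &input,
                  FlexFlow::GenericTensorAccessorW const &output) {
  FlexFlow::Kernels::Cast::cpu_forward_kernel(
      input, output, FlexFlow::DataType::FLOAT, FlexFlow::DataType::INT32);
}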
8 changes: 2 additions & 6 deletions lib/kernels/include/kernels/combine_kernels.h
@@ -4,9 +4,7 @@
#include "device.h"
#include "kernels/accessor.h"

namespace FlexFlow {
namespace Kernels {
namespace Combine {
namespace FlexFlow::Kernels::Combine {

void forward_kernel(ffStream_t stream,
GenericTensorAccessorR const &input,
@@ -16,8 +14,6 @@ void backward_kernel(ffStream_t stream,
GenericTensorAccessorR const &output_grad,
GenericTensorAccessorW const &input_grad);

} // namespace Combine
} // namespace Kernels
} // namespace FlexFlow
} // namespace FlexFlow::Kernels::Combine

#endif // _FLEXFLOW_OPS_KERNELS_COMBINE_KERNELS_H
17 changes: 17 additions & 0 deletions lib/kernels/include/kernels/combine_kernels_cpu.h
@@ -0,0 +1,17 @@
#ifndef _FLEXFLOW_OPS_KERNELS_COMBINE_KERNELS_CPU_H
#define _FLEXFLOW_OPS_KERNELS_COMBINE_KERNELS_CPU_H

#include "device.h"
#include "kernels/accessor.h"

namespace FlexFlow::Kernels::Combine {

void cpu_forward_kernel(GenericTensorAccessorR const &input,
GenericTensorAccessorW const &output);

void cpu_backward_kernel(GenericTensorAccessorR const &output_grad,
GenericTensorAccessorW const &input_grad);

} // namespace FlexFlow::Kernels::Combine

#endif // _FLEXFLOW_OPS_KERNELS_COMBINE_KERNELS_CPU_H
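
Note: a hypothetical sketch of the new Combine CPU kernels (Combine's forward pass is presumably an element-wise copy, which would explain why the call carries no extra parameters):

#include "kernels/combine_kernels_cpu.h"

// Forward: copy input into output on host; backward mirrors it for grads.
void combine_on_host(FlexFlow::GenericTensorAccessorR const &input,
                     FlexFlow::GenericTensorAccessorW const &output) {
  FlexFlow::Kernels::Combine::cpu_forward_kernel(input, output);
}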
8 changes: 2 additions & 6 deletions lib/kernels/include/kernels/concat_kernels.h
@@ -4,9 +4,7 @@
#include "device.h"
#include "kernels/accessor.h"

namespace FlexFlow {
namespace Kernels {
namespace Concat {
namespace FlexFlow::Kernels::Concat {

void forward_kernel(ffStream_t stream,
GenericTensorAccessorW const &output,
@@ -18,8 +16,6 @@ void backward_kernel(ffStream_t stream,
std::vector<GenericTensorAccessorW> const &input_grads,
ff_dim_t axis);

} // namespace Concat
} // namespace Kernels
} // namespace FlexFlow
} // namespace FlexFlow::Kernels::Concat

#endif
6 changes: 2 additions & 4 deletions lib/kernels/include/kernels/conv_2d_kernels.h
@@ -34,8 +34,7 @@ FF_VISITABLE_STRUCT_NONSTANDARD_CONSTRUCTION(Conv2DPerDeviceState,
bwdFilterAlgo,
bwdDataAlgo);

namespace Kernels {
namespace Conv2D {
namespace Kernels::Conv2D {

Conv2DPerDeviceState init_kernel(PerDeviceFFHandle handle,
std::optional<Activation> activation,
@@ -70,8 +69,7 @@ void backward_kernel(ffStream_t stream,
float *bias_grad_ptr,
std::optional<Activation> activation);

} // namespace Conv2D
} // namespace Kernels
} // namespace Kernels::Conv2D
} // namespace FlexFlow

#endif // _FLEXFLOW_OPS_KERNELS_CONV_2D_KERNELS_H