Softmax kernels (#593)
reyna-abhyankar authored and goliaro committed Feb 2, 2023
1 parent 07a3617 · commit eaedc29
Showing 5 changed files with 151 additions and 127 deletions.
52 changes: 52 additions & 0 deletions include/flexflow/ops/kernels/softmax_kernels.h
@@ -0,0 +1,52 @@
#ifndef _FLEXFLOW_OPS_KERNELS_SOFTMAX_KERNELS_H
#define _FLEXFLOW_OPS_KERNELS_SOFTMAX_KERNELS_H

#include "flexflow/device.h"
#include "flexflow/fftype.h"
#include "flexflow/op_meta.h"
#include "flexflow/ops/softmax.h"

namespace FlexFlow {

class SoftmaxMeta : public OpMeta {
public:
SoftmaxMeta(FFHandler handle,
Softmax const *softmax,
Legion::Domain const &input_domain);
#if defined(FF_USE_CUDA) || defined(FF_USE_HIP_CUDA)
cudnnTensorDescriptor_t inputTensor;
#else
miopenTensorDescriptor_t inputTensor;
#endif
bool profiling;
int dim;
char op_name[MAX_OPNAME];
};

namespace Kernels {
namespace Softmax {

void forward_kernel_wrapper(SoftmaxMeta const *m,
float const *input_ptr,
float *output_ptr);

void backward_kernel_wrapper(SoftmaxMeta const *m,
float *input_grad_ptr,
float const *output_grad_ptr,
size_t num_elements);

namespace Internal {
void forward_kernel(SoftmaxMeta const *m,
float const *input_ptr,
float *output_ptr,
ffStream_t stream);
void backward_kernel(float *input_grad_ptr,
float const *output_grad_ptr,
size_t num_elements,
ffStream_t stream);
} // namespace Internal
} // namespace Softmax
} // namespace Kernels
} // namespace FlexFlow

#endif // _FLEXFLOW_OPS_KERNELS_SOFTMAX_KERNELS_H
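For reference, a minimal sketch of how a call site would migrate from the old static members of Softmax to the namespaced wrappers declared in this new header. The function names and arguments here are hypothetical placeholders; the real call sites live in the task implementations, which are not part of this diff excerpt.

#include "flexflow/ops/kernels/softmax_kernels.h"
#include <cstddef>

namespace FlexFlow {

// Hypothetical forward task body: previously this would have called the
// static Softmax::forward_kernel_wrapper; it now calls the free function
// in the Kernels::Softmax namespace.
void run_softmax_forward(SoftmaxMeta const *m,
                         float const *input_ptr,
                         float *output_ptr) {
  Kernels::Softmax::forward_kernel_wrapper(m, input_ptr, output_ptr);
}

// Hypothetical backward task body: the wrapper keeps the same signature,
// taking the meta object, raw gradient pointers, and the element count.
void run_softmax_backward(SoftmaxMeta const *m,
                          float *input_grad_ptr,
                          float const *output_grad_ptr,
                          size_t num_elements) {
  Kernels::Softmax::backward_kernel_wrapper(
      m, input_grad_ptr, output_grad_ptr, num_elements);
}

} // namespace FlexFlow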
39 changes: 0 additions & 39 deletions include/flexflow/ops/softmax.h
@@ -1,33 +1,13 @@
#ifndef _FLEXFLOW_SOFTMAX_H
#define _FLEXFLOW_SOFTMAX_H

#include "flexflow/device.h"
#include "flexflow/fftype.h"
#include "flexflow/layer.h"
#include "flexflow/node.h"
#include "flexflow/op_meta.h"
#include "flexflow/operator.h"
#include "flexflow/ops/softmax_params.h"

namespace FlexFlow {

class Softmax;

class SoftmaxMeta : public OpMeta {
public:
SoftmaxMeta(FFHandler handle,
Softmax const *softmax,
Legion::Domain const &input_domain);
#if defined(FF_USE_CUDA) || defined(FF_USE_HIP_CUDA)
cudnnTensorDescriptor_t inputTensor;
#else
miopenTensorDescriptor_t inputTensor;
#endif
bool profiling;
int dim;
char op_name[MAX_OPNAME];
};

class Softmax : public Op {
public:
using Params = SoftmaxParams;
@@ -63,28 +43,9 @@ class Softmax : public Op {
std::vector<Legion::PhysicalRegion> const &regions,
Legion::Context ctx,
Legion::Runtime *runtime);
void init_meta(SoftmaxMeta *m,
Legion::Rect<2> const &input,
Legion::Rect<2> const &output) const;
bool measure_operator_cost(Simulator *sim,
MachineView const &pc,
CostMetrics &cost_metrics) const override;
static void forward_kernel(SoftmaxMeta const *m,
float const *input_ptr,
float *output_ptr,
ffStream_t stream);
static void forward_kernel_wrapper(SoftmaxMeta const *m,
float const *input_ptr,
float *output_ptr);
static void backward_kernel(float *input_grad_ptr,
float const *output_grad_ptr,
size_t num_elements,
ffStream_t stream);
static void backward_kernel_wrapper(SoftmaxMeta const *m,
float *input_grad_ptr,
float const *output_grad_ptr,
size_t num_elements);

Params get_params() const;

private:
90 changes: 47 additions & 43 deletions src/ops/softmax.cpp → src/ops/kernels/softmax.cpp
@@ -13,7 +13,7 @@
* limitations under the License.
*/

#include "flexflow/ops/softmax.h"
#include "flexflow/ops/kernels/softmax_kernels.h"
#include "flexflow/utils/hash_utils.h"
#include "flexflow/utils/hip_helper.h"
#include <hip/hip_runtime.h>
@@ -33,29 +33,12 @@ SoftmaxMeta::SoftmaxMeta(FFHandler handler,
std::strcpy(op_name, softmax->name);
}

/* static */
void Softmax::forward_kernel(SoftmaxMeta const *m,
float const *input_ptr,
float *output_ptr,
hipStream_t stream) {
checkCUDNN(miopenSetStream(m->handle.dnn, stream));
namespace Kernels {
namespace Softmax {

float alpha = 1.0f, beta = 0.0f;
checkCUDNN(miopenSoftmaxForward_V2(m->handle.dnn,
&alpha,
m->inputTensor,
input_ptr,
&beta,
m->inputTensor,
output_ptr,
MIOPEN_SOFTMAX_ACCURATE,
MIOPEN_SOFTMAX_MODE_CHANNEL));
}

/* static */
void Softmax::forward_kernel_wrapper(SoftmaxMeta const *m,
float const *input_ptr,
float *output_ptr) {
void forward_kernel_wrapper(SoftmaxMeta const *m,
float const *input_ptr,
float *output_ptr) {
hipStream_t stream;
checkCUDA(get_legion_stream(&stream));

@@ -65,7 +48,7 @@ void Softmax::forward_kernel_wrapper(SoftmaxMeta const *m,
hipEventCreate(&t_end);
hipEventRecord(t_start, stream);
}
Softmax::forward_kernel(m, input_ptr, output_ptr, stream);
Internal::forward_kernel(m, input_ptr, output_ptr, stream);
if (m->profiling) {
hipEventRecord(t_end, stream);
checkCUDA(hipEventSynchronize(t_end));
@@ -81,23 +64,10 @@ void Softmax::forward_kernel_wrapper(SoftmaxMeta const *m,
}
}

/* static */
void Softmax::backward_kernel(float *input_grad_ptr,
float const *output_grad_ptr,
size_t num_elements,
hipStream_t stream) {
checkCUDA(hipMemcpyAsync(input_grad_ptr,
output_grad_ptr,
num_elements * sizeof(float),
hipMemcpyDeviceToDevice,
stream));
}

/* static */
void Softmax::backward_kernel_wrapper(SoftmaxMeta const *m,
float *input_grad_ptr,
float const *output_grad_ptr,
size_t num_elements) {
void backward_kernel_wrapper(SoftmaxMeta const *m,
float *input_grad_ptr,
float const *output_grad_ptr,
size_t num_elements) {
hipStream_t stream;
checkCUDA(get_legion_stream(&stream));

@@ -107,7 +77,7 @@ void Softmax::backward_kernel_wrapper(SoftmaxMeta const *m,
hipEventCreate(&t_end);
hipEventRecord(t_start, stream);
}
Softmax::backward_kernel(
Internal::backward_kernel(
input_grad_ptr, output_grad_ptr, num_elements, stream);
if (m->profiling) {
hipEventRecord(t_end, stream);
@@ -124,4 +94,38 @@ void Softmax::backward_kernel_wrapper(SoftmaxMeta const *m,
}
}

}; // namespace FlexFlow
namespace Internal {

void forward_kernel(SoftmaxMeta const *m,
float const *input_ptr,
float *output_ptr,
hipStream_t stream) {
checkCUDNN(miopenSetStream(m->handle.dnn, stream));

float alpha = 1.0f, beta = 0.0f;
checkCUDNN(miopenSoftmaxForward_V2(m->handle.dnn,
&alpha,
m->inputTensor,
input_ptr,
&beta,
m->inputTensor,
output_ptr,
MIOPEN_SOFTMAX_ACCURATE,
MIOPEN_SOFTMAX_MODE_CHANNEL));
}

void backward_kernel(float *input_grad_ptr,
float const *output_grad_ptr,
size_t num_elements,
hipStream_t stream) {
checkCUDA(hipMemcpyAsync(input_grad_ptr,
output_grad_ptr,
num_elements * sizeof(float),
hipMemcpyDeviceToDevice,
stream));
}

} // namespace Internal
} // namespace Softmax
} // namespace Kernels
} // namespace FlexFlow
90 changes: 47 additions & 43 deletions src/ops/softmax.cu → src/ops/kernels/softmax.cu
@@ -13,7 +13,7 @@
* limitations under the License.
*/

#include "flexflow/ops/softmax.h"
#include "flexflow/ops/kernels/softmax_kernels.h"
#include "flexflow/utils/cuda_helper.h"
#include "flexflow/utils/hash_utils.h"

@@ -32,29 +32,12 @@ SoftmaxMeta::SoftmaxMeta(FFHandler handler,
std::strcpy(op_name, softmax->name);
}

/* static */
void Softmax::forward_kernel(SoftmaxMeta const *m,
float const *input_ptr,
float *output_ptr,
cudaStream_t stream) {
checkCUDNN(cudnnSetStream(m->handle.dnn, stream));
namespace Kernels {
namespace Softmax {

float alpha = 1.0f, beta = 0.0f;
checkCUDNN(cudnnSoftmaxForward(m->handle.dnn,
CUDNN_SOFTMAX_ACCURATE,
CUDNN_SOFTMAX_MODE_CHANNEL,
&alpha,
m->inputTensor,
input_ptr,
&beta,
m->inputTensor,
output_ptr));
}

/* static */
void Softmax::forward_kernel_wrapper(SoftmaxMeta const *m,
float const *input_ptr,
float *output_ptr) {
void forward_kernel_wrapper(SoftmaxMeta const *m,
float const *input_ptr,
float *output_ptr) {
cudaStream_t stream;
checkCUDA(get_legion_stream(&stream));

@@ -64,7 +47,7 @@ void Softmax::forward_kernel_wrapper(SoftmaxMeta const *m,
cudaEventCreate(&t_end);
cudaEventRecord(t_start, stream);
}
Softmax::forward_kernel(m, input_ptr, output_ptr, stream);
Internal::forward_kernel(m, input_ptr, output_ptr, stream);
if (m->profiling) {
cudaEventRecord(t_end, stream);
checkCUDA(cudaEventSynchronize(t_end));
@@ -80,23 +63,10 @@ void Softmax::forward_kernel_wrapper(SoftmaxMeta const *m,
}
}

/* static */
void Softmax::backward_kernel(float *input_grad_ptr,
float const *output_grad_ptr,
size_t num_elements,
cudaStream_t stream) {
checkCUDA(cudaMemcpyAsync(input_grad_ptr,
output_grad_ptr,
num_elements * sizeof(float),
cudaMemcpyDeviceToDevice,
stream));
}

/* static */
void Softmax::backward_kernel_wrapper(SoftmaxMeta const *m,
float *input_grad_ptr,
float const *output_grad_ptr,
size_t num_elements) {
void backward_kernel_wrapper(SoftmaxMeta const *m,
float *input_grad_ptr,
float const *output_grad_ptr,
size_t num_elements) {
cudaStream_t stream;
checkCUDA(get_legion_stream(&stream));

@@ -106,7 +76,7 @@ void Softmax::backward_kernel_wrapper(SoftmaxMeta const *m,
cudaEventCreate(&t_end);
cudaEventRecord(t_start, stream);
}
Softmax::backward_kernel(
Internal::backward_kernel(
input_grad_ptr, output_grad_ptr, num_elements, stream);
if (m->profiling) {
cudaEventRecord(t_end, stream);
@@ -123,4 +93,38 @@ void Softmax::backward_kernel_wrapper(SoftmaxMeta const *m,
}
}

}; // namespace FlexFlow
namespace Internal {

void forward_kernel(SoftmaxMeta const *m,
float const *input_ptr,
float *output_ptr,
cudaStream_t stream) {
checkCUDNN(cudnnSetStream(m->handle.dnn, stream));

float alpha = 1.0f, beta = 0.0f;
checkCUDNN(cudnnSoftmaxForward(m->handle.dnn,
CUDNN_SOFTMAX_ACCURATE,
CUDNN_SOFTMAX_MODE_CHANNEL,
&alpha,
m->inputTensor,
input_ptr,
&beta,
m->inputTensor,
output_ptr));
}

void backward_kernel(float *input_grad_ptr,
float const *output_grad_ptr,
size_t num_elements,
cudaStream_t stream) {
checkCUDA(cudaMemcpyAsync(input_grad_ptr,
output_grad_ptr,
num_elements * sizeof(float),
cudaMemcpyDeviceToDevice,
stream));
}

} // namespace Internal
} // namespace Softmax
} // namespace Kernels
} // namespace FlexFlow