Commit

fix

goliaro committed Sep 17, 2023
1 parent ba06333 commit 01be9a5
Showing 6 changed files with 78 additions and 26 deletions.
11 changes: 11 additions & 0 deletions include/flexflow/flexflow_c.h
@@ -259,6 +259,17 @@ flexflow_tensor_t flexflow_model_add_layer_norm(flexflow_model_t handle,
bool use_bias,
char const *name);

flexflow_tensor_t *flexflow_model_add_add_bias_residual_layer_norm(
flexflow_model_t handle,
const flexflow_tensor_t input,
const flexflow_tensor_t residual,
int n,
int *axes,
bool elementwise_affine,
float eps,
bool use_bias,
char const *name);

flexflow_tensor_t
flexflow_model_add_batch_matmul(flexflow_model_t handle,
const flexflow_tensor_t a,
4 changes: 2 additions & 2 deletions include/flexflow/model.h
@@ -532,9 +532,9 @@ class FFModel {
DataType data_type = DT_NONE,
char const *name = NULL);
// Add an add_bias_residual_layer_norm layer
- std::pair<Tensor, Tensor>
- add_bias_residual_layer_norm(const Tensor input,
+ void add_bias_residual_layer_norm(const Tensor input,
const Tensor residual,
+ Tensor *outputs,
std::vector<int> const &axes,
bool elementwise_affine,
float eps,
23 changes: 12 additions & 11 deletions inference/models/opt.cc
@@ -197,17 +197,18 @@ void OPT::create_opt_model(FFModel &ff,

layer_name =
"layers_" + std::to_string(i) + "_add_bias_residual_layer_norm";
- auto pair = ff.add_bias_residual_layer_norm(
- mha,
- residual,
- axes,
- opt_config.layer_norm_elementwise_affine,
- 1e-05,
- true,
- DT_NONE,
- layer_name.c_str());
- Tensor added = pair.first;
- Tensor final_norm = pair.second;
+ Tensor added_final_norm[2];
+ ff.add_bias_residual_layer_norm(mha,
+ residual,
+ added_final_norm,
+ axes,
+ opt_config.layer_norm_elementwise_affine,
+ 1e-05,
+ true,
+ DT_NONE,
+ layer_name.c_str());
+ Tensor added = added_final_norm[0];
+ Tensor final_norm = added_final_norm[1];

//--------linear fc1 fc2 ----------
layer_name = "layers_" + std::to_string(i) + "_fc1";
5 changes: 2 additions & 3 deletions python/flexflow/core/flexflow_cffi.py
@@ -1595,10 +1595,9 @@ def layer_norm(self, input, axes, elementwise_affine=True, eps=1e-5, use_bias =
def add_bias_residual_layer_norm(self, input, residual, axes, elementwise_affine=True, eps=1e-5, use_bias = True, name=None):
c_name = get_c_name(name)
c_axes = ffi.new("int[]", axes)
- # TODO: figure out how to get two outputs from C++
- handle1, handle2 = ffc().flexflow_model_add_add_bias_residual_layer_norm(self.handle, input.handle, residual.handle, len(axes), c_axes, elementwise_affine, eps, use_bias, c_name)
+ handles_array = ffc().flexflow_model_add_add_bias_residual_layer_norm(self.handle, input.handle, residual.handle, len(axes), c_axes, elementwise_affine, eps, use_bias, c_name)
self.add_layer(OpType.ADD_BIAS_RESIDUAL_LAYERNORM, name)
- return Tensor(handle, owner_op_type=OpType.ADD_BIAS_RESIDUAL_LAYERNORM)
+ return Tensor(handles_array[0], owner_op_type=OpType.ADD_BIAS_RESIDUAL_LAYERNORM), Tensor(handles_array[1], owner_op_type=OpType.ADD_BIAS_RESIDUAL_LAYERNORM)

def batch_matmul(self, A, B, a_seq_length_dim=None, b_seq_length_dim=None, name=None):
"""Layer that applied batched matrix multiplication onto two input Tensors, :attr:`output = x * y`.
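For context, a minimal usage sketch of the updated Python binding, which now returns both fused outputs. This is not part of the commit: ffmodel, attn_out, and residual are hypothetical names, and axes=[0] assumes normalization over the innermost (hidden) dimension in FlexFlow's reversed dimension order.

    # Hypothetical sketch: the binding now returns both fused outputs.
    added, final_norm = ffmodel.add_bias_residual_layer_norm(
        attn_out,   # e.g. attention output
        residual,   # residual branch
        axes=[0],
        elementwise_affine=True,
        eps=1e-5,
        use_bias=True,
        name="layers_0_add_bias_residual_layer_norm",
    )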
40 changes: 40 additions & 0 deletions src/c/flexflow_c.cc
@@ -638,6 +638,46 @@ flexflow_tensor_t flexflow_model_add_layer_norm(flexflow_model_t handle_,
return FFCObjectWrapper::wrap(tensor);
}

flexflow_tensor_t *flexflow_model_add_add_bias_residual_layer_norm(
flexflow_model_t handle_,
const flexflow_tensor_t input_,
const flexflow_tensor_t residual_,
int n,
int *axes,
bool elementwise_affine,
float eps,
bool use_bias,
char const *name) {
FFModel *handle = FFCObjectWrapper::unwrap(handle_);
const Tensor input = FFCObjectWrapper::unwrap(input_);
const Tensor residual = FFCObjectWrapper::unwrap(residual_);
Tensor tensor_outputs[2];
std::vector<int> axes_vec;
for (int i = 0; i < n; i++) {
axes_vec.push_back(axes[i]);
}
handle->add_bias_residual_layer_norm(input,
residual,
tensor_outputs,
axes_vec,
elementwise_affine,
eps,
use_bias,
input->data_type,
name);
DEBUG_PRINT("[LayerNorm] new Tensor %p, input %p, elementwise_affine %d, eps "
"%f, name %s",
tensor,
input,
elementwise_affine,
eps,
name);
flexflow_tensor_t tensor_outputs_wrapped[2] = {
FFCObjectWrapper::wrap(tensor_outputs[0]),
FFCObjectWrapper::wrap(tensor_outputs[1])};
return tensor_outputs_wrapped;
}

flexflow_tensor_t flexflow_model_add_batch_matmul(flexflow_model_t handle_,
const flexflow_tensor_t a_,
const flexflow_tensor_t b_,
21 changes: 11 additions & 10 deletions src/ops/add_bias_residual_layer_norm.cc
@@ -61,15 +61,15 @@ AddBiasResidualLayerNormParams AddBiasResidualLayerNorm::get_params() const {
return params;
}

- std::pair<Tensor, Tensor>
- FFModel::add_bias_residual_layer_norm(const Tensor input,
- const Tensor residual,
- std::vector<int> const &axes,
- bool elementwise_affine,
- float eps,
- bool use_bias,
- DataType data_type,
- char const *name) {
+ void FFModel::add_bias_residual_layer_norm(const Tensor input,
+ const Tensor residual,
+ Tensor *outputs,
+ std::vector<int> const &axes,
+ bool elementwise_affine,
+ float eps,
+ bool use_bias,
+ DataType data_type,
+ char const *name) {
// In PyTorch, axes must be the sizes of the last axes.size() dimensions of
// the input tensor. However, since the tensor dimensions are reversed in
// FlexFlow (batch size is the last dimension), we require that axes must be
@@ -177,7 +177,8 @@ std::pair<Tensor, Tensor>
ln->add_int_vector_property("axes", axes);
ln->add_float_property("eps", eps);
layers.push_back(ln);
- return std::make_pair<Tensor &, Tensor &>(ln->outputs[0], ln->outputs[1]);
+ outputs[0] = ln->outputs[0];
+ outputs[1] = ln->outputs[1];
}

Op *AddBiasResidualLayerNorm::create_operator_from_layer(
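For readers unfamiliar with the operator, here is a rough NumPy reference of the computation the fused layer is expected to perform, matching the two outputs (added, final_norm) that the API above now returns. This is an illustrative sketch, not code from this commit: the bias term is labeled attn_bias as an assumption, and normalizing over the last NumPy axis corresponds to axes = [0] under FlexFlow's reversed dimension ordering described in the comment above.

    import numpy as np

    def add_bias_residual_layer_norm_ref(x, attn_bias, residual, gamma, beta, eps=1e-5):
        # Fuse: add the bias and the residual, then layer-normalize over the last axis.
        added = x + attn_bias + residual
        mean = added.mean(axis=-1, keepdims=True)
        var = added.var(axis=-1, keepdims=True)
        final_norm = (added - mean) / np.sqrt(var + eps) * gamma + beta
        return added, final_norm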
