Commit 104ba3c: linting
goliaro authored and yingchen21 committed Sep 11, 2024
1 parent 73468c5 commit 104ba3c
Showing 17 changed files with 282 additions and 221 deletions.
15 changes: 8 additions & 7 deletions include/flexflow/ops/inc_multihead_self_attention.h
@@ -126,13 +126,14 @@ class IncMultiHeadSelfAttention : public Op {
int shard_id,
GenericTensorAccessorR const &input,
GenericTensorAccessorW const &output);
- static void peft_bwd_kernel_wrapper(IncMultiHeadSelfAttentionMeta *m,
- BatchConfig const *bc,
- int shard_id,
- GenericTensorAccessorW const &input_grad,
- // GenericTensorAccessorR const &weight,
- GenericTensorAccessorR const &output_grad);
- // GenericTensorAccessorR const &bias);
+ static void
+ peft_bwd_kernel_wrapper(IncMultiHeadSelfAttentionMeta *m,
+ BatchConfig const *bc,
+ int shard_id,
+ GenericTensorAccessorW const &input_grad,
+ // GenericTensorAccessorR const &weight,
+ GenericTensorAccessorR const &output_grad);
+ // GenericTensorAccessorR const &bias);
Params get_params() const;

public:
52 changes: 27 additions & 25 deletions inference/models/llama.cc
@@ -93,19 +93,20 @@ void LLAMA::create_llama_model(FFModel &ff,
}
att_norm->print("att_norm");
Tensor qkv_proj = ff.dense(
- att_norm,
- llama_config.hidden_size * 3, // q, k, v. need to change if want to remove replication. (q_heads + 2 * kv_heads) * proj_size
- AC_MODE_NONE,
- false, // seems like llama does not use bias
- DT_NONE, // what is this
- nullptr, // ?
- nullptr, // ?
- nullptr, // ?
- REG_MODE_NONE, // no regularization
- 0.0f, // no dropout
- std::string("layers." + std::to_string(i) + ".self_attn.qkv_proj")
- .c_str()
- );
+ att_norm,
+ llama_config.hidden_size *
+ 3, // q, k, v. need to change if want to remove replication.
+ // (q_heads + 2 * kv_heads) * proj_size
+ AC_MODE_NONE,
+ false, // seems like llama does not use bias
+ DT_NONE, // what is this
+ nullptr, // ?
+ nullptr, // ?
+ nullptr, // ?
+ REG_MODE_NONE, // no regularization
+ 0.0f, // no dropout
+ std::string("layers." + std::to_string(i) + ".self_attn.qkv_proj")
+ .c_str());
qkv_proj->print("qkv_proj");

Tensor mha;
@@ -189,18 +190,19 @@

Tensor mha_input = mha;
mha_input->print("mha_input");
- mha = ff.dense(mha_input,
- llama_config.hidden_size,
- AC_MODE_NONE,
- false,
- DT_NONE,
- nullptr,
- nullptr,
- nullptr,
- REG_MODE_NONE,
- 0.0f,
- std::string("layers." + std::to_string(i) + ".self_attn.o_proj")
- .c_str());
+ mha = ff.dense(
+ mha_input,
+ llama_config.hidden_size,
+ AC_MODE_NONE,
+ false,
+ DT_NONE,
+ nullptr,
+ nullptr,
+ nullptr,
+ REG_MODE_NONE,
+ 0.0f,
+ std::string("layers." + std::to_string(i) + ".self_attn.o_proj")
+ .c_str());
mha->print("mha");

// step 2: SILU activaion
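
The comment on the qkv_proj dense layer in the hunk above sizes the fused projection as hidden_size * 3 and notes that the intended size without K/V replication is (q_heads + 2 * kv_heads) * proj_size. A minimal, self-contained sketch of that sizing; the helper name and parameters are illustrative, not part of the FlexFlow API:

#include <cassert>

// Output width of a fused Q/K/V projection under grouped-query attention.
// With num_kv_heads == num_q_heads this reduces to hidden_size * 3, which is
// what the llama.cc code above currently requests.
int fused_qkv_out_dim(int hidden_size, int num_q_heads, int num_kv_heads) {
  assert(hidden_size % num_q_heads == 0);
  int proj_size = hidden_size / num_q_heads; // per-head projection size
  return (num_q_heads + 2 * num_kv_heads) * proj_size;
}

// Example: hidden_size = 4096 with 32 query heads and 8 KV heads gives
// proj_size = 128 and a fused width of (32 + 16) * 128 = 6144, versus
// 4096 * 3 = 12288 when K and V are replicated across all query heads.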
5 changes: 2 additions & 3 deletions src/ops/fused.cu
@@ -457,8 +457,7 @@ __host__ void
bc,
task->index_point.point_data[0],
my_input_accessor[0],
- my_output_accessor[0]
- );
+ my_output_accessor[0]);
break;
}
case OP_TREE_INC_MULTIHEAD_SELF_ATTENTION: {
@@ -1042,7 +1041,7 @@ __host__ void FusedOp::peft_bwd_task(Task const *task,
my_input_grad_accessor[0],
// my_weight_accessor[0],
my_output_grad_accessor[0]);
- // biases);
+ // biases);
break;
}
case OP_TREE_INC_MULTIHEAD_SELF_ATTENTION:
25 changes: 15 additions & 10 deletions src/ops/inc_multihead_self_attention.cc
@@ -394,8 +394,8 @@ IncMultiHeadSelfAttention::IncMultiHeadSelfAttention(
dims[i] = _input->dims[i];
}
dims[0].size = _embed_dim;
- // Currently require no parallelism along this dim, is this consistent with the
- // removal of the previous assert?
+ // Currently require no parallelism along this dim, is this consistent with
+ // the removal of the previous assert?
assert(dims[0].degree == 1);
if (allocate_weights) {
// Create weight tensor
@@ -600,10 +600,13 @@ OpMeta *IncMultiHeadSelfAttention::init_task(
attn->num_kv_heads / attn->tensor_parallelism_degree +
(attn->num_kv_heads % attn->tensor_parallelism_degree != 0);

- if(attn->oProjSize != output.domain.hi()[0] - output.domain.lo()[0] + 1) {
- printf("attn o_proj size %d does not match output domain %d\n", attn->oProjSize, output.domain.hi()[0] - output.domain.lo()[0] + 1);
+ if (attn->oProjSize != output.domain.hi()[0] - output.domain.lo()[0] + 1) {
+ printf("attn o_proj size %d does not match output domain %d\n",
+ attn->oProjSize,
+ output.domain.hi()[0] - output.domain.lo()[0] + 1);
}
- // assert(attn->oProjSize == output.domain.hi()[0] - output.domain.lo()[0] + 1);
+ // assert(attn->oProjSize == output.domain.hi()[0] - output.domain.lo()[0] +
+ // 1);

Memory gpu_mem = get_proc_mem(Machine::get_machine(), task->target_proc);
MemoryAllocator gpu_mem_allocator(gpu_mem);
@@ -709,7 +712,7 @@ void IncMultiHeadSelfAttention::inference_task(

GenericTensorAccessorR input = helperGetGenericTensorAccessorRO(
m->input_type[0], regions[0], task->regions[0], FID_DATA, ctx, runtime);
- GenericTensorAccessorW output = helperGetGenericTensorAccessorWO(
+ GenericTensorAccessorW output = helperGetGenericTensorAccessorWO(
m->output_type[0], regions[1], task->regions[1], FID_DATA, ctx, runtime);

Domain input_domain = runtime->get_index_space_domain(
@@ -724,7 +727,7 @@
assert(task->index_point.get_dim() == 1);

IncMultiHeadSelfAttention::inference_kernel_wrapper(
- m, bc, task->index_point.point_data[0], input, output);
+ m, bc, task->index_point.point_data[0], input, output);

if (m->inference_debugging) {
assert(task->index_point.get_dim() == 1);
@@ -822,9 +825,11 @@ void IncMultiHeadSelfAttention::peft_bwd_task(
GenericTensorAccessorW input_grad = helperGetGenericTensorAccessorRW(
m->input_type[0], regions[0], task->regions[0], FID_DATA, ctx, runtime);
// GenericTensorAccessorR weight = helperGetGenericTensorAccessorRO(
- // m->weight_type[0], regions[1], task->regions[1], FID_DATA, ctx, runtime);
+ // m->weight_type[0], regions[1], task->regions[1], FID_DATA, ctx,
+ // runtime);
// GenericTensorAccessorW output_grad = helperGetGenericTensorAccessorRW(
- // m->output_type[0], regions[2], task->regions[2], FID_DATA, ctx, runtime);
+ // m->output_type[0], regions[2], task->regions[2], FID_DATA, ctx,
+ // runtime);
GenericTensorAccessorW output_grad = helperGetGenericTensorAccessorRW(
m->output_type[0], regions[1], task->regions[1], FID_DATA, ctx, runtime);
GenericTensorAccessorR biases;
@@ -862,7 +867,7 @@
input_grad,
// weight,
output_grad);
- // biases);
+ // biases);

if (m->inference_debugging) {
assert(task->index_point.get_dim() == 1);
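
The init_task hunk above reports a mismatch between oProjSize and the output region's innermost extent with a printf, while the original assert stays commented out. A self-contained sketch of that extent check; DomainBounds and the helper names are stand-ins for the Legion Domain accessors, not the actual FlexFlow or Legion types:

#include <cstdio>

// Stand-in for the lo()/hi() pair of a Legion Domain; bounds are inclusive.
struct DomainBounds {
  long long lo[3];
  long long hi[3];
};

// Extent of dimension d: inclusive upper bound minus lower bound plus one,
// mirroring output.domain.hi()[0] - output.domain.lo()[0] + 1 in init_task.
long long extent(DomainBounds const &dom, int d) {
  return dom.hi[d] - dom.lo[d] + 1;
}

void check_o_proj_size(int o_proj_size, DomainBounds const &output_domain) {
  if (o_proj_size != extent(output_domain, 0)) {
    // With the assert commented out, a mismatch is only reported, not fatal.
    std::printf("attn o_proj size %d does not match output domain %lld\n",
                o_proj_size,
                extent(output_domain, 0));
  }
}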
2 changes: 1 addition & 1 deletion src/ops/inc_multihead_self_attention.cpp
@@ -938,7 +938,7 @@ void inference_kernel(IncMultiHeadSelfAttentionMeta *m,
compute_qkv_kernel(m,
bc,
shard_id,
- // input_ptr,
+ // input_ptr,
weight_ptr,
static_cast<DT *>(m->devQKVProjArray),
bias_ptr,
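
The inference_kernel hunk above passes a weight pointer to compute_qkv_kernel and writes the projected values into m->devQKVProjArray. As a rough CPU-only reference for what a fused QKV projection of this kind computes (row-major layout, bias omitted), assuming nothing about the actual FlexFlow CUDA/HIP kernel:

#include <cstddef>
#include <vector>

// One GEMM produces the Q, K and V projections for every token, laid out
// contiguously per token: qkv_out has shape [num_tokens, qkv_dim], where
// qkv_dim covers the q, k and v widths together.
void fused_qkv_projection(std::vector<float> const &input,  // [num_tokens, hidden]
                          std::vector<float> const &weight, // [hidden, qkv_dim]
                          std::vector<float> &qkv_out,      // [num_tokens, qkv_dim]
                          std::size_t num_tokens,
                          std::size_t hidden,
                          std::size_t qkv_dim) {
  qkv_out.assign(num_tokens * qkv_dim, 0.0f);
  for (std::size_t t = 0; t < num_tokens; ++t) {
    for (std::size_t h = 0; h < hidden; ++h) {
      float const x = input[t * hidden + h];
      for (std::size_t o = 0; o < qkv_dim; ++o) {
        qkv_out[t * qkv_dim + o] += x * weight[h * qkv_dim + o];
      }
    }
  }
}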
(The remaining 12 changed files in this commit are not shown here.)
