From fa8db5cf2f9cf724fd2703353d40e3b37a8e7310 Mon Sep 17 00:00:00 2001
From: Yejing-Lai
Date: Sat, 11 Jan 2025 06:18:01 +0800
Subject: [PATCH 1/3] Support pure meta model lm_head tp (#6812)

Add lm_head tp support when no checkpoint is provided to
deepspeed.init_inference().

---------

Co-authored-by: Logan Adams <114770087+loadams@users.noreply.github.com>
Co-authored-by: Ma, Guokai
---
 deepspeed/module_inject/replace_module.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/deepspeed/module_inject/replace_module.py b/deepspeed/module_inject/replace_module.py
index e59f84bc8453..00b22aac81d8 100644
--- a/deepspeed/module_inject/replace_module.py
+++ b/deepspeed/module_inject/replace_module.py
@@ -342,13 +342,11 @@ def set_lm_head(module):
             module.lm_head, "weight") and module.lm_head.weight.is_meta:
         module.lm_head.weight = embedding_weight
     # enable tensor parallel for the last linear
-    if hasattr(module, "lm_head") and hasattr(module.lm_head,
-                                              "weight") and not module.lm_head.weight.is_meta and isinstance(
-                                                  module.lm_head, torch.nn.Linear):
+    if hasattr(module, "lm_head") and hasattr(module.lm_head, "weight") and isinstance(
+            module.lm_head, torch.nn.Linear):
         module = replace_wo_policy(module, ("lm_head", ), 0, "lm_head")
-    elif hasattr(module, "embed_out") and hasattr(module.embed_out,
-                                                  "weight") and not module.embed_out.weight.is_meta and isinstance(
-                                                      module.embed_out, torch.nn.Linear):
+    elif hasattr(module, "embed_out") and hasattr(module.embed_out, "weight") and isinstance(
+            module.embed_out, torch.nn.Linear):
         module = replace_wo_policy(module, ("embed_out", ), 0, "embed_out")
     elif hasattr(module, "language_model") and hasattr(module.language_model, "lm_head"):
         module = replace_wo_policy(module.language_model, ("lm_head", ), 0, "lm_head")
@@ -389,7 +387,6 @@ def conv2d_parallel_shard_weights(model, rank, world_size):
                                                  checkpoint=checkpoint_file)
                 pbar.update(1)
             gc.collect()
-        replaced_module = set_lm_head(replaced_module)
     # conv2d tp module replace
     # Now is for yuan model. Add model list and conv policy to decide whether to replace conv.
     if 'Yuan' in str(replaced_module):
@@ -399,6 +396,9 @@
                                          orig_class=orig_layer_impl,
                                          replace_fn=replace_fn,
                                          _replace_policy=config.injection_policy_tuple)
+    # AutoTP default set lm_head tp
+    if not config.replace_with_kernel_inject:
+        replaced_module = set_lm_head(replaced_module)
 
     quantizer = GroupQuantizer(q_int8=quantize)
     world_size = dist.get_world_size() if dist.is_initialized() else 1
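Usage note: below is a minimal sketch of the AutoTP path this patch extends,
assuming a standard Hugging Face causal-LM and a multi-rank launch such as
`deepspeed --num_gpus 2 example.py`. The model name, tp_size, and dtype are
illustrative assumptions, not part of the patch; the relevant behavior is
that deepspeed.init_inference() is called without a checkpoint argument and
without kernel injection, so set_lm_head() now shards the last linear layer.

    import torch
    import deepspeed
    from transformers import AutoModelForCausalLM

    model = AutoModelForCausalLM.from_pretrained("gpt2")

    # No checkpoint= argument here: with this patch, the AutoTP path
    # (replace_with_kernel_inject=False) still tensor-parallelizes the
    # final lm_head/embed_out linear via set_lm_head(), including when
    # the weights start out on the meta device.
    engine = deepspeed.init_inference(model,
                                      tensor_parallel={"tp_size": 2},
                                      dtype=torch.float16,
                                      replace_with_kernel_inject=False)
    # engine.module can now be used like the original model, e.g. .generate().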
From 396f8db793b37db9b11847df8245f85bc57eeaa3 Mon Sep 17 00:00:00 2001
From: Nir Sonnenschein
Date: Mon, 13 Jan 2025 18:50:22 +0200
Subject: [PATCH 2/3] Remove op compilation flags due to perf issue (#6944)

In some scenarios, some of the optimization flags passed to the ops
compiler for HPU can cause a significant performance degradation.
Remove the flags until the issue is resolved.

---
 op_builder/hpu/builder.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/op_builder/hpu/builder.py b/op_builder/hpu/builder.py
index c176a586ba49..11e710a8ee48 100644
--- a/op_builder/hpu/builder.py
+++ b/op_builder/hpu/builder.py
@@ -32,9 +32,6 @@ def builder(self):
 
     def cxx_args(self):
         args = ['-O3', '-g', '-Wno-reorder']
-        CPU_ARCH = self.cpu_arch()
-        SIMD_WIDTH = self.simd_width()
-        args += [CPU_ARCH, '-fopenmp', SIMD_WIDTH]
         return args
 
     def libraries_args(self):

From 66d3d3e94dbdfbbf6535cab66256c238983fc7c3 Mon Sep 17 00:00:00 2001
From: Logan Adams <114770087+loadams@users.noreply.github.com>
Date: Mon, 13 Jan 2025 10:34:15 -0800
Subject: [PATCH 3/3] Pin nv-a6000 workflow (#6938)

The breaking change in transformers is
https://github.com/huggingface/transformers/pull/35235. Changes are
needed before the nv-a6000 workflow can be unpinned.

---
 .github/workflows/nv-a6000.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/nv-a6000.yml b/.github/workflows/nv-a6000.yml
index 639f27498dd9..0547431e3099 100644
--- a/.github/workflows/nv-a6000.yml
+++ b/.github/workflows/nv-a6000.yml
@@ -40,8 +40,9 @@ jobs:
           python -c "import torch; print('CUDA available:', torch.cuda.is_available())"
       - name: Install transformers
         run: |
-          git clone --depth=1 https://github.com/huggingface/transformers
+          git clone https://github.com/huggingface/transformers
           cd transformers
+          git checkout v4.47.1
           git rev-parse --short HEAD
           python -m pip install .
       - name: Install deepspeed
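Reproduction note: to build the same pinned environment locally, the
checkout sequence from the workflow step above can be run as-is; the
repository URL and tag are taken verbatim from the diff, nothing else
is assumed.

    git clone https://github.com/huggingface/transformers
    cd transformers
    git checkout v4.47.1
    python -m pip install .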