Skip to content

Commit

Permalink
Reverts 6b2de64
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 669670355
  • Loading branch information
yashk2810 authored and tensorflower-gardener committed Aug 31, 2024
1 parent a09c66e commit 2fe2937
Show file tree
Hide file tree
Showing 6 changed files with 82 additions and 4 deletions.
1 change: 1 addition & 0 deletions third_party/xla/xla/service/gpu/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -1654,6 +1654,7 @@ xla_test(
"//xla/service:hlo_cost_analysis",
"//xla/service:hlo_memory_scheduler",
"//xla/service:hlo_rematerialization",
"//xla/service/gpu/transforms:stream_attribute_annotator",
"//xla/tests:hlo_test_base",
"//xla/tests:xla_internal_test_main",
"//xla/tsl/lib/core:status_test_util",
Expand Down
1 change: 1 addition & 0 deletions third_party/xla/xla/service/gpu/gpu_compiler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2523,6 +2523,7 @@ absl::Status GpuCompiler::RunPostSchedulingPipelines(
main_pipeline.AddPass<HloPassPipeline>("remat-pipeline");

pipeline.AddPass<HloRematerialization>(remat_opts, sizes);
pipeline.AddPass<StreamAttributeAnnotator>();
pipeline.AddPass<OptimizationBarrierExpander>();
}

Expand Down
14 changes: 14 additions & 0 deletions third_party/xla/xla/service/gpu/gpu_offloading_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ limitations under the License.
#include "xla/hlo/utils/hlo_matchers.h"
#include "xla/layout.h"
#include "xla/service/buffer_value.h"
#include "xla/service/gpu/backend_configs.pb.h"
#include "xla/service/gpu/transforms/stream_attribute_annotator.h"
#include "xla/service/hlo_cost_analysis.h"
#include "xla/service/hlo_memory_scheduler.h"
#include "xla/service/hlo_rematerialization.h"
Expand Down Expand Up @@ -216,6 +218,18 @@ TEST_F(GpuOffloadingTest, CopyIRCreationTest) {
RunHloRematerialization(
/*memory_limit_bytes=*/10 * 1024, module.get()));
ASSERT_TRUE(changed);
StreamAttributeAnnotator attr_annotator;
TF_ASSERT_OK_AND_ASSIGN(bool changed_attr, attr_annotator.Run(module.get()));
EXPECT_TRUE(changed_attr);
// Verify that the stream attribute for a copy-start is annotated
for (std::string i : {"", ".1", ".2", ".3"}) {
const HloInstruction* cp_start =
FindInstruction(module.get(), "copy-start" + i);
EXPECT_TRUE(cp_start->has_backend_config());
TF_ASSERT_OK_AND_ASSIGN(GpuBackendConfig gpu_config,
cp_start->backend_config<GpuBackendConfig>());
EXPECT_GT(gpu_config.operation_queue_id(), 0);
}

// The module should still have a schedule.
ASSERT_TRUE(module->has_schedule());
Expand Down
6 changes: 2 additions & 4 deletions third_party/xla/xla/service/gpu/runtime/copy_thunk.cc
Original file line number Diff line number Diff line change
Expand Up @@ -130,8 +130,7 @@ absl::Status DeviceToHostCopyThunk::ExecuteOnStream(
VLOG(2) << "Memcpy D2H from the main stream";
return absl::OkStatus();
}
VLOG(2) << absl::StreamFormat("Memcpy D2Hfrom the other stream %d",
Thunk::execution_stream_id().value());
VLOG(2) << "Memcpy D2H from the other stream";
se::StreamExecutor* executor = params.stream->parent();
TF_ASSIGN_OR_RETURN(auto event, executor->CreateEvent());
// Record memcpy operation completion.
Expand Down Expand Up @@ -169,8 +168,7 @@ absl::Status HostToDeviceCopyThunk::ExecuteOnStream(
VLOG(2) << "Memcpy H2D from the main stream";
return absl::OkStatus();
}
VLOG(2) << absl::StreamFormat("Memcpy H2D from the other stream %d",
Thunk::execution_stream_id().value());
VLOG(2) << "Memcpy H2D from the other stream";
se::StreamExecutor* executor = params.stream->parent();
TF_ASSIGN_OR_RETURN(auto event, executor->CreateEvent());
// Record memcpy operation completion.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,20 @@ absl::StatusOr<bool> AnnotateStreamAttributesForInstruction(
return true;
}

// Tags a copy-start instruction with `channel_id` as its operation queue id.
//
// `instr_gpu_config` is the GPU backend config that is updated in place and
// then written back onto `instr`.
//
// Returns false, leaving both the config and the instruction untouched, when
// the config's operation queue id already differs from the default execution
// stream id. Returns true once the new id has been stored on the instruction.
// Any error from `set_backend_config` is propagated to the caller; in that
// case `instr_gpu_config` has already been modified.
absl::StatusOr<bool> AnnotateStreamAttributesForCopyStart(
    HloInstruction* instr, int64_t channel_id,
    GpuBackendConfig& instr_gpu_config) {
  const bool is_unannotated = instr_gpu_config.operation_queue_id() ==
                              Thunk::kDefaultExecutionStreamId.value();
  if (!is_unannotated) {
    // A non-default queue id is already present; keep it as is.
    return false;
  }

  instr_gpu_config.set_operation_queue_id(channel_id);
  TF_RETURN_IF_ERROR(instr->set_backend_config(instr_gpu_config));
  VLOG(3) << "Add copy-start's backend config: " << channel_id;
  return true;
}

absl::StatusOr<bool> WrapIntoFusionAndAnnotateStreamAttributes(
HloInstruction* instruction, int64_t channel_id,
GpuBackendConfig& instr_gpu_config) {
Expand Down Expand Up @@ -181,6 +195,12 @@ absl::StatusOr<bool> StreamAttributeAnnotator::Run(
AnnotateStreamAttributesForInstruction(
instr, instr_gpu_config.value()));
changed |= comp_result;
} else if (instr->opcode() == HloOpcode::kCopyStart) {
TF_ASSIGN_OR_RETURN(bool comp_result,
AnnotateStreamAttributesForCopyStart(
instr, channel_id, instr_gpu_config.value()));
changed |= comp_result;
continue;
} else if (comp->IsAsyncComputation() &&
(instr->opcode() == HloOpcode::kDynamicSlice ||
instr->opcode() == HloOpcode::kDynamicUpdateSlice)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,50 @@ TEST_F(StreamAttributeAnnotatorTest, FusionIsAnnotated) {
EXPECT_EQ(gpu_config.operation_queue_id(), 1);
}

// Runs StreamAttributeAnnotator on a module containing four copy-start
// instructions and checks that the pass reports a change and that every
// copy-start ends up with a backend config whose operation_queue_id is 1.
TEST_F(StreamAttributeAnnotatorTest, CopyStartIsAnnotated) {
  constexpr absl::string_view kHloString = R"(
HloModule offloading
ENTRY %main (param_0: f32[1024], param_1: f32[1024]) -> f32[1024] {
%param_1 = f32[1024]{0} parameter(1)
%param_0 = f32[1024]{0} parameter(0)
%res_3 = f32[1024]{0} add(f32[1024]{0} %param_0, f32[1024]{0} %param_1)
%copy-start = (f32[1024]{0:S(5)}, f32[1024]{0}, u32[]) copy-start(f32[1024]{0} %res_3)
%res_4 = f32[1024]{0} tanh(f32[1024]{0} %res_3)
%copy-start.2 = (f32[1024]{0:S(5)}, f32[1024]{0}, u32[]) copy-start(f32[1024]{0} %res_4)
%res_5 = f32[1024]{0} tanh(f32[1024]{0} %res_4)
%copy-done = f32[1024]{0:S(5)} copy-done((f32[1024]{0:S(5)}, f32[1024]{0}, u32[]) %copy-start)
%res_6 = f32[1024]{0} tanh(f32[1024]{0} %res_5)
%copy-done.2 = f32[1024]{0:S(5)} copy-done((f32[1024]{0:S(5)}, f32[1024]{0}, u32[]) %copy-start.2)
%copy-start.3 = (f32[1024]{0}, f32[1024]{0:S(5)}, u32[]) copy-start(f32[1024]{0:S(5)} %copy-done.2)
%res_7 = f32[1024]{0} add(f32[1024]{0} %res_6, f32[1024]{0} %res_6)
%copy-start.1 = (f32[1024]{0}, f32[1024]{0:S(5)}, u32[]) copy-start(f32[1024]{0:S(5)} %copy-done)
%res_8 = f32[1024]{0} add(f32[1024]{0} %res_7, f32[1024]{0} %res_5)
%copy-done.3 = f32[1024]{0} copy-done((f32[1024]{0}, f32[1024]{0:S(5)}, u32[]) %copy-start.3)
%res_9 = f32[1024]{0} add(f32[1024]{0} %res_8, f32[1024]{0} %copy-done.3)
%copy-done.1 = f32[1024]{0} copy-done((f32[1024]{0}, f32[1024]{0:S(5)}, u32[]) %copy-start.1)
%res_10 = f32[1024]{0} add(f32[1024]{0} %res_9, f32[1024]{0} %copy-done.1)
ROOT %res_11 = f32[1024]{0} tanh(f32[1024]{0} %res_10)
}
)";

  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module,
                          ParseAndReturnVerifiedModule(kHloString));

  StreamAttributeAnnotator attr_annotator;
  TF_ASSERT_OK_AND_ASSIGN(bool changed, attr_annotator.Run(module.get()));
  EXPECT_TRUE(changed);

  // Instruction names are "copy-start" followed by one of these suffixes.
  for (const char* suffix : {"", ".1", ".2", ".3"}) {
    const std::string instr_name = std::string("copy-start") + suffix;
    const HloInstruction* cp_start =
        FindInstruction(module.get(), instr_name);
    // Stop here on a missing instruction rather than dereferencing a null
    // pointer in the checks below.
    ASSERT_NE(cp_start, nullptr) << "missing instruction: " << instr_name;
    EXPECT_TRUE(cp_start->has_backend_config());
    TF_ASSERT_OK_AND_ASSIGN(GpuBackendConfig gpu_config,
                            cp_start->backend_config<GpuBackendConfig>());
    EXPECT_EQ(gpu_config.operation_queue_id(), 1);
  }
}

TEST_F(StreamAttributeAnnotatorTest, DynamicUpdateSliceWrappedAndAnnotated) {
constexpr absl::string_view kHloString = R"(
HloModule ModuleWithAsyncDynamicUpdateSlice, is_scheduled=true
Expand Down

0 comments on commit 2fe2937

Please sign in to comment.