diff --git a/lib/compiler/test/src/compiler/machine_mapping/cost_estimator_for_test.cc b/lib/compiler/test/src/compiler/machine_mapping/cost_estimator_for_test.cc index 9ee596af3e..7607132832 100644 --- a/lib/compiler/test/src/compiler/machine_mapping/cost_estimator_for_test.cc +++ b/lib/compiler/test/src/compiler/machine_mapping/cost_estimator_for_test.cc @@ -5,23 +5,25 @@ namespace FlexFlow { TestCostEstimator::TestCostEstimator( - std::function const &get_operator_cost, - std::function const + std::function const + &get_operator_cost, + std::function const &get_communication_cost) : get_operator_cost(get_operator_cost), get_communication_cost(get_communication_cost) {} -float TestCostEstimator::estimate_cost(OpCostEstimateKey const &k) const { +CostMetric TestCostEstimator::estimate_cost(OpCostEstimateKey const &k) const { return this->get_operator_cost(k); } -float TestCostEstimator::estimate_cost(TensorSetMovement const &m) const { +CostMetric TestCostEstimator::estimate_cost(TensorSetMovement const &m) const { return this->get_communication_cost(m); } CostEstimator make_fake_cost_estimator( - std::function const &get_operator_cost, - std::function const + std::function const + &get_operator_cost, + std::function const &get_communication_cost) { return CostEstimator::create(get_operator_cost, @@ -29,8 +31,8 @@ CostEstimator make_fake_cost_estimator( } CostEstimator make_fake_cost_estimator( - std::unordered_map const &op_cost_map, - std::unordered_map const &comm_cost_map) { + std::unordered_map const &op_cost_map, + std::unordered_map const &comm_cost_map) { return make_fake_cost_estimator( [op_cost_map](OpCostEstimateKey const &k) { return op_cost_map.at(k); }, [comm_cost_map](TensorSetMovement const &m) { diff --git a/lib/compiler/test/src/compiler/machine_mapping/cost_estimator_for_test.h b/lib/compiler/test/src/compiler/machine_mapping/cost_estimator_for_test.h index 7c1d06207a..1b2cc9e91e 100644 --- a/lib/compiler/test/src/compiler/machine_mapping/cost_estimator_for_test.h +++ b/lib/compiler/test/src/compiler/machine_mapping/cost_estimator_for_test.h @@ -11,27 +11,27 @@ namespace FlexFlow { struct TestCostEstimator : public ICostEstimator { - std::function get_operator_cost; - std::function get_communication_cost; + std::function get_operator_cost; + std::function get_communication_cost; TestCostEstimator() = delete; TestCostEstimator(decltype(get_operator_cost) const &get_operator_cost, decltype(get_communication_cost) const &get_communication_cost); - float estimate_cost(OpCostEstimateKey const &) const override; - - float estimate_cost(TensorSetMovement const &) const override; + CostMetric estimate_cost(OpCostEstimateKey const &) const override; + CostMetric estimate_cost(TensorSetMovement const &) const override; }; CostEstimator make_fake_cost_estimator( - std::function const &get_operator_cost, - std::function const + std::function const + &get_operator_cost, + std::function const &get_communication_cost); CostEstimator make_fake_cost_estimator( - std::unordered_map const &op_cost_map, - std::unordered_map const &comm_cost_map); + std::unordered_map const &op_cost_map, + std::unordered_map const &comm_cost_map); } // namespace FlexFlow diff --git a/lib/compiler/test/src/compiler/machine_mapping/get_optimal_machine_mapping.cc b/lib/compiler/test/src/compiler/machine_mapping/get_optimal_machine_mapping.cc index 0a874948e4..440e8506c4 100644 --- a/lib/compiler/test/src/compiler/machine_mapping/get_optimal_machine_mapping.cc +++ b/lib/compiler/test/src/compiler/machine_mapping/get_optimal_machine_mapping.cc @@ -118,22 +118,22 @@ TEST_SUITE(FF_TEST_SUITE) { }}; CostEstimator cost_estimator = make_fake_cost_estimator( - std::unordered_map{{ - {map_unmapped_op_cost_estimate_key(k1, mv1), 1.0}, - {map_unmapped_op_cost_estimate_key(k2, mv1), 2.0}, - {map_unmapped_op_cost_estimate_key(k1, mv2), 1.5}, - {map_unmapped_op_cost_estimate_key(k2, mv2), 2.5}, + std::unordered_map{{ + {map_unmapped_op_cost_estimate_key(k1, mv1), CostMetric{1.0, 1}}, + {map_unmapped_op_cost_estimate_key(k2, mv1), CostMetric{2.0, 2}}, + {map_unmapped_op_cost_estimate_key(k1, mv2), CostMetric{1.5, 3}}, + {map_unmapped_op_cost_estimate_key(k2, mv2), CostMetric{2.5, 3}}, }}, - std::unordered_map{{ - {TensorSetMovement{{}}, 0.0}, + std::unordered_map{{ + {TensorSetMovement{{}}, CostMetric{0.0, 0}}, {concretize_abstracted_tensor_set_movement(movement1, mm1, mm1), - 0.1}, + CostMetric{0.1, 0}}, {concretize_abstracted_tensor_set_movement(movement1, mm2, mm2), - 0.2}, + CostMetric{0.2, 0}}, {concretize_abstracted_tensor_set_movement(movement1, mm1, mm2), - 0.3}, + CostMetric{0.3, 0}}, {concretize_abstracted_tensor_set_movement(movement1, mm2, mm1), - 0.4}, + CostMetric{0.4, 0}}, }}); MachineMappingContext context = MachineMappingContext{ @@ -150,11 +150,25 @@ TEST_SUITE(FF_TEST_SUITE) { get_unconstrained_solution_for_layers( get_all_leaf_paths(problem_tree)); - MachineMappingResult result = get_optimal_machine_mapping( - cache, context, problem_tree, full_machine_spec, constraints); + MachineMemoryConstraints memory_constraints = MachineMemoryConstraints{ + /*memory_limit=*/10, + }; + + MachineMappingConfig config = MachineMappingConfig{ + /*enable_memory_optimization=*/false, + }; + + MachineMappingResult result = + get_optimal_machine_mapping(cache, + context, + problem_tree, + full_machine_spec, + constraints, + memory_constraints, + config); MachineMappingResult correct = MachineMappingResult{ FeasibleMachineMappingResult{ - /*runtime=*/1.0, + /*cost=*/CostMetric{1.0, 1}, /*machine_mapping=*/ ParallelLayerGuidObliviousMachineMapping{{ {binary_tree_root_path(), mv1}, @@ -173,11 +187,25 @@ TEST_SUITE(FF_TEST_SUITE) { get_unconstrained_solution_for_layers( get_all_leaf_paths(problem_tree)); - MachineMappingResult result = get_optimal_machine_mapping( - cache, context, problem_tree, full_machine_spec, constraints); + MachineMemoryConstraints memory_constraints = MachineMemoryConstraints{ + /*memory_limit=*/10, + }; + + MachineMappingConfig config = MachineMappingConfig{ + /*enable_memory_optimization=*/false, + }; + + MachineMappingResult result = + get_optimal_machine_mapping(cache, + context, + problem_tree, + full_machine_spec, + constraints, + memory_constraints, + config); MachineMappingResult correct = MachineMappingResult{ FeasibleMachineMappingResult{ - /*runtime=*/1.0 + 2.0 + 0.1, + /*cost=*/CostMetric{1.0 + 2.0 + 0.1, 1 + 2 + 0}, /*machine_mapping=*/ ParallelLayerGuidObliviousMachineMapping{{ { @@ -207,11 +235,25 @@ TEST_SUITE(FF_TEST_SUITE) { get_unconstrained_solution_for_layers( get_all_leaf_paths(problem_tree)); - MachineMappingResult result = get_optimal_machine_mapping( - cache, context, problem_tree, full_machine_spec, constraints); + MachineMemoryConstraints memory_constraints = MachineMemoryConstraints{ + /*memory_limit=*/10, + }; + + MachineMappingConfig config = MachineMappingConfig{ + /*enable_memory_optimization=*/false, + }; + + MachineMappingResult result = + get_optimal_machine_mapping(cache, + context, + problem_tree, + full_machine_spec, + constraints, + memory_constraints, + config); MachineMappingResult correct = MachineMappingResult{ FeasibleMachineMappingResult{ - /*runtime=*/2.5, + /*cost=*/CostMetric{2.5, 3}, /*machine_mapping=*/ ParallelLayerGuidObliviousMachineMapping{{ { diff --git a/lib/compiler/test/src/compiler/machine_mapping/machine_mapping_result.cc b/lib/compiler/test/src/compiler/machine_mapping/machine_mapping_result.cc index 254d6b2784..7665f929f2 100644 --- a/lib/compiler/test/src/compiler/machine_mapping/machine_mapping_result.cc +++ b/lib/compiler/test/src/compiler/machine_mapping/machine_mapping_result.cc @@ -6,10 +6,20 @@ using namespace FlexFlow; TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("series_combine") { + MachineMemoryConstraints memory_constraints = MachineMemoryConstraints{ + /*memory_limit=*/10, + }; + MachineMappingConfig config = MachineMappingConfig{ + /*enable_memory_optimization=*/false, + }; + MachineView machine_view_0 = make_1d_machine_view(gpu_id_t(0), gpu_id_t(1)); MachineView machine_view_1 = make_1d_machine_view(gpu_id_t(0), gpu_id_t(2)); - float pre_cost = 2.0; + CostMetric pre_cost = CostMetric{ + /*runtime=*/2.0, + /*memory=*/2, + }; MachineMappingResult pre = MachineMappingResult{ FeasibleMachineMappingResult{ /*runtime=*/pre_cost, @@ -31,7 +41,10 @@ TEST_SUITE(FF_TEST_SUITE) { }, }; - float post_cost = 4.0; + CostMetric post_cost = CostMetric{ + /*runtime=*/4.0, + /*memory=*/1, + }; MachineMappingResult post = MachineMappingResult{ FeasibleMachineMappingResult{ /*runtime=*/post_cost, @@ -47,19 +60,32 @@ TEST_SUITE(FF_TEST_SUITE) { MachineMappingResult infeasible = infeasible_machine_mapping_result(); - float comm_cost = 3.0; + CostMetric comm_cost = CostMetric{ + /*runtime=*/3.0, + /*memory=*/0, + }; SUBCASE("pre is infeasbile") { - MachineMappingResult result = series_combine( - comm_cost, infeasible, post, ParallelSplitTransformation::LthenR); + MachineMappingResult result = + series_combine(config, + memory_constraints, + comm_cost, + infeasible, + post, + ParallelSplitTransformation::LthenR); MachineMappingResult correct = infeasible; CHECK(result == correct); } SUBCASE("post is infeasbile") { - MachineMappingResult result = series_combine( - comm_cost, pre, infeasible, ParallelSplitTransformation::LthenR); + MachineMappingResult result = + series_combine(config, + memory_constraints, + comm_cost, + pre, + infeasible, + ParallelSplitTransformation::LthenR); MachineMappingResult correct = infeasible; CHECK(result == correct); @@ -67,7 +93,9 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("both are infeasible") { MachineMappingResult result = - series_combine(comm_cost, + series_combine(config, + memory_constraints, + comm_cost, infeasible, infeasible, ParallelSplitTransformation::LthenR); @@ -77,9 +105,13 @@ TEST_SUITE(FF_TEST_SUITE) { } SUBCASE("both are feasible") { + CostMetric no_parallel_split_transform_cost = CostMetric{ + /*runtime=*/pre_cost.runtime + post_cost.runtime + comm_cost.runtime, + /*memory=*/pre_cost.memory + post_cost.memory + comm_cost.memory, + }; MachineMappingResult no_parallel_split_transform = MachineMappingResult{ FeasibleMachineMappingResult{ - /*runtime=*/pre_cost + comm_cost + post_cost, + /*cost=*/no_parallel_split_transform_cost, /*machine_mapping=*/ ParallelLayerGuidObliviousMachineMapping{{ { @@ -107,27 +139,42 @@ TEST_SUITE(FF_TEST_SUITE) { }; SUBCASE("parallel_split_transformation = std::nullopt") { - MachineMappingResult result = - series_combine(comm_cost, pre, post, std::nullopt); + MachineMappingResult result = series_combine( + config, memory_constraints, comm_cost, pre, post, std::nullopt); MachineMappingResult correct = no_parallel_split_transform; CHECK(result == correct); } SUBCASE("parallel_split_transformation = LthenR") { - MachineMappingResult result = series_combine( - comm_cost, pre, post, ParallelSplitTransformation::LthenR); + MachineMappingResult result = + series_combine(config, + memory_constraints, + comm_cost, + pre, + post, + ParallelSplitTransformation::LthenR); MachineMappingResult correct = no_parallel_split_transform; CHECK(result == correct); } SUBCASE("parallel_split_transformation = RthenL") { - MachineMappingResult result = series_combine( - comm_cost, pre, post, ParallelSplitTransformation::RthenL); + MachineMappingResult result = + series_combine(config, + memory_constraints, + comm_cost, + pre, + post, + ParallelSplitTransformation::RthenL); + CostMetric correct_cost = CostMetric{ + /*runtime=*/pre_cost.runtime + post_cost.runtime + + comm_cost.runtime, + /*memory=*/pre_cost.memory + post_cost.memory + comm_cost.memory, + }; MachineMappingResult correct = MachineMappingResult{ FeasibleMachineMappingResult{ - /*runtime=*/pre_cost + comm_cost + post_cost, + /*runtime=*/correct_cost, /*machine_mapping=*/ ParallelLayerGuidObliviousMachineMapping{{ { @@ -160,12 +207,29 @@ TEST_SUITE(FF_TEST_SUITE) { } TEST_CASE("parallel_combine") { + MachineMemoryConstraints memory_constraints = MachineMemoryConstraints{ + /*memory_limit=*/10, + }; + MachineMappingConfig config = MachineMappingConfig{ + /*enable_memory_optimization=*/false, + }; + MachineView machine_view_0 = make_1d_machine_view(gpu_id_t(0), gpu_id_t(1)); MachineView machine_view_1 = make_1d_machine_view(gpu_id_t(0), gpu_id_t(2)); + CostMetric lhs_cost = CostMetric{ + /*runtime=*/2.0, + /*memory=*/2, + }; + + CostMetric rhs_cost = CostMetric{ + /*runtime=*/4.0, + /*memory=*/1, + }; + MachineMappingResult lhs = MachineMappingResult{ FeasibleMachineMappingResult{ - /*runtime=*/2.0, + /*cost=*/lhs_cost, /*machine_mapping=*/ ParallelLayerGuidObliviousMachineMapping{{ { @@ -186,7 +250,7 @@ TEST_SUITE(FF_TEST_SUITE) { MachineMappingResult rhs = MachineMappingResult{ FeasibleMachineMappingResult{ - /*runtime=*/4.0, + /*cost=*/rhs_cost, /*machine_mapping=*/ ParallelLayerGuidObliviousMachineMapping{{ { @@ -200,31 +264,40 @@ TEST_SUITE(FF_TEST_SUITE) { MachineMappingResult infeasible = infeasible_machine_mapping_result(); SUBCASE("lhs is infeasbile") { - MachineMappingResult result = parallel_combine(infeasible, rhs); + MachineMappingResult result = + parallel_combine(config, memory_constraints, infeasible, rhs); MachineMappingResult correct = infeasible; CHECK(result == correct); } SUBCASE("rhs is infeasbile") { - MachineMappingResult result = parallel_combine(lhs, infeasible); + MachineMappingResult result = + parallel_combine(config, memory_constraints, lhs, infeasible); MachineMappingResult correct = infeasible; CHECK(result == correct); } SUBCASE("both are infeasible") { - MachineMappingResult result = parallel_combine(infeasible, infeasible); + MachineMappingResult result = + parallel_combine(config, memory_constraints, infeasible, infeasible); MachineMappingResult correct = infeasible; CHECK(result == correct); } SUBCASE("both are feasible") { - MachineMappingResult result = parallel_combine(lhs, rhs); + MachineMappingResult result = + parallel_combine(config, memory_constraints, lhs, rhs); + + CostMetric correct_cost = CostMetric{ + /*runtime=*/4.0, + /*memory=*/2, + }; MachineMappingResult correct = MachineMappingResult{ FeasibleMachineMappingResult{ - /*runtime=*/4.0, + /*cost=*/correct_cost, /*machine_mapping=*/ ParallelLayerGuidObliviousMachineMapping{{ { @@ -261,7 +334,7 @@ TEST_SUITE(FF_TEST_SUITE) { MachineMappingResult faster = MachineMappingResult{ FeasibleMachineMappingResult{ - /*runtime=*/2.0, + /*cost=*/CostMetric{2.0, 2}, /*machine_mapping=*/ ParallelLayerGuidObliviousMachineMapping{{ { @@ -282,7 +355,7 @@ TEST_SUITE(FF_TEST_SUITE) { MachineMappingResult slower = MachineMappingResult{ FeasibleMachineMappingResult{ - /*runtime=*/4.0, + /*cost=*/CostMetric{4.0, 1}, /*machine_mapping=*/ ParallelLayerGuidObliviousMachineMapping{{ {