-
Notifications
You must be signed in to change notification settings - Fork 225
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Unordered StridedRectangle,
get_allowed_machine_views
(#1458)
* containers helper functions * Additional support for unordered_multiset * format fix * Unordered Machine Mapping and adjacent changes * formatting * Minor fixes * Update to StridedRectangle interface * Minor updates * added get_allowed_machine_views * formatting * minor fix * Added StartInvariantMachineView * formatting * Containers fix * Implemented tensor to machine view injection * small refactor * formatting * Cleaning Up * Formatting fix * new machine-view interface * update to allowed machine views * PR review fixes * update to machine view and getting allowed machine view to match new interface * formatting * minor fix * PR fixes * PR fixes * machineview interface change * Minor PR fixes * .cc machine view fixes + added StartInvariantMachineView * minor PR fixes * minor fixes * Post-merge fixes * Format --------- Co-authored-by: Pietro Max Marsella <marsella@stanford.edu> Co-authored-by: Colin Unger <unger@stanford.edu> Co-authored-by: Colin Unger <lockshaw@lockshaw.net>
- Loading branch information
1 parent
a9d10d7
commit 65c3911
Showing
80 changed files
with
2,588 additions
and
558 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
#ifndef _FLEXFLOW_COMPILER_ALLOWED_MACHINE_VIEWS_H | ||
#define _FLEXFLOW_COMPILER_ALLOWED_MACHINE_VIEWS_H | ||
|
||
#include "pcg/machine_specification.dtg.h" | ||
#include "pcg/machine_view.dtg.h" | ||
#include "pcg/operator_task_space.dtg.h" | ||
|
||
namespace FlexFlow { | ||
|
||
bool is_valid_machine_view(MachineView const &mv, | ||
OperatorTaskSpace const &task, | ||
MachineSpecification const &ms); | ||
|
||
std::unordered_set<MachineView> | ||
get_allowed_machine_views(MachineSpecification const &machine_spec, | ||
OperatorTaskSpace const &task, | ||
DeviceType device_type); | ||
|
||
} // namespace FlexFlow | ||
|
||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,122 @@ | ||
#include "compiler/allowed_machine_views.h" | ||
#include "pcg/machine_specification.h" | ||
#include "pcg/machine_view.h" | ||
#include "pcg/multi_dimensional_stride.dtg.h" | ||
#include "pcg/operator_task_space.h" | ||
#include "utils/containers/all_of.h" | ||
#include "utils/containers/cartesian_product.h" | ||
#include "utils/containers/extend.h" | ||
#include "utils/containers/filter.h" | ||
#include "utils/containers/get_all_permutations_with_repetition.h" | ||
#include "utils/containers/map_from_keys_and_values.h" | ||
#include "utils/containers/product.h" | ||
#include "utils/containers/range.h" | ||
#include "utils/containers/replicate.h" | ||
#include "utils/containers/sorted.h" | ||
#include "utils/containers/transform.h" | ||
#include "utils/containers/unordered_multiset_of.h" | ||
#include "utils/containers/unordered_set_of.h" | ||
#include "utils/containers/zip.h" | ||
#include "utils/overload.h" | ||
|
||
namespace FlexFlow { | ||
|
||
bool is_valid_machine_view(MachineView const &mv, | ||
OperatorTaskSpace const &task, | ||
MachineSpecification const &ms) { | ||
std::optional<MachineSpaceCoordinate> maximum_device_coord = | ||
get_machine_space_coordinate( | ||
task, mv, get_task_space_maximum_coordinate(task), ms); | ||
return maximum_device_coord.has_value(); | ||
} | ||
|
||
/* | ||
* Generates a set of candidate `MachineView`s. | ||
* The returned set includes all valid machine views, and might contain invalid | ||
* ones. This function should not be used externally (see | ||
* `get_allowed_machine_views` instead). There is no guarantee that a non-empty | ||
* returned set contains a valid machine view (i.e. it's possible for all | ||
* the returned `MachineView`s to be invalid) | ||
*/ | ||
static std::unordered_set<MachineView> | ||
get_candidate_machine_views(MachineSpecification const &machine_spec, | ||
OperatorTaskSpace const &task, | ||
DeviceType const &device_type) { | ||
|
||
auto get_max_stride_upper_bound = [](std::vector<int> const &tensor_dims, | ||
int total_devices) -> int { | ||
int min_num_devices_with_full_stride_volume = product(transform( | ||
tensor_dims, [](int const &num_devices) { return num_devices - 1; })); | ||
return std::ceil(total_devices / min_num_devices_with_full_stride_volume); | ||
}; | ||
|
||
auto candidate_strides = [&](std::vector<int> const &tensor_dims, | ||
int total_devices) | ||
-> std::unordered_multiset<MultiDimensionalStride> { | ||
int max_stride_upper_bound = | ||
get_max_stride_upper_bound(tensor_dims, total_devices); | ||
|
||
std::vector<stride_t> single_stride_range = | ||
transform(range(1, max_stride_upper_bound + 1), | ||
[](int stride) { return stride_t{stride}; }); | ||
std::unordered_multiset<std::vector<stride_t>> raw_stride_vectors = | ||
cartesian_product(replicate(tensor_dims.size(), single_stride_range)); | ||
std::unordered_multiset<MultiDimensionalStride> strides = | ||
transform(raw_stride_vectors, [](auto const &stride_vec) { | ||
return MultiDimensionalStride{stride_vec}; | ||
}); | ||
return strides; | ||
}; | ||
|
||
auto candidate_starts = [](MachineSpecification const &ms, | ||
DeviceType const &device_type) { | ||
std::unordered_set<MachineSpaceCoordinate> result; | ||
for (int node_idx : range(ms.num_nodes)) { | ||
for (int device_idx : range(get_num_devices_per_node(ms, device_type))) { | ||
result.insert( | ||
MachineSpaceCoordinate{node_idx, device_idx, device_type}); | ||
} | ||
} | ||
return result; | ||
}; | ||
|
||
auto candidate_dimensions = [](OperatorTaskSpace const &task) { | ||
std::unordered_set<MachineSpecificationDimension> options = { | ||
MachineSpecificationDimension::INTER_NODE, | ||
MachineSpecificationDimension::INTRA_NODE}; | ||
return get_all_permutations_with_repetition(options, num_dims(task)); | ||
}; | ||
|
||
std::vector<int> tensor_dims = task.degrees; | ||
int total_devices = get_num_devices(machine_spec, device_type); | ||
|
||
std::unordered_set<MachineView> machine_views; | ||
|
||
for (MultiDimensionalStride const &strides : | ||
candidate_strides(tensor_dims, total_devices)) { | ||
for (MachineSpaceCoordinate start : | ||
candidate_starts(machine_spec, device_type)) { | ||
for (std::vector<MachineSpecificationDimension> const &dims : | ||
candidate_dimensions(task)) { | ||
machine_views.insert( | ||
machine_view_from_strides_and_machine_spec_dimensions( | ||
start, strides.raw_strides, dims)); | ||
} | ||
} | ||
} | ||
return machine_views; | ||
} | ||
|
||
std::unordered_set<MachineView> | ||
get_allowed_machine_views(MachineSpecification const &machine_spec, | ||
OperatorTaskSpace const &task, | ||
DeviceType device_type) { | ||
|
||
std::unordered_set<MachineView> views = | ||
get_candidate_machine_views(machine_spec, task, device_type); | ||
return filter(views, [&](MachineView const &mv) { | ||
return is_valid_machine_view(mv, task, machine_spec); | ||
}); | ||
} | ||
|
||
} // namespace FlexFlow |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
#include "compiler/allowed_machine_views.h" | ||
#include "doctest/doctest.h" | ||
#include "utils/containers/extend.h" | ||
#include "utils/containers/range.h" | ||
#include "utils/containers/transform.h" | ||
#include "utils/containers/unordered_set_of.h" | ||
#include "utils/containers/zip.h" | ||
#include "utils/fmt/unordered_set.h" | ||
|
||
using namespace FlexFlow; | ||
|
||
TEST_SUITE(FF_TEST_SUITE) { | ||
|
||
TEST_CASE("get_allowed_machine_views") { | ||
|
||
SUBCASE("1 degree of parallelism") { | ||
MachineSpecification ms = MachineSpecification{ | ||
/*num_nodes=*/1, | ||
/*num_cpus_per_node=*/5, | ||
/*num_gpus_per_node=*/5, | ||
/*inter_node_bandwidth=*/0, | ||
/*intra_node_bandwidth=*/0, | ||
}; | ||
|
||
OperatorTaskSpace task = OperatorTaskSpace{{3}}; | ||
|
||
std::unordered_set<MachineView> correct = { | ||
MachineView{ | ||
MachineSpaceCoordinate{ | ||
/*node_idx=*/0, /*device_idx=*/0, DeviceType::GPU}, | ||
{MachineViewDimension{stride_t{1}, | ||
MachineSpecificationDimension::INTRA_NODE}}, | ||
}, | ||
|
||
MachineView{ | ||
MachineSpaceCoordinate{ | ||
/*node_idx=*/0, /*device_idx=*/1, DeviceType::GPU}, | ||
{MachineViewDimension{stride_t{1}, | ||
MachineSpecificationDimension::INTRA_NODE}}, | ||
}, | ||
MachineView{ | ||
MachineSpaceCoordinate{ | ||
/*node_idx=*/0, /*device_idx=*/2, DeviceType::GPU}, | ||
{MachineViewDimension{stride_t{1}, | ||
MachineSpecificationDimension::INTRA_NODE}}, | ||
}, | ||
MachineView{ | ||
MachineSpaceCoordinate{ | ||
/*node_idx=*/0, /*device_idx=*/0, DeviceType::GPU}, | ||
{MachineViewDimension{stride_t{2}, | ||
MachineSpecificationDimension::INTRA_NODE}}, | ||
}, | ||
}; | ||
|
||
std::unordered_set<MachineView> result = | ||
get_allowed_machine_views(ms, task, DeviceType::GPU); | ||
|
||
CHECK(correct == result); | ||
} | ||
|
||
SUBCASE("2 degrees of parallelism") { | ||
|
||
MachineSpecification ms = MachineSpecification{ | ||
/*num_nodes=*/3, | ||
/*num_cpus_per_node=*/3, | ||
/*num_gpus_per_node=*/3, | ||
/*inter_node_bandwidth=*/0, | ||
/*intra_node_bandwidth=*/0, | ||
}; | ||
OperatorTaskSpace task = OperatorTaskSpace{{2, 3}}; | ||
|
||
auto make_2d_view = [&](int start_node_idx, | ||
int start_device_idx, | ||
int stride1, | ||
int stride2, | ||
MachineSpecificationDimension m1, | ||
MachineSpecificationDimension m2) { | ||
return MachineView{ | ||
MachineSpaceCoordinate{ | ||
start_node_idx, start_device_idx, DeviceType::GPU}, | ||
{MachineViewDimension{stride_t{stride1}, m1}, | ||
MachineViewDimension{stride_t{stride2}, m2}}, | ||
}; | ||
}; | ||
|
||
auto intra = MachineSpecificationDimension::INTRA_NODE; | ||
auto inter = MachineSpecificationDimension::INTER_NODE; | ||
std::unordered_set<MachineView> correct = { | ||
make_2d_view(0, 0, /*stride1=*/1, /*stride2=*/1, inter, intra), | ||
make_2d_view(1, 0, /*stride1=*/1, /*stride2=*/1, inter, intra), | ||
make_2d_view(0, 0, /*stride1=*/2, /*stride2=*/1, inter, intra), | ||
|
||
make_2d_view(0, 0, /*stride1=*/1, /*stride2=*/1, intra, inter), | ||
make_2d_view(0, 1, /*stride1=*/1, /*stride2=*/1, intra, inter), | ||
make_2d_view(0, 0, /*stride1=*/2, /*stride2=*/1, intra, inter), | ||
}; | ||
|
||
std::unordered_set<MachineView> result = | ||
get_allowed_machine_views(ms, task, DeviceType::GPU); | ||
|
||
CHECK(correct == result); | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.