Skip to content

Commit

Permalink
Unordered StridedRectangle, get_allowed_machine_views (#1458)
Browse files Browse the repository at this point in the history
* containers helper functions

* Additional support for unordered_multiset

* format fix

* Unordered Machine Mapping and adjacent changes

* formatting

* Minor fixes

* Update to StridedRectangle interface

* Minor updates

* added get_allowed_machine_views

* formatting

* minor fix

* Added StartInvariantMachineView

* formatting

* Containers fix

* Implemented tensor to machine view injection

* small refactor

* formatting

* Cleaning Up

* Formatting fix

* new machine-view interface

* update to allowed machine views

* PR review fixes

* update to machine view and getting allowed machine view to match new interface

* formatting

* minor fix

* PR fixes

* PR fixes

* machineview interface change

* Minor PR fixes

* .cc machine view fixes + added StartInvariantMachineView

* minor PR fixes

* minor fixes

* Post-merge fixes

* Format

---------

Co-authored-by: Pietro Max Marsella <marsella@stanford.edu>
Co-authored-by: Colin Unger <unger@stanford.edu>
Co-authored-by: Colin Unger <lockshaw@lockshaw.net>
  • Loading branch information
4 people authored Oct 9, 2024
1 parent a9d10d7 commit 65c3911
Show file tree
Hide file tree
Showing 80 changed files with 2,588 additions and 558 deletions.
21 changes: 21 additions & 0 deletions lib/compiler/include/compiler/allowed_machine_views.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#ifndef _FLEXFLOW_COMPILER_ALLOWED_MACHINE_VIEWS_H
#define _FLEXFLOW_COMPILER_ALLOWED_MACHINE_VIEWS_H

#include "pcg/machine_specification.dtg.h"
#include "pcg/machine_view.dtg.h"
#include "pcg/operator_task_space.dtg.h"

namespace FlexFlow {

/**
 * Checks whether a machine view can be used for a task space on a machine.
 *
 * The implementation maps the task space's maximum coordinate through `mv`
 * and reports whether that mapping yields a machine space coordinate.
 *
 * @param mv   Machine view to check.
 * @param task Operator task space the view would be applied to.
 * @param ms   Specification of the machine the view must fit on.
 * @return true if the maximum task coordinate maps to a machine coordinate.
 */
bool is_valid_machine_view(MachineView const &mv,
OperatorTaskSpace const &task,
MachineSpecification const &ms);

/**
 * Enumerates the machine views that are usable for `task` on `machine_spec`.
 *
 * The implementation generates candidate views for the requested device type
 * and keeps only those accepted by `is_valid_machine_view`.
 *
 * @param machine_spec Specification of the target machine.
 * @param task         Operator task space to be placed.
 * @param device_type  Device type used for the start coordinates of the
 *                     generated views.
 * @return The set of candidate machine views that pass validation.
 */
std::unordered_set<MachineView>
get_allowed_machine_views(MachineSpecification const &machine_spec,
OperatorTaskSpace const &task,
DeviceType device_type);

} // namespace FlexFlow

#endif
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#define _FLEXFLOW_LIB_COMPILER_INCLUDE_COMPILER_MACHINE_MAPPING_PARALLEL_LAYER_GUID_OBLIVIOUS_MACHINE_MAPPING_H

#include "compiler/machine_mapping/parallel_layer_guid_oblivious_machine_mapping.dtg.h"
#include <optional>

namespace FlexFlow {

Expand Down
122 changes: 122 additions & 0 deletions lib/compiler/src/compiler/allowed_machine_views.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
#include "compiler/allowed_machine_views.h"
#include "pcg/machine_specification.h"
#include "pcg/machine_view.h"
#include "pcg/multi_dimensional_stride.dtg.h"
#include "pcg/operator_task_space.h"
#include "utils/containers/all_of.h"
#include "utils/containers/cartesian_product.h"
#include "utils/containers/extend.h"
#include "utils/containers/filter.h"
#include "utils/containers/get_all_permutations_with_repetition.h"
#include "utils/containers/map_from_keys_and_values.h"
#include "utils/containers/product.h"
#include "utils/containers/range.h"
#include "utils/containers/replicate.h"
#include "utils/containers/sorted.h"
#include "utils/containers/transform.h"
#include "utils/containers/unordered_multiset_of.h"
#include "utils/containers/unordered_set_of.h"
#include "utils/containers/zip.h"
#include "utils/overload.h"

namespace FlexFlow {

/**
 * A machine view is considered valid when the maximum coordinate of the task
 * space can be mapped to a machine space coordinate under `mv` on `ms`.
 *
 * NOTE(review): this presumably relies on `get_machine_space_coordinate`
 * returning an empty optional when the mapped coordinate does not exist on
 * the machine -- confirm against its definition.
 */
bool is_valid_machine_view(MachineView const &mv,
                           OperatorTaskSpace const &task,
                           MachineSpecification const &ms) {
  auto const farthest_task_coord = get_task_space_maximum_coordinate(task);
  return get_machine_space_coordinate(task, mv, farthest_task_coord, ms)
      .has_value();
}

/*
 * Generates a set of candidate `MachineView`s.
 * The returned set includes all valid machine views, and might contain invalid
 * ones. This function should not be used externally (see
 * `get_allowed_machine_views` instead). There is no guarantee that a non-empty
 * returned set contains a valid machine view (i.e. it's possible for all
 * the returned `MachineView`s to be invalid)
 *
 * Candidates are the cross product of:
 *   - every stride vector whose entries lie in [1, max stride upper bound],
 *   - every start coordinate (node_idx, device_idx) of the given device type,
 *   - every assignment of INTER_NODE / INTRA_NODE to the task dimensions.
 *
 * NOTE(review): for task dimensions of degree 1 only strides up to the same
 * upper bound are enumerated; presumably the stride of such a dimension does
 * not influence validity -- confirm against `get_machine_space_coordinate`.
 */
static std::unordered_set<MachineView>
    get_candidate_machine_views(MachineSpecification const &machine_spec,
                                OperatorTaskSpace const &task,
                                DeviceType const &device_type) {

  // Inclusive upper bound on the per-dimension strides worth enumerating:
  // ceil(total_devices / prod(degree_i - 1)).
  auto get_max_stride_upper_bound = [](std::vector<int> const &tensor_dims,
                                       int total_devices) -> int {
    // A dimension of degree 1 would contribute a factor of 0 and make the
    // division below undefined, so each factor is clamped to at least 1.
    int min_num_devices_with_full_stride_volume =
        product(transform(tensor_dims, [](int const &num_devices) {
          return (num_devices > 1) ? (num_devices - 1) : 1;
        }));
    // Integer ceiling division. Dividing two ints first and then applying
    // std::ceil would truncate before rounding up.
    return (total_devices + min_num_devices_with_full_stride_volume - 1) /
           min_num_devices_with_full_stride_volume;
  };

  // All stride vectors with one entry per task dimension, each entry in
  // [1, max stride upper bound].
  auto candidate_strides = [&](std::vector<int> const &tensor_dims,
                               int total_devices)
      -> std::unordered_multiset<MultiDimensionalStride> {
    int max_stride_upper_bound =
        get_max_stride_upper_bound(tensor_dims, total_devices);

    std::vector<stride_t> single_stride_range =
        transform(range(1, max_stride_upper_bound + 1),
                  [](int stride) { return stride_t{stride}; });
    std::unordered_multiset<std::vector<stride_t>> raw_stride_vectors =
        cartesian_product(replicate(tensor_dims.size(), single_stride_range));
    std::unordered_multiset<MultiDimensionalStride> strides =
        transform(raw_stride_vectors, [](auto const &stride_vec) {
          return MultiDimensionalStride{stride_vec};
        });
    return strides;
  };

  // Every (node_idx, device_idx) pair of the requested device type.
  auto candidate_starts = [](MachineSpecification const &ms,
                             DeviceType const &device_type) {
    std::unordered_set<MachineSpaceCoordinate> result;
    for (int node_idx : range(ms.num_nodes)) {
      for (int device_idx : range(get_num_devices_per_node(ms, device_type))) {
        result.insert(
            MachineSpaceCoordinate{node_idx, device_idx, device_type});
      }
    }
    return result;
  };

  // Every way of assigning INTER_NODE / INTRA_NODE to each task dimension.
  auto candidate_dimensions = [](OperatorTaskSpace const &task) {
    std::unordered_set<MachineSpecificationDimension> options = {
        MachineSpecificationDimension::INTER_NODE,
        MachineSpecificationDimension::INTRA_NODE};
    return get_all_permutations_with_repetition(options, num_dims(task));
  };

  std::vector<int> tensor_dims = task.degrees;
  int total_devices = get_num_devices(machine_spec, device_type);

  // Starts and dimension assignments do not depend on the stride, so they are
  // computed once rather than on every iteration of the loops below.
  std::unordered_set<MachineSpaceCoordinate> const starts =
      candidate_starts(machine_spec, device_type);
  auto const dimension_assignments = candidate_dimensions(task);

  std::unordered_set<MachineView> machine_views;

  for (MultiDimensionalStride const &strides :
       candidate_strides(tensor_dims, total_devices)) {
    for (MachineSpaceCoordinate const &start : starts) {
      for (std::vector<MachineSpecificationDimension> const &dims :
           dimension_assignments) {
        machine_views.insert(
            machine_view_from_strides_and_machine_spec_dimensions(
                start, strides.raw_strides, dims));
      }
    }
  }

  return machine_views;
}

/**
 * Returns the machine views usable for `task` on `machine_spec`: the
 * candidates produced by `get_candidate_machine_views`, restricted to those
 * accepted by `is_valid_machine_view`.
 */
std::unordered_set<MachineView>
    get_allowed_machine_views(MachineSpecification const &machine_spec,
                              OperatorTaskSpace const &task,
                              DeviceType device_type) {
  std::unordered_set<MachineView> candidates =
      get_candidate_machine_views(machine_spec, task, device_type);

  auto fits_on_machine = [&](MachineView const &candidate) {
    return is_valid_machine_view(candidate, task, machine_spec);
  };

  return filter(candidates, fits_on_machine);
}

} // namespace FlexFlow
104 changes: 104 additions & 0 deletions lib/compiler/test/src/allowed_machine_views.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
#include "compiler/allowed_machine_views.h"
#include "doctest/doctest.h"
#include "utils/containers/extend.h"
#include "utils/containers/range.h"
#include "utils/containers/transform.h"
#include "utils/containers/unordered_set_of.h"
#include "utils/containers/zip.h"
#include "utils/fmt/unordered_set.h"

using namespace FlexFlow;

TEST_SUITE(FF_TEST_SUITE) {

  TEST_CASE("get_allowed_machine_views") {

    SUBCASE("1 degree of parallelism") {
      // Single node with 5 GPUs; the task has one dimension of degree 3.
      MachineSpecification machine_spec = MachineSpecification{
          /*num_nodes=*/1,
          /*num_cpus_per_node=*/5,
          /*num_gpus_per_node=*/5,
          /*inter_node_bandwidth=*/0,
          /*intra_node_bandwidth=*/0,
      };

      OperatorTaskSpace task_space = OperatorTaskSpace{{3}};

      // Builds a 1-D intra-node GPU view on node 0.
      auto make_1d_intra_view = [](int start_device_idx, int stride) {
        return MachineView{
            MachineSpaceCoordinate{
                /*node_idx=*/0, start_device_idx, DeviceType::GPU},
            {MachineViewDimension{stride_t{stride},
                                  MachineSpecificationDimension::INTRA_NODE}},
        };
      };

      std::unordered_set<MachineView> correct = {
          make_1d_intra_view(/*start_device_idx=*/0, /*stride=*/1),
          make_1d_intra_view(/*start_device_idx=*/1, /*stride=*/1),
          make_1d_intra_view(/*start_device_idx=*/2, /*stride=*/1),
          make_1d_intra_view(/*start_device_idx=*/0, /*stride=*/2),
      };

      std::unordered_set<MachineView> result =
          get_allowed_machine_views(machine_spec, task_space, DeviceType::GPU);

      CHECK(correct == result);
    }

    SUBCASE("2 degrees of parallelism") {
      // 3 nodes with 3 GPUs each; the task has dimensions of degree 2 and 3.
      MachineSpecification machine_spec = MachineSpecification{
          /*num_nodes=*/3,
          /*num_cpus_per_node=*/3,
          /*num_gpus_per_node=*/3,
          /*inter_node_bandwidth=*/0,
          /*intra_node_bandwidth=*/0,
      };
      OperatorTaskSpace task_space = OperatorTaskSpace{{2, 3}};

      auto intra = MachineSpecificationDimension::INTRA_NODE;
      auto inter = MachineSpecificationDimension::INTER_NODE;

      // Builds a 2-D GPU view from a start coordinate plus a
      // (stride, projection) pair for each of the two task dimensions.
      auto make_2d_view = [](int node_idx,
                             int device_idx,
                             int first_stride,
                             int second_stride,
                             MachineSpecificationDimension first_projection,
                             MachineSpecificationDimension second_projection) {
        MachineSpaceCoordinate start =
            MachineSpaceCoordinate{node_idx, device_idx, DeviceType::GPU};
        return MachineView{
            start,
            {MachineViewDimension{stride_t{first_stride}, first_projection},
             MachineViewDimension{stride_t{second_stride}, second_projection}},
        };
      };

      std::unordered_set<MachineView> correct = {
          // first task dimension across nodes, second within a node
          make_2d_view(0, 0, /*stride1=*/1, /*stride2=*/1, inter, intra),
          make_2d_view(1, 0, /*stride1=*/1, /*stride2=*/1, inter, intra),
          make_2d_view(0, 0, /*stride1=*/2, /*stride2=*/1, inter, intra),

          // first task dimension within a node, second across nodes
          make_2d_view(0, 0, /*stride1=*/1, /*stride2=*/1, intra, inter),
          make_2d_view(0, 1, /*stride1=*/1, /*stride2=*/1, intra, inter),
          make_2d_view(0, 0, /*stride1=*/2, /*stride2=*/1, intra, inter),
      };

      std::unordered_set<MachineView> result =
          get_allowed_machine_views(machine_spec, task_space, DeviceType::GPU);

      CHECK(correct == result);
    }
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,35 @@ TEST_SUITE(FF_TEST_SUITE) {
};
};

MachineView mv1 = make_1d_machine_view(gpu_id_t(1), gpu_id_t(2));
MachineView mv2 = make_1d_machine_view(gpu_id_t(1), gpu_id_t(3));
MachineView mv1 = MachineView{
/*start=*/MachineSpaceCoordinate{
/*node_idx=*/0,
/*device_idx=*/0,
/*device_type=*/DeviceType::GPU,
},
/*dimensions=*/
{
MachineViewDimension{
stride_t{1},
MachineSpecificationDimension::INTRA_NODE,
},
},
};

MachineView mv2 = MachineView{
/*start=*/MachineSpaceCoordinate{
/*node_idx=*/0,
/*device_idx=*/0,
/*device_type=*/DeviceType::GPU,
},
/*dimensions=*/
{
MachineViewDimension{
stride_t{2},
MachineSpecificationDimension::INTRA_NODE,
},
},
};

MachineSpecification full_machine_spec = MachineSpecification{
/*num_nodes=*/2,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,65 @@ TEST_SUITE(FF_TEST_SUITE) {
ParallelLayerAddedResult relu_2 = add_parallel_layer(
pcg, relu_attrs, {get_only(relu_1.outputs)}, {relu_output_attrs});

MachineView pre_mv1 = make_1d_machine_view(gpu_id_t{0}, gpu_id_t{1});
MachineView pre_mv2 = make_1d_machine_view(gpu_id_t{0}, gpu_id_t{2});
MachineView post_mv1 = make_1d_machine_view(gpu_id_t{0}, gpu_id_t{3});
MachineView post_mv2 = make_1d_machine_view(gpu_id_t{0}, gpu_id_t{4});
MachineView pre_mv1 = MachineView{
/*start=*/MachineSpaceCoordinate{
/*node_idx=*/0,
/*device_idx=*/0,
/*device_type=*/DeviceType::GPU,
},
/*dimensions=*/
{
MachineViewDimension{
stride_t{1},
MachineSpecificationDimension::INTRA_NODE,
},
},
};

MachineView pre_mv2 = MachineView{
/*start=*/MachineSpaceCoordinate{
/*node_idx=*/0,
/*device_idx=*/0,
/*device_type=*/DeviceType::GPU,
},
/*dimensions=*/
{
MachineViewDimension{
stride_t{2},
MachineSpecificationDimension::INTRA_NODE,
},
},
};

MachineView post_mv1 = MachineView{
/*start=*/MachineSpaceCoordinate{
/*node_idx=*/0,
/*device_idx=*/0,
/*device_type=*/DeviceType::GPU,
},
/*dimensions=*/
{
MachineViewDimension{
stride_t{3},
MachineSpecificationDimension::INTRA_NODE,
},
},
};

MachineView post_mv2 = MachineView{
/*start=*/MachineSpaceCoordinate{
/*node_idx=*/0,
/*device_idx=*/0,
/*device_type=*/DeviceType::GPU,
},
/*dimensions=*/
{
MachineViewDimension{
stride_t{4},
MachineSpecificationDimension::INTRA_NODE,
},
},
};

SUBCASE("single edge across split") {
PCGBinarySeriesSplit split = PCGBinarySeriesSplit{
Expand Down
Loading

0 comments on commit 65c3911

Please sign in to comment.