Skip to content

Commit

Permalink
Add Semi Filter to Join Fuzzer (facebookincubator#11473)
Browse files Browse the repository at this point in the history
Summary:

This change adds a semi filter to the join filter 10% of the time. Currently it only supports boolean columns. The next step will be to support integer columns.

Differential Revision: D65629460
  • Loading branch information
Daniel Hunte authored and facebook-github-bot committed Nov 7, 2024
1 parent 396b2bb commit aebf152
Showing 1 changed file with 49 additions and 19 deletions.
68 changes: 49 additions & 19 deletions velox/exec/fuzzer/JoinFuzzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,17 @@ class JoinFuzzer {
// Randomly pick a join type to test.
core::JoinType pickJoinType();

// Returns an equality join filter between probeKeys and buildKeys, with the
// per-key equalities joined by " AND ". probeKeys and buildKeys must have the
// same size.
//
// 10% of the time, and only if both probeInput and buildInput are non-empty,
// a semi filter is appended. When that happens, the probe side or the build
// side is picked with equal probability, and the filter '<column> = true' is
// added for the first boolean column found in the row type of that side's
// first input vector. If that side has no boolean column, no semi filter is
// added.
//
// TODO: Add support for non-boolean types.
std::string makeJoinFilter(
const std::vector<std::string>& probeKeys,
const std::vector<std::string>& buildKeys,
const std::vector<RowVectorPtr>& probeInput,
const std::vector<RowVectorPtr>& buildInput);

// Makes the query plan with default settings in JoinFuzzer and value inputs
// for both probe and build sides.
//
Expand Down Expand Up @@ -380,6 +391,38 @@ core::JoinType JoinFuzzer::pickJoinType() {
return kJoinTypes[idx];
}

std::string JoinFuzzer::makeJoinFilter(
const std::vector<std::string>& probeKeys,
const std::vector<std::string>& buildKeys,
const std::vector<RowVectorPtr>& probeInput,
const std::vector<RowVectorPtr>& buildInput) {
const auto numKeys = probeKeys.size();
std::string filter;
VELOX_CHECK_EQ(numKeys, buildKeys.size());
for (auto i = 0; i < numKeys; ++i) {
if (i > 0) {
filter += " AND ";
}
filter += fmt::format("{} = {}", probeKeys[i], buildKeys[i]);
}
// Add a semi filter 10% of the time.
if (vectorFuzzer_.coinToss(0.1) && !probeInput.empty() &&
!buildInput.empty()) {
RowTypePtr rowType = vectorFuzzer_.coinToss(0.5)
? asRowType(probeInput[0]->type())
: asRowType(buildInput[0]->type());

for (int i = 0; i < rowType->size(); i++) {
// TODO: Add support for non-boolean types.
if (rowType->childAt(i)->isBoolean()) {
filter += fmt::format(" AND {} = true", rowType->nameOf(i));
break;
}
}
}
return filter;
}

std::vector<TypePtr> JoinFuzzer::generateJoinKeyTypes(int32_t numKeys) {
std::vector<TypePtr> types;
types.reserve(numKeys);
Expand Down Expand Up @@ -690,6 +733,8 @@ JoinFuzzer::PlanWithSplits JoinFuzzer::makeDefaultPlan(
const std::vector<RowVectorPtr>& buildInput,
const std::vector<std::string>& outputColumns) {
auto planNodeIdGenerator = std::make_shared<core::PlanNodeIdGenerator>();
std::string filter =
makeJoinFilter(probeKeys, buildKeys, probeInput, buildInput);
auto plan =
PlanBuilder(planNodeIdGenerator)
.values(probeInput)
Expand Down Expand Up @@ -773,22 +818,6 @@ std::vector<core::PlanNodePtr> makeSources(
return sourceNodes;
}

// Produces the conjunction "p0 = b0 AND p1 = b1 ..." that pairs each probe key
// with the build key at the same position. The two key lists are checked to
// have equal length. Empty key lists yield an empty string.
std::string makeJoinFilter(
    const std::vector<std::string>& probeKeys,
    const std::vector<std::string>& buildKeys) {
  const auto numKeys = probeKeys.size();
  VELOX_CHECK_EQ(numKeys, buildKeys.size());

  std::string conjunction;
  // Empty before the first term, " AND " before every later one.
  const char* separator = "";
  for (size_t pos = 0; pos < numKeys; ++pos) {
    conjunction += separator;
    conjunction += fmt::format("{} = {}", probeKeys[pos], buildKeys[pos]);
    separator = " AND ";
  }
  return conjunction;
}

template <typename TNode>
void addFlippedJoinPlan(
const core::PlanNodePtr& plan,
Expand Down Expand Up @@ -846,8 +875,9 @@ JoinFuzzer::PlanWithSplits JoinFuzzer::makeNestedLoopJoinPlan(
const std::vector<std::string>& outputColumns,
bool withFilter) {
auto planNodeIdGenerator = std::make_shared<core::PlanNodeIdGenerator>();
const std::string filter =
withFilter ? makeJoinFilter(probeKeys, buildKeys) : "";
const std::string filter = withFilter
? makeJoinFilter(probeKeys, buildKeys, probeInput, buildInput)
: "";
return JoinFuzzer::PlanWithSplits{
PlanBuilder(planNodeIdGenerator)
.values(probeInput)
Expand Down Expand Up @@ -1232,7 +1262,7 @@ JoinFuzzer::PlanWithSplits JoinFuzzer::makeNestedLoopJoinPlanWithTableScan(
core::PlanNodeId buildScanId;

const std::string filter =
withFilter ? makeJoinFilter(probeKeys, buildKeys) : "";
withFilter ? makeJoinFilter(probeKeys, buildKeys, {}, {}) : "";
return JoinFuzzer::PlanWithSplits{
PlanBuilder(planNodeIdGenerator)
.tableScan(probeType)
Expand Down

0 comments on commit aebf152

Please sign in to comment.