Skip to content

Commit

Permalink
[df] Fix Vary sanity check for typedefs
Browse files Browse the repository at this point in the history
  • Loading branch information
vepadulano committed Jan 23, 2025
1 parent 885b663 commit c9c762b
Show file tree
Hide file tree
Showing 2 changed files with 90 additions and 9 deletions.
28 changes: 19 additions & 9 deletions tree/dataframe/inc/ROOT/RDF/RInterfaceBase.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -89,22 +89,32 @@ protected:
throw std::runtime_error("This Vary call is varying multiple columns simultaneously but the expression "
"does not return an RVec of RVecs.");

// Check for type mismatches. We are interested in two cases:
// - All columns that are going to be varied must be of the same type
// - The return type of the expression must match the type of the nominal column
auto colTypes = GetColumnTypeNamesList(colNames);
auto allColTypesEqual =
std::all_of(colTypes.begin() + 1, colTypes.end(), [&](const std::string &t) { return t == colTypes[0]; });
if (!allColTypesEqual)
throw std::runtime_error("Cannot simultaneously vary multiple columns of different types.");

auto &&nColTypes = colTypes.size();
// Cache type_info when requested
std::vector<const std::type_info *> colTypeIDs(nColTypes);
const auto &innerTypeID = typeid(RDFInternal::InnerValueType_t<RetType>);

for (auto i = 0u; i < colTypes.size(); ++i) {
for (decltype(nColTypes) i{}; i < nColTypes; ++i) {
// Need to retrieve the type_info for each column. We start with
// checking if the column comes from a Define, in which case the
// type_info is cached already. Otherwise, we need to retrieve it
// via TypeName2TypeID, which eventually might call the interpreter.
const auto *define = fColRegister.GetDefine(colNames[i]);
const auto *expectedTypeID = define ? &define->GetTypeId() : &RDFInternal::TypeName2TypeID(colTypes[i]);
if (innerTypeID != *expectedTypeID)
colTypeIDs[i] = define ? &define->GetTypeId() : &RDFInternal::TypeName2TypeID(colTypes[i]);
// First check: whether the current column type is the same as the first one.
if (*colTypeIDs[i] != *colTypeIDs[0]) {
throw std::runtime_error("Cannot simultaneously vary multiple columns of different types.");
}
// Second check: mismatch between varied type and nominal type
if (innerTypeID != *colTypeIDs[i])
throw std::runtime_error("Varied values for column \"" + colNames[i] + "\" have a different type (" +
RDFInternal::TypeID2TypeName(innerTypeID) + ") than the nominal value (" +
colTypes[i] + ").");
}

} else { // we are varying a single column, RetType is RVec<T>
const auto &retTypeID = typeid(typename RetType::value_type);
const auto &colName = colNames[0]; // we have only one element in there
Expand Down
71 changes: 71 additions & 0 deletions tree/dataframe/test/dataframe_vary.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -1620,6 +1620,77 @@ TEST_P(RDFVary, ManyVariationsManyColumns)
}
}

// Regression test for https://github.com/root-project/root/issues/17486
TEST_P(RDFVary, CompatibleColumnTypes)
{
struct DataRAII {
const char *fFileName{"rdf_vary_compatible_column_types.root"};
const char *fTreeName{"tree"};
DataRAII()
{
TFile f{fFileName, "recreate"};
TTree t{fTreeName, fTreeName};
Float_t v[3] = {1.f, 2.f, 3.f};
t.Branch("v", &v, "v[3]/F");
t.Branch("w", &v, "w[3]/F");
t.Fill();
f.Write();
}
~DataRAII() { std::remove(fFileName); }
} dataset;

ROOT::RDataFrame df_start{dataset.fTreeName, dataset.fFileName};
ROOT::RDF::RNode df{df_start};

// The JITted transformation triggers a change in the column type name
// due to the desugaring of the Float_t typedef
df = df.Redefine("v", "v");

// Previously Vary would throw when doing a string comparison check of the
// types of the columns to be varied
EXPECT_NO_THROW({
df = df.Vary(
{"v", "w"},
[](const ROOT::RVecF &v, const ROOT::RVecF &w) {
return ROOT::RVec<ROOT::RVec<ROOT::RVecF>>{{v - 1, v + 1}, {w - 1, w + 1}};
},
{"v", "w"}, {"down", "up"}, "variation");
});
}

TEST_P(RDFVary, IncompatibleColumnTypes)
{
struct DataRAII {
const char *fFileName{"rdf_vary_incompatible_column_types.root"};
const char *fTreeName{"tree"};
DataRAII()
{
TFile f{fFileName, "recreate"};
TTree t{fTreeName, fTreeName};
Float_t v[3] = {1.f, 2.f, 3.f};
Int_t w[3] = {1, 2, 3};
t.Branch("v", &v, "v[3]/F");
t.Branch("w", &w, "w[3]/I");
t.Fill();
f.Write();
}
~DataRAII() { std::remove(fFileName); }
} dataset;

ROOT::RDataFrame df{dataset.fTreeName, dataset.fFileName};

EXPECT_THROW(
{
df.Vary(
{"v", "w"},
[](const ROOT::RVecF &v, const ROOT::RVecI &w) {
return ROOT::RVec<ROOT::RVec<ROOT::RVecF>>{{v - 1, v + 1}, {w - 1, w + 1}};
},
{"v", "w"}, {"down", "up"}, "variation");
},
std::runtime_error);
}

struct HelperWithCallback : ROOT::Detail::RDF::RActionImpl<HelperWithCallback> {
using Result_t = int;
std::shared_ptr<Result_t> fResult = std::make_shared<Result_t>(0);
Expand Down

0 comments on commit c9c762b

Please sign in to comment.