From 9097b0a8f3b57cf2da2f1ae2b3e65b6d027fdc78 Mon Sep 17 00:00:00 2001 From: Florine de Geus Date: Tue, 4 Jun 2024 18:35:28 +0200 Subject: [PATCH] [df] Add initial snapshotting for RNTuple --- tree/dataframe/inc/ROOT/RDF/ActionHelpers.hxx | 134 +- .../dataframe/inc/ROOT/RDF/InterfaceUtils.hxx | 46 +- tree/dataframe/inc/ROOT/RDF/RInterface.hxx | 92 +- tree/dataframe/inc/ROOT/RDF/RLoopManager.hxx | 3 + tree/dataframe/test/CMakeLists.txt | 1 + .../test/dataframe_snapshot_ntuple.cxx | 1281 +++++++++++++++++ 6 files changed, 1549 insertions(+), 8 deletions(-) create mode 100644 tree/dataframe/test/dataframe_snapshot_ntuple.cxx diff --git a/tree/dataframe/inc/ROOT/RDF/ActionHelpers.hxx b/tree/dataframe/inc/ROOT/RDF/ActionHelpers.hxx index 5f4101873441e..16972fd3ea085 100644 --- a/tree/dataframe/inc/ROOT/RDF/ActionHelpers.hxx +++ b/tree/dataframe/inc/ROOT/RDF/ActionHelpers.hxx @@ -23,6 +23,8 @@ #include #include "ROOT/RVec.hxx" #include "ROOT/TBufferMerger.hxx" // for SnapshotHelper +#include "ROOT/RField.hxx" // for SnapshotRNTupleHelper +#include "ROOT/RNTupleWriter.hxx" // for SnapshotRNTupleHelper #include "ROOT/RDF/RCutFlowReport.hxx" #include "ROOT/RDF/RSampleInfo.hxx" #include "ROOT/RDF/Utils.hxx" @@ -46,6 +48,8 @@ #include "TStatistic.h" #include "ROOT/RDF/RActionImpl.hxx" #include "ROOT/RDF/RMergeableValue.hxx" +#include "ROOT/RDF/RLoopManager.hxx" +#include "ROOT/RNTupleDS.hxx" #include #include @@ -1509,9 +1513,10 @@ void SetBranchesHelper(TTree *inputTree, TTree &outputTree, const std::string &i void ValidateSnapshotOutput(const RSnapshotOptions &opts, const std::string &treeName, const std::string &fileName); -/// Helper object for a single-thread Snapshot action +/// Helper object for a single-thread TTree-based Snapshot action template -class R__CLING_PTRCHECK(off) SnapshotHelper : public RActionImpl> { +class R__CLING_PTRCHECK(off) SnapshotHelper + : public RActionImpl> { // TODO(fdegeus) rename SnapshotTTreeHelper std::string fFileName; std::string 
fDirName; std::string fTreeName; @@ -1658,9 +1663,10 @@ public: } }; -/// Helper object for a multi-thread Snapshot action +/// Helper object for a multi-thread TTree-based Snapshot action template -class R__CLING_PTRCHECK(off) SnapshotHelperMT : public RActionImpl> { +class R__CLING_PTRCHECK(off) SnapshotHelperMT + : public RActionImpl> { // TODO(fdegeus) rename SnapshotTTreeHelper unsigned int fNSlots; std::unique_ptr fMerger; // must use a ptr because TBufferMerger is not movable std::vector> fOutputFiles; @@ -1844,6 +1850,126 @@ public: } }; +/// Helper function to update the value of an RNTuple's field in the provided entry. +template +void SetFieldsHelper(T value, std::string_view fieldName, ROOT::Experimental::REntry *entry) +{ + entry->BindValue(fieldName, std::make_shared(value)); +} + +/// Helper object for a single-thread RNTuple-based Snapshot action +template +class R__CLING_PTRCHECK(off) SnapshotRNTupleHelper : public RActionImpl> { + std::string fFileName; + std::string fNTupleName; + RSnapshotOptions fOptions; + ROOT::Detail::RDF::RLoopManager *fLoopManager; + ColumnNames_t fInputFieldNames; // This contains the resolved aliases + ColumnNames_t fOutputFieldNames; + std::unique_ptr fWriter{nullptr}; + + ROOT::Experimental::REntry *fOutputEntry; + + std::vector fIsDefine; + +public: + using ColumnTypes_t = TypeList; + SnapshotRNTupleHelper(std::string_view filename, std::string_view ntuplename, const ColumnNames_t &vfnames, + const ColumnNames_t &fnames, const RSnapshotOptions &options, + ROOT::Detail::RDF::RLoopManager *lm, std::vector &&isDefine) + : fFileName(filename), + fNTupleName(ntuplename), + fOptions(options), + fLoopManager(lm), + fInputFieldNames(vfnames), + fOutputFieldNames(ReplaceDotWithUnderscore(fnames)), + fIsDefine(std::move(isDefine)) + { + // TODO(fdegeus) add ValidateSnapshotRNTupleOutput(fOptions, fTreeName, fFileName); + } + + SnapshotRNTupleHelper(const SnapshotRNTupleHelper &) = delete; + 
SnapshotRNTupleHelper(SnapshotRNTupleHelper &&) = default; + ~SnapshotRNTupleHelper() + { + if (!fNTupleName.empty() /* TODO(fdegeus) Check if LM has DS */ && fOptions.fLazy) + Warning("SnapshotRNTuple", "A lazy Snapshot action was booked but never triggered."); + } + + void InitTask(TTreeReader *, unsigned int /* slot */) {} + + void Exec(unsigned int /* slot */, ColTypes &...values) + { + using ind_t = std::index_sequence_for; + + SetFields(values..., ind_t{}); + fWriter->Fill(); + } + + template + void SetFields(ColTypes &...values, std::index_sequence /*dummy*/) + { + int expander[] = {(SetFieldsHelper(values, fOutputFieldNames[S], fOutputEntry), 0)..., 0}; + (void)expander; // avoid unused variable warnings for older compilers such as gcc 4.9 + } + + void Initialize() + { + using ind_t = std::index_sequence_for; + + auto model = ROOT::Experimental::RNTupleModel::Create(); + MakeFields(*model, ind_t{}); + fOutputEntry = &model->GetDefaultEntry(); + + fWriter = ROOT::Experimental::RNTupleWriter::Recreate(std::move(model), fNTupleName, fFileName); + } + + template + void MakeFields(ROOT::Experimental::RNTupleModel &model, std::index_sequence /*dummy*/) + { + int expander[] = {(model.MakeField(fOutputFieldNames[S]), 0)..., 0}; + (void)expander; + } + + void Finalize() + { + { + fWriter.reset(); + } + fLoopManager->SetDataSource(std::make_unique(fNTupleName, fFileName)); + } + + std::string GetActionName() { return "Snapshot"; } + + ROOT::RDF::SampleCallback_t GetSampleCallback() final + { + return [](unsigned int, const RSampleInfo &) mutable {}; + } + + /** + * @brief Create a new SnapshotRNTupleHelper with a different output file name + * + * @param newName A type-erased string with the output file name + * @return SnapshotRNTupleHelper + * + * This MakeNew implementation is tied to the cloning feature of actions + * of the computation graph. 
In particular, cloning a Snapshot node usually + * also involves changing the name of the output file, otherwise the cloned + * Snapshot would overwrite the same file. + */ + SnapshotRNTupleHelper MakeNew(void *newName) + { + const std::string finalName = *reinterpret_cast(newName); + return SnapshotRNTupleHelper{finalName, + fNTupleName, + fInputFieldNames, + fOutputFieldNames, + fOptions, + fLoopManager, + std::vector(fIsDefine)}; + } +}; + template ::value> class R__CLING_PTRCHECK(off) AggregateHelper diff --git a/tree/dataframe/inc/ROOT/RDF/InterfaceUtils.hxx b/tree/dataframe/inc/ROOT/RDF/InterfaceUtils.hxx index 7ab241761bc63..45849b99e018f 100644 --- a/tree/dataframe/inc/ROOT/RDF/InterfaceUtils.hxx +++ b/tree/dataframe/inc/ROOT/RDF/InterfaceUtils.hxx @@ -105,6 +105,7 @@ struct Fill{}; struct StdDev{}; struct Display{}; struct Snapshot{}; +struct SnapshotRNTuple{}; struct Book{}; } // clang-format on @@ -250,9 +251,10 @@ struct SnapshotHelperArgs { std::string fTreeName; std::vector fOutputColNames; ROOT::RDF::RSnapshotOptions fOptions; + RDFDetail::RLoopManager *fLoopManager; }; -// Snapshot action +// Snapshot action TODO(fdegeus) SnapshotTTree template std::unique_ptr BuildAction(const ColumnNames_t &colNames, const std::shared_ptr &snapHelperArgs, @@ -293,6 +295,48 @@ BuildAction(const ColumnNames_t &colNames, const std::shared_ptr +std::unique_ptr +BuildAction(const ColumnNames_t &colNames, const std::shared_ptr &snapHelperArgs, + const unsigned int /*nSlots*/, std::shared_ptr prevNode, ActionTags::SnapshotRNTuple, + const RColumnRegister &colRegister) +{ + const auto &filename = snapHelperArgs->fFileName; + const auto &ntuplename = snapHelperArgs->fTreeName; + const auto &outputColNames = snapHelperArgs->fOutputColNames; + const auto &options = snapHelperArgs->fOptions; + auto loopManager = snapHelperArgs->fLoopManager; + + auto makeIsDefine = [&] { + std::vector isDef; + isDef.reserve(sizeof...(ColTypes)); + for (auto i = 0u; i < sizeof...(ColTypes); 
++i) + isDef.push_back(colRegister.IsDefineOrAlias(colNames[i])); + return isDef; + }; + std::vector isDefine = makeIsDefine(); + + std::unique_ptr actionPtr; + if (!ROOT::IsImplicitMTEnabled()) { + // single-thread snapshot + using Helper_t = SnapshotRNTupleHelper; + using Action_t = RAction; + actionPtr.reset(new Action_t( + Helper_t(filename, ntuplename, colNames, outputColNames, options, loopManager, std::move(isDefine)), colNames, + prevNode, colRegister)); + } else { + // TODO(fdegeus) + // // multi-thread snapshot + // using Helper_t = SnapshotHelperMT; + // using Action_t = RAction; + // actionPtr.reset(new Action_t( + // Helper_t(nSlots, filename, dirname, treename, colNames, outputColNames, options, std::move(isDefine)), + // colNames, prevNode, colRegister)); + } + return actionPtr; +} + // Book with custom helper type template std::unique_ptr diff --git a/tree/dataframe/inc/ROOT/RDF/RInterface.hxx b/tree/dataframe/inc/ROOT/RDF/RInterface.hxx index 58de8ea2cd801..88d2e168ac09b 100644 --- a/tree/dataframe/inc/ROOT/RDF/RInterface.hxx +++ b/tree/dataframe/inc/ROOT/RDF/RInterface.hxx @@ -1169,7 +1169,7 @@ public: auto snapHelperArgs = std::make_shared( RDFInternal::SnapshotHelperArgs{std::string(filename), std::string(dirname), std::string(treename), - colListWithAliasesAndSizeBranches, options}); + colListWithAliasesAndSizeBranches, options, nullptr}); ::TDirectory::TContext ctxt; @@ -1190,6 +1190,56 @@ public: return resPtr; } +#ifdef R__HAS_ROOT7 + /// TODO(fdegeus) docs + template + RResultPtr> + SnapshotRNTuple(std::string_view ntuplename, std::string_view filename, const ColumnNames_t &columnList, + const RSnapshotOptions &options = RSnapshotOptions()) + { + return SnapshotRNTupleImpl(ntuplename, filename, columnList, options); + } + + RResultPtr> SnapshotRNTuple(std::string_view ntuplename, std::string_view filename, + const ColumnNames_t &columnList, + const RSnapshotOptions &options = RSnapshotOptions()) + { + // like columnList but with `#var` 
columns removed + auto colListNoPoundSizes = RDFInternal::FilterArraySizeColNames(columnList, "SnapshotRNTuple"); + // like columnListWithoutSizeColumns but with aliases resolved + auto colListNoAliases = GetValidatedColumnNames(colListNoPoundSizes.size(), colListNoPoundSizes); + RDFInternal::CheckForDuplicateSnapshotColumns(colListNoAliases); + // like validCols but with missing size branches required by array branches added in the right positions + const auto pairOfColumnLists = + RDFInternal::AddSizeBranches(fLoopManager->GetBranchNames(), fLoopManager->GetTree(), + std::move(colListNoAliases), std::move(colListNoPoundSizes)); + const auto &colListNoAliasesWithSizeBranches = pairOfColumnLists.first; + const auto &colListWithAliasesAndSizeBranches = pairOfColumnLists.second; + + const auto colTypeList = GetColumnTypeNamesList(colListNoAliasesWithSizeBranches); + + const auto fullNTupleName = ntuplename; + const auto parsedNTupleName = RDFInternal::ParseTreePath(fullNTupleName); + ntuplename = parsedNTupleName.fTreeName; + + ::TDirectory::TContext ctxt; + + auto newRDF = std::make_shared>(std::make_shared(0)); + + auto snapHelperArgs = std::make_shared( + RDFInternal::SnapshotHelperArgs{std::string(filename), "", std::string(ntuplename), + colListWithAliasesAndSizeBranches, options, newRDF->GetLoopManager()}); + + auto resPtr = CreateAction( + colListNoAliasesWithSizeBranches, newRDF, snapHelperArgs, fProxiedPtr, + colListNoAliasesWithSizeBranches.size()); + + if (!options.fLazy) + *resPtr; + return resPtr; + } +#endif + // clang-format off //////////////////////////////////////////////////////////////////////////// /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`. 
@@ -3002,8 +3052,9 @@ private: const auto &treename = parsedTreePath.fTreeName; const auto &dirname = parsedTreePath.fDirName; - auto snapHelperArgs = std::make_shared(RDFInternal::SnapshotHelperArgs{ - std::string(filename), std::string(dirname), std::string(treename), columnListWithoutSizeColumns, options}); + auto snapHelperArgs = std::make_shared( + RDFInternal::SnapshotHelperArgs{std::string(filename), std::string(dirname), std::string(treename), + columnListWithoutSizeColumns, options, nullptr}); ::TDirectory::TContext ctxt; @@ -3025,6 +3076,41 @@ private: return resPtr; } + // TODO(fdegeus) Docs + template + RResultPtr> + SnapshotRNTupleImpl(std::string_view fullNTupleName, std::string_view filename, const ColumnNames_t &columnList, + const RSnapshotOptions &options) + { + const auto columnListWithoutSizeColumns = RDFInternal::FilterArraySizeColNames(columnList, "SnapshotRNTuple"); + + RDFInternal::CheckTypesAndPars(sizeof...(ColumnTypes), columnListWithoutSizeColumns.size()); + // validCols has aliases resolved, while columnListWithoutSizeColumns still has aliases in it. + const auto validCols = GetValidatedColumnNames(columnListWithoutSizeColumns.size(), columnListWithoutSizeColumns); + RDFInternal::CheckForDuplicateSnapshotColumns(validCols); + CheckAndFillDSColumns(validCols, TTraits::TypeList()); + + const auto parsedNTuplePath = RDFInternal::ParseTreePath(fullNTupleName); + const auto &ntuplename = parsedNTuplePath.fTreeName; + + ::TDirectory::TContext ctxt; + + auto newRDF = std::make_shared>(std::make_shared(0)); + + auto snapHelperArgs = std::make_shared( + RDFInternal::SnapshotHelperArgs{std::string(filename), "", std::string(ntuplename), + columnListWithoutSizeColumns, options, newRDF->GetLoopManager()}); + + // The Snapshot helper will use validCols (with aliases resolved) as input columns, and + // columnListWithoutSizeColumns (still with aliases in it, passed through snapHelperArgs) as output column names. 
+ auto resPtr = CreateAction(validCols, newRDF, + snapHelperArgs, fProxiedPtr); + + if (!options.fLazy) + *resPtr; + return resPtr; + } + //////////////////////////////////////////////////////////////////////////// /// \brief Implementation of cache. template diff --git a/tree/dataframe/inc/ROOT/RDF/RLoopManager.hxx b/tree/dataframe/inc/ROOT/RDF/RLoopManager.hxx index 60e10a835a26f..9bea2d038ca0f 100644 --- a/tree/dataframe/inc/ROOT/RDF/RLoopManager.hxx +++ b/tree/dataframe/inc/ROOT/RDF/RLoopManager.hxx @@ -17,6 +17,9 @@ #include "ROOT/RDF/RNodeBase.hxx" #include "ROOT/RDF/RNewSampleNotifier.hxx" #include "ROOT/RDF/RSampleInfo.hxx" +#include "ROOT/RDataSource.hxx" + +#include "ROOT/RNTupleWriter.hxx" #include #include diff --git a/tree/dataframe/test/CMakeLists.txt b/tree/dataframe/test/CMakeLists.txt index a9f69344b298c..e8d48823bb78c 100644 --- a/tree/dataframe/test/CMakeLists.txt +++ b/tree/dataframe/test/CMakeLists.txt @@ -96,6 +96,7 @@ endif() if(root7) ROOT_ADD_GTEST(datasource_ntuple datasource_ntuple.cxx LIBRARIES ROOTDataFrame) + ROOT_ADD_GTEST(dataframe_snapshot_ntuple dataframe_snapshot_ntuple.cxx LIBRARIES ROOTDataFrame) ROOT_STANDARD_LIBRARY_PACKAGE(NTupleStruct NO_INSTALL_HEADERS diff --git a/tree/dataframe/test/dataframe_snapshot_ntuple.cxx b/tree/dataframe/test/dataframe_snapshot_ntuple.cxx new file mode 100644 index 0000000000000..9b311e836d2f1 --- /dev/null +++ b/tree/dataframe/test/dataframe_snapshot_ntuple.cxx @@ -0,0 +1,1281 @@ +#include "ROOT/TestSupport.hxx" +#include "ROOT/RDataFrame.hxx" + +// #include "TFile.h" +#include "TROOT.h" +#include "TSystem.h" +// #include +#include "gtest/gtest.h" +// #include +// #include +using namespace ROOT; // RDataFrame +using namespace ROOT::RDF; // RInterface +using namespace ROOT::VecOps; // RVec +using namespace ROOT::Detail::RDF; // RLoopManager + +TEST(RDFSnapshot, Snapshot_RNTupleTemplated) +{ + auto filename = "Snapshot_RNTuple.root"; + + auto df = ROOT::RDataFrame(25ull).Define("x", [] { return 10; 
}); + auto sdf = df.SnapshotRNTuple("ntuple", filename, {"x"}); + + std::vector expectedColumns = {"x"}; + EXPECT_EQ(expectedColumns, sdf->GetColumnNames()); +} + +TEST(RDFSnapshot, Snapshot_RNTupleJITted) +{ + auto filename = "Snapshot_RNTuple.root"; + + auto df = ROOT::RDataFrame(25ull).Define("x", [] { return 10; }); + auto sdf = df.SnapshotRNTuple("ntuple", filename, {"x"}); + + std::vector expectedColumns = {"x"}; + EXPECT_EQ(expectedColumns, sdf->GetColumnNames()); +} + +// TODO: +// * test snapshotting from existing on-disk fields + +// TEST_F(RDFSnapshot, SnapshotCallAmbiguities) +// { +// auto filename = "Snapshot_interface.root"; + +// tdf.Snapshot("t", filename, "an.*"); +// tdf.Snapshot("t", filename, {"ans"}); +// tdf.Snapshot("t", filename, {{"ans"}}); + +// gSystem->Unlink(filename); +// } + +// // Test for ROOT-9210 +// TEST_F(RDFSnapshot, Snapshot_aliases) +// { +// const auto alias0 = "myalias0"; +// const auto alias1 = "myalias1"; +// auto tdfa = tdf.Alias(alias0, "ans"); +// auto tdfb = tdfa.Define("vec", [] { return RVec{1,2,3}; }).Alias(alias1, "vec"); +// testing::internal::CaptureStderr(); +// auto snap = tdfb.Snapshot>("mytree", "Snapshot_aliases.root", {alias0, alias1}); +// std::string err = testing::internal::GetCapturedStderr(); +// EXPECT_TRUE(err.empty()) << err; +// auto names = snap->GetColumnNames(); +// EXPECT_EQ(2U, names.size()); +// EXPECT_EQ(names, std::vector({alias0, alias1})); + +// auto takenCol = snap->Alias("a", alias0).Take("a"); +// for (auto i : takenCol) { +// EXPECT_EQ(42, i); +// } +// } + +// // Test for ROOT-9122 +// TEST_F(RDFSnapshot, Snapshot_nocolumnmatch) +// { +// const auto fname = "snapshotnocolumnmatch.root"; +// RDataFrame d(1); +// auto op = [&](){ +// d.Snapshot("t", fname, "x"); +// }; +// EXPECT_ANY_THROW(op()); +// gSystem->Unlink(fname); +// } + +// void TestSnapshotUpdate(RInterface &tdf, const std::string &outfile, const std::string &tree1, +// const std::string &tree2, bool overwriteIfExists) 
+// { +// // test snapshotting two trees to the same file opened in "UPDATE" mode +// auto df = tdf.Define("x", [] { return 10; }); +// auto s1 = df.Snapshot(tree1, outfile, {"x"}); + +// auto c1 = s1->Count(); +// auto mean1 = s1->Mean("x"); +// EXPECT_EQ(100ull, *c1); +// EXPECT_DOUBLE_EQ(10., *mean1); + +// RSnapshotOptions opts; +// opts.fMode = "UPDATE"; +// opts.fOverwriteIfExists = overwriteIfExists; +// auto s2 = ROOT::RDataFrame(50ull).Define("x", [] { return 10; }) +// .Snapshot(tree2, outfile, {"x"}, opts); + +// auto c2 = s2->Count(); +// auto mean2 = s2->Mean("x"); +// EXPECT_EQ(50ull, *c2); +// EXPECT_DOUBLE_EQ(10., *mean2); + +// // check that the output file contains both trees +// std::unique_ptr f(TFile::Open(outfile.c_str())); +// EXPECT_NE(nullptr, f->Get(tree1.c_str())); +// EXPECT_NE(nullptr, f->Get(tree2.c_str())); + +// // clean-up +// gSystem->Unlink(outfile.c_str()); +// } + +// TEST_F(RDFSnapshot, Snapshot_update_diff_treename) +// { +// // test snapshotting two trees with different names +// TestSnapshotUpdate(tdf, "snap_update_difftreenames.root", "t1", "t2", false); +// } + +// TEST_F(RDFSnapshot, Snapshot_update_same_treename) +// { +// bool exceptionCaught = false; +// try { +// // test snapshotting two trees with same name +// TestSnapshotUpdate(tdf, "snap_update_sametreenames.root", "t", "t", false); +// } catch (const std::invalid_argument &e) { +// const std::string msg = +// "Snapshot: tree \"t\" already present in file \"snap_update_sametreenames.root\". 
If you want to delete the +// " "original tree and write another, please set RSnapshotOptions::fOverwriteIfExists to true."; +// EXPECT_EQ(e.what(), msg); +// exceptionCaught = true; +// } +// EXPECT_TRUE(exceptionCaught); +// } + +// TEST_F(RDFSnapshot, Snapshot_update_overwrite) +// { +// // test snapshotting two trees with different names +// TestSnapshotUpdate(tdf, "snap_update_overwrite.root", "t", "t", true); +// } + +// void test_snapshot_options(RInterface &tdf) +// { +// RSnapshotOptions opts; +// opts.fAutoFlush = 10; +// opts.fMode = "RECREATE"; +// opts.fCompressionLevel = 6; + +// const auto outfile = "snapshot_test_opts.root"; +// for (auto algorithm : {ROOT::kZLIB, ROOT::kLZMA, ROOT::kLZ4, ROOT::kZSTD}) { +// opts.fCompressionAlgorithm = algorithm; + +// auto s = tdf.Snapshot("t", outfile, {"ans"}, opts); + +// auto c = s->Count(); +// auto min = s->Min("ans"); +// auto max = s->Max("ans"); +// auto mean = s->Mean("ans"); +// EXPECT_EQ(100ull, *c); +// EXPECT_EQ(42, *min); +// EXPECT_EQ(42, *max); +// EXPECT_EQ(42, *mean); + +// std::unique_ptr f(TFile::Open("snapshot_test_opts.root")); + +// EXPECT_EQ(algorithm, f->GetCompressionAlgorithm()); +// EXPECT_EQ(6, f->GetCompressionLevel()); +// } + +// // clean-up +// gSystem->Unlink(outfile); +// } + +// TEST_F(RDFSnapshot, Snapshot_action_with_options) +// { +// test_snapshot_options(tdf); +// } + +// void checkSnapshotArrayFile(RResultPtr> &df, unsigned int kNEvents) +// { +// // fixedSizeArr and varSizeArr are RResultPtr>> +// auto fixedSizeArr = df->Take>("fixedSizeArr"); +// auto varSizeArr = df->Take>("varSizeArr"); +// auto fixedSizeBoolArr = df->Take>("fixedSizeBoolArr"); +// auto varSizeBoolArr = df->Take>("varSizeBoolArr"); +// auto size = df->Take("size"); + +// // check contents of fixed sized arrays +// const auto nEvents = fixedSizeArr->size(); +// const auto fixedSizeSize = fixedSizeArr->front().size(); +// EXPECT_EQ(nEvents, kNEvents); +// EXPECT_EQ(fixedSizeSize, 4u); +// for (auto i = 
0u; i < nEvents; ++i) { +// for (auto j = 0u; j < fixedSizeSize; ++j) { +// EXPECT_DOUBLE_EQ(fixedSizeArr->at(i).at(j), i * j); +// EXPECT_EQ(fixedSizeBoolArr->at(i).at(j), j % 2 == 0); +// } +// } + +// // check contents of variable sized arrays +// for (auto i = 0u; i < nEvents; ++i) { +// const auto thisSize = size->at(i); +// const auto &dv = varSizeArr->at(i); +// const auto &bv = varSizeBoolArr->at(i); +// EXPECT_EQ(thisSize, dv.size()); +// EXPECT_EQ(thisSize, bv.size()); +// std::cout << "bv: "; +// for (auto j = 0u; j < thisSize; ++j) +// std::cout << bv[j] << ' '; +// std::cout << "\nexpected: "; +// for (auto j = 0u; j < thisSize; ++j) { +// EXPECT_DOUBLE_EQ(dv[j], i * j); +// const bool value = bv[j]; +// const bool expected = j % 2 == 0; +// std::cout << expected << ' '; +// EXPECT_EQ(value, expected); +// } +// std::cout << '\n'; +// } +// } + +// TEST_F(RDFSnapshotArrays, SingleThread) +// { +// RDataFrame tdf("arrayTree", kFileNames); +// // template Snapshot +// // "size" _must_ be listed before "varSizeArr"! +// auto dt = tdf.Snapshot, unsigned int, RVec, RVec, RVec>( +// "outTree", "test_snapshotRVecoutST.root", +// {"fixedSizeArr", "size", "varSizeArr", "varSizeBoolArr", "fixedSizeBoolArr"}); + +// checkSnapshotArrayFile(dt, kNEvents); +// } + +// TEST_F(RDFSnapshotArrays, SingleThreadJitted) +// { +// RDataFrame tdf("arrayTree", kFileNames); +// // jitted Snapshot +// // "size" _must_ be listed before "varSizeArr"! 
+// auto dj = tdf.Snapshot("outTree", "test_snapshotRVecoutSTJitted.root", +// {"fixedSizeArr", "size", "varSizeArr", "varSizeBoolArr", "fixedSizeBoolArr"}); + +// checkSnapshotArrayFile(dj, kNEvents); +// } + +// TEST_F(RDFSnapshotArrays, RedefineArray) +// { +// RDataFrame df("arrayTree", kFileNames); +// auto df2 = df.Redefine("fixedSizeArr", +// [] { +// return ROOT::RVecF{42.f, 42.f}; +// }) +// .Snapshot>("t", "test_snapshotRVecRedefineArray.root", {"fixedSizeArr"}); +// df2->Foreach( +// [](const ROOT::RVecF &v) { +// EXPECT_EQ(v.size(), 2u); // not 4 as it was in the original input +// EXPECT_TRUE(All(v == ROOT::RVecF{42.f, 42.f})); +// }, +// {"fixedSizeArr"}); + +// gSystem->Unlink("test_snapshotRVecRedefineArray.root"); +// } + +// void WriteColsWithCustomTitles(const std::string &tname, const std::string &fname) +// { +// int i; +// float f; +// int a[2]; +// TFile file(fname.c_str(), "RECREATE"); +// TTree t(tname.c_str(), tname.c_str()); +// auto b = t.Branch("float", &f); +// b->SetTitle("custom title"); +// b = t.Branch("i", &i); +// b->SetTitle("custom title"); +// b = t.Branch("arrint", &a, "arrint[2]/I"); +// b->SetTitle("custom title"); +// b = t.Branch("vararrint", &a, "vararrint[i]/I"); +// b->SetTitle("custom title"); + +// i = 1; +// a[0] = 42; +// a[1] = 84; +// f = 4.2; +// t.Fill(); + +// i = 2; +// f = 8.4; +// t.Fill(); + +// t.Write(); +// } + +// void CheckColsWithCustomTitles(unsigned long long int entry, int i, const RVec &arrint, +// const RVec &vararrint, float f) +// { +// if (entry == 0) { +// EXPECT_EQ(i, 1); +// EXPECT_EQ(arrint.size(), 2u); +// EXPECT_EQ(arrint[0], 42); +// EXPECT_EQ(arrint[1], 84); +// EXPECT_EQ(vararrint.size(), 1u); +// EXPECT_EQ(vararrint[0], 42); +// EXPECT_FLOAT_EQ(f, 4.2f); +// } else if (entry == 1) { +// EXPECT_EQ(i, 2); +// EXPECT_EQ(arrint.size(), 2u); +// EXPECT_EQ(arrint[0], 42); +// EXPECT_EQ(arrint[1], 84); +// EXPECT_EQ(vararrint.size(), 2u); +// EXPECT_EQ(vararrint[0], 42); +// 
EXPECT_EQ(vararrint[1], 84); +// EXPECT_FLOAT_EQ(f, 8.4f); +// } else +// throw std::runtime_error("tree has more entries than expected"); +// } + +// TEST(RDFSnapshotMore, ColsWithCustomTitles) +// { +// const auto fname = "colswithcustomtitles.root"; +// const auto tname = "t"; + +// // write test tree +// WriteColsWithCustomTitles(tname, fname); + +// // read and write test tree with RDF +// RDataFrame d(tname, fname); +// const std::string prefix = "snapshotted_"; +// auto res_tdf = +// d.Snapshot, RVec>(tname, prefix + fname, {"i", "float", "arrint", "vararrint"}); + +// // check correct results have been written out +// res_tdf->Foreach(CheckColsWithCustomTitles, {"tdfentry_", "i", "arrint", "vararrint", "float"}); + +// // clean-up +// gSystem->Unlink(fname); +// gSystem->Unlink((prefix + fname).c_str()); +// } + +// TEST(RDFSnapshotMore, ReadWriteStdVec) +// { +// // write a TFile containing a std::vector +// const auto fname = "readwritestdvec.root"; +// const auto treename = "t"; +// TFile f(fname, "RECREATE"); +// TTree t(treename, treename); +// std::vector v({42}); +// std::vector vb({true, false, true}); // std::vector is special, not in a good way +// t.Branch("v", &v); +// t.Branch("vb", &vb); +// t.Fill(); +// // as an extra test, make sure that the vector reallocates between first and second entry +// v = std::vector(100000, 84); +// vb = std::vector(100000, true); +// t.Fill(); +// t.Write(); +// f.Close(); + +// auto outputChecker = [&treename](const char* filename){ +// // check snapshot output +// TFile f2(filename); +// TTreeReader r(treename, &f2); +// TTreeReaderArray rv(r, "v"); +// TTreeReaderArray rvb(r, "vb"); +// r.Next(); +// EXPECT_EQ(rv.GetSize(), 1u); +// EXPECT_EQ(rv[0], 42); +// EXPECT_EQ(rvb.GetSize(), 3u); +// EXPECT_TRUE(rvb[0]); +// EXPECT_FALSE(rvb[1]); +// EXPECT_TRUE(rvb[2]); +// r.Next(); +// EXPECT_EQ(rv.GetSize(), 100000u); +// EXPECT_EQ(rvb.GetSize(), 100000u); +// for (auto e : rv) +// EXPECT_EQ(e, 84); +// for (auto 
e : rvb) +// EXPECT_TRUE(e); +// }; + +// // read and write using RDataFrame + +// const auto outfname1 = "out_readwritestdvec1.root"; +// RDataFrame(treename, fname).Snapshot, std::vector>(treename, outfname1, {"v", "vb"}); +// outputChecker(outfname1); + +// const auto outfname2 = "out_readwritestdvec2.root"; +// RDataFrame(treename, fname).Snapshot(treename, outfname2); +// outputChecker(outfname2); + +// const auto outfname3 = "out_readwritestdvec3.root"; +// RDataFrame(treename, fname).Snapshot, RVec>(treename, outfname3, {"v", "vb"}); +// outputChecker(outfname3); + +// gSystem->Unlink(fname); +// gSystem->Unlink(outfname1); +// gSystem->Unlink(outfname2); +// gSystem->Unlink(outfname3); +// } + +// void ReadWriteCarray(const char *outFileNameBase) +// { +// // write a TFile containing a arrays +// std::string outFileNameBaseStr = outFileNameBase; +// const auto fname = outFileNameBaseStr + ".root"; +// const auto treename = "t"; +// TFile f(fname.c_str(), "RECREATE"); +// TTree t(treename, treename); +// const auto maxArraySize = 100000U; +// auto size = 0; +// int v[maxArraySize]; +// bool vb[maxArraySize]; +// long int vl[maxArraySize]; +// t.Branch("size", &size, "size/I"); +// t.Branch("v", v, "v[size]/I"); +// t.Branch("vb", vb, "vb[size]/O"); +// t.Branch("vl", vl, "vl[size]/G"); + +// // use 2**33 as a larger-than-int value on 64 bits, otherwise just something larger than short (2**30) +// static constexpr long int longintTestValue = sizeof(long int) == 8 ? 8589934592 : 1073741824; + +// // Size 1 +// size = 1; +// v[0] = 12; +// vb[0] = true; +// vl[0] = longintTestValue; +// t.Fill(); + +// // Size 0 (see ROOT-9860) +// size = 0; +// t.Fill(); + +// // Size 100k: this reallocates! 
+// size = maxArraySize; +// for (auto i : ROOT::TSeqU(size)) { +// v[i] = 84; +// vb[i] = true; +// vl[i] = 42; +// } +// t.Fill(); + +// // Size 3 +// size = 3; +// v[0] = 42; +// v[1] = 43; +// v[2] = 44; +// vb[0] = true; +// vb[1] = false; +// vb[2] = true; +// vl[0] = -1; +// vl[1] = 0; +// vl[2] = 1; +// t.Fill(); + +// t.Write(); +// f.Close(); + +// auto outputChecker = [&treename](const char *filename) { +// // check snapshot output +// TFile f2(filename); +// TTreeReader r(treename, &f2); +// TTreeReaderArray rv(r, "v"); +// TTreeReaderArray rvb(r, "vb"); +// TTreeReaderArray rvl(r, "vl"); + +// // Size 1 +// EXPECT_TRUE(r.Next()); +// EXPECT_EQ(rv.GetSize(), 1u); +// EXPECT_EQ(rv[0], 12); +// EXPECT_EQ(rvb.GetSize(), 1u); +// EXPECT_TRUE(rvb[0]); +// EXPECT_EQ(rvl.GetSize(), 1u); +// EXPECT_EQ(rvl[0], longintTestValue); + +// // Size 0 +// EXPECT_TRUE(r.Next()); +// EXPECT_EQ(rv.GetSize(), 0u); +// EXPECT_EQ(rvb.GetSize(), 0u); +// EXPECT_EQ(rvl.GetSize(), 0u); + +// // Size 100k +// EXPECT_TRUE(r.Next()); +// EXPECT_EQ(rv.GetSize(), 100000u); +// EXPECT_EQ(rvb.GetSize(), 100000u); +// for (auto e : rv) +// EXPECT_EQ(e, 84); +// for (auto e : rvb) +// EXPECT_TRUE(e); +// for (auto e : rvl) +// EXPECT_EQ(e, 42); + +// // Size 3 +// EXPECT_TRUE(r.Next()); +// EXPECT_EQ(rv.GetSize(), 3u); +// EXPECT_EQ(rv[0], 42); +// EXPECT_EQ(rv[1], 43); +// EXPECT_EQ(rv[2], 44); +// EXPECT_EQ(rvb.GetSize(), 3u); +// EXPECT_TRUE(rvb[0]); +// EXPECT_FALSE(rvb[1]); +// EXPECT_TRUE(rvb[2]); +// EXPECT_EQ(rvl.GetSize(), 3u); +// EXPECT_EQ(rvl[0], -1); +// EXPECT_EQ(rvl[1], 0); +// EXPECT_EQ(rvl[2], 1); + +// EXPECT_FALSE(r.Next()); +// }; + +// // read and write using RDataFrame +// const auto outfname1 = outFileNameBaseStr + "_out1.root"; +// RDataFrame(treename, fname).Snapshot(treename, outfname1); +// outputChecker(outfname1.c_str()); + +// const auto outfname2 = outFileNameBaseStr + "_out2.root"; +// RDataFrame(treename, fname) +// .Snapshot, RVec, RVec>(treename, 
outfname2, {"size", "v", "vb", "vl"}); +// outputChecker(outfname2.c_str()); + +// gSystem->Unlink(fname.c_str()); +// gSystem->Unlink(outfname1.c_str()); +// gSystem->Unlink(outfname2.c_str()); +// } + +// TEST(RDFSnapshotMore, ReadWriteCarray) +// { +// ReadWriteCarray("ReadWriteCarray"); +// } + +// struct TwoInts { +// int a, b; +// }; + +// void WriteTreeWithLeaves(const std::string &treename, const std::string &fname) +// { +// TFile f(fname.c_str(), "RECREATE"); +// TTree t(treename.c_str(), treename.c_str()); + +// TwoInts ti{1, 2}; +// t.Branch("v", &ti, "a/I:b/I"); + +// // TODO add checks for reading of multiple nested levels ("w.v.a") +// // when ROOT-9312 is solved and RDF supports "w.v.a" nested notation + +// t.Fill(); +// t.Write(); +// } + +// TEST(RDFSnapshotMore, ReadWriteNestedLeaves) +// { +// const auto treename = "t"; +// const auto fname = "readwritenestedleaves.root"; +// WriteTreeWithLeaves(treename, fname); +// RDataFrame d(treename, fname); +// const auto outfname = "out_readwritenestedleaves.root"; +// ROOT::RDF::RNode d2(d); +// { +// ROOT::TestSupport::CheckDiagsRAII diagRAII; +// diagRAII.requiredDiag(kInfo, "Snapshot", "Column v.a will be saved as v_a"); +// diagRAII.requiredDiag(kInfo, "Snapshot", "Column v.b will be saved as v_b"); +// d2 = *d.Snapshot(treename, outfname, {"v.a", "v.b"}); +// } +// EXPECT_EQ(d2.GetColumnNames(), std::vector({"v_a", "v_b"})); +// auto check_a_b = [](int a, int b) { +// EXPECT_EQ(a, 1); +// EXPECT_EQ(b, 2); +// }; +// d2.Foreach(check_a_b, {"v_a", "v_b"}); +// gSystem->Unlink(fname); +// gSystem->Unlink(outfname); + +// try { +// d.Define("v_a", [] { return 0; }).Snapshot(treename, outfname, {"v.a", "v_a"}); +// } catch (std::runtime_error &e) { +// const auto error_msg = "Column v.a would be written as v_a but this column already exists. 
Please use Alias to +// " +// "select a new name for v.a"; +// EXPECT_STREQ(e.what(), error_msg); +// } +// } + +// TEST(RDFSnapshotMore, Lazy) +// { +// const auto treename = "t"; +// const auto fname0 = "lazy0.root"; +// const auto fname1 = "lazy1.root"; +// // make sure the file is not here beforehand +// gSystem->Unlink(fname0); +// RDataFrame d(1); +// auto v = 0U; +// auto genf = [&v](){++v;return 42;}; +// RSnapshotOptions opts = {"RECREATE", ROOT::kZLIB, 0, 0, 99, true}; +// auto ds = d.Define("c0", genf).Snapshot(treename, fname0, {"c0"}, opts); +// EXPECT_EQ(v, 0U); +// EXPECT_TRUE(gSystem->AccessPathName(fname0)); // This returns FALSE if the file IS there +// auto ds2 = ds->Define("c1", genf).Snapshot(treename, fname1, {"c1"}, opts); +// EXPECT_EQ(v, 1U); +// EXPECT_FALSE(gSystem->AccessPathName(fname0)); +// EXPECT_TRUE(gSystem->AccessPathName(fname1)); +// *ds2; +// EXPECT_EQ(v, 2U); +// EXPECT_FALSE(gSystem->AccessPathName(fname1)); +// gSystem->Unlink(fname0); +// gSystem->Unlink(fname1); +// } + +// TEST(RDFSnapshotMore, LazyJitted) +// { +// const auto treename = "t"; +// const auto fname = "lazyjittedsnapshot.root"; +// // make sure the file is not here beforehand +// gSystem->Unlink(fname); +// RDataFrame d(1); +// RSnapshotOptions opts = {"RECREATE", ROOT::kZLIB, 0, 0, 99, true}; +// auto ds = d.Alias("c0", "rdfentry_").Snapshot(treename, fname, {"c0"}, opts); +// EXPECT_TRUE(gSystem->AccessPathName(fname)); // This returns FALSE if the file IS there +// *ds; +// EXPECT_FALSE(gSystem->AccessPathName(fname)); +// gSystem->Unlink(fname); +// } + +// void BookLazySnapshot() +// { +// auto d = ROOT::RDataFrame(1); +// ROOT::RDF::RSnapshotOptions opts; +// opts.fLazy = true; +// d.Snapshot("t", "lazysnapshotnottriggered_shouldnotbecreated.root", {"rdfentry_"}, opts); +// } + +// TEST(RDFSnapshotMore, LazyNotTriggered) +// { +// ROOT_EXPECT_WARNING(BookLazySnapshot(), "Snapshot", "A lazy Snapshot action was booked but never triggered."); +// } + +// 
RResultPtr> ReturnLazySnapshot(const char *fname) +// { +// auto d = ROOT::RDataFrame(1); +// ROOT::RDF::RSnapshotOptions opts; +// opts.fLazy = true; +// auto res = d.Snapshot("t", fname, {"rdfentry_"}, opts); +// RResultPtr> res2 = res; +// return res; +// } + +// TEST(RDFSnapshotMore, LazyTriggeredAfterCopy) +// { +// const auto fname = "lazysnapshottriggeredaftercopy.root"; +// ROOT_EXPECT_NODIAG(*ReturnLazySnapshot(fname)); +// gSystem->Unlink(fname); +// } + +// void CheckTClonesArrayOutput(const RVec &hvec) +// { +// ASSERT_EQ(hvec.size(), 3); +// for (int i = 0; i < 3; ++i) { +// EXPECT_EQ(hvec[i].GetEntries(), 1); +// EXPECT_DOUBLE_EQ(hvec[i].GetMean(), i); +// } +// } + +// void ReadWriteTClonesArray() +// { +// { +// TClonesArray arr("TH1D", 3); +// for (int i = 0; i < 3; ++i) { +// auto *h = static_cast(arr.ConstructedAt(i)); +// h->SetBins(25, 0, 10); +// h->Fill(i); +// } +// TFile f("df_readwritetclonesarray.root", "recreate"); +// TTree t("t", "t"); +// t.Branch("arr", &arr); +// t.Fill(); +// t.Write(); +// f.Close(); +// } + +// { +// // write as TClonesArray +// auto out_df = ROOT::RDataFrame("t", "df_readwritetclonesarray.root") +// .Snapshot("t", "df_readwriteclonesarray1.root", {"arr"}); +// RVec hvec; + +// #ifndef NDEBUG +// ROOT_EXPECT_WARNING( +// hvec = out_df->Take>("arr")->at(0), "RTreeColumnReader::Get", +// "Branch arr hangs from a non-split branch. 
A copy is being performed in order to properly read the +// content."); +// #else +// ROOT_EXPECT_NODIAG(hvec = out_df->Take>("arr")->at(0)); +// #endif +// CheckTClonesArrayOutput(hvec); +// } + +// // FIXME uncomment when ROOT-10801 is solved +// //{ +// // gInterpreter->GenerateDictionary("vector>", +// // "vector;TH1D.h;ROOT/RVec.hxx"); +// // // write as RVecs +// // auto out_df = ROOT::RDataFrame("t", "df_readwritetclonesarray.root") +// // .Snapshot>("t", "df_readwriteclonesarray2.root", {"arr"}); +// // const auto hvec = out_df->Take>("arr")->at(0); +// // CheckTClonesArrayOutput(hvec); +// //} + +// { +// // write as Snapshot wants +// auto out_df = +// ROOT::RDataFrame("t", "df_readwritetclonesarray.root").Snapshot("t", "df_readwriteclonesarray3.root", +// {"arr"}); +// RVec hvec; +// #ifndef NDEBUG +// ROOT_EXPECT_WARNING( +// hvec = out_df->Take>("arr")->at(0), "RTreeColumnReader::Get", +// "Branch arr hangs from a non-split branch. A copy is being performed in order to properly read the +// content."); +// #else +// ROOT_EXPECT_NODIAG(hvec = out_df->Take>("arr")->at(0)); +// #endif +// CheckTClonesArrayOutput(hvec); +// } + +// gSystem->Unlink("df_readwritetclonesarray.root"); +// gSystem->Unlink("df_readwriteclonesarray1.root"); +// gSystem->Unlink("df_readwriteclonesarray2.root"); +// gSystem->Unlink("df_readwriteclonesarray3.root"); +// } + +// TEST(RDFSnapshotMore, TClonesArray) +// { +// ReadWriteTClonesArray(); +// } + +// // ROOT-10702 +// TEST(RDFSnapshotMore, CompositeTypeWithNameClash) +// { +// const auto fname = "snap_compositetypewithnameclash.root"; +// gInterpreter->Declare("struct Int { int x; };"); +// ROOT::RDataFrame df(3); +// auto snap_df = df.Define("i", "Int{-1};").Define("x", [] { return 1; }).Snapshot("t", fname); +// EXPECT_EQ(snap_df->Sum("x").GetValue(), 3); // prints -3 if the wrong "x" is written out +// EXPECT_EQ(snap_df->Sum("i.x").GetValue(), -3); + +// gSystem->Unlink(fname); +// } + +// // Test that we error out 
gracefully in case the output file specified for a Snapshot cannot be opened +// TEST(RDFSnapshotMore, ForbiddenOutputFilename) +// { +// ROOT::RDataFrame df(4); +// const auto out_fname = "/definitely/not/a/valid/path/f.root"; + +// // Compiled +// try { +// ROOT_EXPECT_SYSERROR(df.Snapshot("t", out_fname, {"rdfslot_"}), "TFile::TFile", +// "file /definitely/not/a/valid/path/f.root can not be opened No such file or directory") +// } catch (const std::runtime_error &e) { +// EXPECT_STREQ(e.what(), "Snapshot: could not create output file /definitely/not/a/valid/path/f.root"); +// } + +// // Jitted +// // If some other test case called EnableThreadSafety, the error printed here is of the form +// // "SysError in : file /definitely/not/a/valid/path/f.root can not be opened No such file or +// directory\nError in : Write lock already released for 0x55f179989378\n" +// // but the address printed changes every time +// ROOT::TestSupport::CheckDiagsRAII diagRAII{kSysError, "TFile::TFile", "file /definitely/not/a/valid/path/f.root +// can not be opened No such file or directory"}; EXPECT_THROW(df.Snapshot("t", out_fname, {"rdfslot_"}), +// std::runtime_error); +// } + +// TEST(RDFSnapshotMore, ZeroOutputEntries) +// { +// const auto fname = "snapshot_zerooutputentries.root"; +// ROOT::RDataFrame(10).Alias("c", "rdfentry_").Filter([] { return false; }).Snapshot("t", fname, {"c"}); +// EXPECT_EQ(gSystem->AccessPathName(fname), 0); // This returns 0 if the file IS there + +// TFile f(fname); +// auto *t = f.Get("t"); +// EXPECT_NE(t, nullptr); // TTree "t" should be in there... 
+// EXPECT_EQ(t->GetEntries(), 0ll); // ...and have zero entries +// gSystem->Unlink(fname); +// } + +// // Test for https://github.com/root-project/root/issues/10233 +// TEST(RDFSnapshotMore, RedefinedDSColumn) +// { +// const auto fname = "test_snapshot_redefinedscolumn.root"; +// auto df = ROOT::RDF::MakeTrivialDataFrame(1); + +// df.Redefine("col0", [] { return 42; }).Snapshot("t", fname); +// gSystem->Unlink(fname); +// } + +// // https://github.com/root-project/root/issues/6932 +// TEST(RDFSnapshotMore, MissingSizeBranch) +// { +// const auto inFile = "test_snapshot_missingsizebranch.root"; +// const auto outFile = "test_snapshot_missingsizebranch_out.root"; + +// // make input tree +// { +// TFile f(inFile, "recreate"); +// TTree t("t", "t"); +// int sz = 1; +// t.Branch("sz", &sz); +// float vec[3] = {1, 2, 3}; +// t.Branch("vec", vec, "vec[sz]/F"); +// t.Fill(); +// sz = 2; +// t.Fill(); +// sz = 3; +// t.Fill(); +// t.Write(); +// } + +// ROOT::RDataFrame df("t", inFile); + +// // fully typed Snapshot call throws +// EXPECT_THROW(df.Snapshot("t", outFile, {"vec"}), std::runtime_error); + +// // jitted Snapshot works anyway +// auto out = df.Snapshot("t", outFile, {"vec"}); + +// auto sizes = out->Take("sz"); +// auto vecs = out->Take("vec"); + +// EXPECT_EQ(sizes->at(0), 1); +// EXPECT_EQ(sizes->at(1), 2); +// EXPECT_EQ(sizes->at(2), 3); +// EXPECT_TRUE(All(vecs->at(0) == ROOT::RVecF{1})); +// EXPECT_TRUE(All(vecs->at(1) == ROOT::RVecF{1, 2})); +// EXPECT_TRUE(All(vecs->at(2) == ROOT::RVecF{1, 2, 3})); + +// gSystem->Unlink(inFile); +// gSystem->Unlink(outFile); +// } + +// TEST(RDFSnapshotMore, OutOfOrderSizeBranch) +// { +// const auto inFile = "test_snapshot_outofordersizebranch_in.root"; +// const auto outFile = "test_snapshot_outofordersizebranch_out.root"; + +// // make input tree +// { +// TFile f(inFile, "recreate"); +// TTree t("t", "t"); +// int sz = 1; +// t.Branch("sz", &sz); +// float vec[3] = {1, 2, 3}; +// t.Branch("vec", vec, "vec[sz]/F"); 
+// t.Fill(); +// sz = 2; +// t.Fill(); +// sz = 3; +// t.Fill(); +// t.Write(); +// } + +// auto check = [](const std::vector &sizes, const std::vector &vecs) { +// EXPECT_EQ(sizes.at(0), 1); +// EXPECT_EQ(sizes.at(1), 2); +// EXPECT_EQ(sizes.at(2), 3); +// EXPECT_TRUE(All(vecs.at(0) == ROOT::RVecF{1})); +// EXPECT_TRUE(All(vecs.at(1) == ROOT::RVecF{1, 2})); +// EXPECT_TRUE(All(vecs.at(2) == ROOT::RVecF{1, 2, 3})); +// }; + +// { +// // fully typed Snapshot +// auto out = ROOT::RDataFrame("t", inFile).Snapshot("t", outFile, {"vec", "sz"}); +// auto sizes = out->Take("sz"); +// auto vecs = out->Take("vec"); + +// check(*sizes, *vecs); +// } + +// { +// // jitted Snapshot +// auto out = ROOT::RDataFrame("t", inFile).Snapshot("t", outFile, {"vec", "sz"}); +// auto sizes = out->Take("sz"); +// auto vecs = out->Take("vec"); + +// check(*sizes, *vecs); +// } + +// gSystem->Unlink(inFile); +// gSystem->Unlink(outFile); +// } + +// /********* MULTI THREAD TESTS ***********/ +// #ifdef R__USE_IMT +// TEST_F(RDFSnapshotMT, Snapshot_update_diff_treename) +// { +// // test snapshotting two trees with different names +// TestSnapshotUpdate(tdf, "snap_update_difftreenames.root", "t1", "t2", false); +// } + +// TEST_F(RDFSnapshotMT, Snapshot_update_same_treename) +// { +// bool exceptionCaught = false; +// try { +// // test snapshotting two trees with same name +// TestSnapshotUpdate(tdf, "snap_update_sametreenames.root", "t", "t", false); +// } catch (const std::invalid_argument &e) { +// const std::string msg = +// "Snapshot: tree \"t\" already present in file \"snap_update_sametreenames.root\". 
If you want to delete the +// " "original tree and write another, please set RSnapshotOptions::fOverwriteIfExists to true."; +// EXPECT_EQ(e.what(), msg); +// exceptionCaught = true; +// } +// EXPECT_TRUE(exceptionCaught); +// } + +// TEST_F(RDFSnapshotMT, Snapshot_update_overwrite) +// { +// // test snapshotting two trees with the same name, with overwriting enabled +// TestSnapshotUpdate(tdf, "snap_update_overwrite.root", "t", "t", true); +// } + +// TEST_F(RDFSnapshotMT, Snapshot_action_with_options) +// { +// test_snapshot_options(tdf); +// } + +// TEST_F(RDFSnapshotMT, Reshuffled_friends) +// { +// const auto fname = "snapshot_reshuffled_friends.root"; +// tdf.Snapshot("t", fname); + +// { +// // add reshuffled tree as friend +// TFile f(fname); +// TTree *t = f.Get("t"); +// TTree t2("t2", "t2"); +// const auto expected = +// "Tree 't' has the kEntriesReshuffled bit set and cannot have friends nor can be added as a friend unless the +// " "main tree has a TTreeIndex on the friend tree 't'. You can also unset the bit manually if you know what +// you " "are doing; note that you risk associating wrong TTree entries of the friend with those of the main +// TTree!"; +// ROOT_EXPECT_ERROR(t2.AddFriend(t), "AddFriend", expected); +// } + +// { +// // add friend to reshuffled tree +// TFile f(fname); +// TTree *t = f.Get("t"); +// TTree t2("t2", "t2"); +// const auto expected = +// "Tree 't' has the kEntriesReshuffled bit set and cannot have friends nor can be added as a friend unless the +// " "main tree has a TTreeIndex on the friend tree 't2'.
You can also unset the bit manually if you know what +// you " "are doing; note that you risk associating wrong TTree entries of the friend with those of the main +// TTree!"; +// ROOT_EXPECT_ERROR(t->AddFriend(&t2);, "AddFriend", expected); +// } +// } + +// TEST(RDFSnapshotMore, ManyTasksPerThread) +// { +// const auto nSlots = 4u; +// ROOT::EnableImplicitMT(nSlots); +// // make sure the file is not here beforehand +// gSystem->Unlink("snapshot_manytasks_out.root"); + +// // easiest way to be sure reading requires spawning of several tasks: create several input files +// const std::string inputFilePrefix = "snapshot_manytasks_"; +// const auto tasksPerThread = 8u; +// const auto nInputFiles = nSlots * tasksPerThread; +// ROOT::RDataFrame d(1); +// auto dd = d.Define("x", []() { return 42; }); +// for (auto i = 0u; i < nInputFiles; ++i) +// dd.Snapshot("t", inputFilePrefix + std::to_string(i) + ".root", {"x"}); + +// // test multi-thread Snapshotting from many tasks per worker thread +// const auto outputFile = "snapshot_manytasks_out.root"; +// ROOT::RDataFrame tdf("t", inputFilePrefix + "*.root"); +// tdf.Snapshot("t", outputFile, {"x"}); + +// // check output contents +// ROOT::RDataFrame checkTdf("t", outputFile); +// auto c = checkTdf.Count(); +// auto t = checkTdf.Take("x"); +// for (auto v : t) +// EXPECT_EQ(v, 42); +// EXPECT_EQ(*c, nInputFiles); + +// // clean-up input files +// for (auto i = 0u; i < nInputFiles; ++i) +// gSystem->Unlink((inputFilePrefix + std::to_string(i) + ".root").c_str()); +// gSystem->Unlink(outputFile); + +// ROOT::DisableImplicitMT(); +// } + +// void checkSnapshotArrayFileMT(RResultPtr> &df, unsigned int kNEvents) +// { +// // fixedSizeArr and varSizeArr are RResultPtr>> +// auto fixedSizeArr = df->Take>("fixedSizeArr"); +// auto varSizeArr = df->Take>("varSizeArr"); +// auto size = df->Take("size"); + +// // multi-thread execution might have scrambled events w.r.t. 
the original file, so we just check overall +// properties const auto nEvents = fixedSizeArr->size(); EXPECT_EQ(nEvents, kNEvents); +// // TODO check more! +// } + +// TEST_F(RDFSnapshotArrays, MultiThread) +// { +// ROOT::EnableImplicitMT(4); + +// RDataFrame tdf("arrayTree", kFileNames); +// auto dt = tdf.Snapshot, unsigned int, RVec, RVec, RVec>( +// "outTree", "test_snapshotRVecoutMT.root", +// {"fixedSizeArr", "size", "varSizeArr", "varSizeBoolArr", "fixedSizeBoolArr"}); + +// checkSnapshotArrayFileMT(dt, kNEvents); + +// ROOT::DisableImplicitMT(); +// } + +// TEST_F(RDFSnapshotArrays, MultiThreadJitted) +// { +// ROOT::EnableImplicitMT(4); + +// RDataFrame tdf("arrayTree", kFileNames); +// auto dj = tdf.Snapshot("outTree", "test_snapshotRVecoutMTJitted.root", +// {"fixedSizeArr", "size", "varSizeArr", "varSizeBoolArr", "fixedSizeBoolArr"}); + +// checkSnapshotArrayFileMT(dj, kNEvents); + +// ROOT::DisableImplicitMT(); +// } + +// // See also https://github.com/root-project/root/issues/10225 +// TEST_F(RDFSnapshotArrays, WriteRVecFromFile) +// { +// { +// auto df = ROOT::RDataFrame(3).Define("x", [](ULong64_t e) { return ROOT::RVecD(e, double(e)); }, +// {"rdfentry_"}); df.Snapshot("t", "test_snapshotRVecWriteRVecFromFile.root", {"x"}); +// } + +// ROOT::RDataFrame df("t", "test_snapshotRVecWriteRVecFromFile.root"); +// auto outdf = df.Snapshot("t", "test_snapshotRVecWriteRVecFromFile2.root", {"x"}); + +// const auto res = outdf->Take("x").GetValue(); + +// EXPECT_EQ(res.size(), 3u); +// EXPECT_EQ(res[0].size(), 0u); +// EXPECT_TRUE(All(res[1] == ROOT::RVecD{1.})); +// EXPECT_TRUE(All(res[2] == ROOT::RVecD{2., 2.})); + +// gSystem->Unlink("test_snapshotRVecWriteRVecFromFile.root"); +// gSystem->Unlink("test_snapshotRVecWriteRVecFromFile2.root"); +// } + +// TEST(RDFSnapshotMore, ColsWithCustomTitlesMT) +// { +// const auto fname = "colswithcustomtitlesmt.root"; +// const auto tname = "t"; + +// // write test tree +// WriteColsWithCustomTitles(tname, fname); + 
+// // read and write test tree with RDF (in parallel) +// ROOT::EnableImplicitMT(4); +// RDataFrame d(tname, fname); +// const std::string prefix = "snapshotted_"; +// auto res_tdf = +// d.Snapshot, RVec>(tname, prefix + fname, {"i", "float", "arrint", "vararrint"}); + +// // check correct results have been written out +// res_tdf->Foreach(CheckColsWithCustomTitles, {"tdfentry_", "i", "arrint", "vararrint", "float"}); +// res_tdf->Foreach(CheckColsWithCustomTitles, {"rdfentry_", "i", "arrint", "vararrint", "float"}); + +// // clean-up +// gSystem->Unlink(fname); +// gSystem->Unlink((prefix + fname).c_str()); +// ROOT::DisableImplicitMT(); +// } + +// TEST(RDFSnapshotMore, TreeWithFriendsMT) +// { +// const auto fname1 = "treewithfriendsmt1.root"; +// const auto fname2 = "treewithfriendsmt2.root"; +// RDataFrame(10).Define("x", []() { return 42; }).Snapshot("t", fname1, {"x"}); +// RDataFrame(10).Define("x", []() { return 0; }).Snapshot("t", fname2, {"x"}); + +// ROOT::EnableImplicitMT(); + +// TFile file(fname1); +// auto tree = file.Get("t"); +// TFile file2(fname2); +// auto tree2 = file2.Get("t"); +// tree->AddFriend(tree2); + +// const auto outfname = "out_treewithfriendsmt.root"; +// RDataFrame df(*tree); +// auto df_out = df.Snapshot("t", outfname, {"x"}); +// EXPECT_EQ(df_out->Max("x").GetValue(), 42); +// EXPECT_EQ(df_out->GetColumnNames(), std::vector{"x"}); + +// ROOT::DisableImplicitMT(); +// gSystem->Unlink(fname1); +// gSystem->Unlink(fname2); +// gSystem->Unlink(outfname); +// } + +// TEST(RDFSnapshotMore, JittedSnapshotAndAliasedColumns) +// { +// ROOT::RDataFrame df(1); +// const auto fname = "out_aliaseddefine.root"; +// // aliasing a custom column +// auto df2 = df.Define("x", [] { return 42; }).Alias("y", "x").Snapshot("t", fname, "y"); // must be jitted! 
+// EXPECT_EQ(df2->GetColumnNames(), std::vector({"y"})); +// EXPECT_EQ(df2->Take("y")->at(0), 42); + +// // aliasing a column from a file +// const auto fname2 = "out_aliaseddefine2.root"; +// auto df3 = df2->Alias("z", "y").Snapshot("t", fname2, "z"); +// EXPECT_EQ(df3->GetColumnNames(), std::vector({"z"})); +// EXPECT_EQ(df3->Max("z").GetValue(), 42); + +// gSystem->Unlink(fname); +// gSystem->Unlink(fname2); +// } + +// TEST(RDFSnapshotMore, LazyNotTriggeredMT) +// { +// ROOT::EnableImplicitMT(4); +// ROOT_EXPECT_WARNING(BookLazySnapshot(), "Snapshot", "A lazy Snapshot action was booked but never triggered."); +// ROOT::DisableImplicitMT(); +// } + +// TEST(RDFSnapshotMore, EmptyBuffersMT) +// { +// const auto fname = "emptybuffersmt.root"; +// const auto treename = "t"; +// const unsigned int nslots = std::min(4U, std::thread::hardware_concurrency()); +// ROOT::EnableImplicitMT(nslots); +// ROOT::RDataFrame d(10); +// auto dd = d.DefineSlot("x", [&](unsigned int s) { return s == nslots - 1 ? 
0 : 1; }) +// .Filter([](int x) { return x == 0; }, {"x"}, "f"); +// auto r = dd.Report(); +// dd.Snapshot(treename, fname, {"x"}); + +// // check test sanity +// const auto passed = r->At("f").GetPass(); +// EXPECT_GT(passed, 0u); + +// // check result +// TFile f(fname); +// auto t = f.Get(treename); +// EXPECT_EQ(t->GetListOfBranches()->GetEntries(), 1); +// EXPECT_EQ(t->GetEntries(), Long64_t(passed)); + +// ROOT::DisableImplicitMT(); +// gSystem->Unlink(fname); +// } + +// TEST(RDFSnapshotMore, ReadWriteCarrayMT) +// { +// ROOT::EnableImplicitMT(4); +// ReadWriteCarray("ReadWriteCarrayMT"); +// ROOT::DisableImplicitMT(); +// } + +// TEST(RDFSnapshotMore, TClonesArrayMT) +// { +// TIMTEnabler _(4); +// ReadWriteTClonesArray(); +// } + +// // Test that we error out gracefully in case the output file specified for a Snapshot cannot be opened +// TEST(RDFSnapshotMore, ForbiddenOutputFilenameMT) +// { +// TIMTEnabler _(4); +// ROOT::RDataFrame df(4); +// const auto out_fname = "/definitely/not/a/valid/path/f.root"; + +// // Compiled +// try { +// const auto expected = "file /definitely/not/a/valid/path/f.root can not be opened No such file or directory"; +// ROOT_EXPECT_SYSERROR(df.Snapshot("t", out_fname, {"rdfslot_"}), "TFile::TFile", expected); +// } catch (const std::runtime_error &e) { +// EXPECT_STREQ(e.what(), "Snapshot: could not create output file /definitely/not/a/valid/path/f.root"); +// } + +// // Jitted +// // the error printed here is +// // "SysError in : file /definitely/not/a/valid/path/f.root can not be opened No such file or +// directory\nError in : Write lock already released for 0x55f179989378\n" +// // but the address printed changes every time +// ROOT::TestSupport::CheckDiagsRAII diagRAII; +// diagRAII.requiredDiag(kSysError, "TFile::TFile", "file /definitely/not/a/valid/path/f.root can not be opened No +// such file or directory"); diagRAII.optionalDiag(kSysError, "TReentrantRWLock::WriteUnLock", "Write lock already +// released for", 
/*wholeStringNeedsToMatch=*/false); EXPECT_THROW(df.Snapshot("t", out_fname, {"rdfslot_"}), +// std::runtime_error); +// } + +// /** +// * Test against issue #6523 and #6640 +// * Try to force `TTree::ChangeFile` behaviour. Within RDataFrame, this should +// * not happen and both sequential and multithreaded Snapshot should only create +// * one file. +// */ +// TEST(RDFSnapshotMore, SetMaxTreeSizeMT) +// { +// // Set TTree max size to a low number. Normally this would trigger the +// // behaviour of TTree::ChangeFile, but not within RDataFrame. +// const auto old_maxtreesize = TTree::GetMaxTreeSize(); +// TTree::SetMaxTreeSize(1000); + +// // Create TTree, fill it and Snapshot (should create one single file). +// { +// TTree t{"T", "SetMaxTreeSize(1000)"}; +// int x{}; +// const int nentries = 20000; + +// t.Branch("x", &x, "x/I"); + +// for (auto i = 0; i < nentries; i++) { +// x = i; +// t.Fill(); +// } + +// ROOT::RDataFrame df{t}; +// df.Snapshot("T", "rdfsnapshot_ttree_sequential_setmaxtreesize.root", {"x"}); +// } + +// // Create an RDF from the previously snapshotted file, then Snapshot again +// // with IMT enabled. +// { +// ROOT::EnableImplicitMT(); + +// ROOT::RDataFrame df{"T", "rdfsnapshot_ttree_sequential_setmaxtreesize.root"}; +// df.Snapshot("T", "rdfsnapshot_imt_setmaxtreesize.root", {"x"}); + +// ROOT::DisableImplicitMT(); +// } + +// // Check the file for data integrity. 
+// { +// TFile f{"rdfsnapshot_imt_setmaxtreesize.root"}; +// std::unique_ptr t{f.Get("T")}; + +// EXPECT_EQ(t->GetEntries(), 20000); + +// int sum{0}; +// int x{0}; +// t->SetBranchAddress("x", &x); + +// for (auto i = 0; i < t->GetEntries(); i++) { +// t->GetEntry(i); +// sum += x; +// } + +// // sum(range(20000)) == 199990000 +// EXPECT_EQ(sum, 199990000); +// } + +// gSystem->Unlink("rdfsnapshot_ttree_sequential_setmaxtreesize.root"); +// gSystem->Unlink("rdfsnapshot_imt_setmaxtreesize.root"); + +// // Reset TTree max size to its old value +// TTree::SetMaxTreeSize(old_maxtreesize); +// } + +// TEST(RDFSnapshotMore, ZeroOutputEntriesMT) +// { +// const auto fname = "snapshot_zerooutputentriesmt.root"; +// ROOT::RDataFrame(10).Alias("c", "rdfentry_").Filter([] { return false; }).Snapshot("t", fname, {"c"}); +// EXPECT_EQ(gSystem->AccessPathName(fname), 0); // This returns 0 if the file IS there + +// TFile f(fname); +// auto *t = f.Get("t"); +// // NOTE(review): the check below expects TTree "t" to be in there, as in the single-thread case; the earlier remark that it should *not* be (see ROOT-10868) looks stale -- confirm +// EXPECT_NE(t, nullptr); +// gSystem->Unlink(fname); +// } + +// #endif // R__USE_IMT