Skip to content

Commit

Permalink
[ntuple] Store join field tokens instead of names
Browse files Browse the repository at this point in the history
  • Loading branch information
enirolf committed Nov 20, 2024
1 parent 9f59dce commit e344b22
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 18 deletions.
25 changes: 19 additions & 6 deletions tree/ntuple/v7/inc/ROOT/RNTupleProcessor.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -306,11 +306,11 @@ class RNTupleJoinProcessor : public RNTupleProcessor {
private:
std::unique_ptr<RNTupleModel> fJoinModel;
std::vector<std::unique_ptr<Internal::RPageSource>> fAuxiliaryPageSources;

std::vector<std::string> fJoinFieldNames;
/// Tokens representing the join fields present in the main RNTuple
std::vector<REntry::RFieldToken> fJoinFieldTokens;
std::vector<std::unique_ptr<Internal::RNTupleIndex>> fJoinIndices;

bool IsUsingIndex() const { return fJoinFieldNames.size() > 0; }
/// Tells whether at least one join index is currently stored in fJoinIndices.
bool IsUsingIndex() const
{
   return !fJoinIndices.empty();
}

NTupleSize_t Advance() final;

Expand All @@ -325,19 +325,32 @@ private:
/// \param[in] model The model that specifies which fields should be read by the processor. The pointer returned by
/// RNTupleModel::MakeField can be used to access a field's value during the processor iteration. When no model is
/// specified, it is created from the RNTuple's descriptor.
RNTupleJoinProcessor(const RNTupleOpenSpec &mainNTuple, const std::vector<std::string> &joinFields,
std::unique_ptr<RNTupleModel> model = nullptr);
RNTupleJoinProcessor(const RNTupleOpenSpec &mainNTuple, std::unique_ptr<RNTupleModel> model = nullptr);

/////////////////////////////////////////////////////////////////////////////
/// \brief Add an auxiliary RNTuple to the processor.
///
/// \param[in] auxNTuple The source specification (name and storage location) of the auxiliary RNTuple.
/// \param[in] joinFields The names of the fields used in the join.
/// \param[in] model The model that specifies which fields should be read by the processor. The pointer returned by
/// RNTupleModel::MakeField can be used to access a field's value during the processor iteration. When no model is
/// specified, it is created from the RNTuple's descriptor.
void AddAuxiliary(const RNTupleOpenSpec &auxNTuple, std::unique_ptr<RNTupleModel> model = nullptr);
void AddAuxiliary(const RNTupleOpenSpec &auxNTuple, const std::vector<std::string> &joinFields,
std::unique_ptr<RNTupleModel> model = nullptr);
void ConnectFields();

/////////////////////////////////////////////////////////////////////////////
/// \brief Resolve join field names to entry tokens and append them to fJoinFieldTokens.
///
/// Every name in `joinFields` is looked up through `fEntry->GetToken()`; the resulting tokens are stored in the same
/// order as the names were given. The tokens are meant to represent the join fields belonging to the main RNTuple in
/// the join model.
///
/// \param[in] joinFields The names of the fields used in the join.
void SetJoinFieldTokens(const std::vector<std::string> &joinFields)
{
   // Make room for all tokens up front so the loop below does not trigger repeated reallocations.
   fJoinFieldTokens.reserve(joinFields.size());

   for (auto nameIt = joinFields.begin(); nameIt != joinFields.end(); ++nameIt) {
      fJoinFieldTokens.push_back(fEntry->GetToken(*nameIt));
   }
}

public:
RNTupleJoinProcessor(const RNTupleJoinProcessor &) = delete;
RNTupleJoinProcessor operator=(const RNTupleJoinProcessor &) = delete;
Expand Down
25 changes: 13 additions & 12 deletions tree/ntuple/v7/src/RNTupleProcessor.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -65,19 +65,19 @@ ROOT::Experimental::RNTupleProcessor::CreateJoin(const std::vector<RNTupleOpenSp

std::unique_ptr<RNTupleJoinProcessor> processor;
if (models.size() > 0) {
processor =
std::unique_ptr<RNTupleJoinProcessor>(new RNTupleJoinProcessor(ntuples[0], joinFields, std::move(models[0])));
processor = std::unique_ptr<RNTupleJoinProcessor>(new RNTupleJoinProcessor(ntuples[0], std::move(models[0])));
} else {
processor = std::unique_ptr<RNTupleJoinProcessor>(new RNTupleJoinProcessor(ntuples[0], joinFields));
processor = std::unique_ptr<RNTupleJoinProcessor>(new RNTupleJoinProcessor(ntuples[0]));
}

for (unsigned i = 1; i < ntuples.size(); ++i) {
if (models.size() > 0)
processor->AddAuxiliary(ntuples[i], std::move(models[i]));
processor->AddAuxiliary(ntuples[i], joinFields, std::move(models[i]));
else
processor->AddAuxiliary(ntuples[i]);
processor->AddAuxiliary(ntuples[i], joinFields);
}

processor->SetJoinFieldTokens(joinFields);
processor->ConnectFields();

return processor;
Expand Down Expand Up @@ -185,9 +185,8 @@ ROOT::Experimental::NTupleSize_t ROOT::Experimental::RNTupleChainProcessor::Adva
//------------------------------------------------------------------------------

ROOT::Experimental::RNTupleJoinProcessor::RNTupleJoinProcessor(const RNTupleOpenSpec &mainNTuple,
const std::vector<std::string> &joinFields,
std::unique_ptr<RNTupleModel> model)
: RNTupleProcessor({mainNTuple}), fJoinFieldNames(joinFields)
: RNTupleProcessor({mainNTuple})
{
fPageSource = Internal::RPageSource::Create(mainNTuple.fNTupleName, mainNTuple.fStorage);
fPageSource->Attach();
Expand Down Expand Up @@ -222,6 +221,7 @@ ROOT::Experimental::RNTupleJoinProcessor::RNTupleJoinProcessor(const RNTupleOpen
}

void ROOT::Experimental::RNTupleJoinProcessor::AddAuxiliary(const RNTupleOpenSpec &auxNTuple,
const std::vector<std::string> &joinFields,
std::unique_ptr<RNTupleModel> model)
{
assert(fNEntriesProcessed == 0 && "cannot add auxiliary ntuples after processing has started");
Expand Down Expand Up @@ -308,8 +308,9 @@ void ROOT::Experimental::RNTupleJoinProcessor::AddAuxiliary(const RNTupleOpenSpe

fEntry.swap(newEntry);

if (IsUsingIndex())
fJoinIndices.emplace_back(Internal::RNTupleIndex::Create(fJoinFieldNames, *pageSource, true /* deferBuild */));
// If no join fields have been specified, an aligned join is assumed and an index won't be necessary.
if (joinFields.size() > 0)
fJoinIndices.emplace_back(Internal::RNTupleIndex::Create(joinFields, *pageSource, true /* deferBuild */));

fAuxiliaryPageSources.emplace_back(std::move(pageSource));
}
Expand Down Expand Up @@ -340,10 +341,10 @@ ROOT::Experimental::NTupleSize_t ROOT::Experimental::RNTupleJoinProcessor::Advan
void ROOT::Experimental::RNTupleJoinProcessor::LoadEntry()
{
std::vector<void *> valPtrs;
valPtrs.reserve(fJoinFieldNames.size());
valPtrs.reserve(fJoinFieldTokens.size());

for (const auto &fieldName : fJoinFieldNames) {
auto ptr = fEntry->GetPtr<void>(fieldName);
for (const auto &token : fJoinFieldTokens) {
auto ptr = fEntry->GetPtr<void>(token);
valPtrs.push_back(ptr.get());
}

Expand Down

0 comments on commit e344b22

Please sign in to comment.