-
Notifications
You must be signed in to change notification settings - Fork 1.3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[df] Unify local and distributed API
Unify the main common entry points between the local and distributed RDataFrame APIs. Currently these changes affect: the ROOT.RDataFrame constructor, ROOT.RDF.RunGraphs, and ROOT.RDF.Experimental.VariationsFor. Any time one of the above is called, a pythonization will dispatch to the appropriate RDataFrame flavour, depending on the arguments. This dispatcher checks for the presence of an "executor" keyword argument, in which case this is expected to be an instance of either `distributed.Client` or `pyspark.SparkContext`, as those are the two distributed executors currently supported. Previous usage of the distributed module with fully qualified names of functions still works, although usage of the unified API is preferable and advisable.
- Loading branch information
1 parent
fde7538
commit a1d8018
Showing
10 changed files
with
214 additions
and
69 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
85 changes: 85 additions & 0 deletions
85
bindings/pyroot/pythonizations/python/ROOT/_pythonization/_rdf_namespace.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
# Author: Vincenzo Eduardo Padulano CERN 10/2024 | ||
|
||
################################################################################ | ||
# Copyright (C) 1995-2024, Rene Brun and Fons Rademakers. # | ||
# All rights reserved. # | ||
# # | ||
# For the licensing terms see $ROOTSYS/LICENSE. # | ||
# For the list of contributors see $ROOTSYS/README/CREDITS. # | ||
################################################################################ | ||
|
||
""" | ||
This module contains utilities to help in the organization of the RDataFrame | ||
namespace and the interaction between the C++ and Python functionalities | ||
""" | ||
|
||
|
||
def _create_distributed_module(parent):
    """
    Build the ROOT.RDF.Experimental.Distributed python module by delegating
    to DistRDF.

    Arguments:
        parent: The ROOT.RDF namespace. It is forwarded unchanged to
            DistRDF.create_distributed_module, which needs it to define
            __package__.

    Returns:
        types.ModuleType: The ROOT.RDF.Experimental.Distributed submodule,
            exactly as returned by DistRDF.create_distributed_module.
    """
    # Imported inside the function, so DistRDF is only loaded when this
    # function is actually called.
    import DistRDF

    distributed_module = DistRDF.create_distributed_module(parent)
    return distributed_module
|
||
|
||
def _rungraphs(distrdf_rungraphs, rdf_rungraphs):
    """
    Build the unified RunGraphs entry point.

    Arguments:
        distrdf_rungraphs: Callable used when the handles are distributed.
        rdf_rungraphs: Callable used in every other case.

    Returns:
        A callable taking the collection of handles and forwarding it to
        one of the two callables above.
    """

    def rungraphs(handles):
        # The decision is based only on the type name of the first handle.
        # Caveat: `hasattr` must not be called on a result pointer, since
        # that would implicitly trigger the connected computation graph.
        is_distributed = (
            len(handles) > 0 and "DistRDF" in str(type(handles[0]))
        )
        selected = distrdf_rungraphs if is_distributed else rdf_rungraphs
        return selected(handles)

    return rungraphs
|
||
|
||
def _variationsfor(distrdf_variationsfor, rdf_variationsfor):
    """
    Build the unified VariationsFor entry point.

    Arguments:
        distrdf_variationsfor: Callable used for distributed result
            pointers.
        rdf_variationsfor: Object indexed with the held value type name to
            obtain the local VariationsFor callable.

    Returns:
        A callable taking a result pointer and forwarding it to the
        matching implementation.
    """

    def variationsfor(resptr):
        # Caveat: `hasattr` must not be called on the result pointer, since
        # that would implicitly trigger the connected computation graph.
        # Only the type of the object is inspected.
        if "DistRDF" in str(type(resptr)):
            return distrdf_variationsfor(resptr)

        # The local VariationsFor needs the type of the value held by the
        # result pointer: take the text between the first "<" and the last
        # ">" of the class name.
        class_name = type(resptr).__name__
        start = class_name.index("<") + 1
        stop = class_name.rindex(">")
        held_type = class_name[start:stop]
        return rdf_variationsfor[held_type](resptr)

    return variationsfor
|
||
|
||
def _rdataframe(local_rdf, distributed_rdf):
    """
    Create a callable that correctly dispatches either to the local or
    distributed RDataFrame constructor, depending on whether the "executor"
    keyword argument is given with a value other than None.

    Arguments:
        local_rdf: Callable building a local RDataFrame.
        distributed_rdf: Callable building a distributed RDataFrame. It
            receives all arguments, including "executor".

    Returns:
        A callable accepting arbitrary positional and keyword arguments and
        returning the result of the selected constructor.
    """

    def rdataframe(*args, **kwargs):
        if kwargs.get("executor") is not None:
            return distributed_rdf(*args, **kwargs)

        # An absent executor and an explicit `executor=None` both mean
        # "run locally". The keyword is only meaningful to the distributed
        # constructor, so it is not forwarded to the local one.
        kwargs.pop("executor", None)
        return local_rdf(*args, **kwargs)

    return rdataframe
Oops, something went wrong.