Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Optional support for adding C++ typenames to parameters with ak_add_typename in uproot.dask #1375

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 25 additions & 5 deletions src/uproot/_dask.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ def dask(
steps_per_file=unset,
library="ak",
ak_add_doc=False,
ak_add_typename=False,
custom_classes=None,
allow_missing=False,
open_files=True,
Expand Down Expand Up @@ -89,6 +90,8 @@ def dask(
``NotImplementedError``.
ak_add_doc (bool): If True and ``library="ak"``, add the TBranch ``title``
to the Awkward ``__doc__`` parameter of the array.
ak_add_typename (bool): If True and ``library="ak"``, add the TBranch ``typename``
to the Awkward ``typename`` parameter of the array.
custom_classes (None or dict): If a dict, override the classes from
the :doc:`uproot.reading.ReadOnlyFile` or ``uproot.classes``.
allow_missing (bool): If True, skip over any files that do not contain
Expand Down Expand Up @@ -236,7 +239,7 @@ def dask(

filter_branch = uproot._util.regularize_filter(filter_branch)

interp_options = {"ak_add_doc": ak_add_doc}
interp_options = {"ak_add_doc": ak_add_doc, "ak_add_typename": ak_add_typename}

if library.name == "np":
if open_files:
Expand Down Expand Up @@ -491,6 +494,7 @@ def __call__(self, i_start_stop):
entry_stop=stop,
library="np",
ak_add_doc=self.interp_options["ak_add_doc"],
ak_add_typename=self.interp_options["ak_add_typename"],
decompression_executor=self.decompression_executor,
interpretation_executor=self.interpretation_executor,
)
Expand Down Expand Up @@ -555,6 +559,7 @@ def __call__(self, file_path_object_path_istep_nsteps_ischunk):
entry_start=start,
entry_stop=stop,
ak_add_doc=self.interp_options["ak_add_doc"],
ak_add_typename=self.interp_options["ak_add_typename"],
decompression_executor=self.decompression_executor,
interpretation_executor=self.interpretation_executor,
)
Expand Down Expand Up @@ -920,6 +925,7 @@ def load_buffers(
entry_start=start,
entry_stop=stop,
ak_add_doc=options["ak_add_doc"],
ak_add_typename=options["ak_add_typename"],
decompression_executor=decompression_executor,
interpretation_executor=interpretation_executor,
how=tuple,
Expand Down Expand Up @@ -1378,13 +1384,19 @@ def _get_ttree_form(
ttree,
common_keys,
ak_add_doc,
ak_add_typename,
):
contents = []
for key in common_keys:
branch = ttree[key]
content_form = branch.interpretation.awkward_form(ttree.file)
if ak_add_doc:
content_form = content_form.copy(parameters={"__doc__": branch.title})
if ak_add_doc or ak_add_typename:
content_parameters = {}
if ak_add_doc:
content_parameters["__doc__"] = branch.title
if ak_add_typename:
content_parameters["typename"] = branch.typename
content_form = content_form.copy(parameters=content_parameters)
contents.append(content_form)

parameters = {"__doc__": ttree.title} if ak_add_doc else None
Expand Down Expand Up @@ -1540,7 +1552,11 @@ def real_filter_branch(branch):
partition_args.append((i, start, stop))

base_form = _get_ttree_form(
awkward, ttrees[0], common_keys, interp_options.get("ak_add_doc")
awkward,
ttrees[0],
common_keys,
interp_options.get("ak_add_doc"),
interp_options.get("ak_add_typename"),
)

if len(partition_args) == 0:
Expand Down Expand Up @@ -1614,7 +1630,11 @@ def _get_dak_array_delay_open(
ignore_duplicates=True,
)
base_form = _get_ttree_form(
awkward, obj, common_keys, interp_options.get("ak_add_doc")
awkward,
obj,
common_keys,
interp_options.get("ak_add_doc"),
interp_options.get("ak_add_typename"),
)

divisions = [0]
Expand Down
63 changes: 51 additions & 12 deletions src/uproot/behaviors/TBranch.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ def iterate(
interpretation_executor=None,
library="ak",
ak_add_doc=False,
ak_add_typename=False,
how=None,
report=False,
custom_classes=None,
Expand Down Expand Up @@ -110,6 +111,8 @@ def iterate(
``"ak"`` for Awkward Array, and ``"pd"`` for Pandas.
ak_add_doc (bool): If True and ``library="ak"``, add the TBranch ``title``
to the Awkward ``__doc__`` parameter of the array.
ak_add_typename (bool): If True and ``library="ak"``, add the TBranch ``typename``
to the Awkward ``typename`` parameter of the array.
how (None, str, or container type): Library-dependent instructions
for grouping. The only recognized container types are ``tuple``,
``list``, and ``dict``. Note that the container *type itself*
Expand Down Expand Up @@ -203,6 +206,7 @@ def iterate(
interpretation_executor=interpretation_executor,
library=library,
ak_add_doc=ak_add_doc,
ak_add_typename=ak_add_typename,
how=how,
report=report,
):
Expand Down Expand Up @@ -243,6 +247,7 @@ def concatenate(
interpretation_executor=None,
library="ak",
ak_add_doc=False,
ak_add_typename=False,
how=None,
custom_classes=None,
allow_missing=False,
Expand Down Expand Up @@ -287,6 +292,8 @@ def concatenate(
``"ak"`` for Awkward Array, and ``"pd"`` for Pandas.
ak_add_doc (bool): If True and ``library="ak"``, add the TBranch ``title``
to the Awkward ``__doc__`` parameter of the array.
ak_add_typename (bool): If True and ``library="ak"``, add the TBranch ``typename``
to the Awkward ``typename`` parameter of the array.
how (None, str, or container type): Library-dependent instructions
for grouping. The only recognized container types are ``tuple``,
``list``, and ``dict``. Note that the container *type itself*
Expand Down Expand Up @@ -375,6 +382,7 @@ def concatenate(
array_cache=None,
library=library,
ak_add_doc=ak_add_doc,
ak_add_typename=ak_add_typename,
how=how,
)
arrays = library.global_index(arrays, global_start)
Expand Down Expand Up @@ -540,9 +548,12 @@ def to_global(self, global_offset):
)


def _ak_add_doc(array, hasbranches, ak_add_doc):
if ak_add_doc and type(array).__module__ == "awkward.highlevel":
array.layout.parameters["__doc__"] = hasbranches.title
def _ak_add_doc_typename(array, hasbranches, ak_add_doc, ak_add_typename):
if type(array).__module__ == "awkward.highlevel":
if ak_add_doc:
array.layout.parameters["__doc__"] = hasbranches.title
if ak_add_typename:
array.layout.parameters["typename"] = hasbranches.typename
return array


Expand Down Expand Up @@ -690,6 +701,7 @@ def arrays(
array_cache="inherit",
library="ak",
ak_add_doc=False,
ak_add_typename=False,
how=None,
):
"""
Expand Down Expand Up @@ -740,6 +752,8 @@ def arrays(
``"ak"`` for Awkward Array, and ``"pd"`` for Pandas.
ak_add_doc (bool): If True and ``library="ak"``, add the TBranch ``title``
to the Awkward ``__doc__`` parameter of the array.
ak_add_typename (bool): If True and ``library="ak"``, add the TBranch ``typename``
to the Awkward ``typename`` parameter of the array.
how (None, str, or container type): Library-dependent instructions
for grouping. The only recognized container types are ``tuple``,
``list``, and ``dict``. Note that the container *type itself*
Expand Down Expand Up @@ -828,7 +842,7 @@ def get_from_cache(branchname, interpretation):
) in branch.entries_to_ranges_or_baskets(entry_start, entry_stop):
ranges_or_baskets.append((branch, basket_num, range_or_basket))

interp_options = {"ak_add_doc": ak_add_doc}
interp_options = {"ak_add_doc": ak_add_doc, "ak_add_typename": ak_add_typename}
_ranges_or_baskets_to_arrays(
self,
ranges_or_baskets,
Expand All @@ -853,6 +867,7 @@ def get_from_cache(branchname, interpretation):
library,
how,
ak_add_doc,
ak_add_typename,
)

if array_cache is not None:
Expand Down Expand Up @@ -883,8 +898,11 @@ def get_from_cache(branchname, interpretation):
(e, c) for e, c in expression_context if c["is_primary"] and not c["is_cut"]
]

return _ak_add_doc(
library.group(output, expression_context, how), self, ak_add_doc
return _ak_add_doc_typename(
library.group(output, expression_context, how),
self,
ak_add_doc,
ak_add_typename,
)

def iterate(
Expand All @@ -904,6 +922,7 @@ def iterate(
interpretation_executor=None,
library="ak",
ak_add_doc=False,
ak_add_typename=False,
how=None,
report=False,
):
Expand Down Expand Up @@ -956,6 +975,8 @@ def iterate(
``"ak"`` for Awkward Array, and ``"pd"`` for Pandas.
ak_add_doc (bool): If True and ``library="ak"``, add the TBranch ``title``
to the Awkward ``__doc__`` parameter of the array.
ak_add_typename (bool): If True and ``library="ak"``, add the TBranch ``typename``
to the Awkward ``typename`` parameter of the array.
how (None, str, or container type): Library-dependent instructions
for grouping. The only recognized container types are ``tuple``,
``list``, and ``dict``. Note that the container *type itself*
Expand Down Expand Up @@ -1068,7 +1089,10 @@ def iterate(
)

arrays = {}
interp_options = {"ak_add_doc": ak_add_doc}
interp_options = {
"ak_add_doc": ak_add_doc,
"ak_add_typename": ak_add_typename,
}
_ranges_or_baskets_to_arrays(
self,
ranges_or_baskets,
Expand All @@ -1090,6 +1114,7 @@ def iterate(
library,
how,
ak_add_doc,
ak_add_typename,
)

output = language.compute_expressions(
Expand All @@ -1111,10 +1136,11 @@ def iterate(
if c["is_primary"] and not c["is_cut"]
]

out = _ak_add_doc(
out = _ak_add_doc_typename(
library.group(output, minimized_expression_context, how),
self,
ak_add_doc,
ak_add_typename,
)

# no longer needed; save memory
Expand Down Expand Up @@ -1733,6 +1759,7 @@ def array(
array_cache="inherit",
library="ak",
ak_add_doc=False,
ak_add_typename=False,
):
"""
Args:
Expand Down Expand Up @@ -1763,6 +1790,8 @@ def array(
``"ak"`` for Awkward Array, and ``"pd"`` for Pandas.
ak_add_doc (bool): If True and ``library="ak"``, add the TBranch ``title``
to the Awkward ``__doc__`` parameter of the array.
ak_add_typename (bool): If True and ``library="ak"``, add the TBranch ``typename``
to the Awkward ``typename`` parameter of the array.

Returns the ``TBranch`` data as an array.

Expand Down Expand Up @@ -1834,7 +1863,7 @@ def get_from_cache(branchname, interpretation):
) in branch.entries_to_ranges_or_baskets(entry_start, entry_stop):
ranges_or_baskets.append((branch, basket_num, range_or_basket))

interp_options = {"ak_add_doc": ak_add_doc}
interp_options = {"ak_add_doc": ak_add_doc, "ak_add_typename": ak_add_typename}
_ranges_or_baskets_to_arrays(
self,
ranges_or_baskets,
Expand All @@ -1856,6 +1885,7 @@ def get_from_cache(branchname, interpretation):
library,
None,
ak_add_doc,
ak_add_typename,
)

if array_cache is not None:
Expand Down Expand Up @@ -3139,7 +3169,13 @@ def basket_to_array(basket):


def _fix_asgrouped(
arrays, expression_context, branchid_interpretation, library, how, ak_add_doc
arrays,
expression_context,
branchid_interpretation,
library,
how,
ak_add_doc,
ak_add_typename,
):
index_start = 0
for index_stop, (_, context) in enumerate(expression_context):
Expand All @@ -3158,8 +3194,11 @@ def _fix_asgrouped(
subarrays[subname] = arrays[subbranch.cache_key]
subcontext.append((subname, limited_context[subname]))

arrays[branch.cache_key] = _ak_add_doc(
library.group(subarrays, subcontext, how), branch, ak_add_doc
arrays[branch.cache_key] = _ak_add_doc_typename(
library.group(subarrays, subcontext, how),
branch,
ak_add_doc,
ak_add_typename,
)

index_start = index_stop
Expand Down
Loading