Skip to content

Commit

Permalink
optional typename support with ak_add_typename in uproot.dask
Browse files: browse the repository at this point in the history
  • Loading branch information
prayagyadav committed Feb 5, 2025
1 parent 2443ff8 commit 97daf4b
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 18 deletions.
20 changes: 15 additions & 5 deletions src/uproot/_dask.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ def dask(
steps_per_file=unset,
library="ak",
ak_add_doc=False,
ak_add_typename=False,
custom_classes=None,
allow_missing=False,
open_files=True,
Expand Down Expand Up @@ -89,6 +90,8 @@ def dask(
``NotImplementedError``.
ak_add_doc (bool): If True and ``library="ak"``, add the TBranch ``title``
to the Awkward ``__doc__`` parameter of the array.
ak_add_typename (bool): If True and ``library="ak"``, add the TBranch ``typename``
to the Awkward ``typename`` parameter of the array.
custom_classes (None or dict): If a dict, override the classes from
the :doc:`uproot.reading.ReadOnlyFile` or ``uproot.classes``.
allow_missing (bool): If True, skip over any files that do not contain
Expand Down Expand Up @@ -236,7 +239,7 @@ def dask(

filter_branch = uproot._util.regularize_filter(filter_branch)

interp_options = {"ak_add_doc": ak_add_doc}
interp_options = {"ak_add_doc": ak_add_doc, "ak_add_typename": ak_add_typename}

if library.name == "np":
if open_files:
Expand Down Expand Up @@ -491,6 +494,7 @@ def __call__(self, i_start_stop):
entry_stop=stop,
library="np",
ak_add_doc=self.interp_options["ak_add_doc"],
ak_add_typename=self.interp_options["ak_add_typename"],
decompression_executor=self.decompression_executor,
interpretation_executor=self.interpretation_executor,
)
Expand Down Expand Up @@ -555,6 +559,7 @@ def __call__(self, file_path_object_path_istep_nsteps_ischunk):
entry_start=start,
entry_stop=stop,
ak_add_doc=self.interp_options["ak_add_doc"],
ak_add_typename=self.interp_options["ak_add_typename"],
decompression_executor=self.decompression_executor,
interpretation_executor=self.interpretation_executor,
)
Expand Down Expand Up @@ -920,6 +925,7 @@ def load_buffers(
entry_start=start,
entry_stop=stop,
ak_add_doc=options["ak_add_doc"],
ak_add_typename=options["ak_add_typename"],
decompression_executor=decompression_executor,
interpretation_executor=interpretation_executor,
how=tuple,
Expand Down Expand Up @@ -1378,13 +1384,17 @@ def _get_ttree_form(
ttree,
common_keys,
ak_add_doc,
ak_add_typename,
):
contents = []
for key in common_keys:
branch = ttree[key]
content_form = branch.interpretation.awkward_form(ttree.file)
if ak_add_doc:
content_form = content_form.copy(parameters={"__doc__": branch.title})
if ak_add_doc or ak_add_typename:
content_parameters = {}
if ak_add_doc : content_parameters["__doc__"] = branch.title
if ak_add_typename : content_parameters["typename"] = branch.typename
content_form = content_form.copy(parameters=content_parameters)
contents.append(content_form)

parameters = {"__doc__": ttree.title} if ak_add_doc else None
Expand Down Expand Up @@ -1540,7 +1550,7 @@ def real_filter_branch(branch):
partition_args.append((i, start, stop))

base_form = _get_ttree_form(
awkward, ttrees[0], common_keys, interp_options.get("ak_add_doc")
awkward, ttrees[0], common_keys, interp_options.get("ak_add_doc"), interp_options.get("ak_add_typename")
)

if len(partition_args) == 0:
Expand Down Expand Up @@ -1614,7 +1624,7 @@ def _get_dak_array_delay_open(
ignore_duplicates=True,
)
base_form = _get_ttree_form(
awkward, obj, common_keys, interp_options.get("ak_add_doc")
awkward, obj, common_keys, interp_options.get("ak_add_doc"), interp_options.get("ak_add_typename")
)

divisions = [0]
Expand Down
49 changes: 36 additions & 13 deletions src/uproot/behaviors/TBranch.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ def iterate(
interpretation_executor=None,
library="ak",
ak_add_doc=False,
ak_add_typename=False,
how=None,
report=False,
custom_classes=None,
Expand Down Expand Up @@ -110,6 +111,8 @@ def iterate(
``"ak"`` for Awkward Array, and ``"pd"`` for Pandas.
ak_add_doc (bool): If True and ``library="ak"``, add the TBranch ``title``
to the Awkward ``__doc__`` parameter of the array.
ak_add_typename (bool): If True and ``library="ak"``, add the TBranch ``typename``
to the Awkward ``typename`` parameter of the array.
how (None, str, or container type): Library-dependent instructions
for grouping. The only recognized container types are ``tuple``,
``list``, and ``dict``. Note that the container *type itself*
Expand Down Expand Up @@ -203,6 +206,7 @@ def iterate(
interpretation_executor=interpretation_executor,
library=library,
ak_add_doc=ak_add_doc,
ak_add_typename=ak_add_typename,
how=how,
report=report,
):
Expand Down Expand Up @@ -243,6 +247,7 @@ def concatenate(
interpretation_executor=None,
library="ak",
ak_add_doc=False,
ak_add_typename=False,
how=None,
custom_classes=None,
allow_missing=False,
Expand Down Expand Up @@ -287,6 +292,8 @@ def concatenate(
``"ak"`` for Awkward Array, and ``"pd"`` for Pandas.
ak_add_doc (bool): If True and ``library="ak"``, add the TBranch ``title``
to the Awkward ``__doc__`` parameter of the array.
ak_add_typename (bool): If True and ``library="ak"``, add the TBranch ``typename``
to the Awkward ``typename`` parameter of the array.
how (None, str, or container type): Library-dependent instructions
for grouping. The only recognized container types are ``tuple``,
``list``, and ``dict``. Note that the container *type itself*
Expand Down Expand Up @@ -375,6 +382,7 @@ def concatenate(
array_cache=None,
library=library,
ak_add_doc=ak_add_doc,
ak_add_typename=ak_add_typename,
how=how,
)
arrays = library.global_index(arrays, global_start)
Expand Down Expand Up @@ -540,12 +548,14 @@ def to_global(self, global_offset):
)


def _ak_add_doc(array, hasbranches, ak_add_doc):
if ak_add_doc and type(array).__module__ == "awkward.highlevel":
array.layout.parameters["__doc__"] = hasbranches.title
def _ak_add_doc_typename(array, hasbranches, ak_add_doc, ak_add_typename):
    """
    Optionally annotate an Awkward high-level array with branch metadata.

    Args:
        array: The grouped output array. It is only modified when its type
            is defined in the ``awkward.highlevel`` module; anything else is
            passed through untouched.
        hasbranches: The object supplying the metadata; its ``title`` and
            ``typename`` attributes are read only when the corresponding
            flag is set.
        ak_add_doc (bool): If true, store ``hasbranches.title`` under the
            ``"__doc__"`` key of ``array.layout.parameters``.
        ak_add_typename (bool): If true, store ``hasbranches.typename`` under
            the ``"typename"`` key of ``array.layout.parameters``.

    Returns the same ``array`` object that was passed in (the parameters are
    updated in place).
    """
    if type(array).__module__ != "awkward.highlevel":
        return array

    # (flag, parameter key, attribute of ``hasbranches`` providing the value)
    requested = (
        (ak_add_doc, "__doc__", "title"),
        (ak_add_typename, "typename", "typename"),
    )
    for enabled, key, attribute in requested:
        if enabled:
            # Attributes are fetched lazily so that a disabled option never
            # touches ``hasbranches``.
            array.layout.parameters[key] = getattr(hasbranches, attribute)

    return array


class HasBranches(Mapping):
"""
Abstract class of behaviors for anything that "has branches," namely
Expand Down Expand Up @@ -690,6 +700,7 @@ def arrays(
array_cache="inherit",
library="ak",
ak_add_doc=False,
ak_add_typename=False,
how=None,
):
"""
Expand Down Expand Up @@ -740,6 +751,8 @@ def arrays(
``"ak"`` for Awkward Array, and ``"pd"`` for Pandas.
ak_add_doc (bool): If True and ``library="ak"``, add the TBranch ``title``
to the Awkward ``__doc__`` parameter of the array.
ak_add_typename (bool): If True and ``library="ak"``, add the TBranch ``typename``
to the Awkward ``typename`` parameter of the array.
how (None, str, or container type): Library-dependent instructions
for grouping. The only recognized container types are ``tuple``,
``list``, and ``dict``. Note that the container *type itself*
Expand Down Expand Up @@ -828,7 +841,7 @@ def get_from_cache(branchname, interpretation):
) in branch.entries_to_ranges_or_baskets(entry_start, entry_stop):
ranges_or_baskets.append((branch, basket_num, range_or_basket))

interp_options = {"ak_add_doc": ak_add_doc}
interp_options = {"ak_add_doc": ak_add_doc, "ak_add_typename": ak_add_typename}
_ranges_or_baskets_to_arrays(
self,
ranges_or_baskets,
Expand All @@ -853,6 +866,7 @@ def get_from_cache(branchname, interpretation):
library,
how,
ak_add_doc,
ak_add_typename,
)

if array_cache is not None:
Expand Down Expand Up @@ -883,8 +897,8 @@ def get_from_cache(branchname, interpretation):
(e, c) for e, c in expression_context if c["is_primary"] and not c["is_cut"]
]

return _ak_add_doc(
library.group(output, expression_context, how), self, ak_add_doc
return _ak_add_doc_typename(
library.group(output, expression_context, how), self, ak_add_doc, ak_add_typename
)

def iterate(
Expand All @@ -904,6 +918,7 @@ def iterate(
interpretation_executor=None,
library="ak",
ak_add_doc=False,
ak_add_typename=False,
how=None,
report=False,
):
Expand Down Expand Up @@ -956,6 +971,8 @@ def iterate(
``"ak"`` for Awkward Array, and ``"pd"`` for Pandas.
ak_add_doc (bool): If True and ``library="ak"``, add the TBranch ``title``
to the Awkward ``__doc__`` parameter of the array.
ak_add_typename (bool): If True and ``library="ak"``, add the TBranch ``typename``
to the Awkward ``typename`` parameter of the array.
how (None, str, or container type): Library-dependent instructions
for grouping. The only recognized container types are ``tuple``,
``list``, and ``dict``. Note that the container *type itself*
Expand Down Expand Up @@ -1068,7 +1085,7 @@ def iterate(
)

arrays = {}
interp_options = {"ak_add_doc": ak_add_doc}
interp_options = {"ak_add_doc": ak_add_doc, "ak_add_typename": ak_add_typename}
_ranges_or_baskets_to_arrays(
self,
ranges_or_baskets,
Expand All @@ -1090,6 +1107,7 @@ def iterate(
library,
how,
ak_add_doc,
ak_add_typename
)

output = language.compute_expressions(
Expand All @@ -1111,10 +1129,11 @@ def iterate(
if c["is_primary"] and not c["is_cut"]
]

out = _ak_add_doc(
out = _ak_add_doc_typename(
library.group(output, minimized_expression_context, how),
self,
ak_add_doc,
ak_add_typename,
)

# no longer needed; save memory
Expand Down Expand Up @@ -1733,6 +1752,7 @@ def array(
array_cache="inherit",
library="ak",
ak_add_doc=False,
ak_add_typename=False,
):
"""
Args:
Expand Down Expand Up @@ -1763,6 +1783,8 @@ def array(
``"ak"`` for Awkward Array, and ``"pd"`` for Pandas.
ak_add_doc (bool): If True and ``library="ak"``, add the TBranch ``title``
to the Awkward ``__doc__`` parameter of the array.
ak_add_typename (bool): If True and ``library="ak"``, add the TBranch ``typename``
to the Awkward ``typename`` parameter of the array.
Returns the ``TBranch`` data as an array.
Expand Down Expand Up @@ -1834,7 +1856,7 @@ def get_from_cache(branchname, interpretation):
) in branch.entries_to_ranges_or_baskets(entry_start, entry_stop):
ranges_or_baskets.append((branch, basket_num, range_or_basket))

interp_options = {"ak_add_doc": ak_add_doc}
interp_options = {"ak_add_doc": ak_add_doc, "ak_add_typename": ak_add_typename}
_ranges_or_baskets_to_arrays(
self,
ranges_or_baskets,
Expand All @@ -1856,6 +1878,7 @@ def get_from_cache(branchname, interpretation):
library,
None,
ak_add_doc,
ak_add_typename,
)

if array_cache is not None:
Expand Down Expand Up @@ -3139,7 +3162,7 @@ def basket_to_array(basket):


def _fix_asgrouped(
arrays, expression_context, branchid_interpretation, library, how, ak_add_doc
arrays, expression_context, branchid_interpretation, library, how, ak_add_doc, ak_add_typename
):
index_start = 0
for index_stop, (_, context) in enumerate(expression_context):
Expand All @@ -3158,8 +3181,8 @@ def _fix_asgrouped(
subarrays[subname] = arrays[subbranch.cache_key]
subcontext.append((subname, limited_context[subname]))

arrays[branch.cache_key] = _ak_add_doc(
library.group(subarrays, subcontext, how), branch, ak_add_doc
arrays[branch.cache_key] = _ak_add_doc_typename(
library.group(subarrays, subcontext, how), branch, ak_add_doc, ak_add_typename
)

index_start = index_stop
Expand Down

0 comments on commit 97daf4b

Please sign in to comment.