Skip to content

Commit

Permalink
Lazy load dask.dataframe in datashader.py (#6309)
Browse files Browse the repository at this point in the history
Co-authored-by: Maxime Liquet <[email protected]>
  • Loading branch information
hoxbro and maximlt authored Jul 4, 2024
1 parent d0d535b commit 3447469
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 5 deletions.
29 changes: 29 additions & 0 deletions holoviews/core/util.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import builtins
import datetime as dt
import functools
import hashlib
import importlib
import inspect
import itertools
import json
Expand Down Expand Up @@ -2331,3 +2333,30 @@ def flatten(line):
yield from flatten(element)
else:
yield element


def lazy_isinstance(obj, class_or_tuple):
    """Lazy isinstance check.

    Will only import the module named in a spec if the top-level module
    of ``obj`` matches the top-level module of that spec, so a module is
    never imported just to rule out a non-match.

    Each spec is a string of the form ``'module.path:Attr.path'``;
    ``class_or_tuple`` is either one such string or an iterable of them.

    lazy_isinstance(obj, 'dask.dataframe:DataFrame')
    Will:
    1) check if the first module is dask
    2) If it is dask, import dask.dataframe
    3) Do an isinstance check for dask.dataframe.DataFrame

    Returns True if ``obj`` is an instance of any of the given classes,
    otherwise False. Errors raised while importing a matching module or
    resolving the attribute path are not caught here.
    """
    if isinstance(class_or_tuple, str):
        class_or_tuple = (class_or_tuple,)

    # Instances of built-in / C-implemented types (int, list, None, ...)
    # do not expose ``__module__`` on the instance, so fall back to the
    # type instead of raising AttributeError.
    obj_module = getattr(obj, '__module__', None)
    if not isinstance(obj_module, str):
        obj_module = getattr(type(obj), '__module__', None)
    if not isinstance(obj_module, str):
        # No usable module name: nothing can be matched lazily.
        return False

    obj_mod_name = obj_module.split('.')[0]
    for cls in class_or_tuple:
        mod_name, _, attr_name = cls.partition(':')
        # Prefix match (not equality) on the top-level package name, so an
        # object from e.g. ``dask_expr`` also passes the filter for ``dask``.
        if not obj_mod_name.startswith(mod_name.split('.')[0]):
            continue
        mod = importlib.import_module(mod_name)
        # Walk the dotted attribute path (``'A.B'`` -> ``mod.A.B``).
        target = functools.reduce(getattr, attr_name.split('.'), mod)
        if isinstance(obj, target):
            return True
    return False
11 changes: 6 additions & 5 deletions holoviews/operation/datashader.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
from collections.abc import Callable, Iterable
from functools import partial

import dask.dataframe as dd
import datashader as ds
import datashader.reductions as rd
import datashader.transfer_functions as tf
Expand Down Expand Up @@ -45,6 +44,7 @@
datetime_types,
dt_to_int,
get_param_values,
lazy_isinstance,
)
from ..element import (
RGB,
Expand Down Expand Up @@ -303,22 +303,23 @@ def get_agg_data(cls, obj, category=None):
if len(paths) > 1:
if glyph == 'line':
path = paths[0][:1]
if isinstance(path, dd.DataFrame):
if lazy_isinstance(path, "dask.dataframe:DataFrame"):
path = path.compute()
empty = path.copy()
empty.iloc[0, :] = (np.nan,) * empty.shape[1]
paths = [elem for p in paths for elem in (p, empty)][:-1]
if all(isinstance(path, dd.DataFrame) for path in paths):
if all(lazy_isinstance(path,"dask.dataframe:DataFrame") for path in paths):
import dask.dataframe as dd
df = dd.concat(paths)
else:
paths = [p.compute() if isinstance(p, dd.DataFrame) else p for p in paths]
paths = [p.compute() if lazy_isinstance(p, "dask.dataframe:DataFrame") else p for p in paths]
df = pd.concat(paths)
else:
df = paths[0] if paths else pd.DataFrame([], columns=[x.name, y.name])
if category and df[category].dtype.name != 'category':
df[category] = df[category].astype('category')

is_custom = isinstance(df, dd.DataFrame) or cuDFInterface.applies(df)
is_custom = lazy_isinstance(df, "dask.dataframe:DataFrame") or cuDFInterface.applies(df)
if any((not is_custom and len(df[d.name]) and isinstance(df[d.name].values[0], cftime_types)) or
df[d.name].dtype.kind in ["M", "u"] for d in (x, y)):
df = df.copy()
Expand Down

0 comments on commit 3447469

Please sign in to comment.