You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
---------------------------------------------------------------------------
ZeroDivisionError Traceback (most recent call last)
<command-4043649982577705> in <cell line: 37>()
35
36 # print(df.schema)# display(df)
---> 37 profile_data(spark, data_df=df, start_date=start_date)
38 # results = profile_data(spark, data_df=df, start_date=start_date)
<command-4043649982577705> in profile_data(spark, data_df, start_date)
12
13 # json_object = report.to_json()
---> 14 report_html = report.to_html()
15 displayHTML(report_html)
16 # jobj = json.loads(json_object)
/local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.9/site-packages/typeguard/__init__.py in wrapper(*args, **kwargs)
1031 memo = _CallMemo(python_func, _localns, args=args, kwargs=kwargs)
1032 check_argument_types(memo)
-> 1033 retval = func(*args, **kwargs)
1034 try:
1035 check_return_type(retval, memo)
/local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.9/site-packages/ydata_profiling/profile_report.py in to_html(self)
468
469 """
--> 470 return self.html
471
472 def to_json(self) -> str:
/local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.9/site-packages/typeguard/__init__.py in wrapper(*args, **kwargs)
1031 memo = _CallMemo(python_func, _localns, args=args, kwargs=kwargs)
1032 check_argument_types(memo)
-> 1033 retval = func(*args, **kwargs)
1034 try:
1035 check_return_type(retval, memo)
/local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.9/site-packages/ydata_profiling/profile_report.py in html(self)
275 def html(self) -> str:
276 if self._html is None:
--> 277 self._html = self._render_html()
278 return self._html
279
/local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.9/site-packages/typeguard/__init__.py in wrapper(*args, **kwargs)
1031 memo = _CallMemo(python_func, _localns, args=args, kwargs=kwargs)
1032 check_argument_types(memo)
-> 1033 retval = func(*args, **kwargs)
1034 try:
1035 check_return_type(retval, memo)
/local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.9/site-packages/ydata_profiling/profile_report.py in _render_html(self)
383 from ydata_profiling.report.presentation.flavours import HTMLReport
384
--> 385 report = self.report
386
387 with tqdm(
/local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.9/site-packages/typeguard/__init__.py in wrapper(*args, **kwargs)
1031 memo = _CallMemo(python_func, _localns, args=args, kwargs=kwargs)
1032 check_argument_types(memo)
-> 1033 retval = func(*args, **kwargs)
1034 try:
1035 check_return_type(retval, memo)
/local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.9/site-packages/ydata_profiling/profile_report.py in report(self)
269 def report(self) -> Root:
270 if self._report is None:
--> 271 self._report = get_report_structure(self.config, self.description_set)
272 return self._report
273
/local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.9/site-packages/typeguard/__init__.py in wrapper(*args, **kwargs)
1031 memo = _CallMemo(python_func, _localns, args=args, kwargs=kwargs)
1032 check_argument_types(memo)
-> 1033 retval = func(*args, **kwargs)
1034 try:
1035 check_return_type(retval, memo)
/local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.9/site-packages/ydata_profiling/profile_report.py in description_set(self)
251 def description_set(self) -> BaseDescription:
252 if self._description_set is None:
--> 253 self._description_set = describe_df(
254 self.config,
255 self.df,
/local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.9/site-packages/ydata_profiling/model/describe.py in describe(config, df, summarizer, typeset, sample)
72 # Variable-specific
73 pbar.total += len(df.columns)
---> 74 series_description = get_series_descriptions(
75 config, df, summarizer, typeset, pbar
76 )
/local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.9/site-packages/multimethod/__init__.py in __call__(self, *args, **kwargs)
313 func = self[tuple(func(arg) for func, arg in zip(self.type_checkers, args))]
314 try:
--> 315 return func(*args, **kwargs)
316 except TypeError as ex:
317 raise DispatchError(f"Function {func.__code__}") from ex
/local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.9/site-packages/ydata_profiling/model/spark/summary_spark.py in spark_get_series_descriptions(config, df, summarizer, typeset, pbar)
90 args = [(name, df) for name in df.columns]
91 with multiprocessing.pool.ThreadPool(12) as executor:
---> 92 for i, (column, description) in enumerate(
93 executor.imap_unordered(multiprocess_1d, args)
94 ):
/usr/lib/python3.9/multiprocessing/pool.py in next(self, timeout)
868 if success:
869 return value
--> 870 raise value
871
872 __next__ = next # XXX
/usr/lib/python3.9/multiprocessing/pool.py in worker(inqueue, outqueue, initializer, initargs, maxtasks, wrap_exception)
123 job, i, func, args, kwds = task
124 try:
--> 125 result = (True, func(*args, **kwds))
126 except Exception as e:
127 if wrap_exception and func is not _helper_reraises_exception:
/local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.9/site-packages/ydata_profiling/model/spark/summary_spark.py in multiprocess_1d(args)
86 """
87 column, df = args
---> 88 return column, describe_1d(config, df.select(column), summarizer, typeset)
89
90 args = [(name, df) for name in df.columns]
/local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.9/site-packages/multimethod/__init__.py in __call__(self, *args, **kwargs)
313 func = self[tuple(func(arg) for func, arg in zip(self.type_checkers, args))]
314 try:
--> 315 return func(*args, **kwargs)
316 except TypeError as ex:
317 raise DispatchError(f"Function {func.__code__}") from ex
/local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.9/site-packages/ydata_profiling/model/spark/summary_spark.py in spark_describe_1d(config, series, summarizer, typeset)
62 }[dtype]
63
---> 64 return summarizer.summarize(config, series, dtype=vtype)
65
66
/local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.9/site-packages/ydata_profiling/model/summarizer.py in summarize(self, config, series, dtype)
40 object:
41 """
---> 42 _, _, summary = self.handle(str(dtype), config, series, {"type": str(dtype)})
43 return summary
44
/local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.9/site-packages/ydata_profiling/model/handler.py in handle(self, dtype, *args, **kwargs)
60 funcs = self.mapping.get(dtype, [])
61 op = compose(funcs)
---> 62 return op(*args)
63
64
/local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.9/site-packages/ydata_profiling/model/handler.py in func2(*x)
19 return f(*x)
20 else:
---> 21 return f(*res)
22
23 return func2
/local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.9/site-packages/ydata_profiling/model/handler.py in func2(*x)
19 return f(*x)
20 else:
---> 21 return f(*res)
22
23 return func2
/local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.9/site-packages/ydata_profiling/model/handler.py in func2(*x)
15 def func(f: Callable, g: Callable) -> Callable:
16 def func2(*x) -> Any:
---> 17 res = g(*x)
18 if type(res) == bool:
19 return f(*x)
/local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.9/site-packages/multimethod/__init__.py in __call__(self, *args, **kwargs)
313 func = self[tuple(func(arg) for func, arg in zip(self.type_checkers, args))]
314 try:
--> 315 return func(*args, **kwargs)
316 except TypeError as ex:
317 raise DispatchError(f"Function {func.__code__}") from ex
/local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.9/site-packages/ydata_profiling/model/spark/describe_supported_spark.py in describe_supported_spark(config, series, summary)
29 summary["is_unique"] = n_unique == count
30 summary["n_unique"] = n_unique
---> 31 summary["p_unique"] = n_unique / count
32
33 return config, series, summary
ZeroDivisionError: division by zero
Expected Behaviour
Get a data profiling report in json or html
Data Description
Code that reproduces the bug
from ydata_profiling import ProfileReport

df = spark.read.format('delta').load(f"abfss://***@***.dfs.core.windows.net/***")
df = df.withColumn("FakeNum", lit(0.0))  # Suggestion by other users to work around the error.

# Suggestion by other users: set correlations to None.
report = ProfileReport(data_df, title='None', correlations=None)

# Both methods below throw the same error.
json_object = report.to_json()
report_html = report.to_html()
Current Behaviour
ZeroDivisionError while executing `report.to_json()` or `report.to_html()`
with a Spark DataFrame.
Expected Behaviour
Get a data profiling report in json or html
Data Description
Code that reproduces the bug
pandas-profiling version
v4.5.0
Dependencies
OS
Databricks
Checklist
The text was updated successfully, but these errors were encountered: