Skip to content

Commit

Permalink
Specify "spawn" start method
Browse files Browse the repository at this point in the history
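Use multiprocessing.get_context to explicitly select the "spawn" start method for worker pools.
"fork" (the default on POSIX systems other than macOS) is being phased out as the default: since Python 3.12, forking a multi-threaded process raises a DeprecationWarning, and Python 3.14 changes the POSIX default start method away from "fork".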
  • Loading branch information
Tim-Abwao committed Dec 30, 2024
1 parent 50de13e commit 62b6ba8
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 5 deletions.
9 changes: 6 additions & 3 deletions eda_report/_analysis.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from multiprocessing import Pool
from multiprocessing import get_context
from typing import Dict, Iterable, Optional, Union

import pandas as pd
Expand All @@ -9,6 +9,8 @@
from eda_report.plotting import _plot_dataset, _plot_variable
from eda_report.univariate import Variable, _analyze_univariate

mp_context = get_context("spawn") # Use "spawn" start method


def _get_contingency_tables(
categorical_df: pd.DataFrame, groupby_data: pd.Series
Expand Down Expand Up @@ -77,7 +79,7 @@ def _analyze_variables(self) -> Dict[str, Variable]:
Dict[str, Variable]: Univariate analysis results.
"""
data = self.dataset.data
with Pool() as p:
with mp_context.Pool() as p:
univariate_stats = dict(
tqdm(
# Analyze variables concurrently
Expand Down Expand Up @@ -132,7 +134,8 @@ def _get_univariate_graphs(self) -> Dict[str, Dict]:
Returns:
Dict[str, Dict]: Univariate graphs.
"""
with Pool() as p:

with mp_context.Pool() as p:
data = self.dataset.data
variable_data_hue_and_color = [
(
Expand Down
4 changes: 2 additions & 2 deletions eda_report/plotting.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from io import BytesIO
from multiprocessing import Pool
from multiprocessing import get_context
from typing import Dict, Iterable, Optional, Sequence, Tuple, Union

import matplotlib as mpl
Expand Down Expand Up @@ -458,7 +458,7 @@ def _plot_dataset(variables: Dataset, color: str = None) -> Optional[Dict]:
pairs_to_include = [
pair for pair, _ in variables._correlation_values[:20]
]
with Pool() as p:
with get_context("spawn").Pool() as p:
paired_data = [
(variables.data.loc[:, pair], color)
for pair in pairs_to_include
Expand Down

0 comments on commit 62b6ba8

Please sign in to comment.