Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TypeError: cannot safely cast non-equivalent float64 to int64 #12

Open
guyjansen opened this issue Feb 27, 2023 · 0 comments
Open

TypeError: cannot safely cast non-equivalent float64 to int64 #12

guyjansen opened this issue Feb 27, 2023 · 0 comments

Comments

@guyjansen
Copy link

Hey,

I'm new to coding and I'm trying AutoClean on a dataset, but I keep getting this error: TypeError: cannot safely cast non-equivalent float64 to int64.
According to ChatGPT, this error typically occurs when you try to convert a floating-point number to an integer using the "int()" function or a similar method, but the float is not a whole number, so the conversion would lose precision.
But it must be possible to use floats as well, right?
So I'm curious why I might be getting this error. My code is provided below.
Thanks a lot for any help!

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

df = pd.read_csv('/Users/guyjansen/Desktop/Python/Housing Prices Data Science Project/train.csv')
from AutoClean.autoclean import AutoClean
pipeline = AutoClean(df)
pipeline.output

This raises the error:

TypeError Traceback (most recent call last)
~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/arrays/integer.py in safe_cast(values, dtype, copy)
119 try:
--> 120 return values.astype(dtype, casting="safe", copy=copy)
121 except TypeError as err:

TypeError: Cannot cast array data from dtype('float64') to dtype('int64') according to the rule 'safe'

The above exception was the direct cause of the following exception:

TypeError Traceback (most recent call last)
/var/folders/wc/2vn5bk3x4hq0b0_hdn9tjzkm0000gn/T/ipykernel_45950/1120075175.py in <module>
1 from AutoClean.autoclean import AutoClean
----> 2 pipeline = AutoClean(df)
3 pipeline.output

~/opt/anaconda3/lib/python3.9/site-packages/AutoClean/autoclean.py in __init__(self, input_data, mode, duplicates, missing_num, missing_categ, encode_categ, extract_datetime, outliers, outlier_param, logfile, verbose)
80
81 # initialize our class and start the autoclean process
---> 82 self.output = self._clean_data(output_data, input_data)
83
84 end = timer()

~/opt/anaconda3/lib/python3.9/site-packages/AutoClean/autoclean.py in _clean_data(self, df, input_data)
141 df = Duplicates.handle(self, df)
142 df = MissingValues.handle(self, df)
--> 143 df = Outliers.handle(self, df)
144 df = Adjust.convert_datetime(self, df)
145 df = EncodeCateg.handle(self, df)

~/opt/anaconda3/lib/python3.9/site-packages/AutoClean/modules.py in handle(self, df)
272
273 if self.outliers in ['auto', 'winz']:
--> 274 df = Outliers._winsorization(self, df)
275 elif self.outliers == 'delete':
276 df = Outliers._delete(self, df)

~/opt/anaconda3/lib/python3.9/site-packages/AutoClean/modules.py in _winsorization(self, df)
300 else:
301 if (df[feature].fillna(-9999) % 1 == 0).all():
--> 302 df.loc[row_index, feature] = upper_bound
303 df[feature] = df[feature].astype(int)
304 else:

~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexing.py in __setitem__(self, key, value)
714
715 iloc = self if self.name == "iloc" else self.obj.iloc
--> 716 iloc._setitem_with_indexer(indexer, value, self.name)
717
718 def _validate_key(self, key, axis: int):

~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexing.py in _setitem_with_indexer(self, indexer, value, name)
1689 if take_split_path:
1690 # We have to operate column-wise
-> 1691 self._setitem_with_indexer_split_path(indexer, value, name)
1692 else:
1693 self._setitem_single_block(indexer, value, name)

~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexing.py in _setitem_with_indexer_split_path(self, indexer, value, name)
1782 # scalar value
1783 for loc in ilocs:
-> 1784 self._setitem_single_column(loc, value, pi)
1785
1786 def _setitem_with_indexer_2d_value(self, indexer, value):

~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexing.py in _setitem_single_column(self, loc, value, plane_indexer)
1888
1889 orig_values = ser._values
-> 1890 ser._mgr = ser._mgr.setitem((pi,), value)
1891
1892 if ser._values is orig_values:

~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/internals/managers.py in setitem(self, indexer, value)
335 For SingleBlockManager, this backs s[indexer] = value
336 """
--> 337 return self.apply("setitem", indexer=indexer, value=value)
338
339 def putmask(self, mask, new, align: bool = True):

~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/internals/managers.py in apply(self, f, align_keys, ignore_failures, **kwargs)
302 applied = b.apply(f, **kwargs)
303 else:
--> 304 applied = getattr(b, f)(**kwargs)
305 except (TypeError, NotImplementedError):
306 if not ignore_failures:

~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/internals/blocks.py in setitem(self, indexer, value)
1620
1621 check_setitem_lengths(indexer, value, self.values)
-> 1622 self.values[indexer] = value
1623 return self
1624

~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/arrays/masked.py in __setitem__(self, key, value)
222 if _is_scalar:
223 value = [value]
--> 224 value, mask = self._coerce_to_array(value)
225
226 if _is_scalar:

~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/arrays/integer.py in _coerce_to_array(self, value)
334
335 def _coerce_to_array(self, value) -> tuple[np.ndarray, np.ndarray]:
--> 336 return coerce_to_array(value, dtype=self.dtype)
337
338 @overload

~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/arrays/integer.py in coerce_to_array(values, dtype, mask, copy)
228 values = values.astype(dtype, copy=copy)
229 else:
--> 230 values = safe_cast(values, dtype, copy=False)
231
232 return values, mask

~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/arrays/integer.py in safe_cast(values, dtype, copy)
124 return casted
125
--> 126 raise TypeError(
127 f"cannot safely cast non-equivalent {values.dtype} to {np.dtype(dtype)}"
128 ) from err

TypeError: cannot safely cast non-equivalent float64 to int64

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant