Skip to content

Commit

Permalink
spool.sort - applied requested changes
Browse files Browse the repository at this point in the history
  • Loading branch information
ahmadtourei committed Aug 17, 2023
1 parent 5e83856 commit 82a7af3
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 40 deletions.
6 changes: 6 additions & 0 deletions d
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
adding_table_of_supported_formats
detrend
master
* sort_method_for_spool
vis_colorbar_title
write_func
71 changes: 45 additions & 26 deletions dascore/core/spool.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,16 @@ def get_contents(self) -> pd.DataFrame:
def sort(self, attribute) -> Self:
    """
    Sort the Spool based on a specific attribute.
    Parameters
    ----------
    attribute
        The attribute or coordinate used for sorting. If a coordinate name
        is used, the sorting will be based on the minimum value.
    """
    # This implementation performs no sorting: it always raises, and the
    # message names the concrete class of the spool it was called on.
    raise NotImplementedError(
        f"spool of type {self.__class__} has no sort implementation"
    )

@abc.abstractmethod
def __len__(self) -> int:
Expand Down Expand Up @@ -318,34 +327,44 @@ def select(self, **kwargs) -> Self:

@compose_docstring(doc=BaseSpool.sort.__doc__)
def sort(self, attribute) -> Self:
"""Sort the Spool based on a specific attribute."""
df = self._df
inst_df = self._instruction_df

# make sure we can also cover "time" and "distance" as attributes
if attribute == "time":
attribute = "time_min"
elif attribute == "distance":
attribute = "distance_min"

# make sure a suitable attribute is entered
attrs = set(df.columns)
if attribute not in attrs:
"""
{doc}
"""
try:
df = self._df
inst_df = self._instruction_df

# make sure a suitable attribute is entered
attrs = set(df.columns)
if attribute not in attrs:
# make sure we can also cover coordinate names instead of the attribute
if f"{attribute}_min" in attrs:
attribute = f"{attribute}_min"
else:
msg = (
"Invalid attribute. "
"Please use a valid attribute such as: 'time'"
)
raise IndexError(msg)

# get a mapping from the old current index to the sorted ones
sorted_df = df.sort_values(attribute).reset_index(drop=True)
old_indices = df.index
new_indices = np.arange(len(df))
mapper = pd.Series(new_indices, index=old_indices)

# swap out all the old values with new ones
new_current_index = inst_df["current_index"].map(mapper)
new_instruction_df = inst_df.assign(current_index=new_current_index)

# create new spool from new dataframes
return self.new_from_df(df=sorted_df, instruction_df=new_instruction_df)
except IndexError: # Catch only IndexError
msg = "Invalid attribute. Please use a valid attribute such as: 'time'"
raise IndexError(msg)

# get a mapping from the old current index to the sorted ones
sorted_df = df.sort_values(attribute).reset_index(drop=True)
old_indices = df.index
new_indices = np.arange(len(df))
mapper = pd.Series(new_indices, index=old_indices)

# swap out all the old values with new ones
new_current_index = inst_df["current_index"].map(mapper)
new_instruction_df = inst_df.assign(current_index=new_current_index)

# create new spool from new dataframes
return self.new_from_df(df=sorted_df, instruction_df=new_instruction_df)
except Exception:
msg = f"spool of type {self.__class__} has no sort implementation"
raise NotImplementedError(msg)

Check warning on line 367 in dascore/core/spool.py

View check run for this annotation

Codecov / codecov/patch

dascore/core/spool.py#L365-L367

Added lines #L365 - L367 were not covered by tests

@compose_docstring(doc=BaseSpool.get_contents.__doc__)
def get_contents(self) -> pd.DataFrame:
Expand Down
23 changes: 9 additions & 14 deletions tests/test_core/test_spool.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
from dascore.clients.filespool import FileSpool
from dascore.core.spool import BaseSpool, MemorySpool
from dascore.exceptions import InvalidSpoolError
from dascore.utils.misc import CacheDescriptor
from dascore.utils.time import to_datetime64, to_timedelta64


Expand Down Expand Up @@ -230,45 +229,41 @@ class TestSort:
Tests for sorting spools.
"""

# A dataframe which represents contents as they will be output
_df: pd.DataFrame = CacheDescriptor("_cache", "_get_df")
def test_base_spool_sort_raises(self, random_spool):
    """
    Test that calling BaseSpool.sort directly raises NotImplementedError.
    """
    expected_str = "spool of type"
    with pytest.raises(NotImplementedError, match=expected_str):
        # Invoke the method through the BaseSpool class itself rather than
        # through the instance, so BaseSpool's own implementation is the
        # one exercised.
        BaseSpool.sort(random_spool, "time")

def test_sorting_attr_not_exists(self, diverse_spool):
"""
Test sorting by an attribute that does not exist in the DataFrame.
"""
try:
expected_str = "Invalid attribute"
with pytest.raises(IndexError, match=expected_str):
diverse_spool.sort("dummy_attribute")
except IndexError as e:
assert (
str(e)
== "Invalid attribute. Please use a valid attribute such as: 'time'"
)
else:
assert False, "Expected an IndexError but no exception was raised."

def test_sorting_attr_exists(self, diverse_spool):
"""
Test sorting by an attribute that exists in the DataFrame.
"""
sorted_spool = diverse_spool.sort("time_min")
df = sorted_spool._df
df = sorted_spool.get_contents()
assert df["time_min"].is_monotonic_increasing

def test_sorting_attr_time(self, diverse_spool):
"""
Test sorting by the 'time' attribute that may not be in the DataFrame.
"""
sorted_spool = diverse_spool.sort("time")
df = sorted_spool._df
df = sorted_spool.get_contents()
assert df["time_min"].is_monotonic_increasing

def test_sorting_attr_distance(self, diverse_spool):
"""
Test sorting by the 'distance' attribute that may not exist in the DataFrame.
"""
sorted_spool = diverse_spool.sort("distance")
df = sorted_spool._df
df = sorted_spool.get_contents()
assert df["distance_min"].is_monotonic_increasing


Expand Down

0 comments on commit 82a7af3

Please sign in to comment.