diff --git a/panedr/panedr.py b/panedr/panedr.py index cffe77a..72a6a44 100644 --- a/panedr/panedr.py +++ b/panedr/panedr.py @@ -45,7 +45,8 @@ import sys import itertools import time -import pandas +import numpy as np + #Index for the IDs of additional blocks in the energy file. #Blocks can be added without sacrificing backward and forward @@ -75,7 +76,7 @@ Enxnm = collections.namedtuple('Enxnm', 'name unit') ENX_VERSION = 5 -__all__ = ['edr_to_df'] +__all__ = ['edr_to_df', 'edr_to_dict', 'read_edr'] class EDRFile(object): @@ -395,14 +396,14 @@ def edr_strings(data, file_version, n): def is_frame_magic(data): """Unpacks an int and checks whether it matches the EDR frame magic number - + Does not roll the reading position back. """ magic = data.unpack_int() return magic == -7777777 -def edr_to_df(path, verbose=False): +def read_edr(path, verbose=False): begin = time.time() edr_file = EDRFile(str(path)) all_energies = [] @@ -427,5 +428,27 @@ def edr_to_df(path, verbose=False): end='', file=sys.stderr) print('\n{} frame read in {:.2f} seconds'.format(ifr, end - begin), file=sys.stderr) + + return all_energies, all_names, times + + +def edr_to_df(path: str, verbose: bool = False): + try: + import pandas + except ImportError: + raise ImportError("""ERROR --- pandas was not found! + pandas is required to use the `.edr_to_df()` + functionality. Try installing it using pip, e.g.: + python -m pip install pandas""") + all_energies, all_names, times = read_edr(path, verbose=verbose) df = pandas.DataFrame(all_energies, columns=all_names, index=times) return df + + +def edr_to_dict(path: str, verbose: bool = False): + all_energies, all_names, times = read_edr(path, verbose=verbose) + energy_dict = {} + for idx, name in enumerate(all_names): + energy_dict[name] = np.array( + [all_energies[frame][idx] for frame in range(len(times))]) + return energy_dict diff --git a/requirements.txt b/requirements.txt index d1c77d1..87a4fa2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,2 @@ -pandas +numpy>=1.19.0 pbr diff --git a/setup.cfg b/setup.cfg index b8d13a4..b3dd5fb 100644 --- a/setup.cfg +++ b/setup.cfg @@ -27,3 +27,5 @@ classifier = test = six pytest +pandas = + pandas diff --git a/tests/test_edr.py b/tests/test_edr.py index 8062dfb..b5c9606 100644 --- a/tests/test_edr.py +++ b/tests/test_edr.py @@ -59,6 +59,15 @@ EDR_Data = namedtuple('EDR_Data', ['df', 'xvgdata', 'xvgtime', 'xvgnames', 'xvgprec', 'edrfile', 'xvgfile']) + +def test_failed_import(monkeypatch): + # Putting this test first to avoid datafiles already being loaded + errmsg = "ERROR --- pandas was not found!" + monkeypatch.setitem(sys.modules, 'pandas', None) + with pytest.raises(ImportError, match=errmsg): + panedr.edr_to_df(EDR) + + @pytest.fixture(scope='module', params=[(EDR, EDR_XVG), (EDR_IRREGULAR, EDR_IRREGULAR_XVG), @@ -73,7 +82,7 @@ def edr(request): xvgtime = xvgdata[:, 0] xvgdata = xvgdata[:, 1:] return EDR_Data(df, xvgdata, xvgtime, xvgnames, xvgprec, edrfile, xvgfile) - + class TestEdrToDf(object): """ @@ -163,10 +172,18 @@ def _assert_progress_range(self, progress, dt, start, stop, step): assert ref_line == progress_line +def test_edr_to_dict_matches_edr_to_df(): + array_dict = panedr.edr_to_dict(EDR) + ref_df = panedr.edr_to_df(EDR) + array_df = pandas.DataFrame.from_dict(array_dict).set_index( + "Time", drop=False) + assert array_df.equals(ref_df) + + def read_xvg(path): """ Reads XVG file, returning the data, names, and precision. - + The data is returned as a 2D numpy array. Column names are returned as an array of string objects. Precision is an integer corresponding to the least number of decimal places found, excluding the first (time) column. @@ -205,7 +222,7 @@ def read_xvg(path): def ndec(val): """Returns the number of decimal places of a string rep of a float - + """ try: return len(re.split(NDEC_PATTERN, val)[1])