From c484bef511c072ad0cc4da826b87e48d8a0645f3 Mon Sep 17 00:00:00 2001 From: BFedder Date: Tue, 14 Jun 2022 14:42:45 +0100 Subject: [PATCH 01/10] restructured functions returning energy data --- panedr/panedr.py | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/panedr/panedr.py b/panedr/panedr.py index cffe77a..2155b9b 100644 --- a/panedr/panedr.py +++ b/panedr/panedr.py @@ -45,7 +45,7 @@ import sys import itertools import time -import pandas + #Index for the IDs of additional blocks in the energy file. #Blocks can be added without sacrificing backward and forward @@ -395,14 +395,14 @@ def edr_strings(data, file_version, n): def is_frame_magic(data): """Unpacks an int and checks whether it matches the EDR frame magic number - + Does not roll the reading position back. """ magic = data.unpack_int() return magic == -7777777 -def edr_to_df(path, verbose=False): +def read_edr(path, verbose=False): begin = time.time() edr_file = EDRFile(str(path)) all_energies = [] @@ -427,5 +427,27 @@ def edr_to_df(path, verbose=False): end='', file=sys.stderr) print('\n{} frame read in {:.2f} seconds'.format(ifr, end - begin), file=sys.stderr) + + return all_energies, all_names, times + + +def edr_to_df(path: str, verbose: bool = False): + import pandas + if verbose: + all_energies, all_names, times = read_edr(path, verbose=True) + else: + all_energies, all_names, times = read_edr(path) df = pandas.DataFrame(all_energies, columns=all_names, index=times) return df + + +def edr_to_dict(path: str, verbose: bool = False): + import numpy as np + if verbose: + all_energies, all_names, times = read_edr(path, verbose=True) + else: + all_energies, all_names, times = read_edr(path) + energy_dict = {} + for idx, name in enumerate(all_names): + energy_dict[name] = np.array([all_energies[frame][idx] for frame in range(len(times))]) + return energy_dict \ No newline at end of file From 48085f322d13ee6dc90f10a00da932f4ef34b7ab Mon Sep 17 00:00:00 2001 From: BFedder <80363742+BFedder@users.noreply.github.com> Date: Tue, 14 Jun 2022 15:04:56 +0100 Subject: [PATCH 02/10] make pep8speaks happy --- panedr/panedr.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/panedr/panedr.py b/panedr/panedr.py index 2155b9b..439bf4c 100644 --- a/panedr/panedr.py +++ b/panedr/panedr.py @@ -449,5 +449,6 @@ def edr_to_dict(path: str, verbose: bool = False): all_energies, all_names, times = read_edr(path) energy_dict = {} for idx, name in enumerate(all_names): - energy_dict[name] = np.array([all_energies[frame][idx] for frame in range(len(times))]) - return energy_dict \ No newline at end of file + energy_dict[name] = np.array( + [all_energies[frame][idx] for frame in range(len(times))]) + return energy_dict From e70070697e0d024efb233c2a08b821f7af4cccfe Mon Sep 17 00:00:00 2001 From: BFedder Date: Tue, 14 Jun 2022 15:43:20 +0100 Subject: [PATCH 03/10] rewrote verbose check --- panedr/panedr.py | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/panedr/panedr.py b/panedr/panedr.py index 439bf4c..d5edcd1 100644 --- a/panedr/panedr.py +++ b/panedr/panedr.py @@ -402,14 +402,14 @@ def is_frame_magic(data): return magic == -7777777 -def read_edr(path, verbose=False): +def read_edr(path, verbose_set=False): begin = time.time() edr_file = EDRFile(str(path)) all_energies = [] all_names = [u'Time'] + [nm.name for nm in edr_file.nms] times = [] for ifr, frame in enumerate(edr_file): - if verbose: + if verbose_set: if ((ifr < 20 or ifr % 10 == 0) and (ifr < 200 or ifr % 100 == 0) and (ifr < 2000 or ifr % 1000 == 0)): @@ -421,7 +421,7 @@ def read_edr(path, verbose=False): all_energies.append([frame.t] + [ener.e for ener in frame.ener]) end = time.time() - if verbose: + if verbose_set: print('\rLast Frame read : {}, time : {} ps' .format(ifr, frame.t), end='', file=sys.stderr) @@ -433,20 +433,14 @@ def read_edr(path, verbose=False): def edr_to_df(path: str, verbose: bool = False): import pandas - if verbose: - all_energies, all_names, times = read_edr(path, verbose=True) - else: - all_energies, all_names, times = read_edr(path) + all_energies, all_names, times = read_edr(path, verbose_set=verbose) df = pandas.DataFrame(all_energies, columns=all_names, index=times) return df def edr_to_dict(path: str, verbose: bool = False): import numpy as np - if verbose: - all_energies, all_names, times = read_edr(path, verbose=True) - else: - all_energies, all_names, times = read_edr(path) + all_energies, all_names, times = read_edr(path, verbose_set=verbose) energy_dict = {} for idx, name in enumerate(all_names): energy_dict[name] = np.array( From 18e68c95ca8beb9f7729b76721cd6618227a67d9 Mon Sep 17 00:00:00 2001 From: BFedder Date: Tue, 14 Jun 2022 17:35:01 +0100 Subject: [PATCH 04/10] added test for edr_to_dict() --- panedr/panedr.py | 2 +- tests/test_edr.py | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/panedr/panedr.py b/panedr/panedr.py index d5edcd1..1aafaa0 100644 --- a/panedr/panedr.py +++ b/panedr/panedr.py @@ -75,7 +75,7 @@ Enxnm = collections.namedtuple('Enxnm', 'name unit') ENX_VERSION = 5 -__all__ = ['edr_to_df'] +__all__ = ['edr_to_df', 'edr_to_dict'] class EDRFile(object): diff --git a/tests/test_edr.py b/tests/test_edr.py index 8062dfb..68ac226 100644 --- a/tests/test_edr.py +++ b/tests/test_edr.py @@ -163,6 +163,14 @@ def _assert_progress_range(self, progress, dt, start, stop, step): assert ref_line == progress_line +def test_edr_to_dict(): + array_dict = panedr.edr_to_dict(EDR) + ref_df = panedr.edr_to_df(EDR) + array_df = pandas.DataFrame.from_dict(array_dict).set_index( + "Time", drop=False) + assert array_df.equals(ref_df) + + def read_xvg(path): """ Reads XVG file, returning the data, names, and precision. From d69ae9e033e2172e2da93fe7d6bd807609a54ed5 Mon Sep 17 00:00:00 2001 From: BFedder Date: Sun, 19 Jun 2022 19:39:11 +0100 Subject: [PATCH 05/10] first batch of reviews addressed --- panedr/panedr.py | 22 ++++++++++++++-------- requirements.txt | 2 +- setup.cfg | 2 ++ tests/test_edr.py | 5 ++++- 4 files changed, 21 insertions(+), 10 deletions(-) diff --git a/panedr/panedr.py b/panedr/panedr.py index 1aafaa0..72a6a44 100644 --- a/panedr/panedr.py +++ b/panedr/panedr.py @@ -45,6 +45,7 @@ import sys import itertools import time +import numpy as np #Index for the IDs of additional blocks in the energy file. @@ -75,7 +76,7 @@ Enxnm = collections.namedtuple('Enxnm', 'name unit') ENX_VERSION = 5 -__all__ = ['edr_to_df', 'edr_to_dict'] +__all__ = ['edr_to_df', 'edr_to_dict', 'read_edr'] class EDRFile(object): @@ -402,14 +403,14 @@ def is_frame_magic(data): return magic == -7777777 -def read_edr(path, verbose_set=False): +def read_edr(path, verbose=False): begin = time.time() edr_file = EDRFile(str(path)) all_energies = [] all_names = [u'Time'] + [nm.name for nm in edr_file.nms] times = [] for ifr, frame in enumerate(edr_file): - if verbose_set: + if verbose: if ((ifr < 20 or ifr % 10 == 0) and (ifr < 200 or ifr % 100 == 0) and (ifr < 2000 or ifr % 1000 == 0)): @@ -421,7 +422,7 @@ def read_edr(path, verbose_set=False): all_energies.append([frame.t] + [ener.e for ener in frame.ener]) end = time.time() - if verbose_set: + if verbose: print('\rLast Frame read : {}, time : {} ps' .format(ifr, frame.t), end='', file=sys.stderr) @@ -432,15 +433,20 @@ def read_edr(path, verbose_set=False): def edr_to_df(path: str, verbose: bool = False): - import pandas - all_energies, all_names, times = read_edr(path, verbose_set=verbose) + try: + import pandas + except ImportError: + raise ImportError("""ERROR --- pandas was not found! + pandas is required to use the `.edr_to_df()` + functionality. Try installing it using pip, e.g.: + python -m pip install pandas""") + all_energies, all_names, times = read_edr(path, verbose=verbose) df = pandas.DataFrame(all_energies, columns=all_names, index=times) return df def edr_to_dict(path: str, verbose: bool = False): - import numpy as np - all_energies, all_names, times = read_edr(path, verbose_set=verbose) + all_energies, all_names, times = read_edr(path, verbose=verbose) energy_dict = {} for idx, name in enumerate(all_names): energy_dict[name] = np.array( diff --git a/requirements.txt b/requirements.txt index d1c77d1..11a3b63 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,2 @@ -pandas +numpy pbr diff --git a/setup.cfg b/setup.cfg index 502b49d..43d9c69 100644 --- a/setup.cfg +++ b/setup.cfg @@ -27,3 +27,5 @@ classifier = test = six pytest +pandas = + pandas diff --git a/tests/test_edr.py b/tests/test_edr.py index 68ac226..6abb2e2 100644 --- a/tests/test_edr.py +++ b/tests/test_edr.py @@ -163,7 +163,7 @@ def _assert_progress_range(self, progress, dt, start, stop, step): assert ref_line == progress_line -def test_edr_to_dict(): +def test_edr_to_dict_matches_edr_to_df(): array_dict = panedr.edr_to_dict(EDR) ref_df = panedr.edr_to_df(EDR) array_df = pandas.DataFrame.from_dict(array_dict).set_index( @@ -171,6 +171,9 @@ def test_edr_to_dict(): assert array_df.equals(ref_df) +# TODO: write test that ImportError is raised when pandas not installed + + def read_xvg(path): """ Reads XVG file, returning the data, names, and precision. From 20d5e399b649ecef1896ee9268e518b2066ea561 Mon Sep 17 00:00:00 2001 From: BFedder Date: Sat, 25 Jun 2022 16:43:51 +0100 Subject: [PATCH 06/10] changed required numpy version to match MDAnalysis --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 11a3b63..87a4fa2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,2 @@ -numpy +numpy>=1.19.0 pbr From ce812c5ba10cca4b42523ff3aa9a0c10ecd5f429 Mon Sep 17 00:00:00 2001 From: BFedder Date: Mon, 27 Jun 2022 14:59:58 +0100 Subject: [PATCH 07/10] added test for pandas ImportError --- tests/test_edr.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/tests/test_edr.py b/tests/test_edr.py index 6abb2e2..0838900 100644 --- a/tests/test_edr.py +++ b/tests/test_edr.py @@ -59,6 +59,20 @@ EDR_Data = namedtuple('EDR_Data', ['df', 'xvgdata', 'xvgtime', 'xvgnames', 'xvgprec', 'edrfile', 'xvgfile']) + +def test_failed_import(monkeypatch): + # Putting this test first to avoid datafiles already being loaded + errmsg = "ERROR --- pandas was not found!" + + monkeypatch.setitem(sys.modules, 'pandas', None) + + if 'MDAnalysis.tests.datafiles' in sys.modules: + monkeypatch.delitem(sys.modules, 'pandas') + + with pytest.raises(ImportError, match=errmsg): + panedr.edr_to_df(EDR) + + @pytest.fixture(scope='module', params=[(EDR, EDR_XVG), (EDR_IRREGULAR, EDR_IRREGULAR_XVG), @@ -73,7 +87,7 @@ def edr(request): xvgtime = xvgdata[:, 0] xvgdata = xvgdata[:, 1:] return EDR_Data(df, xvgdata, xvgtime, xvgnames, xvgprec, edrfile, xvgfile) - + class TestEdrToDf(object): """ @@ -177,7 +191,7 @@ def test_edr_to_dict_matches_edr_to_df(): def read_xvg(path): """ Reads XVG file, returning the data, names, and precision. - + The data is returned as a 2D numpy array. Column names are returned as an array of string objects. Precision is an integer corresponding to the least number of decimal places found, excluding the first (time) column. @@ -216,7 +230,7 @@ def read_xvg(path): def ndec(val): """Returns the number of decimal places of a string rep of a float - + """ try: return len(re.split(NDEC_PATTERN, val)[1]) From 692ccad0b61ba0eac82df38eccc7fb69eca06afd Mon Sep 17 00:00:00 2001 From: BFedder <80363742+BFedder@users.noreply.github.com> Date: Mon, 27 Jun 2022 17:54:32 +0100 Subject: [PATCH 08/10] Update test_edr.py --- tests/test_edr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_edr.py b/tests/test_edr.py index 0838900..d209c3d 100644 --- a/tests/test_edr.py +++ b/tests/test_edr.py @@ -66,7 +66,7 @@ def test_failed_import(monkeypatch): monkeypatch.setitem(sys.modules, 'pandas', None) - if 'MDAnalysis.tests.datafiles' in sys.modules: + if 'pandas' in sys.modules: monkeypatch.delitem(sys.modules, 'pandas') with pytest.raises(ImportError, match=errmsg): From 6ef2f84151cd7be7004786c2f5c929c83932a833 Mon Sep 17 00:00:00 2001 From: BFedder Date: Mon, 27 Jun 2022 18:19:57 +0100 Subject: [PATCH 09/10] Fixing --- tests/test_edr.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tests/test_edr.py b/tests/test_edr.py index d209c3d..83eea4e 100644 --- a/tests/test_edr.py +++ b/tests/test_edr.py @@ -63,12 +63,7 @@ def test_failed_import(monkeypatch): # Putting this test first to avoid datafiles already being loaded errmsg = "ERROR --- pandas was not found!" - monkeypatch.setitem(sys.modules, 'pandas', None) - - if 'pandas' in sys.modules: - monkeypatch.delitem(sys.modules, 'pandas') - with pytest.raises(ImportError, match=errmsg): panedr.edr_to_df(EDR) From 26592117a0e45af1a1521432b1b87ce676e6bd09 Mon Sep 17 00:00:00 2001 From: BFedder <80363742+BFedder@users.noreply.github.com> Date: Wed, 29 Jun 2022 12:48:08 +0100 Subject: [PATCH 10/10] Update tests/test_edr.py Co-authored-by: Irfan Alibay --- tests/test_edr.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/test_edr.py b/tests/test_edr.py index 83eea4e..b5c9606 100644 --- a/tests/test_edr.py +++ b/tests/test_edr.py @@ -180,9 +180,6 @@ def test_edr_to_dict_matches_edr_to_df(): assert array_df.equals(ref_df) -# TODO: write test that ImportError is raised when pandas not installed - - def read_xvg(path): """ Reads XVG file, returning the data, names, and precision.