Merge pull request #33 from BFedder/return_dict

Refactoring the way data is returned in panedr.
MDAnalysis · Jun 29, 2022 · 84bd117
2 parents 81289f1 + 2659211
commit 84bd117
Show file tree

Hide file tree

Showing 4 changed files with 50 additions and 8 deletions.
diff --git a/panedr/panedr.py b/panedr/panedr.py
@@ -45,7 +45,8 @@
 import sys
 import itertools
 import time
-import pandas
+import numpy as np
+
 
 #Index for the IDs of additional blocks in the energy file.
 #Blocks can be added without sacrificing backward and forward
@@ -75,7 +76,7 @@
 Enxnm = collections.namedtuple('Enxnm', 'name unit')
 ENX_VERSION = 5
 
-__all__ = ['edr_to_df']
+__all__ = ['edr_to_df', 'edr_to_dict', 'read_edr']
 
 
 class EDRFile(object):
@@ -395,14 +396,14 @@ def edr_strings(data, file_version, n):
 
 def is_frame_magic(data):
     """Unpacks an int and checks whether it matches the EDR frame magic number
-    
+
     Does not roll the reading position back.
     """
     magic = data.unpack_int()
     return magic == -7777777
 
 
-def edr_to_df(path, verbose=False):
+def read_edr(path, verbose=False):
     begin = time.time()
     edr_file = EDRFile(str(path))
     all_energies = []
@@ -427,5 +428,27 @@ def edr_to_df(path, verbose=False):
               end='', file=sys.stderr)
         print('\n{} frame read in {:.2f} seconds'.format(ifr, end - begin),
               file=sys.stderr)
+
+    return all_energies, all_names, times
+
+
+def edr_to_df(path: str, verbose: bool = False):
+    try:
+        import pandas
+    except ImportError:
+        raise ImportError("""ERROR --- pandas was not found!
+                          pandas is required to use the `.edr_to_df()`
+                          functionality. Try installing it using pip, e.g.:
+                          python -m pip install pandas""")
+    all_energies, all_names, times = read_edr(path, verbose=verbose)
     df = pandas.DataFrame(all_energies, columns=all_names, index=times)
     return df
+
+
+def edr_to_dict(path: str, verbose: bool = False):
+    all_energies, all_names, times = read_edr(path, verbose=verbose)
+    energy_dict = {}
+    for idx, name in enumerate(all_names):
+        energy_dict[name] = np.array(
+            [all_energies[frame][idx] for frame in range(len(times))])
+    return energy_dict
diff --git a/requirements.txt b/requirements.txt
@@ -1,2 +1,2 @@
-pandas
+numpy>=1.19.0
 pbr
diff --git a/setup.cfg b/setup.cfg
@@ -27,3 +27,5 @@ classifier =
 test =
     six
     pytest
+pandas =
+    pandas
diff --git a/tests/test_edr.py b/tests/test_edr.py
@@ -59,6 +59,15 @@
 EDR_Data = namedtuple('EDR_Data', ['df', 'xvgdata', 'xvgtime', 'xvgnames',
                                    'xvgprec', 'edrfile', 'xvgfile'])
 
+
+def test_failed_import(monkeypatch):
+    # Putting this test first to avoid datafiles already being loaded
+    errmsg = "ERROR --- pandas was not found!"
+    monkeypatch.setitem(sys.modules, 'pandas', None)
+    with pytest.raises(ImportError, match=errmsg):
+        panedr.edr_to_df(EDR)
+
+
 @pytest.fixture(scope='module',
                 params=[(EDR, EDR_XVG),
                         (EDR_IRREGULAR, EDR_IRREGULAR_XVG),
@@ -73,7 +82,7 @@ def edr(request):
     xvgtime = xvgdata[:, 0]
     xvgdata = xvgdata[:, 1:]
     return EDR_Data(df, xvgdata, xvgtime, xvgnames, xvgprec, edrfile, xvgfile)
-    
+
 
 class TestEdrToDf(object):
     """
@@ -163,10 +172,18 @@ def _assert_progress_range(self, progress, dt, start, stop, step):
             assert ref_line == progress_line
 
 
+def test_edr_to_dict_matches_edr_to_df():
+    array_dict = panedr.edr_to_dict(EDR)
+    ref_df = panedr.edr_to_df(EDR)
+    array_df = pandas.DataFrame.from_dict(array_dict).set_index(
+        "Time", drop=False)
+    assert array_df.equals(ref_df)
+
+
 def read_xvg(path):
     """
     Reads XVG file, returning the data, names, and precision.
-    
+
     The data is returned as a 2D numpy array. Column names are returned as an
     array of string objects. Precision is an integer corresponding to the least
     number of decimal places found, excluding the first (time) column.
@@ -205,7 +222,7 @@ def read_xvg(path):
 
 def ndec(val):
     """Returns the number of decimal places of a string rep of a float
-    
+
     """
     try:
         return len(re.split(NDEC_PATTERN, val)[1])