A few changes to the RED scheme code #6

Status: Open. Wants to merge 11 commits into base: main.
54 changes: 24 additions & 30 deletions RED_scheme/durationcorrection.py
@@ -1,13 +1,14 @@
 #!/usr/bin/env python
 import h5py
 import numpy

 '''
 Calculate factor to correct for nonzero durations of barrier crossings. The
 event duration distribution is estimated from observed event durations,
 accounting for the fact that the probability of observing an event of a given
 duration is not proportional to the duration of that event.

-After specifying data to the DurationCorrection class via ``from_list``, call
+After specifying data to the DurationCorrection class via ``from_files``, call
 ``correction`` to return the correction factor

     __ __ -1
@@ -28,41 +29,32 @@
 '''

 class DurationCorrection(object):
-    def __init__(self):
-        pass
-
-    def from_list(self, kinetics_path_list, fstate, lastiter=2000, **kwargs):
+    @staticmethod
+    def from_files(kinetics_files, istate, fstate, lastiter=None, **kwargs):
         weights = []
         durations = []
-        for path in kinetics_path_list:
-            kinetics_file = h5py.File(path, 'r')
-            if lastiter is not None:
-                where = numpy.where(
-                    numpy.logical_and(kinetics_file['durations'][:lastiter]\
-                                          ['weight'] > 0,
-                                      kinetics_file['durations'][:lastiter]\
-                                          ['fstate'] == fstate))
-                d = kinetics_file['durations'][:lastiter]['duration']
-                w = kinetics_file['durations'][:lastiter]['weight']
-            else:
-                where = numpy.where(
-                    numpy.logical_and(kinetics_file['durations']\
-                                          ['weight'] > 0,
-                                      kinetics_file['durations']\
-                                          ['fstate'] == fstate))
-                d = kinetics_file['durations']['duration']
-                w = kinetics_file['durations']['weight']
-            for i in range(where[1].shape[0]):
-                weight = w[where[0][i],where[1][i]]
-                duration = d[where[0][i],where[1][i]]
-                if duration > 0:
-                    durations.append(duration)
-                else:
-                    durations.append(where[0][i])
-                weights.append(weight)
+        for path in kinetics_files:
+            with h5py.File(path, 'r') as kinetics_file:
+                if lastiter is not None:
+                    dataset = kinetics_file['durations'][:lastiter]
+                else:
+                    dataset = kinetics_file['durations']
+
+                torf = numpy.logical_and(dataset['istate'] == istate, dataset['fstate'] == fstate)
+                torf = numpy.logical_and(dataset['weight'] > 0, torf)
+
+                w = dataset['weight'][torf]
+                d = dataset['duration'][torf]
+                d[d<0] = numpy.where(torf)[0][d<0]
+                weights.extend(w)
+                durations.extend(d)
+
+        return DurationCorrection(durations, weights)
+
+    def __init__(self, durations, weights):
+        self.weights = numpy.array(weights)
+        self.durations = numpy.array(durations)

     def correction(self, maxduration, timepoints):
         '''
         Return the correction factor
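
Note on the rewrite above: the old index-pair loop becomes a boolean mask (torf) over the compound durations dataset, and the d[d<0] line substitutes the event's iteration index for durations recorded as negative placeholders (the old loop did this for all nonpositive durations). A minimal sketch of the same pattern on a synthetic numpy structured array; field names mirror the code, the values are invented, and the real dataset's 2-D iteration-by-event layout is flattened to 1-D here for brevity:

import numpy

# synthetic stand-in for the HDF5 'durations' compound dataset
dtype = [('istate', int), ('fstate', int), ('weight', float), ('duration', float)]
dataset = numpy.array(
    [(1, 0, 0.2, 5.0),    # kept: right transition, positive weight
     (1, 0, 0.0, 3.0),    # dropped: zero weight
     (0, 1, 0.1, 2.0),    # dropped: wrong transition
     (1, 0, 0.3, -1.0)],  # kept, but duration is a negative placeholder
    dtype=dtype)

torf = numpy.logical_and(dataset['istate'] == 1, dataset['fstate'] == 0)
torf = numpy.logical_and(dataset['weight'] > 0, torf)

w = dataset['weight'][torf]
d = dataset['duration'][torf]
d[d < 0] = numpy.where(torf)[0][d < 0]  # negative duration -> event index
print(w, d)  # [0.2 0.3] [5. 3.]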
@@ -102,6 +94,7 @@ def correction(self, maxduration, timepoints):
         for i, tau in enumerate(taugrid):
             matches = numpy.logical_and(self.durations >= tau, self.durations < tau + dtau)
             f_tilde[i] = self.weights[matches].sum()/(maxduration-tau+1)
+
         if f_tilde.sum() != 0:
             f_tilde /= f_tilde.sum()*dtau

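The (maxduration - tau + 1) denominator in f_tilde is the correction the module docstring describes: within an observation window of maxduration frames, an event lasting tau frames has only (maxduration - tau + 1) possible start positions, so long events are observed proportionally less often and their weight must be scaled up. A quick numeric reading of that denominator:

maxduration = 10
for tau in (1, 5, 10):
    # number of window positions at which a tau-frame event fits
    print(tau, maxduration - tau + 1)   # 1 -> 10, 5 -> 6, 10 -> 1
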
@@ -115,6 +108,7 @@ def correction(self, maxduration, timepoints):
             integral1[i] = numpy.trapz(f_tilde[:i+1], taugrid[:i+1])
         self.F_tilde = integral1
         integral2 = numpy.trapz(integral1, taugrid)
+
         if integral2 == 0:
             return 0.
         return maxduration/integral2
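
Taken together, the refactor replaces the two-step from_list/mutate pattern with a constructor-style API. A minimal usage sketch; the file name, state indices, and iteration counts are illustrative, not part of the PR:

from durationcorrection import DurationCorrection

# build the estimator from one w_direct output file, using the
# first 500 iterations
dc = DurationCorrection.from_files(['direct.h5'], istate=1, fstate=0, lastiter=500)

# correction factor over a 500-iteration window sampled at 3 timepoints
# per iteration (rate.py below calls this with maxduration = lastiter)
factor = dc.correction(500, 3)
print('correction factor:', factor)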
38 changes: 19 additions & 19 deletions RED_scheme/rate.py
@@ -1,44 +1,44 @@
 #!/usr/bin/env python
 import sys
-import durationcorrection
+from durationcorrection import DurationCorrection
 import h5py
 import matplotlib
 import matplotlib.pyplot as pyplot
 import numpy

-tau = 1
-concentration = 1
+TAU = 100
+CONCENTRATION = 1

-def rateanalysis(lastiter, directh5path, timepoints_per_iteration):
-    dc = durationcorrection.DurationCorrection()
-    dc.from_list([directh5path], 0, lastiter=lastiter)
+def rateanalysis(lastiter, directh5path, istate, fstate, timepoints_per_iteration, tau=TAU, conc=CONCENTRATION):
+    dc = DurationCorrection.from_files([directh5path], istate, fstate, lastiter=lastiter)
     correction = dc.correction(lastiter, timepoints_per_iteration)

-    directh5 = h5py.File(directh5path, 'r')
-    raw_rate = directh5['rate_evolution'][lastiter-1][1][0]['expected']
-    raw_rate = raw_rate/(tau*concentration)
+    with h5py.File(directh5path, 'r') as directh5:
+        raw_rate = directh5['rate_evolution'][lastiter-1][istate][fstate]['expected']
+    raw_rate = raw_rate/(tau*conc)
     rate = raw_rate*correction
     return raw_rate, rate

 def main():
-    # number of iterations in the simulation
-    n_iterations = 2000
     # path to the w_direct output files
     directh5path = './direct.h5'
     # number of timepoints per iteration; since the last timepoint of one
     # iteration overlaps with the first timepoint of the next, we typically
     # have an odd number here (like 11 or 21 or 51).
     timepoints_per_iteration = 3

+    # number of iterations in the simulation
+    n_iterations = None
+    if n_iterations is None:
+        with h5py.File(directh5path, 'r') as directh5:
+            n_iterations = directh5['rate_evolution'].shape[0]
+
     # buffer to hold the corrected rate values;
-    raw_rates = numpy.zeros(n_iterations+1)
-    rates = numpy.zeros(n_iterations+1)
+    raw_rates = numpy.zeros(n_iterations)
+    rates = numpy.zeros(n_iterations)
     # loop through all iterations
-    for iiter in range(1,n_iterations+1):
-        print(iiter)
+    for i in range(n_iterations):
+        print("iter %d"%(i + 1))
         # calculate corrected rate using data up to the current iteration
         # and store the result in the buffer
-        raw_rates[iiter-1], rates[iiter-1] = rateanalysis(iiter, directh5path, timepoints_per_iteration)
+        raw_rates[i], rates[i] = rateanalysis(i + 1, directh5path, 1, 0, timepoints_per_iteration)

     # calculate the mean and sem for both raw and corrected rates
     # raw_rates_mean = numpy.mean(raw_rates, axis=1)
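
The trailing commented-out line hints at a mean/SEM summary step (its axis=1 suggests an earlier 2-D, multi-replica layout). For the 1-D buffers built above, a hedged sketch of how that step might look; the helper name and the last-half cutoff are illustrative choices, not from this PR:

import numpy

def mean_and_sem(values):
    # sample mean and standard error of the mean for a 1-D sample
    values = numpy.asarray(values)
    return values.mean(), values.std(ddof=1) / numpy.sqrt(values.size)

# e.g. average the corrected rates over the last half of the iterations:
# tail = rates[n_iterations // 2:]
# mean, sem = mean_and_sem(tail)
# print("rate = %g +/- %g" % (mean, sem))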