From 2d625410bfc7c216330c0c721adb5294e6bad565 Mon Sep 17 00:00:00 2001
From: Kai-Feng Chen <kevin.ventus@gmail.com>
Date: Wed, 31 Jan 2024 13:50:37 -0700
Subject: [PATCH 1/5] Add a switch in lst_bin to allow for weight_by_nsamples
 or not

---
 hera_cal/lstbin.py | 86 ++++++++++++++++++++++++++--------------------
 1 file changed, 48 insertions(+), 38 deletions(-)

diff --git a/hera_cal/lstbin.py b/hera_cal/lstbin.py
index b4e99e69c..4167cf4cd 100644
--- a/hera_cal/lstbin.py
+++ b/hera_cal/lstbin.py
@@ -31,6 +31,7 @@ def profile(fnc):
 
 logger = logging.getLogger(__name__)
 
+
 def baselines_same_across_nights(data_list):
     """
     Check whether the sets of baselines in the datacontainers are consistent.
@@ -59,9 +60,10 @@ def baselines_same_across_nights(data_list):
     same_across_nights = np.all([baseline_counts[k] == baseline_counts[bl] for bl in baseline_counts])
     return same_across_nights
 
+
 @profile
 def lst_bin(data_list, lst_list, flags_list=None, nsamples_list=None, dlst=None, begin_lst=None, lst_low=None,
-            lst_hi=None, flag_thresh=0.7, atol=1e-10, median=False, truncate_empty=True,
+            lst_hi=None, flag_thresh=0.7, atol=1e-10, median=False, truncate_empty=True, weight_by_nsamples=True,
             sig_clip=False, sigma=4.0, min_N=4, flag_below_min_N=False, return_no_avg=False, antpos=None,
             rephase=False, freq_array=None, lat=-30.72152, verbose=True, bl_list=None):
     """
@@ -94,6 +96,8 @@ def lst_bin(data_list, lst_list, flags_list=None, nsamples_list=None, dlst=None,
              median=True is not supported when nsamples_list is not None.
     truncate_empty : type=boolean, if True, truncate output time bins that have
         no averaged data in them.
+    weight_by_nsamples : type=boolean, if True, weight by nsamples during lst binning;
+        if False, only weight by flags.
     sig_clip : type=boolean, if True, perform a sigma clipping algorithm of the LST bins on the
         real and imag components separately. Resultant clip flags are OR'd between real and imag.
         Warning: This is considerably slow.
@@ -211,9 +215,9 @@ def lst_bin(data_list, lst_list, flags_list=None, nsamples_list=None, dlst=None,
         # iterate over keys in d
         klist = list(d.keys())
         for j, key in enumerate(klist):
-            if j % max(1, (len(klist)//100)) == 0:
+            if j % max(1, (len(klist) // 100)) == 0:
                 logger.info(f"Doing key {key} [{j+1}/{len(klist)}]")
-            
+
             # if bl_list is not None, use it to determine conjugation:
             # this is to prevent situations where conjugation of bl in
             # data_list is different from bl in data which can cause
@@ -316,7 +320,6 @@ def lst_bin(data_list, lst_list, flags_list=None, nsamples_list=None, dlst=None,
     # wrap lst_bins if needed
     lst_bins = lst_bins % (2 * np.pi)
 
-
     # return un-averaged data if desired
     if return_no_avg:
         # return all binned data instead of just the bin average
@@ -346,7 +349,7 @@ def lst_bin(data_list, lst_list, flags_list=None, nsamples_list=None, dlst=None,
         bin_count = []
         # iterate over sorted LST grid indices in data[key]
         for j, ind in enumerate(sorted(data[key].keys())):
-            
+
             # make data and flag arrays from lists
             d = np.array(data[key][ind])  # shape = (Ndays x Nfreqs)
             f = np.array(flags[key][ind])
@@ -400,18 +403,22 @@ def lst_bin(data_list, lst_list, flags_list=None, nsamples_list=None, dlst=None,
                 # (inverse variance weighted sum).
                 isfinite = np.isfinite(d)
                 n[~isfinite] = 0.0
-
-                norm = np.sum(n, axis=0).clip(1e-99, np.inf)
+                if weight_by_nsamples:
+                    wgt = np.copy(n)
+                else:
+                    wgt = np.ones_like(n)
+                    wgt[~isfinite] = 0.0
+                norm = np.sum(wgt, axis=0).clip(1e-99, np.inf)
                 mask = norm > 0
                 real_avg_t = np.full(d.shape[1:], np.nan)
                 imag_avg_t = np.full(d.shape[1:], np.nan)
-                
-                sm = np.nansum(d * n, axis=0)
+
+                sm = np.nansum(d * wgt, axis=0)
                 sm[mask] /= norm[mask]
 
                 real_avg_t[mask] = sm.real[mask]
-                imag_avg_t[norm>0] = sm.imag[mask]
-                
+                imag_avg_t[norm > 0] = sm.imag[mask]
+
                 # add back nans as np.nansum sums nan slices to 0
                 flagged_f = np.logical_not(isfinite).all(axis=0)
                 real_avg_t[flagged_f] = np.nan
@@ -644,7 +651,7 @@ def config_lst_bin_files(data_files, dlst=None, atol=1e-10, lst_start=None, verb
 
 def lst_bin_files(data_files, input_cals=None, dlst=None, verbose=True, ntimes_per_file=60,
                   file_ext="{type}.{time:7.5f}.uvh5", outdir=None, overwrite=False, history='', lst_start=None,
-                  atol=1e-6, sig_clip=True, sigma=5.0, min_N=5, flag_below_min_N=False, rephase=False,
+                  atol=1e-6, weight_by_nsamples=True, sig_clip=True, sigma=5.0, min_N=5, flag_below_min_N=False, rephase=False,
                   output_file_select=None, Nbls_to_load=None, ignore_flags=False, average_redundant_baselines=False,
                   bl_error_tol=1.0, include_autos=True, ex_ant_yaml_files=None, Nblgroups=None, **kwargs):
     """
@@ -674,6 +681,8 @@ def lst_bin_files(data_files, input_cals=None, dlst=None, verbose=True, ntimes_p
     overwrite : type=bool, if True overwrite output files
     history : history to insert into output files
     rephase : type=bool, if True, rephase data points in LST bin to center of bin
+    weight_by_nsamples : type=boolean, if True, weight by nsamples during lst binning;
+        if False, only weight by flags.
     bin_kwargs : type=dictionary, keyword arguments for lst_bin.
     atol : type=float, absolute tolerance for LST bin float comparison
     output_file_select : type=int or integer list, list of integer indices of the output files to run on.
@@ -763,7 +772,7 @@ def lst_bin_files(data_files, input_cals=None, dlst=None, verbose=True, ntimes_p
     if Nblgroups is None:
         if Nbls_to_load in [None, 'None', 'none']:
             Nbls_to_load = len(bl_nightly_dicts) + 1
-        
+
         Nblgroups = len(bl_nightly_dicts) // Nbls_to_load + 1
     else:
         Nbls_to_load = len(bl_nightly_dicts) // Nblgroups
@@ -865,7 +874,7 @@ def lst_bin_files(data_files, input_cals=None, dlst=None, verbose=True, ntimes_p
                             uvc.select(times=uvc.time_array[tinds])
                             gains, cal_flags, _, _ = uvc.build_calcontainers()
                         apply_cal.calibrate_in_place(data, gains, data_flags=flags, cal_flags=cal_flags,
-                                                        gain_convention=uvc.gain_convention)
+                                                     gain_convention=uvc.gain_convention)
                         utils.echo("Done with calibration.", verbose=verbose)
 
                     # redundantly average baselines, keying to baseline group key
@@ -920,7 +929,8 @@ def lst_bin_files(data_files, input_cals=None, dlst=None, verbose=True, ntimes_p
             utils.echo("About to run LST binning...")
             (bin_lst, bin_data, flag_data, std_data,
              num_data) = lst_bin(data_list, lst_list, flags_list=flgs_list, dlst=dlst, begin_lst=begin_lst,
-                                 lst_low=fmin, lst_hi=fmax, truncate_empty=False, sig_clip=sig_clip, nsamples_list=nsamples_list,
+                                 lst_low=fmin, lst_hi=fmax, truncate_empty=False, sig_clip=sig_clip,
+                                 weight_by_nsamples=weight_by_nsamples, nsamples_list=nsamples_list,
                                  sigma=sigma, min_N=min_N, flag_below_min_N=flag_below_min_N, rephase=rephase, freq_array=freq_array,
                                  antpos=antpos, bl_list=all_blgroup_baselines)
             # append to lists
@@ -973,9 +983,9 @@ def lst_bin_files(data_files, input_cals=None, dlst=None, verbose=True, ntimes_p
 
 
 def make_lst_grid(
-    dlst: float, 
-    begin_lst: float | None = None, 
-    lst_width: float = 2*np.pi, 
+    dlst: float,
+    begin_lst: float | None = None,
+    lst_width: float = 2 * np.pi,
 ) -> np.ndarray:
     """
     Make a uniform grid in local sidereal time.
@@ -986,17 +996,17 @@ def make_lst_grid(
 
     Parameters:
     -----------
-    dlst : 
+    dlst :
         The width of a single LST bin in radians. 2pi must be equally divisible
         by dlst. If not, will default to the closest dlst that satisfies this criterion that
         is also greater than the input dlst. There is a minimum allowed dlst of 6.283e-6 radians,
         or .0864 seconds.
     begin_lst
-        Beginning point for lst_grid. ``begin_lst`` must fall exactly on an LST bin 
-        given a dlst, within 0-2pi. If not, it is replaced with the closest bin. 
+        Beginning point for lst_grid. ``begin_lst`` must fall exactly on an LST bin
+        given a dlst, within 0-2pi. If not, it is replaced with the closest bin.
         Default is zero radians.
     lst_width
-        The width of the LST grid (including all bins) in radians. 
+        The width of the LST grid (including all bins) in radians.
         Default is 2pi radians. Note that regardless of this value, the final grid
         will always be equally divisible by 2pi.
 
@@ -1010,7 +1020,7 @@ def make_lst_grid(
 
     # check 2pi is equally divisible by dlst
     if not (
-        np.isclose((2 * np.pi / dlst) % 1, 0.0, atol=1e-5) 
+        np.isclose((2 * np.pi / dlst) % 1, 0.0, atol=1e-5)
         or np.isclose((2 * np.pi / dlst) % 1, 1.0, atol=1e-5)
     ):
         # generate array of appropriate dlsts
@@ -1027,7 +1037,7 @@ def make_lst_grid(
         dlst = new_dlst
 
     # make an lst grid from [0, 2pi), with the first bin having a left-edge at 0 radians.
-    lst_grid = np.arange(0, 2*np.pi - 1e-7, dlst) + dlst / 2
+    lst_grid = np.arange(0, 2 * np.pi - 1e-7, dlst) + dlst / 2
 
     # shift grid by begin_lst
     if begin_lst is not None:
@@ -1041,17 +1051,17 @@ def make_lst_grid(
         begin_lst = 0.0
 
     lst_grid = lst_grid[lst_grid < (begin_lst + lst_width)]
-    
+
     return lst_grid
 
 
-def sigma_clip(array: np.ndarray | np.ma.MaskedArray, sigma: float=4.0, min_N: int=4, axis: int = 0):
+def sigma_clip(array: np.ndarray | np.ma.MaskedArray, sigma: float = 4.0, min_N: int = 4, axis: int = 0):
     """
     One-iteration robust sigma clipping algorithm.
 
     Parameters
     ----------
-    array 
+    array
         ndarray of *real* data.
     sigma
         sigma threshold to cut above.
@@ -1063,8 +1073,8 @@ def sigma_clip(array: np.ndarray | np.ma.MaskedArray, sigma: float=4.0, min_N: i
 
     Output
     ------
-    clip_flags 
-        A boolean array with same shape as input array, 
+    clip_flags
+        A boolean array with same shape as input array,
         with clipped values set to True.
     """
     # ensure array is an array
@@ -1092,11 +1102,11 @@ def sigma_clip(array: np.ndarray | np.ma.MaskedArray, sigma: float=4.0, min_N: i
 
 
 def gen_bl_nightly_dicts(
-    hds: list[io.HERAData], 
-    bl_error_tol: float=1.0, 
-    include_autos: bool=True, 
-    redundant: bool=False, 
-    ex_ant_yaml_files: list[str] | None=None
+    hds: list[io.HERAData],
+    bl_error_tol: float = 1.0,
+    include_autos: bool = True,
+    redundant: bool = False,
+    ex_ant_yaml_files: list[str] | None = None
 ) -> list[dict[int, list[tuple[int, int]]]]:
     """
     Helper function to generate baseline dicts to keep track of reds between nights.
@@ -1104,7 +1114,7 @@ def gen_bl_nightly_dicts(
     Parameters
     ----------
     hds
-        A list of HERAData objects. Can have no data loaded (preferable) and should 
+        A list of HERAData objects. Can have no data loaded (preferable) and should
         refer to single files.
     bl_error_tol
         Baselines whose vector difference are within this tolerance are considered
@@ -1112,8 +1122,8 @@ def gen_bl_nightly_dicts(
     include_autos
         Whether to include autos in bl_nightly_dicts.
     redundant
-        If True, ``bl_nightly_dict`` stores redundant group. If False, each 
-        ``bl_nightly_dict`` will contain a length-1 group for each baseline over all 
+        If True, ``bl_nightly_dict`` stores redundant group. If False, each
+        ``bl_nightly_dict`` will contain a length-1 group for each baseline over all
         the nights.
     ex_ant_yaml_files
         A list of paths to yaml files with antennas to throw out on each night.
@@ -1122,7 +1132,7 @@ def gen_bl_nightly_dicts(
     Outputs:
     --------
     If redundant:
-        list of dictionaries of the form 
+        list of dictionaries of the form
         ``{0: [(a0, b0), (a0, c0)...], 1: [(a1, b1),.., ], ... Nnight: [(ANnight, BNnight), ...,]}.
         Each dictionary represents a unique baseline length and orientation.
         where the key of each dictionary is an index for each night to be LST binned and each value

From 68f65da48a3d71db4b08951961daf826cacd3a81 Mon Sep 17 00:00:00 2001
From: Kai-Feng Chen <kevin.ventus@gmail.com>
Date: Wed, 31 Jan 2024 15:30:34 -0700
Subject: [PATCH 2/5] Add tests related to weight_by_nsamples

---
 hera_cal/tests/test_lstbin.py | 41 ++++++++++++++++++++++++++++++++++-
 1 file changed, 40 insertions(+), 1 deletion(-)

diff --git a/hera_cal/tests/test_lstbin.py b/hera_cal/tests/test_lstbin.py
index 0c7698213..e740f783a 100644
--- a/hera_cal/tests/test_lstbin.py
+++ b/hera_cal/tests/test_lstbin.py
@@ -13,6 +13,8 @@
 from ..datacontainer import DataContainer
 from ..data import DATA_PATH
 import shutil
+
+
 @pytest.mark.filterwarnings("ignore:The default for the `center` keyword has changed")
 @pytest.mark.filterwarnings("ignore:Degrees of freedom <= 0 for slice")
 @pytest.mark.filterwarnings("ignore:Mean of empty slice")
@@ -134,7 +136,7 @@ def test_lstbin(self):
         output = lstbin.lst_bin(self.data_list, self.lst_list, flags_list=None, dlst=0.01,
                                 verbose=False, sig_clip=True, min_N=15, flag_below_min_N=True, sigma=2)
         # test wrapping
-        lst_list = [(copy.deepcopy(l) + 6) % (2 * np.pi) for l in self.lst_list]
+        lst_list = [(copy.deepcopy(_lst) + 6) % (2 * np.pi) for _lst in self.lst_list]
         output = lstbin.lst_bin(self.data_list, lst_list, dlst=0.001, begin_lst=np.pi)
         assert output[0][0] > output[0][-1]
         assert len(output[0]) == 175
@@ -168,6 +170,33 @@ def test_lstbin(self):
             assert np.all(np.isclose(output[2][k], output2[2][k]))
             assert np.all(np.isclose(output[1][k], output2[1][k]))
 
+        # test weighted_by_nsamples, nsamples are propagated but data is not weighted by nsamples if set to False
+        output1 = lstbin.lst_bin(self.data_list, self.lst_list, dlst=dlst,
+                                 flags_list=self.flgs_list, nsamples_list=self.nsmp_list,
+                                 weight_by_nsamples=True)
+
+        nsmps1 = copy.deepcopy(self.nsmps1)
+        nsmps1[(24, 25, 'ee')][:, 32] = 0
+        nsmps2 = copy.deepcopy(self.nsmps2)
+        nsmps2[(24, 25, 'ee')][:, 32] = 0
+        nsmps3 = copy.deepcopy(self.nsmps3)
+        nsmps3[(24, 25, 'ee')][:, 32] = 0
+        nsmps_list = [nsmps1, nsmps2, nsmps3]
+        output = lstbin.lst_bin(self.data_list, self.lst_list, dlst=dlst,
+                                flags_list=self.flgs_list, nsamples_list=nsmps_list,
+                                weight_by_nsamples=True)
+        # Check Nsamples are all 0
+        assert np.allclose(output[-1][(24, 25, 'ee')].real[:, 32], 0)
+        # Check data got weighted sum to 0
+        assert np.allclose(output[1][(24, 25, 'ee')].real[100, 32], 0)
+        output = lstbin.lst_bin(self.data_list, self.lst_list, dlst=dlst,
+                                flags_list=self.flgs_list, nsamples_list=nsmps_list,
+                                weight_by_nsamples=False)
+        # Check Nsamples are all 0
+        assert np.allclose(output[-1][(24, 25, 'ee')].real[:, 32], 0)
+        # Check data is the same as before
+        assert np.allclose(output[1][(24, 25, 'ee')].real, output1[1][(24, 25, 'ee')].real)
+
     def test_lstbin_vary_nsamps(self):
         # test execution
         pytest.raises(NotImplementedError, lstbin.lst_bin, self.data_list, self.lst_list, flags_list=self.flgs_list,
@@ -244,6 +273,16 @@ def test_lst_bin_files(self):
         os.remove(output_lst_file)
         os.remove(output_std_file)
 
+        # test weight_by_nsamples
+        lstbin.lst_bin_files(self.data_files, ntimes_per_file=250, outdir="./", overwrite=True,
+                             verbose=False, rephase=True, weight_by_nsamples=False, file_ext=file_ext)
+        output_lst_file = "./zen.ee.LST.0.20124.uvh5"
+        output_std_file = "./zen.ee.STD.0.20124.uvh5"
+        assert os.path.exists(output_lst_file)
+        assert os.path.exists(output_std_file)
+        os.remove(output_lst_file)
+        os.remove(output_std_file)
+
         # test data_list is empty
         data_files = [[sorted(glob.glob(DATA_PATH + '/zen.2458043.*XRAA.uvh5'))[0]],
                       [sorted(glob.glob(DATA_PATH + '/zen.2458045.*XRAA.uvh5'))[-1]]]

From 2f59937be6b1b6fcea259b78913c25963d34bd74 Mon Sep 17 00:00:00 2001
From: Kai-Feng Chen <kevin.ventus@gmail.com>
Date: Wed, 31 Jan 2024 15:40:34 -0700
Subject: [PATCH 3/5] Add weight_by_nsamples in argparser

---
 hera_cal/lstbin.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/hera_cal/lstbin.py b/hera_cal/lstbin.py
index 4167cf4cd..a9416de8b 100644
--- a/hera_cal/lstbin.py
+++ b/hera_cal/lstbin.py
@@ -549,6 +549,7 @@ def lst_bin_arg_parser():
     a.add_argument("--outdir", default=None, type=str, help="directory for writing output")
     a.add_argument("--overwrite", default=False, action='store_true', help="overwrite output files")
     a.add_argument("--lst_start", type=float, default=None, help="starting LST for binner as it sweeps across 2pi LST. Default is first LST of first file.")
+    a.add_argument("--weight_by_nsamples", default=True, action='store_true', help="Weight by nsamples during LST binning. If set to False, weight by flags only. Default True.")
     a.add_argument("--sig_clip", default=False, action='store_true', help="perform robust sigma clipping before binning")
     a.add_argument("--sigma", type=float, default=4.0, help="sigma threshold for sigma clipping")
     a.add_argument("--min_N", type=int, default=4, help="minimum number of points in bin needed to proceed with sigma clipping")

From 3f2f1f6f1fb7a02d15f6bfb3f4ef5e4b73971fb2 Mon Sep 17 00:00:00 2001
From: Kai-Feng Chen <kevin.ventus@gmail.com>
Date: Thu, 1 Feb 2024 17:38:51 -0700
Subject: [PATCH 4/5] Change from weight_by_nsamples to weight_only_by_flags

---
 hera_cal/lstbin.py | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/hera_cal/lstbin.py b/hera_cal/lstbin.py
index a9416de8b..355bb253d 100644
--- a/hera_cal/lstbin.py
+++ b/hera_cal/lstbin.py
@@ -63,7 +63,7 @@ def baselines_same_across_nights(data_list):
 
 @profile
 def lst_bin(data_list, lst_list, flags_list=None, nsamples_list=None, dlst=None, begin_lst=None, lst_low=None,
-            lst_hi=None, flag_thresh=0.7, atol=1e-10, median=False, truncate_empty=True, weight_by_nsamples=True,
+            lst_hi=None, flag_thresh=0.7, atol=1e-10, median=False, truncate_empty=True, weight_only_by_flags=False,
             sig_clip=False, sigma=4.0, min_N=4, flag_below_min_N=False, return_no_avg=False, antpos=None,
             rephase=False, freq_array=None, lat=-30.72152, verbose=True, bl_list=None):
     """
@@ -96,8 +96,7 @@ def lst_bin(data_list, lst_list, flags_list=None, nsamples_list=None, dlst=None,
              median=True is not supported when nsamples_list is not None.
     truncate_empty : type=boolean, if True, truncate output time bins that have
         no averaged data in them.
-    weight_by_nsamples : type=boolean, if True, weight by nsamples during lst binning;
-        if False, only weight by flags.
+    weight_only_by_flags : type=boolean, if True, weight by flags during lst binning instead of nsamples;
     sig_clip : type=boolean, if True, perform a sigma clipping algorithm of the LST bins on the
         real and imag components separately. Resultant clip flags are OR'd between real and imag.
         Warning: This is considerably slow.
@@ -403,11 +402,11 @@ def lst_bin(data_list, lst_list, flags_list=None, nsamples_list=None, dlst=None,
                 # (inverse variance weighted sum).
                 isfinite = np.isfinite(d)
                 n[~isfinite] = 0.0
-                if weight_by_nsamples:
-                    wgt = np.copy(n)
-                else:
+                if weight_only_by_flags:
                     wgt = np.ones_like(n)
                     wgt[~isfinite] = 0.0
+                else:
+                    wgt = np.copy(n)
                 norm = np.sum(wgt, axis=0).clip(1e-99, np.inf)
                 mask = norm > 0
                 real_avg_t = np.full(d.shape[1:], np.nan)
@@ -549,7 +548,7 @@ def lst_bin_arg_parser():
     a.add_argument("--outdir", default=None, type=str, help="directory for writing output")
     a.add_argument("--overwrite", default=False, action='store_true', help="overwrite output files")
     a.add_argument("--lst_start", type=float, default=None, help="starting LST for binner as it sweeps across 2pi LST. Default is first LST of first file.")
-    a.add_argument("--weight_by_nsamples", default=True, action='store_true', help="Weight by nsamples during LST binning. If set to False, weight by flags only. Default True.")
+    a.add_argument("--weight_only_by_flags", default=False, action='store_true', help="Weight by flags instead of nsamples during LST binning. Default is False.")
     a.add_argument("--sig_clip", default=False, action='store_true', help="perform robust sigma clipping before binning")
     a.add_argument("--sigma", type=float, default=4.0, help="sigma threshold for sigma clipping")
     a.add_argument("--min_N", type=int, default=4, help="minimum number of points in bin needed to proceed with sigma clipping")
@@ -652,7 +651,7 @@ def config_lst_bin_files(data_files, dlst=None, atol=1e-10, lst_start=None, verb
 
 def lst_bin_files(data_files, input_cals=None, dlst=None, verbose=True, ntimes_per_file=60,
                   file_ext="{type}.{time:7.5f}.uvh5", outdir=None, overwrite=False, history='', lst_start=None,
-                  atol=1e-6, weight_by_nsamples=True, sig_clip=True, sigma=5.0, min_N=5, flag_below_min_N=False, rephase=False,
+                  atol=1e-6, weight_only_by_flags=False, sig_clip=True, sigma=5.0, min_N=5, flag_below_min_N=False, rephase=False,
                   output_file_select=None, Nbls_to_load=None, ignore_flags=False, average_redundant_baselines=False,
                   bl_error_tol=1.0, include_autos=True, ex_ant_yaml_files=None, Nblgroups=None, **kwargs):
     """
@@ -682,8 +681,7 @@ def lst_bin_files(data_files, input_cals=None, dlst=None, verbose=True, ntimes_p
     overwrite : type=bool, if True overwrite output files
     history : history to insert into output files
     rephase : type=bool, if True, rephase data points in LST bin to center of bin
-    weight_by_nsamples : type=boolean, if True, weight by nsamples during lst binning;
-        if False, only weight by flags.
+    weight_only_by_flags : type=boolean, if True, weight by flags during lst binning instead of nsamples;
     bin_kwargs : type=dictionary, keyword arguments for lst_bin.
     atol : type=float, absolute tolerance for LST bin float comparison
     output_file_select : type=int or integer list, list of integer indices of the output files to run on.
@@ -931,7 +929,7 @@ def lst_bin_files(data_files, input_cals=None, dlst=None, verbose=True, ntimes_p
             (bin_lst, bin_data, flag_data, std_data,
              num_data) = lst_bin(data_list, lst_list, flags_list=flgs_list, dlst=dlst, begin_lst=begin_lst,
                                  lst_low=fmin, lst_hi=fmax, truncate_empty=False, sig_clip=sig_clip,
-                                 weight_by_nsamples=weight_by_nsamples, nsamples_list=nsamples_list,
+                                 weight_only_by_flags=weight_only_by_flags, nsamples_list=nsamples_list,
                                  sigma=sigma, min_N=min_N, flag_below_min_N=flag_below_min_N, rephase=rephase, freq_array=freq_array,
                                  antpos=antpos, bl_list=all_blgroup_baselines)
             # append to lists

From 592911beb1d2fa4e05c72f6077392c9333281354 Mon Sep 17 00:00:00 2001
From: Kai-Feng Chen <kevin.ventus@gmail.com>
Date: Thu, 1 Feb 2024 18:02:30 -0700
Subject: [PATCH 5/5] Modify tests after changing weight_by_nsamples to
 weight_only_by_flags

---
 hera_cal/tests/test_lstbin.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/hera_cal/tests/test_lstbin.py b/hera_cal/tests/test_lstbin.py
index e740f783a..7e57e5cf8 100644
--- a/hera_cal/tests/test_lstbin.py
+++ b/hera_cal/tests/test_lstbin.py
@@ -170,10 +170,12 @@ def test_lstbin(self):
             assert np.all(np.isclose(output[2][k], output2[2][k]))
             assert np.all(np.isclose(output[1][k], output2[1][k]))
 
-        # test weighted_by_nsamples, nsamples are propagated but data is not weighted by nsamples if set to False
+    def test_lstbin_weight_only_by_flags(self):
+        # test when --weight_only_by_flags, nsamples are propagated but data is not weighted by nsamples
+        dlst = 0.0007830490163484
         output1 = lstbin.lst_bin(self.data_list, self.lst_list, dlst=dlst,
                                  flags_list=self.flgs_list, nsamples_list=self.nsmp_list,
-                                 weight_by_nsamples=True)
+                                 weight_only_by_flags=False)
 
         nsmps1 = copy.deepcopy(self.nsmps1)
         nsmps1[(24, 25, 'ee')][:, 32] = 0
@@ -184,14 +186,14 @@ def test_lstbin(self):
         nsmps_list = [nsmps1, nsmps2, nsmps3]
         output = lstbin.lst_bin(self.data_list, self.lst_list, dlst=dlst,
                                 flags_list=self.flgs_list, nsamples_list=nsmps_list,
-                                weight_by_nsamples=True)
+                                weight_only_by_flags=False)
         # Check Nsamples are all 0
         assert np.allclose(output[-1][(24, 25, 'ee')].real[:, 32], 0)
         # Check data got weighted sum to 0
         assert np.allclose(output[1][(24, 25, 'ee')].real[100, 32], 0)
         output = lstbin.lst_bin(self.data_list, self.lst_list, dlst=dlst,
                                 flags_list=self.flgs_list, nsamples_list=nsmps_list,
-                                weight_by_nsamples=False)
+                                weight_only_by_flags=True)
         # Check Nsamples are all 0
         assert np.allclose(output[-1][(24, 25, 'ee')].real[:, 32], 0)
         # Check data is the same as before
@@ -273,9 +275,9 @@ def test_lst_bin_files(self):
         os.remove(output_lst_file)
         os.remove(output_std_file)
 
-        # test weight_by_nsamples
+        # test weight_only_by_flags
         lstbin.lst_bin_files(self.data_files, ntimes_per_file=250, outdir="./", overwrite=True,
-                             verbose=False, rephase=True, weight_by_nsamples=False, file_ext=file_ext)
+                             verbose=False, rephase=True, weight_only_by_flags=False, file_ext=file_ext)
         output_lst_file = "./zen.ee.LST.0.20124.uvh5"
         output_std_file = "./zen.ee.STD.0.20124.uvh5"
         assert os.path.exists(output_lst_file)