diff --git a/README.rst b/README.rst
index e97f337..000bd78 100644
--- a/README.rst
+++ b/README.rst
@@ -18,7 +18,7 @@ CESM simulations.
 
 
 :AUTHORS: Haiying Xu, Allison Baker, Daniel Milroy, Dorit Hammerling
-:COPYRIGHT: 2015-2022 University Corporation for Atmospheric Research
+:COPYRIGHT: 2015-2023 University Corporation for Atmospheric Research
 :LICENSE: Apache 2.0
 
 
diff --git a/__init__.py b/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/der_test_pyEnsSumMPAS.sh b/der_test_pyEnsSumMPAS.sh
new file mode 100644
index 0000000..f086c34
--- /dev/null
+++ b/der_test_pyEnsSumMPAS.sh
@@ -0,0 +1,13 @@
+#!/bin/bash -l
+#PBS -A NTDD0004
+#PBS -N ensSumM
+#PBS -q main
+#PBS -l select=1:ncpus=36:mpiprocs=36
+#PBS -l walltime=0:20:00
+#PBS -j oe
+#PBS -M abaker@ucar.edu
+
+module load conda
+conda activate npl
+
+mpiexec -n 36 -ppn 36 python pyEnsSumMPAS.py --esize 100 --indir /glade/p/cisl/asap/abaker/mpas/large_ens  --sumfile mpas_sumt4.nc  --tslice 4 --tag v7.1 --model mpas  --mach cheyenne --verbose --jsonfile empty_excluded.json
diff --git a/docs/source/pyEnsSum.rst b/docs/source/pyEnsSum.rst
index 4c14e69..f11213a 100644
--- a/docs/source/pyEnsSum.rst
+++ b/docs/source/pyEnsSum.rst
@@ -60,7 +60,7 @@ To use pyEnsSum:
        --tslice <num>       : the index into the time dimension (default = 1)
        --mach <name>        : Machine name used in the metadata (default = cheyenne)
        --jsonfile <fname>   : Jsonfile to provide that a list of variables that will be excluded
-                               or included  (default = exclude_empty.json)
+                               (default = exclude_empty.json)
        --mpi_disable        : Disable mpi mode to run in serial (off by default)
        --fIndex <num>       : Use this to start at ensemble member <num> instead of 000 (so
                               ensembles with numbers less than <num> are excluded from summary file)
@@ -98,21 +98,20 @@ Notes:
 
 7. You must specify a json file (via ``--jsonfile``) that indicates
    the variables in the ensemble
-   output files that you want to include or exclude from the summary file
-   statistics (see the example json files).  We recommend excluding variables, as
-   this is typically less work and pyEnsSum will let you know if you have not
+   output files that you want to exclude from the summary file
+   statistics (see the example json files).  The pyEnsSum routine
+   will let you know if you have not
    listed variables that need to be excluded (see next note).  Keep in mind that
    you must have *fewer* variables included than ensemble members.
 
 8. *IMPORTANT:* If there are variables that need to be excluded (that are not in
-   the .json file  already), pyEnsSum will exit early and provide a list of the
-   variables to exclude in the output.  These should be added to your exclude
-   variable list  (or removed from an include list), and then pyEnsSum can
-   be re-run.  Note that additional problematic variables may be found by
-   pyEnsSum as variables are detected in three stages. (First any variables that
-   are constant across the ensemble are identified.  Once these are removed,
-   linearly dependant variables are indentified for removal. Finally, variables
-   that are not constant but have very few unique values are identified.)
+   the .json file  already) for the summary to be generated, pyEnsSum will list these
+   variables in the output.  These variables will also be added to a copy of
+   your exclude variable list (prefixed with "NEW.") for future reference and use.
+   The summary file will be geberated with all listed variables excluded.
+   Note that the following types of variables will be removed:  any variables that
+   are constant across the ensemble, are not floating-point (e.g., integer),
+   are linearly dependant, or have very few (< 3%) unique values.
 
 
 Example:
diff --git a/empty_included.json b/empty_included.json
deleted file mode 100644
index f2169ba..0000000
--- a/empty_included.json
+++ /dev/null
@@ -1,3 +0,0 @@
-{
-  "IncludedVar": []
-}
diff --git a/mpas_ex_with_pv_vars.json b/mpas_ex_with_pv_vars.json
new file mode 100644
index 0000000..ac9f7f4
--- /dev/null
+++ b/mpas_ex_with_pv_vars.json
@@ -0,0 +1,31 @@
+{
+  "ExcludedVar": [
+    "i_rainc",
+    "i_rainnc",
+    "xice",
+    "xtime",
+    "xland",
+    "vegfra",
+    "depv_dt_diab",
+    "kpbl",
+    "iLev_DT",
+    "u",
+    "sst",
+    "u_pv",
+    "v_pv",
+    "theta_pv",
+    "vort_pv",
+    "depv_dt_lw",
+    "depv_dt_lw",
+    "depv_dt_sw",
+    "depv_dt_bl",
+    "depv_dt_cu",
+    "depv_dt_mix",
+    "dtheta_dt_mp",
+    "depv_dt_mp",
+    "depv_dt_fric",
+    "depv_dt_diab_pv",
+    "depv_dt_fric_pv",
+    "ertel_pv"
+  ]
+}
diff --git a/notebooks/ab_mpas_data.ipynb b/notebooks/ab_mpas_data.ipynb
index dc89fab..599f63f 100644
--- a/notebooks/ab_mpas_data.ipynb
+++ b/notebooks/ab_mpas_data.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 3,
    "id": "92b3b345-f06f-4752-9a66-50739f398946",
    "metadata": {},
    "outputs": [],
@@ -12,7 +12,8 @@
     "import netCDF4 as nc\n",
     "import numpy as np\n",
     "\n",
-    "filedir = '/glade/scratch/abaker/mpas_hist/'"
+    "filedir = '/glade/scratch/abaker/mpas_hist/'\n",
+    "pydir = '/glade/u/home/abaker/repos/PyCECT/'"
    ]
   },
   {
@@ -146,11 +147,58 @@
     "in_files"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "8217d907-6824-46bb-b5ea-ef170dc835dd",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sumfile_o = nc.Dataset(pydir + 'mpas_sum_ts12.nc', 'r')\n",
+    "sumfile_n = nc.Dataset(pydir + 'mpas_sum_ts12_new.nc', 'r')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "0fd2e582-a39a-4409-b4da-cd13aa15e4d5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "vars_n = sumfile_n.variables\n",
+    "vars_o = sumfile_o.variables\n",
+    "gm_n = vars_n[\"global_mean\"][:, :]\n",
+    "gm_o = vars_o[\"global_mean\"][:, :]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "id": "478930f9-c1b6-4ab4-82d1-e5eaa849d955",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.0"
+      ]
+     },
+     "execution_count": 18,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "np.max(gm_n - gm_o)"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 4,
    "id": "f4ea1560-03d7-4f4a-a34d-12fea6b2b3a4",
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
    "outputs": [],
    "source": [
     "myfile = nc.Dataset(filedir + in_files[0], 'r')\n",
@@ -521,9 +569,9 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "NPL (conda)",
+   "display_name": "NPL 2023b",
    "language": "python",
-   "name": "npl-conda"
+   "name": "npl-2023b"
   },
   "language_info": {
    "codemirror_mode": {
@@ -535,7 +583,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.12"
+   "version": "3.10.12"
   }
  },
  "nbformat": 4,
diff --git a/pyCECT.py b/pyCECT.py
index bd300fa..783f4ac 100755
--- a/pyCECT.py
+++ b/pyCECT.py
@@ -28,7 +28,7 @@ def main(argv):
          minPCFail= minRunFail= numRunFile= printVars popens mpas pop cam
          jsonfile= mpi_enable nbin= minrange= maxrange= outfile=
          casejson= npick= pepsi_gm pop_tol= web_enabled
-         base_year= pop_threshold= printStdMean fIndex= lev= eet= saveResults json_case= """
+         base_year= pop_threshold= printStdMean fIndex= lev= eet= saveResults json_case= savePCAMat= saveEET="""
     optkeys = s.split()
     try:
         opts, args = getopt.getopt(argv, 'h', optkeys)
@@ -72,18 +72,24 @@ def main(argv):
     opts_dict['web_enabled'] = False
     opts_dict['saveResults'] = False
     opts_dict['base_year'] = 1
+    opts_dict['savePCAMat'] = ''
+    opts_dict['saveEET'] = ''
 
     # Call utility library getopt_parseconfig to parse the option keys
     # and save to the dictionary
     caller = 'CECT'
     opts_dict = pyEnsLib.getopt_parseconfig(opts, optkeys, caller, opts_dict)
 
+    print(opts_dict)
+
     # ens type
     # cam = opts_dict['cam']
     popens = opts_dict['popens']
     pop = opts_dict['pop']
     mpas = opts_dict['mpas']
 
+    print(f'!test mpas:{mpas}')
+
     if pop or popens:
         ens = 'pop'
     elif mpas:
@@ -164,7 +170,7 @@ def main(argv):
             sys.exit()
         print('in_files=', in_files)
     else:
-        wildname = '*' + str(opts_dict['input_globs']) + '*'
+        wildname = '*' + str(opts_dict['input_globs']) + '*.nc'
         # Open all input files
         if os.path.exists(opts_dict['indir']):
             full_glob_str = os.path.join(opts_dict['indir'], wildname)
@@ -340,144 +346,117 @@ def main(argv):
             countgm = np.zeros(len(ifiles), dtype=np.int32)
 
             # Calculate the new run global mean
-            mean3d, mean2d, varlist = pyEnsLib.generate_global_mean_for_summary(
-                ifiles, var_name3d, var_name2d, is_SE, opts_dict['pepsi_gm'], opts_dict
+            mean3d, mean2d = pyEnsLib.generate_global_mean_for_summary(
+                ifiles, var_name3d, var_name2d, is_SE, opts_dict
             )
             means = np.concatenate((mean3d, mean2d), axis=0)
             # end cam
 
         # NOW this the same for MPAS and CAM
 
+        # extra info
         # Add the new run global mean to the dictionary "results"
         for i in range(means.shape[1]):
             for j in range(means.shape[0]):
                 pyEnsLib.addresults(results, 'means', means[j][i], ens_var_name[j], 'f' + str(i))
-
-        # Evaluate the new run global mean if it is in the range of the ensemble summary global mean range
+        # Evaluate the new run global mean if it is in the range of the ensemble summary global mea?n range
         for fcount, fid in enumerate(ifiles):
             countgm[fcount] = pyEnsLib.evaluatestatus(
                 'means', 'gmRange', variables, 'gm', results, 'f' + str(fcount)
             )
+        # end extra
 
         # Calculate the PCA scores of the new run
-        new_scores, var_list, comp_std_gm = pyEnsLib.standardized(
+        new_scores, sum_std_mean, comp_std_gm = pyEnsLib.standardized(
             means, mu_gm, sigma_gm, loadings_gm, ens_var_name, opts_dict, me
         )
         run_index, decision = pyEnsLib.comparePCAscores(
             ifiles, new_scores, sigma_scores_gm, opts_dict, me
         )
 
-        # If there is failure, plot out standardized mean and compared standardized mean in box plots
-        #        if opts_dict['printStdMean'] and decision == 'FAILED':
-        if opts_dict['printStdMean']:
+        # which vars are most outside the standardize mean (should be zero)
+        sort_index = np.argsort(sum_std_mean)[::-1]
+        sorted_sum = sum_std_mean[sort_index]
+        sorted_name = np.array(ens_var_name)[sort_index]
+        sorted_comp_std_gm = comp_std_gm[sort_index]
 
-            import matplotlib
-            import seaborn as sns
-
-            matplotlib.use('Agg')  # don't display figures
-            import matplotlib.pyplot as plt
+        if opts_dict['printStdMean'] or decision == 'FAILED':
 
             print(' ')
             print('***************************************************************************** ')
-            print(
-                'Test run variable standardized means (for reference only - not used to determine pass/fail)'
-            )
+            print('Test run variable standardized mean information')
             print('***************************************************************************** ')
             print(' ')
 
-            category = {
-                'all_outside99': [],
-                'two_outside99': [],
-                'one_outside99': [],
-                'all_oneside_outside1QR': [],
-            }
-            b = list(pyEnsLib.chunk(ens_var_name, 10))
-            for f, alist in enumerate(b):
-                for fc, avar in enumerate(alist):
-                    dist_995 = np.percentile(std_gm[avar], 99.5)
-                    dist_75 = np.percentile(std_gm[avar], 75)
-                    dist_25 = np.percentile(std_gm[avar], 25)
-                    dist_05 = np.percentile(std_gm[avar], 0.5)
-                    c = 0
-                    d = 0
-                    p = 0
-                    q = 0
-                    for i in range(comp_std_gm[f + fc].size):
-                        if comp_std_gm[f + fc][i] > dist_995:
-                            c = c + 1
-                        elif comp_std_gm[f + fc][i] < dist_05:
-                            d = d + 1
-                        elif comp_std_gm[f + fc][i] < dist_995 and comp_std_gm[f + fc][i] > dist_75:
-                            p = p + 1
-                        elif comp_std_gm[f + fc][i] > dist_05 and comp_std_gm[f + fc][i] < dist_25:
-                            q = q + 1
-                    if c == 3 or d == 3:
-                        category['all_outside99'].append((avar, f + fc))
-                    elif c == 2 or d == 2:
-                        category['two_outside99'].append((avar, f + fc))
-                    elif c == 1 or d == 1:
-                        category['one_outside99'].append((avar, f + fc))
-                    if p == 3 or q == 3:
-                        category['all_oneside_outside1QR'].append((avar, f + fc))
-            part_name = opts_dict['indir'].split('/')[-1]
-            if not part_name:
-                part_name = opts_dict['indir'].split('/')[-2]
-            for key in sorted(category):
-                list_array = []
-                list_array2 = []
-                list_var = []
-                value = category[key]
-
-                if key == 'all_outside99':
-                    print(
-                        '*** ',
-                        len(value),
-                        ' variables have 3 test run global means outside of the 99th percentile.',
-                    )
-                elif key == 'two_outside99':
-                    print(
-                        '*** ',
-                        len(value),
-                        ' variables have 2 test run global means outside of the 99th percentile.',
-                    )
-                elif key == 'one_outside99':
-                    print(
-                        '*** ',
-                        len(value),
-                        ' variables have 1 test run global mean outside of the 99th percentile.',
-                    )
-                elif key == 'all_oneside_outside1QR':
-                    print(
-                        '*** ',
-                        len(value),
-                        ' variables have all test run global means outside of the first quartile (but not outside the 99th percentile).',
-                    )
-
-                if len(value) > 0:
-                    print(' => generating plot ...')
-                    if len(value) > 20:
-                        print('    NOTE: truncating to only plot the first 20 variables.')
-                        value = value[0:20]
-
-                for each_var in value:
-                    list_array.append(std_gm[each_var[0]])
-                    list_array2.append(comp_std_gm[each_var[1]])
-                    name = each_var[0]
-                    if isinstance(name, str) is False:
-                        name = name.decode('utf-8')
-
-                    list_var.append(name)
-
-                if len(value) != 0:
-                    # ax = sns.boxplot(data=list_array, whis=[0.5, 99.5], fliersize=0.0)
-                    sns.stripplot(data=list_array2, jitter=True, color='r')
-                    plt.xticks(list(range(len(list_array))), list_var, fontsize=8, rotation=-45)
-
-                    if decision == 'FAILED':
-                        plt.savefig(part_name + '_' + key + '_fail.png')
-                    else:
-                        plt.savefig(part_name + '_' + key + '_pass.png')
-                    plt.close()
+            all_outside99 = []
+            two_outside99 = []
+            one_outside99 = []
+
+            # std_gm is a dictionary
+            tsize = comp_std_gm.shape[1]
+            b = list(ens_var_name)
+            for f, avar in enumerate(b):
+                if np.ma.is_masked(std_gm[avar]):
+                    tempa = std_gm[avar]
+                else:
+                    tempa = np.array(std_gm[avar])
+                dist_995 = np.percentile(tempa, 99.5)
+                dist_005 = np.percentile(tempa, 0.5)
+                # print(avar, " = ", dist_005, dist_995)
+                count = 0
+                for i in range(tsize):
+                    if comp_std_gm[f, i] > dist_995 or comp_std_gm[f, i] < dist_005:
+                        count = count + 1
+                if count == 1:
+                    one_outside99.append(avar)
+                elif count == 2:
+                    two_outside99.append(avar)
+                elif count == tsize:
+                    all_outside99.append(avar)
+
+            if len(all_outside99) > 0:
+                print(
+                    '*** ',
+                    len(all_outside99),
+                    ' variable(s) have all test run global means outside of the 99th percentile.',
+                )
+                print(all_outside99)
+            if len(two_outside99) > 0:
+                print(
+                    '*** ',
+                    len(two_outside99),
+                    ' variable(s) have 2 test run global means outside of the 99th percentile.',
+                )
+                print(two_outside99)
+            if len(one_outside99) > 0:
+                print(
+                    '*** ',
+                    len(one_outside99),
+                    ' variable(s) have 1 test run global means outside of the 99th percentile.',
+                )
+                print(one_outside99)
+
+            # count = len(all_outside99) + len(two_outside99) + len(one_outside99)
+            # count = max(10, count)
+            count = 20
+            count = min(count, means.shape[0])
+
+            print('')
+            print('***************************************************************************** ')
+            print(
+                'Top 20 test run variables in decreasing order of (abs) standardized mean sum (note: ensemble is standardized such that mean = 0 and std_dev = 1)'
+            )
+            for i in range(count):
+                print(
+                    sorted_name[i],
+                    ': ',
+                    'sum =',
+                    sorted_sum[i],
+                    '  (indiv. test run values = ',
+                    sorted_comp_std_gm[i, :],
+                    ')',
+                )
+            print('***************************************************************************** ')
 
         ##
         # Print file with info about new test runs....to a netcdf file
@@ -513,6 +492,9 @@ def main(argv):
             v_ens_sigma_scores = nc_savefile.createVariable('ens_sigma_scores', 'f8', ('nvars',))
             v_ens_std_gm = nc_savefile.createVariable('ens_std_gm', 'f8', ('nvars', 'ens_size'))
 
+            # v_ens_loadings = nc_savefile.createVariable('ens_loadings', 'f8', ('nvars', 'nvars'))
+            v_gm = nc_savefile.createVariable('gm', 'f8', ('nvars', 'test_size'))
+
             # hard-coded size
             ssize = 'S' + str(str_size)
             str_out = nc.stringtochar(np.array(ens_var_name, ssize))
@@ -523,36 +505,13 @@ def main(argv):
             v_ens_sigma_scores[:] = sigma_scores_gm[:]
             v_ens_std_gm[:, :] = std_gm_array[:, :]
 
+            # v_ens_loadings[:,:] = loadings_gm[:,:]
+            v_gm[:, :] = means[:, :]
+
             nc_savefile.close()
 
         # end of CAM and MPAS
 
-        # Print variables (optional)
-        if opts_dict['printVars']:
-            print(' ')
-            print('***************************************************************************** ')
-            print(
-                'Variable global mean information (for reference only - not used to determine pass/fail)'
-            )
-            print('***************************************************************************** ')
-            for fcount, fid in enumerate(ifiles):
-                print(' ')
-                print('Run ' + str(fcount + 1) + ':')
-                print(' ')
-                print(
-                    '***' + str(countgm[fcount]),
-                    ' of '
-                    + str(len(ens_var_name))
-                    + ' variables are outside of ensemble global mean distribution***',
-                )
-                pyEnsLib.printsummary(
-                    results, 'gm', 'means', 'gmRange', fcount, variables, 'global mean'
-                )
-                print(' ')
-                print(
-                    '----------------------------------------------------------------------------'
-                )
-
     if me.get_rank() == 0:
         print(' ')
         print('Testing complete.')
diff --git a/pyEnsLib.py b/pyEnsLib.py
index cfdd4c5..81e806b 100644
--- a/pyEnsLib.py
+++ b/pyEnsLib.py
@@ -348,8 +348,8 @@ def search_sumfile(opts_dict, ifiles):
 # Create some variables and call a function to calculate PCA
 # now gm comes in at 64 bits...
 
-
-def pre_PCA(gm_orig, all_var_names, whole_list, me):
+# pas in exclude list in case we have to add to id
+def pre_PCA(gm_orig, all_var_names, ex_list, me):
 
     # initialize
     b_exit = False
@@ -361,56 +361,33 @@ def pre_PCA(gm_orig, all_var_names, whole_list, me):
     else:
         gm = gm_orig[:]
 
-    mu_gm = np.average(gm, axis=1)
     sigma_gm = np.std(gm, axis=1, ddof=1)
 
-    standardized_global_mean = np.zeros(gm.shape, dtype=np.float64)
-    scores_gm = np.zeros(gm.shape, dtype=np.float64)
+    # keep track of orig vars in exclude file
+    new_ex_list = ex_list.copy()
+    orig_len = len(ex_list)
 
-    # AB: 4/19: whole list contains variables to be removed due to very small global means (calc elsewhere), but this is not currently  needed
-    # and whole_list will be len = 0
-    orig_len = len(whole_list)
-    if orig_len > 0:
-        if me.get_rank() == 0:
-            print('\n')
-            print(
-                '***************************************************************************************'
-            )
-            print(
-                (
-                    'Warning: these ',
-                    orig_len,
-                    ' variables have ~0 means (< O(e-15)) for each ensemble member, please exclude them via the json file (--jsonfile) :',
-                )
-            )
-            print((','.join(['"{0}"'.format(item) for item in whole_list])))
-            print(
-                '***************************************************************************************'
-            )
-            print('\n')
-
-    # check for constants across ensemble
+    ##### check for constants across ensemble
+    print('STATUS: checking for constant values across ensemble')
     for var in range(nvar):
         for file in range(nfile):
-            if np.any(sigma_gm[var] == 0.0) and all_var_names[var] not in set(whole_list):
-                # keep track of zeros standard deviations
-                whole_list.append(all_var_names[var])
+            if np.any(sigma_gm[var] == 0.0) and all_var_names[var] not in set(new_ex_list):
+                # keep track of zeros standard deviations and append
+                new_ex_list.append(all_var_names[var])
 
-    # print list
-    new_len = len(whole_list)
+    # did we add vars to exclude?
+    new_len = len(new_ex_list)
     if new_len > orig_len:
-        sub_list = whole_list[orig_len:]
+        sub_list = new_ex_list[orig_len:]
         if me.get_rank() == 0:
             print('\n')
             print(
                 '*************************************************************************************'
             )
             print(
-                (
-                    'Warning: these ',
-                    new_len - orig_len,
-                    ' variables are constant across ensemble members, please exclude them via the json file (--jsonfile): ',
-                )
+                'Warning: these ',
+                new_len - orig_len,
+                ' variables are constant across ensemble members, and will be excluded and added to a copy of the json file (--jsonfile): ',
             )
             print('\n')
             print((','.join(['"{0}"'.format(item) for item in sub_list])))
@@ -419,83 +396,28 @@ def pre_PCA(gm_orig, all_var_names, whole_list, me):
             )
             print('\n')
 
-    # exit if non-zero length whole_list
-    if new_len > 0:
-        print('=> Exiting ...')
-        b_exit = True
-
-    # check for linear dependent vars
-    if not b_exit:
-
-        for var in range(nvar):
-            for file in range(nfile):
-                standardized_global_mean[var, file] = (gm[var, file] - mu_gm[var]) / sigma_gm[var]
-
-        eps = np.finfo(np.float32).eps
-        norm = np.linalg.norm(standardized_global_mean, ord=2)
-        sh = max(standardized_global_mean.shape)
-        mytol = sh * norm * eps
-
-        standardized_rank = np.linalg.matrix_rank(standardized_global_mean, mytol)
-        print('STATUS: checking for dependent vars using QR...')
-        print(('STATUS: standardized_global_mean rank = ', standardized_rank))
-
-        dep_var_list = get_dependent_vars_index(standardized_global_mean, standardized_rank)
-        num_dep = len(dep_var_list)
-        orig_len = len(whole_list)
-
-        for i in dep_var_list:
-            whole_list.append(all_var_names[i])
-
-        if num_dep > 0:
-            sub_list = whole_list[orig_len:]
-
-            print('\n')
-            print(
-                '********************************************************************************************'
-            )
-            print(
-                (
-                    'Warning: these ',
-                    num_dep,
-                    ' variables are linearly dependent, please exclude them via the json file (--jsonfile): ',
-                )
-            )
-            print('\n')
-            print((','.join(['"{0}"'.format(item) for item in sub_list])))
-            print(
-                '********************************************************************************************'
-            )
-            print('\n')
-            print('=> EXITING....')
-
-            # need to exit
-            b_exit = True
-
-    # now check for any variables that have less than 3% (of the ensemble size) unique values
-    if not b_exit:
-        print('STATUS: checking for unique values across ensemble')
-
-        cts = np.count_nonzero(np.diff(np.sort(standardized_global_mean)), axis=1) + 1
-        #        thresh = .02* standardized_global_mean.shape[1]
-        thresh = 0.03 * standardized_global_mean.shape[1]
-        result = np.where(cts < thresh)
-        indices = result[0]
-        if len(indices) > 0:
-            nu_list = []
-            for i in indices:
+    #### now check for any variables that have less than 3% (of the ensemble size) unique values
+    print('STATUS: checking for unique values across ensemble')
+    cts = np.count_nonzero(np.diff(np.sort(gm)), axis=1) + 1
+    thresh = 0.03 * gm.shape[1]
+    result = np.where(cts < thresh)
+    indices = result[0]
+    if len(indices) > 0:
+        nu_list = []
+        for i in indices:
+            # only add if not in ex_list already
+            if all_var_names[i] not in set(new_ex_list):
                 nu_list.append(all_var_names[i])
 
+        if len(nu_list) > 0:
             print('\n')
             print(
                 '********************************************************************************************'
             )
             print(
-                (
-                    'Warning: these ',
-                    len(indices),
-                    ' variables contain fewer than 3% unique values across the ensemble, please exclude them via the json file (--jsonfile): ',
-                )
+                'Warning: these ',
+                len(nu_list),
+                ' variables contain fewer than 3% unique values across the ensemble, and will be excluded and added to a copy of thee json file (--jsonfile): ',
             )
             print('\n')
             print((','.join(['"{0}"'.format(item) for item in nu_list])))
@@ -503,24 +425,106 @@ def pre_PCA(gm_orig, all_var_names, whole_list, me):
                 '********************************************************************************************'
             )
             print('\n')
-            print('=> EXITING....')
 
-            # need to exit
-            b_exit = True
+            new_ex_list.extend(nu_list)
 
-    if not b_exit:
-        # find principal components
-        loadings_gm = princomp(standardized_global_mean)
-        # now do coord transformation on the standardized means to get the scores
-        scores_gm = np.dot(loadings_gm.T, standardized_global_mean)
-        sigma_scores_gm = np.std(scores_gm, axis=1, ddof=1)
-    else:
-        loadings_gm = np.zeros(gm.shape, dtype=np.float64)
-        sigma_scores_gm = np.zeros(gm.shape, dtype=np.float64)
+    ### REMOVE newly excluded stuff before the check for linear dependence
+    # remove var from nvar, all_var_names, gm, and recalculate: mu_gm, sigma_gm
+    new_len = len(new_ex_list)
+    indx = []
+    if new_len > orig_len:
+        print('Updating ...')
+        sub_list = new_ex_list[orig_len:]
+        for i in sub_list:
+            indx.append(all_var_names.index(i))
+        # now delete the rows from gm and names from list
+        gm_del = np.delete(gm, indx, axis=0)
+        all_var_names_del = np.delete(all_var_names, indx).tolist()
+
+        gm = gm_del
+        all_var_names = all_var_names_del
+        nvar = gm.shape[0]
+
+    mu_gm = np.average(gm, axis=1)
+    sigma_gm = np.std(gm, axis=1, ddof=1)
+    standardized_global_mean = np.zeros(gm.shape, dtype=np.float64)
+
+    ####### check for linear dependent vars
+    print('STATUS: checking for linear dependence across ensemble')
+    for var in range(nvar):
+        for file in range(nfile):
+            standardized_global_mean[var, file] = (gm[var, file] - mu_gm[var]) / sigma_gm[var]
+
+    eps = np.finfo(np.float32).eps
+    norm = np.linalg.norm(standardized_global_mean, ord=2)
+    sh = max(standardized_global_mean.shape)
+    mytol = sh * norm * eps
+
+    standardized_rank = np.linalg.matrix_rank(standardized_global_mean, mytol)
+    print('STATUS: using QR...')
+    print(('STATUS: standardized_global_mean rank = ', standardized_rank))
 
-    #    return mu_gm.astype(np.float32),sigma_gm.astype(np.float32),standardized_global_mean.astype(np.float32),loadings_gm.astype(np.float32),sigma_scores_gm.astype(np.float32),b_exit
+    dep_var_list = get_dependent_vars_index(standardized_global_mean, standardized_rank)
+    num_dep = len(dep_var_list)
+    new_len = len(new_ex_list)
 
-    return mu_gm, sigma_gm, standardized_global_mean, loadings_gm, sigma_scores_gm, b_exit
+    for i in dep_var_list:
+        new_ex_list.append(all_var_names[i])
+
+    if num_dep > 0:
+        sub_list = new_ex_list[new_len:]
+
+        print('\n')
+        print(
+            '********************************************************************************************'
+        )
+        print(
+            'Warning: these ',
+            num_dep,
+            ' variables are linearly dependent, and will be excluded and added to a copy of the json file (--jsonfile): ',
+        )
+        print('\n')
+        print((','.join(['"{0}"'.format(item) for item in sub_list])))
+        print(
+            '********************************************************************************************'
+        )
+        print('\n')
+
+        # REMOVE FROM gm, standardized gm and names
+        indx = []
+        for i in sub_list:
+            indx.append(all_var_names.index(i))
+        # now delete the rows in index from gm, std gm, and names from list
+        gm_del = np.delete(gm, indx, axis=0)
+        sgm_del = np.delete(standardized_global_mean, indx, axis=0)
+        all_var_names_del = np.delete(all_var_names, indx).tolist()
+
+        gm = gm_del
+        standardized_global_mean = sgm_del
+        all_var_names = all_var_names_del
+        nvar = gm.shape[0]
+
+        mu_gm = np.average(gm, axis=1)
+        sigma_gm = np.std(gm, axis=1, ddof=1)
+
+    # COMPUTE PCA
+    scores_gm = np.zeros(gm.shape, dtype=np.float64)
+    # find principal components
+    loadings_gm = princomp(standardized_global_mean)
+    # now do coord transformation on the standardized means to get the scores
+    scores_gm = np.dot(loadings_gm.T, standardized_global_mean)
+    sigma_scores_gm = np.std(scores_gm, axis=1, ddof=1)
+
+    return (
+        mu_gm,
+        sigma_gm,
+        standardized_global_mean,
+        loadings_gm,
+        sigma_scores_gm,
+        new_ex_list,
+        gm,
+        b_exit,
+    )
 
 
 #
@@ -572,13 +576,14 @@ def calc_Z(val, avg, stddev, count, flag):
 
 
 #
-# Read a json file for the excluded/included list of variables
-#
+# Read a json file for the excluded list of variables
+# (no longer allowing include files)
 def read_jsonlist(metajson, method_name):
 
     # method_name = ES for ensemble summary (CAM, MPAS)
     #            = ESP for POP ensemble summary
 
+    exclude = True
     if not os.path.exists(metajson):
         print('\n')
         print(
@@ -590,7 +595,6 @@ def read_jsonlist(metajson, method_name):
         )
         print('\n')
         varList = []
-        exclude = True
         return varList, exclude
     else:
         fd = open(metajson)
@@ -602,12 +606,9 @@ def read_jsonlist(metajson, method_name):
             exclude = []
             return varList, exclude
         if method_name == 'ES':  # CAM or MPAS
-            exclude = False
+            exclude = True
             if 'ExcludedVar' in metainfo:
-                exclude = True
                 varList = metainfo['ExcludedVar']
-            elif 'IncludedVar' in metainfo:
-                varList = metainfo['IncludedVar']
             return varList, exclude
         elif method_name == 'ESP':  # POP
             var2d = metainfo['Var2d']
@@ -906,7 +907,7 @@ def calc_global_mean_for_onefile_MPAS(fname, weight_dict, var_cell, var_edge, va
 # compute area_wgts, and then loop through all files to call calc_global_means_for_onefile
 # o_files are not open for CAM
 # 12/19 - summary file will now be double precision
-def generate_global_mean_for_summary(o_files, var_name3d, var_name2d, is_SE, pepsi_gm, opts_dict):
+def generate_global_mean_for_summary(o_files, var_name3d, var_name2d, is_SE, opts_dict):
 
     tslice = opts_dict['tslice']
     popens = opts_dict['popens']
@@ -957,10 +958,7 @@ def generate_global_mean_for_summary(o_files, var_name3d, var_name2d, is_SE, pep
 
         fname.close()
 
-    var_list = []
-    # some valid CAM vars are all small entries(e.g. DTWR_H2O2 and DTWR_H2O4), so we no longer exclude them via var_list
-
-    return gm3d, gm2d, var_list
+    return gm3d, gm2d
 
 
 # Calculate global means for one OCN input file
@@ -1511,30 +1509,7 @@ def standardized(gm, mu_gm, sigma_gm, loadings_gm, all_var_names, opts_dict, me)
             sum_std_mean[var] = sum_std_mean[var] + np.abs(standardized_mean[var, file])
     new_scores = np.dot(loadings_gm.T.astype(np.float64), standardized_mean)
 
-    var_list = []
-    sorted_sum_std_mean = np.argsort(sum_std_mean)[::-1]
-    if opts_dict['printStdMean']:
-        if me.get_rank() == 0:
-            print(' ')
-            print('************************************************************************')
-            print(' Sum of standardized mean of all variables in decreasing order')
-            print('************************************************************************')
-        for var in range(nvar):
-            var_list.append(all_var_names[sorted_sum_std_mean[var]])
-            vname = all_var_names[sorted_sum_std_mean[var]]
-            if me.get_rank() == 0:
-
-                if isinstance(vname, str):
-                    vname_d = vname
-                else:
-                    vname_d = vname.decode('utf-8')
-
-                print(
-                    '{:>15}'.format(vname_d),
-                    '{0:9.2e}'.format(sum_std_mean[sorted_sum_std_mean[var]]),
-                )
-                print(' ')
-    return new_scores, var_list, standardized_mean
+    return new_scores, sum_std_mean, standardized_mean
 
 
 #
@@ -1581,7 +1556,7 @@ def printsummary(results, key, name, namerange, thefilecount, variables, label):
                     if temp < 1:
                         print(' ')
                         print(
-                            f' {strname}: {v[name][thefile]:.2e} outside of of [{variables[k][namerange][0]:2e}, {variables[k][namerange][1]}]'
+                            f' {strname}: {v[name][thefile]:2e} outside of [{variables[k][namerange][0]:2e}, {variables[k][namerange][1]:2e}]'
                         )
 
 
@@ -1647,6 +1622,7 @@ def comparePCAscores(ifiles, new_scores, sigma_scores_gm, opts_dict, me):
     totalcount = 0
     sum_index = []
     if me.get_rank() == 0:
+        print('')
         print('*********************************************** ')
         print('PCA Test Results')
         print('*********************************************** ')
@@ -1670,6 +1646,11 @@ def comparePCAscores(ifiles, new_scores, sigma_scores_gm, opts_dict, me):
             totalcount = totalcount + 1
             sum_index.append(i + 1)
 
+    # save comp_array if filepath is provided
+    if me.get_rank() == 0:
+        if len(opts_dict['savePCAMat']) > 0:
+            np.save(opts_dict['savePCAMat'], comp_array)
+
     # false_positive=check_falsepositive(opts_dict,sum_index)
 
     # If the length of sum_index is larger than min_PC_fail, the three runs failed.
@@ -1690,7 +1671,9 @@ def comparePCAscores(ifiles, new_scores, sigma_scores_gm, opts_dict, me):
                 sum_index,
             )
             print(' ')
-            print('These runs ' + decision + ' according to our testing criterion.')
+            print('These runs ****' + decision + '**** according to our testing criterion.')
+            print(' ')
+
         elif me.get_rank() == 0:
             print(' ')
             print(
@@ -1745,6 +1728,10 @@ def comparePCAscores(ifiles, new_scores, sigma_scores_gm, opts_dict, me):
             else:
                 decision = 'PASSED'
 
+            # save eet if filepath is provided
+            if len(opts_dict['saveEET']) > 0:
+                np.save(opts_dict['saveEET'], np.array([passes, passes + failures]))
+
     else:
         for j in range(comp_array.shape[1]):
             index_list = []
@@ -1796,12 +1783,6 @@ def CECT_usage():
     print(
         '   --printVars             : print out variables that fall outside of the global mean ensemble distribution (off by default)'
     )
-    print(
-        '   --printStdMean          : print out sum of standardized mean of all variables in decreasing order.  If test returns a FAIL, '
-    )
-    print(
-        '                             then output associated box plots (off by default) - requires Python seaborn package'
-    )
     print(
         '   --saveResults           : save a netcdf file with scores and std global means from the test runs (savefile.nc). '
     )
@@ -1857,7 +1838,7 @@ def EnsSum_usage():
     print('   --mach <name>         : Machine name used in the metadata (default = cheyenne)')
     print('   --tslice <num>       : the index into the time dimension (default = 1)')
     print('   --jsonfile <fname>   : Jsonfile to provide that a list of variables that will ')
-    print('                          be excluded or included  (default = exclude_empty.json)')
+    print('                          be excluded (default = exclude_empty.json)')
     print('   --mpi_disable        : Disable mpi mode to run in serial (off by default)')
     #    print(
     #        '   --fIndex <num>       : Use this to start at ensemble member <num> instead of 000 (so '
@@ -1886,7 +1867,7 @@ def EnsSumMPAS_usage():
     print('   --mach <name>        : Machine name used in the metadata (default = cheyenne)')
     print('   --tslice <num>       : the index into the time dimension (default = 0)')
     print('   --jsonfile <fname>   : Jsonfile to provide that a list of variables that will ')
-    print('                          be excluded or included  (default = empty_excluded.json)')
+    print('                          be excluded  (default = empty_excluded.json)')
     print('   --mpi_disable        : Disable mpi mode to run in serial (mpi is enabled by default)')
     print('   ')
 
diff --git a/pyEnsSum.py b/pyEnsSum.py
index 471de9d..ad38900 100644
--- a/pyEnsSum.py
+++ b/pyEnsSum.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python
 import configparser
 import getopt
+import json
 import os
 import re
 import sys
@@ -19,10 +20,8 @@
 
 def main(argv):
 
-    # cumul is not being used currently
-
     # Get command line stuff and store in a dictionary
-    s = 'tag= compset= esize= tslice= res= sumfile= indir= sumfiledir= mach= verbose jsonfile= mpi_enable maxnorm gmonly popens regx= startMon= endMon= fIndex= mpi_disable'
+    s = 'tag= compset= esize= tslice= res= sumfile= indir= sumfiledir= mach= verbose jsonfile= mpi_enable maxnorm  popens regx= startMon= endMon= fIndex= mpi_disable'
     optkeys = s.split()
     try:
         opts, args = getopt.getopt(argv, 'h', optkeys)
@@ -43,14 +42,12 @@ def main(argv):
     opts_dict['sumfile'] = 'ens.summary.nc'
     opts_dict['indir'] = './'
     opts_dict['sumfiledir'] = './'
-    opts_dict['jsonfile'] = 'exclude_empty.json'
+    opts_dict['jsonfile'] = 'empty_excluded.json'
     opts_dict['verbose'] = False
     opts_dict['mpi_enable'] = True
     opts_dict['mpi_disable'] = False
     opts_dict['maxnorm'] = False
-    opts_dict['gmonly'] = True
     opts_dict['popens'] = False
-    opts_dict['cumul'] = False
     opts_dict['regx'] = 'test'
     opts_dict['startMon'] = 1
     opts_dict['endMon'] = 1
@@ -79,7 +76,6 @@ def main(argv):
     input_dir = opts_dict['indir']
     # The var list that will be excluded
     ex_varlist = []
-    inc_varlist = []
 
     # Create a mpi simplecomm object
     if opts_dict['mpi_enable']:
@@ -94,26 +90,17 @@ def main(argv):
         print(opts_dict)
         print('STATUS: Ensemble size for summary = ', esize)
 
-    exclude = False
     if me.get_rank() == 0:
         if opts_dict['jsonfile']:
-            inc_varlist = []
-            # Read in the excluded or included var list
+            # Read in the excluded var list
             ex_varlist, exclude = pyEnsLib.read_jsonlist(opts_dict['jsonfile'], 'ES')
             if len(ex_varlist) > 0:
                 if ex_varlist[0] == 'JSONERROR':
                     me.abort()
-            if exclude is False:
-                inc_varlist = ex_varlist
-                ex_varlist = []
 
     # Broadcast the excluded var list to each processor
     if opts_dict['mpi_enable']:
-        exclude = me.partition(exclude, func=Duplicate(), involved=True)
-        if exclude:
-            ex_varlist = me.partition(ex_varlist, func=Duplicate(), involved=True)
-        else:
-            inc_varlist = me.partition(inc_varlist, func=Duplicate(), involved=True)
+        ex_varlist = me.partition(ex_varlist, func=Duplicate(), involved=True)
 
     in_files = []
     if os.path.exists(input_dir):
@@ -150,13 +137,6 @@ def main(argv):
             print('ERROR: Input directory: ', input_dir, ' not found')
         sys.exit(2)
 
-    if opts_dict['cumul']:
-        if opts_dict['regx']:
-            in_files_list = get_cumul_filelist(opts_dict, opts_dict['indir'], opts_dict['regx'])
-        in_files = me.partition(in_files_list, func=EqualLength(), involved=True)
-        if me.get_rank() == 0 and verbose:
-            print('VERBOSE: in_files  = ', in_files)
-
     # Check full file names in input directory (don't open yet)
     full_in_files = []
     if me.get_rank() == 0 and opts_dict['verbose']:
@@ -225,21 +205,18 @@ def main(argv):
             print('nlat = ', nlat)
             print('nlon = ', nlon)
 
+    # invarient metadata  (will write to sum file later)
+    lev_data = first_file.variables['lev']
+    lev_data_copy = lev_data[:]  # doesn't go away when close first_file
+
     # Get 2d vars, 3d vars and all vars (For now include all variables)
     vars_dict_all = first_file.variables
 
     # Remove the excluded variables (specified in json file) from variable dictionary
-    if exclude:
-        vars_dict = vars_dict_all
-        for i in ex_varlist:
-            if i in vars_dict:
-                del vars_dict[i]
-    # Given an included var list, remove all the variables that are not on the list
-    else:
-        vars_dict = vars_dict_all.copy()
-        for k, v in vars_dict_all.items():
-            if (k not in inc_varlist) and (vars_dict_all[k].typecode() == 'f'):
-                del vars_dict[k]
+    vars_dict = vars_dict_all.copy()
+    for i in ex_varlist:
+        if i in vars_dict:
+            del vars_dict[i]
 
     # num_vars = len(vars_dict)
 
@@ -312,7 +289,6 @@ def main(argv):
     # All vars is 3d vars first (sorted), the 2d vars
     all_var_names = list(d3_var_names)
     all_var_names += d2_var_names
-    # n_all_var_names = len(all_var_names)
 
     # Rank 0 - Create new summary ensemble file
     this_sumfile = opts_dict['sumfile']
@@ -328,8 +304,96 @@ def main(argv):
 
     this_sumfile = sum_dir + '/' + this_sumfile
 
+    # All:
+    var3_list_loc = me.partition(d3_var_names, func=EqualStride(), involved=True)
+    var2_list_loc = me.partition(d2_var_names, func=EqualStride(), involved=True)
+
+    # close first_file
+    first_file.close()
+
+    # Calculate global means #
+    if me.get_rank() == 0 and verbose:
+        print('VERBOSE: Calculating global means .....')
+
+    gm3d, gm2d = pyEnsLib.generate_global_mean_for_summary(
+        full_in_files, var3_list_loc, var2_list_loc, is_SE, opts_dict
+    )
+
+    if me.get_rank() == 0 and verbose:
+        print('VERBOSE: Finished calculating global means .....')
+
+    # gather to rank = 0
+    if opts_dict['mpi_enable']:
+
+        # Gather the 3d variable results from all processors to the master processor
+        slice_index = get_stride_list(len(d3_var_names), me)
+
+        # Gather global means 3d results
+        gm3d = gather_npArray(gm3d, me, slice_index, (len(d3_var_names), len(full_in_files)))
+
+        # Gather 2d variable results from all processors to the master processor
+        slice_index = get_stride_list(len(d2_var_names), me)
+
+        # Gather global means 2d results
+        gm2d = gather_npArray(gm2d, me, slice_index, (len(d2_var_names), len(full_in_files)))
+
+    # rank =0 : complete calculations for summary file
     if me.get_rank() == 0:
+        gmall = np.concatenate((gm3d, gm2d), axis=0)
+
+        # PCA prep and calculation
+        (
+            mu_gm,
+            sigma_gm,
+            standardized_global_mean,
+            loadings_gm,
+            scores_gm,
+            new_ex_varlist,
+            new_gmall,
+            b_exit,
+        ) = pyEnsLib.pre_PCA(gmall, all_var_names, ex_varlist, me)
 
+        # if PCA calc encounters an error, then remove the summary file and exit
+        if b_exit:
+            print('STATUS: Summary could not be created.')
+            sys.exit(2)
+
+        # update json file?  update var 2d and 3d var lists?
+
+        # print('ex_varlist len = ', len(ex_varlist))
+        # print('new ex_varlist len = ', len(new_ex_varlist))
+        # print(new_ex_varlist)
+
+        if len(ex_varlist) < len(new_ex_varlist):
+            print('STATUS: Creating an updated JSON file (with prefix "NEW.")')
+            new_name = 'NEW.' + opts_dict['jsonfile']
+            print(
+                'STATUS: Adding ', len(new_ex_varlist) - len(ex_varlist), ' variables to ', new_name
+            )
+            jdict = {}
+            jdict['ExcludedVar'] = new_ex_varlist
+            with open(new_name, 'w') as outfile:
+                json.dump(jdict, outfile)
+
+            # update num_2d, num_3d => by removing vars from  d3_var_names and d2_var_names
+            for i in new_ex_varlist:
+                if i in all_var_names:
+                    all_var_names.remove(i)
+                if i in d3_var_names:
+                    d3_var_names.remove(i)
+                elif i in d2_var_names:
+                    d2_var_names.remove(i)
+
+            num_2d = len(d2_var_names)
+            num_3d = len(d3_var_names)
+
+            nvars = loadings_gm.shape[0]
+            if nvars != (num_2d + num_3d):
+                print('DIMENSION ERROR!')
+                print('STATUS: Summary could not be created.')
+                sys.exit(2)
+
+        # create the summary file  (still rank 0)
         if verbose:
             print('VERBOSE: Creating ', this_sumfile, '  ...')
 
@@ -421,89 +485,9 @@ def main(argv):
         v_var3d[:] = eq_d3_var_names[:]
         v_var2d[:] = eq_d2_var_names[:]
 
-        # Time-invarient metadata
-        if verbose:
-            print('VERBOSE: Assigning time invariant metadata .....')
-        lev_data = first_file.variables['lev']
-        v_lev[:] = lev_data[:]
-    # end of rank=0 work
-
-    # All:
-    # tslice = opts_dict['tslice']
-    if not opts_dict['cumul']:
-        # Partition the var list
-        var3_list_loc = me.partition(d3_var_names, func=EqualStride(), involved=True)
-        var2_list_loc = me.partition(d2_var_names, func=EqualStride(), involved=True)
-    else:
-        var3_list_loc = d3_var_names
-        var2_list_loc = d2_var_names
-
-    # close first_file
-    first_file.close()
-
-    # Calculate global means #
-    if me.get_rank() == 0 and verbose:
-        print('VERBOSE: Calculating global means .....')
-    if not opts_dict['cumul']:
-        gm3d, gm2d, var_list = pyEnsLib.generate_global_mean_for_summary(
-            full_in_files, var3_list_loc, var2_list_loc, is_SE, False, opts_dict
-        )
-    if me.get_rank() == 0 and verbose:
-        print('VERBOSE: Finished calculating global means .....')
-
-    # gather to rank = 0
-    if opts_dict['mpi_enable']:
-
-        if not opts_dict['cumul']:
-            # Gather the 3d variable results from all processors to the master processor
-            slice_index = get_stride_list(len(d3_var_names), me)
-
-            # Gather global means 3d results
-            gm3d = gather_npArray(gm3d, me, slice_index, (len(d3_var_names), len(full_in_files)))
-
-            # Gather 2d variable results from all processors to the master processor
-            slice_index = get_stride_list(len(d2_var_names), me)
-
-            # Gather global means 2d results
-            gm2d = gather_npArray(gm2d, me, slice_index, (len(d2_var_names), len(full_in_files)))
-
-            # gather variables ro exclude (in pre_pca)
-            var_list = gather_list(var_list, me)
-
-        else:
-            # cumul is not being used and temp1 and temp2 are undefined
-            # gmall = np.concatenate((temp1, temp2), axis=0)
-            # gmall = pyEnsLib.gather_npArray_pop(
-            #    gmall, me, (me.get_size(), len(d3_var_names) + len(d2_var_names))
-            # )
-            print('Warning: cumul not supported')
-
-    # rank =0 : complete calculations for summary file
-    if me.get_rank() == 0:
-        if not opts_dict['cumul']:
-            gmall = np.concatenate((gm3d, gm2d), axis=0)
-        else:
-            gmall_temp = np.transpose(gmall[:, :])
-            gmall = gmall_temp
-
-        # PCA prep and calculation
-        (
-            mu_gm,
-            sigma_gm,
-            standardized_global_mean,
-            loadings_gm,
-            scores_gm,
-            b_exit,
-        ) = pyEnsLib.pre_PCA(gmall, all_var_names, var_list, me)
-
-        # if PCA calc encounters an error, then remove the summary file and exit
-        if b_exit:
-            nc_sumfile.close()
-            os.unlink(this_sumfile)
-            print('STATUS: Summary could not be created.')
-            sys.exit(2)
-
-        v_gm[:, :] = gmall[:, :]
+        # populate variables
+        v_lev[:] = lev_data_copy[:]
+        v_gm[:, :] = new_gmall[:, :]
         v_standardized_gm[:, :] = standardized_global_mean[:, :]
         v_mu_gm[:] = mu_gm[:]
         v_sigma_gm[:] = sigma_gm[:]
@@ -530,8 +514,7 @@ def get_cumul_filelist(opts_dict, indir, regx):
                 res = [f for f in os.listdir(indir) if re.search(regx, f)]
                 in_files = sorted(res)
                 all_files.extend(in_files)
-    # print "all_files=",all_files
-    # in_files=res
+
     return all_files
 
 
diff --git a/pyEnsSumMPAS.py b/pyEnsSumMPAS.py
index 2dbe53c..89e272f 100644
--- a/pyEnsSumMPAS.py
+++ b/pyEnsSumMPAS.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python
 import configparser
 import getopt
+import json
 import os
 import re
 import sys
@@ -75,7 +76,6 @@ def main(argv):
     input_dir = opts_dict['indir']
     # The var list that will be excluded
     ex_varlist = []
-    inc_varlist = []
 
     # Create a mpi simplecomm object
     if opts_dict['mpi_enable']:
@@ -93,23 +93,15 @@ def main(argv):
     exclude = False
     if me.get_rank() == 0:
         if opts_dict['jsonfile']:
-            inc_varlist = []
-            # Read in the excluded or included var list
+            # Read in the excluded var list
             ex_varlist, exclude = pyEnsLib.read_jsonlist(opts_dict['jsonfile'], 'ES')
             if len(ex_varlist) > 0:
                 if ex_varlist[0] == 'JSONERROR':
                     me.abort()
-            if exclude is False:
-                inc_varlist = ex_varlist
-                ex_varlist = []
 
     # Broadcast the excluded var list to each processor
     if opts_dict['mpi_enable']:
-        exclude = me.partition(exclude, func=Duplicate(), involved=True)
-        if exclude:
-            ex_varlist = me.partition(ex_varlist, func=Duplicate(), involved=True)
-        else:
-            inc_varlist = me.partition(inc_varlist, func=Duplicate(), involved=True)
+        ex_varlist = me.partition(ex_varlist, func=Duplicate(), involved=True)
 
     in_files = []
     if os.path.exists(input_dir):
@@ -214,17 +206,10 @@ def main(argv):
     vars_dict_all = first_file.variables
 
     # Remove the excluded variables (specified in json file) from variable dictionary
-    if exclude:
-        vars_dict = vars_dict_all.copy()
-        for i in ex_varlist:
-            if i in vars_dict:
-                del vars_dict[i]
-    # Given an included var list, remove all the variables that are not on the list
-    else:
-        vars_dict = vars_dict_all.copy()
-        for k, v in vars_dict_all.items():
-            if (k not in inc_varlist) and (vars_dict_all[k].typecode() == 'f'):
-                del vars_dict[k]
+    vars_dict = vars_dict_all.copy()
+    for i in ex_varlist:
+        if i in vars_dict:
+            del vars_dict[i]
 
     # We have cell vars and edge vars and vertex vars (and only want time-dependent vars)
     str_size = 0  # longest var name
@@ -237,28 +222,52 @@ def main(argv):
     v_dim = 'nVertices'
 
     # CHECK FOR edge variable u (horizontal wind velocity vector)
-
+    extra_exclude = 0
     # sort to cell, edge, and vertex (and grab max str_size)
     for k, v in vars_dict.items():
         # var = k
+        # get var type
+        dd = vars_dict[k][:].dtype
         vd = v.dimensions  # all the variable's dimensions (names)
+        # only car about time dependent vars
         if t_dim in vd:
+            # no integers
+            if dd == 'int32':
+                ex_varlist.append(k)
+                extra_exclude = extra_exclude + 1
+                if me.get_rank() == 0:
+                    print(
+                        'WARNING: Variable ',
+                        k,
+                        ' is an integer and should be excluded.  Added to json file.',
+                    )
+                continue
             if c_dim in vd:
                 cell_names.append(k)
             elif e_dim in vd:
-                edge_names.append(k)
                 if k == 'u':
-                    print(
-                        'Note: We suggest that variable u (Horizontal normal velocity at edges) be excluded from the summary file in favor of uReconstructZonal and uReconstructMeridional (cell variables).'
-                    )
+                    # check for uReconstructZonal and uReconstructMeridional
+                    if 'uReconstructZonal' in vars_dict and 'uReconstructMeridional' in vars_dict:
+                        ex_varlist.append(k)
+                        extra_exclude = extra_exclude + 1
+                        if me.get_rank() == 0:
+                            print(
+                                'WARNING: We suggest that variable u (Horizontal normal velocity at edges) be excluded from the summary file in favor of uReconstructZonal and uReconstructMeridional (cell variables) Added to json file.'
+                            )
+                        continue
+                edge_names.append(k)
             elif v_dim in vd:
                 vertex_names.append(k)
             else:
-                print(
-                    'Note: variable ',
-                    k,
-                    ' contains time but not cells, edges, or vertices (and will be excluded).',
-                )
+                # add to exclude list
+                ex_varlist.append(k)
+                extra_exclude = extra_exclude + 1
+                if me.get_rank() == 0:
+                    print(
+                        'WARNING: variable ',
+                        k,
+                        ' contains time but not cells, edges, or vertices (and will be excluded and added to a new jsonfile).',
+                    )
                 continue
             str_size = max(str_size, len(k))
 
@@ -268,7 +277,7 @@ def main(argv):
     total = num_cell + num_edge + num_vertex
 
     if me.get_rank() == 0 and verbose:
-        print('VERBOSE: Number of variables found:  ', total)
+        print('VERBOSE: Number of variables (after exclusions) found:  ', total)
         print(
             'VERBOSE: Cell variables: ',
             num_cell,
@@ -286,6 +295,7 @@ def main(argv):
 
     if esize < total:
         if me.get_rank() == 0:
+
             print(
                 '**************************************************************************************************'
             )
@@ -315,6 +325,7 @@ def main(argv):
     sum_dir = os.path.dirname(this_sumfile)
     if len(sum_dir) == 0:
         sum_dir = '.'
+
     if os.path.exists(sum_dir) is False:
         if me.get_rank() == 0:
             print('ERROR: Summary file directory: ', sum_dir, ' not found')
@@ -322,8 +333,107 @@ def main(argv):
 
     this_sumfile = sum_dir + '/' + this_sumfile
 
+    varCell_list_loc = me.partition(cell_names, func=EqualStride(), involved=True)
+    varEdge_list_loc = me.partition(edge_names, func=EqualStride(), involved=True)
+    varVertex_list_loc = me.partition(vertex_names, func=EqualStride(), involved=True)
+
+    # close first_file
+    first_file.close()
+
+    # Calculate global means #
+    if me.get_rank() == 0 and verbose:
+        print('VERBOSE: Calculating global means .....')
+
+    gmCell, gmEdge, gmVertex = pyEnsLib.generate_global_mean_for_summary_MPAS(
+        full_in_files, varCell_list_loc, varEdge_list_loc, varVertex_list_loc, opts_dict
+    )
+
+    if me.get_rank() == 0 and verbose:
+        print('VERBOSE: Finished calculating global means .....')
+
+    # gather to rank = 0
+    if opts_dict['mpi_enable']:
+
+        # Gather the cell variable results from all processors to the master processor
+        slice_index = get_stride_list(len(cell_names), me)
+        # Gather global means cell results
+
+        # print("MYRANK = ", me.get_rank(), slice_index)
+        gmCell = gather_npArray(gmCell, me, slice_index, (len(cell_names), len(full_in_files)))
+        # print(gmCell)
+
+        # Gather the edge variable results from all processors to the master processor
+        slice_index = get_stride_list(len(edge_names), me)
+        # Gather global means edge results
+        gmEdge = gather_npArray(gmEdge, me, slice_index, (len(edge_names), len(full_in_files)))
+
+        # Gather the vertex variable results from all processors to the master processor
+        slice_index = get_stride_list(len(vertex_names), me)
+        # Gather global means vertex results
+        gmVertex = gather_npArray(
+            gmVertex, me, slice_index, (len(vertex_names), len(full_in_files))
+        )
+
+    # rank =0 : complete calculations for summary file
     if me.get_rank() == 0:
+        gmall = np.concatenate((gmCell, gmEdge, gmVertex), axis=0)
+
+        # PCA prep and calculation
+        (
+            mu_gm,
+            sigma_gm,
+            standardized_global_mean,
+            loadings_gm,
+            scores_gm,
+            new_ex_varlist,
+            new_gmall,
+            b_exit,
+        ) = pyEnsLib.pre_PCA(gmall, all_var_names, ex_varlist, me)
 
+        # if PCA calc encounters an error, then remove the summary file and exit
+        if b_exit:
+            print('STATUS: Summary could not be created.')
+            sys.exit(2)
+
+        # update json file?
+        if len(ex_varlist) < len(new_ex_varlist) or extra_exclude > 0:
+            print('STATUS: Creating an updated JSON file (with prefix "NEW.")')
+            new_name = 'NEW.' + opts_dict['jsonfile']
+            print(
+                'STATUS: Adding ',
+                len(new_ex_varlist) - len(ex_varlist) + extra_exclude,
+                ' variables to ',
+                new_name,
+            )
+            jdict = {}
+            jdict['ExcludedVar'] = new_ex_varlist
+
+            with open(new_name, 'w') as outfile:
+                json.dump(jdict, outfile)
+
+            # update ncell, nedge, and nvertex => by removing vars from corresponding names
+            for i in new_ex_varlist:
+                if i in all_var_names:
+                    all_var_names.remove(i)
+                if i in cell_names:
+                    cell_names.remove(i)
+                elif i in edge_names:
+                    edge_names.remove(i)
+                elif i in vertex_names:
+                    vertex_names.remove(i)
+
+            num_cell = len(cell_names)
+            num_edge = len(edge_names)
+            num_vertex = len(vertex_names)
+            total = num_cell + num_edge + num_vertex
+
+            nvars = loadings_gm.shape[0]
+            if nvars != (total):
+                print('DIMENSION ERROR!')
+                print('STATUS: Summary could not be created.')
+                sys.exit(2)
+
+        # create the summary file (still rank 0)
         if verbose:
             print('VERBOSE: Creating ', this_sumfile, '  ...')
 
@@ -365,8 +475,6 @@ def main(argv):
         # Create variables
         if verbose:
             print('VERBOSE: Creating variables .....')
-        # TO DO - any time invarient data that we want?
-        # v_lev = nc_sumfile.createVariable('lev', 'f8', ('nlev',))
 
         v_vars = nc_sumfile.createVariable('vars', 'S1', ('nvars', 'str_size'))
         v_varCell = nc_sumfile.createVariable('varCell', 'S1', ('nvarsCell', 'str_size'))
@@ -433,91 +541,8 @@ def main(argv):
         v_varEdge[:] = eq_edge_names[:]
         v_varVertex[:] = eq_vertex_names[:]
 
-        # Time-invarient metadata
-        # TO DO
-        if verbose:
-            print('VERBOSE: Assigning time invariant metadata .....')
-        # lev_data = first_file.variables['lev']
-        # v_lev[:] = lev_data[:]
-
-    # end of rank=0 work
-
-    # All:
-
-    varCell_list_loc = me.partition(cell_names, func=EqualStride(), involved=True)
-    varEdge_list_loc = me.partition(edge_names, func=EqualStride(), involved=True)
-    varVertex_list_loc = me.partition(vertex_names, func=EqualStride(), involved=True)
-
-    # close first_file
-    first_file.close()
-
-    # Calculate global means #
-    if me.get_rank() == 0 and verbose:
-        print('VERBOSE: Calculating global means .....')
-
-    gmCell, gmEdge, gmVertex = pyEnsLib.generate_global_mean_for_summary_MPAS(
-        full_in_files, varCell_list_loc, varEdge_list_loc, varVertex_list_loc, opts_dict
-    )
-
-    if me.get_rank() == 0 and verbose:
-        print('VERBOSE: Finished calculating global means .....')
-
-    # empty - not excluding any vars due to global means at this time
-    var_list = []
-
-    # gather to rank = 0
-    if opts_dict['mpi_enable']:
-
-        # Gather the cell variable results from all processors to the master processor
-        slice_index = get_stride_list(len(cell_names), me)
-        # Gather global means cell results
-
-        # print("MYRANK = ", me.get_rank(), slice_index)
-        gmCell = gather_npArray(gmCell, me, slice_index, (len(cell_names), len(full_in_files)))
-        # print(gmCell)
-
-        # Gather the edge variable results from all processors to the master processor
-        slice_index = get_stride_list(len(edge_names), me)
-        # Gather global means edge results
-        gmEdge = gather_npArray(gmEdge, me, slice_index, (len(edge_names), len(full_in_files)))
-
-        # Gather the vertex variable results from all processors to the master processor
-        slice_index = get_stride_list(len(vertex_names), me)
-        # Gather global means vertex results
-        gmVertex = gather_npArray(
-            gmVertex, me, slice_index, (len(vertex_names), len(full_in_files))
-        )
-
-        #################
-        # gather variables to exclude (for pre_pca - currently will be empty)
-        var_list = gather_list(var_list, me)
-
-    # rank =0 : complete calculations for summary file
-    if me.get_rank() == 0:
-        gmall = np.concatenate((gmCell, gmEdge, gmVertex), axis=0)
-        # save to summary file
-        v_gm[:, :] = gmall[:, :]
-
-        # print("gmall = ", gmall)
-
-        # PCA prep and calculation
-        (
-            mu_gm,
-            sigma_gm,
-            standardized_global_mean,
-            loadings_gm,
-            scores_gm,
-            b_exit,
-        ) = pyEnsLib.pre_PCA(gmall, all_var_names, var_list, me)
-
-        # if PCA calc encounters an error, then remove the summary file and exit
-        if b_exit:
-            nc_sumfile.close()
-
-            os.unlink(this_sumfile)
-            print('STATUS: Summary could not be created.')
-            sys.exit(2)
-
+        # populate variables
+        v_gm[:, :] = new_gmall[:, :]
         v_standardized_gm[:, :] = standardized_global_mean[:, :]
         v_mu_gm[:] = mu_gm[:]
         v_sigma_gm[:] = sigma_gm[:]
diff --git a/pyproject.toml b/pyproject.toml
index af3bbc7..e78601a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -11,5 +11,5 @@ pyupgrade = 1
 [tool.nbqa.addopts]
 pyupgrade = ["--py36-plus"]
 
-#[build-system]
-#requires = ["setuptools>=45", "wheel", "setuptools_scm>=6.2"]
+[build-system]
+requires = ["setuptools>=45", "wheel", "setuptools_scm>=6.2"]
diff --git a/test_mpas_CECT.sh b/test_mpas_CECT.sh
new file mode 100644
index 0000000..b029956
--- /dev/null
+++ b/test_mpas_CECT.sh
@@ -0,0 +1,17 @@
+#!/bin/tcsh
+#PBS -A NTDD0004
+#PBS -N mpas-cect
+#PBS -q regular
+#PBS -l select=1:ncpus=1:mpiprocs=1
+#PBS -l walltime=0:30:00
+#PBS -j oe
+#PBS -M abaker@ucar.edu
+
+module load conda
+conda activate npl
+
+
+setenv TMPDIR /glade/scratch/$USER/temp
+mkdir -p $TMPDIR
+
+python pyCECT.py --sumfile /glade/work/abaker/mpas_data/100_ens_summary/mpas_sum_ts24.nc --indir /glade/scratch/abaker/longer_mpas_hist --tslice 8 --mpas
diff --git a/test_pyEnsSum.sh b/test_pyEnsSum.sh
index 530d673..e996d38 100644
--- a/test_pyEnsSum.sh
+++ b/test_pyEnsSum.sh
@@ -1,7 +1,7 @@
 #!/bin/tcsh
 #PBS -A NTDD0004
 #PBS -N ensSum
-#PBS -q regular
+#PBS -q premium
 #PBS -l select=8:ncpus=9:mpiprocs=9
 #PBS -l walltime=0:20:00
 #PBS -j oe
@@ -14,4 +14,8 @@ conda activate npl
 setenv TMPDIR /glade/scratch/$USER/temp
 mkdir -p $TMPDIR
 
-mpiexec python pyEnsSum.py --esize 350 --indir /glade/p/cisl/asap/pycect_sample_data/cam_c1.2.2.1/uf_cam_ens_files --sumfile uf.ens.c1.2.2.1_fc5.ne30.nc  --tslice 1 --tag cesm1.2.2.1 --compset FC5 --res ne30_ne30 --mach cheyenne --verbose --jsonfile excluded_varlist.json
+#mpiexec python pyEnsSum.py --esize 350 --indir /glade/p/cisl/asap/pycect_sample_data/cam_c1.2.2.1/uf_cam_ens_files --sumfile uf.ens.c1.2.2.1_fc5.ne30.nc  --tslice 1 --tag cesm1.2.2.1 --compset FC5 --res ne30_ne30 --mach cheyenne --verbose --jsonfile short.json
+
+#mpiexec python pyEnsSum.py --esize 350 --indir /glade/p/cisl/asap/pycect_sample_data/cam_c1.2.2.1/uf_cam_ens_files --sumfile uf.ens.c1.2.2.1_fc5.ne30.nc  --tslice 1 --tag cesm1.2.2.1 --compset FC5 --res ne30_ne30 --mach cheyenne --verbose --jsonfile excluded_varlist.json
+
+mpiexec python pyEnsSum.py --esize 350 --indir /glade/p/cisl/asap/pycect_sample_data/cam_c1.2.2.1/uf_cam_ens_files --sumfile uf.ens.c1.2.2.1_fc5.ne30.nc  --tslice 1 --tag cesm1.2.2.1 --compset FC5 --res ne30_ne30 --mach cheyenne --verbose --jsonfile one.json
diff --git a/test_pyEnsSumMPAS.sh b/test_pyEnsSumMPAS.sh
index 878306c..65aa6d8 100644
--- a/test_pyEnsSumMPAS.sh
+++ b/test_pyEnsSumMPAS.sh
@@ -1,7 +1,7 @@
 #!/bin/tcsh
 #PBS -A NTDD0004
-#PBS -N ensSum
-#PBS -q regular
+#PBS -N MensSum
+#PBS -q premium
 #PBS -l select=4:ncpus=9:mpiprocs=9
 #PBS -l walltime=0:30:00
 #PBS -j oe
@@ -14,4 +14,4 @@ conda activate npl
 setenv TMPDIR /glade/scratch/$USER/temp
 mkdir -p $TMPDIR
 
-mpiexec python pyEnsSumMPAS.py --esize 100 --indir /glade/scratch/abaker/longer_mpas_hist --sumfile mpas_sum_t24_new.nc  --tslice 8 --tag v7.1 --model mpas  --mach cheyenne --verbose --jsonfile mpas_ex.json
+mpiexec python pyEnsSumMPAS.py --esize 100 --indir /glade/p/cisl/asap/abaker/mpas/large_ens  --sumfile mpas_sumt4.nc  --tslice 4 --tag v7.1 --model mpas  --mach cheyenne --verbose --jsonfile test.json