6/28/19

OpticalWaveGauging · Jun 28, 2019 · 97e9c82 · 97e9c82
1 parent fdbc8ec
commit 97e9c82
Show file tree

Hide file tree

Showing 12 changed files with 820 additions and 682 deletions.
diff --git a/README.md b/README.md
@@ -4,24 +4,23 @@ Data and code to implement Buscombe et al (2019) optical wave gauging (OWG) usin
 
 > Buscombe, Carini, Harrison, Chickadel, and Warrick (in review) Optical wave gauging with deep neural networks. Submitted to Coastal Engineering 
 
-
 Software and data for training deep convolutional neural network models to estimate wave height and wave period from surf zone imagery
 
-This software was tested on Windows 10 with python 3.6, tensorflow 1.11.0 and keras 2.2.4. This software was written by Dr Daniel Buscombe at Northern Arizona University, in the winter of 2018/19.
+This software was tested on Windows 10 and Ubuntu Linux with python 3.6, tensorflow 1.11.0 and keras 2.2.4. This software was written by Dr Daniel Buscombe at Northern Arizona University, 2018-2019.
 
 THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND. IF YOU ENCOUNTER A PROBLEM/BUG OR HAVE A QUESTION OR SUGGESTION, PLEASE USE THE "ISSUES" TAB ON GITHUB. OTHERWISE, THIS SOFTWARE IS UNSUPPORTED.
 
 
 ### Folder structure
 
 * \conda_env contains yml files for setting up a conda environment
-* \conf contains the configuration file with user-definable settings
+* \config contains the configuration file with user-definable settings
 * \train contains files used for training models
 * \im128 is a file structure that will contain results from model training
 
 ## Setting up computing environments
 
-### Install Anaconda pyton distribution
+### Install Anaconda python distribution
 
 Install the latest version of Anaconda (https://www.anaconda.com/distribution/)
 
@@ -114,10 +113,12 @@ Configuration files are in JSON format, like this:
 {
   "samplewise_std_normalization" : true,
   "samplewise_center"  : true,
-  "input_image_format" : "png"
-  "input_csv_file"     : "IR-training-dataset.csv"
-  "category"           : 'H',
-
+  "input_image_format" : "jpg",
+  "input_csv_file"     : "snap-training-dataset.csv", 
+  "category"           : "H",
+  "prc_lower_withheld": 5,
+  "prc_upper_withheld": 5,
+  
   "horizontal_flip"    : false,
   "vertical_flip"      : false,
   "rotation_range"     : 10,
@@ -127,11 +128,9 @@ Configuration files are in JSON format, like this:
   "zoom_range"         : 0.2,
   "fill_mode"          : "reflect",
   
-  "batch_size"         : 64,
   "img_size"           : 128,
-  "num_epochs"         : 100,
-  "test_size"          : 0.33,
-  "steps_per_epoch"    : 100,
+  "num_epochs"         : 5,
+  "test_size"          : 0.4,
   "dropout_rate"       : 0.5,
   "epsilon"            : 0.0001,
   "min_lr"             : 0.0001,
@@ -150,8 +149,6 @@ Configuration files are in JSON format, like this:
 
 * num_epochs = number of training epochs
 * test_size = proportion of data set to hold out for testing (the remainder is used for training)
-* batch_size = number of images to use per model training step
-* steps_per_epoch = number of training steps per training epoch
 * dropout_rate: proportion of neurons to randomly drop in dropout layer
 * factor: factor by which the learning rate will be reduced. new_lr = lr * factor
 * epsilon: threshold for measuring the new optimum, to only focus on significant changes.
@@ -187,19 +184,15 @@ With height_shift_range=2 possible values are integers [-1, 0, +1], same as with
 To train models to predict wave height, the following scripts will do so for all combinations of 4 models (MobileNetV1, MobileNetV2, InceptionV3, and InceptionResnet2), and 4 batch sizes (16, 32, 64, and 128 images). 
 
 ```
-python train_OWG.py
+python train_OWG.py -c configfile.json
 ```
 
-The following script does the same using generator functions in the training
+In the above, ```configfile.json``` is one of the config files in the ```config``` folder. Just provide the name of the json file, including the 'json' file extension, not the full path to the file, like this:
 
 ```
-python train_OWG_gen.py
+python train_OWG.py -c config_IR_H.json
 ```
 
-Both scripts provide comparable results and are provided to illustrate two different options for training, mostly for advanced users wishing to modify and adapt the code for other purposes. 
-
-You may notice ```python train_OWG.py``` is marginally faster
-
 The best models are obtained using larger numbers of epochs (say, 100+), but you'll probably want to train them on a GPU (install ```tensorflow-gpu``` instead of ```tensorflow```).
 
 To train OWGs for wave period, change the category in the config file to 'T' and run the above again
@@ -298,30 +291,13 @@ im128
 
 ---------------batch128
 
-Then run a script to split large model files to smaller files < 100 MB (so they fit on github)
-
-```
-python split_model4.py
-```
 
 Finally, compile and plot results from all models using
 
 ```
 python compile_results.py
 ```
 
-Data are written out to the Matlab format. For example, for the IR imagery wave height model, the mat file would be:
-
-```
-IR_all_model_preds_height_128.mat
-```
-
-and for the IR imagery wave period model, the mat file would be:
-
-```
-IR_all_model_preds_period_128.mat
-```
-
 ## Operational Mode
 
 ### Testing model on a folder of images

diff --git a/__pycache__/utils.cpython-36.pyc b/__pycache__/utils.cpython-36.pyc
diff --git a/test_ensemble_OWG_folder.py → compile_results.py b/test_ensemble_OWG_folder.py → compile_results.py
@@ -1,5 +1,5 @@
-## test_OWG_folder.py 
-## A script to test a model on independent data
+## compile_results.py 
+## A script to test a model and make plots
 ## Written by Daniel Buscombe,
 ## Northern Arizona University
 ## daniel.buscombe.nau.edu
@@ -16,29 +16,31 @@
 from imageio import imread
 from keras.preprocessing.image import ImageDataGenerator
 from utils import *
-import os
+import sys, getopt, os
+
 os.environ['CUDA_VISIBLE_DEVICES'] = '-1' ##use CPU
 from glob import glob
 import zipfile
-
 from sklearn.model_selection import train_test_split
 import pandas as pd
-from keras.metrics import mean_absolute_error
-
-def mae_metric(in_gt, in_pred):
-    return mean_absolute_error(in_gt, in_pred)
 
 #==============================================================	
 ## script starts here
 if __name__ == '__main__':
 
-    #image_dir = 'snap_images'
-    #configfile = 'config_nearshore_H.json'
-    #configfile = 'config_nearshore_T.json'
-
-    image_dir = 'IR_images'		
-    #configfile = 'config_IR_H.json'
-    configfile = 'config_IR_T.json'
+	argv = sys.argv[1:]
+	try:
+	   opts, args = getopt.getopt(argv,"h:c:")
+	except getopt.GetoptError:
+	   print('python train_OWG.py -c configfile.json')
+	   sys.exit(2)
+	for opt, arg in opts:
+	   if opt == '-h':
+	      print('Example usage: python3 train_OWG.py -c conf_IR_h.json')
+	      sys.exit()
+	   elif opt in ("-c"):
+	      configfile = arg
+
     #==============================================================
     ## user inputs
     with open(os.getcwd()+os.sep+'config'+os.sep+configfile) as f:    
@@ -51,13 +53,13 @@ def mae_metric(in_gt, in_pred):
     samplewise_std_normalization = config["samplewise_std_normalization"]
     samplewise_center = config["samplewise_center"]  
     num_epochs = int(config["num_epochs"]) ##100
+	prc_lower_withheld = config['prc_lower_withheld'] 
+	prc_upper_withheld = config['prc_upper_withheld'] 
+	image_dir = config['image_direc']
 
     base_dir = os.path.normpath(os.getcwd()+os.sep+'train') 
 
     IMG_SIZE = (im_size, im_size) ##(128, 128)
-
-    prc_lower_withheld = 5
-    prc_upper_withheld = 5
 
     # #==============================================================
 
@@ -66,6 +68,8 @@ def mae_metric(in_gt, in_pred):
        df['path'] = df['id'].map(lambda x: os.path.join(base_dir,image_dir,'{}'.format(x)))#+".jpg"
     elif input_csv_file=='IR-training-dataset.csv':
        df['path'] = df['id'].map(lambda x: os.path.join(base_dir,image_dir,'{}'.format(x)))+".png"
+    elif input_csv_file=='Nearshore-Training-Oblique-cam2-snap.csv':
+       df['path'] = df['id'].map(lambda x: os.path.join(base_dir,image_dir,'{}'.format(x)))+".jpg"
 
     df = df.rename(index=str, columns={" H": "H", " T": "T"})   
 
@@ -74,7 +78,10 @@ def mae_metric(in_gt, in_pred):
     if input_csv_file=='snap-training-dataset.csv':    
         df['time'] = [int(k.split(os.sep)[-1].split('.')[0]) for k in df.path]
         df = df.sort_values(by='time', axis=0)
-
+    elif input_csv_file=='Nearshore-Training-Oblique-cam2-snap.csv':
+        df['time'] = [int(k.split(os.sep)[-1].split('.')[0]) for k in df.path]
+        df = df.sort_values(by='time', axis=0)
+
     ## making subsets of data based on prc_lower_withheld and prc_upper_withheld
     if (prc_lower_withheld>0) & (prc_upper_withheld>0):
         up = np.percentile(df[category], 100-prc_upper_withheld)
@@ -130,23 +137,16 @@ def mae_metric(in_gt, in_pred):
                 if input_csv_file=='snap-training-dataset.csv':			
                    weights_path=os.getcwd()+os.sep+'im'+str(im_size)+os.sep+'res'+os.sep+str(num_epochs)+'epoch'+os.sep+'H'+os.sep+'model'+str(counter)+os.sep+'batch'+str(batch_size)+os.sep+'waveheight_weights_model'+str(counter)+'_'+str(batch_size)+'batch.best.nearshore.hdf5'
                 elif input_csv_file=='IR-training-dataset.csv':
-                   weights_path=os.getcwd()+os.sep+'im'+str(im_size)+os.sep+'res'+os.sep+str(num_epochs)+'epoch'+os.sep+'H'+os.sep+'model'+str(counter)+os.sep+'batch'+str(batch_size)+os.sep+'waveheight_weights_model'+str(counter)+'_'+str(batch_size)+'batch.best.IR.hdf5'				
+                   weights_path=os.getcwd()+os.sep+'im'+str(im_size)+os.sep+'res'+os.sep+str(num_epochs)+'epoch'+os.sep+'H'+os.sep+'model'+str(counter)+os.sep+'batch'+str(batch_size)+os.sep+'waveheight_weights_model'+str(counter)+'_'+str(batch_size)+'batch.best.IR.hdf5'
+                elif input_csv_file=='Nearshore-Training-Oblique-cam2-snap.csv':
+                   weights_path=os.getcwd()+os.sep+'im'+str(im_size)+os.sep+'res'+os.sep+str(num_epochs)+'epoch'+os.sep+'H'+os.sep+'model'+str(counter)+os.sep+'batch'+str(batch_size)+os.sep+'waveheight_weights_model'+str(counter)+'_'+str(batch_size)+'batch.best.oblique.hdf5'
             else:
                 if input_csv_file=='snap-training-dataset.csv':			
                    weights_path=os.getcwd()+os.sep+'im'+str(im_size)+os.sep+'res'+os.sep+str(num_epochs)+'epoch'+os.sep+'T'+os.sep+'model'+str(counter)+os.sep+'batch'+str(batch_size)+os.sep+'waveperiod_weights_model'+str(counter)+'_'+str(batch_size)+'batch.best.nearshore.hdf5'
                 elif input_csv_file=='IR-training-dataset.csv':
-                   weights_path=os.getcwd()+os.sep+'im'+str(im_size)+os.sep+'res'+os.sep+str(num_epochs)+'epoch'+os.sep+'T'+os.sep+'model'+str(counter)+os.sep+'batch'+str(batch_size)+os.sep+'waveperiod_weights_model'+str(counter)+'_'+str(batch_size)+'batch.best.IR.hdf5'			   
-            if not os.path.isfile(weights_path): #counter==4:
-                if input_csv_file=='snap-training-dataset.csv':			
-                   files = sorted(glob(os.path.dirname(weights_path)+os.sep+'*nearshore*hdf5'))
-                elif input_csv_file=='IR-training-dataset.csv':
-                   files = sorted(glob(os.path.dirname(weights_path)+os.sep+'*IR*hdf5'))			   
-                out_data = b''
-                for fn in files:
-                    with open(fn, 'rb') as fp:
-                        out_data += fp.read()
-                with open(weights_path, 'wb') as fp:
-                   fp.write(out_data)                   	
+                   weights_path=os.getcwd()+os.sep+'im'+str(im_size)+os.sep+'res'+os.sep+str(num_epochs)+'epoch'+os.sep+'T'+os.sep+'model'+str(counter)+os.sep+'batch'+str(batch_size)+os.sep+'waveperiod_weights_model'+str(counter)+'_'+str(batch_size)+'batch.best.IR.hdf5'
+                elif input_csv_file=='Nearshore-Training-Oblique-cam2-snap.csv':
+                   weights_path=os.getcwd()+os.sep+'im'+str(im_size)+os.sep+'res'+os.sep+str(num_epochs)+'epoch'+os.sep+'T'+os.sep+'model'+str(counter)+os.sep+'batch'+str(batch_size)+os.sep+'waveperiod_weights_model'+str(counter)+'_'+str(batch_size)+'batch.best.oblique.hdf5'                 	
 
             # load json and create model
             print("Creating model")						
@@ -173,47 +173,44 @@ def mae_metric(in_gt, in_pred):
     counter = 1
     for model in [1,2,3,4]:
         for batch_size in [16,32,64,128]:
-            ## average over batch per model
             pred_Y = yhat['M'+str(model)+'_B'+str(batch_size)] 
-		    #(yhat['M'+str(counter)+'_B16']+yhat['M'+str(counter)+'_B32']+yhat['M'+str(counter)+'_B64']+yhat['M'+str(counter)+'_B128'])/4
             pred_Y = np.squeeze(np.asarray(pred_Y))
 
             pred_exY = exyhat['M'+str(model)+'_B'+str(batch_size)] 
-		    #(exyhat['M'+str(counter)+'_B16']+exyhat['M'+str(counter)+'_B32']+exyhat['M'+str(counter)+'_B64']+exyhat['M'+str(counter)+'_B128'])/4
             pred_exY = np.squeeze(np.asarray(pred_exY))
 
             plt.subplot(4,4,counter)
             plt.plot(test_Y, pred_Y, 'k.', markersize=3, label = 'predictions')
             plt.plot(ex_Y, pred_exY, 'bx', markersize=3, label = 'predictions')
-            if input_csv_file=='snap-training-dataset.csv':						
+            if input_csv_file=='IR-training-dataset.csv':						
                if category=='H':			
                   plt.plot([0.5, 2.75], [0.5, 2.75], 'r-', label = 'actual')
                   plt.xlim(0.25,3); plt.ylim(0.25, 3)
                else:
                   plt.plot([8, 23], [8, 23], 'r-', label = 'actual')
                   plt.xlim(7,24); plt.ylim(7, 24)	
-            elif input_csv_file=='IR-training-dataset.csv':						
-               if category=='H':			
+            else:
+				  if category=='H':			
                   plt.plot([0.25, 5.75], [0.25, 5.75], 'r-', label = 'actual')
                   plt.xlim(0,6); plt.ylim(0, 6)
                else:
                   plt.plot([3, 19], [3, 19], 'r-', label = 'actual')
                   plt.xlim(2,20); plt.ylim(2, 20)				  
             if counter==13:
-               if input_csv_file=='snap-training-dataset.csv':						
-                  if category=='H':
-                     plt.xlabel(r'Actual $H_s$ (m)', fontsize=6)
-                     plt.ylabel(r'Predicted $H_s$ (m)', fontsize=6)
-                  elif category=='T':
-                     plt.xlabel(r'Actual $T_p$ (s)', fontsize=6)
-                     plt.ylabel(r'Predicted $T_p$ (s)', fontsize=6)
-               elif input_csv_file=='IR-training-dataset.csv':						
+               if input_csv_file=='IR-training-dataset.csv':						
                   if category=='H':
                      plt.xlabel(r'Actual $H$ (m)', fontsize=6)
                      plt.ylabel(r'Predicted $H$ (m)', fontsize=6)
                   elif category=='T':
                      plt.xlabel(r'Actual $T$ (s)', fontsize=6)
-                     plt.ylabel(r'Predicted $T$ (s)', fontsize=6)				  
+                     plt.ylabel(r'Predicted $T$ (s)', fontsize=6)
+               else:						
+                  if category=='H':
+                     plt.xlabel(r'Actual $H_s$ (m)', fontsize=6)
+                     plt.ylabel(r'Predicted $H_s$ (m)', fontsize=6)
+                  elif category=='T':
+                     plt.xlabel(r'Actual $T_p$ (s)', fontsize=6)
+                     plt.ylabel(r'Predicted $T_p$ (s)', fontsize=6)					 
             rms = np.sqrt(np.nanmean((pred_Y - test_Y)**2))
             rsq = np.min(np.corrcoef(test_Y, pred_Y))**2
             exrms = np.sqrt(np.nanmean((pred_exY - ex_Y)**2))
@@ -229,6 +226,9 @@ def mae_metric(in_gt, in_pred):
        plt.savefig('ensemble_allmodels_'+category+'-IR.png', dpi=300, bbox_inches='tight')	
     elif input_csv_file=='snap-training-dataset.csv':
        plt.savefig('ensemble_allmodels_'+category+'-nearshore.png', dpi=300, bbox_inches='tight')
+    elif input_csv_file=='Nearshore-Training-Oblique-cam2-snap.csv':
+       plt.savefig('ensemble_allmodels_'+category+'-oblique.png', dpi=300, bbox_inches='tight')
+
     plt.close('all') ; del fig
 
 

diff --git a/config/config_test.json b/config/config_test.json
@@ -0,0 +1,12 @@
+{
+  "samplewise_std_normalization" : true,
+  "samplewise_center"  : true,
+  "weights_path" : "im128/res/100epoch/H/model1/batch16/waveheight_weights_model1_16batch.best.nearshore.hdf5",
+  "input_csv_file"     : "train/snap-training-dataset.csv", 
+  "category"           : "H",
+  "im_size"            : 128,
+  "image_direc"        : "train/snap_images",
+  "prc_lower_withheld": 5,
+  "prc_upper_withheld": 5,
+  "file_ext": "jpg"  
+}