
Commit

Add reprocessing mode to trainer.
Also clean up model.
donkirkby committed Nov 1, 2023
1 parent 1799828 commit b8e5759
Showing 8 changed files with 150 additions and 59 deletions.
Binary file added docs/journal/2023/training-100_000-one-hot.png
Binary file added docs/journal/2023/training-100_000-pos-neg.png
Binary file added docs/journal/2023/training-one-hot.png
Binary file added docs/journal/2023/training-pos-neg.png
48 changes: 47 additions & 1 deletion docs/journal/index.md
@@ -36,10 +36,56 @@ Restore the tool to plot win rates between different numbers of MCTS iterations.
Switch MCTS search to limit by time instead of iterations, since that will make
more sense when comparing neural network with random playouts.

### July 2023
### Jul 2023
Resurrect the old neural network training code, but start by going through the
painful TensorFlow installation for [GPU support]. Even after that worked, the
old training code was still broken, so try basing it on a [regression tutorial].

[GPU support]: https://stackoverflow.com/a/54567428/4794
[regression tutorial]: https://www.tensorflow.org/tutorials/keras/regression

### Oct 2023
The training code runs, but doesn't seem to learn much. Here's the loss function
as it trains on 10,000 board positions for 19s.

![Training with positive/negative]

Several tutorials used the one-hot format to store their board positions, so I
switched to that, and saw no improvement. This training took 17s.

![Training with one hot]
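
As a minimal sketch of the two encodings (illustrative only; the real
conversion is `convert_one_hot` in `zero_play/trainer.py` below), each cell is
1 for one player's piece and -1 for the other's, and the one-hot form splits
that into two planes:

```python
import numpy as np

# +/- format: one value per cell, 1 for one player, -1 for the other.
board = np.array([1, -1, 0, 1])  # illustrative 4-cell slice of a board

# one-hot format: one plane per player, concatenated.
pos = (board > 0).astype(board.dtype)  # 1 where the first player has a piece
neg = (board < 0).astype(board.dtype)  # 1 where the second player has a piece
one_hot = np.concatenate([pos, neg])   # [1, 0, 0, 1, 0, 1, 0, 0]
```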

Training on 100,000 board positions seems to give better results, but it takes
about 3 hours to generate that much data. There doesn't seem to be a noticeable
difference between the +/- and the one-hot format. Here's the +/- training that
runs for about 2.5 minutes.

![Training +/- on 100,000]

Here's the one-hot training for the same data.

![Training one hot on 100,000]

The next steps are to see if the model trains better with more layers, and see
if the model can learn nearly as well on less data, so we can spend less than
3 hours generating position data for each iteration.

Here are results from learning on subsets of that same data set with both the
positive/negative position data and the one-hot position data.

| position count | +/- avg. loss | +/- time(s) | 1-hot avg. loss | 1-hot time(s) |
|----------------|---------------|-------------|-----------------|---------------|
| 10,000 | 0.159 | 17 | 0.156 | 18 |
| 20,000 | 0.134 | 31 | 0.124 | 32 |
| 40,000 | 0.128 | 61 | 0.139 | 62 |
| 60,000 | 0.123 | 90 | 0.119 | 92 |
| 80,000 | 0.115 | 120 | 0.115 | 122 |
| 100,000 | 0.129 | 152 | 0.121 | 163 |

It looks like we don't get much improvement past 50,000 positions, and I don't
see much difference between the two different data formats.
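
As a quick check (a sketch assuming matplotlib; the numbers are copied from
the table above), plotting average validation loss against position count
makes the plateau easy to see:

```python
import matplotlib.pyplot as plt

counts = [10_000, 20_000, 40_000, 60_000, 80_000, 100_000]
pos_neg_loss = [0.159, 0.134, 0.128, 0.123, 0.115, 0.129]
one_hot_loss = [0.156, 0.124, 0.139, 0.119, 0.115, 0.121]

plt.plot(counts, pos_neg_loss, label='+/-')
plt.plot(counts, one_hot_loss, label='one-hot')
plt.xlabel('position count')
plt.ylabel('average validation loss')
plt.legend()
plt.grid(True)
plt.show()
```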

[Training with positive/negative]: 2023/training-pos-neg.png
[Training with one hot]: 2023/training-one-hot.png
[Training +/- on 100,000]: 2023/training-100_000-pos-neg.png
[Training one hot on 100,000]: 2023/training-100_000-one-hot.png
62 changes: 24 additions & 38 deletions zero_play/connect4/neural_net.py
@@ -5,13 +5,16 @@

import numpy as np
# noinspection PyUnresolvedReferences
from tensorflow.keras import optimizers
# noinspection PyUnresolvedReferences
from tensorflow.keras import Sequential, regularizers
# noinspection PyUnresolvedReferences
from tensorflow.keras.callbacks import TensorBoard
# noinspection PyUnresolvedReferences
from tensorflow.keras.layers import Dense, Conv2D, Dropout, Flatten
# noinspection PyUnresolvedReferences
from tensorflow.keras.models import load_model
from tensorflow.python.keras.callbacks import EarlyStopping

from zero_play.game_state import GridGameState, GameState
from zero_play.heuristic import Heuristic
@@ -20,7 +23,7 @@


class NeuralNet(Heuristic):
    def __init__(self, start_state: GameState):
    def __init__(self, start_state: GameState) -> None:
        if not isinstance(start_state, GridGameState):
            raise ValueError(f'{start_state.__class__} is not a subclass of GridGameState.')
        super().__init__()
@@ -39,38 +42,23 @@ def __init__(self, start_state: GameState):
        self.checkpoint_name = 'random weights'
        self.args = args

        num_channels = 512
        num_channels = 64
        kernel_size = [3, 3]
        dropout = 0.3
        model = Sequential()
        # regularizer = regularizers.l2(0.00006)
        regularizer = regularizers.l2(0.0001)
        model.add(Conv2D(num_channels,
                         kernel_size,
                         padding='same',
                         activation='relu',
                         input_shape=(self.board_height, self.board_width, 1),
                         activity_regularizer=regularizer))
        # model.add(Conv2D(num_channels,
        #                  kernel_size,
        #                  padding='same',
        #                  activation='relu',
        #                  activity_regularizer=regularizer))
        # model.add(Conv2D(num_channels,
        #                  kernel_size,
        #                  activation='relu',
        #                  activity_regularizer=regularizer))
        # model.add(Conv2D(num_channels,
        #                  kernel_size,
        #                  activation='relu',
        #                  activity_regularizer=regularizer))
        # model.add(Dropout(dropout))
        # model.add(Dropout(dropout))
        model.add(Flatten())
        model.add(Dense(64))  # Remove to match paper?
        model.add(Dense(64))  # Remove to match paper?
        model.add(Dense(self.action_size + 1))
        model.compile('adam', 'mean_squared_error')
        input_shape = (self.board_height, self.board_width, 1)
        model = Sequential(
            [Conv2D(num_channels,
                    kernel_size,
                    padding='same',
                    activation='relu',
                    input_shape=input_shape,
                    activity_regularizer=regularizer),
             Flatten(),
             Dense(64, activation='relu'),
             Dense(64, activation='relu'),
             Dense(self.action_size + 1)])
        model.compile(loss='mean_absolute_error',
                      optimizer=optimizers.Adam(0.001))
        self.model = model

    def get_summary(self) -> typing.Sequence[str]:
@@ -109,27 +97,25 @@ def load_checkpoint(self, folder=None, filename='checkpoint.h5'):
        file_path = folder_path / filename
        self.model = load_model(file_path)

    def train(self, boards: np.ndarray, outputs: np.ndarray, log_dir=None):
    def train(self, boards: np.ndarray, outputs: np.ndarray):
        """ Train the model on some sample data.
        :param boards: Each entry is a board position.
        :param outputs: Each entry is an array of policy values for the moves,
            as well as the estimated value of the board position.
        :param log_dir: Directory for TensorBoard logs. None disables logging.
        """

        self.checkpoint_name += ' + training'
        if log_dir is None:
            callbacks = None
        else:
            callbacks = [TensorBoard(log_dir)]

        callbacks = [EarlyStopping(patience=5)]

        history = self.model.fit(
            np.expand_dims(boards, -1),
            outputs,
            verbose=0,
            initial_epoch=self.epochs_completed,
            epochs=self.epochs_completed + self.epochs_to_train,
            validation_split=0.2)  # TODO: Add callbacks?
            validation_split=0.2,
            callbacks=callbacks)
        self.epochs_completed += self.epochs_to_train
        return history
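
As a quick sketch of exercising the new `train` signature (dummy zero-filled
data just to show the shapes; it assumes the elided parts of `__init__` need
no extra arguments):

```python
import numpy as np

from zero_play.connect4.game import Connect4State
from zero_play.connect4.neural_net import NeuralNet

net = NeuralNet(Connect4State())
# Dummy data: each board is a 6x7 grid, and each output row holds the
# policy values for the moves plus the estimated value of the position.
boards = np.zeros((100, 6, 7))
outputs = np.zeros((100, net.action_size + 1))
history = net.train(boards, outputs)
print(history.history['val_loss'][-1])
```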
73 changes: 62 additions & 11 deletions zero_play/trainer.py
@@ -3,8 +3,11 @@
from datetime import datetime
from itertools import count
from pathlib import Path
from statistics import mean

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

from zero_play.connect4.game import Connect4State
from zero_play.connect4.neural_net import NeuralNet
@@ -16,11 +19,43 @@
logger = logging.getLogger(__name__)


def plot_loss(history):
    plt.plot(history.history['loss'], label='loss')
    plt.plot(history.history['val_loss'], label='val_loss')
    average_loss = mean(history.history['val_loss'][-10:])  # last 10 epochs
    print(f'Final average validation loss: {average_loss}')
    plt.ylim(bottom=0)
    plt.title('Loss Function During Training on 100,000 Positions')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.grid(True)
    plt.show()


def convert_one_hot(boards_path: Path, boards_hot_path: Path):
    boards_df = pd.read_csv(boards_path)
    np_boards = boards_df.to_numpy()
    np_boards = np_boards[:, 1:]  # Drop the index column from the CSV.
    board_count, column_count = np_boards.shape
    hot_column_count = 2*column_count
    np_hot = np.zeros((board_count, hot_column_count), np_boards.dtype)
    # Copy the +/- boards into both halves, then keep one player's pieces
    # in the first half and the other player's pieces in the second.
    np_hot[:, :column_count] = np_hot[:, column_count:] = np_boards
    pos_hot = np_hot[:, :column_count]
    neg_hot = np_hot[:, column_count:]
    pos_hot[pos_hot < 0] = 0
    neg_hot[neg_hot > 0] = 0
    neg_hot *= -1
    one_hot_df = pd.DataFrame.from_records(np_hot)
    one_hot_df.to_csv(boards_hot_path)
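
A minimal usage sketch (the output file name is illustrative; the trainer
writes `boards.csv` into the data folder):

```python
from pathlib import Path

# Rewrite the saved +/- boards as one-hot rows in a second CSV file.
convert_one_hot(Path('data/boards.csv'), Path('data/boards-hot.csv'))
```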


def train(search_milliseconds: int,
          training_size: int,
          comparison_size: int,
          min_win_rate: float,
          data_folder: str):
          data_folder: str,
          is_reprocessing: bool = False):
    start_state = Connect4State()
    data_path = Path(data_folder)
    checkpoint_path = data_path / f'{start_state.game_name}-nn'
@@ -61,21 +96,37 @@ def train(search_milliseconds: int,
        players=[training_player, best_player])
    search_manager = SearchManager(start_state, training_net)
    for i in count():
        logger.info('Creating training data.')
        boards, outputs = search_manager.create_training_data(
            milliseconds=search_milliseconds,
            data_size=training_size)

        boards_path = data_path / 'boards.csv'
        outputs_path = data_path / 'outputs.csv'
        boards_df = pd.DataFrame(boards.reshape(training_size, 6*7))
        outputs_df = pd.DataFrame(outputs)
        boards_df.to_csv(boards_path)
        outputs_df.to_csv(outputs_path)

        if is_reprocessing:
            boards_df = pd.read_csv(boards_path)
            outputs_df = pd.read_csv(outputs_path)
            boards = boards_df.to_numpy()[:training_size, 1:]
            outputs = outputs_df.to_numpy()[:training_size, 1:]
        else:
            logger.info('Creating training data.')
            boards, outputs = search_manager.create_training_data(
                milliseconds=search_milliseconds,
                data_size=training_size)

            boards_df = pd.DataFrame.from_records(boards)
            outputs_df = pd.DataFrame.from_records(outputs)
            boards_df.to_csv(boards_path)
            outputs_df.to_csv(outputs_path)

        boards = boards.reshape(training_size, 6, 7)

        start = datetime.now()
        filename = f'checkpoint-{i:02d}.h5'
        logger.info('Training for %s.', filename)
        training_net.train(boards, outputs, './logs')
        history = training_net.train(boards, outputs)
        training_time = datetime.now() - start
        print(f'Trained for {training_time}.')

        if is_reprocessing:
            plot_loss(history)
            return

        logger.info('Testing.')
        wins_vs_base, base_ties, base_wins = base_controller.play(
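
As a usage sketch of the new flag (argument values are illustrative), the
trainer can replay saved positions from `boards.csv` and `outputs.csv`
instead of spending hours regenerating them:

```python
from zero_play.trainer import train

# Retrain on previously saved data and plot the loss curve,
# instead of running hours of self-play first.
train(search_milliseconds=500,
      training_size=50_000,
      comparison_size=200,
      min_win_rate=0.55,
      data_folder='data',
      is_reprocessing=True)
```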
26 changes: 17 additions & 9 deletions zero_play/zero_play.py
@@ -21,11 +21,17 @@
QSizePolicy, QDialog, QWidget, QLabel, QComboBox)
from alembic import command
from alembic.config import Config
from pkg_resources import iter_entry_points, EntryPoint
from importlib.metadata import entry_points
from sqlalchemy import create_engine
from sqlalchemy.orm import Session as BaseSession
from sqlalchemy.util import immutabledict

try:
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'  # Warning and above.
    import tensorflow as tf  # noqa Triggers Tensorflow warning messages.
except ImportError:
    raise

import zero_play
from zero_play.about_dialog import Ui_Dialog
from zero_play.game_state import GameState
@@ -182,10 +188,7 @@ def get_collection_name() -> str:
        return 'Zero Play'

    @staticmethod
    def filter_games(
            entries: typing.Iterable[EntryPoint]) -> typing.Generator[EntryPoint,
                                                                      None,
                                                                      None]:
    def filter_games(entries):
        yield from entries

    @property
@@ -225,7 +228,7 @@ def load_game_list(self, game_layout: QGridLayout):
            if child.widget():
                child.widget().deleteLater()
        games = self.all_displays
        all_entries = iter_entry_points('zero_play.game_display')
        all_entries = entry_points(group='zero_play.game_display')
        filtered_entries = self.filter_games(all_entries)
        for game_entry in filtered_entries:
            display_class = game_entry.load()
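
For reference, a minimal sketch of the `importlib.metadata` replacement for
`pkg_resources.iter_entry_points` (keyword selection needs Python 3.10 or
later):

```python
from importlib.metadata import entry_points

# Select only the entry points registered under this group.
for entry in entry_points(group='zero_play.game_display'):
    display_class = entry.load()  # imports and returns the target object
```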
@@ -422,6 +425,7 @@ def on_cancel(self):
        self.ui.stacked_widget.setCurrentWidget(self.ui.game_page)

    def on_network1(self):
        # noinspection PyUnresolvedReferences
        file_name, _ = QFileDialog.getOpenFileName(
            self.ui.players_page,
            "Open a file for player 1's neural network.",
@@ -550,7 +554,7 @@ def on_choose_training_path(self) -> None:
        settings.setValue('training_data_path', str(data_path))
        self.ui.training_path.setText(file_name)

    def on_start_training(self) -> None:
    def on_start_training(self, is_reprocessing: bool = False) -> None:
        ui = self.ui
        if not ui.training_path.text():
            ui.training_message.setText('Choose a data folder.')
@@ -560,7 +564,8 @@ def on_start_training(self) -> None:
            ui.training_size.value(),
            ui.training_comparison.value(),
            ui.training_win_rate.value() / 100,
            ui.training_path.text())
            ui.training_path.text(),
            is_reprocessing)

    def on_game_ended(self, game_state: GameState):
        if (self.is_history_dirty or
@@ -700,6 +705,7 @@ def get_file_dialog_options():
    kwargs = {}
    if 'SNAP' in os.environ:
        # Native dialog restricts paths for snap processes to /run/user.
        # noinspection PyUnresolvedReferences
        kwargs['options'] = QFileDialog.DontUseNativeDialog
    return kwargs

@@ -723,7 +729,9 @@ def main():
    window = ZeroPlayWindow()
    if args.train:
        window.ui.training_path.setText(str(args.data.expanduser()))
        window.on_start_training()
        window.ui.training_size.setValue(50_000)
        window.ui.training_comparison.setValue(200)
        window.on_start_training(is_reprocessing=False)
    else:
        window.show()
    return app.exec()
