diff --git a/docs/journal/2023/training-100_000-one-hot.png b/docs/journal/2023/training-100_000-one-hot.png
new file mode 100644
index 0000000..ac1e2c4
Binary files /dev/null and b/docs/journal/2023/training-100_000-one-hot.png differ
diff --git a/docs/journal/2023/training-100_000-pos-neg.png b/docs/journal/2023/training-100_000-pos-neg.png
new file mode 100644
index 0000000..bd649af
Binary files /dev/null and b/docs/journal/2023/training-100_000-pos-neg.png differ
diff --git a/docs/journal/2023/training-one-hot.png b/docs/journal/2023/training-one-hot.png
new file mode 100644
index 0000000..dec3484
Binary files /dev/null and b/docs/journal/2023/training-one-hot.png differ
diff --git a/docs/journal/2023/training-pos-neg.png b/docs/journal/2023/training-pos-neg.png
new file mode 100644
index 0000000..2b3b5ab
Binary files /dev/null and b/docs/journal/2023/training-pos-neg.png differ
diff --git a/docs/journal/index.md b/docs/journal/index.md
index 637cfa4..f9a0834 100644
--- a/docs/journal/index.md
+++ b/docs/journal/index.md
@@ -36,10 +36,56 @@
 Restore the tool to plot win rates between different numbers of MCTS
 iterations.
 Switch MCTS search to limit by time instead of iterations, since that will
 make more sense when comparing neural network with random playouts.
 
-### July 2023
+### Jul 2023
 Resurrect the old neural network training code, but start by going through the
 painful Tensorflow installation of [GPU support]. After that worked, the old
 training code was broken, so try basing it on a [regression tutorial].
 
 [GPU support]: https://stackoverflow.com/a/54567428/4794
 [regression tutorial]: https://www.tensorflow.org/tutorials/keras/regression
+
+### Oct 2023
+The training code runs, but the model doesn't seem to learn much. Here's the
+loss curve as it trains on 10,000 board positions for 19s.
+
+![Training with positive/negative]
+
+Several tutorials used the one-hot format to store their board positions, so I
+switched to that and saw no improvement. This training took 17s.
+
+![Training with one hot]
+
+Training on 100,000 board positions seems to give better results, but it takes
+about 3 hours to generate that much data. There doesn't seem to be a noticeable
+difference between the +/- format and the one-hot format. Here's the +/-
+training, which runs for about 2.5 minutes.
+
+![Training +/- on 100,000]
+
+Here's the one-hot training for the same data.
+
+![Training one hot on 100,000]
+
+The next steps are to see if the model trains better with more layers, and to
+see if the model can learn nearly as well on less data, so we can spend less
+than 3 hours generating position data for each iteration.
+
+Here are the results from training on subsets of that same data set with both
+the positive/negative position data and the one-hot position data.
+
+| position count | +/- avg. loss | +/- time (s) | 1-hot avg. loss | 1-hot time (s) |
+|----------------|---------------|--------------|-----------------|----------------|
+| 10,000         | 0.159         | 17           | 0.156           | 18             |
+| 20,000         | 0.134         | 31           | 0.124           | 32             |
+| 40,000         | 0.128         | 61           | 0.139           | 62             |
+| 60,000         | 0.123         | 90           | 0.119           | 92             |
+| 80,000         | 0.115         | 120          | 0.115           | 122            |
+| 100,000        | 0.129         | 152          | 0.121           | 163            |
+
+It looks like we don't get much improvement past 50,000 positions, and I don't
+see much difference between the two data formats.
+
+[Training with positive/negative]: 2023/training-pos-neg.png
+[Training with one hot]: 2023/training-one-hot.png
+[Training +/- on 100,000]: 2023/training-100_000-pos-neg.png
+[Training one hot on 100,000]: 2023/training-100_000-one-hot.png
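The journal entry above compares two ways of encoding a board position: a single plane of +1/0/-1 values and a one-hot layout with a separate plane per player. Here is a minimal sketch of the difference for a single Connect 4 position; the `to_one_hot` helper is hypothetical, and the real batch conversion is `convert_one_hot` in `zero_play/trainer.py` further down.

```python
import numpy as np


def to_one_hot(board: np.ndarray) -> np.ndarray:
    """Split a +1/0/-1 board into two binary planes, one per player."""
    player1 = (board > 0).astype(np.int8)  # 1 wherever player 1 has a piece
    player2 = (board < 0).astype(np.int8)  # 1 wherever player 2 has a piece
    return np.concatenate([player1, player2], axis=-1)


board = np.zeros((6, 7), dtype=np.int8)  # empty Connect 4 board
board[5, 3] = 1   # player 1 drops a piece in the middle column
board[5, 4] = -1  # player 2 replies in the next column
print(to_one_hot(board).shape)  # (6, 14): the column count doubles
```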
diff --git a/zero_play/connect4/neural_net.py b/zero_play/connect4/neural_net.py
index 84d0043..ccebba4 100644
--- a/zero_play/connect4/neural_net.py
+++ b/zero_play/connect4/neural_net.py
@@ -5,6 +5,8 @@
 import numpy as np
 
 # noinspection PyUnresolvedReferences
+from tensorflow.keras import optimizers
+# noinspection PyUnresolvedReferences
 from tensorflow.keras import Sequential, regularizers
 # noinspection PyUnresolvedReferences
 from tensorflow.keras.callbacks import TensorBoard
@@ -12,6 +14,7 @@
 from tensorflow.keras.layers import Dense, Conv2D, Dropout, Flatten
 # noinspection PyUnresolvedReferences
 from tensorflow.keras.models import load_model
+from tensorflow.keras.callbacks import EarlyStopping
 
 from zero_play.game_state import GridGameState, GameState
 from zero_play.heuristic import Heuristic
@@ -20,7 +23,7 @@
 
 
 class NeuralNet(Heuristic):
-    def __init__(self, start_state: GameState):
+    def __init__(self, start_state: GameState) -> None:
         if not isinstance(start_state, GridGameState):
             raise ValueError(f'{start_state.__class__} is not a subclass of GridGameState.')
         super().__init__()
@@ -39,38 +42,23 @@ def __init__(self, start_state: GameState):
         self.checkpoint_name = 'random weights'
         self.args = args
 
-        num_channels = 512
+        num_channels = 64
         kernel_size = [3, 3]
-        dropout = 0.3
-        model = Sequential()
-        # regularizer = regularizers.l2(0.00006)
         regularizer = regularizers.l2(0.0001)
-        model.add(Conv2D(num_channels,
-                         kernel_size,
-                         padding='same',
-                         activation='relu',
-                         input_shape=(self.board_height, self.board_width, 1),
-                         activity_regularizer=regularizer))
-        # model.add(Conv2D(num_channels,
-        #                  kernel_size,
-        #                  padding='same',
-        #                  activation='relu',
-        #                  activity_regularizer=regularizer))
-        # model.add(Conv2D(num_channels,
-        #                  kernel_size,
-        #                  activation='relu',
-        #                  activity_regularizer=regularizer))
-        # model.add(Conv2D(num_channels,
-        #                  kernel_size,
-        #                  activation='relu',
-        #                  activity_regularizer=regularizer))
-        # model.add(Dropout(dropout))
-        # model.add(Dropout(dropout))
-        model.add(Flatten())
-        model.add(Dense(64))  # Remove to match paper?
-        model.add(Dense(64))  # Remove to match paper?
-        model.add(Dense(self.action_size + 1))
-        model.compile('adam', 'mean_squared_error')
+        input_shape = (self.board_height, self.board_width, 1)
+        model = Sequential(
+            [Conv2D(num_channels,
+                    kernel_size,
+                    padding='same',
+                    activation='relu',
+                    input_shape=input_shape,
+                    activity_regularizer=regularizer),
+             Flatten(),
+             Dense(64, activation='relu'),
+             Dense(64, activation='relu'),
+             Dense(self.action_size + 1)])
+        model.compile(loss='mean_absolute_error',
+                      optimizer=optimizers.Adam(0.001))
         self.model = model
 
     def get_summary(self) -> typing.Sequence[str]:
@@ -109,20 +97,17 @@ def load_checkpoint(self, folder=None, filename='checkpoint.h5'):
         file_path = folder_path / filename
         self.model = load_model(file_path)
 
-    def train(self, boards: np.ndarray, outputs: np.ndarray, log_dir=None):
+    def train(self, boards: np.ndarray, outputs: np.ndarray):
         """ Train the model on some sample data.
 
         :param boards: Each entry is a board position.
         :param outputs: Each entry is an array of policy values for the moves,
             as well as the estimated value of the board position.
-        :param log_dir: Directory for TensorBoard logs. None disables logging.
         """
         self.checkpoint_name += ' + training'
-        if log_dir is None:
-            callbacks = None
-        else:
-            callbacks = [TensorBoard(log_dir)]
+
+        callbacks = [EarlyStopping(patience=5)]
 
         history = self.model.fit(
             np.expand_dims(boards, -1),
@@ -130,6 +115,7 @@ def train(self, boards: np.ndarray, outputs: np.ndarray, log_dir=None):
             verbose=0,
             initial_epoch=self.epochs_completed,
             epochs=self.epochs_completed+self.epochs_to_train,
-            validation_split=0.2)  # TODO: Add callbacks?
+            validation_split=0.2,
+            callbacks=callbacks)
         self.epochs_completed += self.epochs_to_train
         return history
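Pulled out of the class, the rewritten model amounts to roughly the following: one small convolution, a flatten, two hidden layers, and a linear head for the move priors plus a position value, trained with mean absolute error, the Adam optimizer, and early stopping. This is only a sketch that assumes a 6x7 Connect 4 board with 7 legal moves; the real `__init__` reads those sizes from the game state.

```python
from tensorflow.keras import Sequential, optimizers, regularizers
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Conv2D, Dense, Flatten

board_height, board_width, action_size = 6, 7, 7  # assumed Connect 4 sizes

model = Sequential([
    Conv2D(64, (3, 3), padding='same', activation='relu',
           input_shape=(board_height, board_width, 1),
           activity_regularizer=regularizers.l2(0.0001)),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(64, activation='relu'),
    Dense(action_size + 1),  # 7 policy outputs plus 1 value estimate
])
model.compile(loss='mean_absolute_error', optimizer=optimizers.Adam(0.001))

# EarlyStopping monitors val_loss by default and stops after 5 epochs without
# improvement; train() now passes this to model.fit() instead of TensorBoard.
early_stopping = EarlyStopping(patience=5)
```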
""" self.checkpoint_name += ' + training' - if log_dir is None: - callbacks = None - else: - callbacks = [TensorBoard(log_dir)] + + callbacks = [EarlyStopping(patience=5)] history = self.model.fit( np.expand_dims(boards, -1), @@ -130,6 +115,7 @@ def train(self, boards: np.ndarray, outputs: np.ndarray, log_dir=None): verbose=0, initial_epoch=self.epochs_completed, epochs=self.epochs_completed+self.epochs_to_train, - validation_split=0.2) # TODO: Add callbacks? + validation_split=0.2, + callbacks=callbacks) self.epochs_completed += self.epochs_to_train return history diff --git a/zero_play/trainer.py b/zero_play/trainer.py index dc7cd1f..b9344b8 100644 --- a/zero_play/trainer.py +++ b/zero_play/trainer.py @@ -3,8 +3,11 @@ from datetime import datetime from itertools import count from pathlib import Path +from statistics import mean +import numpy as np import pandas as pd +from matplotlib import pyplot as plt from zero_play.connect4.game import Connect4State from zero_play.connect4.neural_net import NeuralNet @@ -16,11 +19,43 @@ logger = logging.getLogger(__name__) +def plot_loss(history): + plt.plot(history.history['loss'], label='loss') + plt.plot(history.history['val_loss'], label='val_loss') + average_loss = mean(history.history['val_loss'][-10:]) + print(f'Final average validation loss: {average_loss}') + plt.ylim(bottom=0) + plt.title('Loss Function During Training on 100,000 Positions') + plt.xlabel('Epoch') + plt.ylabel('Error [angle]') + plt.legend() + plt.grid(True) + plt.show() + + +def convert_one_hot(boards_path: Path, boards_hot_path: Path): + boards_df = pd.read_csv(boards_path) + np_boards = boards_df.to_numpy() + np_boards = np_boards[:, 1:] + board_count, column_count = np_boards.shape + hot_column_count = 2*column_count + np_hot = np.zeros((board_count, hot_column_count), np_boards.dtype) + np_hot[:, :column_count] = np_hot[:, column_count:] = np_boards + pos_hot = np_hot[:, :column_count] + neg_hot = np_hot[:, column_count:] + pos_hot[pos_hot < 0] = 0 + neg_hot[neg_hot > 0] = 0 + neg_hot *= -1 + one_hot_df = pd.DataFrame.from_records(np_hot) + one_hot_df.to_csv(boards_hot_path) + + def train(search_milliseconds: int, training_size: int, comparison_size: int, min_win_rate: float, - data_folder: str): + data_folder: str, + is_reprocessing: bool = False): start_state = Connect4State() data_path = Path(data_folder) checkpoint_path = data_path / f'{start_state.game_name}-nn' @@ -61,21 +96,37 @@ def train(search_milliseconds: int, players=[training_player, best_player]) search_manager = SearchManager(start_state, training_net) for i in count(): - logger.info('Creating training data.') - boards, outputs = search_manager.create_training_data( - milliseconds=search_milliseconds, - data_size=training_size) - boards_path = data_path / 'boards.csv' outputs_path = data_path / 'outputs.csv' - boards_df = pd.DataFrame(boards.reshape(training_size, 6*7)) - outputs_df = pd.DataFrame(outputs) - boards_df.to_csv(boards_path) - outputs_df.to_csv(outputs_path) + if is_reprocessing: + boards_df = pd.read_csv(boards_path) + outputs_df = pd.read_csv(outputs_path) + boards = boards_df.to_numpy()[:training_size, 1:] + outputs = outputs_df.to_numpy()[:training_size, 1:] + else: + logger.info('Creating training data.') + boards, outputs = search_manager.create_training_data( + milliseconds=search_milliseconds, + data_size=training_size) + + boards_df = pd.DataFrame.from_records(boards) + outputs_df = pd.DataFrame.from_records(outputs) + boards_df.to_csv(boards_path) + outputs_df.to_csv(outputs_path) 
diff --git a/zero_play/zero_play.py b/zero_play/zero_play.py
index 869b5a8..bd8556f 100644
--- a/zero_play/zero_play.py
+++ b/zero_play/zero_play.py
@@ -21,11 +21,17 @@
                                QSizePolicy, QDialog, QWidget, QLabel, QComboBox)
 from alembic import command
 from alembic.config import Config
-from pkg_resources import iter_entry_points, EntryPoint
+from importlib.metadata import entry_points
 from sqlalchemy import create_engine
 from sqlalchemy.orm import Session as BaseSession
 from sqlalchemy.util import immutabledict
 
+try:
+    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'  # Warning and above.
+    import tensorflow as tf  # noqa  Triggers Tensorflow warning messages.
+except ImportError:
+    raise
+
 import zero_play
 from zero_play.about_dialog import Ui_Dialog
 from zero_play.game_state import GameState
@@ -182,10 +188,7 @@ def get_collection_name() -> str:
         return 'Zero Play'
 
     @staticmethod
-    def filter_games(
-            entries: typing.Iterable[EntryPoint]) -> typing.Generator[EntryPoint,
-                                                                      None,
-                                                                      None]:
+    def filter_games(entries):
         yield from entries
 
     @property
@@ -225,7 +228,7 @@ def load_game_list(self, game_layout: QGridLayout):
             if child.widget():
                 child.widget().deleteLater()
         games = self.all_displays
-        all_entries = iter_entry_points('zero_play.game_display')
+        all_entries = entry_points(group='zero_play.game_display')
         filtered_entries = self.filter_games(all_entries)
         for game_entry in filtered_entries:
             display_class = game_entry.load()
@@ -422,6 +425,7 @@ def on_cancel(self):
         self.ui.stacked_widget.setCurrentWidget(self.ui.game_page)
 
     def on_network1(self):
+        # noinspection PyUnresolvedReferences
         file_name, _ = QFileDialog.getOpenFileName(
             self.ui.players_page,
             "Open a file for player 1's neural network.",
@@ -550,7 +554,7 @@ def on_choose_training_path(self) -> None:
             settings.setValue('training_data_path', str(data_path))
             self.ui.training_path.setText(file_name)
 
-    def on_start_training(self) -> None:
+    def on_start_training(self, is_reprocessing: bool = False) -> None:
         ui = self.ui
         if not ui.training_path.text():
             ui.training_message.setText('Choose a data folder.')
@@ -560,7 +564,8 @@ def on_start_training(self) -> None:
               ui.training_size.value(),
               ui.training_comparison.value(),
               ui.training_win_rate.value() / 100,
-              ui.training_path.text())
+              ui.training_path.text(),
+              is_reprocessing)
 
     def on_game_ended(self, game_state: GameState):
         if (self.is_history_dirty or
@@ -700,6 +705,7 @@ def get_file_dialog_options():
     kwargs = {}
     if 'SNAP' in os.environ:
         # Native dialog restricts paths for snap processes to /run/user.
+        # noinspection PyUnresolvedReferences
         kwargs['options'] = QFileDialog.DontUseNativeDialog
     return kwargs
 
@@ -723,7 +729,9 @@ def main():
     window = ZeroPlayWindow()
     if args.train:
         window.ui.training_path.setText(str(args.data.expanduser()))
-        window.on_start_training()
+        window.ui.training_size.setValue(50_000)
+        window.ui.training_comparison.setValue(200)
+        window.on_start_training(is_reprocessing=False)
     else:
         window.show()
     return app.exec()
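The last change swaps the deprecated `pkg_resources` plugin lookup for `importlib.metadata`. As a minimal sketch of the equivalent discovery loop, assuming Python 3.10 or later where `entry_points()` accepts a `group` keyword:

```python
from importlib.metadata import entry_points

# List every plugin registered under the group the main window loads.
for entry in entry_points(group='zero_play.game_display'):
    display_class = entry.load()  # imports the class the plugin registered
    print(entry.name, display_class)
```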