From 08af79082c9c9680a44b9387eb8a3ff5d0ed3b3c Mon Sep 17 00:00:00 2001
From: Daniel Derycke <104598708+DHDev0@users.noreply.github.com>
Date: Thu, 19 Jan 2023 02:34:27 -0500
Subject: [PATCH] manage illegal move

---
 README.md |  1 +
 game.py   | 13 ++++++++++++-
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 82ad5c0..9183801 100644
--- a/README.md
+++ b/README.md
@@ -161,6 +161,7 @@ Features
 * [x] MCTS with 0 simulation (use of prior) or any number of simulation.
 * [x] Model weights automatically saved at best selfplay average reward.
 * [x] Priority or Uniform for sampling in replay buffer.
+* [X] Manage illegal move with negative reward.
 * [X] Scale the loss using the importance sampling ratio.
 * [x] Custom "Loss function" class to apply transformation and loss on label/prediction.
 * [X] Load your pretrained model from tag number.

diff --git a/game.py b/game.py
index c7565b1..5ed7f47 100644
--- a/game.py
+++ b/game.py
@@ -137,7 +137,18 @@ def observation(self,observation_shape=None,
             state = self.flatten_state(self.tuple_test_obs(state))
         else:
             state = feedback[0]
+        self.feedback_state = state
         return state
+
+    def step(self,action):
+        try:
+            next_step = (self.env.step(action))
+        except:
+            obs = self.feedback_state
+            reward = min(-len(self.rewards),-self.limit_of_game_play,-1)
+            done = self.done
+            next_step = (obs,reward,done)
+        return next_step


     def store_search_statistics(self, root):
@@ -185,7 +196,7 @@ def policy_step(self, policy = None, action = None , temperature = 0 ):
             action_onehot_encoded[selected_action] = 1

         # # # apply mouve and return variable of the env
-        step_output = (self.env.step(self.action_map[selected_action]))
+        step_output = self.step(self.action_map[selected_action])

         # # # save game variable to a list to return them
         #contain [observation, reward, done, info] + [meta_data for some gym env]
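The new `step()` wrapper is the core of the change: it delegates to `self.env.step(action)`, and if the environment raises on an illegal action it falls back to the last stored observation (`feedback_state`), a negative reward, and the current `done` flag. Below is a minimal, self-contained sketch of the same fallback pattern, assuming a gym-style environment whose `step()` raises on illegal actions; the wrapper name `IllegalMovePenalty`, its constructor arguments, and the `info` dictionary are illustrative and not part of the repository.

```python
class IllegalMovePenalty:
    """Sketch (not the repository's class): wraps a gym-style env and
    penalizes actions that make env.step() raise, mirroring the patch."""

    def __init__(self, env, limit_of_game_play=500):
        self.env = env
        self.limit_of_game_play = limit_of_game_play  # assumed cap on episode length
        self.rewards = []      # rewards collected so far in the episode
        self.last_obs = None   # last valid observation (the patch's feedback_state)
        self.done = False

    def reset(self):
        # Classic gym API assumed: reset() returns only the observation.
        self.last_obs = self.env.reset()
        self.rewards = []
        self.done = False
        return self.last_obs

    def step(self, action):
        try:
            obs, reward, done, info = self.env.step(action)
            self.last_obs, self.done = obs, done
        except Exception:
            # Illegal move: keep the previous state and apply the patch's
            # min() rule, i.e. the most negative of the three candidates
            # (never milder than -1).
            obs = self.last_obs
            reward = min(-len(self.rewards), -self.limit_of_game_play, -1)
            done = self.done
            info = {"illegal_move": True}
        self.rewards.append(reward)
        return obs, reward, done, info
```

Because the penalized transition reuses the last valid observation, self-play can continue from the same state instead of crashing, and the strongly negative reward discourages the policy from selecting that action again.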