Skip to content

Commit

Permalink
complete human vs ai(minimax search) code
Browse files Browse the repository at this point in the history
  • Loading branch information
zhuliquan committed Jul 4, 2018
0 parents commit f36f066
Show file tree
Hide file tree
Showing 10 changed files with 377 additions and 0 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
.idea/
29 changes: 29 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# 项目说明

> 这是一个使用python实现的`MiniMax`搜索玩TicTacToe游戏,
这里主要参考了一篇[MiniMax的博文](https://blog.csdn.net/wldgg/article/details/50132923)

## 文件说明

项目中有四个文件
- game.py
- human.py
- minimax.py
- run_tictactoe.py
---
>比较重要的是game.py 和 minimax.py 文件。在game.py 中主要包含状态类State 和 游戏类Game
在minimax.py 中主要包含搜索树的节点类Node和搜索树类MiniMax AlphaBeta

## 运行程序
如果你想和ai玩这个游戏,可以运行run_tictactoe.py这个文件。
```
python run_tictactoe.py
```

## 运行条件
- python 3.6.5
- numpy 1.13.3
- pandas 0.22.0

## 参考
[minimax搜索的ppt](./reference/ai_minimax.ppt)
Binary file added __pycache__/game.cpython-36.pyc
Binary file not shown.
Binary file added __pycache__/human.cpython-36.pyc
Binary file not shown.
Binary file added __pycache__/minimax.cpython-36.pyc
Binary file not shown.
152 changes: 152 additions & 0 deletions game.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
# author : Administrator
# date : 2018/6/26
import numpy as np
player_o = 1
player_x = -1


def get_opponent(player):
    """Return the mark of *player*'s opponent (o <-> x).

    :param player: one of player_o / player_x
    :return: the other player's mark
    """
    if player == player_x:
        return player_o
    return player_x


class State:
    """A TicTacToe position: a board matrix plus the player who moves next.

    Board cells hold player_o (1), player_x (-1), or 0 for an empty square.
    """

    def __init__(self, board, player):
        # Defensive copy so mutating the caller's array cannot change us.
        self.board = board.copy()
        self.player = player

    def __eq__(self, other):
        """Two states are equal when board contents and player to move match."""
        # bool() because ndarray.all() yields a numpy bool; callers get a
        # plain Python bool either way.
        return bool((self.board == other.board).all()) and self.player == other.player

    def get_available_actions(self):
        """Return every empty cell as a (row, col) action tuple.

        :return: list of (row, col) tuples where the board is 0
        """
        rows, cols = np.where(self.board == 0)
        return list(zip(rows, cols))

    def get_state_result(self):
        """Evaluate the position.

        :return: tuple (is_over, winner); winner is player_o, player_x, or
            None — None means either a draw (is_over True) or the game is
            still running (is_over False).
        """
        board = self.board
        n = board.shape[0]
        # axis=0 sums down each column, axis=1 across each row (the original
        # variable names had these swapped, though both were checked anyway).
        col_sums = np.sum(board, 0)
        row_sums = np.sum(board, 1)
        diag_main = board.trace()
        diag_anti = np.fliplr(board).trace()

        # A line summing to +n / -n means that player filled it completely.
        totals = np.concatenate((col_sums, row_sums, [diag_main, diag_anti]))
        if (totals == n).any():
            return True, player_o
        if (totals == -n).any():
            return True, player_x
        if (board != 0).all():
            return True, None  # board full with no winning line: draw
        return False, None

    def get_next_state(self, action):
        """Return the state after the current player plays *action*.

        :param action: (row, col) of an empty cell
        :return: new State with the move applied and the opponent to move
        """
        next_board = self.board.copy()
        next_board[action] = self.player
        return State(next_board, get_opponent(self.player))


class Game:
    """TicTacToe game wrapper holding the mutable current State."""

    start_player = player_o  # o always opens
    game_size = 3            # 3x3 board

    def __init__(self, state=None):
        """Create a game, optionally initialised from an existing state.

        :param state: None for a fresh empty board, or a State to copy from
        :raises Exception: if the supplied state's board has the wrong size
        """
        if state is not None:
            if state.board.shape[0] != Game.game_size:
                raise Exception("用于初始化的棋盘尺寸不对")

            board = state.board
            player = state.player
        else:
            board = np.zeros((Game.game_size, Game.game_size), dtype=np.int32)
            player = Game.start_player
        self.state = State(board, player)

    def reset(self, state=None):
        """Re-initialise the game.

        :param state: None for the default empty-board setup, otherwise an
            external State used to initialise the game
        :return: the game's (new) current State
        :raises Exception: if the supplied state's board has the wrong size
        """
        if state is not None:
            if state.board.shape[0] != Game.game_size:
                raise Exception("用于初始化的棋盘尺寸不对")
            board = state.board
            player = state.player
            self.state = State(board, player)
        else:
            self.state.board = np.zeros((Game.game_size, Game.game_size), dtype=np.int32)
            self.state.player = Game.start_player
        return self.state

    def step(self, action):
        """Apply *action* to the current position, mutating the game state.

        :param action: a legal (row, col) move, or None to leave the state
            unchanged
        :return: the (possibly new) current State
        """
        # Explicit None test: a (0, 0) move must not be skipped by accident.
        if action is not None:
            self.state = self.state.get_next_state(action)
        return self.state

    def game_result(self):
        """Return the result of the current position.

        :return: tuple (is_over, winner)
        """
        return self.state.get_state_result()

    def render(self, board=None):
        """Print a board as text, defaulting to the game's own board.

        :param board: None to draw self.state.board, otherwise an external
            board array to draw
        :raises Exception: if the supplied board has the wrong size
        """
        # Bug fix: the original `if board:` evaluated the truth value of a
        # multi-element numpy array, which raises "The truth value of an
        # array ... is ambiguous"; compare identity against None instead.
        if board is not None:
            if board.shape[0] != Game.game_size:
                raise Exception("用于初始化的棋盘尺寸不对")
        else:
            board = self.state.board

        for i in range(Game.game_size):
            for j in range(Game.game_size):
                if board[i, j] == player_x:
                    print(" x ", end="")
                elif board[i, j] == player_o:
                    print(" o ", end="")
                else:
                    print(" . ", end="")
            print()

33 changes: 33 additions & 0 deletions human.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
# author : Administrator
# date : 2018/6/29


class Human:
    """Console-driven human player: reads moves from stdin."""

    def __init__(self):
        pass

    def __str__(self):
        return "human"

    def take_action(self, current_state):
        """Prompt on stdin until the human enters a legal (row, col) move.

        :param current_state: state whose get_available_actions() is used to
            validate the entered move
        :return: the chosen (row, col) action
        """
        while True:
            # Inner loop: repeat until the raw input parses as two integers.
            while True:
                command = input("以 i,j 形式输入你的落子,例如1,2:")
                try:
                    i, j = [int(index) for index in command.split(",")]
                    break
                except ValueError:
                    # int() failure and a wrong component count both raise
                    # ValueError; the original bare `except:` also swallowed
                    # KeyboardInterrupt / SystemExit, which must propagate.
                    print("输入格式不对,请重新输入")
            action = i, j
            if action not in current_state.get_available_actions():
                print("输入的位置有问题,请重新输入")
            else:
                break
        return action
126 changes: 126 additions & 0 deletions minimax.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
# author : Administrator
# date : 2018/6/26
import numpy as np
import pandas as pd
from game import State
from game import get_opponent
from typing import Tuple



class MiniMax:
    """Exhaustive negamax player for TicTacToe.

    Assumes a zero-sum two-player game with terminal utilities in
    {-1, 0, 1}, always scored from the point of view of the player to
    move in the state being evaluated.
    """

    def __init__(self):
        pass

    def __str__(self):
        return "minimax ai"

    def take_action(self, current_state: "State"):
        """Return the optimal action for the player to move in *current_state*.

        :param current_state: game state providing get_state_result(),
            get_available_actions() and get_next_state()
        :return: the (row, col) action with the best negamax value
        """

        def recurse(state) -> Tuple[int, object]:
            """Return (utility, action): the best achievable utility for the
            player to move in *state*, and an action realising it (None at
            terminal states)."""
            is_over, winner = state.get_state_result()
            if is_over:
                if winner is None:  # draw
                    return 0, None
                # get_state_result() only ever reports one of the two
                # players, so "not me" means "my opponent".
                return (1, None) if winner == state.player else (-1, None)

            # Negamax step: a child's value for the opponent is the negation
            # of its value for us.  Strict ">" keeps the FIRST maximum,
            # matching the first-occurrence tie-breaking of the pandas
            # Series.idxmax() the original built per node — but without the
            # cost of constructing a Series at every search node.
            best_value, best_action = None, None
            for action in state.get_available_actions():
                value = -recurse(state.get_next_state(action))[0]
                if best_value is None or value > best_value:
                    best_value, best_action = value, action
            return best_value, best_action

        _, action = recurse(current_state)
        return action



class AlphaBeta:
    """Minimax player with alpha-beta pruning.

    Terminal utilities are in {-1, 0, 1}, always scored from the point of
    view of the root player handed to take_action().
    """

    def __init__(self):
        pass

    def __str__(self):
        # Typo fix: "purning" -> "pruning".
        return "minimax ai with alpha-beta pruning"

    def take_action(self, current_state: "State"):
        """Return the optimal action for the player to move in *current_state*.

        :param current_state: game state providing get_state_result(),
            get_available_actions() and get_next_state()
        :return: the (row, col) action with the best minimax value
        """
        # Kept as an attribute for backward compatibility: the root player
        # whose utility the whole search maximises.
        self.player = current_state.player

        def recurse(state, alpha, beta) -> Tuple[int, object]:
            """Return (utility, action) for *state*, pruning with alpha/beta.

            :param alpha: best value the maximising (root) player is already
                guaranteed on the path to this state
            :param beta: best value the minimising opponent is already
                guaranteed on the path to this state
            """
            is_over, winner = state.get_state_result()
            if is_over:
                if winner is None:  # draw
                    return 0, None
                # Utility is always relative to the root player; any
                # non-root winner is the opponent.
                return (1, None) if winner == self.player else (-1, None)

            available_actions = state.get_available_actions()
            if state.player == self.player:
                # Maximising node.
                max_value = (float("-inf"), None)
                for action in available_actions:
                    child = recurse(state.get_next_state(action), alpha, beta)[0]
                    max_value = max(max_value, (child, action))
                    alpha = max(alpha, max_value[0])
                    if beta <= alpha:  # opponent will never allow this line
                        break
                return max_value
            else:
                # Minimising node (the opponent).  Bug fix: the original
                # `elif state.player == get_opponent(...)` could fall through
                # and implicitly return None; in a two-player game this
                # branch is the only alternative, so use a plain `else`.
                min_value = (float("inf"), None)
                for action in available_actions:
                    child = recurse(state.get_next_state(action), alpha, beta)[0]
                    min_value = min(min_value, (child, action))
                    beta = min(beta, min_value[0])
                    if beta <= alpha:
                        break
                return min_value

        _, action = recurse(current_state, float("-inf"), float("inf"))
        return action


Binary file added reference/ai_minimax.ppt
Binary file not shown.
36 changes: 36 additions & 0 deletions run_tictactoe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
# author : Administrator
# date : 2018/6/28
from game import Game
from minimax import MiniMax, AlphaBeta
from human import Human


if __name__ == '__main__':
    game = Game()
    human = Human()
    # ai = MiniMax()
    ai = AlphaBeta()
    players = {0: ai, 1: human}

    # The human (index 1) opens; indices alternate each turn.
    turn = 1
    while True:
        state = game.state
        move = players[turn].take_action(state)
        game.step(move)
        game.render()
        print("###{0}在{1}落子###".format(players[turn], move))

        # Check whether that move ended the game.
        finished, winner = game.game_result()
        if finished:
            if winner:
                print("winner {0}".format(players[turn]))
            else:
                print("平局")
            break

        # Hand the move to the other player.
        turn = 1 - turn

0 comments on commit f36f066

Please sign in to comment.