Skip to content

Commit

Permalink
complete human vs ai(minimax search) code
Browse files Browse the repository at this point in the history
  • Loading branch information
zhuliquan committed Jul 4, 2018
0 parents commit f36f066
Show file tree
Hide file tree
Showing 10 changed files with 377 additions and 0 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
.idea/
29 changes: 29 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# 项目说明

> 这是一个使用python实现的`MiniMax`搜索玩TicTacToe游戏,
这里主要参考了一篇[MiniMax的博文](https://blog.csdn.net/wldgg/article/details/50132923)

## 文件说明

项目中有四个文件
- game.py
- human.py
- minimax.py
- run_tictactoe.py
---
>比较重要的是game.py 和 minimax.py 文件。在game.py 中主要包含状态类State 和 游戏类Game
在minimax.py 中主要包含搜索树的节点类Node和搜索树类MiniMax AlphaBeta

## 运行程序
如果你想和ai玩这个游戏,可以运行run_tictactoe.py这个文件。
```
python run_tictactoe.py
```

## 运行条件
- python 3.6.5
- numpy 1.13.3
- pandas 0.22.0

## 参考
[minimax搜索的ppt](./reference/ai_minimax.ppt)
Binary file added __pycache__/game.cpython-36.pyc
Binary file not shown.
Binary file added __pycache__/human.cpython-36.pyc
Binary file not shown.
Binary file added __pycache__/minimax.cpython-36.pyc
Binary file not shown.
152 changes: 152 additions & 0 deletions game.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
# author : Administrator
# date : 2018/6/26
import numpy as np
player_o = 1
player_x = -1


def get_opponent(player):
    """Return the mark of *player*'s opponent (o <-> x).

    :param player: one of player_o / player_x
    :return: the other player's mark
    """
    if player == player_x:
        return player_o
    return player_x


class State:
    """A TicTacToe position: a board matrix plus the player who moves next.

    Board cells hold player_o (1), player_x (-1), or 0 for an empty square.
    """

    def __init__(self, board, player):
        # Defensive copy so mutating the caller's array cannot change us.
        self.board = board.copy()
        self.player = player

    def __eq__(self, other):
        """Two states are equal when board contents and player to move match."""
        # bool() because ndarray.all() yields a numpy bool; callers get a
        # plain Python bool either way.
        return bool((self.board == other.board).all()) and self.player == other.player

    def get_available_actions(self):
        """Return every empty cell as a (row, col) action tuple.

        :return: list of (row, col) tuples where the board is 0
        """
        rows, cols = np.where(self.board == 0)
        return list(zip(rows, cols))

    def get_state_result(self):
        """Evaluate the position.

        :return: tuple (is_over, winner); winner is player_o, player_x, or
            None — None means either a draw (is_over True) or the game is
            still running (is_over False).
        """
        board = self.board
        n = board.shape[0]
        # axis=0 sums down each column, axis=1 across each row (the original
        # variable names had these swapped, though both were checked anyway).
        col_sums = np.sum(board, 0)
        row_sums = np.sum(board, 1)
        diag_main = board.trace()
        diag_anti = np.fliplr(board).trace()

        # A line summing to +n / -n means that player filled it completely.
        totals = np.concatenate((col_sums, row_sums, [diag_main, diag_anti]))
        if (totals == n).any():
            return True, player_o
        if (totals == -n).any():
            return True, player_x
        if (board != 0).all():
            return True, None  # board full with no winning line: draw
        return False, None

    def get_next_state(self, action):
        """Return the state after the current player plays *action*.

        :param action: (row, col) of an empty cell
        :return: new State with the move applied and the opponent to move
        """
        next_board = self.board.copy()
        next_board[action] = self.player
        return State(next_board, get_opponent(self.player))


class Game:
    """TicTacToe game wrapper holding the mutable current State."""

    start_player = player_o  # o always opens
    game_size = 3            # 3x3 board

    def __init__(self, state=None):
        """Create a game, optionally initialised from an existing state.

        :param state: None for a fresh empty board, or a State to copy from
        :raises Exception: if the supplied state's board has the wrong size
        """
        if state is not None:
            if state.board.shape[0] != Game.game_size:
                raise Exception("用于初始化的棋盘尺寸不对")

            board = state.board
            player = state.player
        else:
            board = np.zeros((Game.game_size, Game.game_size), dtype=np.int32)
            player = Game.start_player
        self.state = State(board, player)

    def reset(self, state=None):
        """Re-initialise the game.

        :param state: None for the default empty-board setup, otherwise an
            external State used to initialise the game
        :return: the game's (new) current State
        :raises Exception: if the supplied state's board has the wrong size
        """
        if state is not None:
            if state.board.shape[0] != Game.game_size:
                raise Exception("用于初始化的棋盘尺寸不对")
            board = state.board
            player = state.player
            self.state = State(board, player)
        else:
            self.state.board = np.zeros((Game.game_size, Game.game_size), dtype=np.int32)
            self.state.player = Game.start_player
        return self.state

    def step(self, action):
        """Apply *action* to the current position, mutating the game state.

        :param action: a legal (row, col) move, or None to leave the state
            unchanged
        :return: the (possibly new) current State
        """
        # Explicit None test: a (0, 0) move must not be skipped by accident.
        if action is not None:
            self.state = self.state.get_next_state(action)
        return self.state

    def game_result(self):
        """Return the result of the current position.

        :return: tuple (is_over, winner)
        """
        return self.state.get_state_result()

    def render(self, board=None):
        """Print a board as text, defaulting to the game's own board.

        :param board: None to draw self.state.board, otherwise an external
            board array to draw
        :raises Exception: if the supplied board has the wrong size
        """
        # Bug fix: the original `if board:` evaluated the truth value of a
        # multi-element numpy array, which raises "The truth value of an
        # array ... is ambiguous"; compare identity against None instead.
        if board is not None:
            if board.shape[0] != Game.game_size:
                raise Exception("用于初始化的棋盘尺寸不对")
        else:
            board = self.state.board

        for i in range(Game.game_size):
            for j in range(Game.game_size):
                if board[i, j] == player_x:
                    print(" x ", end="")
                elif board[i, j] == player_o:
                    print(" o ", end="")
                else:
                    print(" . ", end="")
            print()

33 changes: 33 additions & 0 deletions human.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
# author : Administrator
# date : 2018/6/29


class Human:
    """Console-driven human player: reads moves from stdin."""

    def __init__(self):
        pass

    def __str__(self):
        return "human"

    def take_action(self, current_state):
        """Prompt on stdin until the human enters a legal (row, col) move.

        :param current_state: state whose get_available_actions() is used to
            validate the entered move
        :return: the chosen (row, col) action
        """
        while True:
            # Inner loop: repeat until the raw input parses as two integers.
            while True:
                command = input("以 i,j 形式输入你的落子,例如1,2:")
                try:
                    i, j = [int(index) for index in command.split(",")]
                    break
                except ValueError:
                    # int() failure and a wrong component count both raise
                    # ValueError; the original bare `except:` also swallowed
                    # KeyboardInterrupt / SystemExit, which must propagate.
                    print("输入格式不对,请重新输入")
            action = i, j
            if action not in current_state.get_available_actions():
                print("输入的位置有问题,请重新输入")
            else:
                break
        return action
126 changes: 126 additions & 0 deletions minimax.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
# author : Administrator
# date : 2018/6/26
import numpy as np
import pandas as pd
from game import State
from game import get_opponent
from typing import Tuple



class MiniMax:
    """Exhaustive negamax player for TicTacToe.

    Assumes a zero-sum two-player game with terminal utilities in
    {-1, 0, 1}, always scored from the point of view of the player to
    move in the state being evaluated.
    """

    def __init__(self):
        pass

    def __str__(self):
        return "minimax ai"

    def take_action(self, current_state: "State"):
        """Return the optimal action for the player to move in *current_state*.

        :param current_state: game state providing get_state_result(),
            get_available_actions() and get_next_state()
        :return: the (row, col) action with the best negamax value
        """

        def recurse(state) -> Tuple[int, object]:
            """Return (utility, action): the best achievable utility for the
            player to move in *state*, and an action realising it (None at
            terminal states)."""
            is_over, winner = state.get_state_result()
            if is_over:
                if winner is None:  # draw
                    return 0, None
                # get_state_result() only ever reports one of the two
                # players, so "not me" means "my opponent".
                return (1, None) if winner == state.player else (-1, None)

            # Negamax step: a child's value for the opponent is the negation
            # of its value for us.  Strict ">" keeps the FIRST maximum,
            # matching the first-occurrence tie-breaking of the pandas
            # Series.idxmax() the original built per node — but without the
            # cost of constructing a Series at every search node.
            best_value, best_action = None, None
            for action in state.get_available_actions():
                value = -recurse(state.get_next_state(action))[0]
                if best_value is None or value > best_value:
                    best_value, best_action = value, action
            return best_value, best_action

        _, action = recurse(current_state)
        return action



class AlphaBeta:
    """Minimax player with alpha-beta pruning.

    Terminal utilities are in {-1, 0, 1}, always scored from the point of
    view of the root player handed to take_action().
    """

    def __init__(self):
        pass

    def __str__(self):
        # Typo fix: "purning" -> "pruning".
        return "minimax ai with alpha-beta pruning"

    def take_action(self, current_state: "State"):
        """Return the optimal action for the player to move in *current_state*.

        :param current_state: game state providing get_state_result(),
            get_available_actions() and get_next_state()
        :return: the (row, col) action with the best minimax value
        """
        # Kept as an attribute for backward compatibility: the root player
        # whose utility the whole search maximises.
        self.player = current_state.player

        def recurse(state, alpha, beta) -> Tuple[int, object]:
            """Return (utility, action) for *state*, pruning with alpha/beta.

            :param alpha: best value the maximising (root) player is already
                guaranteed on the path to this state
            :param beta: best value the minimising opponent is already
                guaranteed on the path to this state
            """
            is_over, winner = state.get_state_result()
            if is_over:
                if winner is None:  # draw
                    return 0, None
                # Utility is always relative to the root player; any
                # non-root winner is the opponent.
                return (1, None) if winner == self.player else (-1, None)

            available_actions = state.get_available_actions()
            if state.player == self.player:
                # Maximising node.
                max_value = (float("-inf"), None)
                for action in available_actions:
                    child = recurse(state.get_next_state(action), alpha, beta)[0]
                    max_value = max(max_value, (child, action))
                    alpha = max(alpha, max_value[0])
                    if beta <= alpha:  # opponent will never allow this line
                        break
                return max_value
            else:
                # Minimising node (the opponent).  Bug fix: the original
                # `elif state.player == get_opponent(...)` could fall through
                # and implicitly return None; in a two-player game this
                # branch is the only alternative, so use a plain `else`.
                min_value = (float("inf"), None)
                for action in available_actions:
                    child = recurse(state.get_next_state(action), alpha, beta)[0]
                    min_value = min(min_value, (child, action))
                    beta = min(beta, min_value[0])
                    if beta <= alpha:
                        break
                return min_value

        _, action = recurse(current_state, float("-inf"), float("inf"))
        return action


Binary file added reference/ai_minimax.ppt
Binary file not shown.
36 changes: 36 additions & 0 deletions run_tictactoe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
# author : Administrator
# date : 2018/6/28
from game import Game
from minimax import MiniMax, AlphaBeta
from human import Human


if __name__ == '__main__':
    game = Game()
    human = Human()
    # ai = MiniMax()
    ai = AlphaBeta()
    players = {0: ai, 1: human}

    # The human (index 1) opens; indices alternate each turn.
    turn = 1
    while True:
        state = game.state
        move = players[turn].take_action(state)
        game.step(move)
        game.render()
        print("###{0}在{1}落子###".format(players[turn], move))

        # Check whether that move ended the game.
        finished, winner = game.game_result()
        if finished:
            if winner:
                print("winner {0}".format(players[turn]))
            else:
                print("平局")
            break

        # Hand the move to the other player.
        turn = 1 - turn

0 comments on commit f36f066

Please sign in to comment.