-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
complete human vs ai(minimax search) code
- Loading branch information
zhuliquan
committed
Jul 4, 2018
0 parents
commit f36f066
Showing
10 changed files
with
377 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
.idea/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
# 项目说明 | ||
|
||
> 这是一个使用 Python 实现的、基于 `MiniMax` 搜索的 TicTacToe(井字棋)游戏, | ||
主要参考了一篇[MiniMax的博文](https://blog.csdn.net/wldgg/article/details/50132923)。 | ||
|
||
## 文件说明 | ||
|
||
项目中有四个文件 | ||
- game.py | ||
- human.py | ||
- minimax.py | ||
- run_tictactoe.py | ||
--- | ||
>比较重要的是 game.py 和 minimax.py 两个文件。game.py 中主要包含状态类 State 和游戏类 Game; | ||
minimax.py 中主要包含两个搜索类:MiniMax 和带 alpha-beta 剪枝的 AlphaBeta。 | ||
|
||
## 运行程序 | ||
如果你想和 AI 对弈,可以运行 run_tictactoe.py 这个文件。 | ||
``` | ||
python run_tictactoe.py | ||
``` | ||
|
||
## 运行条件 | ||
- python 3.6.5 | ||
- numpy 1.13.3 | ||
- pandas 0.22.0 | ||
|
||
## 参考 | ||
[minimax搜索的ppt](./reference/ai_minimax.ppt) |
Binary file not shown.
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,152 @@ | ||
#!/usr/bin/env python3 | ||
# -*- coding:utf-8 -*- | ||
# author : Administrator | ||
# date : 2018/6/26 | ||
import numpy as np | ||
# Board markers for the two sides; 0 is used elsewhere in this file for an
# empty cell.
player_o = 1
player_x = -1


def get_opponent(player):
    """
    Return the side that plays against ``player``.

    :param player: the side whose opponent is wanted
    :return: ``player_o`` when ``player`` equals ``player_x``; ``player_x``
        for every other value
    """
    if player == player_x:
        return player_o
    return player_x
|
||
|
||
class State:
    """
    A board position together with the side that moves next.

    ``board`` is a numpy array in which 0 marks an empty cell and the
    ``player_o`` / ``player_x`` values mark occupied cells. The win check
    uses ``board.shape[0]`` as the required line length for rows, columns
    and both diagonals, so the board is expected to be square.
    """

    # Instances are mutable and define __eq__, so they must not be hashed.
    __hash__ = None

    def __init__(self, board, player):
        """
        :param board: numpy array describing the position; it is copied so
            later changes by the caller do not leak into this state
        :param player: the side that moves next
        """
        self.board = board.copy()
        self.player = player

    def __eq__(self, other):
        """
        Two states are equal when both the side to move and the boards match.

        Non-``State`` operands yield ``NotImplemented`` instead of raising,
        and boards of different shapes compare unequal instead of relying on
        elementwise broadcasting.
        """
        if not isinstance(other, State):
            return NotImplemented
        return bool(self.player == other.player
                    and np.array_equal(self.board, other.board))

    def get_available_actions(self):
        """
        List the empty cells of the board; each such cell is a legal action.

        :return: list of ``(row, col)`` tuples of plain ``int`` in row-major
            order
        """
        empty = np.where(self.board == 0)
        # Convert numpy scalars to int so actions print and compare cleanly.
        return [(int(i), int(j)) for i, j in zip(empty[0], empty[1])]

    def get_state_result(self):
        """
        Evaluate the position.

        A side wins when any row, column or diagonal sums to ``+n`` (for
        ``player_o``) or ``-n`` (for ``player_x``), where ``n`` is
        ``board.shape[0]``. A full board without such a line is a draw.

        :return: tuple ``(is_over, winner)``; ``winner`` is ``player_o``,
            ``player_x`` or ``None`` (draw, or game still running)
        """
        board = self.board
        n = board.shape[0]
        line_sums = np.concatenate((
            board.sum(axis=0),                           # column totals
            board.sum(axis=1),                           # row totals
            [board.trace(), np.fliplr(board).trace()],   # both diagonals
        ))

        if (line_sums == n).any():
            return True, player_o
        if (line_sums == -n).any():
            return True, player_x
        if (board != 0).all():
            return True, None
        return False, None

    def get_next_state(self, action):
        """
        Build the state reached when the side to move plays ``action``.

        This state is left untouched.

        :param action: ``(row, col)`` index of the cell to occupy
        :return: a new ``State`` with the cell filled and the opponent to move
        """
        # State.__init__ already copies the board, so one copy is enough.
        successor = State(self.board, get_opponent(self.player))
        successor.board[action] = self.player
        return successor
|
||
|
||
class Game:
    """
    A running game: holds the current ``State`` and advances it move by move.
    """

    # Side that moves first on a fresh board.
    start_player = player_o
    # Edge length of the square board.
    game_size = 3

    def __init__(self, state=None):
        """
        :param state: ``None`` starts from an empty board with
            ``start_player`` to move; otherwise the game starts from a copy
            of the given state
        :raises ValueError: if the given state's board has the wrong size
        """
        self.state = Game._make_state(state)

    @staticmethod
    def _check_board(board):
        """Raise ``ValueError`` unless ``board`` is game_size x game_size."""
        if tuple(board.shape) != (Game.game_size, Game.game_size):
            raise ValueError("用于初始化的棋盘尺寸不对")

    @staticmethod
    def _make_state(state):
        """Return a fresh start state, or a validated copy of ``state``."""
        if state is None:
            empty = np.zeros((Game.game_size, Game.game_size), dtype=np.int32)
            return State(empty, Game.start_player)
        Game._check_board(state.board)
        return State(state.board, state.player)

    def reset(self, state=None):
        """
        Restart the game.

        A new ``State`` object is installed rather than mutating the old one,
        so states handed out earlier are not changed behind the caller's back.

        :param state: ``None`` resets to the empty board; otherwise the game
            is reset to a copy of the given state
        :return: the new current state
        :raises ValueError: if the given state's board has the wrong size
        """
        self.state = Game._make_state(state)
        return self.state

    def step(self, action):
        """
        Play ``action`` for the side to move and make the result current.

        :param action: ``(row, col)`` cell to occupy; a falsy value (``None``
            or an empty tuple) leaves the game unchanged
        :return: the current state after the move
        """
        # A non-empty tuple is always truthy, so (0, 0) is played as well.
        if action:
            self.state = self.state.get_next_state(action)
        return self.state

    def game_result(self):
        """
        Report the outcome of the current position.

        :return: tuple ``(is_over, winner)`` as produced by
            ``State.get_state_result``
        """
        return self.state.get_state_result()

    def render(self, board=None):
        """
        Print a board, one row per line, using " x ", " o " and " . ".

        :param board: ``None`` prints the current position; otherwise the
            given numpy board is printed
        :raises ValueError: if the given board has the wrong size
        """
        # Compare against None: truth-testing an ndarray raises ValueError.
        if board is None:
            board = self.state.board
        else:
            Game._check_board(board)

        symbols = {player_x: " x ", player_o: " o "}
        for i in range(Game.game_size):
            cells = (symbols.get(int(board[i, j]), " . ")
                     for j in range(Game.game_size))
            print("".join(cells))
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
#!/usr/bin/env python3 | ||
# -*- coding:utf-8 -*- | ||
# author : Administrator | ||
# date : 2018/6/29 | ||
|
||
|
||
class Human:
    """Player whose moves are typed on standard input."""

    def __init__(self):
        pass

    def __str__(self):
        return "human"

    def take_action(self, current_state):
        """
        Prompt until the user enters a legal move.

        Input must look like ``i,j``. Malformed input and cells that are not
        in ``current_state.get_available_actions()`` are rejected with a
        message and the prompt is repeated.

        :param current_state: the current state; only its
            ``get_available_actions()`` method is used
        :return: the chosen ``(row, col)`` tuple
        :raises EOFError: if standard input is closed while prompting
        """
        while True:
            command = input("以 i,j 形式输入你的落子,例如1,2:")
            try:
                i, j = (int(index) for index in command.split(","))
            except ValueError:
                # Only parsing errors are retried; Ctrl-C and EOF must
                # propagate so the user can leave the prompt.
                print("输入格式不对,请从新输入")
                continue

            action = (i, j)
            if action in current_state.get_available_actions():
                return action
            print("输入的位置有问题,请从新输入")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,126 @@ | ||
#!/usr/bin/env python3 | ||
# -*- coding:utf-8 -*- | ||
# author : Administrator | ||
# date : 2018/6/26 | ||
import numpy as np | ||
import pandas as pd | ||
from game import State | ||
from game import get_opponent | ||
from typing import Tuple | ||
|
||
|
||
|
||
class MiniMax:
    """Player that searches the full game tree with plain minimax."""

    def __init__(self):
        pass

    def __str__(self):
        return "minimax ai"

    def take_action(self, current_state: "State"):
        """
        Choose the best move for the side to move in ``current_state``.

        Each position is scored from the point of view of the side to move
        there, so a child's score is negated when it is passed to its parent.
        Among equally good moves the first one listed by
        ``get_available_actions()`` is chosen.

        :param current_state: position to move from
        :return: the chosen action, or ``None`` if the game is already over
        """

        def recurse(state: "State") -> Tuple[int, object]:
            """
            Score ``state`` for its side to move.

            :param state: position to evaluate
            :return: tuple ``(utility, action)``; utility is 1 for a win,
                -1 for a loss and 0 for a draw, and ``action`` is ``None``
                at terminal positions
            """
            is_over, winner = state.get_state_result()
            if is_over:
                if winner is None:
                    return 0, None
                return (1 if winner == state.player else -1), None

            best_value, best_action = None, None
            for action in state.get_available_actions():
                # The next position is scored for the opponent, so negate it.
                value = -recurse(state.get_next_state(action))[0]
                # Strict comparison keeps the first of several equal moves.
                if best_value is None or value > best_value:
                    best_value, best_action = value, action
            return best_value, best_action

        _, action = recurse(current_state)
        return action
|
||
|
||
|
||
class AlphaBeta:
    """Minimax player that prunes the search with alpha-beta bounds."""

    def __init__(self):
        pass

    def __str__(self):
        # NOTE: "purning" is a typo for "pruning"; kept because this text is
        # printed by the game loop.
        return "minimax ai with alpha-beta purning"

    def take_action(self, current_state: "State"):
        """
        Choose the best move for the side to move in ``current_state``.

        All utilities are expressed from the point of view of that side:
        1 for a win, -1 for a loss and 0 for a draw. Positions where it moves
        are maximised and all other positions are minimised.

        For this particular game a simpler shortcut would also work: stop
        scanning a position's moves as soon as one of them wins. The general
        alpha-beta form is used here instead.

        :param current_state: position to move from
        :return: the chosen action, or ``None`` if the game is already over
        """
        # The searching side is also stored on the instance, as before.
        self.player = root_player = current_state.player

        def recurse(state: "State", alpha, beta) -> Tuple[int, object]:
            """
            Score ``state`` for the searching side within ``(alpha, beta)``.

            :param state: position to evaluate
            :param alpha: best utility the searching side is already assured
            :param beta: lowest utility the opponent is already able to force
            :return: tuple ``(utility, action)``; ``action`` is ``None`` at
                terminal positions
            """
            is_over, winner = state.get_state_result()
            if is_over:
                if winner is None:
                    return 0, None
                return (1 if winner == root_player else -1), None

            maximizing = state.player == root_player
            best_value = float("-inf") if maximizing else float("inf")
            best_action = None
            for action in state.get_available_actions():
                value = recurse(state.get_next_state(action), alpha, beta)[0]
                # Update only on strict improvement. A pruned child returns a
                # bound, not its true value, so a tie with the current best
                # must never replace an already established move.
                if maximizing:
                    if value > best_value:
                        best_value, best_action = value, action
                    alpha = max(alpha, best_value)
                else:
                    if value < best_value:
                        best_value, best_action = value, action
                    beta = min(beta, best_value)
                if beta <= alpha:
                    break
            return best_value, best_action

        _, action = recurse(current_state, float("-inf"), float("inf"))
        return action
|
||
|
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
#!/usr/bin/env python3 | ||
# -*- coding:utf-8 -*- | ||
# author : Administrator | ||
# date : 2018/6/28 | ||
from game import Game | ||
from minimax import MiniMax, AlphaBeta | ||
from human import Human | ||
|
||
|
||
if __name__ == '__main__':
    # Console match between a human and the search AI. Seat 1 (the human)
    # makes the first move; seats alternate after every move.
    game = Game()
    ai = AlphaBeta()  # use MiniMax() instead for the un-pruned search
    human = Human()
    players = {0: ai, 1: human}

    turn = 1
    is_over = False
    while not is_over:
        mover = players[turn]
        action = mover.take_action(game.state)
        game.step(action)
        game.render()
        print("###{0}在{1}落子###".format(mover, action))

        # Check the outcome; the side that just moved is reported as winner.
        is_over, winner = game.game_result()
        if not is_over:
            # Hand the move to the other seat.
            turn = 1 - turn
        elif winner:
            print("winner {0}".format(mover))
        else:
            print("平局")
|