-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathNNGoPlayer.py
executable file
·202 lines (156 loc) · 6.31 KB
/
NNGoPlayer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
import gym
import pachi_py
import Rocgo
import numpy as np
import random
from constants import *
from rochesterWrappers import *
from gym.envs.board_game import go
"""
pachi_py constants:
pachi_py.PASS_COORD : -1 (corresponds to 81 after _coord_to_action())
pachi_py.RESIGN_COORD : -2 (corresponds to 82 after _coord_to_action())
pachi_py.BLACK : 1
pachi_py.WHITE : 2
"""
def nn_vs_nnGame(rocEnv, playBlack, nnBlack, nnWhite, verbose=False, playbyplay=False):
    """
    Plays two NNGoPlayer instances against each other until the game is decided.

    @param rocEnv : board queried for is_end_of_game and get_winner()
    @param playBlack : True when black is the side to move first
    @param nnBlack : player moving for black
    @param nnWhite : player moving for white
    @param verbose : forwarded to rocEnv.get_winner()
    @param playbyplay : when True, prints the board before every move and
                        forwards the flag to makemoveRL()
    @return : NNGoPlayer.BLACK or NNGoPlayer.WHITE, or None on a tie
    """
    movesPlayed = 0
    while True:
        if playbyplay:
            printRocBoard(nnBlack.rocEnv)

        mover = nnBlack if playBlack else nnWhite
        if mover.makemoveRL(playbyplay=playbyplay):
            # makemoveRL() returns True on resignation: the other side wins.
            return NNGoPlayer.WHITE if playBlack else NNGoPlayer.BLACK

        playBlack = not playBlack

        # Score the game once the move cap is exceeded or the board reports
        # the game as finished.
        if movesPlayed > MAX_MOVES_PER_GAME or rocEnv.is_end_of_game:
            result = rocEnv.get_winner(verbose=verbose)
            if result == 0:
                # tie
                return None
            if result == Rocgo.BLACK:
                return NNGoPlayer.BLACK
            return NNGoPlayer.WHITE

        movesPlayed += 1
class NNGoPlayer(object):
    """
    Implements the Neural Net powered Go player.

    Every state the player is asked to move from is appended to ``states``
    and every move it chooses is appended to ``actions``.
    """
    BLACK = 0
    WHITE = 1

    def __init__(self, color, nnmodel, gymEnv=None, rocEnv=None):
        """
        @type color : int
        @param color : NNGoPlayer.BLACK or NNGoPlayer.WHITE (any value other
                       than BLACK is treated as white)
        @param nnmodel : policy model; only its make_move() method is used
        @param gymEnv : gym Go environment, required by makemoveGym() only
        @param rocEnv : Rochester board to play on; when None a fresh board
                        is created with initRocBoard()
        """
        self.gymEnv = gymEnv
        self.color = color
        self.nnmodel = nnmodel
        # Compare against None explicitly: a truthiness test would silently
        # swap a supplied board that happens to evaluate falsy for a new one.
        if rocEnv is not None:
            self.rocEnv = rocEnv
        else:
            self.rocEnv = initRocBoard()
        isBlack = (color == NNGoPlayer.BLACK)
        self.rocColor = Rocgo.BLACK if isBlack else Rocgo.WHITE
        self.pachiColor = pachi_py.BLACK if isBlack else pachi_py.WHITE
        self.states = []
        self.actions = []

    def makeRandomValidMove(self):
        """
        Returns a uniformly random entry of get_legal_coords(self.rocEnv).
        """
        return random.choice(get_legal_coords(self.rocEnv))

    @staticmethod
    def _pickLegalAction(probs, legal_actions, draw, num_actions, fallback):
        """
        Samples an action index from ``probs`` restricted to ``legal_actions``.

        @param probs : per-action weights, indexed by action
        @param legal_actions : iterable of action indices that may be played
        @param draw : random number in [0, 1] selecting the action
        @param num_actions : only indices below this value can be returned
        @param fallback : returned when no action can be selected
        @return : the sampled action index, or ``fallback``
        """
        # Build the set once so each membership test is O(1).
        legal = set(legal_actions)
        masked = [float(p) if action in legal else 0.0
                  for action, p in enumerate(probs)]
        total = sum(masked)
        # Written as "not >" so that a NaN total is rejected as well.
        if not total > 0:
            return fallback
        # Scaling the draw by the total is equivalent to normalising every
        # weight, without recomputing the sum per element.
        remaining = draw * total
        for action, weight in enumerate(masked[:num_actions]):
            if weight <= 0:
                # Never select a masked action, even when draw is exactly 0.
                continue
            remaining -= weight
            if remaining <= 0:
                return action
        return fallback

    def nnMoveLogic(self, state):
        """
        Samples a move from the policy network output, restricted to the
        moves reported by get_legal_coords(self.rocEnv).

        @param state : feature planes of shape
                       (NUM_FEATURES, BOARD_SZ, BOARD_SZ)
        @return : action index to play; PASS_ACTION when no legal action
                  carries any probability
        """
        # The network takes a batch of one in (BOARD_SZ, BOARD_SZ,
        # NUM_FEATURES) layout.
        batch = np.zeros((1, BOARD_SZ, BOARD_SZ, NUM_FEATURES))
        batch[0, :, :, :] = np.transpose(state, axes=[1, 2, 0])
        # The per-action vector sits at [0][0] of make_move()'s result.
        probs = self.nnmodel.make_move(batch)[0][0]
        legal_actions = get_legal_coords(self.rocEnv)
        return self._pickLegalAction(probs, legal_actions,
                                     random.uniform(0, 1),
                                     NUM_ACTIONS, PASS_ACTION)

    def updatePachiMove(self, observation, playbyplay=False):
        """
        Infer the pachi movement from observations.

        Applies the inferred move to the Rochester board and stores it in
        ``last_pachi_mv`` (PASS_ACTION when no new stone is found).

        @param observation : observation returned by gymEnv.step(); the plane
                             at index (color+1)%2 is read as the opponent's
        @param playbyplay : when True, prints the inferred move
        """
        pachiBoard = observation[(self.color + 1) % 2]
        if hasattr(self, 'last_obs'):
            # Only entries that increased since the previous observation.
            oneLoc = np.where((pachiBoard - self.last_obs) > 0)
        else:
            oneLoc = np.where(pachiBoard > 0)
        pachiRocColor = Rocgo.WHITE if self.color == NNGoPlayer.BLACK else Rocgo.BLACK
        if oneLoc[0].size == 0:
            # No new stone: treated as a pass.
            opponentMv = None
            self.last_pachi_mv = PASS_ACTION
        else:
            # np.where yields (first-axis, second-axis) indices; the move
            # tuple stores them swapped.
            opponentMv = (oneLoc[1][0], oneLoc[0][0])
            self.last_pachi_mv = opponentMv[0] + opponentMv[1] * BOARD_SZ
        self.rocEnv.do_move(opponentMv, color=pachiRocColor)
        if playbyplay:
            pachiLabel = "Black" if self.color == NNGoPlayer.WHITE else "White"
            if opponentMv is not None:
                print("Pachi (%s) - %d" % (pachiLabel, self.last_pachi_mv))
            else:
                print("Pachi (%s) - %s" % (pachiLabel, opponentMv))
        self.last_obs = pachiBoard

    def makemoveGym(self, move=-1, playbyplay=False):
        """
        Plays a move against Pachi.
        Advances both the gym environment and the Rochester gameboard.

        @param move : action to play; -1 lets nnMoveLogic() choose
        @param playbyplay : when True, prints the moves
        @return : True when the move is RESIGN_ACTION, otherwise the value of
                  gymEnv.state.board.is_terminal
        """
        # check the state and store it
        state = rocBoard2State(self.rocEnv)
        self.states.append(state)
        if move == -1:
            move = self.nnMoveLogic(state)
        if playbyplay:
            print("Policy (%s) - %d" % ("Black" if self.color == NNGoPlayer.BLACK else "White", move))
        # store the action chosen
        self.actions.append(move)
        # take the action in the OpenAI world
        observation, _, _, _ = self.gymEnv.step(move)
        if move == RESIGN_ACTION:
            return True
        # update the Rochester board for my move
        self.rocEnv.do_move(intMove2rocMove(move), color=self.rocColor)
        # update the Rochester board for the opponent's move
        self.updatePachiMove(observation, playbyplay=playbyplay)
        return self.gymEnv.state.board.is_terminal

    def makemoveRL(self, playRandom=False, playbyplay=False):
        """
        Plays a move for RL learning.
        Only uses the Rochester gameboard.

        @param playRandom : when True, picks a random legal move instead of
                            asking the network
        @param playbyplay : when True, prints the move
        @return : True if the player resigns, False otherwise
        """
        # check the state and store it
        state = rocBoard2State(self.rocEnv)
        self.states.append(state)
        if playRandom:
            move = self.makeRandomValidMove()
        else:
            move = self.nnMoveLogic(state)
        if playbyplay:
            print("%s - move: %d" % ("Black" if self.color == NNGoPlayer.BLACK else "White", move))
        # store the action chosen
        self.actions.append(move)
        if move == RESIGN_ACTION:
            return True
        self.rocEnv.do_move(intMove2rocMove(move), color=self.rocColor)
        return False