-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathflask_connect_four.py
160 lines (129 loc) · 5.5 KB
/
flask_connect_four.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
import numpy as np
#import pandas as pd
from numpy import random
import time
import sys
import connect_four as cccc
import os
import json
import pickle
from flask import Flask, render_template, request, redirect, jsonify
# Flask application object. The two attributes set below are ad-hoc game
# state stored directly on the app instance.
app = Flask(__name__)
#app.debug = True
# Flat list of 42 cells, all 0. Other functions in this file reshape boards
# to 6 rows x 7 columns (6 * 7 == 42).
app.board = [0]*42
# Starts at 1; the search code in this file uses 1 and -1 for the two sides.
app.player = 1
#def net_value(board):
# t0=time.clock()
# (m1,m2,m3,m4) = np.load('TD_cccc_100_1_6million(1).npz')['arr_0']
# board = np.copy(board).reshape((1,42))
# m5 = np.dot(board, m1) + m2
# answer = np.tanh(np.dot(np.tanh(m5), m3) + m4) + random.random()*0.05 - 0.05
# print "node evaluation took {:.5}s".format(time.clock()-t0)
# return answer
# return np.tanh(np.dot(np.vectorize(np.tanh)(m5), m3) + m4) + random.random()*0.1 - 0.05
# Load the evaluation network's parameters once, at import time.
# `value` is iterated by net_value() as a sequence of layers, each indexed as
# layer[0] and layer[1] (used there as a weight matrix and an additive term).
# NOTE(review): pickle.load executes arbitrary code embedded in the file, so
# 'c4_shallow_net.pkl' must come from a trusted source.
with open('c4_shallow_net.pkl','rb') as f:
    value = pickle.load(f)
def net_value(board):
    """Evaluate `board` with the feed-forward network stored in `value`.

    The board is flattened to one dimension and pushed through every layer
    of the module-level `value` sequence. Each layer computes
    ``dot(h, layer[0]) + layer[1]``; all layers but the last are followed by
    a ReLU, and the last one by tanh.

    Parameters:
        board: array-like whose total size matches the first layer's input
            (presumably 42 cells -- confirm against the pickled weights).

    Returns:
        The network output as a Python float. The final layer is expected to
        produce exactly one number.
    """
    h = np.asarray(board).reshape(-1)
    last_layer = len(value) - 1
    for i, layer in enumerate(value):
        pre_activation = np.dot(h, layer[0]) + layer[1]
        if i == last_layer:
            h = np.tanh(pre_activation)
        else:
            # Vectorised ReLU; replaces a per-element np.vectorize(lambda ...).
            h = np.maximum(0., pre_activation)
    # .item() extracts the single element without relying on the deprecated
    # implicit conversion of a 1-element array to a scalar.
    return float(np.asarray(h).item())
def sym_net_value(board):
    """Left-right symmetric network evaluation with a little random noise.

    The board is viewed as a stack of 6x7 grids, mirrored along the column
    axis, and the two net_value() results are averaged. Uniform noise in
    [-0.05, 0.05) is then added.

    Parameters:
        board: array-like whose size is a multiple of 42.

    Returns:
        float: averaged evaluation plus noise.
    """
    # Let numpy infer the leading dimension. The previous `size/42` produced
    # a float under true division, which reshape() rejects.
    boards = np.asarray(board).reshape((-1, 6, 7))
    mirrored = boards[:, :, ::-1]
    return (net_value(boards) + net_value(mirrored))*0.5 + np.random.random()*0.1-0.05
def game_over(board):
    """Tell whether the game on `board` has ended.

    `board` is copied into a 6x7 array and handed to the `cccc` helpers.
    The result of cccc.winner() is returned when it is truthy; otherwise the
    result of cccc.is_full() is returned.
    """
    grid = np.array(board).reshape((6, 7))
    outcome = cccc.winner(grid)
    if outcome:
        return outcome
    return cccc.is_full(grid)
# Positive infinity; used by alpha_beta_move for its initial bounds and as
# the score of a winning move.
inf = float("inf")
def update_move(board, move, turn):
    """Place `turn` in column `move` of `board`, modifying it in place.

    The piece goes into the empty (zero) cell with the largest row index in
    that column. An IndexError is raised when the column has no empty cell.
    Always returns None.
    """
    column = board[:, move]
    empty_rows = np.where(column == 0)[0]
    target_row = empty_rows[-1]
    board[target_row, move] = turn
def unupdate_move(board, move):
    """Undo the most recent piece in column `move` of `board`, in place.

    When the column still has empty (zero) cells, the cell one row past the
    last empty one is cleared. When the column is completely filled, row 0
    is cleared. Always returns None.
    """
    column = board[:, move]
    empty_rows = np.where(column == 0)[0]
    if empty_rows.size:
        row_to_clear = empty_rows[-1] + 1
    else:
        row_to_clear = 0
    board[row_to_clear, move] = 0
def winning_squares(board):
    """Return the coordinates of every cell that belongs to a four-in-a-row.

    A run of four consecutive cells (horizontal, vertical or either
    diagonal) counts when the absolute value of its sum is 4, i.e. all four
    cells hold the same side's piece when pieces are encoded as +1 / -1.

    Parameters:
        board: array-like with 42 entries (a flat list, a flat array or a
            6x7 array); it is viewed as 6 rows x 7 columns.

    Returns:
        list of (row, col) tuples of Python ints, sorted and without
        duplicates. Empty when there is no winning run.
    """
    # np.asarray lets flat Python lists (the form app.board uses) work too.
    grid = np.asarray(board).reshape((6, 7))
    n_rows, n_cols = 6, 7
    # (row step, col step): right, down, down-right, down-left.
    directions = ((0, 1), (1, 0), (1, 1), (1, -1))
    coords = set()
    for d_row, d_col in directions:
        for row in range(n_rows):
            for col in range(n_cols):
                end_row = row + 3 * d_row
                end_col = col + 3 * d_col
                # Skip starting cells whose run would leave the board.
                if not (0 <= end_row < n_rows and 0 <= end_col < n_cols):
                    continue
                cells = [(row + k * d_row, col + k * d_col) for k in range(4)]
                if abs(sum(grid[r, c] for r, c in cells)) == 4:
                    coords.update(cells)
    return sorted(coords)
def alpha_beta_move(board, turn, depth = 0, alpha = (-inf,-inf), beta = (inf,inf), evaluation = lambda x: 0):
    """Choose a move for `turn` using a negamax-style alpha-beta search.

    Values are ``(score, depth)`` tuples compared lexicographically, and each
    child's value is negated element-wise before comparison with the best
    value found so far.

    Parameters:
        board: 2-D numpy array indexed [row, column]; it is copied, so the
            caller's array is not modified by the search.
        turn: side to move, 1 or -1.
        depth: remaining plies. At 0 every legal move is scored directly
            with `evaluation`; otherwise the search recurses with depth - 1.
        alpha, beta: current search window as (score, depth) tuples.
        evaluation: callable applied to the board after a move, multiplied
            by the opponent's sign; its result is negated to get the mover's
            score.

    Returns:
        (best_value, move), where best_value is a (score, depth) tuple.
        When depth > 0, a move that makes cccc.winner() truthy returns
        ((inf, depth), move) immediately, and a move that fills the board
        returns ((0, depth), move) immediately.

    Raises:
        IndexError: when cccc.available_moves(board) is empty. The diagnostic
            lines are still printed first. Previously this was an implicit
            IndexError from indexing the empty move list.
    """
    dummy_board = np.copy(board)  # we don't want to change the board state
    swap_player = {1: -1, -1: 1}  # so we can change whose turn it is
    options = cccc.available_moves(board)  # get legal moves
    random.shuffle(options)  # should inherit move order instead of randomizing
    best_value = (-inf, -inf)
    if not options:
        # Single-argument print() behaves the same on Python 2 and 3.
        print('{0} {1}'.format(board, cccc.game_over(board)))
        print('oops, no available moves')
        raise IndexError('alpha_beta_move called with no available moves')
    cand_move = options[0]
    for x in options:
        update_move(dummy_board, x, turn)
        if depth == 0:
            # Leaf: static evaluation from the opponent's point of view.
            op_value = (evaluation(dummy_board * swap_player[turn]), depth)
        else:
            if cccc.winner(dummy_board):  # should check over and tied too
                return ((inf, depth), x)
            if cccc.is_full(dummy_board):  # this assumes you can't lose on your turn
                return ((0, depth), x)
            # The opponent searches the negated, swapped window.
            op_value, _ = alpha_beta_move(dummy_board,
                                          swap_player[turn],
                                          depth - 1,
                                          alpha=tuple(-1 * el for el in beta),
                                          beta=tuple(-1 * el for el in alpha),
                                          evaluation=evaluation)
        my_value = tuple(-1 * el for el in op_value)
        if my_value > best_value:
            cand_move = x
            best_value = my_value
        alpha = max(alpha, best_value)
        if alpha >= beta:
            break  # alpha-beta cutoff
        unupdate_move(dummy_board, x)
    return (best_value, cand_move)
def play(types=['human','human'],depths=(0,0),evals = ("random","random")):
    """Render the Connect Four page for a fresh game.

    Parameters:
        types: one entry per player (defaults to two 'human' entries);
            each is converted with str(). The default list is never mutated.
        depths: one entry per player, passed to the template as a list.
        evals: one entry per player; each is converted with str().

    Returns:
        The result of rendering 'connect_four.html' with a board of 42
        zeros, player 1, finished = -2 and the settings above.
    """
    # Single-argument print() behaves the same on Python 2 and 3.
    print('starting a game of connect Four')
    # List comprehensions instead of map(): on Python 3 map() returns a lazy
    # iterator, so the template would not receive real lists.
    return render_template('connect_four.html',
                           board=list(np.zeros(42)),
                           player=1,
                           finished=-2,
                           types=[str(t) for t in types],
                           depths=list(depths),
                           evals=[str(e) for e in evals])