diff --git a/README.md b/README.md index c8b2503cc..dc4b5c94a 100644 --- a/README.md +++ b/README.md @@ -245,7 +245,7 @@ You can use the the following interface to make an environment. You may optional * `allow_step_back`: Default `False`. `True` if allowing `step_back` function to traverse backward in the tree. * Game specific configurations: These fields start with `game_`. Currently, we only support `game_num_players` in Blackjack, . -Once the environemnt is made, we can access some information of the game. +Once the environment is made, we can access some information of the game. * **env.num_actions**: The number of actions. * **env.num_players**: The number of players. * **env.state_shape**: The shape of the state space of the observations. diff --git a/docs/games.md b/docs/games.md index 301f85d58..bcd40ea84 100644 --- a/docs/games.md +++ b/docs/games.md @@ -90,7 +90,7 @@ At each decision point of the game, the corresponding player will be able to obs | ------------- | :--------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------- | | seen\_cards | Three face-down cards distributed to the landlord after bidding. Then these cards will be made public to all players. | TQA | | landlord | An integer of landlord's id | 0 | -| self | An integer of current player's id | 2 | +| self | An integer of current player's id | 2 | | trace | A list of tuples which records every actions in one game. The first entry of the tuple is player's id, the second is corresponding player's action. | \[(0, '8222'), (1, 'pass'), (2, 'pass'), (0 '6KKK'), (1, 'pass'), (2, 'pass'), (0, '8'), (1, 'Q')\] | | played\_cards | As the game progresses, the cards which have been played by the three players and sorted from low to high. 
| \['6', '8', '8', 'Q', 'K', 'K', 'K', '2', '2', '2'\] | | others\_hand | The union of the other two player's current hand | 333444555678899TTTJJJQQAA2R | @@ -134,7 +134,7 @@ If the landlord first get rid of all the cards in his hand, he will win and rece ## Mahjong Mahjong is a tile-based game developed in China, and has spread throughout the world since 20th century. It is commonly played by 4 players. The game is played with a set of 136 tiles. In turn players draw and discard tiles until -The goal of the game is to complete the leagal hand using the 14th drawn tile to form 4 sets and a pair. +The goal of the game is to complete the legal hand using the 14th drawn tile to form 4 sets and a pair. We revised the game into a simple version that all of the winning set are equal, and player will win as long as she complete forming 4 sets and a pair. Please refer the detail on [Wikipedia](https://en.wikipedia.org/wiki/Mahjong) or [Baike](https://baike.baidu.com/item/麻将/215). diff --git a/docs/high-level-design.md b/docs/high-level-design.md index 22b6686da..855837e2c 100644 --- a/docs/high-level-design.md +++ b/docs/high-level-design.md @@ -25,4 +25,4 @@ Card games usually have similar structures. We abstract some concepts in card ga To summarize, in one `Game`, a `Dealer` deals the cards for each `Player`. In each `Round` of the game, a `Judger` will make major decisions about the next round and the payoffs in the end of the game. ## Agents -We provide examples of several representative algorithms and wrap them as `Agent` to show how a learning algorithm can be connected to the toolkit. The first example is DQN which is a representative of the Reinforcement Learning (RL) algorithms category. The second example is NFSP which is a representative of the Reinforcement Learning (RL) with self-play. We also provide CFR (chance sampling) and DeepCFR which belong to Conterfactual Regret Minimization (CFR) category. 
Other algorithms from these three categories can be connected in similar ways. +We provide examples of several representative algorithms and wrap them as `Agent` to show how a learning algorithm can be connected to the toolkit. The first example is DQN which is a representative of the Reinforcement Learning (RL) algorithms category. The second example is NFSP which is a representative of the Reinforcement Learning (RL) with self-play. We also provide CFR (chance sampling) and DeepCFR which belong to Counterfactual Regret Minimization (CFR) category. Other algorithms from these three categories can be connected in similar ways. diff --git a/docs/toy-examples.md b/docs/toy-examples.md index 412b8c81b..61ad378a9 100644 --- a/docs/toy-examples.md +++ b/docs/toy-examples.md @@ -339,7 +339,7 @@ def train(args): # Seed numpy, torch, random set_seed(args.seed) - # Initilize CFR Agent + # Initialize CFR Agent agent = CFRAgent( env, os.path.join( diff --git a/examples/evaluate.py b/examples/evaluate.py index a5f70905f..d79a54b15 100644 --- a/examples/evaluate.py +++ b/examples/evaluate.py @@ -1,19 +1,16 @@ -''' An example of evluating the trained models in RLCard -''' +"""An example of evaluating the trained models in RLCard""" import os import argparse import rlcard -from rlcard.agents import ( - DQNAgent, - RandomAgent, -) + from rlcard.utils import ( get_device, set_seed, tournament, ) + def load_model(model_path, env=None, position=None, device=None): if os.path.isfile(model_path): # Torch model import torch @@ -29,14 +26,14 @@ def load_model(model_path, env=None, position=None, device=None): else: # A model in the model zoo from rlcard import models agent = models.load(model_path).agents[position] - + return agent -def evaluate(args): +def evaluate(args): # Check whether gpu is available device = get_device() - + # Seed numpy, torch, random set_seed(args.seed) @@ -54,6 +51,7 @@ def evaluate(args): for position, reward in enumerate(rewards): print(position, 
args.models[position], reward) + if __name__ == '__main__': parser = argparse.ArgumentParser("Evaluation example in RLCard") parser.add_argument( @@ -99,4 +97,3 @@ def evaluate(args): os.environ["CUDA_VISIBLE_DEVICES"] = args.cuda evaluate(args) - diff --git a/examples/human/blackjack_human.py b/examples/human/blackjack_human.py index 46f3f2b72..96fd7f27b 100644 --- a/examples/human/blackjack_human.py +++ b/examples/human/blackjack_human.py @@ -1,5 +1,4 @@ -''' A toy example of self playing for Blackjack -''' +"""A toy example of self playing for Blackjack """ import rlcard from rlcard.agents import RandomAgent as RandomAgent @@ -23,7 +22,7 @@ print(">> Blackjack human agent") -while (True): +while True: print(">> Start a new game") trajectories, payoffs = env.run(is_training=False) diff --git a/examples/human/gin_rummy_human.py b/examples/human/gin_rummy_human.py index 230e3640f..ce4b680fc 100644 --- a/examples/human/gin_rummy_human.py +++ b/examples/human/gin_rummy_human.py @@ -1,9 +1,9 @@ -''' +""" Project: Gui Gin Rummy File name: gin_rummy_human.py Author: William Hale Date created: 3/14/2020 -''' +""" # You need to install tkinter if it is not already installed. # Tkinter is Python's defacto standard GUI (Graphical User Interface) package. 
diff --git a/examples/human/leduc_holdem_human.py b/examples/human/leduc_holdem_human.py index 55e73c33f..57133cb68 100644 --- a/examples/human/leduc_holdem_human.py +++ b/examples/human/leduc_holdem_human.py @@ -1,5 +1,4 @@ -''' A toy example of playing against pretrianed AI on Leduc Hold'em -''' +"""A toy example of playing against pretrained AI on Leduc Hold'em""" import rlcard from rlcard import models @@ -17,7 +16,7 @@ print(">> Leduc Hold'em pre-trained model") -while (True): +while True: print(">> Start a new game") trajectories, payoffs = env.run(is_training=False) diff --git a/examples/human/limit_holdem_human.py b/examples/human/limit_holdem_human.py index 1491180e7..3f37deda1 100644 --- a/examples/human/limit_holdem_human.py +++ b/examples/human/limit_holdem_human.py @@ -1,5 +1,4 @@ -''' A toy example of playing against a random agent on Limit Hold'em -''' +"""A toy example of playing against a random agent on Limit Hold'em""" import rlcard from rlcard.agents import LimitholdemHumanAgent as HumanAgent @@ -17,7 +16,7 @@ print(">> Limit Hold'em random agent") -while (True): +while True: print(">> Start a new game") trajectories, payoffs = env.run(is_training=False) diff --git a/examples/human/nolimit_holdem_human.py b/examples/human/nolimit_holdem_human.py index 76f29da11..bfb812ac9 100644 --- a/examples/human/nolimit_holdem_human.py +++ b/examples/human/nolimit_holdem_human.py @@ -1,5 +1,4 @@ -''' A toy example of playing against pretrianed AI on Leduc Hold'em -''' +"""A toy example of playing against pretrained AI on Leduc Hold'em""" from rlcard.agents import RandomAgent import rlcard @@ -17,7 +16,7 @@ env.set_agents([human_agent, human_agent2]) -while (True): +while True: print(">> Start a new game") trajectories, payoffs = env.run(is_training=False) diff --git a/examples/human/uno_human.py b/examples/human/uno_human.py index a5110b16b..19198088e 100644 --- a/examples/human/uno_human.py +++ b/examples/human/uno_human.py @@ -1,5 +1,4 @@ -''' A toy 
example of playing against rule-based bot on UNO -''' +"""A toy example of playing against rule-based bot on UNO""" import rlcard from rlcard import models @@ -16,7 +15,7 @@ print(">> UNO rule model V1") -while (True): +while True: print(">> Start a new game") trajectories, payoffs = env.run(is_training=False) diff --git a/examples/pettingzoo/run_dmc.py b/examples/pettingzoo/run_dmc.py index a44ef0678..e88cc9601 100644 --- a/examples/pettingzoo/run_dmc.py +++ b/examples/pettingzoo/run_dmc.py @@ -1,6 +1,4 @@ -''' An example of training a Deep Monte-Carlo (DMC) Agent on PettingZoo environments -wrapping RLCard -''' +"""An example of training a Deep Monte-Carlo (DMC) Agent on PettingZoo environments wrapping RLCard""" import os import argparse diff --git a/examples/pettingzoo/run_rl.py b/examples/pettingzoo/run_rl.py index a648135b0..fb7b5cee1 100644 --- a/examples/pettingzoo/run_rl.py +++ b/examples/pettingzoo/run_rl.py @@ -1,6 +1,4 @@ -''' An example of training a reinforcement learning agent on the PettingZoo -environments that wrap RLCard -''' +"""An example of training a reinforcement learning agent on the PettingZoo environments that wrap RLCard""" import os import argparse diff --git a/examples/run_cfr.py b/examples/run_cfr.py index b5d67d08d..b5862a2e0 100644 --- a/examples/run_cfr.py +++ b/examples/run_cfr.py @@ -1,5 +1,4 @@ -''' An example of solve Leduc Hold'em with CFR (chance sampling) -''' +"""An example of solve Leduc Hold'em with CFR (chance sampling)""" import os import argparse @@ -15,6 +14,7 @@ plot_curve, ) + def train(args): # Make environments, CFR only supports Leduc Holdem env = rlcard.make( @@ -34,7 +34,7 @@ def train(args): # Seed numpy, torch, random set_seed(args.seed) - # Initilize CFR Agent + # Initialize CFR Agent agent = CFRAgent( env, os.path.join( @@ -71,6 +71,7 @@ def train(args): # Plot the learning curve plot_curve(csv_path, fig_path, 'cfr') + if __name__ == '__main__': parser = argparse.ArgumentParser("CFR example in RLCard") 
parser.add_argument( diff --git a/examples/run_dmc.py b/examples/run_dmc.py index 401b14d68..3d955e47f 100644 --- a/examples/run_dmc.py +++ b/examples/run_dmc.py @@ -1,5 +1,4 @@ -''' An example of training a Deep Monte-Carlo (DMC) Agent on the environments in RLCard -''' +"""An example of training a Deep Monte-Carlo (DMC) Agent on the environments in RLCard""" import os import argparse @@ -8,8 +7,8 @@ import rlcard from rlcard.agents.dmc_agent import DMCTrainer -def train(args): +def train(args): # Make the environment env = rlcard.make(args.env) @@ -29,6 +28,7 @@ def train(args): # Train DMC Agents trainer.start() + if __name__ == '__main__': parser = argparse.ArgumentParser("DMC example in RLCard") parser.add_argument( @@ -95,4 +95,3 @@ def train(args): os.environ["CUDA_VISIBLE_DEVICES"] = args.cuda train(args) - diff --git a/examples/run_random.py b/examples/run_random.py index 9a2681288..1287a4a13 100644 --- a/examples/run_random.py +++ b/examples/run_random.py @@ -1,5 +1,4 @@ -''' An example of playing randomly in RLCard -''' +"""An example of playing randomly in RLCard""" import argparse import pprint diff --git a/examples/run_rl.py b/examples/run_rl.py index 3727f3ae8..08c05db96 100644 --- a/examples/run_rl.py +++ b/examples/run_rl.py @@ -1,5 +1,4 @@ -''' An example of training a reinforcement learning agent on the environments in RLCard -''' +"""An example of training a reinforcement learning agent on the environments in RLCard""" import os import argparse @@ -16,11 +15,11 @@ plot_curve, ) -def train(args): +def train(args): # Check whether gpu is available device = get_device() - + # Seed numpy, torch, random set_seed(args.seed) @@ -41,7 +40,7 @@ def train(args): agent = DQNAgent( num_actions=env.num_actions, state_shape=env.state_shape[0], - mlp_layers=[64,64], + mlp_layers=[64, 64], device=device, save_path=args.log_dir, save_every=args.save_every @@ -55,8 +54,8 @@ def train(args): agent = NFSPAgent( num_actions=env.num_actions, 
state_shape=env.state_shape[0], - hidden_layers_sizes=[64,64], - q_mlp_layers=[64,64], + hidden_layers_sizes=[64, 64], + q_mlp_layers=[64, 64], device=device, save_path=args.log_dir, save_every=args.save_every @@ -106,6 +105,7 @@ def train(args): torch.save(agent, save_path) print('Model saved in', save_path) + if __name__ == '__main__': parser = argparse.ArgumentParser("DQN/NFSP example in RLCard") parser.add_argument( @@ -163,13 +163,13 @@ def train(args): type=str, default='experiments/leduc_holdem_dqn_result/', ) - + parser.add_argument( "--load_checkpoint_path", type=str, default="", ) - + parser.add_argument( "--save_every", type=int, @@ -179,4 +179,3 @@ def train(args): os.environ["CUDA_VISIBLE_DEVICES"] = args.cuda train(args) - diff --git a/rlcard/agents/cfr_agent.py b/rlcard/agents/cfr_agent.py index 406b0c12d..0ae429b2c 100644 --- a/rlcard/agents/cfr_agent.py +++ b/rlcard/agents/cfr_agent.py @@ -1,4 +1,3 @@ -import numpy as np import collections import os @@ -6,16 +5,16 @@ from rlcard.utils.utils import * -class CFRAgent(): - ''' Implement CFR (chance sampling) algorithm - ''' + +class CFRAgent: + """Implement CFR (chance sampling) algorithm """ def __init__(self, env, model_path='./cfr_model'): - ''' Initilize Agent + """Initialize Agent Args: env (Env): Env class - ''' + """ self.use_raw = False self.env = env self.model_path = model_path @@ -30,8 +29,7 @@ def __init__(self, env, model_path='./cfr_model'): self.iteration = 0 def train(self): - ''' Do one iteration of CFR - ''' + """Do one iteration of CFR """ self.iteration += 1 # Firstly, traverse tree to compute counterfactual regret for each player # The regrets are recorded in traversal @@ -44,7 +42,7 @@ def train(self): self.update_policy() def traverse_tree(self, probs, player_id): - ''' Traverse the game tree, update the regrets + """Traverse the game tree, update the regrets Args: probs: The reach probability of the current node @@ -52,7 +50,7 @@ def traverse_tree(self, probs, player_id): 
Returns: state_utilities (list): The expected utilities for all the players - ''' + """ if self.env.is_over(): return self.env.get_payoffs() @@ -82,7 +80,7 @@ def traverse_tree(self, probs, player_id): # If it is current player, we record the policy and compute regret player_prob = probs[current_player] counterfactual_prob = (np.prod(probs[:current_player]) * - np.prod(probs[current_player + 1:])) + np.prod(probs[current_player + 1:])) player_state_utility = state_utility[current_player] if obs not in self.regrets: @@ -92,23 +90,22 @@ def traverse_tree(self, probs, player_id): for action in legal_actions: action_prob = action_probs[action] regret = counterfactual_prob * (action_utilities[action][current_player] - - player_state_utility) + - player_state_utility) self.regrets[obs][action] += regret self.average_policy[obs][action] += self.iteration * player_prob * action_prob return state_utility def update_policy(self): - ''' Update policy based on the current regrets - ''' + """Update policy based on the current regrets """ for obs in self.regrets: self.policy[obs] = self.regret_matching(obs) def regret_matching(self, obs): - ''' Apply regret matching + """Apply regret matching Args: obs (string): The state_str - ''' + """ regret = self.regrets[obs] positive_regret_sum = sum([r for r in regret if r > 0]) @@ -122,11 +119,11 @@ def regret_matching(self, obs): return action_probs def action_probs(self, obs, legal_actions, policy): - ''' Obtain the action probabilities of the current state + """Obtain the action probabilities of the current state Args: obs (str): state_str - legal_actions (list): List of leagel actions + legal_actions (list): List of legal actions player_id (int): The current player policy (dict): The used policy @@ -134,9 +131,9 @@ def action_probs(self, obs, legal_actions, policy): (tuple) that contains: action_probs(numpy.array): The action probabilities legal_actions (list): Indices of legal actions - ''' + """ if obs not in policy.keys(): - 
action_probs = np.array([1.0/self.env.num_actions for _ in range(self.env.num_actions)]) + action_probs = np.array([1.0 / self.env.num_actions for _ in range(self.env.num_actions)]) self.policy[obs] = action_probs else: action_probs = policy[obs] @@ -144,7 +141,7 @@ def action_probs(self, obs, legal_actions, policy): return action_probs def eval_step(self, state): - ''' Given a state, predict action based on average policy + """Given a state, predict action based on average policy Args: state (numpy.array): State representation @@ -152,17 +149,17 @@ def eval_step(self, state): Returns: action (int): Predicted action info (dict): A dictionary containing information - ''' + """ probs = self.action_probs(state['obs'].tostring(), list(state['legal_actions'].keys()), self.average_policy) action = np.random.choice(len(probs), p=probs) - info = {} - info['probs'] = {state['raw_legal_actions'][i]: float(probs[list(state['legal_actions'].keys())[i]]) for i in range(len(state['legal_actions']))} + info = {'probs': {state['raw_legal_actions'][i]: float(probs[list(state['legal_actions'].keys())[i]]) for i in + range(len(state['legal_actions']))}} return action, info def get_state(self, player_id): - ''' Get state_str of the player + """Get state_str of the player Args: player_id (int): The player id @@ -171,51 +168,48 @@ def get_state(self, player_id): (tuple) that contains: state (str): The state str legal_actions (list): Indices of legal actions - ''' + """ state = self.env.get_state(player_id) return state['obs'].tostring(), list(state['legal_actions'].keys()) def save(self): - ''' Save model - ''' + """Save model """ if not os.path.exists(self.model_path): os.makedirs(self.model_path) - policy_file = open(os.path.join(self.model_path, 'policy.pkl'),'wb') + policy_file = open(os.path.join(self.model_path, 'policy.pkl'), 'wb') pickle.dump(self.policy, policy_file) policy_file.close() - average_policy_file = open(os.path.join(self.model_path, 'average_policy.pkl'),'wb') + 
average_policy_file = open(os.path.join(self.model_path, 'average_policy.pkl'), 'wb') pickle.dump(self.average_policy, average_policy_file) average_policy_file.close() - regrets_file = open(os.path.join(self.model_path, 'regrets.pkl'),'wb') + regrets_file = open(os.path.join(self.model_path, 'regrets.pkl'), 'wb') pickle.dump(self.regrets, regrets_file) regrets_file.close() - iteration_file = open(os.path.join(self.model_path, 'iteration.pkl'),'wb') + iteration_file = open(os.path.join(self.model_path, 'iteration.pkl'), 'wb') pickle.dump(self.iteration, iteration_file) iteration_file.close() def load(self): - ''' Load model - ''' + """Load model """ if not os.path.exists(self.model_path): return - policy_file = open(os.path.join(self.model_path, 'policy.pkl'),'rb') + policy_file = open(os.path.join(self.model_path, 'policy.pkl'), 'rb') self.policy = pickle.load(policy_file) policy_file.close() - average_policy_file = open(os.path.join(self.model_path, 'average_policy.pkl'),'rb') + average_policy_file = open(os.path.join(self.model_path, 'average_policy.pkl'), 'rb') self.average_policy = pickle.load(average_policy_file) average_policy_file.close() - regrets_file = open(os.path.join(self.model_path, 'regrets.pkl'),'rb') + regrets_file = open(os.path.join(self.model_path, 'regrets.pkl'), 'rb') self.regrets = pickle.load(regrets_file) regrets_file.close() - iteration_file = open(os.path.join(self.model_path, 'iteration.pkl'),'rb') + iteration_file = open(os.path.join(self.model_path, 'iteration.pkl'), 'rb') self.iteration = pickle.load(iteration_file) iteration_file.close() - diff --git a/rlcard/agents/dmc_agent/model.py b/rlcard/agents/dmc_agent/model.py index 2adf72cda..7086d40cc 100644 --- a/rlcard/agents/dmc_agent/model.py +++ b/rlcard/agents/dmc_agent/model.py @@ -18,19 +18,17 @@ import torch from torch import nn + class DMCNet(nn.Module): - def __init__( - self, - state_shape, - action_shape, - mlp_layers=[512,512,512,512,512] - ): + def __init__(self, 
state_shape, action_shape, mlp_layers=None): super().__init__() + if mlp_layers is None: + mlp_layers = [512, 512, 512, 512, 512] input_dim = np.prod(state_shape) + np.prod(action_shape) layer_dims = [input_dim] + mlp_layers fc = [] - for i in range(len(layer_dims)-1): - fc.append(nn.Linear(layer_dims[i], layer_dims[i+1])) + for i in range(len(layer_dims) - 1): + fc.append(nn.Linear(layer_dims[i], layer_dims[i + 1])) fc.append(nn.ReLU()) fc.append(nn.Linear(layer_dims[-1], 1)) self.fc_layers = nn.Sequential(*fc) @@ -42,17 +40,13 @@ def forward(self, obs, actions): values = self.fc_layers(x).flatten() return values + class DMCAgent: - def __init__( - self, - state_shape, - action_shape, - mlp_layers=[512,512,512,512,512], - exp_epsilon=0.01, - device="0", - ): + def __init__(self, state_shape, action_shape, mlp_layers=None, exp_epsilon=0.01, device="0"): + if mlp_layers is None: + mlp_layers = [512, 512, 512, 512, 512] self.use_raw = False - self.device = 'cuda:'+device if device != "cpu" else "cpu" + self.device = 'cuda:' + device if device != "cpu" else "cpu" self.net = DMCNet(state_shape, action_shape, mlp_layers).to(self.device) self.exp_epsilon = exp_epsilon self.action_shape = action_shape @@ -74,8 +68,7 @@ def eval_step(self, state): action_idx = np.argmax(values) action = action_keys[action_idx] - info = {} - info['values'] = {state['raw_legal_actions'][i]: float(values[i]) for i in range(len(action_keys))} + info = {'values': {state['raw_legal_actions'][i]: float(values[i]) for i in range(len(action_keys))}} return action, info @@ -121,15 +114,11 @@ def state_dict(self): def set_device(self, device): self.device = device + class DMCModel: - def __init__( - self, - state_shape, - action_shape, - mlp_layers=[512,512,512,512,512], - exp_epsilon=0.01, - device=0 - ): + def __init__(self, state_shape, action_shape, mlp_layers=None, exp_epsilon=0.01, device=0): + if mlp_layers is None: + mlp_layers = [512, 512, 512, 512, 512] self.agents = [] for player_id in 
range(len(state_shape)): agent = DMCAgent( diff --git a/rlcard/agents/dmc_agent/pettingzoo_model.py b/rlcard/agents/dmc_agent/pettingzoo_model.py index 8a6d577c1..aa1419d13 100644 --- a/rlcard/agents/dmc_agent/pettingzoo_model.py +++ b/rlcard/agents/dmc_agent/pettingzoo_model.py @@ -20,13 +20,10 @@ def feed(self, ts): class DMCModelPettingZoo: - def __init__( - self, - env, - mlp_layers=[512,512,512,512,512], - exp_epsilon=0.01, - device="0" - ): + def __init__(self, env, mlp_layers=None, exp_epsilon=0.01, device="0"): + if mlp_layers is None: + mlp_layers = [512, 512, 512, 512, 512] + self.agents = OrderedDict() for agent_name in env.agents: agent = DMCAgentPettingZoo( diff --git a/rlcard/agents/dmc_agent/trainer.py b/rlcard/agents/dmc_agent/trainer.py index 043f99fcb..5de12b0c9 100644 --- a/rlcard/agents/dmc_agent/trainer.py +++ b/rlcard/agents/dmc_agent/trainer.py @@ -163,7 +163,7 @@ def __init__( if not self.is_pettingzoo_env: self.num_players = self.env.num_players self.action_shape = self.env.action_shape - if self.action_shape[0] == None: # One-hot encoding + if self.action_shape[0] is None: # One-hot encoding self.action_shape = [[self.env.num_actions] for _ in range(self.num_players)] def model_func(device): diff --git a/rlcard/agents/dqn_agent.py b/rlcard/agents/dqn_agent.py index 11c6875d0..8cf7f4555 100644 --- a/rlcard/agents/dqn_agent.py +++ b/rlcard/agents/dqn_agent.py @@ -1,4 +1,4 @@ -''' DQN agent +"""DQN agent The code is derived from https://github.com/dennybritz/reinforcement-learning/blob/master/DQN/dqn.py @@ -23,7 +23,7 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-''' +""" import os import random @@ -39,10 +39,10 @@ class DQNAgent(object): - ''' + """ Approximate clone of rlcard.agents.dqn_agent.DQNAgent that depends on PyTorch instead of Tensorflow - ''' + """ def __init__(self, replay_memory_size=20000, replay_memory_init_size=100, @@ -61,7 +61,7 @@ def __init__(self, save_path=None, save_every=float('inf'),): - ''' + """ Q-Learning algorithm for off-policy TD control using Function Approximation. Finds the optimal greedy policy while following an epsilon-greedy policy. @@ -86,7 +86,7 @@ def __init__(self, device (torch.device): whether to use the cpu or gpu save_path (str): The path to save the model checkpoints save_every (int): Save the model every X training steps - ''' + """ self.use_raw = False self.replay_memory_init_size = replay_memory_init_size self.update_target_estimator_every = update_target_estimator_every @@ -112,10 +112,10 @@ def __init__(self, self.epsilons = np.linspace(epsilon_start, epsilon_end, epsilon_decay_steps) # Create estimators - self.q_estimator = Estimator(num_actions=num_actions, learning_rate=learning_rate, state_shape=state_shape, \ - mlp_layers=mlp_layers, device=self.device) - self.target_estimator = Estimator(num_actions=num_actions, learning_rate=learning_rate, state_shape=state_shape, \ - mlp_layers=mlp_layers, device=self.device) + self.q_estimator = Estimator(num_actions=num_actions, learning_rate=learning_rate, state_shape=state_shape, + mlp_layers=mlp_layers, device=self.device) + self.target_estimator = Estimator(num_actions=num_actions, learning_rate=learning_rate, state_shape=state_shape, + mlp_layers=mlp_layers, device=self.device) # Create replay memory self.memory = Memory(replay_memory_size, batch_size) @@ -125,13 +125,13 @@ def __init__(self, self.save_every = save_every def feed(self, ts): - ''' Store data in to replay buffer and train the agent. There are two stages. + """Store data in to replay buffer and train the agent. There are two stages. 
In stage 1, populate the memory without training In stage 2, train the agent every several timesteps Args: ts (list): a list of 5 elements that represent the transition - ''' + """ (state, action, reward, next_state, done) = tuple(ts) self.feed_memory(state['obs'], action, reward, next_state['obs'], list(next_state['legal_actions'].keys()), done) self.total_t += 1 @@ -140,7 +140,7 @@ def feed(self, ts): self.train() def step(self, state): - ''' Predict the action for genrating training data but + """Predict the action for generating training data but have the predictions disconnected from the computation graph Args: @@ -148,7 +148,7 @@ def step(self, state): Returns: action (int): an action id - ''' + """ q_values = self.predict(state) epsilon = self.epsilons[min(self.total_t, self.epsilon_decay_steps-1)] legal_actions = list(state['legal_actions'].keys()) @@ -160,7 +160,7 @@ def step(self, state): return legal_actions[action_idx] def eval_step(self, state): - ''' Predict the action for evaluation purpose. + """Predict the action for evaluation purpose. Args: state (numpy.array): current state @@ -168,7 +168,7 @@ def eval_step(self, state): Returns: action (int): an action id info (dict): A dictionary containing information - ''' + """ q_values = self.predict(state) best_action = np.argmax(q_values) @@ -178,14 +178,14 @@ def eval_step(self, state): return best_action, info def predict(self, state): - ''' Predict the masked Q-values + """Predict the masked Q-values Args: state (numpy.array): current state Returns: q_values (numpy.array): a 1-d array where each entry represents a Q value - ''' + """ q_values = self.q_estimator.predict_nograd(np.expand_dims(state['obs'], 0))[0] masked_q_values = -np.inf * np.ones(self.num_actions, dtype=float) @@ -195,11 +195,11 @@ def predict(self, state): return masked_q_values def train(self): - ''' Train the network + """Train the network Returns: loss (float): The loss of the current batch. 
- ''' + """ state_batch, action_batch, reward_batch, next_state_batch, done_batch, legal_actions_batch = self.memory.sample() # Calculate best next actions using Q-network (Double DQN) @@ -238,7 +238,7 @@ def train(self): def feed_memory(self, state, action, reward, next_state, legal_actions, done): - ''' Feed transition to memory + """Feed transition to memory Args: state (numpy.array): the current state @@ -247,7 +247,7 @@ def feed_memory(self, state, action, reward, next_state, legal_actions, done): next_state (numpy.array): the next state after performing the action legal_actions (list): the legal actions of the next state done (boolean): whether the episode is finished - ''' + """ self.memory.save(state, action, reward, next_state, legal_actions, done) def set_device(self, device): @@ -256,11 +256,11 @@ def set_device(self, device): self.target_estimator.device = device def checkpoint_attributes(self): - ''' + """ Return the current checkpoint attributes (dict) Checkpoint attributes are used to save and restore the model in the middle of training Saves the model state dict, optimizer state dict, and all other instance variables - ''' + """ return { 'agent_type': 'DQNAgent', @@ -284,12 +284,12 @@ def checkpoint_attributes(self): @classmethod def from_checkpoint(cls, checkpoint): - ''' + """ Restore the model from a checkpoint - + Args: checkpoint (dict): the checkpoint attributes generated by checkpoint_attributes() - ''' + """ print("\nINFO - Restoring model from checkpoint...") agent_instance = cls( @@ -321,33 +321,33 @@ def from_checkpoint(cls, checkpoint): return agent_instance def save_checkpoint(self, path, filename='checkpoint_dqn.pt'): - ''' Save the model checkpoint (all attributes) + """Save the model checkpoint (all attributes) Args: path (str): the path to save the model filename(str): the file name of checkpoint - ''' + """ torch.save(self.checkpoint_attributes(), os.path.join(path, filename)) class Estimator(object): - ''' + """ Approximate clone 
of rlcard.agents.dqn_agent.Estimator that uses PyTorch instead of Tensorflow. All methods input/output np.ndarray. Q-Value Estimator neural network. This network is used for both the Q-Network and the Target Network. - ''' + """ def __init__(self, num_actions=2, learning_rate=0.001, state_shape=None, mlp_layers=None, device=None): - ''' Initilalize an Estimator object. + """Initialize an Estimator object. Args: num_actions (int): the number output actions state_shape (list): the shape of the state space mlp_layers (list): size of outputs of mlp layers device (torch.device): whether to use cpu or gpu - ''' + """ self.num_actions = num_actions self.learning_rate=learning_rate self.state_shape = state_shape @@ -372,7 +372,7 @@ def __init__(self, num_actions=2, learning_rate=0.001, state_shape=None, mlp_lay self.optimizer = torch.optim.Adam(self.qnet.parameters(), lr=self.learning_rate) def predict_nograd(self, s): - ''' Predicts action values, but prediction is not included + """Predicts action values, but prediction is not included in the computation graph. It is used to predict optimal next actions in the Double-DQN algorithm. @@ -382,14 +382,14 @@ def predict_nograd(self, s): Returns: np.ndarray of shape (batch_size, NUM_VALID_ACTIONS) containing the estimated action values. - ''' + """ with torch.no_grad(): s = torch.from_numpy(s).float().to(self.device) q_as = self.qnet(s).cpu().numpy() return q_as def update(self, s, a, y): - ''' Updates the estimator towards the given targets. + """Updates the estimator towards the given targets. In this case y is the target-network estimated value of the Q-network optimal actions, which is labeled y in Algorithm 1 of Minh et al. (2015) @@ -401,7 +401,7 @@ def update(self, s, a, y): Returns: The calculated loss on the batch. 
- ''' + """ self.optimizer.zero_grad() self.qnet.train() @@ -427,8 +427,8 @@ def update(self, s, a, y): return batch_loss def checkpoint_attributes(self): - ''' Return the attributes needed to restore the model from a checkpoint - ''' + """Return the attributes needed to restore the model from a checkpoint + """ return { 'qnet': self.qnet.state_dict(), 'optimizer': self.optimizer.state_dict(), @@ -441,8 +441,8 @@ def checkpoint_attributes(self): @classmethod def from_checkpoint(cls, checkpoint): - ''' Restore the model from a checkpoint - ''' + """Restore the model from a checkpoint + """ estimator = cls( num_actions=checkpoint['num_actions'], learning_rate=checkpoint['learning_rate'], @@ -457,18 +457,18 @@ def from_checkpoint(cls, checkpoint): class EstimatorNetwork(nn.Module): - ''' The function approximation network for Estimator + """The function approximation network for Estimator It is just a series of tanh layers. All in/out are torch.tensor - ''' + """ def __init__(self, num_actions=2, state_shape=None, mlp_layers=None): - ''' Initialize the Q network + """Initialize the Q network Args: num_actions (int): number of legal actions state_shape (list): shape of state tensor mlp_layers (list): output size of each fc layer - ''' + """ super(EstimatorNetwork, self).__init__() self.num_actions = num_actions @@ -477,8 +477,7 @@ def __init__(self, num_actions=2, state_shape=None, mlp_layers=None): # build the Q network layer_dims = [np.prod(self.state_shape)] + self.mlp_layers - fc = [nn.Flatten()] - fc.append(nn.BatchNorm1d(layer_dims[0])) + fc = [nn.Flatten(), nn.BatchNorm1d(layer_dims[0])] for i in range(len(layer_dims)-1): fc.append(nn.Linear(layer_dims[i], layer_dims[i+1], bias=True)) fc.append(nn.Tanh()) @@ -486,28 +485,28 @@ def __init__(self, num_actions=2, state_shape=None, mlp_layers=None): self.fc_layers = nn.Sequential(*fc) def forward(self, s): - ''' Predict action values + """Predict action values Args: s (Tensor): (batch, state_shape) - ''' + """ 
return self.fc_layers(s) class Memory(object): - ''' Memory for saving transitions - ''' + """Memory for saving transitions + """ def __init__(self, memory_size, batch_size): - ''' Initialize + """Initialize Args: memory_size (int): the size of the memroy buffer - ''' + """ self.memory_size = memory_size self.batch_size = batch_size self.memory = [] def save(self, state, action, reward, next_state, legal_actions, done): - ''' Save transition into memory + """Save transition into memory Args: state (numpy.array): the current state @@ -516,14 +515,14 @@ def save(self, state, action, reward, next_state, legal_actions, done): next_state (numpy.array): the next state after performing the action legal_actions (list): the legal actions of the next state done (boolean): whether the episode is finished - ''' + """ if len(self.memory) == self.memory_size: self.memory.pop(0) transition = Transition(state, action, reward, next_state, done, legal_actions) self.memory.append(transition) def sample(self): - ''' Sample a minibatch from the replay memory + """Sample a minibatch from the replay memory Returns: state_batch (list): a batch of states @@ -531,14 +530,14 @@ def sample(self): reward_batch (list): a batch of rewards next_state_batch (list): a batch of states done_batch (list): a batch of dones - ''' + """ samples = random.sample(self.memory, self.batch_size) samples = tuple(zip(*samples)) return tuple(map(np.array, samples[:-1])) + (samples[-1],) def checkpoint_attributes(self): - ''' Returns the attributes that need to be checkpointed - ''' + """Returns the attributes that need to be checkpointed + """ return { 'memory_size': self.memory_size, @@ -548,15 +547,15 @@ def checkpoint_attributes(self): @classmethod def from_checkpoint(cls, checkpoint): - ''' + """ Restores the attributes from the checkpoint - + Args: checkpoint (dict): the checkpoint dictionary - + Returns: instance (Memory): the restored instance - ''' + """ instance = cls(checkpoint['memory_size'], 
checkpoint['batch_size']) instance.memory = checkpoint['memory'] diff --git a/rlcard/agents/human_agents/blackjack_human_agent.py b/rlcard/agents/human_agents/blackjack_human_agent.py index 53354c1c2..892b523ba 100644 --- a/rlcard/agents/human_agents/blackjack_human_agent.py +++ b/rlcard/agents/human_agents/blackjack_human_agent.py @@ -2,28 +2,27 @@ class HumanAgent(object): - ''' A human agent for Blackjack. It can be used to play alone for understand how the blackjack code runs - ''' + """A human agent for Blackjack. It can be used to play alone for understand how the blackjack code runs """ def __init__(self, num_actions): - ''' Initilize the human agent + """Initialize the human agent Args: num_actions (int): the size of the output action space - ''' + """ self.use_raw = True self.num_actions = num_actions @staticmethod def step(state): - ''' Human agent will display the state and make decisions through interfaces + """Human agent will display the state and make decisions through interfaces Args: state (dict): A dictionary that represents the current state Returns: action (int): The action decided by human - ''' + """ _print_state(state['raw_obs'], state['raw_legal_actions'], state['action_record']) action = int(input('>> You choose action (integer): ')) while action < 0 or action >= len(state['legal_actions']): @@ -32,23 +31,24 @@ def step(state): return state['raw_legal_actions'][action] def eval_step(self, state): - ''' Predict the action given the current state for evaluation. The same to step here. + """Predict the action given the current state for evaluation. The same to step here. 
Args: - state (numpy.array): an numpy array that represents the current state + state (numpy.array): a numpy array that represents the current state Returns: action (int): the action predicted (randomly chosen) by the random agent - ''' + """ return self.step(state), {} + def _print_state(state, raw_legal_actions, action_record): - ''' Print out the state + """Print out the state Args: state (dict): A dictionary of the raw state - action_record (list): A list of the each player's historical actions - ''' + action_record (list): A list of each player's historical actions + """ _action_list = [] for i in range(1, len(action_record)+1): _action_list.insert(0, action_record[-i]) diff --git a/rlcard/agents/human_agents/gin_rummy_human_agent/gin_rummy_human_agent.py b/rlcard/agents/human_agents/gin_rummy_human_agent/gin_rummy_human_agent.py index 7624e66ec..a90cb2c1b 100644 --- a/rlcard/agents/human_agents/gin_rummy_human_agent/gin_rummy_human_agent.py +++ b/rlcard/agents/human_agents/gin_rummy_human_agent/gin_rummy_human_agent.py @@ -1,9 +1,9 @@ -''' +""" Project: Gui Gin Rummy File name: gin_rummy_human_agent.py Author: William Hale Date created: 3/14/2020 -''' +""" import time @@ -12,15 +12,14 @@ class HumanAgent(object): - ''' A human agent for Gin Rummy. It can be used to play against trained models. - ''' + """A human agent for Gin Rummy. 
It can be used to play against trained models""" def __init__(self, num_actions): - ''' Initialize the human agent + """Initialize the human agent Args: num_actions (int): the size of the output action space - ''' + """ self.use_raw = True self.num_actions = num_actions self.is_choosing_action_id = False @@ -28,14 +27,14 @@ def __init__(self, num_actions): self.state = None def step(self, state): - ''' Human agent will display the state and make decisions through interfaces + """Human agent will display the state and make decisions through interfaces Args: state (dict): A dictionary that represents the current state Returns: action (int): The action decided by human - ''' + """ if self.is_choosing_action_id: raise GinRummyProgramError("self.is_choosing_action_id must be False.") if self.state is not None: @@ -55,12 +54,12 @@ def step(self, state): return chosen_action_event def eval_step(self, state): - ''' Predict the action given the current state for evaluation. The same to step here. + """Predict the action given the current state for evaluation. The same to step here. 
Args: - state (numpy.array): an numpy array that represents the current state + state (numpy.array): a numpy array that represents the current state Returns: action (int): the action predicted (randomly chosen) by the random agent - ''' + """ return self.step(state), {} diff --git a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_cards/card_image.py b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_cards/card_image.py index 43e111917..2e56b6249 100644 --- a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_cards/card_image.py +++ b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_cards/card_image.py @@ -1,20 +1,22 @@ -''' +""" Project: Gui Gin Rummy File name: card_image.py Author: William Hale Date created: 3/14/2020 -''' +""" import os from PIL import Image, ImageTk, ImageDraw image_dir = os.path.abspath(os.path.dirname(__file__)) + if not os.path.isdir(os.path.join(image_dir, 'cards_png')): print('Downloading images...') import time import urllib.request import sys import zipfile + def reporthook(count, block_size, total_size): global start_time if count == 0: @@ -25,12 +27,12 @@ def reporthook(count, block_size, total_size): speed = int(progress_size / (1024 * duration)) percent = int(count * block_size * 100 / total_size) sys.stdout.write("\r...%d%%, %d KB, %d KB/s, %d seconds passed" % - (percent, progress_size / (1024), speed, duration)) + (percent, progress_size / 1024, speed, duration)) sys.stdout.flush() zipurl = 'https://dczha.com/files/rlcard/cards_png.zip' filehandle, _ = urllib.request.urlretrieve(zipurl, reporthook=reporthook) - with zipfile.ZipFile(filehandle,"r") as zip_ref: + with zipfile.ZipFile(filehandle, "r") as zip_ref: zip_ref.extractall(image_dir) print() diff --git a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/Gin-Rummy-GUI-Design.md b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/Gin-Rummy-GUI-Design.md index 7ca44e02a..f2fff11da 100644 --- 
a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/Gin-Rummy-GUI-Design.md +++ b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/Gin-Rummy-GUI-Design.md @@ -95,8 +95,8 @@ The GameCanvas creates the canvas card image items as follows: for card_id in range(52): card = gin_rummy_utils.card_from_card_id(card_id) card_image = card_images[card.rank, card.suit] - card_item_id = self.create_image((0, -9999), image=card_image, anchor="nw") - self.itemconfigure(card_item_id, state=tk.HIDDEN) + card_item_id = cls.create_image((0, -9999), image=card_image, anchor="nw") + cls.itemconfigure(card_item_id, state=tk.HIDDEN) card_item_ids.append(card_item_id) ``` @@ -114,7 +114,7 @@ The card_items array is a way to access the card_item by its card_id. card = gin_rummy_utils.card_from_card_id(card_id) card_item_id = card_item_ids[card_id] card_image = card_images[card.rank, card.suit] - card_item = CardItem(item_id=card_item_id, card_id=card_id, card_image=card_image, game_canvas=self) + card_item = CardItem(item_id=card_item_id, card_id=card_id, card_image=card_image, game_canvas=cls) card_items.append(card_item) canvas_items.append(card_item) ``` @@ -140,12 +140,12 @@ It creates this canvas item as follows: discard_pile_box_top = discard_pile_anchor[1] discard_pile_box_right = discard_pile_box_left + card_width discard_pile_box_bottom = discard_pile_box_top + card_height - discard_pile_box_item_id = self.create_rectangle(discard_pile_box_left, + discard_pile_box_item_id = cls.create_rectangle(discard_pile_box_left, discard_pile_box_top, discard_pile_box_right, discard_pile_box_bottom, fill="gray") - discard_pile_box_item = CanvasItem(item_id=discard_pile_box_item_id, game_canvas=self) + discard_pile_box_item = CanvasItem(item_id=discard_pile_box_item_id, game_canvas=cls) canvas_items.append(discard_pile_box_item) ``` @@ -164,9 +164,9 @@ I'm not sure if this is any better than handling it as a special case. 
for player_id in range(2): x, y = player_held_pile_anchors[player_id] x -= held_pile_tab - ghost_card_item_id = self.create_rectangle(x, y, x + card_width, y + card_height, width=0, fill='') - self.itemconfig(ghost_card_item_id, tag=held_pile_tags[player_id]) - ghost_card_item = CanvasItem(item_id=ghost_card_item_id, game_canvas=self) + ghost_card_item_id = cls.create_rectangle(x, y, x + card_width, y + card_height, width=0, fill='') + cls.itemconfig(ghost_card_item_id, tag=held_pile_tags[player_id]) + ghost_card_item = CanvasItem(item_id=ghost_card_item_id, game_canvas=cls) canvas_items.append(ghost_card_item) held_pile_ghost_card_items.append(ghost_card_item) ``` @@ -203,12 +203,12 @@ The code is: ```python class GameApp(object): - def __init__(self, make_gin_rummy_env: Callable[[], 'GinRummyEnv'] = None): - self.make_gin_rummy_env = make_gin_rummy_env if make_gin_rummy_env else GameApp._make_gin_rummy_env + def __init__(cls, make_gin_rummy_env: Callable[[], 'GinRummyEnv'] = None): + cls.make_gin_rummy_env = make_gin_rummy_env if make_gin_rummy_env else GameApp._make_gin_rummy_env root = tk.Tk() root.resizable(False, False) - self.game_frame = GameFrame(root=root, game_app=self) - self.menu_bar = MenuBar(root, game_frame=self.game_frame) + cls.game_frame = GameFrame(root=root, game_app=cls) + cls.menu_bar = MenuBar(root, game_frame=cls.game_frame) root.mainloop() ``` @@ -217,10 +217,10 @@ The EnvThread is a background daemon thread that runs gin_rummy_env. It also starts the GameCanvasUpdater loop on the main thread. It maintains the following variables: ```python - self.gin_rummy_env = gin_rummy_env - self.game_canvas = game_canvas - self.mark = 0 - self.is_stopped = False + cls.gin_rummy_env = gin_rummy_env + cls.game_canvas = game_canvas + cls.mark = 0 + cls.is_stopped = False ``` The mark variable is the number of actions that the GameCanvas has processed. 
As the gin_rummy_env processes actions, the GameCanvasUpdater will be notified when a human action is needed. @@ -237,9 +237,9 @@ The HumanAgent supplies the step action when the gin_rummy_env asks for it. It goes into a wait loop until the GameCanvasUpdater provides the step action taken by the human player. It maintains the following variables: ```python - self.is_choosing_action_id = False - self.chosen_action_id = None # type: int or None - self.state = None + cls.is_choosing_action_id = False + cls.chosen_action_id = None # type: int or None + cls.state = None ``` The HumanAgent sets the state variable to the current state and sets the variable is_choosing_action_id to be True @@ -254,11 +254,11 @@ The GameCanvasUpdater runs a loop on the main thread to keep the gui in sync wit It also returns the action taken by the human player to the gin_rummy_env via the human_agent. It maintains the following variables: ```python - self.game_canvas = game_canvas - self.env_thread = None - self.pending_human_action_ids = [] # type: List[int] - self.busy_body_id = None # type: int or None - self.is_stopped = False + cls.game_canvas = game_canvas + cls.env_thread = None + cls.pending_human_action_ids = [] # type: List[int] + cls.busy_body_id = None # type: int or None + cls.is_stopped = False ``` The game_canvas is set on initialization and is never changed. When a new game starts, the env_thread is set to the new env_thread for the new game @@ -270,13 +270,13 @@ then he can tap it a second time to cancel that action. 
The GameCanvasUpdater runs the following loop on the main thread: ```python - def apply_canvas_updates(self): - if not self.env_thread.is_stopped: - self._advance_mark() + def apply_canvas_updates(cls): + if not cls.env_thread.is_stopped: + cls._advance_mark() delay_ms = 1 - self.game_canvas.after(delay_ms, func=self.apply_canvas_updates) + cls.game_canvas.after(delay_ms, func=cls.apply_canvas_updates) else: - self.is_stopped = True + cls.is_stopped = True ``` It is always trying to advance the mark to keep up with the gin_rummy_env that is running in the env_thread. The busy_body_id is the player_id whose action is being processed. diff --git a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/canvas_item.py b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/canvas_item.py index 711a4e1fa..076b2a388 100644 --- a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/canvas_item.py +++ b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/canvas_item.py @@ -1,9 +1,9 @@ -''' +""" Project: Gui Gin Rummy File name: canvas_item.py Author: William Hale Date created: 3/14/2020 -''' +""" # from __future__ import annotations from typing import TYPE_CHECKING diff --git a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/configurations.py b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/configurations.py index bc3ad3287..c56b8a1e2 100644 --- a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/configurations.py +++ b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/configurations.py @@ -1,9 +1,9 @@ -''' +""" Project: Gui Gin Rummy File name: configurations.py Author: William Hale Date created: 3/14/2020 -''' +""" import os diff --git a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/env_thread.py b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/env_thread.py index 6b286cff2..57ef6a098 100644 --- 
a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/env_thread.py +++ b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/env_thread.py @@ -1,9 +1,9 @@ -''' +""" Project: Gui Gin Rummy File name: env_thread.py Author: William Hale Date created: 3/14/2020 -''' +""" # from __future__ import annotations from typing import TYPE_CHECKING diff --git a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_app.py b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_app.py index f44750558..3c6c2b396 100644 --- a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_app.py +++ b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_app.py @@ -1,9 +1,9 @@ -''' +""" Project: Gui Gin Rummy File name: game_app.py Author: William Hale Date created: 3/14/2020 -''' +""" # from __future__ import annotations from typing import TYPE_CHECKING diff --git a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_canvas.py b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_canvas.py index 6a35f002b..284a5f0e5 100644 --- a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_canvas.py +++ b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_canvas.py @@ -1,9 +1,9 @@ -''' +""" Project: Gui Gin Rummy File name: game_canvas.py Author: William Hale Date created: 3/14/2020 -''' +""" # from __future__ import annotations from typing import TYPE_CHECKING diff --git a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_canvas_debug.py b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_canvas_debug.py index cfa16a4e9..ea023e103 100644 --- a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_canvas_debug.py +++ b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_canvas_debug.py @@ -1,9 +1,9 @@ -''' +""" Project: Gui Gin Rummy File name: game_canvas_debug.py Author: William Hale Date 
created: 3/14/2020 -''' +""" # from __future__ import annotations from typing import TYPE_CHECKING @@ -36,11 +36,10 @@ def description(self): discard_pile_items = game_canvas.find_withtag(configurations.DISCARD_PILE_TAG) north_held_pile_item_ids = game_canvas.getter.get_held_pile_item_ids(player_id=0) south_held_pile_item_ids = game_canvas.getter.get_held_pile_item_ids(player_id=1) - lines = [] - lines.append("dealer: {}".format(GinRummyPlayer.short_name_of(player_id=dealer_id))) - lines.append("current_player: {}".format(GinRummyPlayer.short_name_of(player_id=current_player_id))) - lines.append("north hand: {}".format([card_name(card_item_id) for card_item_id in north_held_pile_item_ids])) - lines.append("stockpile: {}".format([card_name(card_item_id) for card_item_id in stock_pile_item_ids])) - lines.append("discard pile: {}".format([card_name(card_item_id) for card_item_id in discard_pile_items])) - lines.append("south hand: {}".format([card_name(card_item_id) for card_item_id in south_held_pile_item_ids])) + lines = ["dealer: {}".format(GinRummyPlayer.short_name_of(player_id=dealer_id)), + "current_player: {}".format(GinRummyPlayer.short_name_of(player_id=current_player_id)), + "north hand: {}".format([card_name(card_item_id) for card_item_id in north_held_pile_item_ids]), + "stockpile: {}".format([card_name(card_item_id) for card_item_id in stock_pile_item_ids]), + "discard pile: {}".format([card_name(card_item_id) for card_item_id in discard_pile_items]), + "south hand: {}".format([card_name(card_item_id) for card_item_id in south_held_pile_item_ids])] return "\n".join(lines) diff --git a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_canvas_getter.py b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_canvas_getter.py index 0060b41af..8ab5356ae 100644 --- a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_canvas_getter.py +++ 
b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_canvas_getter.py @@ -1,9 +1,9 @@ -''' +""" Project: Gui Gin Rummy File name: game_canvas_getter.py Author: William Hale Date created: 3/14/2020 -''' +""" # from __future__ import annotations from typing import TYPE_CHECKING diff --git a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_canvas_post_doing_action.py b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_canvas_post_doing_action.py index befde76b3..453073fcc 100644 --- a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_canvas_post_doing_action.py +++ b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_canvas_post_doing_action.py @@ -1,9 +1,9 @@ -''' +""" Project: Gui Gin Rummy File name: game_canvas_post_doing_action.py Author: William Hale Date created: 3/14/2020 -''' +""" # from __future__ import annotations from typing import TYPE_CHECKING diff --git a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_canvas_query.py b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_canvas_query.py index 59b000afb..c4dd195f6 100644 --- a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_canvas_query.py +++ b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_canvas_query.py @@ -1,9 +1,9 @@ -''' +""" Project: Gui Gin Rummy File name: game_canvas.query.py Author: William Hale Date created: 3/14/2020 -''' +""" # from __future__ import annotations from typing import TYPE_CHECKING diff --git a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_canvas_updater.py b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_canvas_updater.py index 97822153b..9c48b43ca 100644 --- a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_canvas_updater.py +++ b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_canvas_updater.py @@ -1,9 +1,9 @@ -''' +""" Project: 
Gui Gin Rummy File name: game_canvas_updater.py Author: William Hale Date created: 3/14/2020 -''' +""" # from __future__ import annotations from typing import TYPE_CHECKING diff --git a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_frame.py b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_frame.py index 001b1a237..cd3dc3edd 100644 --- a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_frame.py +++ b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_frame.py @@ -1,9 +1,9 @@ -''' +""" Project: Gui Gin Rummy File name: game_frame.py Author: William Hale Date created: 3/14/2020 -''' +""" # from __future__ import annotations from typing import TYPE_CHECKING diff --git a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/handling_tap.py b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/handling_tap.py index 07ec23184..ff746830b 100644 --- a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/handling_tap.py +++ b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/handling_tap.py @@ -1,9 +1,9 @@ -''' +""" Project: Gui Gin Rummy File name: handling_tap.py Author: William Hale Date created: 3/14/2020 -''' +""" # from __future__ import annotations from typing import TYPE_CHECKING diff --git a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/handling_tap_discard_pile.py b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/handling_tap_discard_pile.py index 5fc8fc611..3826a1f79 100644 --- a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/handling_tap_discard_pile.py +++ b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/handling_tap_discard_pile.py @@ -1,9 +1,9 @@ -''' +""" Project: Gui Gin Rummy File name: handling_tap_discard_pile.py Author: William Hale Date created: 3/14/2020 -''' +""" # from __future__ import annotations from typing import TYPE_CHECKING diff --git 
a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/handling_tap_held_pile.py b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/handling_tap_held_pile.py index f6616794b..86f7121c8 100644 --- a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/handling_tap_held_pile.py +++ b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/handling_tap_held_pile.py @@ -1,9 +1,9 @@ -''' +""" Project: Gui Gin Rummy File name: handling_tap_held_pile.py Author: William Hale Date created: 3/14/2020 -''' +""" # from __future__ import annotations from typing import TYPE_CHECKING diff --git a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/handling_tap_player_pane.py b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/handling_tap_player_pane.py index 1b35d0c3e..c6718d2b6 100644 --- a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/handling_tap_player_pane.py +++ b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/handling_tap_player_pane.py @@ -1,9 +1,9 @@ -''' +""" Project: Gui Gin Rummy File name: handling_tap_player_pane.py Author: William Hale Date created: 3/14/2020 -''' +""" # from __future__ import annotations from typing import TYPE_CHECKING diff --git a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/handling_tap_stock_pile.py b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/handling_tap_stock_pile.py index 811d0d0e2..aaa1458e3 100644 --- a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/handling_tap_stock_pile.py +++ b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/handling_tap_stock_pile.py @@ -1,9 +1,9 @@ -''' +""" Project: Gui Gin Rummy File name: handling_tap_stock_pile.py Author: William Hale Date created: 3/14/2020 -''' +""" # from __future__ import annotations from typing import TYPE_CHECKING diff --git 
a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/handling_tap_to_arrange_held_pile.py b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/handling_tap_to_arrange_held_pile.py index 19d7c312c..26c331f49 100644 --- a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/handling_tap_to_arrange_held_pile.py +++ b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/handling_tap_to_arrange_held_pile.py @@ -1,9 +1,9 @@ -''' +""" Project: Gui Gin Rummy File name: handling_tap_to_arrange_held_pile.py Author: William Hale Date created: 3/14/2020 -''' +""" # from __future__ import annotations from typing import TYPE_CHECKING diff --git a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/info_messaging.py b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/info_messaging.py index f7402800d..2d8e8fb57 100644 --- a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/info_messaging.py +++ b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/info_messaging.py @@ -1,9 +1,9 @@ -''' +""" Project: Gui Gin Rummy File name: info_messaging.py Author: William Hale Date created: 3/28/2020 -''' +""" # from __future__ import annotations from typing import TYPE_CHECKING @@ -30,10 +30,8 @@ def show_activate_menus_message(game_canvas: 'GameCanvas'): return if game_canvas.query.is_going_out_button_visible(): return - lines = [] # type: List[str] - lines.append("The menu items may not drop down.") - lines.append("On an Apple computer, this is a known problem.") - lines.append("A workaround is to hit cmd-tab twice to switch to another application and back to this application.") + lines = ["The menu items may not drop down.", "On an Apple computer, this is a known problem.", + "A workaround is to hit cmd-tab twice to switch to another application and back to this application."] # type: List[str] info_message = " ".join(lines) game_canvas.info_message_label.configure(text=info_message) @@ -81,10 +79,8 
@@ def show_arrange_cards_message(player_id: int, game_canvas: 'GameCanvas'): if move_count <= 1 or move_count > 8: return if player_id == 1 and game_canvas.info_message_label['text'] == "": - lines = ["Tip:"] # type: List[str] - lines.append("You can arrange cards in your hand.") - lines.append("Select the cards you want to move by tapping them.") - lines.append("Right click the card that you want to drop them on.") + lines = ["Tip:", "You can arrange cards in your hand.", "Select the cards you want to move by tapping them.", + "Right click the card that you want to drop them on."] # type: List[str] info_message = " ".join(lines) game_canvas.info_message_label.configure(text=info_message) @@ -94,7 +90,6 @@ def show_hide_tips_message(game_canvas: 'GameCanvas'): return if not configurations.IS_SHOW_TIPS: return - lines = ["Tip:"] # type: List[str] - lines.append("Uncheck 'show tips' in the preferences to hide tips.") + lines = ["Tip:", "Uncheck 'show tips' in the preferences to hide tips."] # type: List[str] info_message = " ".join(lines) game_canvas.info_message_label.configure(text=info_message) diff --git a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/menu_bar.py b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/menu_bar.py index bd342e2e0..14524f221 100644 --- a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/menu_bar.py +++ b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/menu_bar.py @@ -1,9 +1,9 @@ -''' +""" Project: Gui Gin Rummy File name: menu_bar.py Author: William Hale Date created: 3/14/2020 -''' +""" # from __future__ import annotations from typing import TYPE_CHECKING diff --git a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/player_type.py b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/player_type.py index 7757ad15d..7645dca90 100644 --- a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/player_type.py +++ 
b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/player_type.py @@ -1,9 +1,9 @@ -''' +""" Project: Gui Gin Rummy File name: player_type.py Author: William Hale Date created: 3/14/2020 -''' +""" import enum diff --git a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/preferences_window.py b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/preferences_window.py index e07bd0c22..3df52c142 100644 --- a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/preferences_window.py +++ b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/preferences_window.py @@ -1,9 +1,9 @@ -''' +""" Project: Gui Gin Rummy File name: preferences_window.py Author: William Hale Date created: 3/14/2020 -''' +""" from tkinter import * import tkinter.colorchooser as colorchooser diff --git a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/starting_new_game.py b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/starting_new_game.py index 734cc9432..d4e2c5779 100644 --- a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/starting_new_game.py +++ b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/starting_new_game.py @@ -1,9 +1,9 @@ -''' +""" Project: Gui Gin Rummy File name: starting_new_game.py Author: William Hale Date created: 3/14/2020 -''' +""" # from __future__ import annotations from typing import TYPE_CHECKING diff --git a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/status_messaging.py b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/status_messaging.py index 2709f93e7..bf085b5e3 100644 --- a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/status_messaging.py +++ b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/status_messaging.py @@ -1,9 +1,9 @@ -''' +""" Project: Gui Gin Rummy File name: status_messaging.py Author: William Hale Date created: 3/14/2020 -''' +""" # from __future__ import annotations from 
typing import TYPE_CHECKING diff --git a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/utils.py b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/utils.py index 4c82ac6df..4b1fb7047 100644 --- a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/utils.py +++ b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/utils.py @@ -1,9 +1,9 @@ -''' +""" Project: Gui Gin Rummy File name: utils.py Author: William Hale Date created: 3/14/2020 -''' +""" # from __future__ import annotations from typing import TYPE_CHECKING diff --git a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/utils_extra.py b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/utils_extra.py index cb738450e..592f3e267 100644 --- a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/utils_extra.py +++ b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/utils_extra.py @@ -1,9 +1,9 @@ -''' +""" Project: Gui Gin Rummy File name: utils_extra.py Author: William Hale Date created: 3/14/2020 -''' +""" from PIL import Image, ImageDraw, ImageFilter diff --git a/rlcard/agents/human_agents/leduc_holdem_human_agent.py b/rlcard/agents/human_agents/leduc_holdem_human_agent.py index 65b10fca6..9a52ade6b 100644 --- a/rlcard/agents/human_agents/leduc_holdem_human_agent.py +++ b/rlcard/agents/human_agents/leduc_holdem_human_agent.py @@ -2,28 +2,28 @@ class HumanAgent(object): - ''' A human agent for Leduc Holdem. It can be used to play against trained models - ''' + """A human agent for Leduc Holdem. 
It can be used to play against trained models + """ def __init__(self, num_actions): - ''' Initilize the human agent + """Initialize the human agent Args: num_actions (int): the size of the ouput action space - ''' + """ self.use_raw = True self.num_actions = num_actions @staticmethod def step(state): - ''' Human agent will display the state and make decisions through interfaces + """Human agent will display the state and make decisions through interfaces Args: state (dict): A dictionary that represents the current state Returns: action (int): The action decided by human - ''' + """ _print_state(state['raw_obs'], state['action_record']) action = int(input('>> You choose action (integer): ')) while action < 0 or action >= len(state['legal_actions']): @@ -32,23 +32,23 @@ def step(state): return state['raw_legal_actions'][action] def eval_step(self, state): - ''' Predict the action given the curent state for evaluation. The same to step here. + """Predict the action given the curent state for evaluation. The same to step here. Args: state (numpy.array): an numpy array that represents the current state Returns: action (int): the action predicted (randomly chosen) by the random agent - ''' + """ return self.step(state), {} def _print_state(state, action_record): - ''' Print out the state + """Print out the state Args: state (dict): A dictionary of the raw state action_record (list): A list of the historical actions - ''' + """ _action_list = [] for i in range(1, len(action_record)+1): if action_record[-i][0] == state['current_player']: diff --git a/rlcard/agents/human_agents/limit_holdem_human_agent.py b/rlcard/agents/human_agents/limit_holdem_human_agent.py index 1a893bdf9..d18b5443a 100644 --- a/rlcard/agents/human_agents/limit_holdem_human_agent.py +++ b/rlcard/agents/human_agents/limit_holdem_human_agent.py @@ -2,28 +2,28 @@ class HumanAgent(object): - ''' A human agent for Limit Holdem. 
It can be used to play against trained models - ''' + """A human agent for Limit Holdem. It can be used to play against trained models + """ def __init__(self, num_actions): - ''' Initilize the human agent + """Initialize the human agent Args: num_actions (int): the size of the ouput action space - ''' + """ self.use_raw = True self.num_actions = num_actions @staticmethod def step(state): - ''' Human agent will display the state and make decisions through interfaces + """Human agent will display the state and make decisions through interfaces Args: state (dict): A dictionary that represents the current state Returns: action (int): The action decided by human - ''' + """ _print_state(state['raw_obs'], state['action_record']) action = int(input('>> You choose action (integer): ')) while action < 0 or action >= len(state['legal_actions']): @@ -32,23 +32,23 @@ def step(state): return state['raw_legal_actions'][action] def eval_step(self, state): - ''' Predict the action given the curent state for evaluation. The same to step here. + """Predict the action given the curent state for evaluation. The same to step here. 
Args: state (numpy.array): an numpy array that represents the current state Returns: action (int): the action predicted (randomly chosen) by the random agent - ''' + """ return self.step(state), {} def _print_state(state, action_record): - ''' Print out the state + """Print out the state Args: state (dict): A dictionary of the raw state action_record (list): A list of the each player's historical actions - ''' + """ _action_list = [] for i in range(1, len(action_record)+1): _action_list.insert(0, action_record[-i]) diff --git a/rlcard/agents/human_agents/nolimit_holdem_human_agent.py b/rlcard/agents/human_agents/nolimit_holdem_human_agent.py index 4d1e8b805..ec3616177 100644 --- a/rlcard/agents/human_agents/nolimit_holdem_human_agent.py +++ b/rlcard/agents/human_agents/nolimit_holdem_human_agent.py @@ -2,28 +2,27 @@ class HumanAgent(object): - ''' A human agent for No Limit Holdem. It can be used to play against trained models - ''' + """A human agent for No Limit Holdem. It can be used to play against trained models""" def __init__(self, num_actions): - ''' Initilize the human agent + """Initialize the human agent Args: num_actions (int): the size of the ouput action space - ''' + """ self.use_raw = True self.num_actions = num_actions @staticmethod def step(state): - ''' Human agent will display the state and make decisions through interfaces + """Human agent will display the state and make decisions through interfaces Args: state (dict): A dictionary that represents the current state Returns: action (int): The action decided by human - ''' + """ _print_state(state['raw_obs'], state['action_record']) action = int(input('>> You choose action (integer): ')) while action < 0 or action >= len(state['legal_actions']): @@ -32,25 +31,26 @@ def step(state): return state['raw_legal_actions'][action] def eval_step(self, state): - ''' Predict the action given the curent state for evaluation. The same to step here. 
+ """Predict the action given the current state for evaluation. The same to step here. Args: - state (numpy.array): an numpy array that represents the current state + state (numpy.array): a numpy array that represents the current state Returns: action (int): the action predicted (randomly chosen) by the random agent - ''' + """ return self.step(state), {} + def _print_state(state, action_record): - ''' Print out the state + """Print out the state Args: state (dict): A dictionary of the raw state action_record (list): A list of the historical actions - ''' + """ _action_list = [] - for i in range(1, len(action_record)+1): + for i in range(1, len(action_record) + 1): if action_record[-i][0] == state['current_player']: break _action_list.insert(0, action_record[-i]) @@ -60,12 +60,12 @@ def _print_state(state, action_record): print('\n=============== Community Card ===============') print_card(state['public_cards']) - print('============= Player',state["current_player"],'- Hand =============') + print('============= Player', state["current_player"], '- Hand =============') print_card(state['hand']) print('=============== Chips ===============') - print('In Pot:',state["pot"]) - print('Remaining:',state["stakes"]) + print('In Pot:', state["pot"]) + print('Remaining:', state["stakes"]) print('\n=========== Actions You Can Choose ===========') print(', '.join([str(index) + ': ' + str(action) for index, action in enumerate(state['legal_actions'])])) diff --git a/rlcard/agents/human_agents/uno_human_agent.py b/rlcard/agents/human_agents/uno_human_agent.py index caf507b16..6f21704df 100644 --- a/rlcard/agents/human_agents/uno_human_agent.py +++ b/rlcard/agents/human_agents/uno_human_agent.py @@ -1,28 +1,28 @@ from rlcard.games.uno.card import UnoCard + class HumanAgent(object): - ''' A human agent for Leduc Holdem. It can be used to play against trained models - ''' + """A human agent for Leduc Holdem. 
It can be used to play against trained models""" def __init__(self, num_actions): - ''' Initilize the human agent + """Initialize the human agent Args: num_actions (int): the size of the ouput action space - ''' + """ self.use_raw = True self.num_actions = num_actions @staticmethod def step(state): - ''' Human agent will display the state and make decisions through interfaces + """Human agent will display the state and make decisions through interfaces Args: state (dict): A dictionary that represents the current state Returns: action (int): The action decided by human - ''' + """ print(state['raw_obs']) _print_state(state['raw_obs'], state['action_record']) action = int(input('>> You choose action (integer): ')) @@ -32,24 +32,25 @@ def step(state): return state['raw_legal_actions'][action] def eval_step(self, state): - ''' Predict the action given the curent state for evaluation. The same to step here. + """Predict the action given the curent state for evaluation. The same to step here. 
Args: - state (numpy.array): an numpy array that represents the current state + state (numpy.array): a numpy array that represents the current state Returns: action (int): the action predicted (randomly chosen) by the random agent - ''' + """ return self.step(state), {} + def _print_state(state, action_record): - ''' Print out the state of a given player + """Print out the state of a given player Args: player (int): Player id - ''' + """ _action_list = [] - for i in range(1, len(action_record)+1): + for i in range(1, len(action_record) + 1): if action_record[-i][0] == state['current_player']: break _action_list.insert(0, action_record[-i]) @@ -70,16 +71,17 @@ def _print_state(state, action_record): print('Player {} has {} cards.'.format(i, state['num_cards'][i])) print('======== Actions You Can Choose =========') for i, action in enumerate(state['legal_actions']): - print(str(i)+': ', end='') + print(str(i) + ': ', end='') UnoCard.print_cards(action, wild_color=True) if i < len(state['legal_actions']) - 1: print(', ', end='') print('\n') + def _print_action(action): - ''' Print out an action in a nice form + """Print out an action in a nice form Args: action (str): A string a action - ''' + """ UnoCard.print_cards(action, wild_color=True) diff --git a/rlcard/agents/nfsp_agent.py b/rlcard/agents/nfsp_agent.py index 34f739fe9..89014ec6a 100644 --- a/rlcard/agents/nfsp_agent.py +++ b/rlcard/agents/nfsp_agent.py @@ -14,10 +14,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -''' Neural Fictitious Self-Play (NFSP) agent implemented in TensorFlow. +"""Neural Fictitious Self-Play (NFSP) agent implemented in TensorFlow. See the paper https://arxiv.org/abs/1603.01121 for more details. 
-''' +""" import os import random @@ -33,13 +33,14 @@ Transition = collections.namedtuple('Transition', 'info_state action_probs') + class NFSPAgent(object): - ''' An approximate clone of rlcard.agents.nfsp_agent that uses + """An approximate clone of rlcard.agents.nfsp_agent that uses pytorch instead of tensorflow. Note that this implementation differs from Henrich and Silver (2016) in that the supervised training minimizes cross-entropy with respect to the stored action probabilities rather than the realized actions. - ''' + """ def __init__(self, num_actions=4, @@ -66,7 +67,7 @@ def __init__(self, device=None, save_path=None, save_every=float('inf')): - ''' Initialize the NFSP agent. + """Initialize the NFSP agent. Args: num_actions (int): The number of actions. @@ -74,7 +75,7 @@ def __init__(self, hidden_layers_sizes (list): The hidden layers sizes for the layers of the average policy. reservoir_buffer_capacity (int): The size of the buffer for average policy. - anticipatory_param (float): The hyper-parameter that balances rl/avarage policy. + anticipatory_param (float): The hyper-parameter that balances rl/average policy. batch_size (int): The batch_size for training average policy. train_every (int): Train the SL policy every X steps. rl_learning_rate (float): The learning rate of the RL agent. @@ -92,7 +93,7 @@ def __init__(self, q_train_step (int): Train the model every X steps. q_mlp_layers (list): The layer sizes of inner DQN agent. 
device (torch.device): Whether to use the cpu or gpu - ''' + """ self.use_raw = False self._num_actions = num_actions self._state_shape = state_shape @@ -120,24 +121,23 @@ def __init__(self, self.train_t = 0 # Build the action-value network - self._rl_agent = DQNAgent(q_replay_memory_size, q_replay_memory_init_size, \ - q_update_target_estimator_every, q_discount_factor, q_epsilon_start, q_epsilon_end, \ - q_epsilon_decay_steps, q_batch_size, num_actions, state_shape, q_train_every, q_mlp_layers, \ - rl_learning_rate, device) + self._rl_agent = DQNAgent(q_replay_memory_size, q_replay_memory_init_size, + q_update_target_estimator_every, q_discount_factor, q_epsilon_start, q_epsilon_end, + q_epsilon_decay_steps, q_batch_size, num_actions, state_shape, q_train_every, + q_mlp_layers, + rl_learning_rate, device) # Build the average policy supervised model self._build_model() self.sample_episode_policy() - + # Checkpoint saving parameters self.save_path = save_path self.save_every = save_every def _build_model(self): - ''' Build the average policy network - ''' - + """Build the average policy network""" # configure the average policy network policy_network = AveragePolicyNetwork(self._num_actions, self._state_shape, self._layer_sizes) policy_network = policy_network.to(self.device) @@ -153,26 +153,27 @@ def _build_model(self): self.policy_network_optimizer = torch.optim.Adam(self.policy_network.parameters(), lr=self._sl_learning_rate) def feed(self, ts): - ''' Feed data to inner RL agent + """Feed data to inner RL agent Args: ts (list): A list of 5 elements that represent the transition. 
- ''' + """ self._rl_agent.feed(ts) self.total_t += 1 - if self.total_t>0 and len(self._reservoir_buffer) >= self._min_buffer_size_to_learn and self.total_t%self._train_every == 0: - sl_loss = self.train_sl() + if self.total_t > 0 and len( + self._reservoir_buffer) >= self._min_buffer_size_to_learn and self.total_t % self._train_every == 0: + sl_loss = self.train_sl() print('\rINFO - Step {}, sl-loss: {}'.format(self.total_t, sl_loss), end='') def step(self, state): - ''' Returns the action to be taken. + """Returns the action to be taken. Args: state (dict): The current state Returns: action (int): An action id - ''' + """ obs = state['obs'] legal_actions = list(state['legal_actions'].keys()) if self._mode == 'best_response': @@ -189,7 +190,7 @@ def step(self, state): return action def eval_step(self, state): - ''' Use the average policy for evaluation purpose + """Use the average policy for evaluation purpose Args: state (dict): The current state. @@ -197,7 +198,7 @@ def eval_step(self, state): Returns: action (int): An action id. 
info (dict): A dictionary containing information - ''' + """ if self.evaluate_with == 'best_response': action, info = self._rl_agent.eval_step(state) elif self.evaluate_with == 'average_policy': @@ -207,28 +208,28 @@ def eval_step(self, state): probs = remove_illegal(probs, legal_actions) action = np.random.choice(len(probs), p=probs) info = {} - info['probs'] = {state['raw_legal_actions'][i]: float(probs[list(state['legal_actions'].keys())[i]]) for i in range(len(state['legal_actions']))} + info['probs'] = {state['raw_legal_actions'][i]: float(probs[list(state['legal_actions'].keys())[i]]) for i + in range(len(state['legal_actions']))} else: raise ValueError("'evaluate_with' should be either 'average_policy' or 'best_response'.") return action, info def sample_episode_policy(self): - ''' Sample average/best_response policy - ''' + """Sample average/best_response policy""" if np.random.rand() < self._anticipatory_param: self._mode = 'best_response' else: self._mode = 'average_policy' def _act(self, info_state): - ''' Predict action probability givin the observation and legal actions + """Predict action probability givin the observation and legal actions Not connected to computation graph Args: - info_state (numpy.array): An obervation. + info_state (numpy.array): An observation. Returns: action_probs (numpy.array): The predicted action probability. - ''' + """ info_state = np.expand_dims(info_state, axis=0) info_state = torch.from_numpy(info_state).float().to(self.device) @@ -240,28 +241,28 @@ def _act(self, info_state): return action_probs def _add_transition(self, state, probs): - ''' Adds the new transition to the reservoir buffer. + """Adds the new transition to the reservoir buffer. Transitions are in the form (state, probs). Args: state (numpy.array): The state. probs (numpy.array): The probabilities of each action. 
- ''' + """ transition = Transition( - info_state=state, - action_probs=probs) + info_state=state, + action_probs=probs) self._reservoir_buffer.add(transition) def train_sl(self): - ''' Compute the loss on sampled transitions and perform a avg-network update. + """Compute the loss on sampled transitions and perform a avg-network update. If there are not enough elements in the buffer, no loss is computed and `None` is returned instead. Returns: loss (float): The average loss obtained on this batch of transitions or `None`. - ''' + """ if (len(self._reservoir_buffer) < self._batch_size or len(self._reservoir_buffer) < self._min_buffer_size_to_learn): return None @@ -302,14 +303,14 @@ def train_sl(self): def set_device(self, device): self.device = device self._rl_agent.set_device(device) - + def checkpoint_attributes(self): - ''' + """ Return the current checkpoint attributes (dict) Checkpoint attributes are used to save and restore the model in the middle of training Saves the model state dict, optimizer state dict, and all other instance variables - ''' - + """ + return { 'agent_type': 'NFSPAgent', 'policy_network': self.policy_network.checkpoint_attributes(), @@ -328,15 +329,14 @@ def checkpoint_attributes(self): 'sl_learning_rate': self._sl_learning_rate, 'train_every': self._train_every, } - + @classmethod def from_checkpoint(cls, checkpoint): - ''' - Restore the model from a checkpoint - + """Restore the model from a checkpoint + Args: checkpoint (dict): the checkpoint attributes generated by checkpoint_attributes() - ''' + """ print("\nINFO - Restoring model from checkpoint...") agent = cls( anticipatory_param=checkpoint['anticipatory_param'], @@ -351,7 +351,7 @@ def from_checkpoint(cls, checkpoint): state_shape=checkpoint['rl_agent']['q_estimator']['state_shape'], hidden_layers_sizes=[], ) - + agent.policy_network = AveragePolicyNetwork.from_checkpoint(checkpoint['policy_network']) agent._reservoir_buffer = 
ReservoirBuffer.from_checkpoint(checkpoint['reservoir_buffer']) agent._mode = checkpoint['mode'] @@ -364,25 +364,25 @@ def from_checkpoint(cls, checkpoint): agent._rl_agent.from_checkpoint(checkpoint['rl_agent']) agent._rl_agent.set_device(agent.device) return agent - + def save_checkpoint(self, path, filename='checkpoint_nfsp.pt'): - ''' Save the model checkpoint (all attributes) + """Save the model checkpoint (all attributes) Args: path (str): the path to save the model - ''' + """ torch.save(self.checkpoint_attributes(), os.path.join(path, filename)) - + class AveragePolicyNetwork(nn.Module): - ''' + """ Approximates the history of action probabilities given state (average policy). Forward pass returns log probabilities of actions. - ''' + """ def __init__(self, num_actions=2, state_shape=None, mlp_layers=None): - ''' Initialize the policy network. It's just a bunch of ReLU + """Initialize the policy network. It's just a bunch of ReLU layers with no activation on the final one, initialized with Xavier (sonnet.nets.MLP and tensorflow defaults) @@ -390,7 +390,7 @@ def __init__(self, num_actions=2, state_shape=None, mlp_layers=None): num_actions (int): number of output actions state_shape (list): shape of state tensor for each sample mlp_laters (list): output size of each mlp layer including final - ''' + """ super(AveragePolicyNetwork, self).__init__() self.num_actions = num_actions @@ -399,80 +399,80 @@ def __init__(self, num_actions=2, state_shape=None, mlp_layers=None): # set up mlp w/ relu activations layer_dims = [np.prod(self.state_shape)] + self.mlp_layers - mlp = [nn.Flatten()] - mlp.append(nn.BatchNorm1d(layer_dims[0])) - for i in range(len(layer_dims)-1): - mlp.append(nn.Linear(layer_dims[i], layer_dims[i+1])) - if i != len(layer_dims) - 2: # all but final have relu + mlp = [nn.Flatten(), nn.BatchNorm1d(layer_dims[0])] + for i in range(len(layer_dims) - 1): + mlp.append(nn.Linear(layer_dims[i], layer_dims[i + 1])) + if i != len(layer_dims) - 2: # all but 
final have relu mlp.append(nn.ReLU()) self.mlp = nn.Sequential(*mlp) def forward(self, s): - ''' Log action probabilities of each action from state + """Log action probabilities of each action from state Args: s (Tensor): (batch, state_shape) state tensor Returns: log_action_probs (Tensor): (batch, num_actions) - ''' + """ logits = self.mlp(s) log_action_probs = F.log_softmax(logits, dim=-1) return log_action_probs - + def checkpoint_attributes(self): - ''' + """ Return the current checkpoint attributes (dict) Checkpoint attributes are used to save and restore the model in the middle of training - ''' - + """ + return { 'num_actions': self.num_actions, 'state_shape': self.state_shape, 'mlp_layers': self.mlp_layers, 'mlp': self.mlp.state_dict(), } - + @classmethod def from_checkpoint(cls, checkpoint): - ''' + """ Restore the model from a checkpoint - + Args: checkpoint (dict): the checkpoint attributes generated by checkpoint_attributes() - ''' - + """ + agent = cls( num_actions=checkpoint['num_actions'], state_shape=checkpoint['state_shape'], mlp_layers=checkpoint['mlp_layers'], ) - + agent.mlp.load_state_dict(checkpoint['mlp']) return agent + class ReservoirBuffer(object): - ''' Allows uniform sampling over a stream of data. + """Allows uniform sampling over a stream of data. This class supports the storage of arbitrary elements, such as observation tensors, integer actions, etc. See https://en.wikipedia.org/wiki/Reservoir_sampling for more details. - ''' + """ def __init__(self, reservoir_buffer_capacity): - ''' Initialize the buffer. - ''' + """Initialize the buffer. + """ self._reservoir_buffer_capacity = reservoir_buffer_capacity self._data = [] self._add_calls = 0 def add(self, element): - ''' Potentially adds `element` to the reservoir buffer. + """Potentially adds `element` to the reservoir buffer. Args: element (object): data to be added to the reservoir buffer. 
- ''' + """ if len(self._data) < self._reservoir_buffer_capacity: self._data.append(element) else: @@ -482,7 +482,7 @@ def add(self, element): self._add_calls += 1 def sample(self, num_samples): - ''' Returns `num_samples` uniformly sampled from the buffer. + """Returns `num_samples` uniformly sampled from the buffer. Args: num_samples (int): The number of samples to draw. @@ -492,25 +492,25 @@ def sample(self, num_samples): Raises: ValueError: If there are less than `num_samples` elements in the buffer - ''' + """ if len(self._data) < num_samples: raise ValueError("{} elements could not be sampled from size {}".format( - num_samples, len(self._data))) + num_samples, len(self._data))) return random.sample(self._data, num_samples) def clear(self): - ''' Clear the buffer - ''' + """Clear the buffer + """ self._data = [] self._add_calls = 0 - + def checkpoint_attributes(self): return { 'data': self._data, 'add_calls': self._add_calls, 'reservoir_buffer_capacity': self._reservoir_buffer_capacity, } - + @classmethod def from_checkpoint(cls, checkpoint): reservoir_buffer = cls(checkpoint['reservoir_buffer_capacity']) @@ -523,4 +523,3 @@ def __len__(self): def __iter__(self): return iter(self._data) - diff --git a/rlcard/agents/random_agent.py b/rlcard/agents/random_agent.py index ecdab040c..241a16647 100644 --- a/rlcard/agents/random_agent.py +++ b/rlcard/agents/random_agent.py @@ -2,32 +2,31 @@ class RandomAgent(object): - ''' A random agent. Random agents is for running toy examples on the card games - ''' + """A random agent. Random agents is for running toy examples on the card games""" def __init__(self, num_actions): - ''' Initilize the random agent + """Initialize the random agent Args: num_actions (int): The size of the ouput action space - ''' + """ self.use_raw = False self.num_actions = num_actions @staticmethod def step(state): - ''' Predict the action given the curent state in gerenerating training data. 
+ """Predict the action given the curent state in gerenerating training data. Args: state (dict): An dictionary that represents the current state Returns: action (int): The action predicted (randomly chosen) by the random agent - ''' + """ return np.random.choice(list(state['legal_actions'].keys())) def eval_step(self, state): - ''' Predict the action given the current state for evaluation. + """Predict the action given the current state for evaluation. Since the random agents are not trained. This function is equivalent to step function Args: @@ -36,7 +35,7 @@ def eval_step(self, state): Returns: action (int): The action predicted (randomly chosen) by the random agent probs (list): The list of action probabilities - ''' + """ probs = [0 for _ in range(self.num_actions)] for i in state['legal_actions']: probs[i] = 1/len(state['legal_actions']) diff --git a/rlcard/envs/__init__.py b/rlcard/envs/__init__.py index de9dbb8c1..a56d69f11 100644 --- a/rlcard/envs/__init__.py +++ b/rlcard/envs/__init__.py @@ -1,5 +1,4 @@ -''' Register new environments -''' +"""Register new environments""" from rlcard.envs.env import Env from rlcard.envs.registration import register, make diff --git a/rlcard/envs/blackjack.py b/rlcard/envs/blackjack.py index 459d3d813..4e5c91d32 100644 --- a/rlcard/envs/blackjack.py +++ b/rlcard/envs/blackjack.py @@ -5,17 +5,16 @@ from rlcard.games.blackjack import Game DEFAULT_GAME_CONFIG = { - 'game_num_players': 1, - 'game_num_decks': 1 - } + 'game_num_players': 1, + 'game_num_decks': 1 +} + class BlackjackEnv(Env): - ''' Blackjack Environment - ''' + """Blackjack Environment """ def __init__(self, config): - ''' Initialize the Blackjack environment - ''' + """Initialize the Blackjack environment """ self.name = 'blackjack' self.default_game_config = DEFAULT_GAME_CONFIG self.game = Game() @@ -25,25 +24,25 @@ def __init__(self, config): self.action_shape = [None for _ in range(self.num_players)] def _get_legal_actions(self): - ''' Get all leagal actions + 
"""Get all legal actions Returns: encoded_action_list (list): return encoded legal action list (from str to int) - ''' + """ encoded_action_list = [] for i in range(len(self.actions)): encoded_action_list.append(i) return encoded_action_list def _extract_state(self, state): - ''' Extract the state representation from state dictionary for agent + """Extract the state representation from state dictionary for agent Args: state (dict): Original state from the game Returns: observation (list): combine the player's score and dealer's observable score for observation - ''' + """ cards = state['state'] my_cards = cards[0] dealer_cards = cards[1] @@ -60,11 +59,11 @@ def _extract_state(self, state): return extracted_state def get_payoffs(self): - ''' Get the payoff of a game + """Get the payoff of a game Returns: payoffs (list): list of payoffs - ''' + """ payoffs = [] for i in range(self.num_players): @@ -77,19 +76,22 @@ def get_payoffs(self): return np.array(payoffs) - def _decode_action(self, action_id): - ''' Decode the action for applying to the game + """Decode the action for applying to the game Args: action id (int): action id Returns: action (str): action for the game - ''' + """ return self.actions[action_id] -rank2score = {"A":11, "2":2, "3":3, "4":4, "5":5, "6":6, "7":7, "8":8, "9":9, "T":10, "J":10, "Q":10, "K":10} + +rank2score = {"A": 11, "2": 2, "3": 3, "4": 4, "5": 5, "6": 6, "7": 7, "8": 8, "9": 9, "T": 10, "J": 10, "Q": 10, + "K": 10} + + def get_score(hand): score = 0 count_a = 0 diff --git a/rlcard/envs/bridge.py b/rlcard/envs/bridge.py index 13190c0c6..1ba70704f 100644 --- a/rlcard/envs/bridge.py +++ b/rlcard/envs/bridge.py @@ -1,8 +1,8 @@ -''' +""" File name: envs/bridge.py Author: William Hale Date created: 11/26/2021 -''' +""" import numpy as np from collections import OrderedDict @@ -42,8 +42,7 @@ class BridgeEnv(Env): - ''' Bridge Environment - ''' + """Bridge Environment""" def __init__(self, config): self.name = 'bridge' self.game = Game() @@ 
-55,62 +54,62 @@ def __init__(self, config): self.action_shape = [None for _ in range(self.num_players)] def get_payoffs(self): - ''' Get the payoffs of players. + """Get the payoffs of players. Returns: (list): A list of payoffs for each player. - ''' + """ return self.bridgePayoffDelegate.get_payoffs(game=self.game) def get_perfect_information(self): - ''' Get the perfect information of the current state + """Get the perfect information of the current state Returns: (dict): A dictionary of all the perfect information of the current state - ''' + """ return self.game.round.get_perfect_information() def _extract_state(self, state): # wch: don't use state 211126 - ''' Extract useful information from state for RL. + """Extract useful information from state for RL. Args: state (dict): The raw state Returns: (numpy.array): The extracted state - ''' + """ return self.bridgeStateExtractor.extract_state(game=self.game) def _decode_action(self, action_id): - ''' Decode Action id to the action in the game. + """Decode Action id to the action in the game. Args: action_id (int): The id of the action Returns: (ActionEvent): The action that will be passed to the game engine. - ''' + """ return ActionEvent.from_action_id(action_id=action_id) def _get_legal_actions(self): - ''' Get all legal actions for current state. + """Get all legal actions for current state. Returns: (list): A list of legal actions' id. - ''' + """ raise NotImplementedError # wch: not needed class BridgePayoffDelegate(object): def get_payoffs(self, game: BridgeGame): - ''' Get the payoffs of players. Must be implemented in the child class. + """Get the payoffs of players. Must be implemented in the child class. Returns: (list): A list of payoffs for each player. Note: Must be implemented in the child class. - ''' + """ raise NotImplementedError @@ -120,11 +119,11 @@ def __init__(self): self.make_bid_bonus = 2 def get_payoffs(self, game: BridgeGame): - ''' Get the payoffs of players. 
+ """Get the payoffs of players. Returns: (list): A list of payoffs for each player. - ''' + """ contract_bid_move = game.round.contract_bid_move if contract_bid_move: declarer = contract_bid_move.player @@ -149,23 +148,23 @@ def get_state_shape_size(self) -> int: raise NotImplementedError def extract_state(self, game: BridgeGame): - ''' Extract useful information from state for RL. Must be implemented in the child class. + """Extract useful information from state for RL. Must be implemented in the child class. Args: game (BridgeGame): The game Returns: (numpy.array): The extracted state - ''' + """ raise NotImplementedError @staticmethod def get_legal_actions(game: BridgeGame): - ''' Get all legal actions for current state. + """Get all legal actions for current state. Returns: (OrderedDict): A OrderedDict of legal actions' id. - ''' + """ legal_actions = game.judger.get_legal_actions() legal_actions_ids = {action_event.action_id: None for action_event in legal_actions} return OrderedDict(legal_actions_ids) @@ -194,14 +193,14 @@ def get_state_shape_size(self) -> int: return state_shape_size def extract_state(self, game: BridgeGame): - ''' Extract useful information from state for RL. + """Extract useful information from state for RL. 
Args: game (BridgeGame): The game Returns: (numpy.array): The extracted state - ''' + """ extracted_state = {} legal_actions: OrderedDict = self.get_legal_actions(game=game) raw_legal_actions = list(legal_actions.keys()) diff --git a/rlcard/envs/doudizhu.py b/rlcard/envs/doudizhu.py index 0f46d2322..ae20b876c 100644 --- a/rlcard/envs/doudizhu.py +++ b/rlcard/envs/doudizhu.py @@ -5,8 +5,7 @@ class DoudizhuEnv(Env): - ''' Doudizhu Environment - ''' + """Doudizhu Environment """ def __init__(self, config): from rlcard.games.doudizhu.utils import ACTION_2_ID, ID_2_ACTION @@ -24,11 +23,11 @@ def __init__(self, config): self.action_shape = [[54] for _ in range(self.num_players)] def _extract_state(self, state): - ''' Encode state + """Encode state Args: state (dict): dict of original state - ''' + """ current_hand = _cards2array(state['current_hand']) others_hand = _cards2array(state['others_hand']) @@ -91,40 +90,40 @@ def _extract_state(self, state): return extracted_state def get_payoffs(self): - ''' Get the payoffs of players. Must be implemented in the child class. + """Get the payoffs of players. Must be implemented in the child class. Returns: payoffs (list): a list of payoffs for each player - ''' + """ return self.game.judger.judge_payoffs(self.game.round.landlord_id, self.game.winner_id) def _decode_action(self, action_id): - ''' Action id -> the action in the game. Must be implemented in the child class. + """Action id -> the action in the game. Must be implemented in the child class. Args: action_id (int): the id of the action Returns: action (string): the action that will be passed to the game engine. 
- ''' + """ return self._ID_2_ACTION[action_id] def _get_legal_actions(self): - ''' Get all legal actions for current state + """Get all legal actions for current state Returns: legal_actions (list): a list of legal actions' id - ''' + """ legal_actions = self.game.state['actions'] legal_actions = {self._ACTION_2_ID[action]: _cards2array(action) for action in legal_actions} return legal_actions def get_perfect_information(self): - ''' Get the perfect information of the current state + """Get the perfect information of the current state Returns: (dict): A dictionary of all the perfect information of the current state - ''' + """ state = {} state['hand_cards_with_suit'] = [self._cards2str_with_suit(player.current_hand) for player in self.game.players] state['hand_cards'] = [self._cards2str(player.current_hand) for player in self.game.players] @@ -134,11 +133,11 @@ def get_perfect_information(self): return state def get_action_feature(self, action): - ''' For some environments such as DouDizhu, we can have action features + """For some environments such as DouDizhu, we can have action features Returns: (numpy.array): The action features - ''' + """ return _cards2array(self._decode_action(action)) Card2Column = {'3': 0, '4': 1, '5': 2, '6': 3, '7': 4, '8': 5, '9': 6, 'T': 7, diff --git a/rlcard/envs/env.py b/rlcard/envs/env.py index 93e239548..6cc47c4c6 100644 --- a/rlcard/envs/env.py +++ b/rlcard/envs/env.py @@ -1,13 +1,13 @@ from rlcard.utils import * class Env(object): - ''' + """ The base Env class. For all the environments in RLCard, we should base on this class and implement as many functions as we can. - ''' + """ def __init__(self, config): - ''' Initialize the environment + """Initialize the environment Args: config (dict): A config dictionary. All the fields are @@ -23,7 +23,7 @@ def __init__(self, config): the default game configurations for Blackjack should be in 'rlcard/envs/blackjack.py' TODO: Support more game configurations in the future. 
- ''' + """ self.allow_step_back = self.game.allow_step_back = config['allow_step_back'] self.action_recorder = [] @@ -50,20 +50,20 @@ def __init__(self, config): def reset(self): - ''' Start a new game + """Start a new game Returns: (tuple): Tuple containing: (numpy.array): The begining state of the game (int): The begining player - ''' + """ state, player_id = self.game.init_game() self.action_recorder = [] return self._extract_state(state), player_id def step(self, action, raw_action=False): - ''' Step forward + """Step forward Args: action (int): The action taken by the current player @@ -74,7 +74,7 @@ def step(self, action, raw_action=False): (dict): The next state (int): The ID of the next player - ''' + """ if not raw_action: action = self._decode_action(action) @@ -86,7 +86,7 @@ def step(self, action, raw_action=False): return self._extract_state(next_state), player_id def step_back(self): - ''' Take one step backward. + """Take one step backward. Returns: (tuple): Tuple containing: @@ -95,7 +95,7 @@ def step_back(self): (int): The ID of the previous player Note: Error will be raised if step back from the root node. - ''' + """ if not self.allow_step_back: raise Exception('Step back is off. To use step_back, please set allow_step_back=True in rlcard.make') @@ -108,17 +108,17 @@ def step_back(self): return state, player_id def set_agents(self, agents): - ''' + """ Set the agents that will interact with the environment. This function must be called before `run`. Args: agents (list): List of Agent classes - ''' + """ self.agents = agents def run(self, is_training=False): - ''' + """ Run a complete game, either for evaluation or training RL agent. Args: @@ -132,7 +132,7 @@ def run(self, is_training=False): Note: The trajectories are 3-dimension list. The first dimension is for different players. The second dimension is for different transitions. 
The third dimension is for the contents of each transiton - ''' + """ trajectories = [[] for _ in range(self.num_players)] state, player_id = self.reset() @@ -169,57 +169,57 @@ def run(self, is_training=False): return trajectories, payoffs def is_over(self): - ''' Check whether the curent game is over + """Check whether the current game is over Returns: (boolean): True if current game is over - ''' + """ return self.game.is_over() def get_player_id(self): - ''' Get the current player id + """Get the current player id Returns: (int): The id of the current player - ''' + """ return self.game.get_player_id() def get_state(self, player_id): - ''' Get the state given player id + """Get the state given player id Args: player_id (int): The player id Returns: (numpy.array): The observed state of the player - ''' + """ return self._extract_state(self.game.get_state(player_id)) def get_payoffs(self): - ''' Get the payoffs of players. Must be implemented in the child class. + """Get the payoffs of players. Must be implemented in the child class. Returns: (list): A list of payoffs for each player. Note: Must be implemented in the child class. - ''' + """ raise NotImplementedError def get_perfect_information(self): - ''' Get the perfect information of the current state + """Get the perfect information of the current state Returns: (dict): A dictionary of all the perfect information of the current state - ''' + """ raise NotImplementedError def get_action_feature(self, action): - ''' For some environments such as DouDizhu, we can have action features + """For some environments such as DouDizhu, we can have action features Returns: (numpy.array): The action features - ''' + """ # By default we use one-hot encoding feature = np.zeros(self.num_actions, dtype=np.int8) feature[action] = 1 @@ -231,18 +231,18 @@ def seed(self, seed=None): return seed def _extract_state(self, state): - ''' Extract useful information from state for RL. Must be implemented in the child class. 
+ """Extract useful information from state for RL. Must be implemented in the child class. Args: state (dict): The raw state Returns: (numpy.array): The extracted state - ''' + """ raise NotImplementedError def _decode_action(self, action_id): - ''' Decode Action id to the action in the game. + """Decode Action id to the action in the game. Args: action_id (int): The id of the action @@ -251,15 +251,15 @@ def _decode_action(self, action_id): (string): The action that will be passed to the game engine. Note: Must be implemented in the child class. - ''' + """ raise NotImplementedError def _get_legal_actions(self): - ''' Get all legal actions for current state. + """Get all legal actions for current state. Returns: (list): A list of legal actions' id. Note: Must be implemented in the child class. - ''' + """ raise NotImplementedError diff --git a/rlcard/envs/gin_rummy.py b/rlcard/envs/gin_rummy.py index fd9c3ae4f..accf5388c 100644 --- a/rlcard/envs/gin_rummy.py +++ b/rlcard/envs/gin_rummy.py @@ -1,16 +1,17 @@ -''' +""" File name: envs/gin_rummy.py Author: William Hale Date created: 2/12/2020 -''' +""" import numpy as np from collections import OrderedDict from rlcard.envs import Env + class GinRummyEnv(Env): - ''' GinRummy Environment - ''' + """GinRummy Environment""" + def __init__(self, config): from rlcard.games.gin_rummy.utils.move import ScoreSouthMove from rlcard.games.gin_rummy.utils import utils @@ -25,7 +26,7 @@ def __init__(self, config): self.action_shape = [None for _ in range(self.num_players)] def _extract_state(self, state): # 200213 don't use state ??? - ''' Encode state + """Encode state Args: state (dict): dict of original state @@ -37,12 +38,15 @@ def _extract_state(self, state): # 200213 don't use state ??? dead_cards (1 for discards except for top_discard else 0) opponent known cards (likewise) unknown cards (likewise) # is this needed ??? 
200213 - ''' + """ if self.game.is_over(): obs = np.array([self._utils.encode_cards([]) for _ in range(5)]) - extracted_state = {'obs': obs, 'legal_actions': self._get_legal_actions()} - extracted_state['raw_legal_actions'] = list(self._get_legal_actions().keys()) - extracted_state['raw_obs'] = obs + extracted_state = { + 'obs': obs, + 'legal_actions': self._get_legal_actions(), + 'raw_legal_actions': list(self._get_legal_actions().keys()), + 'raw_obs': obs + } else: discard_pile = self.game.round.dealer.discard_pile stock_pile = self.game.round.dealer.stock_pile @@ -59,16 +63,20 @@ def _extract_state(self, state): # 200213 don't use state ??? unknown_cards_rep = self._utils.encode_cards(unknown_cards) rep = [hand_rep, top_discard_rep, dead_cards_rep, known_cards_rep, unknown_cards_rep] obs = np.array(rep) - extracted_state = {'obs': obs, 'legal_actions': self._get_legal_actions(), 'raw_legal_actions': list(self._get_legal_actions().keys())} - extracted_state['raw_obs'] = obs + extracted_state = { + 'obs': obs, + 'legal_actions': self._get_legal_actions(), + 'raw_legal_actions': list(self._get_legal_actions().keys()), + 'raw_obs': obs + } return extracted_state def get_payoffs(self): - ''' Get the payoffs of players. Must be implemented in the child class. + """Get the payoffs of players. Must be implemented in the child class. Returns: payoffs (list): a list of payoffs for each player - ''' + """ # determine whether game completed all moves is_game_complete = False if self.game.round: @@ -79,22 +87,22 @@ def get_payoffs(self): return np.array(payoffs) def _decode_action(self, action_id): # FIXME 200213 should return str - ''' Action id -> the action in the game. Must be implemented in the child class. + """Action id -> the action in the game. Must be implemented in the child class. Args: action_id (int): the id of the action Returns: action (ActionEvent): the action that will be passed to the game engine. 
- ''' + """ return self.game.decode_action(action_id=action_id) def _get_legal_actions(self): - ''' Get all legal actions for current state + """Get all legal actions for current state Returns: legal_actions (list): a list of legal actions' id - ''' + """ legal_actions = self.game.judge.get_legal_actions() legal_actions_ids = {action_event.action_id: None for action_event in legal_actions} return OrderedDict(legal_actions_ids) diff --git a/rlcard/envs/leducholdem.py b/rlcard/envs/leducholdem.py index da389dd8e..777c8792e 100644 --- a/rlcard/envs/leducholdem.py +++ b/rlcard/envs/leducholdem.py @@ -1,6 +1,5 @@ import json import os -import numpy as np from collections import OrderedDict import rlcard @@ -9,17 +8,17 @@ from rlcard.utils import * DEFAULT_GAME_CONFIG = { - 'game_num_players': 2, - } + 'game_num_players': 2, +} + class LeducholdemEnv(Env): - ''' Leduc Hold'em Environment - ''' + """Leduc Hold'em Environment""" def __init__(self, config): - ''' Initialize the Limitholdem environment - ''' - self.name = 'leduc-holdem' + """Initialize the Limitholdem environment + """ + self.name = 'leduc-holdem' self.default_game_config = DEFAULT_GAME_CONFIG self.game = Game() super().__init__(config) @@ -31,15 +30,15 @@ def __init__(self, config): self.card2index = json.load(file) def _get_legal_actions(self): - ''' Get all leagal actions + """Get all legal actions Returns: encoded_action_list (list): return encoded legal action list (from str to int) - ''' + """ return self.game.get_legal_actions() def _extract_state(self, state): - ''' Extract the state representation from state dictionary for agent + """Extract the state representation from state dictionary for agent Note: Currently the use the hand cards and the public cards. 
TODO: encode the states @@ -48,7 +47,7 @@ def _extract_state(self, state): Returns: observation (list): combine the player's score and dealer's observable score for observation - ''' + """ extracted_state = {} legal_actions = OrderedDict({self.actions.index(a): None for a in state['legal_actions']}) @@ -59,9 +58,9 @@ def _extract_state(self, state): obs = np.zeros(36) obs[self.card2index[hand]] = 1 if public_card: - obs[self.card2index[public_card]+3] = 1 - obs[state['my_chips']+6] = 1 - obs[sum(state['all_chips'])-state['my_chips']+21] = 1 + obs[self.card2index[public_card] + 3] = 1 + obs[state['my_chips'] + 6] = 1 + obs[sum(state['all_chips']) - state['my_chips'] + 21] = 1 extracted_state['obs'] = obs extracted_state['raw_obs'] = state @@ -71,22 +70,22 @@ def _extract_state(self, state): return extracted_state def get_payoffs(self): - ''' Get the payoff of a game + """Get the payoff of a game Returns: payoffs (list): list of payoffs - ''' + """ return self.game.get_payoffs() def _decode_action(self, action_id): - ''' Decode the action for applying to the game + """Decode the action for applying to the game Args: action id (int): action id Returns: action (str): action for the game - ''' + """ legal_actions = self.game.get_legal_actions() if self.actions[action_id] not in legal_actions: if 'check' in legal_actions: @@ -96,11 +95,11 @@ def _decode_action(self, action_id): return self.actions[action_id] def get_perfect_information(self): - ''' Get the perfect information of the current state + """Get the perfect information of the current state Returns: (dict): A dictionary of all the perfect information of the current state - ''' + """ state = {} state['chips'] = [self.game.players[i].in_chips for i in range(self.num_players)] state['public_card'] = self.game.public_card.get_index() if self.game.public_card else None diff --git a/rlcard/envs/limitholdem.py b/rlcard/envs/limitholdem.py index aa8e62cab..5307ae947 100644 --- a/rlcard/envs/limitholdem.py +++ 
b/rlcard/envs/limitholdem.py @@ -12,12 +12,12 @@ } class LimitholdemEnv(Env): - ''' Limitholdem Environment - ''' + """Limitholdem Environment + """ def __init__(self, config): - ''' Initialize the Limitholdem environment - ''' + """Initialize the Limitholdem environment + """ self.name = 'limit-holdem' self.default_game_config = DEFAULT_GAME_CONFIG self.game = Game() @@ -30,15 +30,15 @@ def __init__(self, config): self.card2index = json.load(file) def _get_legal_actions(self): - ''' Get all leagal actions + """Get all legal actions Returns: encoded_action_list (list): return encoded legal action list (from str to int) - ''' + """ return self.game.get_legal_actions() def _extract_state(self, state): - ''' Extract the state representation from state dictionary for agent + """Extract the state representation from state dictionary for agent Note: Currently the use the hand cards and the public cards. TODO: encode the states @@ -47,7 +47,7 @@ def _extract_state(self, state): Returns: observation (list): combine the player's score and dealer's observable score for observation - ''' + """ extracted_state = {} legal_actions = OrderedDict({self.actions.index(a): None for a in state['legal_actions']}) @@ -71,22 +71,22 @@ def _extract_state(self, state): return extracted_state def get_payoffs(self): - ''' Get the payoff of a game + """Get the payoff of a game Returns: payoffs (list): list of payoffs - ''' + """ return self.game.get_payoffs() def _decode_action(self, action_id): - ''' Decode the action for applying to the game + """Decode the action for applying to the game Args: action id (int): action id Returns: action (str): action for the game - ''' + """ legal_actions = self.game.get_legal_actions() if self.actions[action_id] not in legal_actions: if 'check' in legal_actions: @@ -96,11 +96,11 @@ def _decode_action(self, action_id): return self.actions[action_id] def get_perfect_information(self): - ''' Get the perfect information of the current state + """Get the 
perfect information of the current state Returns: (dict): A dictionary of all the perfect information of the current state - ''' + """ state = {} state['chips'] = [self.game.players[i].in_chips for i in range(self.num_players)] state['public_card'] = [c.get_index() for c in self.game.public_cards] if self.game.public_cards else None diff --git a/rlcard/envs/mahjong.py b/rlcard/envs/mahjong.py index 0c9fb350c..858bfcd12 100644 --- a/rlcard/envs/mahjong.py +++ b/rlcard/envs/mahjong.py @@ -6,9 +6,9 @@ from rlcard.games.mahjong import Card from rlcard.games.mahjong.utils import card_encoding_dict, encode_cards, pile2list + class MahjongEnv(Env): - ''' Mahjong Environment - ''' + """Mahjong Environment""" def __init__(self, config): self.name = 'mahjong' @@ -20,7 +20,7 @@ def __init__(self, config): self.action_shape = [None for _ in range(self.num_players)] def _extract_state(self, state): - ''' Encode state + """Encode state Args: state (dict): dict of original state @@ -31,7 +31,7 @@ def _extract_state(self, state): the union of the other two players' hand the recent three actions the union of all played cards - ''' + """ players_pile = state['players_pile'] hand_rep = encode_cards(state['current_hand']) piles_rep = [] @@ -43,19 +43,22 @@ def _extract_state(self, state): rep.extend(piles_rep) obs = np.array(rep) - extracted_state = {'obs': obs, 'legal_actions': self._get_legal_actions()} - extracted_state['raw_obs'] = state - extracted_state['raw_legal_actions'] = [a for a in state['action_cards']] - extracted_state['action_record'] = self.action_recorder + extracted_state = { + 'obs': obs, + 'legal_actions': self._get_legal_actions(), + 'raw_obs': state, + 'raw_legal_actions': [a for a in state['action_cards']], + 'action_record': self.action_recorder + } return extracted_state def get_payoffs(self): - ''' Get the payoffs of players. Must be implemented in the child class. + """Get the payoffs of players. Must be implemented in the child class. 
Returns: payoffs (list): a list of payoffs for each player - ''' + """ _, player, _ = self.game.judger.judge_game(self.game) if player == -1: payoffs = [0, 0, 0, 0] @@ -65,14 +68,14 @@ def get_payoffs(self): return np.array(payoffs) def _decode_action(self, action_id): - ''' Action id -> the action in the game. Must be implemented in the child class. + """Action id -> the action in the game. Must be implemented in the child class. Args: action_id (int): the id of the action Returns: action (string): the action that will be passed to the game engine. - ''' + """ action = self.de_action_id[action_id] if action_id < 34: candidates = self.game.get_legal_actions(self.game.get_state(self.game.round.current_player)) @@ -83,7 +86,7 @@ def _decode_action(self, action_id): return action def _get_legal_actions(self): - ''' Get all legal actions for current state + """Get all legal actions for current state Returns: if type(legal_actions[0]) == Card: @@ -91,7 +94,7 @@ def _get_legal_actions(self): else: print(legal_actions) legal_actions (list): a list of legal actions' id - ''' + """ legal_action_id = {} legal_actions = self.game.get_legal_actions(self.game.get_state(self.game.round.current_player)) if legal_actions: @@ -106,6 +109,6 @@ def _get_legal_actions(self): print(self.game.judger.judge_game(self.game)) print(self.game.is_over()) print([len(p.pile) for p in self.game.players]) - #print(self.game.get_state(self.game.round.current_player)) - #exit() + # print(self.game.get_state(self.game.round.current_player)) + # exit() return OrderedDict(legal_action_id) diff --git a/rlcard/envs/nolimitholdem.py b/rlcard/envs/nolimitholdem.py index e7522cb44..fc4b6f0b0 100644 --- a/rlcard/envs/nolimitholdem.py +++ b/rlcard/envs/nolimitholdem.py @@ -9,18 +9,17 @@ from rlcard.games.nolimitholdem.round import Action DEFAULT_GAME_CONFIG = { - 'game_num_players': 2, - 'chips_for_each': 100, - 'dealer_id': None, - } + 'game_num_players': 2, + 'chips_for_each': 100, + 'dealer_id': None, +} 
+ class NolimitholdemEnv(Env): - ''' Limitholdem Environment - ''' + """Nolimitholdem Environment""" def __init__(self, config): - ''' Initialize the Limitholdem environment - ''' + """Initialize the Nolimitholdem environment""" self.name = 'no-limit-holdem' self.default_game_config = DEFAULT_GAME_CONFIG self.game = Game() @@ -35,15 +34,15 @@ def __init__(self, config): self.card2index = json.load(file) def _get_legal_actions(self): - ''' Get all leagal actions + """Get all legal actions Returns: encoded_action_list (list): return encoded legal action list (from str to int) - ''' + """ return self.game.get_legal_actions() def _extract_state(self, state): - ''' Extract the state representation from state dictionary for agent + """Extract the state representation from state dictionary for agent Note: Currently the use the hand cards and the public cards. TODO: encode the states @@ -52,7 +51,7 @@ def _extract_state(self, state): Returns: observation (list): combine the player's score and dealer's observable score for observation - ''' + """ extracted_state = {} legal_actions = OrderedDict({action.value: None for action in state['legal_actions']}) @@ -77,22 +76,22 @@ def _extract_state(self, state): return extracted_state def get_payoffs(self): - ''' Get the payoff of a game + """Get the payoff of a game Returns: payoffs (list): list of payoffs - ''' + """ return np.array(self.game.get_payoffs()) def _decode_action(self, action_id): - ''' Decode the action for applying to the game + """Decode the action for applying to the game Args: action id (int): action id Returns: action (str): action for the game - ''' + """ legal_actions = self.game.get_legal_actions() if self.actions(action_id) not in legal_actions: if Action.CHECK in legal_actions: @@ -103,11 +102,11 @@ def _decode_action(self, action_id): return self.actions(action_id) def get_perfect_information(self): - ''' Get the perfect information of the current state + """Get the perfect information of the current 
state Returns: (dict): A dictionary of all the perfect information of the current state - ''' + """ state = {} state['chips'] = [self.game.players[i].in_chips for i in range(self.num_players)] state['public_card'] = [c.get_index() for c in self.game.public_cards] if self.game.public_cards else None @@ -115,5 +114,3 @@ def get_perfect_information(self): state['current_player'] = self.game.game_pointer state['legal_actions'] = self.game.get_legal_actions() return state - - diff --git a/rlcard/envs/registration.py b/rlcard/envs/registration.py index c27e06e67..8cd7122b7 100644 --- a/rlcard/envs/registration.py +++ b/rlcard/envs/registration.py @@ -7,82 +7,95 @@ } class EnvSpec(object): - ''' A specification for a particular instance of the environment. - ''' + """A specification for a particular instance of the environment.""" def __init__(self, env_id, entry_point=None): - ''' Initilize + """ + Initialize Args: - env_id (string): The name of the environent - entry_point (string): A string the indicates the location of the envronment class - ''' + env_id (string): The name of the environment + entry_point (string): A string the indicates the location of the environment class + """ self.env_id = env_id mod_name, class_name = entry_point.split(':') self._entry_point = getattr(importlib.import_module(mod_name), class_name) - def make(self, config=DEFAULT_CONFIG): - ''' Instantiates an instance of the environment + def make(self, config=None): + """ + Instantiates an instance of the environment Returns: - env (Env): An instance of the environemnt + env (Env): An instance of the environment config (dict): A dictionary of the environment settings - ''' + """ + if config is None: + config = DEFAULT_CONFIG env = self._entry_point(config) return env + class EnvRegistry(object): - ''' Register an environment (game) by ID - ''' + """Register an environment (game) by ID""" def __init__(self): - ''' Initilize - ''' + """Initialize""" self.env_specs = {} def register(self, env_id, 
entry_point): - ''' Register an environment + """ + Register an environment Args: - env_id (string): The name of the environent - entry_point (string): A string the indicates the location of the envronment class - ''' + env_id (string): The name of the environment + entry_point (string): A string that indicates the location of the environment class + """ if env_id in self.env_specs: raise ValueError('Cannot re-register env_id: {}'.format(env_id)) self.env_specs[env_id] = EnvSpec(env_id, entry_point) - def make(self, env_id, config=DEFAULT_CONFIG): - ''' Create and environment instance + def make(self, env_id, config=None): + """Create an environment instance Args: env_id (string): The name of the environment config (dict): A dictionary of the environment settings - ''' + """ + if config is None: + config = DEFAULT_CONFIG + if env_id not in self.env_specs: raise ValueError('Cannot find env_id: {}'.format(env_id)) + return self.env_specs[env_id].make(config) + # Have a global registry registry = EnvRegistry() + def register(env_id, entry_point): - ''' Register an environment + """Register an environment Args: - env_id (string): The name of the environent - entry_point (string): A string the indicates the location of the envronment class - ''' + env_id (string): The name of the environment + entry_point (string): A string that indicates the location of the environment class + """ return registry.register(env_id, entry_point) -def make(env_id, config={}): - ''' Create and environment instance + +def make(env_id, config=None): + """Create an environment instance Args: env_id (string): The name of the environment config (dict): A dictionary of the environment settings - env_num (int): The number of environments - ''' + """ + if config is None: + config = {} + _config = DEFAULT_CONFIG.copy() + for key in config: _config[key] = config[key] diff --git a/rlcard/envs/uno.py b/rlcard/envs/uno.py index b9d6d7ea1..a7b87c0ff 100644 --- a/rlcard/envs/uno.py +++ 
b/rlcard/envs/uno.py @@ -50,11 +50,11 @@ def _get_legal_actions(self): return OrderedDict(legal_ids) def get_perfect_information(self): - ''' Get the perfect information of the current state + """Get the perfect information of the current state Returns: (dict): A dictionary of all the perfect information of the current state - ''' + """ state = {} state['num_players'] = self.num_players state['hand_cards'] = [cards2list(player.hand) diff --git a/rlcard/games/base.py b/rlcard/games/base.py index 5424f4857..e5a77a607 100644 --- a/rlcard/games/base.py +++ b/rlcard/games/base.py @@ -1,25 +1,26 @@ -''' Game-related base classes -''' +"""Game-related base classes """ + + class Card: - ''' + """ Card stores the suit and rank of a single card Note: The suit variable in a standard card game should be one of [S, H, D, C, BJ, RJ] meaning [Spades, Hearts, Diamonds, Clubs, Black Joker, Red Joker] Similarly the rank variable should be one of [A, 2, 3, 4, 5, 6, 7, 8, 9, T, J, Q, K] - ''' + """ suit = None rank = None valid_suit = ['S', 'H', 'D', 'C', 'BJ', 'RJ'] valid_rank = ['A', '2', '3', '4', '5', '6', '7', '8', '9', 'T', 'J', 'Q', 'K'] def __init__(self, suit, rank): - ''' Initialize the suit and rank of a card + """Initialize the suit and rank of a card Args: suit: string, suit of the card, should be one of valid_suit rank: string, rank of the card, should be one of valid_rank - ''' + """ self.suit = suit self.rank = rank @@ -36,17 +37,17 @@ def __hash__(self): return rank_index + 100 * suit_index def __str__(self): - ''' Get string representation of a card. + """Get string representation of a card. Returns: string: the combination of rank and suit of a card. Eg: AS, 5H, JD, 3C, ... - ''' + """ return self.rank + self.suit def get_index(self): - ''' Get index of a card. + """Get index of a card. Returns: string: the combination of suit and rank of a card. Eg: 1S, 2H, AD, BJ, RJ... 
- ''' + """ return self.suit+self.rank diff --git a/rlcard/games/blackjack/dealer.py b/rlcard/games/blackjack/dealer.py index 88929b5e7..2f4efe186 100644 --- a/rlcard/games/blackjack/dealer.py +++ b/rlcard/games/blackjack/dealer.py @@ -1,11 +1,11 @@ from rlcard.utils import init_standard_deck import numpy as np + class BlackjackDealer: def __init__(self, np_random, num_decks=1): - ''' Initialize a Blackjack dealer class - ''' + """Initialize a Blackjack dealer class """ self.np_random = np_random self.num_decks = num_decks self.deck = init_standard_deck() @@ -17,18 +17,17 @@ def __init__(self, np_random, num_decks=1): self.score = 0 def shuffle(self): - ''' Shuffle the deck - ''' + """Shuffle the deck """ shuffle_deck = np.array(self.deck) self.np_random.shuffle(shuffle_deck) self.deck = list(shuffle_deck) def deal_card(self, player): - ''' Distribute one card to the player + """Distribute one card to the player Args: player_id (int): the target player's id - ''' + """ idx = self.np_random.choice(len(self.deck)) card = self.deck[idx] if self.num_decks != 0: # If infinite decks, do not pop card from deck diff --git a/rlcard/games/blackjack/game.py b/rlcard/games/blackjack/game.py index fa0cddac5..584c4cbac 100644 --- a/rlcard/games/blackjack/game.py +++ b/rlcard/games/blackjack/game.py @@ -5,27 +5,26 @@ from rlcard.games.blackjack import Player from rlcard.games.blackjack import Judger + class BlackjackGame: def __init__(self, allow_step_back=False): - ''' Initialize the class Blackjack Game - ''' + """Initialize the class Blackjack Game""" self.allow_step_back = allow_step_back self.np_random = np.random.RandomState() def configure(self, game_config): - ''' Specifiy some game specific parameters, such as number of players - ''' + """Specify some game specific parameters, such as number of players""" self.num_players = game_config['game_num_players'] self.num_decks = game_config['game_num_decks'] def init_game(self): - ''' Initialilze the game + """Initialize the 
game Returns: state (dict): the first state of the game player_id (int): current player's id - ''' + """ self.dealer = Dealer(self.np_random, self.num_decks) self.players = [] @@ -54,15 +53,15 @@ def init_game(self): return self.get_state(self.game_pointer), self.game_pointer def step(self, action): - ''' Get the next state + """Get the next state Args: action (str): a specific action of blackjack. (Hit or Stand) - Returns:/ + Returns: dict: next player's state int: next plater's id - ''' + """ if self.allow_step_back: p = deepcopy(self.players[self.game_pointer]) d = deepcopy(self.dealer) @@ -82,13 +81,11 @@ def step(self, action): self.dealer.deal_card(self.dealer) self.dealer.status, self.dealer.score = self.judger.judge_round(self.dealer) for i in range(self.num_players): - self.judger.judge_game(self, i) + self.judger.judge_game(self, i) self.game_pointer = 0 else: self.game_pointer += 1 - - - elif action == "stand": # If stand, first try to pass the pointer, if it's the last player, dealer deal for himself, then judge game for everyone using a loop + elif action == "stand": # If stand, first try to pass the pointer, if it's the last player, dealer deal for himself, then judge game for everyone using a loop self.players[self.game_pointer].status, self.players[self.game_pointer].score = self.judger.judge_round( self.players[self.game_pointer]) if self.game_pointer >= self.num_players - 1: @@ -96,15 +93,11 @@ def step(self, action): self.dealer.deal_card(self.dealer) self.dealer.status, self.dealer.score = self.judger.judge_round(self.dealer) for i in range(self.num_players): - self.judger.judge_game(self, i) + self.judger.judge_game(self, i) self.game_pointer = 0 else: self.game_pointer += 1 - - - - hand = [card.get_index() for card in self.players[self.game_pointer].hand] if self.is_over(): @@ -118,62 +111,61 @@ def step(self, action): next_state['actions'] = ('hit', 'stand') next_state['state'] = (hand, dealer_hand) - - return next_state, self.game_pointer 
def step_back(self): - ''' Return to the previous state of the game + """Return to the previous state of the game Returns: Status (bool): check if the step back is success or not - ''' - #while len(self.history) > 0: + """ + # while len(self.history) > 0: if len(self.history) > 0: self.dealer, self.players[self.game_pointer], self.winner = self.history.pop() return True return False def get_num_players(self): - ''' Return the number of players in blackjack + """Return the number of players in blackjack Returns: number_of_player (int): blackjack only have 1 player - ''' + """ return self.num_players @staticmethod def get_num_actions(): - ''' Return the number of applicable actions + """Return the number of applicable actions Returns: number_of_actions (int): there are only two actions (hit and stand) - ''' + """ return 2 def get_player_id(self): - ''' Return the current player's id + """Return the current player's id Returns: player_id (int): current player's id - ''' + """ return self.game_pointer def get_state(self, player_id): - ''' Return player's state + """Return player's state Args: player_id (int): player id Returns: state (dict): corresponding player's state + """ ''' + before change state only have two keys (action, state) + but now have more than 4 keys (action, state, player0 hand, player1 hand, ... , dealer hand) + Although key 'state' have duplicated information with key 'player hand' and 'dealer hand', I couldn't + remove it because of other codes. + To remove it, we need to change dqn agent too in my opinion ''' - before change state only have two keys (action, state) - but now have more than 4 keys (action, state, player0 hand, player1 hand, ... 
, dealer hand) - Although key 'state' have duplicated information with key 'player hand' and 'dealer hand', I couldn't remove it because of other codes - To remove it, we need to change dqn agent too in my opinion - ''' state = {} state['actions'] = ('hit', 'stand') hand = [card.get_index() for card in self.players[player_id].hand] @@ -190,11 +182,11 @@ def get_state(self, player_id): return state def is_over(self): - ''' Check if the game is over + """Check if the game is over Returns: status (bool): True/False - ''' + """ ''' I should change here because judger and self.winner is changed too ''' diff --git a/rlcard/games/blackjack/judger.py b/rlcard/games/blackjack/judger.py index af18b9cce..f18de34f9 100644 --- a/rlcard/games/blackjack/judger.py +++ b/rlcard/games/blackjack/judger.py @@ -1,13 +1,12 @@ - class BlackjackJudger: def __init__(self, np_random): - ''' Initialize a BlackJack judger class - ''' + """Initialize a BlackJack judger class""" self.np_random = np_random - self.rank2score = {"A":11, "2":2, "3":3, "4":4, "5":5, "6":6, "7":7, "8":8, "9":9, "T":10, "J":10, "Q":10, "K":10} + self.rank2score = {"A": 11, "2": 2, "3": 3, "4": 4, "5": 5, "6": 6, "7": 7, "8": 8, "9": 9, "T": 10, "J": 10, + "Q": 10, "K": 10} def judge_round(self, player): - ''' Judge the target player's status + """Judge the target player's status Args: player (int): target player's id @@ -15,7 +14,7 @@ def judge_round(self, player): Returns: status (str): the status of the target player score (int): the current score of the player - ''' + """ score = self.judge_score(player.hand) if score <= 21: return "alive", score @@ -23,21 +22,21 @@ def judge_round(self, player): return "bust", score def judge_game(self, game, game_pointer): - ''' Judge the winner of the game + """Judge the winner of the game Args: game (class): target game class + """ ''' - ''' - game.winner['dealer'] doesn't need anymore if we change code like this + game.winner['dealer'] doesn't need anymore if we change code 
like this - player bust (whether dealer bust or not) => game.winner[playerX] = -1 - player and dealer tie => game.winner[playerX] = 1 - dealer bust and player not bust => game.winner[playerX] = 2 - player get higher score than dealer => game.winner[playerX] = 2 - dealer get higher score than player => game.winner[playerX] = -1 - game.winner[playerX] = 0 => the game is still ongoing - ''' + player bust (whether dealer bust or not) => game.winner[playerX] = -1 + player and dealer tie => game.winner[playerX] = 1 + dealer bust and player not bust => game.winner[playerX] = 2 + player get higher score than dealer => game.winner[playerX] = 2 + dealer get higher score than player => game.winner[playerX] = -1 + game.winner[playerX] = 0 => the game is still ongoing + ''' if game.players[game_pointer].status == 'bust': game.winner['player' + str(game_pointer)] = -1 @@ -52,14 +51,14 @@ def judge_game(self, game, game_pointer): game.winner['player' + str(game_pointer)] = 1 def judge_score(self, cards): - ''' Judge the score of a given cards set + """Judge the score of a given cards set Args: cards (list): a list of cards Returns: score (int): the score of the given cards set - ''' + """ score = 0 count_a = 0 for card in cards: diff --git a/rlcard/games/blackjack/player.py b/rlcard/games/blackjack/player.py index 654a179ab..0f13a6636 100644 --- a/rlcard/games/blackjack/player.py +++ b/rlcard/games/blackjack/player.py @@ -2,11 +2,11 @@ class BlackjackPlayer: def __init__(self, player_id, np_random): - ''' Initialize a Blackjack player class + """Initialize a Blackjack player class Args: player_id (int): id for the player - ''' + """ self.np_random = np_random self.player_id = player_id self.hand = [] @@ -14,6 +14,5 @@ def __init__(self, player_id, np_random): self.score = 0 def get_player_id(self): - ''' Return player's id - ''' + """Return player's id""" return self.player_id diff --git a/rlcard/games/bridge/dealer.py b/rlcard/games/bridge/dealer.py index 955ae52f9..504e582a7 
100644 --- a/rlcard/games/bridge/dealer.py +++ b/rlcard/games/bridge/dealer.py @@ -1,8 +1,8 @@ -''' +""" File name: bridge/dealer.py Author: William Hale Date created: 11/25/2021 -''' +""" from typing import List @@ -11,22 +11,20 @@ class BridgeDealer: - ''' Initialize a BridgeDealer dealer class - ''' + """Initialize a BridgeDealer dealer class """ def __init__(self, np_random): - ''' set shuffled_deck, set stock_pile - ''' + """set shuffled_deck, set stock_pile """ self.np_random = np_random self.shuffled_deck: List[BridgeCard] = BridgeCard.get_deck() # keep a copy of the shuffled cards at start of new hand self.np_random.shuffle(self.shuffled_deck) self.stock_pile: List[BridgeCard] = self.shuffled_deck.copy() def deal_cards(self, player: BridgePlayer, num: int): - ''' Deal some cards from stock_pile to one player + """Deal some cards from stock_pile to one player Args: player (BridgePlayer): The BridgePlayer object num (int): The number of cards to be dealt - ''' + """ for _ in range(num): player.hand.append(self.stock_pile.pop()) diff --git a/rlcard/games/bridge/game.py b/rlcard/games/bridge/game.py index 038d2e954..03a8df305 100644 --- a/rlcard/games/bridge/game.py +++ b/rlcard/games/bridge/game.py @@ -1,8 +1,8 @@ -''' +""" File name: bridge/game.py Author: William Hale Date created: 11/25/2021 -''' +""" from typing import List @@ -14,12 +14,10 @@ class BridgeGame: - ''' Game class. This class will interact with outer environment. - ''' + """Game class. 
This class will interact with outer environment""" def __init__(self, allow_step_back=False): - '''Initialize the class BridgeGame - ''' + """Initialize the class BridgeGame""" self.allow_step_back: bool = allow_step_back self.np_random = np.random.RandomState() self.judger: BridgeJudger = BridgeJudger(game=self) @@ -28,8 +26,7 @@ def __init__(self, allow_step_back=False): self.num_players: int = 4 def init_game(self): - ''' Initialize all characters in the game and start round 1 - ''' + """Initialize all characters in the game and start round 1""" board_id = self.np_random.choice([1, 2, 3, 4]) self.actions: List[ActionEvent] = [] self.round = BridgeRound(num_players=self.num_players, board_id=board_id, np_random=self.np_random) @@ -41,8 +38,7 @@ def init_game(self): return state, current_player_id def step(self, action: ActionEvent): - ''' Perform game action and return next player number, and the state for next player - ''' + """Perform game action and return next player number, and the state for next player""" if isinstance(action, CallActionEvent): self.round.make_call(action=action) elif isinstance(action, PlayCardAction): @@ -55,32 +51,28 @@ def step(self, action: ActionEvent): return next_state, next_player_id def get_num_players(self) -> int: - ''' Return the number of players in the game - ''' + """Return the number of players in the game""" return self.num_players @staticmethod def get_num_actions() -> int: - ''' Return the number of possible actions in the game - ''' + """Return the number of possible actions in the game""" return ActionEvent.get_num_actions() def get_player_id(self): - ''' Return the current player that will take actions soon - ''' + """Return the current player that will take actions soon""" return self.round.current_player_id def is_over(self) -> bool: - ''' Return whether the current game is over - ''' + """Return whether the current game is over""" return self.round.is_over() def get_state(self, player_id: int): # wch: not really 
used - ''' Get player's state + """Get player's state Return: state (dict): The information of the state - ''' + """ state = {} if not self.is_over(): state['player_id'] = player_id diff --git a/rlcard/games/bridge/judger.py b/rlcard/games/bridge/judger.py index 4158ec7f4..9023e9989 100644 --- a/rlcard/games/bridge/judger.py +++ b/rlcard/games/bridge/judger.py @@ -1,8 +1,8 @@ -''' +""" File name: bridge/judger.py Author: William Hale Date created: 11/25/2021 -''' +""" from typing import List @@ -17,15 +17,12 @@ class BridgeJudger: - - ''' - Judger decides legal actions for current player - ''' + """Judger decides legal actions for current player""" def __init__(self, game: 'BridgeGame'): - ''' Initialize the class BridgeJudger + """Initialize the class BridgeJudger :param game: BridgeGame - ''' + """ self.game: BridgeGame = game def get_legal_actions(self) -> List[ActionEvent]: diff --git a/rlcard/games/bridge/player.py b/rlcard/games/bridge/player.py index a4e4cff89..41c6fe2ec 100644 --- a/rlcard/games/bridge/player.py +++ b/rlcard/games/bridge/player.py @@ -1,8 +1,8 @@ -''' +""" File name: bridge/player.py Author: William Hale Date created: 11/25/2021 -''' +""" from typing import List @@ -12,11 +12,11 @@ class BridgePlayer: def __init__(self, player_id: int, np_random): - ''' Initialize a BridgePlayer player class + """Initialize a BridgePlayer player class Args: player_id (int): id for the player - ''' + """ if player_id < 0 or player_id > 3: raise Exception(f'BridgePlayer has invalid player_id: {player_id}') self.np_random = np_random diff --git a/rlcard/games/bridge/round.py b/rlcard/games/bridge/round.py index b7e3df4b6..ab1a7aaff 100644 --- a/rlcard/games/bridge/round.py +++ b/rlcard/games/bridge/round.py @@ -1,8 +1,8 @@ -''' +""" File name: bridge/round.py Author: William Hale Date created: 11/25/2021 -''' +""" from typing import List @@ -39,7 +39,7 @@ def round_phase(self): return result def __init__(self, num_players: int, board_id: int, np_random): - ''' 
Initialize the round class + """Initialize the round class The round class maintains the following instances: 1) dealer: the dealer of the round; dealer has trick_pile @@ -59,7 +59,7 @@ def __init__(self, num_players: int, board_id: int, np_random): num_players: int board_id: int np_random - ''' + """ tray = Tray(board_id=board_id) dealer_id = tray.dealer_id self.tray = tray @@ -77,8 +77,7 @@ def __init__(self, num_players: int, board_id: int, np_random): self.move_sheet.append(DealHandMove(dealer=self.players[dealer_id], shuffled_deck=self.dealer.shuffled_deck)) def is_bidding_over(self) -> bool: - ''' Return whether the current bidding is over - ''' + """Return whether the current bidding is over""" is_bidding_over = True if len(self.move_sheet) < 5: is_bidding_over = False @@ -97,8 +96,7 @@ def is_bidding_over(self) -> bool: return is_bidding_over def is_over(self) -> bool: - ''' Return whether the current game is over - ''' + """Return whether the current game is over""" is_over = True if not self.is_bidding_over(): is_over = False diff --git a/rlcard/games/bridge/utils/action_event.py b/rlcard/games/bridge/utils/action_event.py index 0700f2ae6..2f5d35c03 100644 --- a/rlcard/games/bridge/utils/action_event.py +++ b/rlcard/games/bridge/utils/action_event.py @@ -1,8 +1,8 @@ -''' +""" File name: bridge/utils/action_event.py Author: William Hale Date created: 11/25/2021 -''' +""" from .bridge_card import BridgeCard @@ -57,8 +57,7 @@ def from_action_id(action_id: int): @staticmethod def get_num_actions(): - ''' Return the number of possible actions in the game - ''' + """Return the number of possible actions in the game """ return 1 + 35 + 3 + 52 # no_bid, 35 bids, pass, dbl, rdl, 52 play_card diff --git a/rlcard/games/bridge/utils/bridge_card.py b/rlcard/games/bridge/utils/bridge_card.py index f62268eca..2b0f0a417 100644 --- a/rlcard/games/bridge/utils/bridge_card.py +++ b/rlcard/games/bridge/utils/bridge_card.py @@ -1,8 +1,8 @@ -''' +""" File name: 
bridge/utils/bridge_card.py Author: William Hale Date created: 11/25/2021 -''' +""" from rlcard.games.base import Card diff --git a/rlcard/games/bridge/utils/move.py b/rlcard/games/bridge/utils/move.py index e3ff9eb9d..bd3053ef7 100644 --- a/rlcard/games/bridge/utils/move.py +++ b/rlcard/games/bridge/utils/move.py @@ -1,8 +1,8 @@ -''' +""" File name: bridge/utils/move.py Author: William Hale Date created: 11/25/2021 -''' +""" # # These classes are used to keep a move_sheet history of the moves in a round. diff --git a/rlcard/games/bridge/utils/tray.py b/rlcard/games/bridge/utils/tray.py index 7af25f0a1..51118a879 100644 --- a/rlcard/games/bridge/utils/tray.py +++ b/rlcard/games/bridge/utils/tray.py @@ -1,8 +1,8 @@ -''' +""" File name: bridge/utils/tray.py Author: William Hale Date created: 11/28/2021 -''' +""" class Tray(object): diff --git a/rlcard/games/bridge/utils/utils.py b/rlcard/games/bridge/utils/utils.py index 02cf2c50d..bea8e3c4f 100644 --- a/rlcard/games/bridge/utils/utils.py +++ b/rlcard/games/bridge/utils/utils.py @@ -1,8 +1,8 @@ -''' +""" File name: bridge/utils/utils.py Author: William Hale Date created: 11/26/2021 -''' +""" from typing import List diff --git a/rlcard/games/doudizhu/dealer.py b/rlcard/games/doudizhu/dealer.py index b6c90515d..8461607c5 100644 --- a/rlcard/games/doudizhu/dealer.py +++ b/rlcard/games/doudizhu/dealer.py @@ -1,52 +1,51 @@ # -*- coding: utf-8 -*- -''' Implement Doudizhu Dealer class -''' +"""Implement Doudizhu Dealer class """ import functools from rlcard.utils import init_54_deck from rlcard.games.doudizhu.utils import cards2str, doudizhu_sort_card + class DoudizhuDealer: - ''' Dealer will shuffle, deal cards, and determine players' roles - ''' + """Dealer will shuffle, deal cards, and determine players' roles """ + def __init__(self, np_random): - '''Give dealer the deck + """Give dealer the deck Notes: 1. 
deck with 54 cards including black joker and red joker - ''' + """ self.np_random = np_random self.deck = init_54_deck() self.deck.sort(key=functools.cmp_to_key(doudizhu_sort_card)) self.landlord = None def shuffle(self): - ''' Randomly shuffle the deck - ''' + """Randomly shuffle the deck """ self.np_random.shuffle(self.deck) def deal_cards(self, players): - ''' Deal cards to players + """Deal cards to players Args: players (list): list of DoudizhuPlayer objects - ''' + """ hand_num = (len(self.deck) - 3) // len(players) for index, player in enumerate(players): - current_hand = self.deck[index*hand_num:(index+1)*hand_num] + current_hand = self.deck[index * hand_num:(index + 1) * hand_num] current_hand.sort(key=functools.cmp_to_key(doudizhu_sort_card)) player.set_current_hand(current_hand) player.initial_hand = cards2str(player.current_hand) def determine_role(self, players): - ''' Determine landlord and peasants according to players' hand + """Determine landlord and peasants according to players' hand Args: players (list): list of DoudizhuPlayer objects Returns: int: landlord's player_id - ''' + """ # deal cards self.shuffle() self.deal_cards(players) @@ -54,20 +53,20 @@ def determine_role(self, players): self.landlord = players[0] players[1].role = 'peasant' players[2].role = 'peasant' - #players[0].role = 'peasant' - #self.landlord = players[0] + # players[0].role = 'peasant' + # self.landlord = players[0] ## determine 'landlord' - #max_score = get_landlord_score( + # max_score = get_landlord_score( # cards2str(self.landlord.current_hand)) - #for player in players[1:]: + # for player in players[1:]: # player.role = 'peasant' # score = get_landlord_score( # cards2str(player.current_hand)) # if score > max_score: # max_score = score # self.landlord = player - #self.landlord.role = 'landlord' + # self.landlord.role = 'landlord' # give the 'landlord' the three cards self.landlord.current_hand.extend(self.deck[-3:]) diff --git a/rlcard/games/doudizhu/game.py 
b/rlcard/games/doudizhu/game.py index 945235d5c..f66602ca5 100644 --- a/rlcard/games/doudizhu/game.py +++ b/rlcard/games/doudizhu/game.py @@ -1,6 +1,5 @@ # -*- coding: utf-8 -*- -''' Implement Doudizhu Game class -''' +"""Implement Doudizhu Game class""" import functools from heapq import merge import numpy as np @@ -12,21 +11,20 @@ class DoudizhuGame: - ''' Provide game APIs for env to run doudizhu and get corresponding state - information. - ''' + """Provide game APIs for env to run doudizhu and get corresponding state information""" + def __init__(self, allow_step_back=False): self.allow_step_back = allow_step_back self.np_random = np.random.RandomState() self.num_players = 3 def init_game(self): - ''' Initialize players and state. + """Initialize players and state. Returns: dict: first state in one game int: current player's id - ''' + """ # initialize public variables self.winner_id = None self.history = [] @@ -36,8 +34,8 @@ def init_game(self): for num in range(self.num_players)] # initialize round to deal cards and determine landlord - self.played_cards = [np.zeros((len(CARD_RANK_STR), ), dtype=np.int32) - for _ in range(self.num_players)] + self.played_cards = [np.zeros((len(CARD_RANK_STR),), dtype=np.int32) + for _ in range(self.num_players)] self.round = Round(self.np_random, self.played_cards) self.round.initiate(self.players) @@ -51,7 +49,7 @@ def init_game(self): return self.state, player_id def step(self, action): - ''' Perform one draw of the game + """Perform one draw of the game Args: action (str): specific action of doudizhu. 
Eg: '33344' @@ -59,19 +57,19 @@ def step(self, action): Returns: dict: next player's state int: next player's id - ''' + """ if self.allow_step_back: # TODO: don't record game.round, game.players, game.judger if allow_step_back not set pass - # perfrom action + # perform action player = self.players[self.round.current_player] self.round.proceed_round(player, action) - if (action != 'pass'): + if action != 'pass': self.judger.calc_playable_cards(player) if self.judger.judge_game(self.players, self.round.current_player): self.winner_id = self.round.current_player - next_id = (player.player_id+1) % len(self.players) + next_id = (player.player_id + 1) % len(self.players) self.round.current_player = next_id # get next state @@ -81,41 +79,41 @@ def step(self, action): return state, next_id def step_back(self): - ''' Return to the previous state of the game + """Return to the previous state of the game Returns: (bool): True if the game steps back successfully - ''' + """ if not self.round.trace: return False - #winner_id will be always None no matter step_back from any case + # winner_id will be always None no matter step_back from any case self.winner_id = None - #reverse round + # reverse round player_id, cards = self.round.step_back(self.players) - #reverse player - if (cards != 'pass'): + # reverse player + if cards != 'pass': self.players[player_id].played_cards = self.round.find_last_played_cards_in_trace(player_id) self.players[player_id].play_back() - #reverse judger.played_cards if needed - if (cards != 'pass'): + # reverse judger.played_cards if needed + if cards != 'pass': self.judger.restore_playable_cards(player_id) self.state = self.get_state(self.round.current_player) return True def get_state(self, player_id): - ''' Return player's state + """Return player's state Args: player_id (int): player id Returns: (dict): The state of the player - ''' + """ player = self.players[player_id] others_hands = self._get_others_current_hand(player) num_cards_left = 
[len(self.players[i].current_hand) for i in range(self.num_players)] @@ -129,41 +127,42 @@ def get_state(self, player_id): @staticmethod def get_num_actions(): - ''' Return the total number of abstract acitons + """Return the total number of abstract actions Returns: int: the total number of abstract actions of doudizhu - ''' + """ return 27472 def get_player_id(self): - ''' Return current player's id + """Return current player's id Returns: int: current player's id - ''' + """ return self.round.current_player def get_num_players(self): - ''' Return the number of players in doudizhu + """Return the number of players in doudizhu Returns: int: the number of players in doudizhu - ''' + """ return self.num_players def is_over(self): - ''' Judge whether a game is over + """Judge whether a game is over Returns: Bool: True(over) / False(not over) - ''' + """ if self.winner_id is None: return False return True def _get_others_current_hand(self, player): - player_up = self.players[(player.player_id+1) % len(self.players)] - player_down = self.players[(player.player_id-1) % len(self.players)] - others_hand = merge(player_up.current_hand, player_down.current_hand, key=functools.cmp_to_key(doudizhu_sort_card)) + player_up = self.players[(player.player_id + 1) % len(self.players)] + player_down = self.players[(player.player_id - 1) % len(self.players)] + others_hand = merge(player_up.current_hand, player_down.current_hand, + key=functools.cmp_to_key(doudizhu_sort_card)) return cards2str(others_hand) diff --git a/rlcard/games/doudizhu/judger.py b/rlcard/games/doudizhu/judger.py index 375301b0b..b6e6b1fc7 100644 --- a/rlcard/games/doudizhu/judger.py +++ b/rlcard/games/doudizhu/judger.py @@ -1,6 +1,5 @@ # -*- coding: utf-8 -*- -''' Implement Doudizhu Judger class -''' +"""Implement Doudizhu Judger class""" import numpy as np import collections from itertools import combinations @@ -10,13 +9,12 @@ from rlcard.games.doudizhu.utils import cards2str, contains_cards - class 
DoudizhuJudger: - ''' Determine what cards a player can play - ''' + """Determine what cards a player can play""" + @staticmethod def chain_indexes(indexes_list): - ''' Find chains for solos, pairs and trios by using indexes_list + """Find chains for solos, pairs and trios by using indexes_list Args: indexes_list: the indexes of cards those have the same count, the count could be 1, 2, or 3. @@ -24,29 +22,29 @@ def chain_indexes(indexes_list): Returns: list of tuples: [(start_index1, length1), (start_index1, length1), ...] - ''' + """ chains = [] prev_index = -100 count = 0 start = None for i in indexes_list: - if (i[0] >= 12): #no chains for '2BR' + if i[0] >= 12: # no chains for '2BR' break - if (i[0] == prev_index + 1): + if i[0] == prev_index + 1: count += 1 else: - if (count > 1): + if count > 1: chains.append((start, count)) count = 1 start = i[0] prev_index = i[0] - if (count > 1): + if count > 1: chains.append((start, count)) return chains @classmethod def solo_attachments(cls, hands, chain_start, chain_length, size): - ''' Find solo attachments for trio_chain_solo_x and four_two_solo + """Find solo attachments for trio_chain_solo_x and four_two_solo Args: hands: @@ -59,21 +57,22 @@ def solo_attachments(cls, hands, chain_start, chain_length, size): Each attachment has two elemnts, the first one contains indexes of attached cards smaller than the index of chain_start, the first one contains indexes of attached cards larger than the index of chain_start - ''' + """ attachments = set() candidates = [] prev_card = None same_card_count = 0 for card in hands: - #dont count those cards in the chain - if (CARD_RANK_STR_INDEX[card] >= chain_start and CARD_RANK_STR_INDEX[card] < chain_start + chain_length): + # don't count those cards in the chain + if CARD_RANK_STR_INDEX[card] >= chain_start and CARD_RANK_STR_INDEX[card] < chain_start + chain_length: continue - if (card == prev_card): - #attachments can not have bomb - if (same_card_count == 3): + if card == 
prev_card: + # attachments can not have bomb + if same_card_count == 3: continue - #attachments can not have 3 same cards consecutive with the trio (except 3 cards of '222') - elif (same_card_count == 2 and (CARD_RANK_STR_INDEX[card] == chain_start - 1 or CARD_RANK_STR_INDEX[card] == chain_start + chain_length) and card != '2'): + # attachments can not have 3 same cards consecutive with the trio (except 3 cards of '222') + elif same_card_count == 2 and (CARD_RANK_STR_INDEX[card] == chain_start - 1 or CARD_RANK_STR_INDEX[ + card] == chain_start + chain_length) and card != '2': continue else: same_card_count += 1 @@ -82,7 +81,7 @@ def solo_attachments(cls, hands, chain_start, chain_length, size): same_card_count = 1 candidates.append(CARD_RANK_STR_INDEX[card]) for attachment in combinations(candidates, size): - if (attachment[-1] == 14 and attachment[-2] == 13): + if attachment[-1] == 14 and attachment[-2] == 13: continue i = bisect_left(attachment, chain_start) attachments.add((attachment[:i], attachment[i:])) @@ -90,7 +89,7 @@ def solo_attachments(cls, hands, chain_start, chain_length, size): @classmethod def pair_attachments(cls, cards_count, chain_start, chain_length, size): - ''' Find pair attachments for trio_chain_pair_x and four_two_pair + """Find pair attachments for trio_chain_pair_x and four_two_pair Args: cards_count: @@ -103,30 +102,30 @@ def pair_attachments(cls, cards_count, chain_start, chain_length, size): Each attachment has two elemnts, the first one contains indexes of attached cards smaller than the index of chain_start, the first one contains indexes of attached cards larger than the index of chain_start - ''' + """ attachments = set() candidates = [] for i, _ in enumerate(cards_count): - if (i >= chain_start and i < chain_start + chain_length): + if i >= chain_start and i < chain_start + chain_length: continue - if (cards_count[i] == 2 or cards_count[i] == 3): + if cards_count[i] == 2 or cards_count[i] == 3: candidates.append(i) - elif 
(cards_count[i] == 4): + elif cards_count[i] == 4: candidates.append(i) for attachment in combinations(candidates, size): - if (attachment[-1] == 14 and attachment[-2] == 13): + if attachment[-1] == 14 and attachment[-2] == 13: continue i = bisect_left(attachment, chain_start) attachments.add((attachment[:i], attachment[i:])) return list(attachments) - + @staticmethod def playable_cards_from_hand(current_hand): - ''' Get playable cards from hand + """Get playable cards from hand Returns: set: set of string of playable cards - ''' + """ cards_dict = collections.defaultdict(int) for card in current_hand: cards_dict[card] += 1 @@ -137,13 +136,13 @@ def playable_cards_from_hand(current_hand): more_than_1_indexes = np.argwhere(cards_count > 1) more_than_2_indexes = np.argwhere(cards_count > 2) more_than_3_indexes = np.argwhere(cards_count > 3) - #solo + # solo for i in non_zero_indexes: playable_cards.add(CARD_RANK_STR[i[0]]) - #pair + # pair for i in more_than_1_indexes: playable_cards.add(CARD_RANK_STR[i[0]] * 2) - #bomb, four_two_solo, four_two_pair + # bomb, four_two_solo, four_two_pair for i in more_than_3_indexes: cards = CARD_RANK_STR[i[0]] * 4 playable_cards.add(cards) @@ -164,73 +163,73 @@ def playable_cards_from_hand(current_hand): post_attached += CARD_RANK_STR[j] * 2 playable_cards.add(pre_attached + cards + post_attached) - #solo_chain_5 -- #solo_chain_12 + # solo_chain_5 -- #solo_chain_12 solo_chain_indexes = DoudizhuJudger.chain_indexes(non_zero_indexes) for (start_index, length) in solo_chain_indexes: s, l = start_index, length - while(l >= 5): + while l >= 5: cards = '' curr_index = s - 1 curr_length = 0 - while (curr_length < l and curr_length < 12): + while curr_length < l and curr_length < 12: curr_index += 1 curr_length += 1 cards += CARD_RANK_STR[curr_index] - if (curr_length >= 5): + if curr_length >= 5: playable_cards.add(cards) l -= 1 s += 1 - #pair_chain_3 -- #pair_chain_10 + # pair_chain_3 -- #pair_chain_10 pair_chain_indexes = 
DoudizhuJudger.chain_indexes(more_than_1_indexes) for (start_index, length) in pair_chain_indexes: s, l = start_index, length - while(l >= 3): + while l >= 3: cards = '' curr_index = s - 1 curr_length = 0 - while (curr_length < l and curr_length < 10): + while curr_length < l and curr_length < 10: curr_index += 1 curr_length += 1 cards += CARD_RANK_STR[curr_index] * 2 - if (curr_length >= 3): + if curr_length >= 3: playable_cards.add(cards) l -= 1 s += 1 - #trio, trio_solo and trio_pair + # trio, trio_solo and trio_pair for i in more_than_2_indexes: playable_cards.add(CARD_RANK_STR[i[0]] * 3) for j in non_zero_indexes: - if (j < i): + if j < i: playable_cards.add(CARD_RANK_STR[j[0]] + CARD_RANK_STR[i[0]] * 3) - elif (j > i): + elif j > i: playable_cards.add(CARD_RANK_STR[i[0]] * 3 + CARD_RANK_STR[j[0]]) for j in more_than_1_indexes: - if (j < i): + if j < i: playable_cards.add(CARD_RANK_STR[j[0]] * 2 + CARD_RANK_STR[i[0]] * 3) - elif (j > i): + elif j > i: playable_cards.add(CARD_RANK_STR[i[0]] * 3 + CARD_RANK_STR[j[0]] * 2) - #trio_solo, trio_pair, #trio -- trio_chain_2 -- trio_chain_6; trio_solo_chain_2 -- trio_solo_chain_5; trio_pair_chain_2 -- trio_pair_chain_4 + # trio_solo, trio_pair, #trio -- trio_chain_2 -- trio_chain_6; trio_solo_chain_2 -- trio_solo_chain_5; trio_pair_chain_2 -- trio_pair_chain_4 trio_chain_indexes = DoudizhuJudger.chain_indexes(more_than_2_indexes) for (start_index, length) in trio_chain_indexes: s, l = start_index, length - while(l >= 2): + while l >= 2: cards = '' curr_index = s - 1 curr_length = 0 - while (curr_length < l and curr_length < 6): + while curr_length < l and curr_length < 6: curr_index += 1 curr_length += 1 cards += CARD_RANK_STR[curr_index] * 3 - #trio_chain_2 to trio_chain_6 - if (curr_length >= 2 and curr_length <= 6): + # trio_chain_2 to trio_chain_6 + if curr_length >= 2 and curr_length <= 6: playable_cards.add(cards) - #trio_solo_chain_2 to trio_solo_chain_5 - if (curr_length >= 2 and curr_length <= 5): + # 
trio_solo_chain_2 to trio_solo_chain_5 + if curr_length >= 2 and curr_length <= 5: for left, right in DoudizhuJudger.solo_attachments(current_hand, s, curr_length, curr_length): pre_attached = '' for j in left: @@ -240,8 +239,8 @@ def playable_cards_from_hand(current_hand): post_attached += CARD_RANK_STR[j] playable_cards.add(pre_attached + cards + post_attached) - #trio_pair_chain2 -- trio_pair_chain_4 - if (curr_length >= 2 and curr_length <= 4): + # trio_pair_chain2 -- trio_pair_chain_4 + if curr_length >= 2 and curr_length <= 4: for left, right in DoudizhuJudger.pair_attachments(cards_count, s, curr_length, curr_length): pre_attached = '' for j in left: @@ -252,14 +251,13 @@ def playable_cards_from_hand(current_hand): playable_cards.add(pre_attached + cards + post_attached) l -= 1 s += 1 - #rocket - if (cards_count[13] and cards_count[14]): + # rocket + if cards_count[13] and cards_count[14]: playable_cards.add(CARD_RANK_STR[13] + CARD_RANK_STR[14]) return playable_cards def __init__(self, players, np_random): - ''' Initilize the Judger class for Dou Dizhu - ''' + """Initialize the Judger class for Dou Dizhu""" self.playable_cards = [set() for _ in range(3)] self._recorded_removed_playable_cards = [[] for _ in range(3)] for player in players: @@ -268,17 +266,16 @@ def __init__(self, players, np_random): self.playable_cards[player_id] = self.playable_cards_from_hand(current_hand) def calc_playable_cards(self, player): - ''' Recalculate all legal cards the player can play according to his + """Recalculate all legal cards the player can play according to his current hand. Args: player (DoudizhuPlayer object): object of DoudizhuPlayer - init_flag (boolean): For the first time, set it True to accelerate - the preocess. + init_flag (boolean): For the first time, set it True to accelerate the process. 
Returns: list: list of string of playable cards - ''' + """ removed_playable_cards = [] player_id = player.player_id @@ -293,7 +290,7 @@ def calc_playable_cards(self, player): if missed is not None: position = player.singles.find(missed) - player.singles = player.singles[position+1:] + player.singles = player.singles[position + 1:] for cards in playable_cards: if missed in cards or (not contains_cards(current_hand, cards)): removed_playable_cards.append(cards) @@ -301,39 +298,37 @@ def calc_playable_cards(self, player): else: for cards in playable_cards: if not contains_cards(current_hand, cards): - #del self.playable_cards[player_id][cards] + # del self.playable_cards[player_id][cards] removed_playable_cards.append(cards) self.playable_cards[player_id].remove(cards) self._recorded_removed_playable_cards[player_id].append(removed_playable_cards) return self.playable_cards[player_id] def restore_playable_cards(self, player_id): - ''' restore playable_cards for judger for game.step_back(). + """restore playable_cards for judger for game.step_back(). Args: player_id: The id of the player whose playable_cards need to be restored - ''' + """ removed_playable_cards = self._recorded_removed_playable_cards[player_id].pop() self.playable_cards[player_id].update(removed_playable_cards) def get_playable_cards(self, player): - ''' Provide all legal cards the player can play according to his + """Provide all legal cards the player can play according to his current hand. Args: player (DoudizhuPlayer object): object of DoudizhuPlayer - init_flag (boolean): For the first time, set it True to accelerate - the preocess. + init_flag (boolean): For the first time, set it True to accelerate the process. 
Returns: list: list of string of playable cards - ''' + """ return self.playable_cards[player.player_id] - @staticmethod def judge_game(players, player_id): - ''' Judge whether the game is over + """Judge whether the game is over Args: players (list): list of DoudizhuPlayer objects @@ -341,7 +336,7 @@ def judge_game(players, player_id): Returns: (bool): True if the game is over - ''' + """ player = players[player_id] if not player.current_hand: return True diff --git a/rlcard/games/doudizhu/player.py b/rlcard/games/doudizhu/player.py index 6cfcf41b3..23879e4b9 100644 --- a/rlcard/games/doudizhu/player.py +++ b/rlcard/games/doudizhu/player.py @@ -1,6 +1,5 @@ # -*- coding: utf-8 -*- -''' Implement Doudizhu Player class -''' +"""Implement Doudizhu Player class""" import functools from rlcard.games.doudizhu.utils import get_gt_cards @@ -8,12 +7,12 @@ class DoudizhuPlayer: - ''' Player can store cards in the player's hand and the role, + """Player can store cards in the player's hand and the role, determine the actions can be made according to the rules, - and can perfrom corresponding action - ''' + and can perform corresponding action + """ def __init__(self, player_id, np_random): - ''' Give the player an id in one game + """Give the player an id in one game Args: player_id (int): the player_id of a player @@ -23,7 +22,7 @@ def __init__(self, player_id, np_random): 2. played_cards: The cards played in one round 3. hand: Initial cards 4. 
_current_hand: The rest of the cards after playing some of them - ''' + """ self.np_random = np_random self.player_id = player_id self.initial_hand = None @@ -58,7 +57,7 @@ def get_state(self, public, others_hands, num_cards_left, actions): return state def available_actions(self, greater_player=None, judger=None): - ''' Get the actions can be made based on the rules + """Get the actions can be made based on the rules Args: greater_player (DoudizhuPlayer object): player who played @@ -67,7 +66,7 @@ def available_actions(self, greater_player=None, judger=None): Returns: list: list of string of actions. Eg: ['pass', '8', '9', 'T', 'J'] - ''' + """ actions = [] if greater_player is None or greater_player.player_id == self.player_id: actions = judger.get_playable_cards(self) @@ -76,7 +75,7 @@ def available_actions(self, greater_player=None, judger=None): return actions def play(self, action, greater_player=None): - ''' Perfrom action + """Perform action Args: action (string): specific action @@ -84,7 +83,7 @@ def play(self, action, greater_player=None): Returns: object of DoudizhuPlayer: If there is a new greater_player, return it, if not, return None - ''' + """ trans = {'B': 'BJ', 'R': 'RJ'} if action == 'pass': self._recorded_played_cards.append([]) @@ -108,8 +107,7 @@ def play(self, action, greater_player=None): return self def play_back(self): - ''' Restore recorded cards back to self._current_hand - ''' + """Restore recorded cards back to self._current_hand""" removed_cards = self._recorded_played_cards.pop() self._current_hand.extend(removed_cards) self._current_hand.sort(key=functools.cmp_to_key(doudizhu_sort_card)) diff --git a/rlcard/games/doudizhu/round.py b/rlcard/games/doudizhu/round.py index 8682d914f..abd1e3a0b 100644 --- a/rlcard/games/doudizhu/round.py +++ b/rlcard/games/doudizhu/round.py @@ -1,9 +1,7 @@ # -*- coding: utf-8 -*- -''' Implement Doudizhu Round class -''' +"""Implement Doudizhu Round class""" import functools -import numpy as np from 
rlcard.games.doudizhu import Dealer from rlcard.games.doudizhu.utils import cards2str, doudizhu_sort_card @@ -11,8 +9,7 @@ class DoudizhuRound: - ''' Round can call other Classes' functions to keep the game running - ''' + """Round can call other Classes' functions to keep the game running""" def __init__(self, np_random, played_cards): self.np_random = np_random self.played_cards = played_cards @@ -23,11 +20,11 @@ def __init__(self, np_random, played_cards): self.deck_str = cards2str(self.dealer.deck) def initiate(self, players): - ''' Call dealer to deal cards and bid landlord. + """Call dealer to deal cards and bid landlord. Args: players (list): list of DoudizhuPlayer objects - ''' + """ landlord_id = self.dealer.determine_role(players) seen_cards = self.dealer.deck[-3:] seen_cards.sort(key=functools.cmp_to_key(doudizhu_sort_card)) @@ -50,11 +47,11 @@ def cards_ndarray_to_str(ndarray_cards): return result def update_public(self, action): - ''' Update public trace and played cards + """Update public trace and played cards Args: action(str): string of legal specific action - ''' + """ self.trace.append((self.current_player, action)) if action != 'pass': for c in action: @@ -65,7 +62,7 @@ def update_public(self, action): self.public['played_cards'] = self.cards_ndarray_to_str(self.played_cards) def proceed_round(self, player, action): - ''' Call other Classes's functions to keep one round running + """Call other Classes's functions to keep one round running Args: player (object): object of DoudizhuPlayer @@ -73,53 +70,53 @@ def proceed_round(self, player, action): Returns: object of DoudizhuPlayer: player who played current biggest cards. 
- ''' + """ self.update_public(action) self.greater_player = player.play(action, self.greater_player) return self.greater_player def step_back(self, players): - ''' Reverse the last action + """Reverse the last action Args: players (list): list of DoudizhuPlayer objects Returns: The last player id and the cards played - ''' + """ player_id, cards = self.trace.pop() self.current_player = player_id - if (cards != 'pass'): + if cards != 'pass': for card in cards: # self.played_cards.remove(card) self.played_cards[player_id][CARD_RANK_STR_INDEX[card]] -= 1 self.public['played_cards'] = self.cards_ndarray_to_str(self.played_cards) greater_player_id = self.find_last_greater_player_id_in_trace() - if (greater_player_id is not None): + if greater_player_id is not None: self.greater_player = players[greater_player_id] else: self.greater_player = None return player_id, cards def find_last_greater_player_id_in_trace(self): - ''' Find the last greater_player's id in trace + """Find the last greater_player's id in trace Returns: The last greater_player's id in trace - ''' + """ for i in range(len(self.trace) - 1, -1, -1): _id, action = self.trace[i] - if (action != 'pass'): + if action != 'pass': return _id return None def find_last_played_cards_in_trace(self, player_id): - ''' Find the player_id's last played_cards in trace + """Find the player_id's last played_cards in trace Returns: The player_id's last played_cards in trace - ''' + """ for i in range(len(self.trace) - 1, -1, -1): _id, action = self.trace[i] - if (_id == player_id and action != 'pass'): + if _id == player_id and action != 'pass': return action return None diff --git a/rlcard/games/doudizhu/utils.py b/rlcard/games/doudizhu/utils.py index 7d697ccfb..d7362539f 100644 --- a/rlcard/games/doudizhu/utils.py +++ b/rlcard/games/doudizhu/utils.py @@ -1,5 +1,4 @@ -''' Doudizhu utils -''' +"""Doudizhu utils""" import os import json from collections import OrderedDict @@ -15,7 +14,8 @@ or not 
os.path.isfile(os.path.join(ROOT_PATH, 'games/doudizhu/jsondata/card_type.json')) \ or not os.path.isfile(os.path.join(ROOT_PATH, 'games/doudizhu/jsondata/type_card.json')): import zipfile - with zipfile.ZipFile(os.path.join(ROOT_PATH, 'games/doudizhu/jsondata.zip'),"r") as zip_ref: + + with zipfile.ZipFile(os.path.join(ROOT_PATH, 'games/doudizhu/jsondata.zip'), "r") as zip_ref: zip_ref.extractall(os.path.join(ROOT_PATH, 'games/doudizhu/')) # Action space @@ -41,8 +41,8 @@ CARD_RANK_STR = ['3', '4', '5', '6', '7', '8', '9', 'T', 'J', 'Q', 'K', 'A', '2', 'B', 'R'] CARD_RANK_STR_INDEX = {'3': 0, '4': 1, '5': 2, '6': 3, '7': 4, - '8': 5, '9': 6, 'T': 7, 'J': 8, 'Q': 9, - 'K': 10, 'A': 11, '2': 12, 'B': 13, 'R': 14} + '8': 5, '9': 6, 'T': 7, 'J': 8, 'Q': 9, + 'K': 10, 'A': 11, '2': 12, 'B': 13, 'R': 14} # rank list CARD_RANK = ['3', '4', '5', '6', '7', '8', '9', 'T', 'J', 'Q', 'K', 'A', '2', 'BJ', 'RJ'] @@ -54,7 +54,7 @@ def doudizhu_sort_str(card_1, card_2): - ''' Compare the rank of two cards of str representation + """Compare the rank of two cards of str representation Args: card_1 (str): str representation of solo card @@ -62,7 +62,7 @@ def doudizhu_sort_str(card_1, card_2): Returns: int: 1(card_1 > card_2) / 0(card_1 = card2) / -1(card_1 < card_2) - ''' + """ key_1 = CARD_RANK_STR.index(card_1) key_2 = CARD_RANK_STR.index(card_2) if key_1 > key_2: @@ -73,12 +73,12 @@ def doudizhu_sort_str(card_1, card_2): def doudizhu_sort_card(card_1, card_2): - ''' Compare the rank of two cards of Card object + """Compare the rank of two cards of Card object Args: card_1 (object): object of Card card_2 (object): object of card - ''' + """ key = [] for card in [card_1, card_2]: if card.rank == '': @@ -93,15 +93,14 @@ def doudizhu_sort_card(card_1, card_2): def get_landlord_score(current_hand): - ''' Roughly judge the quality of the hand, and provide a score as basis to - bid landlord. + """Roughly judge the quality of the hand, and provide a score as basis to bid landlord. 
Args: current_hand (str): string of cards. Eg: '56888TTQKKKAA222R' Returns: int: score - ''' + """ score_map = {'A': 1, '2': 2, 'B': 3, 'R': 4} score = 0 # rocket @@ -112,7 +111,7 @@ def get_landlord_score(current_hand): i = 0 while i < length: # bomb - if i <= (length - 4) and current_hand[i] == current_hand[i+3]: + if i <= (length - 4) and current_hand[i] == current_hand[i + 3]: score += 6 i += 4 continue @@ -122,26 +121,28 @@ def get_landlord_score(current_hand): i += 1 return score + def cards2str_with_suit(cards): - ''' Get the corresponding string representation of cards with suit + """Get the corresponding string representation of cards with suit Args: cards (list): list of Card objects Returns: string: string representation of cards - ''' - return ' '.join([card.suit+card.rank for card in cards]) + """ + return ' '.join([card.suit + card.rank for card in cards]) + def cards2str(cards): - ''' Get the corresponding string representation of cards + """Get the corresponding string representation of cards Args: cards (list): list of Card objects Returns: string: string representation of cards - ''' + """ response = '' for card in cards: if card.rank == '': @@ -150,13 +151,17 @@ def cards2str(cards): response += card.rank return response + class LocalObjs(threading.local): def __init__(self): self.cached_candidate_cards = None + + _local_objs = LocalObjs() + def contains_cards(candidate, target): - ''' Check if cards of candidate contains cards of target. + """Check if cards of candidate contains cards of target. Args: candidate (string): A string representing the cards of candidate @@ -164,7 +169,7 @@ def contains_cards(candidate, target): Returns: boolean - ''' + """ # In normal cases, most continuous calls of this function # will test different targets against the same candidate. 
# So the cached counts of each card in candidate can speed up @@ -176,29 +181,30 @@ def contains_cards(candidate, target): cards_dict[card] += 1 _local_objs.cached_candidate_cards_dict = cards_dict cards_dict = _local_objs.cached_candidate_cards_dict - if (target == ''): + if target == '': return True curr_card = target[0] curr_count = 1 for card in target[1:]: - if (card != curr_card): - if (cards_dict[curr_card] < curr_count): + if card != curr_card: + if cards_dict[curr_card] < curr_count: return False curr_card = card curr_count = 1 else: curr_count += 1 - if (cards_dict[curr_card] < curr_count): + if cards_dict[curr_card] < curr_count: return False return True + def encode_cards(plane, cards): - ''' Encode cards and represerve it into plane. + """Encode cards and represerve it into plane. Args: cards (list or str): list or str of cards, every entry is a character of solo representation of card - ''' + """ if not cards: return None layer = 1 @@ -210,10 +216,10 @@ def encode_cards(plane, cards): for index, card in enumerate(cards): if index == 0: continue - if card == cards[index-1]: + if card == cards[index - 1]: layer += 1 else: - rank = CARD_RANK_STR.index(cards[index-1]) + rank = CARD_RANK_STR.index(cards[index - 1]) plane[layer][rank] = 1 layer = 1 plane[0][rank] = 0 @@ -223,7 +229,7 @@ def encode_cards(plane, cards): def get_gt_cards(player, greater_player): - ''' Provide player's cards which are greater than the ones played by + """Provide player's cards which are greater than the ones played by previous player in one round Args: @@ -235,9 +241,8 @@ def get_gt_cards(player, greater_player): Note: 1. 
return value contains 'pass' - ''' - # add 'pass' to legal actions - gt_cards = ['pass'] + """ + gt_cards = ['pass'] # add 'pass' to legal actions current_hand = cards2str(player.current_hand) target_cards = greater_player.played_cards target_types = CARD_TYPE[0][target_cards] diff --git a/rlcard/games/gin_rummy/dealer.py b/rlcard/games/gin_rummy/dealer.py index c14713147..f792ef890 100644 --- a/rlcard/games/gin_rummy/dealer.py +++ b/rlcard/games/gin_rummy/dealer.py @@ -1,19 +1,17 @@ -''' +""" File name: gin_rummy/dealer.py Author: William Hale Date created: 2/12/2020 -''' +""" from .player import GinRummyPlayer from .utils import utils as utils class GinRummyDealer: - ''' Initialize a GinRummy dealer class - ''' + """Initialize a GinRummy dealer class """ def __init__(self, np_random): - ''' Empty discard_pile, set shuffled_deck, set stock_pile - ''' + """Empty discard_pile, set shuffled_deck, set stock_pile """ self.np_random = np_random self.discard_pile = [] # type: List[Card] self.shuffled_deck = utils.get_deck() # keep a copy of the shuffled cards at start of new hand @@ -21,12 +19,12 @@ def __init__(self, np_random): self.stock_pile = self.shuffled_deck.copy() # type: List[Card] def deal_cards(self, player: GinRummyPlayer, num: int): - ''' Deal some cards from stock_pile to one player + """Deal some cards from stock_pile to one player Args: player (GinRummyPlayer): The GinRummyPlayer object num (int): The number of cards to be dealt - ''' + """ for _ in range(num): player.hand.append(self.stock_pile.pop()) player.did_populate_hand() diff --git a/rlcard/games/gin_rummy/game.py b/rlcard/games/gin_rummy/game.py index 4666b8ccf..d85a221dc 100644 --- a/rlcard/games/gin_rummy/game.py +++ b/rlcard/games/gin_rummy/game.py @@ -1,8 +1,8 @@ -''' +""" File name: gin_rummy/game.py Author: William Hale Date created: 2/12/2020 -''' +""" import numpy as np @@ -15,12 +15,10 @@ class GinRummyGame: - ''' Game class. This class will interact with outer environment. 
- ''' + """Game class. This class will interact with outer environment""" def __init__(self, allow_step_back=False): - '''Initialize the class GinRummyGame - ''' + """Initialize the class GinRummyGame""" self.allow_step_back = allow_step_back self.np_random = np.random.RandomState() self.judge = GinRummyJudge(game=self) @@ -30,8 +28,7 @@ def __init__(self, allow_step_back=False): self.num_players = 2 def init_game(self): - ''' Initialize all characters in the game and start round 1 - ''' + """Initialize all characters in the game and start round 1""" dealer_id = self.np_random.choice([0, 1]) if self.settings.dealer_for_round == DealerForRound.North: dealer_id = 0 @@ -48,8 +45,7 @@ def init_game(self): return state, current_player_id def step(self, action: ActionEvent): - ''' Perform game action and return next player number, and the state for next player - ''' + """Perform game action and return next player number, and the state for next player""" if isinstance(action, ScoreNorthPlayerAction): self.round.score_player_0(action) elif isinstance(action, ScoreSouthPlayerAction): @@ -74,28 +70,23 @@ def step(self, action: ActionEvent): return next_state, next_player_id def step_back(self): - ''' Takes one step backward and restore to the last state - ''' + """Takes one step backward and restore to the last state""" raise NotImplementedError def get_num_players(self): - ''' Return the number of players in the game - ''' + """Return the number of players in the game""" return 2 def get_num_actions(self): - ''' Return the number of possible actions in the game - ''' + """Return the number of possible actions in the game""" return ActionEvent.get_num_actions() def get_player_id(self): - ''' Return the current player that will take actions soon - ''' + """Return the current player that will take actions soon""" return self.round.current_player_id def is_over(self): - ''' Return whether the current game is over - ''' + """Return whether the current game is over""" return 
self.round.is_over def get_current_player(self) -> GinRummyPlayer or None: @@ -105,11 +96,11 @@ def get_last_action(self) -> ActionEvent or None: return self.actions[-1] if self.actions and len(self.actions) > 0 else None def get_state(self, player_id: int): - ''' Get player's state + """Get player's state Return: state (dict): The information of the state - ''' + """ state = {} if not self.is_over(): discard_pile = self.round.dealer.discard_pile @@ -132,12 +123,12 @@ def get_state(self, player_id: int): @staticmethod def decode_action(action_id) -> ActionEvent: # FIXME 200213 should return str - ''' Action id -> the action_event in the game. + """Action id -> the action_event in the game. Args: action_id (int): the id of the action Returns: action (ActionEvent): the action that will be passed to the game engine. - ''' + """ return ActionEvent.decode_action(action_id=action_id) diff --git a/rlcard/games/gin_rummy/judge.py b/rlcard/games/gin_rummy/judge.py index bd47fa887..efa19c1bf 100644 --- a/rlcard/games/gin_rummy/judge.py +++ b/rlcard/games/gin_rummy/judge.py @@ -1,8 +1,8 @@ -''' +""" File name: gin_rummy/judge.py Author: William Hale Date created: 2/12/2020 -''' +""" from typing import TYPE_CHECKING if TYPE_CHECKING: @@ -19,15 +19,12 @@ class GinRummyJudge: - - ''' - Judge decides legal actions for current player - ''' + """Judge decides legal actions for current player""" def __init__(self, game: 'GinRummyGame'): - ''' Initialize the class GinRummyJudge + """Initialize the class GinRummyJudge :param game: GinRummyGame - ''' + """ self.game = game self.scorer = GinRummyScorer() @@ -98,11 +95,11 @@ def get_legal_actions(self) -> List[ActionEvent]: def get_going_out_cards(hand: List[Card], going_out_deadwood_count: int) -> Tuple[List[Card], List[Card]]: - ''' + """ :param hand: List[Card] -- must have 11 cards :param going_out_deadwood_count: int :return List[Card], List[Card: cards in hand that be knocked, cards in hand that can be ginned - ''' + """ if not 
len(hand) == 11: raise GinRummyProgramError("len(hand) is {}: should be 11.".format(len(hand))) meld_clusters = melding.get_meld_clusters(hand=hand) @@ -112,19 +109,15 @@ def get_going_out_cards(hand: List[Card], going_out_deadwood_count: int) -> Tupl return list(knock_cards), list(gin_cards) -# -# private methods -# - def _get_going_out_cards(meld_clusters: List[List[List[Card]]], hand: List[Card], going_out_deadwood_count: int) -> Tuple[List[Card], List[Card]]: - ''' + """ :param meld_clusters :param hand: List[Card] -- must have 11 cards :param going_out_deadwood_count: int :return List[Card], List[Card: cards in hand that be knocked, cards in hand that can be ginned - ''' + """ if not len(hand) == 11: raise GinRummyProgramError("len(hand) is {}: should be 11.".format(len(hand))) knock_cards = set() diff --git a/rlcard/games/gin_rummy/player.py b/rlcard/games/gin_rummy/player.py index 570977bc3..5801e59fe 100644 --- a/rlcard/games/gin_rummy/player.py +++ b/rlcard/games/gin_rummy/player.py @@ -1,8 +1,8 @@ -''' +""" File name: gin_rummy/player.py Author: William Hale Date created: 2/12/2020 -''' +""" from typing import List @@ -16,11 +16,11 @@ class GinRummyPlayer: def __init__(self, player_id: int, np_random): - ''' Initialize a GinRummy player class + """Initialize a GinRummy player class Args: player_id (int): id for the player - ''' + """ self.np_random = np_random self.player_id = player_id self.hand = [] # type: List[Card] @@ -30,8 +30,7 @@ def __init__(self, player_id: int, np_random): self.meld_run_by_suit_id = [[] for _ in range(4)] # type: List[List[List[Card]]] def get_player_id(self) -> int: - ''' Return player's id - ''' + """Return player's id""" return self.player_id def get_meld_clusters(self) -> List[List[List[Card]]]: diff --git a/rlcard/games/gin_rummy/round.py b/rlcard/games/gin_rummy/round.py index 02426d9c4..d27b51c19 100644 --- a/rlcard/games/gin_rummy/round.py +++ b/rlcard/games/gin_rummy/round.py @@ -1,8 +1,8 @@ -''' +""" File name: 
gin_rummy/round.py Author: William Hale Date created: 2/12/2020 -''' +""" from typing import TYPE_CHECKING if TYPE_CHECKING: from .utils.move import GinRummyMove @@ -32,7 +32,7 @@ class GinRummyRound: def __init__(self, dealer_id: int, np_random): - ''' Initialize the round class + """Initialize the round class The round class maintains the following instances: 1) dealer: the dealer of the round; dealer has stock_pile and discard_pile @@ -51,7 +51,7 @@ def __init__(self, dealer_id: int, np_random): Args: dealer_id: int - ''' + """ self.np_random = np_random self.dealer_id = dealer_id self.dealer = GinRummyDealer(self.np_random) diff --git a/rlcard/games/gin_rummy/utils/action_event.py b/rlcard/games/gin_rummy/utils/action_event.py index 3126712c5..3ab9db1b1 100644 --- a/rlcard/games/gin_rummy/utils/action_event.py +++ b/rlcard/games/gin_rummy/utils/action_event.py @@ -1,8 +1,8 @@ -''' +""" File name: gin_rummy/action_event.py Author: William Hale Date created: 2/12/2020 -''' +""" from rlcard.games.base import Card @@ -43,20 +43,19 @@ def __eq__(self, other): @staticmethod def get_num_actions(): - ''' Return the number of possible actions in the game - ''' + """Return the number of possible actions in the game """ return knock_action_id + 52 # FIXME: sensitive to code changes 200213 @staticmethod def decode_action(action_id) -> 'ActionEvent': - ''' Action id -> the action_event in the game. + """Action id -> the action_event in the game. Args: action_id (int): the id of the action Returns: action (ActionEvent): the action that will be passed to the game engine. 
- ''' + """ if action_id == score_player_0_action_id: action_event = ScoreNorthPlayerAction() elif action_id == score_player_1_action_id: diff --git a/rlcard/games/gin_rummy/utils/gin_rummy_error.py b/rlcard/games/gin_rummy/utils/gin_rummy_error.py index ef38a45da..5da56c826 100644 --- a/rlcard/games/gin_rummy/utils/gin_rummy_error.py +++ b/rlcard/games/gin_rummy/utils/gin_rummy_error.py @@ -1,9 +1,9 @@ -''' +""" Project: Gin Rummy File name: gin_rummy/utils/gin_rummy_error.py Author: William Hale Date created: 4/29/2020 -''' +""" class GinRummyError(Exception): diff --git a/rlcard/games/gin_rummy/utils/melding.py b/rlcard/games/gin_rummy/utils/melding.py index dea15ebf1..559812167 100644 --- a/rlcard/games/gin_rummy/utils/melding.py +++ b/rlcard/games/gin_rummy/utils/melding.py @@ -1,8 +1,8 @@ -''' +""" File name: gin_rummy/melding.py Author: William Hale Date created: 2/12/2020 -''' +""" from typing import List diff --git a/rlcard/games/gin_rummy/utils/move.py b/rlcard/games/gin_rummy/utils/move.py index 78d028ea3..cdc5605d1 100644 --- a/rlcard/games/gin_rummy/utils/move.py +++ b/rlcard/games/gin_rummy/utils/move.py @@ -1,8 +1,8 @@ -''' +""" File name: gin_rummy/move.py Author: William Hale Date created: 2/16/2020 -''' +""" from typing import List diff --git a/rlcard/games/gin_rummy/utils/scorers.py b/rlcard/games/gin_rummy/utils/scorers.py index 7e8a84ba7..2b3edb958 100644 --- a/rlcard/games/gin_rummy/utils/scorers.py +++ b/rlcard/games/gin_rummy/utils/scorers.py @@ -1,8 +1,8 @@ -''' +""" File name: gin_rummy/scorers.py Author: William Hale Date created: 2/15/2020 -''' +""" from typing import TYPE_CHECKING if TYPE_CHECKING: @@ -35,11 +35,11 @@ def get_payoffs(self, game: 'GinRummyGame'): def get_payoff_gin_rummy_v0(player: GinRummyPlayer, game: 'GinRummyGame') -> int: - ''' Get the payoff of player: deadwood_count of player + """Get the payoff of player: deadwood_count of player Returns: payoff (int or float): payoff for player (lower is better) - ''' + """ 
moves = game.round.move_sheet if player.player_id == 0: score_player_move = moves[-2] @@ -54,14 +54,14 @@ def get_payoff_gin_rummy_v0(player: GinRummyPlayer, game: 'GinRummyGame') -> int def get_payoff_gin_rummy_v1(player: GinRummyPlayer, game: 'GinRummyGame') -> float: - ''' Get the payoff of player: + """Get the payoff of player: a) 1.0 if player gins b) 0.2 if player knocks c) -deadwood_count / 100 otherwise Returns: payoff (int or float): payoff for player (higher is better) - ''' + """ # payoff is 1.0 if player gins # payoff is 0.2 if player knocks # payoff is -deadwood_count / 100 if otherwise diff --git a/rlcard/games/gin_rummy/utils/settings.py b/rlcard/games/gin_rummy/utils/settings.py index f232e20d2..7a4914c93 100644 --- a/rlcard/games/gin_rummy/utils/settings.py +++ b/rlcard/games/gin_rummy/utils/settings.py @@ -1,8 +1,8 @@ -''' +""" File name: gin_rummy/settings.py Author: William Hale Date created: 2/16/2020 -''' +""" from typing import Dict, Any diff --git a/rlcard/games/gin_rummy/utils/thinker.py b/rlcard/games/gin_rummy/utils/thinker.py index 5e8ffe0a4..c49319384 100644 --- a/rlcard/games/gin_rummy/utils/thinker.py +++ b/rlcard/games/gin_rummy/utils/thinker.py @@ -1,8 +1,8 @@ -''' +""" File name: gin_rummy/thinker.py Author: William Hale Date created: 3/28/2020 -''' +""" from typing import List diff --git a/rlcard/games/gin_rummy/utils/utils.py b/rlcard/games/gin_rummy/utils/utils.py index 66413ed79..bbbb70bbe 100644 --- a/rlcard/games/gin_rummy/utils/utils.py +++ b/rlcard/games/gin_rummy/utils/utils.py @@ -1,8 +1,8 @@ -''' +""" File name: gin_rummy/utils.py Author: William Hale Date created: 2/12/2020 -''' +""" from typing import List, Iterable @@ -20,11 +20,11 @@ def card_from_card_id(card_id: int) -> Card: - ''' Make card from its card_id + """Make card from its card_id Args: card_id: int in range(0, 52) - ''' + """ if not (0 <= card_id < 52): raise GinRummyProgramError("card_id is {}: should be 0 <= card_id < 52.".format(card_id)) rank_id = 
card_id % 13 diff --git a/rlcard/games/leducholdem/dealer.py b/rlcard/games/leducholdem/dealer.py index 6af8ff63f..7fbdf7fec 100644 --- a/rlcard/games/leducholdem/dealer.py +++ b/rlcard/games/leducholdem/dealer.py @@ -1,11 +1,11 @@ from rlcard.games.base import Card from rlcard.games.limitholdem import Dealer + class LeducholdemDealer(Dealer): def __init__(self, np_random): - ''' Initialize a leducholdem dealer class - ''' + """Initialize a leduc holdem dealer class """ self.np_random = np_random self.deck = [Card('S', 'J'), Card('H', 'J'), Card('S', 'Q'), Card('H', 'Q'), Card('S', 'K'), Card('H', 'K')] self.shuffle() diff --git a/rlcard/games/leducholdem/game.py b/rlcard/games/leducholdem/game.py index fe31b9fdd..ebd4142f6 100644 --- a/rlcard/games/leducholdem/game.py +++ b/rlcard/games/leducholdem/game.py @@ -8,15 +8,15 @@ from rlcard.games.limitholdem import Game + class LeducholdemGame(Game): def __init__(self, allow_step_back=False, num_players=2): - ''' Initialize the class leducholdem Game - ''' + """Initialize the class leducholdem Game""" self.allow_step_back = allow_step_back self.np_random = np.random.RandomState() ''' No big/small blind - # Some configarations of the game + # Some configurations of the game # These arguments are fixed in Leduc Hold'em Game # Raise amount and allowed times @@ -25,7 +25,7 @@ def __init__(self, allow_step_back=False, num_players=2): self.num_players = 2 ''' - # Some configarations of the game + # Some configurations of the game # These arguments can be specified for creating new games # Small blind and big blind @@ -39,12 +39,11 @@ def __init__(self, allow_step_back=False, num_players=2): self.num_players = num_players def configure(self, game_config): - ''' Specifiy some game specific parameters, such as number of players - ''' + """Specify some game specific parameters, such as number of players""" self.num_players = game_config['game_num_players'] def init_game(self): - ''' Initialilze the game of Limit Texas Hold'em + 
"""Initialize the game of Limit Texas Hold'em This version supports two-player limit texas hold'em @@ -53,11 +52,11 @@ def init_game(self): (dict): The first state of the game (int): Current player's id - ''' - # Initilize a dealer that can deal cards + """ + # Initialize a dealer that can deal cards self.dealer = Dealer(self.np_random) - # Initilize two players to play the game + # Initialize two players to play the game self.players = [Player(i, self.np_random) for i in range(self.num_players)] # Initialize a judger class which will decide who wins in the end @@ -75,7 +74,7 @@ def init_game(self): # The player with small blind plays the first self.game_pointer = s - # Initilize a bidding round, in the first round, the big blind and the small blind needs to + # Initialize a bidding round, in the first round, the big blind and the small blind needs to # be passed to the round for processing. self.round = Round(raise_amount=self.raise_amount, allowed_raise_num=self.allowed_raise_num, @@ -87,7 +86,7 @@ def init_game(self): # Count the round. There are 2 rounds in each game. self.round_counter = 0 - # Save the hisory for stepping back to the last state. + # Save the history for stepping back to the last state. self.history = [] state = self.get_state(self.game_pointer) @@ -95,7 +94,7 @@ def init_game(self): return state, self.game_pointer def step(self, action): - ''' Get the next state + """Get the next state Args: action (str): a specific action. 
(call, raise, fold, or check) @@ -105,7 +104,7 @@ def step(self, action): (dict): next player's state (int): next plater's id - ''' + """ if self.allow_step_back: # First snapshot the current state r = copy(self.round) @@ -136,14 +135,14 @@ def step(self, action): return state, self.game_pointer def get_state(self, player): - ''' Return player's state + """Return player's state Args: player_id (int): player id Returns: (dict): The state of the player - ''' + """ chips = [self.players[i].in_chips for i in range(self.num_players)] legal_actions = self.get_legal_actions() state = self.players[player].get_state(self.public_card, chips, legal_actions) @@ -152,37 +151,37 @@ def get_state(self, player): return state def is_over(self): - ''' Check if the game is over + """Check if the game is over Returns: (boolean): True if the game is over - ''' - alive_players = [1 if p.status=='alive' else 0 for p in self.players] + """ + alive_players = [1 if p.status == 'alive' else 0 for p in self.players] # If only one player is alive, the game is over. 
if sum(alive_players) == 1: return True - # If all rounds are finshed + # If all rounds are finished if self.round_counter >= 2: return True return False def get_payoffs(self): - ''' Return the payoffs of the game + """Return the payoffs of the game Returns: (list): Each entry corresponds to the payoff of one player - ''' + """ chips_payoffs = self.judger.judge_game(self.players, self.public_card) - payoffs = np.array(chips_payoffs) / (self.big_blind) + payoffs = np.array(chips_payoffs) / self.big_blind return payoffs def step_back(self): - ''' Return to the previous state of the game + """Return to the previous state of the game Returns: (bool): True if the game steps back successfully - ''' + """ if len(self.history) > 0: self.round, r_raised, self.game_pointer, self.round_counter, d_deck, self.public_card, self.players, ps_hand = self.history.pop() self.round.raised = r_raised diff --git a/rlcard/games/leducholdem/judger.py b/rlcard/games/leducholdem/judger.py index dc24d779f..24bf43cd9 100644 --- a/rlcard/games/leducholdem/judger.py +++ b/rlcard/games/leducholdem/judger.py @@ -1,16 +1,16 @@ from rlcard.utils.utils import rank2int + class LeducholdemJudger: - ''' The Judger class for Leduc Hold'em - ''' + """The Judger class for Leduc Hold'em""" + def __init__(self, np_random): - ''' Initialize a judger class - ''' + """Initialize a judger class""" self.np_random = np_random @staticmethod def judge_game(players, public_card): - ''' Judge the winner of the game. + """Judge the winner of the game. 
Args: players (list): The list of players who play the game @@ -18,7 +18,7 @@ def judge_game(players, public_card): Returns: (list): Each entry of the list corresponds to one entry of the - ''' + """ # Judge who are the winners winners = [0] * len(players) fold_count = 0 @@ -27,20 +27,20 @@ def judge_game(players, public_card): for idx, player in enumerate(players): ranks.append(rank2int(player.hand.rank)) if player.status == 'folded': - fold_count += 1 + fold_count += 1 elif player.status == 'alive': alive_idx = idx if fold_count == (len(players) - 1): winners[alive_idx] = 1 - + # If any of the players matches the public card wins if sum(winners) < 1: for idx, player in enumerate(players): if player.hand.rank == public_card.rank: winners[idx] = 1 break - - # If non of the above conditions, the winner player is the one with the highest card rank + + # If none of the above conditions, the winner player is the one with the highest card rank if sum(winners) < 1: max_rank = max(ranks) max_index = [i for i, j in enumerate(ranks) if j == max_rank] diff --git a/rlcard/games/leducholdem/player.py b/rlcard/games/leducholdem/player.py index 1ec66c339..f667772ae 100644 --- a/rlcard/games/leducholdem/player.py +++ b/rlcard/games/leducholdem/player.py @@ -1,11 +1,11 @@ class LeducholdemPlayer: def __init__(self, player_id, np_random): - ''' Initilize a player. + """Initialize a player. 
Args: player_id (int): The id of the player - ''' + """ self.np_random = np_random self.player_id = player_id self.status = 'alive' @@ -15,7 +15,7 @@ def __init__(self, player_id, np_random): self.in_chips = 0 def get_state(self, public_card, all_chips, legal_actions): - ''' Encode the state for the player + """Encode the state for the player Args: public_card (object): The public card that seen by all the players @@ -23,7 +23,7 @@ def get_state(self, public_card, all_chips, legal_actions): Returns: (dict): The state of the player - ''' + """ state = {} state['hand'] = self.hand.get_index() state['public_card'] = public_card.get_index() if public_card else None @@ -33,6 +33,5 @@ def get_state(self, public_card, all_chips, legal_actions): return state def get_player_id(self): - ''' Return the id of the player - ''' + """Return the id of the player""" return self.player_id diff --git a/rlcard/games/leducholdem/round.py b/rlcard/games/leducholdem/round.py index 3a0459607..deb5c06cf 100644 --- a/rlcard/games/leducholdem/round.py +++ b/rlcard/games/leducholdem/round.py @@ -1,19 +1,17 @@ # -*- coding: utf-8 -*- -''' Implement Leduc Hold'em Round class -''' +"""Implement Leduc Hold'em Round class""" from rlcard.games.limitholdem import Round class LeducholdemRound(Round): - ''' Round can call other Classes' functions to keep the game running - ''' + """Round can call other Classes' functions to keep the game running""" def __init__(self, raise_amount, allowed_raise_num, num_players, np_random): - ''' Initilize the round class + """Initialize the round class Args: raise_amount (int): the raise amount for each raise allowed_raise_num (int): The number of allowed raise num num_players (int): The number of players - ''' + """ super(LeducholdemRound, self).__init__(raise_amount, allowed_raise_num, num_players, np_random=np_random) diff --git a/rlcard/games/limitholdem/utils.py b/rlcard/games/limitholdem/utils.py index 2443ed1d2..094875362 100644 --- 
a/rlcard/games/limitholdem/utils.py +++ b/rlcard/games/limitholdem/utils.py @@ -1,42 +1,41 @@ -import numpy as np - class Hand: def __init__(self, all_cards): - self.all_cards = all_cards # two hand cards + five public cards + self.all_cards = all_cards # two hand cards + five public cards self.category = 0 - #type of a players' best five cards, greater combination has higher number eg: 0:"Not_Yet_Evaluated" 1: "High_Card" , 9:"Straight_Flush" + # type of players' best five cards, greater combination has higher number eg: 0:"Not_Yet_Evaluated" 1: + # "High_Card" , 9:"Straight_Flush" self.best_five = [] - #the largest combination of five cards in all the seven cards + # the largest combination of five cards in all the seven cards self.flush_cards = [] - #cards with same suit + # cards with same suit self.cards_by_rank = [] - #cards after sort + # cards after sort self.product = 1 - #cards’ type indicator + # cards’ type indicator self.RANK_TO_STRING = {2: "2", 3: "3", 4: "4", 5: "5", 6: "6", 7: "7", 8: "8", 9: "9", 10: "T", 11: "J", 12: "Q", 13: "K", 14: "A"} - self.STRING_TO_RANK = {v:k for k, v in self.RANK_TO_STRING.items()} + self.STRING_TO_RANK = {v: k for k, v in self.RANK_TO_STRING.items()} self.RANK_LOOKUP = "23456789TJQKA" self.SUIT_LOOKUP = "SCDH" def get_hand_five_cards(self): - ''' + """ Get the best five cards of a player Returns: (list): the best five cards among the seven cards of a player - ''' + """ return self.best_five def _sort_cards(self): - ''' + """ Sort all the seven cards ascendingly according to RANK_LOOKUP - ''' + """ self.all_cards = sorted( self.all_cards, key=lambda card: self.RANK_LOOKUP.index(card[1])) - def evaluateHand(self): + def evaluate_hand(self): """ - Evaluate all the seven cards, get the best combination catagory + Evaluate all the seven cards, get the best combination category And pick the best five cards (for comparing in case 2 hands have the same Category) . 
""" if len(self.all_cards) != 7: @@ -49,47 +48,47 @@ def evaluateHand(self): if self._has_straight_flush(): self.category = 9 - #Straight Flush + # Straight Flush elif self._has_four(): self.category = 8 - #Four of a Kind + # Four of a Kind self.best_five = self._get_Four_of_a_kind_cards() elif self._has_fullhouse(): self.category = 7 - #Full house + # Full house self.best_five = self._get_Fullhouse_cards() elif self._has_flush(): self.category = 6 - #Flush + # Flush i = len(self.flush_cards) - self.best_five = [card for card in self.flush_cards[i-5:i]] + self.best_five = [card for card in self.flush_cards[i - 5:i]] elif self._has_straight(self.all_cards): self.category = 5 - #Straight + # Straight elif self._has_three(): self.category = 4 - #Three of a Kind + # Three of a Kind self.best_five = self._get_Three_of_a_kind_cards() elif self._has_two_pairs(): self.category = 3 - #Two Pairs + # Two Pairs self.best_five = self._get_Two_Pair_cards() elif self._has_pair(): self.category = 2 - #One Pair + # One Pair self.best_five = self._get_One_Pair_cards() elif self._has_high_card(): self.category = 1 - #High Card + # High Card self.best_five = self._get_High_cards() def _has_straight_flush(self): - ''' + """ Check the existence of straight_flush cards Returns: True: exist False: not exist - ''' + """ self.flush_cards = self._getflush_cards() if len(self.flush_cards) > 0: straightflush_cards = self._get_straightflush_cards() @@ -99,20 +98,20 @@ def _has_straight_flush(self): return False def _get_straightflush_cards(self): - ''' + """ Pick straight_flush cards Returns: (list): the straightflush cards - ''' + """ straightflush_cards = self._get_straight_cards(self.flush_cards) return straightflush_cards def _getflush_cards(self): - ''' + """ Pick flush cards Returns: (list): the flush cards - ''' + """ card_string = ''.join(self.all_cards) for suit in self.SUIT_LOOKUP: suit_count = card_string.count(suit) @@ -123,52 +122,52 @@ def _getflush_cards(self): return [] def 
_has_flush(self): - ''' + """ Check the existence of flush cards Returns: True: exist False: not exist - ''' + """ if len(self.flush_cards) > 0: return True else: return False def _has_straight(self, all_cards): - ''' + """ Check the existence of straight cards Returns: True: exist False: not exist - ''' + """ diff_rank_cards = self._get_different_rank_list(all_cards) self.best_five = self._get_straight_cards(diff_rank_cards) if len(self.best_five) != 0: return True else: return False + @classmethod - def _get_different_rank_list(self, all_cards): - ''' + def _get_different_rank_list(cls, all_cards): + """ Get cards with different ranks, that is to say, remove duplicate-ranking cards, for picking straight cards' use Args: (list): two hand cards + five public cards Returns: (list): a list of cards with duplicate-ranking cards removed - ''' - different_rank_list = [] - different_rank_list.append(all_cards[0]) + """ + different_rank_list = [all_cards[0]] for card in all_cards: - if(card[1] != different_rank_list[-1][1]): + if card[1] != different_rank_list[-1][1]: different_rank_list.append(card) return different_rank_list def _get_straight_cards(self, Cards): - ''' + """ Pick straight cards Returns: (list): the straight cards - ''' + """ ranks = [self.STRING_TO_RANK[c[1]] for c in Cards] highest_card = Cards[-1] @@ -177,19 +176,19 @@ def _get_straight_cards(self, Cards): ranks.insert(0, 1) for i_last in range(len(ranks) - 1, 3, -1): - if ranks[i_last-4] + 4 == ranks[i_last]: # works because ranks are unique and sorted in ascending order - return Cards[i_last-4:i_last+1] + if ranks[i_last - 4] + 4 == ranks[i_last]: # works because ranks are unique and sorted in ascending order + return Cards[i_last - 4:i_last + 1] return [] def _getcards_by_rank(self, all_cards): - ''' + """ Get cards by rank Args: (list): # two hand cards + five public cards Return: card_group(list): cards after sort product(int):cards‘ type indicator - ''' + """ card_group = [] card_group_element = 
[] product = 1 @@ -218,8 +217,7 @@ def _getcards_by_rank(self, all_cards): card_group.append(card_group_element) # reset counting count = 1 - card_group_element = [] - card_group_element.append(card) + card_group_element = [card] current_rank = rank # the For Loop misses operation for the last card # These 3 lines below to compensate that @@ -231,83 +229,83 @@ def _getcards_by_rank(self, all_cards): return card_group, product def _has_four(self): - ''' + """ Check the existence of four cards Returns: True: exist False: not exist - ''' + """ if self.product == 5 or self.product == 10 or self.product == 15: return True else: return False def _has_fullhouse(self): - ''' + """ Check the existence of fullhouse cards Returns: True: exist False: not exist - ''' + """ if self.product == 6 or self.product == 9 or self.product == 12: return True else: return False def _has_three(self): - ''' + """ Check the existence of three cards Returns: True: exist False: not exist - ''' + """ if self.product == 3: return True else: return False def _has_two_pairs(self): - ''' + """ Check the existence of 2 pair cards Returns: True: exist False: not exist - ''' + """ if self.product == 4 or self.product == 8: return True else: return False def _has_pair(self): - ''' + """ Check the existence of 1 pair cards Returns: True: exist False: not exist - ''' + """ if self.product == 2: return True else: return False def _has_high_card(self): - ''' + """ Check the existence of high cards Returns: True: exist False: not exist - ''' + """ if self.product == 1: return True else: return False def _get_Four_of_a_kind_cards(self): - ''' + """ Get the four of a kind cards among a player's cards Returns: (list): best five hand cards after sort - ''' + """ Four_of_a_Kind = [] cards_by_rank = self.cards_by_rank cards_len = len(cards_by_rank) @@ -322,12 +320,11 @@ def _get_Four_of_a_kind_cards(self): return Four_of_a_Kind def _get_Fullhouse_cards(self): - ''' + """ Get the fullhouse cards among a player's 
cards Returns: (list): best five hand cards after sort - ''' - Fullhouse = [] + """ cards_by_rank = self.cards_by_rank cards_len = len(cards_by_rank) for i in reversed(range(cards_len)): @@ -342,11 +339,11 @@ def _get_Fullhouse_cards(self): return Fullhouse def _get_Three_of_a_kind_cards(self): - ''' + """ Get the three of a kind cards among a player's cards Returns: (list): best five hand cards after sort - ''' + """ Trip_cards = [] cards_by_rank = self.cards_by_rank cards_len = len(cards_by_rank) @@ -361,11 +358,11 @@ def _get_Three_of_a_kind_cards(self): return Trip_cards def _get_Two_Pair_cards(self): - ''' + """ Get the two pair cards among a player's cards Returns: (list): best five hand cards after sort - ''' + """ Two_Pair_cards = [] cards_by_rank = self.cards_by_rank cards_len = len(cards_by_rank) @@ -378,11 +375,11 @@ def _get_Two_Pair_cards(self): return Two_Pair_cards def _get_One_Pair_cards(self): - ''' + """ Get the one pair cards among a player's cards Returns: (list): best five hand cards after sort - ''' + """ One_Pair_cards = [] cards_by_rank = self.cards_by_rank cards_len = len(cards_by_rank) @@ -398,16 +395,17 @@ def _get_One_Pair_cards(self): return One_Pair_cards def _get_High_cards(self): - ''' + """ Get the high cards among a player's cards Returns: (list): best five hand cards after sort - ''' + """ High_cards = self.all_cards[2:7] return High_cards + def compare_ranks(position, hands, winner): - ''' + """ Compare cards in same position of plays' five handcards Args: position(int): the position of a card in a sorted handcard @@ -421,19 +419,19 @@ def compare_ranks(position, hands, winner): [1, 1, 1]: draw [1, 1, 0]: player1 and player0 draws - ''' + """ assert len(hands) == len(winner) RANKS = '23456789TJQKA' - cards_figure_all_players = [None]*len(hands) #cards without suit + cards_figure_all_players = [None] * len(hands) # cards without suit for i, hand in enumerate(hands): if winner[i]: cards = hands[i].get_hand_five_cards() - if 
len(cards[0]) != 1:# remove suit + if len(cards[0]) != 1: # remove suit for p in range(5): cards[p] = cards[p][1:] cards_figure_all_players[i] = cards - rival_ranks = [] # ranks of rival_figures + rival_ranks = [] # ranks of rival_figures for i, cards_figure in enumerate(cards_figure_all_players): if winner[i]: rank = cards_figure_all_players[i][position] @@ -446,23 +444,23 @@ def compare_ranks(position, hands, winner): new_winner[i] = 0 return new_winner + def determine_winner(key_index, hands, all_players, potential_winner_index): - ''' - Find out who wins in the situation of having players with same highest hand_catagory + """ + Find out who wins in the situation of having players with same highest hand_category Args: key_index(int): the position of a card in a sorted handcard - hands(list): cards of those players with same highest hand_catagory. + hands(list): cards of those players with same highest hand_category. e.g. hands = [['CT', 'ST', 'H9', 'B9', 'C2', 'C8', 'C7'], ['CJ', 'SJ', 'H9', 'B9', 'C2', 'C8', 'C7'], ['CT', 'ST', 'H9', 'B9', 'C2', 'C8', 'C7']] all_players(list): all the players in this round, 0 for losing and 1 for winning or draw - potential_winner_index(list): the positions of those players with same highest hand_catagory in all_players + potential_winner_index(list): the positions of those players with same highest hand_category in all_players Returns: [0, 1, 0]: player1 wins [1, 0, 0]: player0 wins [1, 1, 1]: draw [1, 1, 0]: player1 and player0 draws - - ''' - winner = [1]*len(hands) + """ + winner = [1] * len(hands) i_index = 0 while i_index < len(key_index) and sum(winner) > 1: index_break_tie = key_index[i_index] @@ -473,20 +471,21 @@ def determine_winner(key_index, hands, all_players, potential_winner_index): all_players[potential_winner_index[i]] = 1 return all_players + def determine_winner_straight(hands, all_players, potential_winner_index): - ''' + """ Find out who wins in the situation of having players all having a straight or 
straight flush Args: key_index(int): the position of a card in a sorted handcard hands(list): cards of those players which all have a straight or straight flush all_players(list): all the players in this round, 0 for losing and 1 for winning or draw - potential_winner_index(list): the positions of those players with same highest hand_catagory in all_players + potential_winner_index(list): the positions of those players with same highest hand_category in all_players Returns: [0, 1, 0]: player1 wins [1, 0, 0]: player0 wins [1, 1, 1]: draw [1, 1, 0]: player1 and player0 draws - ''' + """ highest_ranks = [] for hand in hands: highest_rank = hand.STRING_TO_RANK[hand.best_five[-1][1]] # cards are sorted in ascending order @@ -497,21 +496,22 @@ def determine_winner_straight(hands, all_players, potential_winner_index): all_players[potential_winner_index[i_player]] = 1 return all_players + def determine_winner_four_of_a_kind(hands, all_players, potential_winner_index): - ''' + """ Find out who wins in the situation of having players which all have a four of a kind Args: key_index(int): the position of a card in a sorted handcard hands(list): cards of those players with a four of a kind e.g. 
hands = [['CT', 'ST', 'H9', 'B9', 'C2', 'C8', 'C7'], ['CJ', 'SJ', 'H9', 'B9', 'C2', 'C8', 'C7'], ['CT', 'ST', 'H9', 'B9', 'C2', 'C8', 'C7']] all_players(list): all the players in this round, 0 for losing and 1 for winning or draw - potential_winner_index(list): the positions of those players with same highest hand_catagory in all_players + potential_winner_index(list): the positions of those players with same highest hand_category in all_players Returns: [0, 1, 0]: player1 wins [1, 0, 0]: player0 wins [1, 1, 1]: draw [1, 1, 0]: player1 and player0 draws - ''' + """ ranks = [] for hand in hands: rank_1 = hand.STRING_TO_RANK[hand.best_five[-1][1]] # rank of the four of a kind @@ -523,11 +523,12 @@ def determine_winner_four_of_a_kind(hands, all_players, potential_winner_index): all_players[potential_winner_index[i]] = 1 return all_players + def compare_hands(hands): - ''' + """ Compare all palyer's all seven cards Args: - hands(list): cards of those players with same highest hand_catagory. + hands(list): cards of those players with same highest hand_category. e.g. 
hands = [['CT', 'ST', 'H9', 'B9', 'C2', 'C8', 'C7'], ['CJ', 'SJ', 'H9', 'B9', 'C2', 'C8', 'C7'], ['CT', 'ST', 'H9', 'B9', 'C2', 'C8', 'C7']] Returns: [0, 1, 0]: player1 wins @@ -539,9 +540,9 @@ def compare_hands(hands): return [0, 1] elif hands[1] == None: return [1, 0] - ''' - hand_category = [] #such as high_card, straight_flush, etc - all_players = [0]*len(hands) #all the players in this round, 0 for losing and 1 for winning or draw + """ + hand_category = [] # such as high_card, straight_flush, etc + all_players = [0] * len(hands) # all the players in this round, 0 for losing and 1 for winning or draw if None in hands: fold_players = [i for i, j in enumerate(hands) if j is None] if len(fold_players) == len(all_players) - 1: @@ -555,26 +556,28 @@ def compare_hands(hands): for _ in enumerate(hands): if hands[_[0]] is not None: hand = Hand(hands[_[0]]) - hand.evaluateHand() + hand.evaluate_hand() hand_category.append(hand.category) elif hands[_[0]] is None: hand_category.append(0) else: - for i in enumerate(hands): - hand = Hand(hands[i[0]]) - hand.evaluateHand() - hand_category.append(hand.category) - potential_winner_index = [i for i, j in enumerate(hand_category) if j == max(hand_category)]# potential winner are those with same max card_catagory + for i in enumerate(hands): + hand = Hand(hands[i[0]]) + hand.evaluate_hand() + hand_category.append(hand.category) + potential_winner_index = [i for i, j in enumerate(hand_category) if + j == max(hand_category)] # potential winner are those with same max card_category return final_compare(hands, potential_winner_index, all_players) + def final_compare(hands, potential_winner_index, all_players): - ''' + """ Find out the winners from those who didn't fold Args: - hands(list): cards of those players with same highest hand_catagory. + hands(list): cards of those players with same highest hand_category. e.g. 
hands = [['CT', 'ST', 'H9', 'B9', 'C2', 'C8', 'C7'], ['CJ', 'SJ', 'H9', 'B9', 'C2', 'C8', 'C7'], ['CT', 'ST', 'H9', 'B9', 'C2', 'C8', 'C7']] - potential_winner_index(list): index of those with same max card_catagory in all_players + potential_winner_index(list): index of those with same max card_category in all_players all_players(list): a list of all the player's win/lose situation, 0 for lose and 1 for win Returns: [0, 1, 0]: player1 wins @@ -586,7 +589,7 @@ def final_compare(hands, potential_winner_index, all_players): return [0, 1] elif hands[1] == None: return [1, 0] - ''' + """ if len(potential_winner_index) == 1: all_players[potential_winner_index[0]] = 1 return all_players @@ -595,7 +598,7 @@ def final_compare(hands, potential_winner_index, all_players): equal_hands = [] for _ in potential_winner_index: hand = Hand(hands[_]) - hand.evaluateHand() + hand.evaluate_hand() equal_hands.append(hand) hand = equal_hands[0] if hand.category == 8: diff --git a/rlcard/games/mahjong/card.py b/rlcard/games/mahjong/card.py index 38cb20da3..13ed9cc6c 100644 --- a/rlcard/games/mahjong/card.py +++ b/rlcard/games/mahjong/card.py @@ -1,28 +1,27 @@ - class MahjongCard: - - info = {'type': ['dots', 'bamboo', 'characters', 'dragons', 'winds'], - 'trait': ['1', '2', '3', '4', '5', '6', '7', '8', '9', 'green', 'red', 'white', 'east', 'west', 'north', 'south'] - } + info = { + 'type': ['dots', 'bamboo', 'characters', 'dragons', 'winds'], + 'trait': ['1', '2', '3', '4', '5', '6', '7', '8', '9', 'green', 'red', 'white', 'east', 'west', 'north', 'south'] + } def __init__(self, card_type, trait): - ''' Initialize the class of MahjongCard + """Initialize the class of MahjongCard Args: card_type (str): The type of card trait (str): The trait of card - ''' + """ self.type = card_type self.trait = trait self.index_num = 0 def get_str(self): - ''' Get the string representation of card + """Get the string representation of card Return: (str): The string of card's color and trait - ''' - 
return self.type+ '-'+ self.trait + """ + return self.type + '-' + self.trait def set_index_num(self, index_num): diff --git a/rlcard/games/mahjong/dealer.py b/rlcard/games/mahjong/dealer.py index b3266aaf2..1fdd72549 100644 --- a/rlcard/games/mahjong/dealer.py +++ b/rlcard/games/mahjong/dealer.py @@ -2,8 +2,7 @@ class MahjongDealer: - ''' Initialize a mahjong dealer class - ''' + """Initialize a mahjong dealer class """ def __init__(self, np_random): self.np_random = np_random self.deck = init_deck() @@ -11,17 +10,16 @@ def __init__(self, np_random): self.table = [] def shuffle(self): - ''' Shuffle the deck - ''' + """Shuffle the deck """ self.np_random.shuffle(self.deck) def deal_cards(self, player, num): - ''' Deal some cards from deck to one player + """Deal some cards from deck to one player Args: player (object): The object of DoudizhuPlayer num (int): The number of cards to be dealed - ''' + """ for _ in range(num): player.hand.append(self.deck.pop()) diff --git a/rlcard/games/mahjong/game.py b/rlcard/games/mahjong/game.py index 0aa574db1..f67eebc6c 100644 --- a/rlcard/games/mahjong/game.py +++ b/rlcard/games/mahjong/game.py @@ -6,17 +6,17 @@ from rlcard.games.mahjong import Round from rlcard.games.mahjong import Judger + class MahjongGame: def __init__(self, allow_step_back=False): - '''Initialize the class MajongGame - ''' + """Initialize the class MajongGame""" self.allow_step_back = allow_step_back self.np_random = np.random.RandomState() self.num_players = 4 def init_game(self): - ''' Initialilze the game of Mahjong + """Initialize the game of Mahjong This version supports two-player Mahjong @@ -25,7 +25,7 @@ def init_game(self): (dict): The first state of the game (int): Current player's id - ''' + """ # Initialize a dealer that can deal cards self.dealer = Dealer(self.np_random) @@ -48,7 +48,7 @@ def init_game(self): return state, self.round.current_player def step(self, action): - ''' Get the next state + """Get the next state Args: action (str): a 
specific action. (call, raise, fold, or check) @@ -58,7 +58,7 @@ def step(self, action): (dict): next player's state (int): next plater's id - ''' + """ # First snapshot the current state if self.allow_step_back: hist_dealer = deepcopy(self.dealer) @@ -71,35 +71,35 @@ def step(self, action): return state, self.round.current_player def step_back(self): - ''' Return to the previous state of the game + """Return to the previous state of the game Returns: (bool): True if the game steps back successfully - ''' + """ if not self.history: return False self.dealer, self.players, self.round = self.history.pop() return True def get_state(self, player_id): - ''' Return player's state + """Return player's state Args: player_id (int): player id Returns: (dict): The state of the player - ''' + """ state = self.round.get_state(self.players, player_id) return state @staticmethod def get_legal_actions(state): - ''' Return the legal actions for current player + """Return the legal actions for current player Returns: (list): A list of legal actions - ''' + """ if state['valid_act'] == ['play']: state['valid_act'] = state['action_cards'] return state['action_cards'] @@ -108,40 +108,40 @@ def get_legal_actions(state): @staticmethod def get_num_actions(): - ''' Return the number of applicable actions + """Return the number of applicable actions Returns: (int): The number of actions. 
There are 4 actions (call, raise, check and fold) - ''' + """ return 38 def get_num_players(self): - ''' return the number of players in Mahjong + """Return the number of players in Mahjong returns: (int): the number of players in the game - ''' + """ return self.num_players def get_player_id(self): - ''' return the id of current player in Mahjong + """Return the id of current player in Mahjong returns: (int): the number of players in the game - ''' + """ return self.round.current_player def is_over(self): - ''' Check if the game is over + """Check if the game is over Returns: (boolean): True if the game is over - ''' + """ win, player, _ = self.judger.judge_game(self) - #pile =[sorted([c.get_str() for c in s ]) for s in self.players[player].pile if self.players[player].pile != None] - #cards = sorted([c.get_str() for c in self.players[player].hand]) - #count = len(cards) + sum([len(p) for p in pile]) + # pile =[sorted([c.get_str() for c in s ]) for s in self.players[player].pile if self.players[player].pile != None] + # cards = sorted([c.get_str() for c in self.players[player].hand]) + # count = len(cards) + sum([len(p) for p in pile]) self.winner = player - #print(win, player, players_val) - #print(win, self.round.current_player, player, cards, pile, count) + # print(win, player, players_val) + # print(win, self.round.current_player, player, cards, pile, count) return win diff --git a/rlcard/games/mahjong/judger.py b/rlcard/games/mahjong/judger.py index 057645441..67da62174 100644 --- a/rlcard/games/mahjong/judger.py +++ b/rlcard/games/mahjong/judger.py @@ -1,52 +1,50 @@ # -*- coding: utf-8 -*- -''' Implement Mahjong Judger class -''' +"""Implement Mahjong Judger class""" from collections import defaultdict import numpy as np + class MahjongJudger: - ''' Determine what cards a player can play - ''' + """Determine what cards a player can play""" def __init__(self, np_random): - ''' Initilize the Judger class for Mahjong - ''' + """Initialize the Judger class for 
Mahjong""" self.np_random = np_random @staticmethod def judge_pong_gong(dealer, players, last_player): - ''' Judge which player has pong/gong + """Judge which player has pong/gong Args: dealer (object): The dealer object. players (list): List of all players last_player (int): The player id of last player - ''' + """ last_card = dealer.table[-1] last_card_str = last_card.get_str() - #last_card_value = last_card_str.split("-")[-1] - #last_card_type = last_card_str.split("-")[0] + # last_card_value = last_card_str.split("-")[-1] + # last_card_type = last_card_str.split("-")[0] for player in players: hand = [card.get_str() for card in player.hand] hand_dict = defaultdict(list) for card in hand: hand_dict[card.split("-")[0]].append(card.split("-")[1]) - #pile = player.pile + # pile = player.pile # check gong if hand.count(last_card_str) == 3 and last_player != player.player_id: - return 'gong', player, [last_card]*4 + return 'gong', player, [last_card] * 4 # check pong if hand.count(last_card_str) == 2 and last_player != player.player_id: - return 'pong', player, [last_card]*3 + return 'pong', player, [last_card] * 3 return False, None, None def judge_chow(self, dealer, players, last_player): - ''' Judge which player has chow + """Judge which player has chow Args: dealer (object): The dealer object. 
players (list): List of all players last_player (int): The player id of last player - ''' + """ last_card = dealer.table[-1] last_card_str = last_card.get_str() @@ -54,31 +52,31 @@ def judge_chow(self, dealer, players, last_player): last_card_index = last_card.index_num for player in players: if last_card_type != "dragons" and last_card_type != "winds" and last_player == player.get_player_id() - 1: - # Create 9 dimensional vector where each dimension represent a specific card with the type same as last_card_type - # Numbers in each dimension represent how many of that card the player has it in hand - # If the last_card_type is 'characters' for example, and the player has cards: characters_3, characters_6, characters_3, - # The hand_list vector looks like: [0,0,2,0,0,1,0,0,0] + # Create 9 dimensional vector where each dimension represent a specific card with the type same as + # last_card_type Numbers in each dimension represent how many of that card the player has it in hand + # If the last_card_type is 'characters' for example, and the player has cards: characters_3, + # characters_6, characters_3, The hand_list vector looks like: [0,0,2,0,0,1,0,0,0] hand_list = np.zeros(9) for card in player.hand: if card.get_str().split("-")[0] == last_card_type: - hand_list[card.index_num] = hand_list[card.index_num]+1 + hand_list[card.index_num] = hand_list[card.index_num] + 1 - #pile = player.pile - #check chow + # pile = player.pile + # check chow test_cases = [] if last_card_index == 0: - if hand_list[last_card_index+1] > 0 and hand_list[last_card_index+2] > 0: - test_cases.append([last_card_index+1, last_card_index+2]) + if hand_list[last_card_index + 1] > 0 and hand_list[last_card_index + 2] > 0: + test_cases.append([last_card_index + 1, last_card_index + 2]) elif last_card_index < 9: - if hand_list[last_card_index-2] > 0 and hand_list[last_card_index-1] > 0: - test_cases.append([last_card_index-2, last_card_index-1]) + if hand_list[last_card_index - 2] > 0 and 
hand_list[last_card_index - 1] > 0: + test_cases.append([last_card_index - 2, last_card_index - 1]) else: - if hand_list[last_card_index-1] > 0 and hand_list[last_card_index+1] > 0: - test_cases.append([last_card_index-1, last_card_index+1]) + if hand_list[last_card_index - 1] > 0 and hand_list[last_card_index + 1] > 0: + test_cases.append([last_card_index - 1, last_card_index + 1]) if not test_cases: - continue + continue for l in test_cases: cards = [] @@ -92,12 +90,10 @@ def judge_chow(self, dealer, players, last_player): return False, None, None def judge_game(self, game): - ''' Judge which player has win the game + """Judge which player has win the game Args: - dealer (object): The dealer object. - players (list): List of all players - last_player (int): The player id of last player - ''' + game (Game): The game object + """ players_val = [] win_player = -1 for player in game.players: @@ -108,18 +104,18 @@ def judge_game(self, game): if win_player != -1 or len(game.dealer.deck) == 0: return True, win_player, players_val else: - #player_id = players_val.index(max(players_val)) + # player_id = players_val.index(max(players_val)) return False, win_player, players_val def judge_hu(self, player): - ''' Judge whether the player has win the game + """Judge whether the player has win the game Args: player (object): Target player Return: Result (bool): Win or not Maximum_score (int): Set count score of the player - ''' + """ set_count = 0 hand = [card.get_str() for card in player.hand] count_dict = {card: hand.count(card) for card in hand} @@ -141,36 +137,36 @@ def judge_hu(self, player): if tmp_set_count + set_count > maximum: maximum = tmp_set_count + set_count if tmp_set_count + set_count >= 4: - #print(player.get_player_id(), sorted([card.get_str() for card in player.hand])) - #print([[c.get_str() for c in s] for s in player.pile]) - #print(len(player.hand), sum([len(s) for s in player.pile])) - #exit() + # print(player.get_player_id(), sorted([card.get_str() for 
card in player.hand])) + # print([[c.get_str() for c in s] for s in player.pile]) + # print(len(player.hand), sum([len(s) for s in player.pile])) + # exit() return True, maximum return False, maximum @staticmethod def check_consecutive(_list): - ''' Check if list is consecutive + """Check if list is consecutive Args: _list (list): The target list Return: Result (bool): consecutive or not - ''' + """ l = list(map(int, _list)) - if sorted(l) == list(range(min(l), max(l)+1)): + if sorted(l) == list(range(min(l), max(l) + 1)): return True return False def cal_set(self, cards): - ''' Calculate the set for given cards + """Calculate the set for given cards Args: - Cards (list): List of cards. + cards (list): List of cards. Return: Set_count (int): Sets (list): List of cards that has been pop from user's hand - ''' + """ tmp_cards = cards.copy() sets = [] set_count = 0 @@ -182,7 +178,7 @@ def cal_set(self, cards): for _ in range(_dict[each]): tmp_cards.pop(tmp_cards.index(each)) - # get all of the traits of each type in hand (except dragons and winds) + # get all the traits of each type in hand (except dragons and winds) _dict_by_type = defaultdict(list) for card in tmp_cards: _type = card.split("-")[0] @@ -196,22 +192,22 @@ def cal_set(self, cards): if len(values) > 2: for index, _ in enumerate(values): if index == 0: - test_case = [values[index], values[index+1], values[index+2]] - elif index == len(values)-1: - test_case = [values[index-2], values[index-1], values[index]] + test_case = [values[index], values[index + 1], values[index + 2]] + elif index == len(values) - 1: + test_case = [values[index - 2], values[index - 1], values[index]] else: - test_case = [values[index-1], values[index], values[index+1]] + test_case = [values[index - 1], values[index], values[index + 1]] if self.check_consecutive(test_case): set_count += 1 for each in test_case: values.pop(values.index(each)) - c = _type+"-"+str(each) + c = _type + "-" + str(each) sets.append(c) if c in tmp_cards: 
tmp_cards.pop(tmp_cards.index(c)) return set_count, sets -#if __name__ == "__main__": +# if __name__ == "__main__": # judger = MahjongJudger() # player = Player(0) # card_info = Card.info diff --git a/rlcard/games/mahjong/player.py b/rlcard/games/mahjong/player.py index 9f6bb5799..3e4317f06 100644 --- a/rlcard/games/mahjong/player.py +++ b/rlcard/games/mahjong/player.py @@ -2,47 +2,44 @@ class MahjongPlayer: def __init__(self, player_id, np_random): - ''' Initilize a player. + """Initialize a player. Args: player_id (int): The id of the player - ''' + """ self.np_random = np_random self.player_id = player_id self.hand = [] self.pile = [] def get_player_id(self): - ''' Return the id of the player - ''' + """Return the id of the player""" return self.player_id def print_hand(self): - ''' Print the cards in hand in string. - ''' + """Print the cards in hand in string.""" print([c.get_str() for c in self.hand]) def print_pile(self): - ''' Print the cards in pile of the player in string. - ''' + """Print the cards in pile of the player in string.""" print([[c.get_str() for c in s]for s in self.pile]) def play_card(self, dealer, card): - ''' Play one card + """Play one card Args: dealer (object): Dealer - Card (object): The card to be play. - ''' + Card (object): The card to be played. + """ card = self.hand.pop(self.hand.index(card)) dealer.table.append(card) def chow(self, dealer, cards): - ''' Perform Chow + """Perform Chow Args: dealer (object): Dealer Cards (object): The cards to be Chow. - ''' + """ last_card = dealer.table.pop(-1) for card in cards: if card in self.hand and card != last_card: @@ -50,22 +47,22 @@ def chow(self, dealer, cards): self.pile.append(cards) def gong(self, dealer, cards): - ''' Perform Gong + """Perform Gong Args: dealer (object): Dealer Cards (object): The cards to be Gong. 
- ''' + """ for card in cards: if card in self.hand: self.hand.pop(self.hand.index(card)) self.pile.append(cards) def pong(self, dealer, cards): - ''' Perform Pong + """Perform Pong Args: dealer (object): Dealer Cards (object): The cards to be Pong. - ''' + """ for card in cards: if card in self.hand: self.hand.pop(self.hand.index(card)) diff --git a/rlcard/games/mahjong/round.py b/rlcard/games/mahjong/round.py index 9db69f4ef..2f3f60275 100644 --- a/rlcard/games/mahjong/round.py +++ b/rlcard/games/mahjong/round.py @@ -2,13 +2,13 @@ class MahjongRound: def __init__(self, judger, dealer, num_players, np_random): - ''' Initialize the round class + """Initialize the round class Args: judger (object): the object of MahjongJudger dealer (object): the object of MahjongDealer num_players (int): the number of players in game - ''' + """ self.np_random = np_random self.judger = judger self.dealer = dealer @@ -25,12 +25,12 @@ def __init__(self, judger, dealer, num_players, np_random): self.last_cards = [] def proceed_round(self, players, action): - ''' Call other Classes's functions to keep one round running + """Call other Classes's functions to keep one round running Args: player (object): object of UnoPlayer action (str): string of legal action - ''' + """ #hand_len = [len(p.hand) for p in players] #pile_len = [sum([len([c for c in p]) for p in pp.pile]) for pp in players] #total_len = [i + j for i, j in zip(hand_len, pile_len)] @@ -82,14 +82,14 @@ def proceed_round(self, players, action): #total_len = [i + j for i, j in zip(hand_len, pile_len)] def get_state(self, players, player_id): - ''' Get player's state + """Get player's state Args: players (list): The list of MahjongPlayer player_id (int): The id of the player Return: state (dict): The information of the state - ''' + """ state = {} #(valid_act, player, cards) = self.judger.judge_pong_gong(self.dealer, players, self.last_player) if self.valid_act: # PONG/GONG/CHOW diff --git a/rlcard/games/nolimitholdem/round.py 
b/rlcard/games/nolimitholdem/round.py index 01d4cf875..7d891ce73 100644 --- a/rlcard/games/nolimitholdem/round.py +++ b/rlcard/games/nolimitholdem/round.py @@ -8,7 +8,7 @@ class Action(Enum): FOLD = 0 CHECK_CALL = 1 - #CALL = 2 + # CALL = 2 # RAISE_3BB = 3 RAISE_HALF_POT = 2 RAISE_POT = 3 diff --git a/rlcard/games/uno/card.py b/rlcard/games/uno/card.py index f2a6c5e5c..98f8b2e5a 100644 --- a/rlcard/games/uno/card.py +++ b/rlcard/games/uno/card.py @@ -1,43 +1,42 @@ from termcolor import colored -class UnoCard: - info = {'type': ['number', 'action', 'wild'], - 'color': ['r', 'g', 'b', 'y'], - 'trait': ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', - 'skip', 'reverse', 'draw_2', 'wild', 'wild_draw_4'] - } +class UnoCard: + info = { + 'type': ['number', 'action', 'wild'], + 'color': ['r', 'g', 'b', 'y'], + 'trait': ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'skip', 'reverse', 'draw_2', 'wild', 'wild_draw_4'] + } def __init__(self, card_type, color, trait): - ''' Initialize the class of UnoCard + """Initialize the class of UnoCard Args: card_type (str): The type of card color (str): The color of card trait (str): The trait of card - ''' + """ self.type = card_type self.color = color self.trait = trait self.str = self.get_str() def get_str(self): - ''' Get the string representation of card + """Get the string representation of card Return: (str): The string of card's color and trait - ''' + """ return self.color + '-' + self.trait - @staticmethod def print_cards(cards, wild_color=False): - ''' Print out card in a nice form + """Print out card in a nice form Args: - card (str or list): The string form or a list of a UNO card - wild_color (boolean): True if assign collor to wild cards - ''' + cards (str or list): The string form or a list of a UNO card + wild_color (boolean): True if assign color to wild cards + """ if isinstance(cards, str): cards = [cards] for i, card in enumerate(cards): diff --git a/rlcard/games/uno/dealer.py b/rlcard/games/uno/dealer.py 
index 771ed51c6..4cda5e7da 100644 --- a/rlcard/games/uno/dealer.py +++ b/rlcard/games/uno/dealer.py @@ -3,34 +3,32 @@ class UnoDealer: - ''' Initialize a uno dealer class - ''' + """Initialize a uno dealer class """ def __init__(self, np_random): self.np_random = np_random self.deck = init_deck() self.shuffle() def shuffle(self): - ''' Shuffle the deck - ''' + """Shuffle the deck """ self.np_random.shuffle(self.deck) def deal_cards(self, player, num): - ''' Deal some cards from deck to one player + """Deal some cards from deck to one player Args: player (object): The object of DoudizhuPlayer num (int): The number of cards to be dealed - ''' + """ for _ in range(num): player.hand.append(self.deck.pop()) def flip_top_card(self): - ''' Flip top card when a new game starts + """Flip top card when a new game starts Returns: (object): The object of UnoCard at the top of the deck - ''' + """ top_card = self.deck.pop() while top_card.trait == 'wild_draw_4': self.deck.append(top_card) diff --git a/rlcard/games/uno/game.py b/rlcard/games/uno/game.py index 53c1a1f8c..cf41d050c 100644 --- a/rlcard/games/uno/game.py +++ b/rlcard/games/uno/game.py @@ -15,20 +15,19 @@ def __init__(self, allow_step_back=False, num_players=2): self.payoffs = [0 for _ in range(self.num_players)] def configure(self, game_config): - ''' Specifiy some game specific parameters, such as number of players - ''' + """Specify some game specific parameters, such as number of players""" self.num_players = game_config['game_num_players'] def init_game(self): - ''' Initialize players and state + """Initialize players and state Returns: (tuple): Tuple containing: (dict): The first state in one game (int): Current player's id - ''' - # Initalize payoffs + """ + # Initialize payoffs self.payoffs = [0 for _ in range(self.num_players)] # Initialize a dealer that can deal cards @@ -44,11 +43,11 @@ def init_game(self): # Initialize a Round self.round = Round(self.dealer, self.num_players, self.np_random) - # flip and 
perfrom top card + # flip and perform top card top_card = self.round.flip_top_card() self.round.perform_top_card(self.players, top_card) - # Save the hisory for stepping back to the last state. + # Save the history for stepping back to the last state. self.history = [] player_id = self.round.current_player @@ -56,7 +55,7 @@ def init_game(self): return state, player_id def step(self, action): - ''' Get the next state + """Get the next state Args: action (str): A specific action @@ -66,7 +65,7 @@ def step(self, action): (dict): next player's state (int): next plater's id - ''' + """ if self.allow_step_back: # First snapshot the current state @@ -81,36 +80,36 @@ def step(self, action): return state, player_id def step_back(self): - ''' Return to the previous state of the game + """Return to the previous state of the game Returns: (bool): True if the game steps back successfully - ''' + """ if not self.history: return False self.dealer, self.players, self.round = self.history.pop() return True def get_state(self, player_id): - ''' Return player's state + """Return player's state Args: player_id (int): player id Returns: (dict): The state of the player - ''' + """ state = self.round.get_state(self.players, player_id) state['num_players'] = self.get_num_players() state['current_player'] = self.round.current_player return state def get_payoffs(self): - ''' Return the payoffs of the game + """Return the payoffs of the game Returns: (list): Each entry corresponds to the payoff of one player - ''' + """ winner = self.round.winner if winner is not None and len(winner) == 1: self.payoffs[winner[0]] = 1 @@ -118,43 +117,43 @@ def get_payoffs(self): return self.payoffs def get_legal_actions(self): - ''' Return the legal actions for current player + """Return the legal actions for current player Returns: (list): A list of legal actions - ''' + """ return self.round.get_legal_actions(self.players, self.round.current_player) def get_num_players(self): - ''' Return the number of 
players in Limit Texas Hold'em + """Return the number of players in Limit Texas Hold'em Returns: (int): The number of players in the game - ''' + """ return self.num_players @staticmethod def get_num_actions(): - ''' Return the number of applicable actions + """Return the number of applicable actions Returns: (int): The number of actions. There are 61 actions - ''' + """ return 61 def get_player_id(self): - ''' Return the current player's id + """Return the current player's id Returns: (int): current player's id - ''' + """ return self.round.current_player def is_over(self): - ''' Check if the game is over + """Check if the game is over Returns: (boolean): True if the game is over - ''' + """ return self.round.is_over diff --git a/rlcard/games/uno/judger.py b/rlcard/games/uno/judger.py index 62a6375e8..bd2d9a087 100644 --- a/rlcard/games/uno/judger.py +++ b/rlcard/games/uno/judger.py @@ -3,14 +3,14 @@ class UnoJudger: @staticmethod def judge_winner(players, np_random): - ''' Judge the winner of the game + """Judge the winner of the game Args: players (list): The list of players who play the game Returns: (list): The player id of the winner - ''' + """ self.np_random = np_random count_1 = len(players[0].hand) count_2 = len(players[1].hand) diff --git a/rlcard/games/uno/player.py b/rlcard/games/uno/player.py index 26507467e..86294b634 100644 --- a/rlcard/games/uno/player.py +++ b/rlcard/games/uno/player.py @@ -2,18 +2,17 @@ class UnoPlayer: def __init__(self, player_id, np_random): - ''' Initilize a player. + """Initialize a player. 
Args: player_id (int): The id of the player - ''' + """ self.np_random = np_random self.player_id = player_id self.hand = [] self.stack = [] def get_player_id(self): - ''' Return the id of the player - ''' + """Return the id of the player""" return self.player_id diff --git a/rlcard/games/uno/round.py b/rlcard/games/uno/round.py index 1a81387b2..cdcabfe08 100644 --- a/rlcard/games/uno/round.py +++ b/rlcard/games/uno/round.py @@ -5,12 +5,12 @@ class UnoRound: def __init__(self, dealer, num_players, np_random): - ''' Initialize the round class + """Initialize the round class Args: dealer (object): the object of UnoDealer num_players (int): the number of players in game - ''' + """ self.np_random = np_random self.dealer = dealer self.target = None @@ -22,12 +22,12 @@ def __init__(self, dealer, num_players, np_random): self.winner = None def flip_top_card(self): - ''' Flip the top card of the card pile + """Flip the top card of the card pile Returns: (object of UnoCard): the top card in game - ''' + """ top = self.dealer.flip_top_card() if top.trait == 'wild': top.color = self.np_random.choice(UnoCard.info['color']) @@ -36,12 +36,12 @@ def flip_top_card(self): return top def perform_top_card(self, players, top_card): - ''' Perform the top card + """Perform the top card Args: players (list): list of UnoPlayer objects top_card (object): object of UnoCard - ''' + """ if top_card.trait == 'skip': self.current_player = 1 elif top_card.trait == 'reverse': @@ -52,12 +52,12 @@ def perform_top_card(self, players, top_card): self.dealer.deal_cards(player, 2) def proceed_round(self, players, action): - ''' Call other Classes' functions to keep one round running + """Call other Classes' functions to keep one round running Args: player (object): object of UnoPlayer action (str): string of legal action - ''' + """ if action == 'draw': self._perform_draw_action(players) return None @@ -135,12 +135,12 @@ def get_legal_actions(self, players, player_id): return legal_actions def 
get_state(self, players, player_id): - ''' Get player's state + """Get player's state Args: players (list): The list of UnoPlayer player_id (int): The id of the player - ''' + """ state = {} player = players[player_id] state['hand'] = cards2list(player.hand) @@ -153,8 +153,7 @@ def get_state(self, players, player_id): return state def replace_deck(self): - ''' Add cards have been played to deck - ''' + """Add cards have been played to deck""" self.dealer.deck.extend(self.played_cards) self.dealer.shuffle() self.played_cards = [] diff --git a/rlcard/games/uno/utils.py b/rlcard/games/uno/utils.py index 6ba5a9de6..5d18550d1 100644 --- a/rlcard/games/uno/utils.py +++ b/rlcard/games/uno/utils.py @@ -29,8 +29,7 @@ def init_deck(): - ''' Generate uno deck of 108 cards - ''' + """Generate uno deck of 108 cards""" deck = [] card_info = Card.info for color in card_info['color']: @@ -53,28 +52,28 @@ def init_deck(): def cards2list(cards): - ''' Get the corresponding string representation of cards + """Get the corresponding string representation of cards Args: cards (list): list of UnoCards objects Returns: (string): string representation of cards - ''' + """ cards_list = [] for card in cards: cards_list.append(card.get_str()) return cards_list def hand2dict(hand): - ''' Get the corresponding dict representation of hand + """Get the corresponding dict representation of hand Args: hand (list): list of string of hand's card Returns: (dict): dict of hand - ''' + """ hand_dict = {} for card in hand: if card not in hand_dict: @@ -84,7 +83,7 @@ def hand2dict(hand): return hand_dict def encode_hand(plane, hand): - ''' Encode hand and represerve it into plane + """Encode hand and represerve it into plane Args: plane (array): 3*4*15 numpy array @@ -92,7 +91,7 @@ def encode_hand(plane, hand): Returns: (array): 3*4*15 numpy array - ''' + """ # plane = np.zeros((3, 4, 15), dtype=int) plane[0] = np.ones((4, 15), dtype=int) hand = hand2dict(hand) @@ -111,7 +110,7 @@ def encode_hand(plane, 
hand): return plane def encode_target(plane, target): - ''' Encode target and represerve it into plane + """Encode target and represerve it into plane Args: plane (array): 1*4*15 numpy array @@ -119,7 +118,7 @@ def encode_target(plane, target): Returns: (array): 1*4*15 numpy array - ''' + """ target_info = target.split('-') color = COLOR_MAP[target_info[0]] trait = TRAIT_MAP[target_info[1]] diff --git a/rlcard/models/__init__.py b/rlcard/models/__init__.py index f772a17ac..285c46487 100644 --- a/rlcard/models/__init__.py +++ b/rlcard/models/__init__.py @@ -1,5 +1,4 @@ -''' Register rule-based models or pre-trianed models -''' +"""Register rule-based models or pre-trianed models""" from rlcard.models.registration import register, load register( diff --git a/rlcard/models/bridge_rule_models.py b/rlcard/models/bridge_rule_models.py index 43710688b..4cf174c41 100644 --- a/rlcard/models/bridge_rule_models.py +++ b/rlcard/models/bridge_rule_models.py @@ -1,10 +1,10 @@ -''' +""" File name: models/bridge_rule_models.py Author: William Hale Date created: 11/27/2021 Bridge rule models -''' +""" import numpy as np @@ -12,16 +12,14 @@ class BridgeDefenderNoviceRuleAgent(object): - ''' - Agent always passes during bidding - ''' + """Agent always passes during bidding """ def __init__(self): self.use_raw = False @staticmethod def step(state) -> int: - ''' Predict the action given the current state. + """Predict the action given the current state. Defender Novice strategy: Case during make call: Always choose PassAction. @@ -29,11 +27,11 @@ def step(state) -> int: Choose a random action. 
Args: - state (numpy.array): an numpy array that represents the current state + state (numpy.array): a numpy array that represents the current state Returns: action_id (int): the action_id predicted - ''' + """ legal_action_ids = state['raw_legal_actions'] if ActionEvent.pass_action_id in legal_action_ids: selected_action_id = ActionEvent.pass_action_id @@ -42,15 +40,15 @@ def step(state) -> int: return selected_action_id def eval_step(self, state): - ''' Predict the action given the current state for evaluation. + """Predict the action given the current state for evaluation. Since the agents is not trained, this function is equivalent to step function. Args: - state (numpy.array): an numpy array that represents the current state + state (numpy.array): a numpy array that represents the current state Returns: action_id (int): the action_id predicted by the agent probabilities (list): The list of action probabilities - ''' + """ probabilities = [] return self.step(state), probabilities diff --git a/rlcard/models/doudizhu_rule_models.py b/rlcard/models/doudizhu_rule_models.py index 59d230a8f..5b628544d 100644 --- a/rlcard/models/doudizhu_rule_models.py +++ b/rlcard/models/doudizhu_rule_models.py @@ -1,5 +1,4 @@ -''' Dou Dizhu rule models -''' +"""Dou Dizhu rule models""" import numpy as np @@ -8,20 +7,19 @@ from rlcard.models.model import Model class DouDizhuRuleAgentV1(object): - ''' Dou Dizhu Rule agent version 1 - ''' + """Dou Dizhu Rule agent version 1""" def __init__(self): self.use_raw = True def step(self, state): - ''' Predict the action given raw state. A naive rule. + """Predict the action given raw state. A naive rule. Args: state (dict): Raw state from the game Returns: action (str): Predicted action - ''' + """ state = state['raw_obs'] trace = state['trace'] # the rule of leading round @@ -55,13 +53,11 @@ def step(self, state): return np.random.choice(state['actions']) def eval_step(self, state): - ''' Step for evaluation. 
The same to step - ''' + """Step for evaluation. The same to step""" return self.step(state), [] def combine_cards(self, hand): - '''Get optimal combinations of cards in hand - ''' + """Get optimal combinations of cards in hand""" comb = {'rocket': [], 'bomb': [], 'trio': [], 'trio_chain': [], 'solo_chain': [], 'pair_chain': [], 'pair': [], 'solo': []} # 1. pick rocket @@ -152,16 +148,14 @@ def pick_chain(hand_list, count): chains.append(str_chain) add += len(chain) hand_list = [int(card) for card in hand_list] - return (chains, hand_list) + return chains, hand_list class DouDizhuRuleModelV1(Model): - ''' Dou Dizhu Rule Model version 1 - ''' + """Dou Dizhu Rule Model version 1""" def __init__(self): - ''' Load pretrained model - ''' + """Load pretrained model""" env = rlcard.make('doudizhu') rule_agent = DouDizhuRuleAgentV1() @@ -169,12 +163,12 @@ def __init__(self): @property def agents(self): - ''' Get a list of agents for each position in a the game + """Get a list of agents for each position in a game Returns: agents (list): A list of agents Note: Each agent should be just like RL agent with step and eval_step functioning well. - ''' + """ return self.rule_agents diff --git a/rlcard/models/gin_rummy_rule_models.py b/rlcard/models/gin_rummy_rule_models.py index 2d5468684..e6c224431 100644 --- a/rlcard/models/gin_rummy_rule_models.py +++ b/rlcard/models/gin_rummy_rule_models.py @@ -1,10 +1,10 @@ -''' +""" File name: models/gin_rummy_rule_models.py Author: William Hale Date created: 2/12/2020 Gin Rummy rule models -''' +""" from typing import TYPE_CHECKING from collections import OrderedDict @@ -27,16 +27,14 @@ class GinRummyNoviceRuleAgent(object): - ''' - Agent always discards highest deadwood value card - ''' + """Agent always discards highest deadwood value card""" def __init__(self): self.use_raw = False # FIXME: should this be True ? @staticmethod def step(state): - ''' Predict the action given the current state. 
+ """Predict the action given the current state. Novice strategy: Case where can gin: Choose one of the gin actions. @@ -45,17 +43,17 @@ def step(state): Case where can discard: Gin if can. Knock if can. Otherwise, put aside cards in some best meld cluster. - Choose one of the remaining cards with highest deadwood value. + Choose one of the remaining cards with the highest deadwood value. Discard that card. Case otherwise: Choose a random action. Args: - state (numpy.array): an numpy array that represents the current state + state (numpy.array): a numpy array that represents the current state Returns: action (int): the action predicted - ''' + """ legal_actions = state['legal_actions'] actions = legal_actions.copy() legal_action_events = [ActionEvent.decode_action(x) for x in legal_actions] @@ -76,16 +74,16 @@ def step(state): return np.random.choice(actions) def eval_step(self, state): - ''' Predict the action given the current state for evaluation. + """Predict the action given the current state for evaluation. Since the agents is not trained, this function is equivalent to step function. 
Args: - state (numpy.array): an numpy array that represents the current state + state (numpy.array): a numpy array that represents the current state Returns: action (int): the action predicted by the agent probabilities (list): The list of action probabilities - ''' + """ probabilities = [] return self.step(state), probabilities @@ -114,12 +112,10 @@ def _get_best_discards(discard_action_events, state) -> List[Card]: class GinRummyNoviceRuleModel(Model): - ''' Gin Rummy Rule Model - ''' + """Gin Rummy Rule Model""" def __init__(self): - ''' Load pre-trained model - ''' + """Load pre-trained model""" super().__init__() env = rlcard.make('gin-rummy') rule_agent = GinRummyNoviceRuleAgent() @@ -127,12 +123,12 @@ def __init__(self): @property def agents(self): - ''' Get a list of agents for each position in a the game + """Get a list of agents for each position in the game Returns: agents (list): A list of agents Note: Each agent should be just like RL agent with step and eval_step functioning well. - ''' + """ return self.rule_agents diff --git a/rlcard/models/leducholdem_rule_models.py b/rlcard/models/leducholdem_rule_models.py index 9cb01b429..0c0f10b07 100644 --- a/rlcard/models/leducholdem_rule_models.py +++ b/rlcard/models/leducholdem_rule_models.py @@ -1,23 +1,23 @@ -''' Leduc Hold 'em rule model -''' +"""Leduc Hold 'em rule model""" import rlcard from rlcard.models.model import Model + class LeducHoldemRuleAgentV1(object): - ''' Leduc Hold 'em Rule agent version 1 - ''' + """Leduc Hold 'em Rule agent version 1""" + def __init__(self): self.use_raw = True @staticmethod def step(state): - ''' Predict the action when given raw state. A simple rule-based AI. + """Predict the action when given raw state. A simple rule-based AI. 
Args: state (dict): Raw state from the game Returns: action (str): Predicted action - ''' + """ legal_actions = state['raw_legal_actions'] # Aggressively play 'raise' and 'call' if 'raise' in legal_actions: @@ -30,25 +30,25 @@ def step(state): return 'fold' def eval_step(self, state): - ''' Step for evaluation. The same to step - ''' + """Step for evaluation. The same to step""" return self.step(state), [] + class LeducHoldemRuleAgentV2(object): - ''' Leduc Hold 'em Rule agent version 2 - ''' + """Leduc Hold 'em Rule agent version 2""" + def __init__(self): self.use_raw = True @staticmethod def step(state): - ''' Predict the action when given raw state. A simple rule-based AI. + """Predict the action when given raw state. A simple rule-based AI. Args: state (dict): Raw state from the game Returns: action (str): Predicted action - ''' + """ legal_actions = state['raw_legal_actions'] state = state['raw_obs'] hand = state['hand'] @@ -74,7 +74,7 @@ def step(state): else: action = 'fold' - #return action + # return action if action in legal_actions: return action else: @@ -88,52 +88,50 @@ def step(state): return action def eval_step(self, state): - ''' Step for evaluation. The same to step - ''' + """Step for evaluation. The same to step""" return self.step(state), [] + class LeducHoldemRuleModelV1(Model): - ''' Leduc holdem Rule Model version 1 - ''' + """Leduc holdem Rule Model version 1""" def __init__(self): - ''' Load pretrained model - ''' + """Load pretrained model + """ env = rlcard.make('leduc-holdem') rule_agent = LeducHoldemRuleAgentV1() self.rule_agents = [rule_agent for _ in range(env.num_players)] @property def agents(self): - ''' Get a list of agents for each position in a the game + """Get a list of agents for each position in the game Returns: agents (list): A list of agents Note: Each agent should be just like RL agent with step and eval_step functioning well. 
- ''' + """ return self.rule_agents + class LeducHoldemRuleModelV2(Model): - ''' Leduc holdem Rule Model version 2 - ''' + """Leduc holdem Rule Model version 2""" def __init__(self): - ''' Load pretrained model - ''' + """Load pretrained model""" env = rlcard.make('leduc-holdem') rule_agent = LeducHoldemRuleAgentV2() self.rule_agents = [rule_agent for _ in range(env.num_players)] @property def agents(self): - ''' Get a list of agents for each position in a the game + """Get a list of agents for each position in the game Returns: agents (list): A list of agents Note: Each agent should be just like RL agent with step and eval_step functioning well. - ''' + """ return self.rule_agents diff --git a/rlcard/models/limitholdem_rule_models.py b/rlcard/models/limitholdem_rule_models.py index d9049daf6..c8d5cc620 100644 --- a/rlcard/models/limitholdem_rule_models.py +++ b/rlcard/models/limitholdem_rule_models.py @@ -1,24 +1,23 @@ -''' Limit Hold 'em rule model -''' +"""Limit Hold 'em rule model""" import rlcard from rlcard.models.model import Model + class LimitholdemRuleAgentV1(object): - ''' Limit Hold 'em Rule agent version 1 - ''' + """Limit Hold 'em Rule agent version 1""" def __init__(self): self.use_raw = True @staticmethod def step(state): - ''' Predict the action when given raw state. A simple rule-based AI. + """Predict the action when given raw state. A simple rule-based AI. 
Args: state (dict): Raw state from the game Returns: action (str): Predicted action - ''' + """ legal_actions = state['raw_legal_actions'] state = state['raw_obs'] hand = state['hand'] @@ -32,10 +31,12 @@ def step(state): # KQ, KJ, QJ, JT # Fold all hand types except those mentioned above to save money if len(public_cards) == 0: - if hand[0][1] == hand [1][1]: + if hand[0][1] == hand[1][1]: action = 'raise' elif hand[0][1] == 'A' or hand[1][1] == 'A': - if 'K' in [hand[0][1], hand[1][1]] or 'Q' in [hand[0][1], hand[1][1]] or 'J' in [hand[0][1], hand[1][1]] or 'T' in [hand[0][1], hand[1][1]]: + if 'K' in [hand[0][1], hand[1][1]] or 'Q' in [hand[0][1], hand[1][1]] or 'J' in [hand[0][1], + hand[1][1]] or 'T' in [ + hand[0][1], hand[1][1]]: action = 'raise' elif hand[0][0] == hand[1][0]: action = 'raise' @@ -48,12 +49,14 @@ def step(state): for i, _ in enumerate(public_cards): public_cards_ranks[i] = public_cards[i][1] public_cards_flush[i] = public_cards[i][0] - if hand[0][1] == hand [1][1]: - # if the player already have a pair, raise when public cards have card same as the pair + if hand[0][1] == hand[1][1]: + # if the player already have a pair, raise when public cards have card same as the pair if hand[0][1] in public_cards_ranks: action = 'raise' elif hand[0][1] == 'A' or hand[1][1] == 'A': - if 'K' in [hand[0][1], hand[1][1]] or 'Q' in [hand[0][1], hand[1][1]] or 'J' in [hand[0][1], hand[1][1]] or 'T' in [hand[0][1], hand[1][1]]: + if 'K' in [hand[0][1], hand[1][1]] or 'Q' in [hand[0][1], hand[1][1]] or 'J' in [hand[0][1], + hand[1][1]] or 'T' in [ + hand[0][1], hand[1][1]]: # For AK, AQ, AJ, AT types, if public cards have A, K, Q, J, T, raise, because the chance of getting a straight greatly increases if 'A' in public_cards_ranks or 'K' in public_cards_ranks or 'Q' in public_cards_ranks or 'J' in public_cards_ranks or 'T' in public_cards_ranks: action = 'raise' @@ -61,12 +64,13 @@ def step(state): elif hand[0][0] == hand[1][0]: if hand[0][0] in 
public_cards_flush: action = 'raise' - elif max(public_cards_ranks) in ['5', '4' ,'3', '2']: # for KQ, KJ, QJ, JT, check when having no cards higher than 5 + elif max(public_cards_ranks) in ['5', '4', '3', + '2']: # for KQ, KJ, QJ, JT, check when having no cards higher than 5 action = 'check' else: action = 'call' - if len(public_cards) == 5 or len(public_cards) == 4 : + if len(public_cards) == 5 or len(public_cards) == 4: public_cards_ranks = [] public_cards_flush = [] for i, _ in enumerate(public_cards): @@ -74,12 +78,14 @@ def step(state): public_cards_flush.append('S') public_cards_ranks[i] = public_cards[i][1] public_cards_flush[i] = public_cards[i][0] - if hand[0][1] == hand [1][1]: - # if the player already have a pair, raise when public cards have card same as the pair + if hand[0][1] == hand[1][1]: + # if the player already have a pair, raise when public cards have card same as the pair if hand[0][1] in public_cards_ranks: action = 'raise' elif hand[0][1] == 'A' or hand[1][1] == 'A': - if 'K' in [hand[0][1], hand[1][1]] or 'Q' in [hand[0][1], hand[1][1]] or 'J' in [hand[0][1], hand[1][1]] or 'T' in [hand[0][1], hand[1][1]]: + if 'K' in [hand[0][1], hand[1][1]] or 'Q' in [hand[0][1], hand[1][1]] or 'J' in [hand[0][1], + hand[1][1]] or 'T' in [ + hand[0][1], hand[1][1]]: # For AK, AQ, AJ, AT types, if public cards have A, K, Q, J, T, raise, because the chance of getting a straight greatly increases if 'A' in public_cards_ranks or 'K' in public_cards_ranks or 'Q' in public_cards_ranks or 'J' in public_cards_ranks or 'T' in public_cards_ranks: action = 'raise' @@ -87,12 +93,13 @@ def step(state): elif hand[0][0] == hand[1][0]: if hand[0][0] in public_cards_flush: action = 'raise' - elif max(public_cards_ranks) in ['5', '4', '3', '2']: # for KQ, KJ, QJ, JT, fold when having no cards higher than 5 + elif max(public_cards_ranks) in ['5', '4', '3', + '2']: # for KQ, KJ, QJ, JT, fold when having no cards higher than 5 action = 'fold' else: action = 'call' - #return 
action + # return action if action in legal_actions: return action else: @@ -106,17 +113,15 @@ def step(state): return action def eval_step(self, state): - ''' Step for evaluation. The same to step - ''' + """Step for evaluation. The same to step""" return self.step(state), [] + class LimitholdemRuleModelV1(Model): - ''' Limitholdem Rule Model version 1 - ''' + """Limitholdem Rule Model version 1""" def __init__(self): - ''' Load pretrained model - ''' + """Load pretrained model""" env = rlcard.make('limit-holdem') rule_agent = LimitholdemRuleAgentV1() @@ -124,21 +129,21 @@ def __init__(self): @property def agents(self): - ''' Get a list of agents for each position in a the game + """Get a list of agents for each position in the game Returns: agents (list): A list of agents Note: Each agent should be just like RL agent with step and eval_step functioning well. - ''' + """ return self.rule_agents @property def use_raw(self): - ''' Indicate whether use raw state and action + """Indicate whether we use raw state and action Returns: use_raw (boolean): True if using raw state and action - ''' + """ return True diff --git a/rlcard/models/model.py b/rlcard/models/model.py index 00ce64b0c..1ee6987ed 100644 --- a/rlcard/models/model.py +++ b/rlcard/models/model.py @@ -1,21 +1,18 @@ - class Model(object): - ''' The base model class - ''' + """The base model class""" def __init__(self): - ''' Load the model here - ''' + """Load the model here""" pass @property def agents(self): - ''' Get a list of agents for each position in a the game + """Get a list of agents for each position in the game Returns: agents (list): A list of agents Note: Each agent should be just like RL agent with step and eval_step functioning well. 
- ''' + """ raise NotImplementedError diff --git a/rlcard/models/pretrained_models.py b/rlcard/models/pretrained_models.py index 107293f40..aa84c0caa 100644 --- a/rlcard/models/pretrained_models.py +++ b/rlcard/models/pretrained_models.py @@ -1,5 +1,4 @@ -''' Wrrapers of pretrained models. -''' +"""Wrappers of pretrained models.""" import os import rlcard @@ -10,23 +9,21 @@ ROOT_PATH = os.path.join(rlcard.__path__[0], 'models/pretrained') class LeducHoldemCFRModel(Model): - ''' A pretrained model on Leduc Holdem with CFR (chance sampling) - ''' + """A pretrained model on Leduc Holdem with CFR (chance sampling)""" def __init__(self): - ''' Load pretrained model - ''' + """Load pretrained model""" env = rlcard.make('leduc-holdem') self.agent = CFRAgent(env, model_path=os.path.join(ROOT_PATH, 'leduc_holdem_cfr')) self.agent.load() @property def agents(self): - ''' Get a list of agents for each position in a the game + """Get a list of agents for each position in a game Returns: agents (list): A list of agents Note: Each agent should be just like RL agent with step and eval_step functioning well. - ''' + """ return [self.agent, self.agent] diff --git a/rlcard/models/registration.py b/rlcard/models/registration.py index 5f7965b94..8c7a57e11 100644 --- a/rlcard/models/registration.py +++ b/rlcard/models/registration.py @@ -1,76 +1,75 @@ import importlib + class ModelSpec(object): - ''' A specification for a particular Model. 
- ''' + """A specification for a particular Model.""" def __init__(self, model_id, entry_point=None): - ''' Initilize + """Initialize Args: model_id (string): the name of the model entry_point (string): a string that indicates the location of the model class - ''' + """ self.model_id = model_id mod_name, class_name = entry_point.split(':') self._entry_point = getattr(importlib.import_module(mod_name), class_name) def load(self): - ''' Instantiates an instance of the model + """Instantiates an instance of the model Returns: Model (Model): an instance of the Model - ''' + """ model = self._entry_point() return model class ModelRegistry(object): - ''' Register a model by ID - ''' + """Register a model by ID""" def __init__(self): - ''' Initilize - ''' + """Initialize""" self.model_specs = {} def register(self, model_id, entry_point): - ''' Register an model + """Register a model Args: model_id (string): the name of the model entry_point (string): a string the indicates the location of the model class - ''' + """ if model_id in self.model_specs: raise ValueError('Cannot re-register model_id: {}'.format(model_id)) self.model_specs[model_id] = ModelSpec(model_id, entry_point) def load(self, model_id): - ''' Create a model instance + """Create a model instance Args: model_id (string): the name of the model - ''' + """ if model_id not in self.model_specs: raise ValueError('Cannot find model_id: {}'.format(model_id)) return self.model_specs[model_id].load() + # Have a global registry model_registry = ModelRegistry() def register(model_id, entry_point): - ''' Register a model + """Register a model Args: model_id (string): the name of the model entry_point (string): a string the indicates the location of the model class - ''' + """ return model_registry.register(model_id, entry_point) def load(model_id): - ''' Create and model instance + """Create a model instance Args: model_id (string): the name of the model - ''' + """ return model_registry.load(model_id) diff --git 
a/rlcard/models/uno_rule_models.py b/rlcard/models/uno_rule_models.py index 42185853d..4718c41d2 100644 --- a/rlcard/models/uno_rule_models.py +++ b/rlcard/models/uno_rule_models.py @@ -1,5 +1,4 @@ -''' UNO rule models -''' +"""UNO rule models""" import numpy as np @@ -7,14 +6,13 @@ from rlcard.models.model import Model class UNORuleAgentV1(object): - ''' UNO Rule agent version 1 - ''' + """UNO Rule agent version 1""" def __init__(self): self.use_raw = True def step(self, state): - ''' Predict the action given raw state. A naive rule. Choose the color + """Predict the action given raw state. A naive rule. Choose the color that appears least in the hand from legal actions. Try to keep wild cards as long as it can. @@ -23,7 +21,7 @@ def step(self, state): Returns: action (str): Predicted action - ''' + """ legal_actions = state['raw_legal_actions'] state = state['raw_obs'] @@ -44,20 +42,19 @@ def step(self, state): return action def eval_step(self, state): - ''' Step for evaluation. The same to step - ''' + """Step for evaluation. The same to step""" return self.step(state), [] @staticmethod def filter_wild(hand): - ''' Filter the wild cards. If all are wild cards, we do not filter + """Filter the wild cards. 
If all are wild cards, we do not filter Args: hand (list): A list of UNO card string Returns: filtered_hand (list): A filtered list of UNO string - ''' + """ filtered_hand = [] for card in hand: if not card[2:6] == 'wild': @@ -70,14 +67,14 @@ def filter_wild(hand): @staticmethod def count_colors(hand): - ''' Count the number of cards in each color in hand + """Count the number of cards in each color in hand Args: hand (list): A list of UNO card string Returns: color_nums (dict): The number cards of each color - ''' + """ color_nums = {} for card in hand: color = card[0] @@ -88,12 +85,10 @@ def count_colors(hand): return color_nums class UNORuleModelV1(Model): - ''' UNO Rule Model version 1 - ''' + """UNO Rule Model version 1""" def __init__(self): - ''' Load pretrained model - ''' + """Load pretrained model""" env = rlcard.make('uno') rule_agent = UNORuleAgentV1() @@ -101,23 +96,23 @@ def __init__(self): @property def agents(self): - ''' Get a list of agents for each position in a the game + """Get a list of agents for each position in the game Returns: agents (list): A list of agents Note: Each agent should be just like RL agent with step and eval_step functioning well. - ''' + """ return self.rule_agents @property def use_raw(self): - ''' Indicate whether use raw state and action + """Indicate whether we use raw state and action Returns: use_raw (boolean): True if using raw state and action - ''' + """ return True diff --git a/rlcard/utils/logger.py b/rlcard/utils/logger.py index e62499493..4ed393aa3 100644 --- a/rlcard/utils/logger.py +++ b/rlcard/utils/logger.py @@ -1,16 +1,16 @@ import os import csv + class Logger(object): - ''' Logger saves the running results and helps make plots from the results - ''' + """Logger saves the running results and helps make plots from the results""" def __init__(self, log_dir): - ''' Initialize the labels, legend and paths of the plot and log file. + """Initialize the labels, legend and paths of the plot and log file. 
Args: - log_path (str): The path the log files - ''' + log_dir (str): The log directory for the log files + """ self.log_dir = log_dir def __enter__(self): @@ -30,20 +30,20 @@ def __enter__(self): return self def log(self, text): - ''' Write the text to log file then print it. + """Write the text to log file then print it. Args: text(string): text to log - ''' - self.txt_file.write(text+'\n') + """ + self.txt_file.write(text + '\n') self.txt_file.flush() print(text) def log_performance(self, episode, reward): - ''' Log a point in the curve + """Log a point in the curve Args: episode (int): the episode of the current point reward (float): the reward of the current point - ''' + """ self.writer.writerow({'episode': episode, 'reward': reward}) print('') self.log('----------------------------------------') diff --git a/rlcard/utils/pettingzoo_utils.py b/rlcard/utils/pettingzoo_utils.py index f6c0b1a62..97f0c762e 100644 --- a/rlcard/utils/pettingzoo_utils.py +++ b/rlcard/utils/pettingzoo_utils.py @@ -38,15 +38,15 @@ def run_game_pettingzoo(env, agents, is_training=False): def reorganize_pettingzoo(trajectories): - ''' Reorganize the trajectory to make it RL friendly + """Reorganize the trajectory to make it RL friendly Args: - trajectory (list): A list of trajectories + trajectories (list): A list of trajectories Returns: (list): A new trajectories that can be fed into RL algorithms. 
- ''' + """ new_trajectories = defaultdict(list) for agent_name, trajectory in trajectories.items(): for i in range(0, len(trajectory)-2, 2): diff --git a/rlcard/utils/utils.py b/rlcard/utils/utils.py index 0bfebb031..a41164e78 100644 --- a/rlcard/utils/utils.py +++ b/rlcard/utils/utils.py @@ -2,6 +2,7 @@ from rlcard.games.base import Card + def set_seed(seed): if seed is not None: import subprocess @@ -17,6 +18,7 @@ def set_seed(seed): import random random.seed(seed) + def get_device(): import torch if torch.backends.mps.is_available(): @@ -29,25 +31,27 @@ def get_device(): device = torch.device("cpu") print("--> Running on the CPU") - return device + return device + def init_standard_deck(): - ''' Initialize a standard deck of 52 cards + """Initialize a standard deck of 52 cards Returns: (list): A list of Card object - ''' + """ suit_list = ['S', 'H', 'D', 'C'] rank_list = ['A', '2', '3', '4', '5', '6', '7', '8', '9', 'T', 'J', 'Q', 'K'] res = [Card(suit, rank) for suit in suit_list for rank in rank_list] return res + def init_54_deck(): - ''' Initialize a standard deck of 52 cards, BJ and RJ + """Initialize a standard deck of 52 cards, BJ and RJ Returns: (list): Alist of Card object - ''' + """ suit_list = ['S', 'H', 'D', 'C'] rank_list = ['A', '2', '3', '4', '5', '6', '7', '8', '9', 'T', 'J', 'Q', 'K'] res = [Card(suit, rank) for suit in suit_list for rank in rank_list] @@ -55,8 +59,9 @@ def init_54_deck(): res.append(Card('RJ', '')) return res + def rank2int(rank): - ''' Get the coresponding number of a rank. + """Get the corresponding number of a rank. Args: rank(str): rank stored in Card object @@ -67,11 +72,11 @@ def rank2int(rank): Note: 1. If the input rank is an empty string, the function will return -1. 2. If the input rank is not valid, the function will return None. 
- ''' + """ if rank == '': return -1 elif rank.isdigit(): - if int(rank) >= 2 and int(rank) <= 10: + if 2 <= int(rank) <= 10: return int(rank) else: return None @@ -87,26 +92,28 @@ def rank2int(rank): return 13 return None -def elegent_form(card): - ''' Get a elegent form of a card string + +def elegant_form(card): + """Get an elegant form of a card string Args: card (string): A card string Returns: - elegent_card (string): A nice form of card - ''' - suits = {'S': '♠', 'H': '♥', 'D': '♦', 'C': '♣','s': '♠', 'h': '♥', 'd': '♦', 'c': '♣' } + elegant_card (string): A nice form of card + """ + suits = {'S': '♠', 'H': '♥', 'D': '♦', 'C': '♣', 's': '♠', 'h': '♥', 'd': '♦', 'c': '♣'} rank = '10' if card[1] == 'T' else card[1] return suits[card[0]] + rank + def print_card(cards): - ''' Nicely print a card or list of cards + """Nicely print a card or list of cards Args: - card (string or list): The card(s) to be printed - ''' + cards (string or list): The card(s) to be printed + """ if cards is None: cards = [None] if isinstance(cards, str): @@ -127,13 +134,13 @@ def print_card(cards): lines[8].append('└─────────┘') else: if isinstance(card, Card): - elegent_card = elegent_form(card.suit + card.rank) + elegant_card = elegant_form(card.suit + card.rank) else: - elegent_card = elegent_form(card) - suit = elegent_card[0] - rank = elegent_card[1] - if len(elegent_card) == 3: - space = elegent_card[2] + elegant_card = elegant_form(card) + suit = elegant_card[0] + rank = elegant_card[1] + if len(elegant_card) == 3: + space = elegant_card[2] else: space = ' ' @@ -148,47 +155,49 @@ def print_card(cards): lines[8].append('└─────────┘') for line in lines: - print (' '.join(line)) + print(' '.join(line)) + def reorganize(trajectories, payoffs): - ''' Reorganize the trajectory to make it RL friendly + """Reorganize the trajectory to make it RL friendly Args: - trajectory (list): A list of trajectories + trajectories (list): A list of trajectories payoffs (list): A list of payoffs for 
the players. Each entry corresponds to one player Returns: (list): A new trajectories that can be fed into RL algorithms. - ''' + """ num_players = len(trajectories) new_trajectories = [[] for _ in range(num_players)] for player in range(num_players): - for i in range(0, len(trajectories[player])-2, 2): - if i ==len(trajectories[player])-3: + for i in range(0, len(trajectories[player]) - 2, 2): + if i == len(trajectories[player]) - 3: reward = payoffs[player] - done =True + done = True else: reward, done = 0, False - transition = trajectories[player][i:i+3].copy() + transition = trajectories[player][i:i + 3].copy() transition.insert(2, reward) transition.append(done) new_trajectories[player].append(transition) return new_trajectories + def remove_illegal(action_probs, legal_actions): - ''' Remove illegal actions and normalize the + """Remove illegal actions and normalize the probability vector Args: - action_probs (numpy.array): A 1 dimention numpy array. + action_probs (numpy.array): A 1 dimensional numpy array. legal_actions (list): A list of indices of legal actions. Returns: - probd (numpy.array): A normalized vector without legal actions. - ''' + probs (numpy.array): A normalized vector without illegal actions. + """ probs = np.zeros(action_probs.shape[0]) probs[legal_actions] = action_probs[legal_actions] if np.sum(probs) == 0: @@ -197,16 +206,17 @@ def remove_illegal(action_probs, legal_actions): probs /= sum(probs) return probs + def tournament(env, num): - ''' Evaluate he performance of the agents in the environment + """Evaluate the performance of the agents in the environment Args: env (Env class): The environment to be evaluated. num (int): The number of games to play. 
Returns: - A list of avrage payoffs for each player - ''' + A list of average payoffs for each player + """ payoffs = [0 for _ in range(env.num_players)] counter = 0 while counter < num: @@ -224,9 +234,9 @@ def tournament(env, num): payoffs[i] /= counter return payoffs + def plot_curve(csv_path, save_path, algorithm): - ''' Read data from csv file and plot the results - ''' + """Read data from csv file and plot the results""" import os import csv import matplotlib.pyplot as plt @@ -248,4 +258,3 @@ def plot_curve(csv_path, save_path, algorithm): os.makedirs(save_dir) fig.savefig(save_path) - diff --git a/tests/envs/determism_util.py b/tests/envs/determism_util.py index 5c5da092e..34515cb04 100644 --- a/tests/envs/determism_util.py +++ b/tests/envs/determism_util.py @@ -1,9 +1,9 @@ import rlcard -from rlcard.agents.random_agent import RandomAgent import random import numpy as np -def hash_obsevation(obs): + +def hash_observation(obs): try: val = hash(obs.tobytes()) return val @@ -11,14 +11,16 @@ def hash_obsevation(obs): try: return hash(obs) except TypeError: - warnings.warn("Observation not an int or an Numpy array") + warnings.warn("Observation not an int or a Numpy array") return 0 + def rand_iter(n): for x in range(n+1): random.randint(0, 1000) np.random.normal(size=100) + def gather_observations(env, actions, num_rand_steps): rand_iter(num_rand_steps) state, player_id = env.reset() @@ -44,6 +46,7 @@ def gather_observations(env, actions, num_rand_steps): return observations + def is_deterministic(env_name): env = rlcard.make(env_name) @@ -55,6 +58,6 @@ def is_deterministic(env_name): for rand_iters in range(2): env = rlcard.make(env_name,config={'seed':base_seed}) - hashes.append(hash(tuple([hash_obsevation(obs['obs']) for obs in gather_observations(env,actions,rand_iters)]))) + hashes.append(hash(tuple([hash_observation(obs['obs']) for obs in gather_observations(env, actions, rand_iters)]))) return hashes[0] == hashes[1] diff --git 
a/tests/envs/test_gin_rummy_env.py b/tests/envs/test_gin_rummy_env.py index ff01147f6..6f34ea826 100644 --- a/tests/envs/test_gin_rummy_env.py +++ b/tests/envs/test_gin_rummy_env.py @@ -1,8 +1,8 @@ -''' +""" File name: tests/envs/test_gin_rummy_env.py Author: William Hale Date created: 4/20/2020 -''' +""" import unittest import numpy as np diff --git a/tests/games/test_bridge_game.py b/tests/games/test_bridge_game.py index 88c3b9977..861f6e37e 100644 --- a/tests/games/test_bridge_game.py +++ b/tests/games/test_bridge_game.py @@ -1,8 +1,8 @@ -''' +""" File name: test_bridge_game.py Author: William Hale Date created: 11/25/2021 -''' +""" import unittest import numpy as np diff --git a/tests/games/test_gin_rummy_game.py b/tests/games/test_gin_rummy_game.py index a784d604d..6fa93f254 100644 --- a/tests/games/test_gin_rummy_game.py +++ b/tests/games/test_gin_rummy_game.py @@ -1,8 +1,8 @@ -''' +""" File name: test_gin_rummy_game.py Author: William Hale Date created: 3/11/2020 -''' +""" import unittest import numpy as np diff --git a/tests/games/test_nolimitholdem_judger.py b/tests/games/test_nolimitholdem_judger.py index 5f320aa78..4a0c81d16 100644 --- a/tests/games/test_nolimitholdem_judger.py +++ b/tests/games/test_nolimitholdem_judger.py @@ -27,10 +27,10 @@ def get_hands(self, player_hands, public_card): def test_judge_with_4_players(self): - ''' + """ suit_list = ['S', 'H', 'D', 'C'] rank_list = ['A', '2', '3', '4', '5', '6', '7', '8', '9', 'T', 'J', 'Q', 'K'] - ''' + """ players = self.get_players(4) diff --git a/tests/utils/test_holdem_utils.py b/tests/utils/test_holdem_utils.py index 9572f2321..cef203e36 100644 --- a/tests/utils/test_holdem_utils.py +++ b/tests/utils/test_holdem_utils.py @@ -23,7 +23,7 @@ def test_evaluate_hand_exception(self): hand = Hand(['CJ', 'CT', 'CQ', 'CK', 'C9', 'C8']) with self.assertRaises(Exception): - hand.evaluateHand() + hand.evaluate_hand() def test_has_high_card_false(self): diff --git a/tests/utils/test_utils.py 
b/tests/utils/test_utils.py index 2594ee2bb..aa03f87ca 100644 --- a/tests/utils/test_utils.py +++ b/tests/utils/test_utils.py @@ -1,6 +1,6 @@ import unittest import numpy as np -from rlcard.utils.utils import init_54_deck, init_standard_deck, rank2int, print_card, elegent_form, reorganize, tournament +from rlcard.utils.utils import init_54_deck, init_standard_deck, rank2int, print_card, elegant_form, reorganize, tournament import rlcard from rlcard.agents.random_agent import RandomAgent @@ -24,8 +24,8 @@ def test_rank2int(self): self.assertEqual(rank2int('K'), 13) def test_print_cards(self): - self.assertEqual(len(elegent_form('S9')), 2) - self.assertEqual(len(elegent_form('ST')), 3) + self.assertEqual(len(elegant_form('S9')), 2) + self.assertEqual(len(elegant_form('ST')), 3) print_card(None) print_card('S9')