diff --git a/README.md b/README.md
index c8b2503cc..dc4b5c94a 100644
--- a/README.md
+++ b/README.md
@@ -245,7 +245,7 @@ You can use the the following interface to make an environment. You may optional
 	*   `allow_step_back`: Default `False`. `True` if allowing `step_back` function to traverse backward in the tree.
 	*   Game specific configurations: These fields start with `game_`. Currently, we only support `game_num_players` in Blackjack, .
 
-Once the environemnt is made, we can access some information of the game.
+Once the environment is made, we can access some information of the game.
 *   **env.num_actions**: The number of actions.
 *   **env.num_players**: The number of players.
 *   **env.state_shape**: The shape of the state space of the observations.
diff --git a/docs/games.md b/docs/games.md
index 301f85d58..bcd40ea84 100644
--- a/docs/games.md
+++ b/docs/games.md
@@ -90,7 +90,7 @@ At each decision point of the game, the corresponding player will be able to obs
 | ------------- | :--------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------- |
 | seen\_cards   | Three face-down cards distributed to the landlord after bidding. Then these cards will be made public to all players.                                | TQA                                                                                                 |
 | landlord      | An integer of landlord's id                                                                                                                          | 0                                                                                                   |
-| self          | An integer of current player's id                                                                                                                    | 2                                                                                                   |
+| self          | An integer of current player's id                                                                                                                    | 2                                                                                                   |
 | trace         | A list of tuples which records every actions in one game. The first entry of  the tuple is player's id, the second is corresponding player's action. | \[(0, '8222'), (1, 'pass'), (2, 'pass'), (0 '6KKK'), (1, 'pass'), (2, 'pass'), (0, '8'), (1, 'Q')\] |
 | played\_cards | As the game progresses, the cards which have been played by the three players and sorted from low to high.                                           | \['6', '8', '8', 'Q', 'K', 'K', 'K', '2', '2', '2'\]                                                |
 | others\_hand  | The union of the other two player's current hand                                                                                                     | 333444555678899TTTJJJQQAA2R                                                                         |
@@ -134,7 +134,7 @@ If the landlord first get rid of all the cards in his hand, he will win and rece
 ## Mahjong
 Mahjong is a tile-based game developed in China, and has spread throughout the world since 20th century. It is commonly played
 by 4 players. The game is played with a set of 136 tiles. In turn players draw and discard tiles until  
-The goal of the game is to complete the leagal hand using the 14th drawn tile to form 4 sets and a pair. 
+The goal of the game is to complete the legal hand using the 14th drawn tile to form 4 sets and a pair. 
 We revised the game into a simple version that all of the winning set are equal, and player will win as long as she complete 
 forming 4 sets and a pair. Please refer the detail on [Wikipedia](https://en.wikipedia.org/wiki/Mahjong) or  [Baike](https://baike.baidu.com/item/麻将/215).
 
diff --git a/docs/high-level-design.md b/docs/high-level-design.md
index 22b6686da..855837e2c 100644
--- a/docs/high-level-design.md
+++ b/docs/high-level-design.md
@@ -25,4 +25,4 @@ Card games usually have similar structures. We abstract some concepts in card ga
 To summarize, in one `Game`, a `Dealer` deals the cards for each `Player`. In each `Round` of the game, a `Judger` will make major decisions about the next round and the payoffs in the end of the game.
 
 ## Agents
-We provide examples of several representative algorithms and wrap them as `Agent` to show how a learning algorithm can be connected to the toolkit. The first example is DQN which is a representative of the Reinforcement Learning (RL) algorithms category. The second example is NFSP which is a representative of the Reinforcement Learning (RL) with self-play. We also provide CFR (chance sampling) and DeepCFR which belong to Conterfactual Regret Minimization (CFR) category. Other algorithms from these three categories can be connected in similar ways.
+We provide examples of several representative algorithms and wrap them as `Agent` to show how a learning algorithm can be connected to the toolkit. The first example is DQN which is a representative of the Reinforcement Learning (RL) algorithms category. The second example is NFSP which is a representative of the Reinforcement Learning (RL) with self-play. We also provide CFR (chance sampling) and DeepCFR which belong to Counterfactual Regret Minimization (CFR) category. Other algorithms from these three categories can be connected in similar ways.
diff --git a/docs/toy-examples.md b/docs/toy-examples.md
index 412b8c81b..61ad378a9 100644
--- a/docs/toy-examples.md
+++ b/docs/toy-examples.md
@@ -339,7 +339,7 @@ def train(args):
     # Seed numpy, torch, random
     set_seed(args.seed)
 
-    # Initilize CFR Agent
+    # Initialize CFR Agent
     agent = CFRAgent(
         env,
         os.path.join(
diff --git a/examples/evaluate.py b/examples/evaluate.py
index a5f70905f..d79a54b15 100644
--- a/examples/evaluate.py
+++ b/examples/evaluate.py
@@ -1,19 +1,16 @@
-''' An example of evluating the trained models in RLCard
-'''
+"""An example of evaluating the trained models in RLCard"""
 import os
 import argparse
 
 import rlcard
-from rlcard.agents import (
-    DQNAgent,
-    RandomAgent,
-)
+
 from rlcard.utils import (
     get_device,
     set_seed,
     tournament,
 )
 
+
 def load_model(model_path, env=None, position=None, device=None):
     if os.path.isfile(model_path):  # Torch model
         import torch
@@ -29,14 +26,14 @@ def load_model(model_path, env=None, position=None, device=None):
     else:  # A model in the model zoo
         from rlcard import models
         agent = models.load(model_path).agents[position]
-    
+
     return agent
 
-def evaluate(args):
 
+def evaluate(args):
     # Check whether gpu is available
     device = get_device()
-        
+
     # Seed numpy, torch, random
     set_seed(args.seed)
 
@@ -54,6 +51,7 @@ def evaluate(args):
     for position, reward in enumerate(rewards):
         print(position, args.models[position], reward)
 
+
 if __name__ == '__main__':
     parser = argparse.ArgumentParser("Evaluation example in RLCard")
     parser.add_argument(
@@ -99,4 +97,3 @@ def evaluate(args):
 
     os.environ["CUDA_VISIBLE_DEVICES"] = args.cuda
     evaluate(args)
-
diff --git a/examples/human/blackjack_human.py b/examples/human/blackjack_human.py
index 46f3f2b72..96fd7f27b 100644
--- a/examples/human/blackjack_human.py
+++ b/examples/human/blackjack_human.py
@@ -1,5 +1,4 @@
-''' A toy example of self playing for Blackjack
-'''
+"""A toy example of self playing for Blackjack"""
 
 import rlcard
 from rlcard.agents import RandomAgent as RandomAgent
@@ -23,7 +22,7 @@
 
 print(">> Blackjack human agent")
 
-while (True):
+while True:
     print(">> Start a new game")
 
     trajectories, payoffs = env.run(is_training=False)
diff --git a/examples/human/gin_rummy_human.py b/examples/human/gin_rummy_human.py
index 230e3640f..ce4b680fc 100644
--- a/examples/human/gin_rummy_human.py
+++ b/examples/human/gin_rummy_human.py
@@ -1,9 +1,9 @@
-'''
+"""
     Project: Gui Gin Rummy
     File name: gin_rummy_human.py
     Author: William Hale
     Date created: 3/14/2020
-'''
+"""
 
 #   You need to install tkinter if it is not already installed.
 #   Tkinter is Python's defacto standard GUI (Graphical User Interface) package.
diff --git a/examples/human/leduc_holdem_human.py b/examples/human/leduc_holdem_human.py
index 55e73c33f..57133cb68 100644
--- a/examples/human/leduc_holdem_human.py
+++ b/examples/human/leduc_holdem_human.py
@@ -1,5 +1,4 @@
-''' A toy example of playing against pretrianed AI on Leduc Hold'em
-'''
+"""A toy example of playing against pretrained AI on Leduc Hold'em"""
 
 import rlcard
 from rlcard import models
@@ -17,7 +16,7 @@
 
 print(">> Leduc Hold'em pre-trained model")
 
-while (True):
+while True:
     print(">> Start a new game")
 
     trajectories, payoffs = env.run(is_training=False)
diff --git a/examples/human/limit_holdem_human.py b/examples/human/limit_holdem_human.py
index 1491180e7..3f37deda1 100644
--- a/examples/human/limit_holdem_human.py
+++ b/examples/human/limit_holdem_human.py
@@ -1,5 +1,4 @@
-''' A toy example of playing against a random agent on Limit Hold'em
-'''
+"""A toy example of playing against a random agent on Limit Hold'em"""
 
 import rlcard
 from rlcard.agents import LimitholdemHumanAgent as HumanAgent
@@ -17,7 +16,7 @@
 
 print(">> Limit Hold'em random agent")
 
-while (True):
+while True:
     print(">> Start a new game")
 
     trajectories, payoffs = env.run(is_training=False)
diff --git a/examples/human/nolimit_holdem_human.py b/examples/human/nolimit_holdem_human.py
index 76f29da11..bfb812ac9 100644
--- a/examples/human/nolimit_holdem_human.py
+++ b/examples/human/nolimit_holdem_human.py
@@ -1,5 +1,4 @@
-''' A toy example of playing against pretrianed AI on Leduc Hold'em
-'''
+"""A toy example of playing against pretrained AI on No-Limit Hold'em"""
 from rlcard.agents import RandomAgent
 
 import rlcard
@@ -17,7 +16,7 @@
 env.set_agents([human_agent, human_agent2])
 
 
-while (True):
+while True:
     print(">> Start a new game")
 
     trajectories, payoffs = env.run(is_training=False)
diff --git a/examples/human/uno_human.py b/examples/human/uno_human.py
index a5110b16b..19198088e 100644
--- a/examples/human/uno_human.py
+++ b/examples/human/uno_human.py
@@ -1,5 +1,4 @@
-''' A toy example of playing against rule-based bot on UNO
-'''
+"""A toy example of playing against rule-based bot on UNO"""
 
 import rlcard
 from rlcard import models
@@ -16,7 +15,7 @@
 
 print(">> UNO rule model V1")
 
-while (True):
+while True:
     print(">> Start a new game")
 
     trajectories, payoffs = env.run(is_training=False)
diff --git a/examples/pettingzoo/run_dmc.py b/examples/pettingzoo/run_dmc.py
index a44ef0678..e88cc9601 100644
--- a/examples/pettingzoo/run_dmc.py
+++ b/examples/pettingzoo/run_dmc.py
@@ -1,6 +1,4 @@
-''' An example of training a Deep Monte-Carlo (DMC) Agent on PettingZoo environments
-wrapping RLCard
-'''
+"""An example of training a Deep Monte-Carlo (DMC) Agent on PettingZoo environments wrapping RLCard"""
 import os
 import argparse
 
diff --git a/examples/pettingzoo/run_rl.py b/examples/pettingzoo/run_rl.py
index a648135b0..fb7b5cee1 100644
--- a/examples/pettingzoo/run_rl.py
+++ b/examples/pettingzoo/run_rl.py
@@ -1,6 +1,4 @@
-''' An example of training a reinforcement learning agent on the PettingZoo 
-environments that wrap RLCard
-'''
+"""An example of training a reinforcement learning agent on the PettingZoo environments that wrap RLCard"""
 import os
 import argparse
 
diff --git a/examples/run_cfr.py b/examples/run_cfr.py
index b5d67d08d..b5862a2e0 100644
--- a/examples/run_cfr.py
+++ b/examples/run_cfr.py
@@ -1,5 +1,4 @@
-''' An example of solve Leduc Hold'em with CFR (chance sampling)
-'''
+"""An example of solving Leduc Hold'em with CFR (chance sampling)"""
 import os
 import argparse
 
@@ -15,6 +14,7 @@
     plot_curve,
 )
 
+
 def train(args):
     # Make environments, CFR only supports Leduc Holdem
     env = rlcard.make(
@@ -34,7 +34,7 @@ def train(args):
     # Seed numpy, torch, random
     set_seed(args.seed)
 
-    # Initilize CFR Agent
+    # Initialize CFR Agent
     agent = CFRAgent(
         env,
         os.path.join(
@@ -71,6 +71,7 @@ def train(args):
     # Plot the learning curve
     plot_curve(csv_path, fig_path, 'cfr')
 
+
 if __name__ == '__main__':
     parser = argparse.ArgumentParser("CFR example in RLCard")
     parser.add_argument(
diff --git a/examples/run_dmc.py b/examples/run_dmc.py
index 401b14d68..3d955e47f 100644
--- a/examples/run_dmc.py
+++ b/examples/run_dmc.py
@@ -1,5 +1,4 @@
-''' An example of training a Deep Monte-Carlo (DMC) Agent on the environments in RLCard
-'''
+"""An example of training a Deep Monte-Carlo (DMC) Agent on the environments in RLCard"""
 import os
 import argparse
 
@@ -8,8 +7,8 @@
 import rlcard
 from rlcard.agents.dmc_agent import DMCTrainer
 
-def train(args):
 
+def train(args):
     # Make the environment
     env = rlcard.make(args.env)
 
@@ -29,6 +28,7 @@ def train(args):
     # Train DMC Agents
     trainer.start()
 
+
 if __name__ == '__main__':
     parser = argparse.ArgumentParser("DMC example in RLCard")
     parser.add_argument(
@@ -95,4 +95,3 @@ def train(args):
 
     os.environ["CUDA_VISIBLE_DEVICES"] = args.cuda
     train(args)
-
diff --git a/examples/run_random.py b/examples/run_random.py
index 9a2681288..1287a4a13 100644
--- a/examples/run_random.py
+++ b/examples/run_random.py
@@ -1,5 +1,4 @@
-''' An example of playing randomly in RLCard
-'''
+"""An example of playing randomly in RLCard"""
 import argparse
 import pprint
 
diff --git a/examples/run_rl.py b/examples/run_rl.py
index 3727f3ae8..08c05db96 100644
--- a/examples/run_rl.py
+++ b/examples/run_rl.py
@@ -1,5 +1,4 @@
-''' An example of training a reinforcement learning agent on the environments in RLCard
-'''
+"""An example of training a reinforcement learning agent on the environments in RLCard"""
 import os
 import argparse
 
@@ -16,11 +15,11 @@
     plot_curve,
 )
 
-def train(args):
 
+def train(args):
     # Check whether gpu is available
     device = get_device()
-        
+
     # Seed numpy, torch, random
     set_seed(args.seed)
 
@@ -41,7 +40,7 @@ def train(args):
             agent = DQNAgent(
                 num_actions=env.num_actions,
                 state_shape=env.state_shape[0],
-                mlp_layers=[64,64],
+                mlp_layers=[64, 64],
                 device=device,
                 save_path=args.log_dir,
                 save_every=args.save_every
@@ -55,8 +54,8 @@ def train(args):
             agent = NFSPAgent(
                 num_actions=env.num_actions,
                 state_shape=env.state_shape[0],
-                hidden_layers_sizes=[64,64],
-                q_mlp_layers=[64,64],
+                hidden_layers_sizes=[64, 64],
+                q_mlp_layers=[64, 64],
                 device=device,
                 save_path=args.log_dir,
                 save_every=args.save_every
@@ -106,6 +105,7 @@ def train(args):
     torch.save(agent, save_path)
     print('Model saved in', save_path)
 
+
 if __name__ == '__main__':
     parser = argparse.ArgumentParser("DQN/NFSP example in RLCard")
     parser.add_argument(
@@ -163,13 +163,13 @@ def train(args):
         type=str,
         default='experiments/leduc_holdem_dqn_result/',
     )
-    
+
     parser.add_argument(
         "--load_checkpoint_path",
         type=str,
         default="",
     )
-    
+
     parser.add_argument(
         "--save_every",
         type=int,
@@ -179,4 +179,3 @@ def train(args):
 
     os.environ["CUDA_VISIBLE_DEVICES"] = args.cuda
     train(args)
-
diff --git a/rlcard/agents/cfr_agent.py b/rlcard/agents/cfr_agent.py
index 406b0c12d..0ae429b2c 100644
--- a/rlcard/agents/cfr_agent.py
+++ b/rlcard/agents/cfr_agent.py
@@ -1,4 +1,3 @@
-import numpy as np
 import collections
 
 import os
@@ -6,16 +5,16 @@
 
 from rlcard.utils.utils import *
 
-class CFRAgent():
-    ''' Implement CFR (chance sampling) algorithm
-    '''
+
+class CFRAgent:
+    """Implement CFR (chance sampling) algorithm"""
 
     def __init__(self, env, model_path='./cfr_model'):
-        ''' Initilize Agent
+        """Initialize Agent
 
         Args:
             env (Env): Env class
-        '''
+        """
         self.use_raw = False
         self.env = env
         self.model_path = model_path
@@ -30,8 +29,7 @@ def __init__(self, env, model_path='./cfr_model'):
         self.iteration = 0
 
     def train(self):
-        ''' Do one iteration of CFR
-        '''
+        """Do one iteration of CFR"""
         self.iteration += 1
         # Firstly, traverse tree to compute counterfactual regret for each player
         # The regrets are recorded in traversal
@@ -44,7 +42,7 @@ def train(self):
         self.update_policy()
 
     def traverse_tree(self, probs, player_id):
-        ''' Traverse the game tree, update the regrets
+        """Traverse the game tree, update the regrets
 
         Args:
             probs: The reach probability of the current node
@@ -52,7 +50,7 @@ def traverse_tree(self, probs, player_id):
 
         Returns:
             state_utilities (list): The expected utilities for all the players
-        '''
+        """
         if self.env.is_over():
             return self.env.get_payoffs()
 
@@ -82,7 +80,7 @@ def traverse_tree(self, probs, player_id):
         # If it is current player, we record the policy and compute regret
         player_prob = probs[current_player]
         counterfactual_prob = (np.prod(probs[:current_player]) *
-                                np.prod(probs[current_player + 1:]))
+                               np.prod(probs[current_player + 1:]))
         player_state_utility = state_utility[current_player]
 
         if obs not in self.regrets:
@@ -92,23 +90,22 @@ def traverse_tree(self, probs, player_id):
         for action in legal_actions:
             action_prob = action_probs[action]
             regret = counterfactual_prob * (action_utilities[action][current_player]
-                    - player_state_utility)
+                                            - player_state_utility)
             self.regrets[obs][action] += regret
             self.average_policy[obs][action] += self.iteration * player_prob * action_prob
         return state_utility
 
     def update_policy(self):
-        ''' Update policy based on the current regrets
-        '''
+        """Update policy based on the current regrets"""
         for obs in self.regrets:
             self.policy[obs] = self.regret_matching(obs)
 
     def regret_matching(self, obs):
-        ''' Apply regret matching
+        """Apply regret matching
 
         Args:
             obs (string): The state_str
-        '''
+        """
         regret = self.regrets[obs]
         positive_regret_sum = sum([r for r in regret if r > 0])
 
@@ -122,11 +119,11 @@ def regret_matching(self, obs):
         return action_probs
 
     def action_probs(self, obs, legal_actions, policy):
-        ''' Obtain the action probabilities of the current state
+        """Obtain the action probabilities of the current state
 
         Args:
             obs (str): state_str
-            legal_actions (list): List of leagel actions
+            legal_actions (list): List of legal actions
             player_id (int): The current player
             policy (dict): The used policy
 
@@ -134,9 +131,9 @@ def action_probs(self, obs, legal_actions, policy):
             (tuple) that contains:
                 action_probs(numpy.array): The action probabilities
                 legal_actions (list): Indices of legal actions
-        '''
+        """
         if obs not in policy.keys():
-            action_probs = np.array([1.0/self.env.num_actions for _ in range(self.env.num_actions)])
+            action_probs = np.array([1.0 / self.env.num_actions for _ in range(self.env.num_actions)])
             self.policy[obs] = action_probs
         else:
             action_probs = policy[obs]
@@ -144,7 +141,7 @@ def action_probs(self, obs, legal_actions, policy):
         return action_probs
 
     def eval_step(self, state):
-        ''' Given a state, predict action based on average policy
+        """Given a state, predict action based on average policy
 
         Args:
             state (numpy.array): State representation
@@ -152,17 +149,17 @@ def eval_step(self, state):
         Returns:
             action (int): Predicted action
             info (dict): A dictionary containing information
-        '''
+        """
         probs = self.action_probs(state['obs'].tostring(), list(state['legal_actions'].keys()), self.average_policy)
         action = np.random.choice(len(probs), p=probs)
 
-        info = {}
-        info['probs'] = {state['raw_legal_actions'][i]: float(probs[list(state['legal_actions'].keys())[i]]) for i in range(len(state['legal_actions']))}
+        info = {'probs': {state['raw_legal_actions'][i]: float(probs[list(state['legal_actions'].keys())[i]]) for i in
+                          range(len(state['legal_actions']))}}
 
         return action, info
 
     def get_state(self, player_id):
-        ''' Get state_str of the player
+        """Get state_str of the player
 
         Args:
             player_id (int): The player id
@@ -171,51 +168,48 @@ def get_state(self, player_id):
             (tuple) that contains:
                 state (str): The state str
                 legal_actions (list): Indices of legal actions
-        '''
+        """
         state = self.env.get_state(player_id)
         return state['obs'].tostring(), list(state['legal_actions'].keys())
 
     def save(self):
-        ''' Save model
-        '''
+        """Save model"""
         if not os.path.exists(self.model_path):
             os.makedirs(self.model_path)
 
-        policy_file = open(os.path.join(self.model_path, 'policy.pkl'),'wb')
+        policy_file = open(os.path.join(self.model_path, 'policy.pkl'), 'wb')
         pickle.dump(self.policy, policy_file)
         policy_file.close()
 
-        average_policy_file = open(os.path.join(self.model_path, 'average_policy.pkl'),'wb')
+        average_policy_file = open(os.path.join(self.model_path, 'average_policy.pkl'), 'wb')
         pickle.dump(self.average_policy, average_policy_file)
         average_policy_file.close()
 
-        regrets_file = open(os.path.join(self.model_path, 'regrets.pkl'),'wb')
+        regrets_file = open(os.path.join(self.model_path, 'regrets.pkl'), 'wb')
         pickle.dump(self.regrets, regrets_file)
         regrets_file.close()
 
-        iteration_file = open(os.path.join(self.model_path, 'iteration.pkl'),'wb')
+        iteration_file = open(os.path.join(self.model_path, 'iteration.pkl'), 'wb')
         pickle.dump(self.iteration, iteration_file)
         iteration_file.close()
 
     def load(self):
-        ''' Load model
-        '''
+        """Load model"""
         if not os.path.exists(self.model_path):
             return
 
-        policy_file = open(os.path.join(self.model_path, 'policy.pkl'),'rb')
+        policy_file = open(os.path.join(self.model_path, 'policy.pkl'), 'rb')
         self.policy = pickle.load(policy_file)
         policy_file.close()
 
-        average_policy_file = open(os.path.join(self.model_path, 'average_policy.pkl'),'rb')
+        average_policy_file = open(os.path.join(self.model_path, 'average_policy.pkl'), 'rb')
         self.average_policy = pickle.load(average_policy_file)
         average_policy_file.close()
 
-        regrets_file = open(os.path.join(self.model_path, 'regrets.pkl'),'rb')
+        regrets_file = open(os.path.join(self.model_path, 'regrets.pkl'), 'rb')
         self.regrets = pickle.load(regrets_file)
         regrets_file.close()
 
-        iteration_file = open(os.path.join(self.model_path, 'iteration.pkl'),'rb')
+        iteration_file = open(os.path.join(self.model_path, 'iteration.pkl'), 'rb')
         self.iteration = pickle.load(iteration_file)
         iteration_file.close()
-
diff --git a/rlcard/agents/dmc_agent/model.py b/rlcard/agents/dmc_agent/model.py
index 2adf72cda..7086d40cc 100644
--- a/rlcard/agents/dmc_agent/model.py
+++ b/rlcard/agents/dmc_agent/model.py
@@ -18,19 +18,17 @@
 import torch
 from torch import nn
 
+
 class DMCNet(nn.Module):
-    def __init__(
-        self,
-        state_shape,
-        action_shape,
-        mlp_layers=[512,512,512,512,512]
-    ):
+    def __init__(self, state_shape, action_shape, mlp_layers=None):
         super().__init__()
+        if mlp_layers is None:
+            mlp_layers = [512, 512, 512, 512, 512]
         input_dim = np.prod(state_shape) + np.prod(action_shape)
         layer_dims = [input_dim] + mlp_layers
         fc = []
-        for i in range(len(layer_dims)-1):
-            fc.append(nn.Linear(layer_dims[i], layer_dims[i+1]))
+        for i in range(len(layer_dims) - 1):
+            fc.append(nn.Linear(layer_dims[i], layer_dims[i + 1]))
             fc.append(nn.ReLU())
         fc.append(nn.Linear(layer_dims[-1], 1))
         self.fc_layers = nn.Sequential(*fc)
@@ -42,17 +40,13 @@ def forward(self, obs, actions):
         values = self.fc_layers(x).flatten()
         return values
 
+
 class DMCAgent:
-    def __init__(
-        self,
-        state_shape,
-        action_shape,
-        mlp_layers=[512,512,512,512,512],
-        exp_epsilon=0.01,
-        device="0",
-    ):
+    def __init__(self, state_shape, action_shape, mlp_layers=None, exp_epsilon=0.01, device="0"):
+        if mlp_layers is None:
+            mlp_layers = [512, 512, 512, 512, 512]
         self.use_raw = False
-        self.device = 'cuda:'+device if device != "cpu" else "cpu"
+        self.device = 'cuda:' + device if device != "cpu" else "cpu"
         self.net = DMCNet(state_shape, action_shape, mlp_layers).to(self.device)
         self.exp_epsilon = exp_epsilon
         self.action_shape = action_shape
@@ -74,8 +68,7 @@ def eval_step(self, state):
         action_idx = np.argmax(values)
         action = action_keys[action_idx]
 
-        info = {}
-        info['values'] = {state['raw_legal_actions'][i]: float(values[i]) for i in range(len(action_keys))}
+        info = {'values': {state['raw_legal_actions'][i]: float(values[i]) for i in range(len(action_keys))}}
 
         return action, info
 
@@ -121,15 +114,11 @@ def state_dict(self):
     def set_device(self, device):
         self.device = device
 
+
 class DMCModel:
-    def __init__(
-        self,
-        state_shape,
-        action_shape,
-        mlp_layers=[512,512,512,512,512],
-        exp_epsilon=0.01,
-        device=0
-    ):
+    def __init__(self, state_shape, action_shape, mlp_layers=None, exp_epsilon=0.01, device=0):
+        if mlp_layers is None:
+            mlp_layers = [512, 512, 512, 512, 512]
         self.agents = []
         for player_id in range(len(state_shape)):
             agent = DMCAgent(
diff --git a/rlcard/agents/dmc_agent/pettingzoo_model.py b/rlcard/agents/dmc_agent/pettingzoo_model.py
index 8a6d577c1..aa1419d13 100644
--- a/rlcard/agents/dmc_agent/pettingzoo_model.py
+++ b/rlcard/agents/dmc_agent/pettingzoo_model.py
@@ -20,13 +20,10 @@ def feed(self, ts):
 
 
 class DMCModelPettingZoo:
-    def __init__(
-        self,
-        env,
-        mlp_layers=[512,512,512,512,512],
-        exp_epsilon=0.01,
-        device="0"
-    ):
+    def __init__(self, env, mlp_layers=None, exp_epsilon=0.01, device="0"):
+        if mlp_layers is None:
+            mlp_layers = [512, 512, 512, 512, 512]
+
         self.agents = OrderedDict()
         for agent_name in env.agents:
             agent = DMCAgentPettingZoo(
diff --git a/rlcard/agents/dmc_agent/trainer.py b/rlcard/agents/dmc_agent/trainer.py
index 043f99fcb..5de12b0c9 100644
--- a/rlcard/agents/dmc_agent/trainer.py
+++ b/rlcard/agents/dmc_agent/trainer.py
@@ -163,7 +163,7 @@ def __init__(
         if not self.is_pettingzoo_env:
             self.num_players = self.env.num_players
             self.action_shape = self.env.action_shape
-            if self.action_shape[0] == None:  # One-hot encoding
+            if self.action_shape[0] is None:  # One-hot encoding
                 self.action_shape = [[self.env.num_actions] for _ in range(self.num_players)]
 
             def model_func(device):
diff --git a/rlcard/agents/dqn_agent.py b/rlcard/agents/dqn_agent.py
index 11c6875d0..8cf7f4555 100644
--- a/rlcard/agents/dqn_agent.py
+++ b/rlcard/agents/dqn_agent.py
@@ -1,4 +1,4 @@
-''' DQN agent
+"""DQN agent
 
 The code is derived from https://github.com/dennybritz/reinforcement-learning/blob/master/DQN/dqn.py
 
@@ -23,7 +23,7 @@
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
-'''
+"""
 
 import os
 import random
@@ -39,10 +39,10 @@
 
 
 class DQNAgent(object):
-    '''
+    """
     Approximate clone of rlcard.agents.dqn_agent.DQNAgent
     that depends on PyTorch instead of Tensorflow
-    '''
+    """
     def __init__(self,
                  replay_memory_size=20000,
                  replay_memory_init_size=100,
@@ -61,7 +61,7 @@ def __init__(self,
                  save_path=None,
                  save_every=float('inf'),):
 
-        '''
+        """
         Q-Learning algorithm for off-policy TD control using Function Approximation.
         Finds the optimal greedy policy while following an epsilon-greedy policy.
 
@@ -86,7 +86,7 @@ def __init__(self,
             device (torch.device): whether to use the cpu or gpu
             save_path (str): The path to save the model checkpoints
             save_every (int): Save the model every X training steps
-        '''
+        """
         self.use_raw = False
         self.replay_memory_init_size = replay_memory_init_size
         self.update_target_estimator_every = update_target_estimator_every
@@ -112,10 +112,10 @@ def __init__(self,
         self.epsilons = np.linspace(epsilon_start, epsilon_end, epsilon_decay_steps)
 
         # Create estimators
-        self.q_estimator = Estimator(num_actions=num_actions, learning_rate=learning_rate, state_shape=state_shape, \
-            mlp_layers=mlp_layers, device=self.device)
-        self.target_estimator = Estimator(num_actions=num_actions, learning_rate=learning_rate, state_shape=state_shape, \
-            mlp_layers=mlp_layers, device=self.device)
+        self.q_estimator = Estimator(num_actions=num_actions, learning_rate=learning_rate, state_shape=state_shape,
+                                     mlp_layers=mlp_layers, device=self.device)
+        self.target_estimator = Estimator(num_actions=num_actions, learning_rate=learning_rate, state_shape=state_shape,
+                                          mlp_layers=mlp_layers, device=self.device)
 
         # Create replay memory
         self.memory = Memory(replay_memory_size, batch_size)
@@ -125,13 +125,13 @@ def __init__(self,
         self.save_every = save_every
 
     def feed(self, ts):
-        ''' Store data in to replay buffer and train the agent. There are two stages.
+        """Store data into the replay buffer and train the agent. There are two stages.
             In stage 1, populate the memory without training
             In stage 2, train the agent every several timesteps
 
         Args:
             ts (list): a list of 5 elements that represent the transition
-        '''
+        """
         (state, action, reward, next_state, done) = tuple(ts)
         self.feed_memory(state['obs'], action, reward, next_state['obs'], list(next_state['legal_actions'].keys()), done)
         self.total_t += 1
@@ -140,7 +140,7 @@ def feed(self, ts):
             self.train()
 
     def step(self, state):
-        ''' Predict the action for genrating training data but
+        """Predict the action for generating training data but
             have the predictions disconnected from the computation graph
 
         Args:
@@ -148,7 +148,7 @@ def step(self, state):
 
         Returns:
             action (int): an action id
-        '''
+        """
         q_values = self.predict(state)
         epsilon = self.epsilons[min(self.total_t, self.epsilon_decay_steps-1)]
         legal_actions = list(state['legal_actions'].keys())
@@ -160,7 +160,7 @@ def step(self, state):
         return legal_actions[action_idx]
 
     def eval_step(self, state):
-        ''' Predict the action for evaluation purpose.
+        """Predict the action for evaluation purpose.
 
         Args:
             state (numpy.array): current state
@@ -168,7 +168,7 @@ def eval_step(self, state):
         Returns:
             action (int): an action id
             info (dict): A dictionary containing information
-        '''
+        """
         q_values = self.predict(state)
         best_action = np.argmax(q_values)
 
@@ -178,14 +178,14 @@ def eval_step(self, state):
         return best_action, info
 
     def predict(self, state):
-        ''' Predict the masked Q-values
+        """Predict the masked Q-values
 
         Args:
             state (numpy.array): current state
 
         Returns:
             q_values (numpy.array): a 1-d array where each entry represents a Q value
-        '''
+        """
         
         q_values = self.q_estimator.predict_nograd(np.expand_dims(state['obs'], 0))[0]
         masked_q_values = -np.inf * np.ones(self.num_actions, dtype=float)
@@ -195,11 +195,11 @@ def predict(self, state):
         return masked_q_values
 
     def train(self):
-        ''' Train the network
+        """Train the network
 
         Returns:
             loss (float): The loss of the current batch.
-        '''
+        """
         state_batch, action_batch, reward_batch, next_state_batch, done_batch, legal_actions_batch = self.memory.sample()
 
         # Calculate best next actions using Q-network (Double DQN)
@@ -238,7 +238,7 @@ def train(self):
 
 
     def feed_memory(self, state, action, reward, next_state, legal_actions, done):
-        ''' Feed transition to memory
+        """Feed transition to memory
 
         Args:
             state (numpy.array): the current state
@@ -247,7 +247,7 @@ def feed_memory(self, state, action, reward, next_state, legal_actions, done):
             next_state (numpy.array): the next state after performing the action
             legal_actions (list): the legal actions of the next state
             done (boolean): whether the episode is finished
-        '''
+        """
         self.memory.save(state, action, reward, next_state, legal_actions, done)
 
     def set_device(self, device):
@@ -256,11 +256,11 @@ def set_device(self, device):
         self.target_estimator.device = device
 
     def checkpoint_attributes(self):
-        '''
+        """
         Return the current checkpoint attributes (dict)
         Checkpoint attributes are used to save and restore the model in the middle of training
         Saves the model state dict, optimizer state dict, and all other instance variables
-        '''
+        """
         
         return {
             'agent_type': 'DQNAgent',
@@ -284,12 +284,12 @@ def checkpoint_attributes(self):
 
     @classmethod
     def from_checkpoint(cls, checkpoint):
-        '''
+        """
         Restore the model from a checkpoint
-        
+
         Args:
             checkpoint (dict): the checkpoint attributes generated by checkpoint_attributes()
-        '''
+        """
         
         print("\nINFO - Restoring model from checkpoint...")
         agent_instance = cls(
@@ -321,33 +321,33 @@ def from_checkpoint(cls, checkpoint):
         return agent_instance
                      
     def save_checkpoint(self, path, filename='checkpoint_dqn.pt'):
-        ''' Save the model checkpoint (all attributes)
+        """Save the model checkpoint (all attributes)
 
         Args:
             path (str): the path to save the model
             filename(str): the file name of checkpoint
-        '''
+        """
         torch.save(self.checkpoint_attributes(), os.path.join(path, filename))
 
 
 class Estimator(object):
-    '''
+    """
     Approximate clone of rlcard.agents.dqn_agent.Estimator that
     uses PyTorch instead of Tensorflow.  All methods input/output np.ndarray.
 
     Q-Value Estimator neural network.
     This network is used for both the Q-Network and the Target Network.
-    '''
+    """
 
     def __init__(self, num_actions=2, learning_rate=0.001, state_shape=None, mlp_layers=None, device=None):
-        ''' Initilalize an Estimator object.
+        """Initialize an Estimator object.
 
         Args:
             num_actions (int): the number output actions
             state_shape (list): the shape of the state space
             mlp_layers (list): size of outputs of mlp layers
             device (torch.device): whether to use cpu or gpu
-        '''
+        """
         self.num_actions = num_actions
         self.learning_rate=learning_rate
         self.state_shape = state_shape
@@ -372,7 +372,7 @@ def __init__(self, num_actions=2, learning_rate=0.001, state_shape=None, mlp_lay
         self.optimizer =  torch.optim.Adam(self.qnet.parameters(), lr=self.learning_rate)
 
     def predict_nograd(self, s):
-        ''' Predicts action values, but prediction is not included
+        """Predicts action values, but prediction is not included
             in the computation graph.  It is used to predict optimal next
             actions in the Double-DQN algorithm.
 
@@ -382,14 +382,14 @@ def predict_nograd(self, s):
         Returns:
           np.ndarray of shape (batch_size, NUM_VALID_ACTIONS) containing the estimated
           action values.
-        '''
+        """
         with torch.no_grad():
             s = torch.from_numpy(s).float().to(self.device)
             q_as = self.qnet(s).cpu().numpy()
         return q_as
 
     def update(self, s, a, y):
-        ''' Updates the estimator towards the given targets.
+        """Updates the estimator towards the given targets.
             In this case y is the target-network estimated
             value of the Q-network optimal actions, which
             is labeled y in Algorithm 1 of Minh et al. (2015)
@@ -401,7 +401,7 @@ def update(self, s, a, y):
 
         Returns:
           The calculated loss on the batch.
-        '''
+        """
         self.optimizer.zero_grad()
 
         self.qnet.train()
@@ -427,8 +427,8 @@ def update(self, s, a, y):
         return batch_loss
     
     def checkpoint_attributes(self):
-        ''' Return the attributes needed to restore the model from a checkpoint
-        '''
+        """Return the attributes needed to restore the model from a checkpoint
+        """
         return {
             'qnet': self.qnet.state_dict(),
             'optimizer': self.optimizer.state_dict(),
@@ -441,8 +441,8 @@ def checkpoint_attributes(self):
         
     @classmethod
     def from_checkpoint(cls, checkpoint):
-        ''' Restore the model from a checkpoint
-        '''
+        """Restore the model from a checkpoint
+        """
         estimator = cls(
             num_actions=checkpoint['num_actions'],
             learning_rate=checkpoint['learning_rate'],
@@ -457,18 +457,18 @@ def from_checkpoint(cls, checkpoint):
 
 
 class EstimatorNetwork(nn.Module):
-    ''' The function approximation network for Estimator
+    """The function approximation network for Estimator
         It is just a series of tanh layers. All in/out are torch.tensor
-    '''
+    """
 
     def __init__(self, num_actions=2, state_shape=None, mlp_layers=None):
-        ''' Initialize the Q network
+        """Initialize the Q network
 
         Args:
             num_actions (int): number of legal actions
             state_shape (list): shape of state tensor
             mlp_layers (list): output size of each fc layer
-        '''
+        """
         super(EstimatorNetwork, self).__init__()
 
         self.num_actions = num_actions
@@ -477,8 +477,7 @@ def __init__(self, num_actions=2, state_shape=None, mlp_layers=None):
 
         # build the Q network
         layer_dims = [np.prod(self.state_shape)] + self.mlp_layers
-        fc = [nn.Flatten()]
-        fc.append(nn.BatchNorm1d(layer_dims[0]))
+        fc = [nn.Flatten(), nn.BatchNorm1d(layer_dims[0])]
         for i in range(len(layer_dims)-1):
             fc.append(nn.Linear(layer_dims[i], layer_dims[i+1], bias=True))
             fc.append(nn.Tanh())
@@ -486,28 +485,28 @@ def __init__(self, num_actions=2, state_shape=None, mlp_layers=None):
         self.fc_layers = nn.Sequential(*fc)
 
     def forward(self, s):
-        ''' Predict action values
+        """Predict action values
 
         Args:
             s  (Tensor): (batch, state_shape)
-        '''
+        """
         return self.fc_layers(s)
 
 class Memory(object):
-    ''' Memory for saving transitions
-    '''
+    """Memory for saving transitions
+    """
 
     def __init__(self, memory_size, batch_size):
-        ''' Initialize
+        """Initialize
         Args:
             memory_size (int): the size of the memroy buffer
-        '''
+        """
         self.memory_size = memory_size
         self.batch_size = batch_size
         self.memory = []
 
     def save(self, state, action, reward, next_state, legal_actions, done):
-        ''' Save transition into memory
+        """Save transition into memory
 
         Args:
             state (numpy.array): the current state
@@ -516,14 +515,14 @@ def save(self, state, action, reward, next_state, legal_actions, done):
             next_state (numpy.array): the next state after performing the action
             legal_actions (list): the legal actions of the next state
             done (boolean): whether the episode is finished
-        '''
+        """
         if len(self.memory) == self.memory_size:
             self.memory.pop(0)
         transition = Transition(state, action, reward, next_state, done, legal_actions)
         self.memory.append(transition)
 
     def sample(self):
-        ''' Sample a minibatch from the replay memory
+        """Sample a minibatch from the replay memory
 
         Returns:
             state_batch (list): a batch of states
@@ -531,14 +530,14 @@ def sample(self):
             reward_batch (list): a batch of rewards
             next_state_batch (list): a batch of states
             done_batch (list): a batch of dones
-        '''
+        """
         samples = random.sample(self.memory, self.batch_size)
         samples = tuple(zip(*samples))
         return tuple(map(np.array, samples[:-1])) + (samples[-1],)
 
     def checkpoint_attributes(self):
-        ''' Returns the attributes that need to be checkpointed
-        '''
+        """Returns the attributes that need to be checkpointed
+        """
         
         return {
             'memory_size': self.memory_size,
@@ -548,15 +547,15 @@ def checkpoint_attributes(self):
             
     @classmethod
     def from_checkpoint(cls, checkpoint):
-        ''' 
+        """
         Restores the attributes from the checkpoint
-        
+
         Args:
             checkpoint (dict): the checkpoint dictionary
-            
+
         Returns:
             instance (Memory): the restored instance
-        '''
+        """
         
         instance = cls(checkpoint['memory_size'], checkpoint['batch_size'])
         instance.memory = checkpoint['memory']
diff --git a/rlcard/agents/human_agents/blackjack_human_agent.py b/rlcard/agents/human_agents/blackjack_human_agent.py
index 53354c1c2..892b523ba 100644
--- a/rlcard/agents/human_agents/blackjack_human_agent.py
+++ b/rlcard/agents/human_agents/blackjack_human_agent.py
@@ -2,28 +2,27 @@
 
 
 class HumanAgent(object):
-    ''' A human agent for Blackjack. It can be used to play alone for understand how the blackjack code runs
-    '''
+    """A human agent for Blackjack. It can be used to play alone to understand how the blackjack code runs"""
 
     def __init__(self, num_actions):
-        ''' Initilize the human agent
+        """Initialize the human agent
 
         Args:
             num_actions (int): the size of the output action space
-        '''
+        """
         self.use_raw = True
         self.num_actions = num_actions
 
     @staticmethod
     def step(state):
-        ''' Human agent will display the state and make decisions through interfaces
+        """Human agent will display the state and make decisions through interfaces
 
         Args:
             state (dict): A dictionary that represents the current state
 
         Returns:
             action (int): The action decided by human
-        '''
+        """
         _print_state(state['raw_obs'], state['raw_legal_actions'], state['action_record'])
         action = int(input('>> You choose action (integer): '))
         while action < 0 or action >= len(state['legal_actions']):
@@ -32,23 +31,24 @@ def step(state):
         return state['raw_legal_actions'][action]
 
     def eval_step(self, state):
-        ''' Predict the action given the current state for evaluation. The same to step here.
+        """Predict the action given the current state for evaluation. The same to step here.
 
         Args:
-            state (numpy.array): an numpy array that represents the current state
+            state (numpy.array): a numpy array that represents the current state
 
         Returns:
             action (int): the action predicted (randomly chosen) by the random agent
-        '''
+        """
         return self.step(state), {}
 
+
 def _print_state(state, raw_legal_actions, action_record):
-    ''' Print out the state
+    """Print out the state
 
     Args:
         state (dict): A dictionary of the raw state
-        action_record (list): A list of the each player's historical actions
-    '''
+        action_record (list): A list of each player's historical actions
+    """
     _action_list = []
     for i in range(1, len(action_record)+1):
         _action_list.insert(0, action_record[-i])
diff --git a/rlcard/agents/human_agents/gin_rummy_human_agent/gin_rummy_human_agent.py b/rlcard/agents/human_agents/gin_rummy_human_agent/gin_rummy_human_agent.py
index 7624e66ec..a90cb2c1b 100644
--- a/rlcard/agents/human_agents/gin_rummy_human_agent/gin_rummy_human_agent.py
+++ b/rlcard/agents/human_agents/gin_rummy_human_agent/gin_rummy_human_agent.py
@@ -1,9 +1,9 @@
-'''
+"""
     Project: Gui Gin Rummy
     File name: gin_rummy_human_agent.py
     Author: William Hale
     Date created: 3/14/2020
-'''
+"""
 
 import time
 
@@ -12,15 +12,14 @@
 
 
 class HumanAgent(object):
-    ''' A human agent for Gin Rummy. It can be used to play against trained models.
-    '''
+    """A human agent for Gin Rummy. It can be used to play against trained models"""
 
     def __init__(self, num_actions):
-        ''' Initialize the human agent
+        """Initialize the human agent
 
         Args:
             num_actions (int): the size of the output action space
-        '''
+        """
         self.use_raw = True
         self.num_actions = num_actions
         self.is_choosing_action_id = False
@@ -28,14 +27,14 @@ def __init__(self, num_actions):
         self.state = None
 
     def step(self, state):
-        ''' Human agent will display the state and make decisions through interfaces
+        """Human agent will display the state and make decisions through interfaces
 
         Args:
             state (dict): A dictionary that represents the current state
 
         Returns:
             action (int): The action decided by human
-        '''
+        """
         if self.is_choosing_action_id:
             raise GinRummyProgramError("self.is_choosing_action_id must be False.")
         if self.state is not None:
@@ -55,12 +54,12 @@ def step(self, state):
         return chosen_action_event
 
     def eval_step(self, state):
-        ''' Predict the action given the current state for evaluation. The same to step here.
+        """Predict the action given the current state for evaluation. The same to step here.
 
         Args:
-            state (numpy.array): an numpy array that represents the current state
+            state (numpy.array): a numpy array that represents the current state
 
         Returns:
             action (int): the action predicted (randomly chosen) by the random agent
-        '''
+        """
         return self.step(state), {}
diff --git a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_cards/card_image.py b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_cards/card_image.py
index 43e111917..2e56b6249 100644
--- a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_cards/card_image.py
+++ b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_cards/card_image.py
@@ -1,20 +1,22 @@
-'''
+"""
     Project: Gui Gin Rummy
     File name: card_image.py
     Author: William Hale
     Date created: 3/14/2020
-'''
+"""
 
 import os
 from PIL import Image, ImageTk, ImageDraw
 
 image_dir = os.path.abspath(os.path.dirname(__file__))
+
 if not os.path.isdir(os.path.join(image_dir, 'cards_png')):
     print('Downloading images...')
     import time
     import urllib.request
     import sys
     import zipfile
+
     def reporthook(count, block_size, total_size):
         global start_time
         if count == 0:
@@ -25,12 +27,12 @@ def reporthook(count, block_size, total_size):
         speed = int(progress_size / (1024 * duration))
         percent = int(count * block_size * 100 / total_size)
         sys.stdout.write("\r...%d%%, %d KB, %d KB/s, %d seconds passed" %
-                        (percent, progress_size / (1024), speed, duration))
+                         (percent, progress_size / 1024, speed, duration))
         sys.stdout.flush()
     zipurl = 'https://dczha.com/files/rlcard/cards_png.zip'
     filehandle, _ = urllib.request.urlretrieve(zipurl, reporthook=reporthook)
 
-    with zipfile.ZipFile(filehandle,"r") as zip_ref:
+    with zipfile.ZipFile(filehandle, "r") as zip_ref:
         zip_ref.extractall(image_dir)
 
     print()
diff --git a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/Gin-Rummy-GUI-Design.md b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/Gin-Rummy-GUI-Design.md
index 7ca44e02a..f2fff11da 100644
--- a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/Gin-Rummy-GUI-Design.md
+++ b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/Gin-Rummy-GUI-Design.md
@@ -95,8 +95,8 @@ The GameCanvas creates the canvas card image items as follows:
     for card_id in range(52):
         card = gin_rummy_utils.card_from_card_id(card_id)
         card_image = card_images[card.rank, card.suit]
-        card_item_id = self.create_image((0, -9999), image=card_image, anchor="nw")
-        self.itemconfigure(card_item_id, state=tk.HIDDEN)
+        card_item_id = self.create_image((0, -9999), image=card_image, anchor="nw")
+        self.itemconfigure(card_item_id, state=tk.HIDDEN)
         card_item_ids.append(card_item_id)
 ```
 
@@ -114,7 +114,7 @@ The card_items array is a way to access the card_item by its card_id.
         card = gin_rummy_utils.card_from_card_id(card_id)
         card_item_id = card_item_ids[card_id]
         card_image = card_images[card.rank, card.suit]
-        card_item = CardItem(item_id=card_item_id, card_id=card_id, card_image=card_image, game_canvas=self)
+        card_item = CardItem(item_id=card_item_id, card_id=card_id, card_image=card_image, game_canvas=self)
         card_items.append(card_item)
         canvas_items.append(card_item)
 ```
@@ -140,12 +140,12 @@ It creates this canvas item as follows:
     discard_pile_box_top = discard_pile_anchor[1]
     discard_pile_box_right = discard_pile_box_left + card_width
     discard_pile_box_bottom = discard_pile_box_top + card_height
-    discard_pile_box_item_id = self.create_rectangle(discard_pile_box_left,
+    discard_pile_box_item_id = self.create_rectangle(discard_pile_box_left,
                                                      discard_pile_box_top,
                                                      discard_pile_box_right,
                                                      discard_pile_box_bottom,
                                                      fill="gray")
-    discard_pile_box_item = CanvasItem(item_id=discard_pile_box_item_id, game_canvas=self)
+    discard_pile_box_item = CanvasItem(item_id=discard_pile_box_item_id, game_canvas=self)
     canvas_items.append(discard_pile_box_item)
 ```
 
@@ -164,9 +164,9 @@ I'm not sure if this is any better than handling it as a special case.
     for player_id in range(2):
         x, y = player_held_pile_anchors[player_id]
         x -= held_pile_tab
-        ghost_card_item_id = self.create_rectangle(x, y, x + card_width, y + card_height, width=0, fill='')
-        self.itemconfig(ghost_card_item_id, tag=held_pile_tags[player_id])
-        ghost_card_item = CanvasItem(item_id=ghost_card_item_id, game_canvas=self)
+        ghost_card_item_id = self.create_rectangle(x, y, x + card_width, y + card_height, width=0, fill='')
+        self.itemconfig(ghost_card_item_id, tag=held_pile_tags[player_id])
+        ghost_card_item = CanvasItem(item_id=ghost_card_item_id, game_canvas=self)
         canvas_items.append(ghost_card_item)
         held_pile_ghost_card_items.append(ghost_card_item)
 ```
@@ -203,12 +203,12 @@ The code is:
 ```python
 class GameApp(object):
 
-    def __init__(self, make_gin_rummy_env: Callable[[], 'GinRummyEnv'] = None):
-        self.make_gin_rummy_env = make_gin_rummy_env if make_gin_rummy_env else GameApp._make_gin_rummy_env
+    def __init__(self, make_gin_rummy_env: Callable[[], 'GinRummyEnv'] = None):
+        self.make_gin_rummy_env = make_gin_rummy_env if make_gin_rummy_env else GameApp._make_gin_rummy_env
         root = tk.Tk()
         root.resizable(False, False)
-        self.game_frame = GameFrame(root=root, game_app=self)
-        self.menu_bar = MenuBar(root, game_frame=self.game_frame)
+        self.game_frame = GameFrame(root=root, game_app=self)
+        self.menu_bar = MenuBar(root, game_frame=self.game_frame)
         root.mainloop()
 ```
 
@@ -217,10 +217,10 @@ The EnvThread is a background daemon thread that runs gin_rummy_env.
 It also starts the GameCanvasUpdater loop on the main thread.
 It maintains the following variables:
 ```python
-    self.gin_rummy_env = gin_rummy_env
-    self.game_canvas = game_canvas
-    self.mark = 0
-    self.is_stopped = False
+    self.gin_rummy_env = gin_rummy_env
+    self.game_canvas = game_canvas
+    self.mark = 0
+    self.is_stopped = False
 ```
 The mark variable is the number of actions that the GameCanvas has processed.
 As the gin_rummy_env processes actions, the GameCanvasUpdater will be notified when a human action is needed.
@@ -237,9 +237,9 @@ The HumanAgent supplies the step action when the gin_rummy_env asks for it.
 It goes into a wait loop until the GameCanvasUpdater provides the step action taken by the human player.
 It maintains the following variables:
 ```python
-    self.is_choosing_action_id = False
-    self.chosen_action_id = None  # type: int or None
-    self.state = None
+    self.is_choosing_action_id = False
+    self.chosen_action_id = None  # type: int or None
+    self.state = None
 ```
 The HumanAgent sets the state variable to the current state
 and sets the variable is_choosing_action_id to be True
@@ -254,11 +254,11 @@ The GameCanvasUpdater runs a loop on the main thread to keep the gui in sync wit
 It also returns the action taken by the human player to the gin_rummy_env via the human_agent.
 It maintains the following variables:
 ```python
-    self.game_canvas = game_canvas
-    self.env_thread = None
-    self.pending_human_action_ids = []  # type: List[int]
-    self.busy_body_id = None  # type: int or None
-    self.is_stopped = False
+    self.game_canvas = game_canvas
+    self.env_thread = None
+    self.pending_human_action_ids = []  # type: List[int]
+    self.busy_body_id = None  # type: int or None
+    self.is_stopped = False
 ```
 The game_canvas is set on initialization and is never changed.
 When a new game starts, the env_thread is set to the new env_thread for the new game
@@ -270,13 +270,13 @@ then he can tap it a second time to cancel that action.
 
 The GameCanvasUpdater runs the following loop on the main thread:
 ```python
-    def apply_canvas_updates(self):
-        if not self.env_thread.is_stopped:
-            self._advance_mark()
+    def apply_canvas_updates(self):
+        if not self.env_thread.is_stopped:
+            self._advance_mark()
             delay_ms = 1
-            self.game_canvas.after(delay_ms, func=self.apply_canvas_updates)
+            self.game_canvas.after(delay_ms, func=self.apply_canvas_updates)
         else:
-            self.is_stopped = True
+            self.is_stopped = True
 ```
 It is always trying to advance the mark to keep up with the gin_rummy_env that is running in the env_thread.
 The busy_body_id is the player_id whose action is being processed.
diff --git a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/canvas_item.py b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/canvas_item.py
index 711a4e1fa..076b2a388 100644
--- a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/canvas_item.py
+++ b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/canvas_item.py
@@ -1,9 +1,9 @@
-'''
+"""
     Project: Gui Gin Rummy
     File name: canvas_item.py
     Author: William Hale
     Date created: 3/14/2020
-'''
+"""
 
 # from __future__ import annotations
 from typing import TYPE_CHECKING
diff --git a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/configurations.py b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/configurations.py
index bc3ad3287..c56b8a1e2 100644
--- a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/configurations.py
+++ b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/configurations.py
@@ -1,9 +1,9 @@
-'''
+"""
     Project: Gui Gin Rummy
     File name: configurations.py
     Author: William Hale
     Date created: 3/14/2020
-'''
+"""
 
 import os
 
diff --git a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/env_thread.py b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/env_thread.py
index 6b286cff2..57ef6a098 100644
--- a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/env_thread.py
+++ b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/env_thread.py
@@ -1,9 +1,9 @@
-'''
+"""
     Project: Gui Gin Rummy
     File name: env_thread.py
     Author: William Hale
     Date created: 3/14/2020
-'''
+"""
 
 # from __future__ import annotations
 from typing import TYPE_CHECKING
diff --git a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_app.py b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_app.py
index f44750558..3c6c2b396 100644
--- a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_app.py
+++ b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_app.py
@@ -1,9 +1,9 @@
-'''
+"""
     Project: Gui Gin Rummy
     File name: game_app.py
     Author: William Hale
     Date created: 3/14/2020
-'''
+"""
 
 # from __future__ import annotations
 from typing import TYPE_CHECKING
diff --git a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_canvas.py b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_canvas.py
index 6a35f002b..284a5f0e5 100644
--- a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_canvas.py
+++ b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_canvas.py
@@ -1,9 +1,9 @@
-'''
+"""
     Project: Gui Gin Rummy
     File name: game_canvas.py
     Author: William Hale
     Date created: 3/14/2020
-'''
+"""
 
 # from __future__ import annotations
 from typing import TYPE_CHECKING
diff --git a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_canvas_debug.py b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_canvas_debug.py
index cfa16a4e9..ea023e103 100644
--- a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_canvas_debug.py
+++ b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_canvas_debug.py
@@ -1,9 +1,9 @@
-'''
+"""
     Project: Gui Gin Rummy
     File name: game_canvas_debug.py
     Author: William Hale
     Date created: 3/14/2020
-'''
+"""
 
 # from __future__ import annotations
 from typing import TYPE_CHECKING
@@ -36,11 +36,10 @@ def description(self):
         discard_pile_items = game_canvas.find_withtag(configurations.DISCARD_PILE_TAG)
         north_held_pile_item_ids = game_canvas.getter.get_held_pile_item_ids(player_id=0)
         south_held_pile_item_ids = game_canvas.getter.get_held_pile_item_ids(player_id=1)
-        lines = []
-        lines.append("dealer: {}".format(GinRummyPlayer.short_name_of(player_id=dealer_id)))
-        lines.append("current_player: {}".format(GinRummyPlayer.short_name_of(player_id=current_player_id)))
-        lines.append("north hand: {}".format([card_name(card_item_id) for card_item_id in north_held_pile_item_ids]))
-        lines.append("stockpile: {}".format([card_name(card_item_id) for card_item_id in stock_pile_item_ids]))
-        lines.append("discard pile: {}".format([card_name(card_item_id) for card_item_id in discard_pile_items]))
-        lines.append("south hand: {}".format([card_name(card_item_id) for card_item_id in south_held_pile_item_ids]))
+        lines = ["dealer: {}".format(GinRummyPlayer.short_name_of(player_id=dealer_id)),
+                 "current_player: {}".format(GinRummyPlayer.short_name_of(player_id=current_player_id)),
+                 "north hand: {}".format([card_name(card_item_id) for card_item_id in north_held_pile_item_ids]),
+                 "stockpile: {}".format([card_name(card_item_id) for card_item_id in stock_pile_item_ids]),
+                 "discard pile: {}".format([card_name(card_item_id) for card_item_id in discard_pile_items]),
+                 "south hand: {}".format([card_name(card_item_id) for card_item_id in south_held_pile_item_ids])]
         return "\n".join(lines)
diff --git a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_canvas_getter.py b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_canvas_getter.py
index 0060b41af..8ab5356ae 100644
--- a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_canvas_getter.py
+++ b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_canvas_getter.py
@@ -1,9 +1,9 @@
-'''
+"""
     Project: Gui Gin Rummy
     File name: game_canvas_getter.py
     Author: William Hale
     Date created: 3/14/2020
-'''
+"""
 
 # from __future__ import annotations
 from typing import TYPE_CHECKING
diff --git a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_canvas_post_doing_action.py b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_canvas_post_doing_action.py
index befde76b3..453073fcc 100644
--- a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_canvas_post_doing_action.py
+++ b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_canvas_post_doing_action.py
@@ -1,9 +1,9 @@
-'''
+"""
     Project: Gui Gin Rummy
     File name: game_canvas_post_doing_action.py
     Author: William Hale
     Date created: 3/14/2020
-'''
+"""
 
 # from __future__ import annotations
 from typing import TYPE_CHECKING
diff --git a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_canvas_query.py b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_canvas_query.py
index 59b000afb..c4dd195f6 100644
--- a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_canvas_query.py
+++ b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_canvas_query.py
@@ -1,9 +1,9 @@
-'''
+"""
     Project: Gui Gin Rummy
     File name: game_canvas.query.py
     Author: William Hale
     Date created: 3/14/2020
-'''
+"""
 
 # from __future__ import annotations
 from typing import TYPE_CHECKING
diff --git a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_canvas_updater.py b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_canvas_updater.py
index 97822153b..9c48b43ca 100644
--- a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_canvas_updater.py
+++ b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_canvas_updater.py
@@ -1,9 +1,9 @@
-'''
+"""
     Project: Gui Gin Rummy
     File name: game_canvas_updater.py
     Author: William Hale
     Date created: 3/14/2020
-'''
+"""
 
 # from __future__ import annotations
 from typing import TYPE_CHECKING
diff --git a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_frame.py b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_frame.py
index 001b1a237..cd3dc3edd 100644
--- a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_frame.py
+++ b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/game_frame.py
@@ -1,9 +1,9 @@
-'''
+"""
     Project: Gui Gin Rummy
     File name: game_frame.py
     Author: William Hale
     Date created: 3/14/2020
-'''
+"""
 
 # from __future__ import annotations
 from typing import TYPE_CHECKING
diff --git a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/handling_tap.py b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/handling_tap.py
index 07ec23184..ff746830b 100644
--- a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/handling_tap.py
+++ b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/handling_tap.py
@@ -1,9 +1,9 @@
-'''
+"""
     Project: Gui Gin Rummy
     File name: handling_tap.py
     Author: William Hale
     Date created: 3/14/2020
-'''
+"""
 
 # from __future__ import annotations
 from typing import TYPE_CHECKING
diff --git a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/handling_tap_discard_pile.py b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/handling_tap_discard_pile.py
index 5fc8fc611..3826a1f79 100644
--- a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/handling_tap_discard_pile.py
+++ b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/handling_tap_discard_pile.py
@@ -1,9 +1,9 @@
-'''
+"""
     Project: Gui Gin Rummy
     File name: handling_tap_discard_pile.py
     Author: William Hale
     Date created: 3/14/2020
-'''
+"""
 
 # from __future__ import annotations
 from typing import TYPE_CHECKING
diff --git a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/handling_tap_held_pile.py b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/handling_tap_held_pile.py
index f6616794b..86f7121c8 100644
--- a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/handling_tap_held_pile.py
+++ b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/handling_tap_held_pile.py
@@ -1,9 +1,9 @@
-'''
+"""
     Project: Gui Gin Rummy
     File name: handling_tap_held_pile.py
     Author: William Hale
     Date created: 3/14/2020
-'''
+"""
 
 # from __future__ import annotations
 from typing import TYPE_CHECKING
diff --git a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/handling_tap_player_pane.py b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/handling_tap_player_pane.py
index 1b35d0c3e..c6718d2b6 100644
--- a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/handling_tap_player_pane.py
+++ b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/handling_tap_player_pane.py
@@ -1,9 +1,9 @@
-'''
+"""
     Project: Gui Gin Rummy
     File name: handling_tap_player_pane.py
     Author: William Hale
     Date created: 3/14/2020
-'''
+"""
 
 # from __future__ import annotations
 from typing import TYPE_CHECKING
diff --git a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/handling_tap_stock_pile.py b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/handling_tap_stock_pile.py
index 811d0d0e2..aaa1458e3 100644
--- a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/handling_tap_stock_pile.py
+++ b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/handling_tap_stock_pile.py
@@ -1,9 +1,9 @@
-'''
+"""
     Project: Gui Gin Rummy
     File name: handling_tap_stock_pile.py
     Author: William Hale
     Date created: 3/14/2020
-'''
+"""
 
 # from __future__ import annotations
 from typing import TYPE_CHECKING
diff --git a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/handling_tap_to_arrange_held_pile.py b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/handling_tap_to_arrange_held_pile.py
index 19d7c312c..26c331f49 100644
--- a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/handling_tap_to_arrange_held_pile.py
+++ b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/handling_tap_to_arrange_held_pile.py
@@ -1,9 +1,9 @@
-'''
+"""
     Project: Gui Gin Rummy
     File name: handling_tap_to_arrange_held_pile.py
     Author: William Hale
     Date created: 3/14/2020
-'''
+"""
 
 # from __future__ import annotations
 from typing import TYPE_CHECKING
diff --git a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/info_messaging.py b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/info_messaging.py
index f7402800d..2d8e8fb57 100644
--- a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/info_messaging.py
+++ b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/info_messaging.py
@@ -1,9 +1,9 @@
-'''
+"""
     Project: Gui Gin Rummy
     File name: info_messaging.py
     Author: William Hale
     Date created: 3/28/2020
-'''
+"""
 
 # from __future__ import annotations
 from typing import TYPE_CHECKING
@@ -30,10 +30,8 @@ def show_activate_menus_message(game_canvas: 'GameCanvas'):
         return
     if game_canvas.query.is_going_out_button_visible():
         return
-    lines = []  # type: List[str]
-    lines.append("The menu items may not drop down.")
-    lines.append("On an Apple computer, this is a known problem.")
-    lines.append("A workaround is to hit cmd-tab twice to switch to another application and back to this application.")
+    lines = ["The menu items may not drop down.", "On an Apple computer, this is a known problem.",
+             "A workaround is to hit cmd-tab twice to switch to another application and back to this application."]  # type: List[str]
     info_message = " ".join(lines)
     game_canvas.info_message_label.configure(text=info_message)
 
@@ -81,10 +79,8 @@ def show_arrange_cards_message(player_id: int, game_canvas: 'GameCanvas'):
     if move_count <= 1 or move_count > 8:
         return
     if player_id == 1 and game_canvas.info_message_label['text'] == "":
-        lines = ["Tip:"]  # type: List[str]
-        lines.append("You can arrange cards in your hand.")
-        lines.append("Select the cards you want to move by tapping them.")
-        lines.append("Right click the card that you want to drop them on.")
+        lines = ["Tip:", "You can arrange cards in your hand.", "Select the cards you want to move by tapping them.",
+                 "Right click the card that you want to drop them on."]  # type: List[str]
         info_message = " ".join(lines)
         game_canvas.info_message_label.configure(text=info_message)
 
@@ -94,7 +90,6 @@ def show_hide_tips_message(game_canvas: 'GameCanvas'):
         return
     if not configurations.IS_SHOW_TIPS:
         return
-    lines = ["Tip:"]  # type: List[str]
-    lines.append("Uncheck 'show tips' in the preferences to hide tips.")
+    lines = ["Tip:", "Uncheck 'show tips' in the preferences to hide tips."]  # type: List[str]
     info_message = " ".join(lines)
     game_canvas.info_message_label.configure(text=info_message)
diff --git a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/menu_bar.py b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/menu_bar.py
index bd342e2e0..14524f221 100644
--- a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/menu_bar.py
+++ b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/menu_bar.py
@@ -1,9 +1,9 @@
-'''
+"""
     Project: Gui Gin Rummy
     File name: menu_bar.py
     Author: William Hale
     Date created: 3/14/2020
-'''
+"""
 
 # from __future__ import annotations
 from typing import TYPE_CHECKING
diff --git a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/player_type.py b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/player_type.py
index 7757ad15d..7645dca90 100644
--- a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/player_type.py
+++ b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/player_type.py
@@ -1,9 +1,9 @@
-'''
+"""
     Project: Gui Gin Rummy
     File name: player_type.py
     Author: William Hale
     Date created: 3/14/2020
-'''
+"""
 
 import enum
 
diff --git a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/preferences_window.py b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/preferences_window.py
index e07bd0c22..3df52c142 100644
--- a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/preferences_window.py
+++ b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/preferences_window.py
@@ -1,9 +1,9 @@
-'''
+"""
     Project: Gui Gin Rummy
     File name: preferences_window.py
     Author: William Hale
     Date created: 3/14/2020
-'''
+"""
 
 from tkinter import *
 import tkinter.colorchooser as colorchooser
diff --git a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/starting_new_game.py b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/starting_new_game.py
index 734cc9432..d4e2c5779 100644
--- a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/starting_new_game.py
+++ b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/starting_new_game.py
@@ -1,9 +1,9 @@
-'''
+"""
     Project: Gui Gin Rummy
     File name: starting_new_game.py
     Author: William Hale
     Date created: 3/14/2020
-'''
+"""
 
 # from __future__ import annotations
 from typing import TYPE_CHECKING
diff --git a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/status_messaging.py b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/status_messaging.py
index 2709f93e7..bf085b5e3 100644
--- a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/status_messaging.py
+++ b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/status_messaging.py
@@ -1,9 +1,9 @@
-'''
+"""
     Project: Gui Gin Rummy
     File name: status_messaging.py
     Author: William Hale
     Date created: 3/14/2020
-'''
+"""
 
 # from __future__ import annotations
 from typing import TYPE_CHECKING
diff --git a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/utils.py b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/utils.py
index 4c82ac6df..4b1fb7047 100644
--- a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/utils.py
+++ b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/utils.py
@@ -1,9 +1,9 @@
-'''
+"""
     Project: Gui Gin Rummy
     File name: utils.py
     Author: William Hale
     Date created: 3/14/2020
-'''
+"""
 
 # from __future__ import annotations
 from typing import TYPE_CHECKING
diff --git a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/utils_extra.py b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/utils_extra.py
index cb738450e..592f3e267 100644
--- a/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/utils_extra.py
+++ b/rlcard/agents/human_agents/gin_rummy_human_agent/gui_gin_rummy/utils_extra.py
@@ -1,9 +1,9 @@
-'''
+"""
     Project: Gui Gin Rummy
     File name: utils_extra.py
     Author: William Hale
     Date created: 3/14/2020
-'''
+"""
 
 from PIL import Image, ImageDraw, ImageFilter
 
diff --git a/rlcard/agents/human_agents/leduc_holdem_human_agent.py b/rlcard/agents/human_agents/leduc_holdem_human_agent.py
index 65b10fca6..9a52ade6b 100644
--- a/rlcard/agents/human_agents/leduc_holdem_human_agent.py
+++ b/rlcard/agents/human_agents/leduc_holdem_human_agent.py
@@ -2,28 +2,28 @@
 
 
 class HumanAgent(object):
-    ''' A human agent for Leduc Holdem. It can be used to play against trained models
-    '''
+    """A human agent for Leduc Holdem. It can be used to play against trained models
+    """
 
     def __init__(self, num_actions):
-        ''' Initilize the human agent
+        """Initialize the human agent
 
         Args:
             num_actions (int): the size of the ouput action space
-        '''
+        """
         self.use_raw = True
         self.num_actions = num_actions
 
     @staticmethod
     def step(state):
-        ''' Human agent will display the state and make decisions through interfaces
+        """Human agent will display the state and make decisions through interfaces
 
         Args:
             state (dict): A dictionary that represents the current state
 
         Returns:
             action (int): The action decided by human
-        '''
+        """
         _print_state(state['raw_obs'], state['action_record'])
         action = int(input('>> You choose action (integer): '))
         while action < 0 or action >= len(state['legal_actions']):
@@ -32,23 +32,23 @@ def step(state):
         return state['raw_legal_actions'][action]
 
     def eval_step(self, state):
-        ''' Predict the action given the curent state for evaluation. The same to step here.
+        """Predict the action given the current state for evaluation. The same to step here.
 
         Args:
             state (numpy.array): an numpy array that represents the current state
 
         Returns:
             action (int): the action predicted (randomly chosen) by the random agent
-        '''
+        """
         return self.step(state), {}
 
 def _print_state(state, action_record):
-    ''' Print out the state
+    """Print out the state
 
     Args:
         state (dict): A dictionary of the raw state
         action_record (list): A list of the historical actions
-    '''
+    """
     _action_list = []
     for i in range(1, len(action_record)+1):
         if action_record[-i][0] == state['current_player']:
diff --git a/rlcard/agents/human_agents/limit_holdem_human_agent.py b/rlcard/agents/human_agents/limit_holdem_human_agent.py
index 1a893bdf9..d18b5443a 100644
--- a/rlcard/agents/human_agents/limit_holdem_human_agent.py
+++ b/rlcard/agents/human_agents/limit_holdem_human_agent.py
@@ -2,28 +2,28 @@
 
 
 class HumanAgent(object):
-    ''' A human agent for Limit Holdem. It can be used to play against trained models
-    '''
+    """A human agent for Limit Holdem. It can be used to play against trained models
+    """
 
     def __init__(self, num_actions):
-        ''' Initilize the human agent
+        """Initialize the human agent
 
         Args:
             num_actions (int): the size of the ouput action space
-        '''
+        """
         self.use_raw = True
         self.num_actions = num_actions
 
     @staticmethod
     def step(state):
-        ''' Human agent will display the state and make decisions through interfaces
+        """Human agent will display the state and make decisions through interfaces
 
         Args:
             state (dict): A dictionary that represents the current state
 
         Returns:
             action (int): The action decided by human
-        '''
+        """
         _print_state(state['raw_obs'], state['action_record'])
         action = int(input('>> You choose action (integer): '))
         while action < 0 or action >= len(state['legal_actions']):
@@ -32,23 +32,23 @@ def step(state):
         return state['raw_legal_actions'][action]
 
     def eval_step(self, state):
-        ''' Predict the action given the curent state for evaluation. The same to step here.
+        """Predict the action given the current state for evaluation. The same to step here.
 
         Args:
             state (numpy.array): an numpy array that represents the current state
 
         Returns:
             action (int): the action predicted (randomly chosen) by the random agent
-        '''
+        """
         return self.step(state), {}
 
 def _print_state(state, action_record):
-    ''' Print out the state
+    """Print out the state
 
     Args:
         state (dict): A dictionary of the raw state
         action_record (list): A list of the each player's historical actions
-    '''
+    """
     _action_list = []
     for i in range(1, len(action_record)+1):
         _action_list.insert(0, action_record[-i])
diff --git a/rlcard/agents/human_agents/nolimit_holdem_human_agent.py b/rlcard/agents/human_agents/nolimit_holdem_human_agent.py
index 4d1e8b805..ec3616177 100644
--- a/rlcard/agents/human_agents/nolimit_holdem_human_agent.py
+++ b/rlcard/agents/human_agents/nolimit_holdem_human_agent.py
@@ -2,28 +2,27 @@
 
 
 class HumanAgent(object):
-    ''' A human agent for No Limit Holdem. It can be used to play against trained models
-    '''
+    """A human agent for No Limit Holdem. It can be used to play against trained models"""
 
     def __init__(self, num_actions):
-        ''' Initilize the human agent
+        """Initialize the human agent
 
         Args:
             num_actions (int): the size of the ouput action space
-        '''
+        """
         self.use_raw = True
         self.num_actions = num_actions
 
     @staticmethod
     def step(state):
-        ''' Human agent will display the state and make decisions through interfaces
+        """Human agent will display the state and make decisions through interfaces
 
         Args:
             state (dict): A dictionary that represents the current state
 
         Returns:
             action (int): The action decided by human
-        '''
+        """
         _print_state(state['raw_obs'], state['action_record'])
         action = int(input('>> You choose action (integer): '))
         while action < 0 or action >= len(state['legal_actions']):
@@ -32,25 +31,26 @@ def step(state):
         return state['raw_legal_actions'][action]
 
     def eval_step(self, state):
-        ''' Predict the action given the curent state for evaluation. The same to step here.
+        """Predict the action given the current state for evaluation. The same to step here.
 
         Args:
-            state (numpy.array): an numpy array that represents the current state
+            state (numpy.array): a numpy array that represents the current state
 
         Returns:
             action (int): the action predicted (randomly chosen) by the random agent
-        '''
+        """
         return self.step(state), {}
 
+
 def _print_state(state, action_record):
-    ''' Print out the state
+    """Print out the state
 
     Args:
         state (dict): A dictionary of the raw state
         action_record (list): A list of the historical actions
-    '''
+    """
     _action_list = []
-    for i in range(1, len(action_record)+1):
+    for i in range(1, len(action_record) + 1):
         if action_record[-i][0] == state['current_player']:
             break
         _action_list.insert(0, action_record[-i])
@@ -60,12 +60,12 @@ def _print_state(state, action_record):
     print('\n=============== Community Card ===============')
     print_card(state['public_cards'])
 
-    print('=============  Player',state["current_player"],'- Hand   =============')
+    print('=============  Player', state["current_player"], '- Hand   =============')
     print_card(state['hand'])
 
     print('===============     Chips      ===============')
-    print('In Pot:',state["pot"])
-    print('Remaining:',state["stakes"])
+    print('In Pot:', state["pot"])
+    print('Remaining:', state["stakes"])
 
     print('\n=========== Actions You Can Choose ===========')
     print(', '.join([str(index) + ': ' + str(action) for index, action in enumerate(state['legal_actions'])]))
diff --git a/rlcard/agents/human_agents/uno_human_agent.py b/rlcard/agents/human_agents/uno_human_agent.py
index caf507b16..6f21704df 100644
--- a/rlcard/agents/human_agents/uno_human_agent.py
+++ b/rlcard/agents/human_agents/uno_human_agent.py
@@ -1,28 +1,28 @@
 from rlcard.games.uno.card import UnoCard
 
+
 class HumanAgent(object):
-    ''' A human agent for Leduc Holdem. It can be used to play against trained models
-    '''
+    """A human agent for UNO. It can be used to play against trained models"""
 
     def __init__(self, num_actions):
-        ''' Initilize the human agent
+        """Initialize the human agent
 
         Args:
             num_actions (int): the size of the ouput action space
-        '''
+        """
         self.use_raw = True
         self.num_actions = num_actions
 
     @staticmethod
     def step(state):
-        ''' Human agent will display the state and make decisions through interfaces
+        """Human agent will display the state and make decisions through interfaces
 
         Args:
             state (dict): A dictionary that represents the current state
 
         Returns:
             action (int): The action decided by human
-        '''
+        """
         print(state['raw_obs'])
         _print_state(state['raw_obs'], state['action_record'])
         action = int(input('>> You choose action (integer): '))
@@ -32,24 +32,25 @@ def step(state):
         return state['raw_legal_actions'][action]
 
     def eval_step(self, state):
-        ''' Predict the action given the curent state for evaluation. The same to step here.
+        """Predict the action given the current state for evaluation. The same to step here.
 
         Args:
-            state (numpy.array): an numpy array that represents the current state
+            state (numpy.array): a numpy array that represents the current state
 
         Returns:
             action (int): the action predicted (randomly chosen) by the random agent
-        '''
+        """
         return self.step(state), {}
 
+
 def _print_state(state, action_record):
-    ''' Print out the state of a given player
+    """Print out the state of a given player
 
     Args:
         player (int): Player id
-    '''
+    """
     _action_list = []
-    for i in range(1, len(action_record)+1):
+    for i in range(1, len(action_record) + 1):
         if action_record[-i][0] == state['current_player']:
             break
         _action_list.insert(0, action_record[-i])
@@ -70,16 +71,17 @@ def _print_state(state, action_record):
             print('Player {} has {} cards.'.format(i, state['num_cards'][i]))
     print('======== Actions You Can Choose =========')
     for i, action in enumerate(state['legal_actions']):
-        print(str(i)+': ', end='')
+        print(str(i) + ': ', end='')
         UnoCard.print_cards(action, wild_color=True)
         if i < len(state['legal_actions']) - 1:
             print(', ', end='')
     print('\n')
 
+
 def _print_action(action):
-    ''' Print out an action in a nice form
+    """Print out an action in a nice form
 
     Args:
         action (str): A string a action
-    '''
+    """
     UnoCard.print_cards(action, wild_color=True)
diff --git a/rlcard/agents/nfsp_agent.py b/rlcard/agents/nfsp_agent.py
index 34f739fe9..89014ec6a 100644
--- a/rlcard/agents/nfsp_agent.py
+++ b/rlcard/agents/nfsp_agent.py
@@ -14,10 +14,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-''' Neural Fictitious Self-Play (NFSP) agent implemented in TensorFlow.
+"""Neural Fictitious Self-Play (NFSP) agent implemented in PyTorch.
 
 See the paper https://arxiv.org/abs/1603.01121 for more details.
-'''
+"""
 
 import os
 import random
@@ -33,13 +33,14 @@
 
 Transition = collections.namedtuple('Transition', 'info_state action_probs')
 
+
 class NFSPAgent(object):
-    ''' An approximate clone of rlcard.agents.nfsp_agent that uses
+    """An approximate clone of rlcard.agents.nfsp_agent that uses
     pytorch instead of tensorflow.  Note that this implementation
     differs from Henrich and Silver (2016) in that the supervised
     training minimizes cross-entropy with respect to the stored
     action probabilities rather than the realized actions.
-    '''
+    """
 
     def __init__(self,
                  num_actions=4,
@@ -66,7 +67,7 @@ def __init__(self,
                  device=None,
                  save_path=None,
                  save_every=float('inf')):
-        ''' Initialize the NFSP agent.
+        """Initialize the NFSP agent.
 
         Args:
             num_actions (int): The number of actions.
@@ -74,7 +75,7 @@ def __init__(self,
             hidden_layers_sizes (list): The hidden layers sizes for the layers of
               the average policy.
             reservoir_buffer_capacity (int): The size of the buffer for average policy.
-            anticipatory_param (float): The hyper-parameter that balances rl/avarage policy.
+            anticipatory_param (float): The hyper-parameter that balances rl/average policy.
             batch_size (int): The batch_size for training average policy.
             train_every (int): Train the SL policy every X steps.
             rl_learning_rate (float): The learning rate of the RL agent.
@@ -92,7 +93,7 @@ def __init__(self,
             q_train_step (int): Train the model every X steps.
             q_mlp_layers (list): The layer sizes of inner DQN agent.
             device (torch.device): Whether to use the cpu or gpu
-        '''
+        """
         self.use_raw = False
         self._num_actions = num_actions
         self._state_shape = state_shape
@@ -120,24 +121,23 @@ def __init__(self,
         self.train_t = 0
 
         # Build the action-value network
-        self._rl_agent = DQNAgent(q_replay_memory_size, q_replay_memory_init_size, \
-            q_update_target_estimator_every, q_discount_factor, q_epsilon_start, q_epsilon_end, \
-            q_epsilon_decay_steps, q_batch_size, num_actions, state_shape, q_train_every, q_mlp_layers, \
-            rl_learning_rate, device)
+        self._rl_agent = DQNAgent(q_replay_memory_size, q_replay_memory_init_size,
+                                  q_update_target_estimator_every, q_discount_factor, q_epsilon_start, q_epsilon_end,
+                                  q_epsilon_decay_steps, q_batch_size, num_actions, state_shape, q_train_every,
+                                  q_mlp_layers,
+                                  rl_learning_rate, device)
 
         # Build the average policy supervised model
         self._build_model()
 
         self.sample_episode_policy()
-        
+
         # Checkpoint saving parameters
         self.save_path = save_path
         self.save_every = save_every
 
     def _build_model(self):
-        ''' Build the average policy network
-        '''
-
+        """Build the average policy network"""
         # configure the average policy network
         policy_network = AveragePolicyNetwork(self._num_actions, self._state_shape, self._layer_sizes)
         policy_network = policy_network.to(self.device)
@@ -153,26 +153,27 @@ def _build_model(self):
         self.policy_network_optimizer = torch.optim.Adam(self.policy_network.parameters(), lr=self._sl_learning_rate)
 
     def feed(self, ts):
-        ''' Feed data to inner RL agent
+        """Feed data to inner RL agent
 
         Args:
             ts (list): A list of 5 elements that represent the transition.
-        '''
+        """
         self._rl_agent.feed(ts)
         self.total_t += 1
-        if self.total_t>0 and len(self._reservoir_buffer) >= self._min_buffer_size_to_learn and self.total_t%self._train_every == 0:
-            sl_loss  = self.train_sl()
+        if self.total_t > 0 and len(
+                self._reservoir_buffer) >= self._min_buffer_size_to_learn and self.total_t % self._train_every == 0:
+            sl_loss = self.train_sl()
             print('\rINFO - Step {}, sl-loss: {}'.format(self.total_t, sl_loss), end='')
 
     def step(self, state):
-        ''' Returns the action to be taken.
+        """Returns the action to be taken.
 
         Args:
             state (dict): The current state
 
         Returns:
             action (int): An action id
-        '''
+        """
         obs = state['obs']
         legal_actions = list(state['legal_actions'].keys())
         if self._mode == 'best_response':
@@ -189,7 +190,7 @@ def step(self, state):
         return action
 
     def eval_step(self, state):
-        ''' Use the average policy for evaluation purpose
+        """Use the average policy for evaluation purpose
 
         Args:
             state (dict): The current state.
@@ -197,7 +198,7 @@ def eval_step(self, state):
         Returns:
             action (int): An action id.
             info (dict): A dictionary containing information
-        '''
+        """
         if self.evaluate_with == 'best_response':
             action, info = self._rl_agent.eval_step(state)
         elif self.evaluate_with == 'average_policy':
@@ -207,28 +208,28 @@ def eval_step(self, state):
             probs = remove_illegal(probs, legal_actions)
             action = np.random.choice(len(probs), p=probs)
             info = {}
-            info['probs'] = {state['raw_legal_actions'][i]: float(probs[list(state['legal_actions'].keys())[i]]) for i in range(len(state['legal_actions']))}
+            info['probs'] = {state['raw_legal_actions'][i]: float(probs[list(state['legal_actions'].keys())[i]]) for i
+                             in range(len(state['legal_actions']))}
         else:
             raise ValueError("'evaluate_with' should be either 'average_policy' or 'best_response'.")
         return action, info
 
     def sample_episode_policy(self):
-        ''' Sample average/best_response policy
-        '''
+        """Sample average/best_response policy"""
         if np.random.rand() < self._anticipatory_param:
             self._mode = 'best_response'
         else:
             self._mode = 'average_policy'
 
     def _act(self, info_state):
-        ''' Predict action probability givin the observation and legal actions
+        """Predict action probability given the observation and legal actions
             Not connected to computation graph
         Args:
-            info_state (numpy.array): An obervation.
+            info_state (numpy.array): An observation.
 
         Returns:
             action_probs (numpy.array): The predicted action probability.
-        '''
+        """
         info_state = np.expand_dims(info_state, axis=0)
         info_state = torch.from_numpy(info_state).float().to(self.device)
 
@@ -240,28 +241,28 @@ def _act(self, info_state):
         return action_probs
 
     def _add_transition(self, state, probs):
-        ''' Adds the new transition to the reservoir buffer.
+        """Adds the new transition to the reservoir buffer.
 
         Transitions are in the form (state, probs).
 
         Args:
             state (numpy.array): The state.
             probs (numpy.array): The probabilities of each action.
-        '''
+        """
         transition = Transition(
-                info_state=state,
-                action_probs=probs)
+            info_state=state,
+            action_probs=probs)
         self._reservoir_buffer.add(transition)
 
     def train_sl(self):
-        ''' Compute the loss on sampled transitions and perform a avg-network update.
+        """Compute the loss on sampled transitions and perform an avg-network update.
 
         If there are not enough elements in the buffer, no loss is computed and
         `None` is returned instead.
 
         Returns:
             loss (float): The average loss obtained on this batch of transitions or `None`.
-        '''
+        """
         if (len(self._reservoir_buffer) < self._batch_size or
                 len(self._reservoir_buffer) < self._min_buffer_size_to_learn):
             return None
@@ -302,14 +303,14 @@ def train_sl(self):
     def set_device(self, device):
         self.device = device
         self._rl_agent.set_device(device)
-        
+
     def checkpoint_attributes(self):
-        '''
+        """
         Return the current checkpoint attributes (dict)
         Checkpoint attributes are used to save and restore the model in the middle of training
         Saves the model state dict, optimizer state dict, and all other instance variables
-        '''
-        
+        """
+
         return {
             'agent_type': 'NFSPAgent',
             'policy_network': self.policy_network.checkpoint_attributes(),
@@ -328,15 +329,14 @@ def checkpoint_attributes(self):
             'sl_learning_rate': self._sl_learning_rate,
             'train_every': self._train_every,
         }
-    
+
     @classmethod
     def from_checkpoint(cls, checkpoint):
-        '''
-        Restore the model from a checkpoint
-        
+        """Restore the model from a checkpoint
+
         Args:
             checkpoint (dict): the checkpoint attributes generated by checkpoint_attributes()
-        '''
+        """
         print("\nINFO - Restoring model from checkpoint...")
         agent = cls(
             anticipatory_param=checkpoint['anticipatory_param'],
@@ -351,7 +351,7 @@ def from_checkpoint(cls, checkpoint):
             state_shape=checkpoint['rl_agent']['q_estimator']['state_shape'],
             hidden_layers_sizes=[],
         )
-        
+
         agent.policy_network = AveragePolicyNetwork.from_checkpoint(checkpoint['policy_network'])
         agent._reservoir_buffer = ReservoirBuffer.from_checkpoint(checkpoint['reservoir_buffer'])
         agent._mode = checkpoint['mode']
@@ -364,25 +364,25 @@ def from_checkpoint(cls, checkpoint):
         agent._rl_agent.from_checkpoint(checkpoint['rl_agent'])
         agent._rl_agent.set_device(agent.device)
         return agent
-        
+
     def save_checkpoint(self, path, filename='checkpoint_nfsp.pt'):
-        ''' Save the model checkpoint (all attributes)
+        """Save the model checkpoint (all attributes)
 
         Args:
             path (str): the path to save the model
-        '''
+        """
         torch.save(self.checkpoint_attributes(), os.path.join(path, filename))
-        
+
 
 class AveragePolicyNetwork(nn.Module):
-    '''
+    """
     Approximates the history of action probabilities
     given state (average policy). Forward pass returns
     log probabilities of actions.
-    '''
+    """
 
     def __init__(self, num_actions=2, state_shape=None, mlp_layers=None):
-        ''' Initialize the policy network.  It's just a bunch of ReLU
+        """Initialize the policy network.  It's just a bunch of ReLU
         layers with no activation on the final one, initialized with
         Xavier (sonnet.nets.MLP and tensorflow defaults)
 
@@ -390,7 +390,7 @@ def __init__(self, num_actions=2, state_shape=None, mlp_layers=None):
             num_actions (int): number of output actions
             state_shape (list): shape of state tensor for each sample
             mlp_laters (list): output size of each mlp layer including final
-        '''
+        """
         super(AveragePolicyNetwork, self).__init__()
 
         self.num_actions = num_actions
@@ -399,80 +399,80 @@ def __init__(self, num_actions=2, state_shape=None, mlp_layers=None):
 
         # set up mlp w/ relu activations
         layer_dims = [np.prod(self.state_shape)] + self.mlp_layers
-        mlp = [nn.Flatten()]
-        mlp.append(nn.BatchNorm1d(layer_dims[0]))
-        for i in range(len(layer_dims)-1):
-            mlp.append(nn.Linear(layer_dims[i], layer_dims[i+1]))
-            if i != len(layer_dims) - 2: # all but final have relu
+        mlp = [nn.Flatten(), nn.BatchNorm1d(layer_dims[0])]
+        for i in range(len(layer_dims) - 1):
+            mlp.append(nn.Linear(layer_dims[i], layer_dims[i + 1]))
+            if i != len(layer_dims) - 2:  # all but final have relu
                 mlp.append(nn.ReLU())
         self.mlp = nn.Sequential(*mlp)
 
     def forward(self, s):
-        ''' Log action probabilities of each action from state
+        """Log action probabilities of each action from state
 
         Args:
             s (Tensor): (batch, state_shape) state tensor
 
         Returns:
             log_action_probs (Tensor): (batch, num_actions)
-        '''
+        """
         logits = self.mlp(s)
         log_action_probs = F.log_softmax(logits, dim=-1)
         return log_action_probs
-    
+
     def checkpoint_attributes(self):
-        '''
+        """
         Return the current checkpoint attributes (dict)
         Checkpoint attributes are used to save and restore the model in the middle of training
-        '''
-        
+        """
+
         return {
             'num_actions': self.num_actions,
             'state_shape': self.state_shape,
             'mlp_layers': self.mlp_layers,
             'mlp': self.mlp.state_dict(),
         }
-        
+
     @classmethod
     def from_checkpoint(cls, checkpoint):
-        '''
+        """
         Restore the model from a checkpoint
-        
+
         Args:
             checkpoint (dict): the checkpoint attributes generated by checkpoint_attributes()
-        '''
-        
+        """
+
         agent = cls(
             num_actions=checkpoint['num_actions'],
             state_shape=checkpoint['state_shape'],
             mlp_layers=checkpoint['mlp_layers'],
         )
-        
+
         agent.mlp.load_state_dict(checkpoint['mlp'])
         return agent
 
+
 class ReservoirBuffer(object):
-    ''' Allows uniform sampling over a stream of data.
+    """Allows uniform sampling over a stream of data.
 
     This class supports the storage of arbitrary elements, such as observation
     tensors, integer actions, etc.
 
     See https://en.wikipedia.org/wiki/Reservoir_sampling for more details.
-    '''
+    """
 
     def __init__(self, reservoir_buffer_capacity):
-        ''' Initialize the buffer.
-        '''
+        """Initialize the buffer.
+        """
         self._reservoir_buffer_capacity = reservoir_buffer_capacity
         self._data = []
         self._add_calls = 0
 
     def add(self, element):
-        ''' Potentially adds `element` to the reservoir buffer.
+        """Potentially adds `element` to the reservoir buffer.
 
         Args:
             element (object): data to be added to the reservoir buffer.
-        '''
+        """
         if len(self._data) < self._reservoir_buffer_capacity:
             self._data.append(element)
         else:
@@ -482,7 +482,7 @@ def add(self, element):
         self._add_calls += 1
 
     def sample(self, num_samples):
-        ''' Returns `num_samples` uniformly sampled from the buffer.
+        """Returns `num_samples` uniformly sampled from the buffer.
 
         Args:
             num_samples (int): The number of samples to draw.
@@ -492,25 +492,25 @@ def sample(self, num_samples):
 
         Raises:
             ValueError: If there are less than `num_samples` elements in the buffer
-        '''
+        """
         if len(self._data) < num_samples:
             raise ValueError("{} elements could not be sampled from size {}".format(
-                    num_samples, len(self._data)))
+                num_samples, len(self._data)))
         return random.sample(self._data, num_samples)
 
     def clear(self):
-        ''' Clear the buffer
-        '''
+        """Clear the buffer
+        """
         self._data = []
         self._add_calls = 0
-        
+
     def checkpoint_attributes(self):
         return {
             'data': self._data,
             'add_calls': self._add_calls,
             'reservoir_buffer_capacity': self._reservoir_buffer_capacity,
         }
-        
+
     @classmethod
     def from_checkpoint(cls, checkpoint):
         reservoir_buffer = cls(checkpoint['reservoir_buffer_capacity'])
@@ -523,4 +523,3 @@ def __len__(self):
 
     def __iter__(self):
         return iter(self._data)
-
diff --git a/rlcard/agents/random_agent.py b/rlcard/agents/random_agent.py
index ecdab040c..241a16647 100644
--- a/rlcard/agents/random_agent.py
+++ b/rlcard/agents/random_agent.py
@@ -2,32 +2,31 @@
 
 
 class RandomAgent(object):
-    ''' A random agent. Random agents is for running toy examples on the card games
-    '''
+    """A random agent. Random agents are for running toy examples on the card games"""
 
     def __init__(self, num_actions):
-        ''' Initilize the random agent
+        """Initialize the random agent
 
         Args:
             num_actions (int): The size of the ouput action space
-        '''
+        """
         self.use_raw = False
         self.num_actions = num_actions
 
     @staticmethod
     def step(state):
-        ''' Predict the action given the curent state in gerenerating training data.
+        """Predict the action given the current state when generating training data.
 
         Args:
             state (dict): An dictionary that represents the current state
 
         Returns:
             action (int): The action predicted (randomly chosen) by the random agent
-        '''
+        """
         return np.random.choice(list(state['legal_actions'].keys()))
 
     def eval_step(self, state):
-        ''' Predict the action given the current state for evaluation.
+        """Predict the action given the current state for evaluation.
             Since the random agents are not trained. This function is equivalent to step function
 
         Args:
@@ -36,7 +35,7 @@ def eval_step(self, state):
         Returns:
             action (int): The action predicted (randomly chosen) by the random agent
             probs (list): The list of action probabilities
-        '''
+        """
         probs = [0 for _ in range(self.num_actions)]
         for i in state['legal_actions']:
             probs[i] = 1/len(state['legal_actions'])
diff --git a/rlcard/envs/__init__.py b/rlcard/envs/__init__.py
index de9dbb8c1..a56d69f11 100644
--- a/rlcard/envs/__init__.py
+++ b/rlcard/envs/__init__.py
@@ -1,5 +1,4 @@
-''' Register new environments
-'''
+"""Register new environments"""
 from rlcard.envs.env import Env
 from rlcard.envs.registration import register, make
 
diff --git a/rlcard/envs/blackjack.py b/rlcard/envs/blackjack.py
index 459d3d813..4e5c91d32 100644
--- a/rlcard/envs/blackjack.py
+++ b/rlcard/envs/blackjack.py
@@ -5,17 +5,16 @@
 from rlcard.games.blackjack import Game
 
 DEFAULT_GAME_CONFIG = {
-        'game_num_players': 1,
-        'game_num_decks': 1
-        }
+    'game_num_players': 1,
+    'game_num_decks': 1
+}
+
 
 class BlackjackEnv(Env):
-    ''' Blackjack Environment
-    '''
+    """Blackjack Environment"""
 
     def __init__(self, config):
-        ''' Initialize the Blackjack environment
-        '''
+        """Initialize the Blackjack environment"""
         self.name = 'blackjack'
         self.default_game_config = DEFAULT_GAME_CONFIG
         self.game = Game()
@@ -25,25 +24,25 @@ def __init__(self, config):
         self.action_shape = [None for _ in range(self.num_players)]
 
     def _get_legal_actions(self):
-        ''' Get all leagal actions
+        """Get all legal actions
 
         Returns:
             encoded_action_list (list): return encoded legal action list (from str to int)
-        '''
+        """
         encoded_action_list = []
         for i in range(len(self.actions)):
             encoded_action_list.append(i)
         return encoded_action_list
 
     def _extract_state(self, state):
-        ''' Extract the state representation from state dictionary for agent
+        """Extract the state representation from state dictionary for agent
 
         Args:
             state (dict): Original state from the game
 
         Returns:
             observation (list): combine the player's score and dealer's observable score for observation
-        '''
+        """
         cards = state['state']
         my_cards = cards[0]
         dealer_cards = cards[1]
@@ -60,11 +59,11 @@ def _extract_state(self, state):
         return extracted_state
 
     def get_payoffs(self):
-        ''' Get the payoff of a game
+        """Get the payoff of a game
 
         Returns:
            payoffs (list): list of payoffs
-        '''
+        """
         payoffs = []
 
         for i in range(self.num_players):
@@ -77,19 +76,22 @@ def get_payoffs(self):
 
         return np.array(payoffs)
 
-
     def _decode_action(self, action_id):
-        ''' Decode the action for applying to the game
+        """Decode the action for applying to the game
 
         Args:
             action id (int): action id
 
         Returns:
             action (str): action for the game
-        '''
+        """
         return self.actions[action_id]
 
-rank2score = {"A":11, "2":2, "3":3, "4":4, "5":5, "6":6, "7":7, "8":8, "9":9, "T":10, "J":10, "Q":10, "K":10}
+
+rank2score = {"A": 11, "2": 2, "3": 3, "4": 4, "5": 5, "6": 6, "7": 7, "8": 8, "9": 9, "T": 10, "J": 10, "Q": 10,
+              "K": 10}
+
+
 def get_score(hand):
     score = 0
     count_a = 0
diff --git a/rlcard/envs/bridge.py b/rlcard/envs/bridge.py
index 13190c0c6..1ba70704f 100644
--- a/rlcard/envs/bridge.py
+++ b/rlcard/envs/bridge.py
@@ -1,8 +1,8 @@
-'''
+"""
     File name: envs/bridge.py
     Author: William Hale
     Date created: 11/26/2021
-'''
+"""
 
 import numpy as np
 from collections import OrderedDict
@@ -42,8 +42,7 @@
 
 
 class BridgeEnv(Env):
-    ''' Bridge Environment
-    '''
+    """Bridge Environment"""
     def __init__(self, config):
         self.name = 'bridge'
         self.game = Game()
@@ -55,62 +54,62 @@ def __init__(self, config):
         self.action_shape = [None for _ in range(self.num_players)]
 
     def get_payoffs(self):
-        ''' Get the payoffs of players.
+        """Get the payoffs of players.
 
         Returns:
             (list): A list of payoffs for each player.
-        '''
+        """
         return self.bridgePayoffDelegate.get_payoffs(game=self.game)
 
     def get_perfect_information(self):
-        ''' Get the perfect information of the current state
+        """Get the perfect information of the current state
 
         Returns:
             (dict): A dictionary of all the perfect information of the current state
-        '''
+        """
         return self.game.round.get_perfect_information()
 
     def _extract_state(self, state):  # wch: don't use state 211126
-        ''' Extract useful information from state for RL.
+        """Extract useful information from state for RL.
 
         Args:
             state (dict): The raw state
 
         Returns:
             (numpy.array): The extracted state
-        '''
+        """
         return self.bridgeStateExtractor.extract_state(game=self.game)
 
     def _decode_action(self, action_id):
-        ''' Decode Action id to the action in the game.
+        """Decode Action id to the action in the game.
 
         Args:
             action_id (int): The id of the action
 
         Returns:
             (ActionEvent): The action that will be passed to the game engine.
-        '''
+        """
         return ActionEvent.from_action_id(action_id=action_id)
 
     def _get_legal_actions(self):
-        ''' Get all legal actions for current state.
+        """Get all legal actions for current state.
 
         Returns:
             (list): A list of legal actions' id.
-        '''
+        """
         raise NotImplementedError  # wch: not needed
 
 
 class BridgePayoffDelegate(object):
 
     def get_payoffs(self, game: BridgeGame):
-        ''' Get the payoffs of players. Must be implemented in the child class.
+        """Get the payoffs of players. Must be implemented in the child class.
 
         Returns:
             (list): A list of payoffs for each player.
 
         Note: Must be implemented in the child class.
-        '''
+        """
         raise NotImplementedError
 
 
@@ -120,11 +119,11 @@ def __init__(self):
         self.make_bid_bonus = 2
 
     def get_payoffs(self, game: BridgeGame):
-        ''' Get the payoffs of players.
+        """Get the payoffs of players.
 
         Returns:
             (list): A list of payoffs for each player.
-        '''
+        """
         contract_bid_move = game.round.contract_bid_move
         if contract_bid_move:
             declarer = contract_bid_move.player
@@ -149,23 +148,23 @@ def get_state_shape_size(self) -> int:
         raise NotImplementedError
 
     def extract_state(self, game: BridgeGame):
-        ''' Extract useful information from state for RL. Must be implemented in the child class.
+        """Extract useful information from state for RL. Must be implemented in the child class.
 
         Args:
             game (BridgeGame): The game
 
         Returns:
             (numpy.array): The extracted state
-        '''
+        """
         raise NotImplementedError
 
     @staticmethod
     def get_legal_actions(game: BridgeGame):
-        ''' Get all legal actions for current state.
+        """Get all legal actions for current state.
 
         Returns:
             (OrderedDict): A OrderedDict of legal actions' id.
-        '''
+        """
         legal_actions = game.judger.get_legal_actions()
         legal_actions_ids = {action_event.action_id: None for action_event in legal_actions}
         return OrderedDict(legal_actions_ids)
@@ -194,14 +193,14 @@ def get_state_shape_size(self) -> int:
         return state_shape_size
 
     def extract_state(self, game: BridgeGame):
-        ''' Extract useful information from state for RL.
+        """Extract useful information from state for RL.
 
         Args:
             game (BridgeGame): The game
 
         Returns:
             (numpy.array): The extracted state
-        '''
+        """
         extracted_state = {}
         legal_actions: OrderedDict = self.get_legal_actions(game=game)
         raw_legal_actions = list(legal_actions.keys())
diff --git a/rlcard/envs/doudizhu.py b/rlcard/envs/doudizhu.py
index 0f46d2322..ae20b876c 100644
--- a/rlcard/envs/doudizhu.py
+++ b/rlcard/envs/doudizhu.py
@@ -5,8 +5,7 @@
 
 
 class DoudizhuEnv(Env):
-    ''' Doudizhu Environment
-    '''
+    """Doudizhu Environment"""
 
     def __init__(self, config):
         from rlcard.games.doudizhu.utils import ACTION_2_ID, ID_2_ACTION
@@ -24,11 +23,11 @@ def __init__(self, config):
         self.action_shape = [[54] for _ in range(self.num_players)]
 
     def _extract_state(self, state):
-        ''' Encode state
+        """Encode state
 
         Args:
             state (dict): dict of original state
-        '''
+        """
         current_hand = _cards2array(state['current_hand'])
         others_hand = _cards2array(state['others_hand'])
 
@@ -91,40 +90,40 @@ def _extract_state(self, state):
         return extracted_state
             
     def get_payoffs(self):
-        ''' Get the payoffs of players. Must be implemented in the child class.
+        """Get the payoffs of players. Must be implemented in the child class.
 
         Returns:
             payoffs (list): a list of payoffs for each player
-        '''
+        """
         return self.game.judger.judge_payoffs(self.game.round.landlord_id, self.game.winner_id)
 
     def _decode_action(self, action_id):
-        ''' Action id -> the action in the game. Must be implemented in the child class.
+        """Action id -> the action in the game. Must be implemented in the child class.
 
         Args:
             action_id (int): the id of the action
 
         Returns:
             action (string): the action that will be passed to the game engine.
-        '''
+        """
         return self._ID_2_ACTION[action_id]
 
     def _get_legal_actions(self):
-        ''' Get all legal actions for current state
+        """Get all legal actions for current state
 
         Returns:
             legal_actions (list): a list of legal actions' id
-        '''
+        """
         legal_actions = self.game.state['actions']
         legal_actions = {self._ACTION_2_ID[action]: _cards2array(action) for action in legal_actions}
         return legal_actions
 
     def get_perfect_information(self):
-        ''' Get the perfect information of the current state
+        """Get the perfect information of the current state
 
         Returns:
             (dict): A dictionary of all the perfect information of the current state
-        '''
+        """
         state = {}
         state['hand_cards_with_suit'] = [self._cards2str_with_suit(player.current_hand) for player in self.game.players]
         state['hand_cards'] = [self._cards2str(player.current_hand) for player in self.game.players]
@@ -134,11 +133,11 @@ def get_perfect_information(self):
         return state
 
     def get_action_feature(self, action):
-        ''' For some environments such as DouDizhu, we can have action features
+        """For some environments such as DouDizhu, we can have action features
 
         Returns:
             (numpy.array): The action features
-        '''
+        """
         return _cards2array(self._decode_action(action))
 
 Card2Column = {'3': 0, '4': 1, '5': 2, '6': 3, '7': 4, '8': 5, '9': 6, 'T': 7,
diff --git a/rlcard/envs/env.py b/rlcard/envs/env.py
index 93e239548..6cc47c4c6 100644
--- a/rlcard/envs/env.py
+++ b/rlcard/envs/env.py
@@ -1,13 +1,13 @@
 from rlcard.utils import *
 
 class Env(object):
-    '''
+    """
     The base Env class. For all the environments in RLCard,
     we should base on this class and implement as many functions
     as we can.
-    '''
+    """
     def __init__(self, config):
-        ''' Initialize the environment
+        """Initialize the environment
 
         Args:
             config (dict): A config dictionary. All the fields are
@@ -23,7 +23,7 @@ def __init__(self, config):
                 the default game configurations for Blackjack should be in
                 'rlcard/envs/blackjack.py'
                 TODO: Support more game configurations in the future.
-        '''
+        """
         self.allow_step_back = self.game.allow_step_back = config['allow_step_back']
         self.action_recorder = []
 
@@ -50,20 +50,20 @@ def __init__(self, config):
 
 
     def reset(self):
-        ''' Start a new game
+        """Start a new game
 
         Returns:
             (tuple): Tuple containing:
 
                 (numpy.array): The begining state of the game
                 (int): The begining player
-        '''
+        """
         state, player_id = self.game.init_game()
         self.action_recorder = []
         return self._extract_state(state), player_id
 
     def step(self, action, raw_action=False):
-        ''' Step forward
+        """Step forward
 
         Args:
             action (int): The action taken by the current player
@@ -74,7 +74,7 @@ def step(self, action, raw_action=False):
 
                 (dict): The next state
                 (int): The ID of the next player
-        '''
+        """
         if not raw_action:
             action = self._decode_action(action)
 
@@ -86,7 +86,7 @@ def step(self, action, raw_action=False):
         return self._extract_state(next_state), player_id
 
     def step_back(self):
-        ''' Take one step backward.
+        """Take one step backward.
 
         Returns:
             (tuple): Tuple containing:
@@ -95,7 +95,7 @@ def step_back(self):
                 (int): The ID of the previous player
 
         Note: Error will be raised if step back from the root node.
-        '''
+        """
         if not self.allow_step_back:
             raise Exception('Step back is off. To use step_back, please set allow_step_back=True in rlcard.make')
 
@@ -108,17 +108,17 @@ def step_back(self):
         return state, player_id
 
     def set_agents(self, agents):
-        '''
+        """
         Set the agents that will interact with the environment.
         This function must be called before `run`.
 
         Args:
             agents (list): List of Agent classes
-        '''
+        """
         self.agents = agents
 
     def run(self, is_training=False):
-        '''
+        """
         Run a complete game, either for evaluation or training RL agent.
 
         Args:
@@ -132,7 +132,7 @@ def run(self, is_training=False):
 
         Note: The trajectories are 3-dimension list. The first dimension is for different players.
               The second dimension is for different transitions. The third dimension is for the contents of each transiton
-        '''
+        """
         trajectories = [[] for _ in range(self.num_players)]
         state, player_id = self.reset()
 
@@ -169,57 +169,57 @@ def run(self, is_training=False):
         return trajectories, payoffs
 
     def is_over(self):
-        ''' Check whether the curent game is over
+        """Check whether the current game is over
 
         Returns:
             (boolean): True if current game is over
-        '''
+        """
         return self.game.is_over()
 
     def get_player_id(self):
-        ''' Get the current player id
+        """Get the current player id
 
         Returns:
             (int): The id of the current player
-        '''
+        """
         return self.game.get_player_id()
 
 
     def get_state(self, player_id):
-        ''' Get the state given player id
+        """Get the state given player id
 
         Args:
             player_id (int): The player id
 
         Returns:
             (numpy.array): The observed state of the player
-        '''
+        """
         return self._extract_state(self.game.get_state(player_id))
 
     def get_payoffs(self):
-        ''' Get the payoffs of players. Must be implemented in the child class.
+        """Get the payoffs of players. Must be implemented in the child class.
 
         Returns:
             (list): A list of payoffs for each player.
 
         Note: Must be implemented in the child class.
-        '''
+        """
         raise NotImplementedError
 
     def get_perfect_information(self):
-        ''' Get the perfect information of the current state
+        """Get the perfect information of the current state
 
         Returns:
             (dict): A dictionary of all the perfect information of the current state
-        '''
+        """
         raise NotImplementedError
 
     def get_action_feature(self, action):
-        ''' For some environments such as DouDizhu, we can have action features
+        """For some environments such as DouDizhu, we can have action features
 
         Returns:
             (numpy.array): The action features
-        '''
+        """
         # By default we use one-hot encoding
         feature = np.zeros(self.num_actions, dtype=np.int8)
         feature[action] = 1
@@ -231,18 +231,18 @@ def seed(self, seed=None):
         return seed
 
     def _extract_state(self, state):
-        ''' Extract useful information from state for RL. Must be implemented in the child class.
+        """Extract useful information from state for RL. Must be implemented in the child class.
 
         Args:
             state (dict): The raw state
 
         Returns:
             (numpy.array): The extracted state
-        '''
+        """
         raise NotImplementedError
 
     def _decode_action(self, action_id):
-        ''' Decode Action id to the action in the game.
+        """Decode Action id to the action in the game.
 
         Args:
             action_id (int): The id of the action
@@ -251,15 +251,15 @@ def _decode_action(self, action_id):
             (string): The action that will be passed to the game engine.
 
         Note: Must be implemented in the child class.
-        '''
+        """
         raise NotImplementedError
 
     def _get_legal_actions(self):
-        ''' Get all legal actions for current state.
+        """Get all legal actions for current state.
 
         Returns:
             (list): A list of legal actions' id.
 
         Note: Must be implemented in the child class.
-        '''
+        """
         raise NotImplementedError
diff --git a/rlcard/envs/gin_rummy.py b/rlcard/envs/gin_rummy.py
index fd9c3ae4f..accf5388c 100644
--- a/rlcard/envs/gin_rummy.py
+++ b/rlcard/envs/gin_rummy.py
@@ -1,16 +1,17 @@
-'''
+"""
     File name: envs/gin_rummy.py
     Author: William Hale
     Date created: 2/12/2020
-'''
+"""
 import numpy as np
 from collections import OrderedDict
 
 from rlcard.envs import Env
 
+
 class GinRummyEnv(Env):
-    ''' GinRummy Environment
-    '''
+    """GinRummy Environment"""
+
     def __init__(self, config):
         from rlcard.games.gin_rummy.utils.move import ScoreSouthMove
         from rlcard.games.gin_rummy.utils import utils
@@ -25,7 +26,7 @@ def __init__(self, config):
         self.action_shape = [None for _ in range(self.num_players)]
 
     def _extract_state(self, state):  # 200213 don't use state ???
-        ''' Encode state
+        """Encode state
 
         Args:
             state (dict): dict of original state
@@ -37,12 +38,15 @@ def _extract_state(self, state):  # 200213 don't use state ???
                              dead_cards (1 for discards except for top_discard else 0)
                              opponent known cards (likewise)
                              unknown cards (likewise)  # is this needed ??? 200213
-        '''
+        """
         if self.game.is_over():
             obs = np.array([self._utils.encode_cards([]) for _ in range(5)])
-            extracted_state = {'obs': obs, 'legal_actions': self._get_legal_actions()}
-            extracted_state['raw_legal_actions'] = list(self._get_legal_actions().keys())
-            extracted_state['raw_obs'] = obs
+            extracted_state = {
+                'obs': obs,
+                'legal_actions': self._get_legal_actions(),
+                'raw_legal_actions': list(self._get_legal_actions().keys()),
+                'raw_obs': obs
+            }
         else:
             discard_pile = self.game.round.dealer.discard_pile
             stock_pile = self.game.round.dealer.stock_pile
@@ -59,16 +63,20 @@ def _extract_state(self, state):  # 200213 don't use state ???
             unknown_cards_rep = self._utils.encode_cards(unknown_cards)
             rep = [hand_rep, top_discard_rep, dead_cards_rep, known_cards_rep, unknown_cards_rep]
             obs = np.array(rep)
-            extracted_state = {'obs': obs, 'legal_actions': self._get_legal_actions(), 'raw_legal_actions': list(self._get_legal_actions().keys())}
-            extracted_state['raw_obs'] = obs
+            extracted_state = {
+                'obs': obs,
+                'legal_actions': self._get_legal_actions(),
+                'raw_legal_actions': list(self._get_legal_actions().keys()),
+                'raw_obs': obs
+            }
         return extracted_state
 
     def get_payoffs(self):
-        ''' Get the payoffs of players. Must be implemented in the child class.
+        """Get the payoffs of players. Must be implemented in the child class.
 
         Returns:
             payoffs (list): a list of payoffs for each player
-        '''
+        """
         # determine whether game completed all moves
         is_game_complete = False
         if self.game.round:
@@ -79,22 +87,22 @@ def get_payoffs(self):
         return np.array(payoffs)
 
     def _decode_action(self, action_id):  # FIXME 200213 should return str
-        ''' Action id -> the action in the game. Must be implemented in the child class.
+        """Action id -> the action in the game. Must be implemented in the child class.
 
         Args:
             action_id (int): the id of the action
 
         Returns:
             action (ActionEvent): the action that will be passed to the game engine.
-        '''
+        """
         return self.game.decode_action(action_id=action_id)
 
     def _get_legal_actions(self):
-        ''' Get all legal actions for current state
+        """Get all legal actions for current state
 
         Returns:
             legal_actions (list): a list of legal actions' id
-        '''
+        """
         legal_actions = self.game.judge.get_legal_actions()
         legal_actions_ids = {action_event.action_id: None for action_event in legal_actions}
         return OrderedDict(legal_actions_ids)
diff --git a/rlcard/envs/leducholdem.py b/rlcard/envs/leducholdem.py
index da389dd8e..777c8792e 100644
--- a/rlcard/envs/leducholdem.py
+++ b/rlcard/envs/leducholdem.py
@@ -1,6 +1,5 @@
 import json
 import os
-import numpy as np
 from collections import OrderedDict
 
 import rlcard
@@ -9,17 +8,17 @@
 from rlcard.utils import *
 
 DEFAULT_GAME_CONFIG = {
-        'game_num_players': 2,
-        }
+    'game_num_players': 2,
+}
+
 
 class LeducholdemEnv(Env):
-    ''' Leduc Hold'em Environment
-    '''
+    """Leduc Hold'em Environment"""
 
     def __init__(self, config):
-        ''' Initialize the Limitholdem environment
-        '''
-        self.name = 'leduc-holdem' 
+        """Initialize the Leduc Hold'em environment
+        """
+        self.name = 'leduc-holdem'
         self.default_game_config = DEFAULT_GAME_CONFIG
         self.game = Game()
         super().__init__(config)
@@ -31,15 +30,15 @@ def __init__(self, config):
             self.card2index = json.load(file)
 
     def _get_legal_actions(self):
-        ''' Get all leagal actions
+        """Get all legal actions
 
         Returns:
             encoded_action_list (list): return encoded legal action list (from str to int)
-        '''
+        """
         return self.game.get_legal_actions()
 
     def _extract_state(self, state):
-        ''' Extract the state representation from state dictionary for agent
+        """Extract the state representation from state dictionary for agent
 
         Note: Currently the use the hand cards and the public cards. TODO: encode the states
 
@@ -48,7 +47,7 @@ def _extract_state(self, state):
 
         Returns:
             observation (list): combine the player's score and dealer's observable score for observation
-        '''
+        """
         extracted_state = {}
 
         legal_actions = OrderedDict({self.actions.index(a): None for a in state['legal_actions']})
@@ -59,9 +58,9 @@ def _extract_state(self, state):
         obs = np.zeros(36)
         obs[self.card2index[hand]] = 1
         if public_card:
-            obs[self.card2index[public_card]+3] = 1
-        obs[state['my_chips']+6] = 1
-        obs[sum(state['all_chips'])-state['my_chips']+21] = 1
+            obs[self.card2index[public_card] + 3] = 1
+        obs[state['my_chips'] + 6] = 1
+        obs[sum(state['all_chips']) - state['my_chips'] + 21] = 1
         extracted_state['obs'] = obs
 
         extracted_state['raw_obs'] = state
@@ -71,22 +70,22 @@ def _extract_state(self, state):
         return extracted_state
 
     def get_payoffs(self):
-        ''' Get the payoff of a game
+        """Get the payoff of a game
 
         Returns:
            payoffs (list): list of payoffs
-        '''
+        """
         return self.game.get_payoffs()
 
     def _decode_action(self, action_id):
-        ''' Decode the action for applying to the game
+        """Decode the action for applying to the game
 
         Args:
             action id (int): action id
 
         Returns:
             action (str): action for the game
-        '''
+        """
         legal_actions = self.game.get_legal_actions()
         if self.actions[action_id] not in legal_actions:
             if 'check' in legal_actions:
@@ -96,11 +95,11 @@ def _decode_action(self, action_id):
         return self.actions[action_id]
 
     def get_perfect_information(self):
-        ''' Get the perfect information of the current state
+        """Get the perfect information of the current state
 
         Returns:
             (dict): A dictionary of all the perfect information of the current state
-        '''
+        """
         state = {}
         state['chips'] = [self.game.players[i].in_chips for i in range(self.num_players)]
         state['public_card'] = self.game.public_card.get_index() if self.game.public_card else None
diff --git a/rlcard/envs/limitholdem.py b/rlcard/envs/limitholdem.py
index aa8e62cab..5307ae947 100644
--- a/rlcard/envs/limitholdem.py
+++ b/rlcard/envs/limitholdem.py
@@ -12,12 +12,12 @@
         }
 
 class LimitholdemEnv(Env):
-    ''' Limitholdem Environment
-    '''
+    """Limitholdem Environment
+    """
 
     def __init__(self, config):
-        ''' Initialize the Limitholdem environment
-        '''
+        """Initialize the Limitholdem environment
+        """
         self.name = 'limit-holdem'
         self.default_game_config = DEFAULT_GAME_CONFIG
         self.game = Game()
@@ -30,15 +30,15 @@ def __init__(self, config):
             self.card2index = json.load(file)
 
     def _get_legal_actions(self):
-        ''' Get all leagal actions
+        """Get all legal actions
 
         Returns:
             encoded_action_list (list): return encoded legal action list (from str to int)
-        '''
+        """
         return self.game.get_legal_actions()
 
     def _extract_state(self, state):
-        ''' Extract the state representation from state dictionary for agent
+        """Extract the state representation from state dictionary for agent
 
         Note: Currently the use the hand cards and the public cards. TODO: encode the states
 
@@ -47,7 +47,7 @@ def _extract_state(self, state):
 
         Returns:
             observation (list): combine the player's score and dealer's observable score for observation
-        '''
+        """
         extracted_state = {}
 
         legal_actions = OrderedDict({self.actions.index(a): None for a in state['legal_actions']})
@@ -71,22 +71,22 @@ def _extract_state(self, state):
         return extracted_state
 
     def get_payoffs(self):
-        ''' Get the payoff of a game
+        """Get the payoff of a game
 
         Returns:
            payoffs (list): list of payoffs
-        '''
+        """
         return self.game.get_payoffs()
 
     def _decode_action(self, action_id):
-        ''' Decode the action for applying to the game
+        """Decode the action for applying to the game
 
         Args:
             action id (int): action id
 
         Returns:
             action (str): action for the game
-        '''
+        """
         legal_actions = self.game.get_legal_actions()
         if self.actions[action_id] not in legal_actions:
             if 'check' in legal_actions:
@@ -96,11 +96,11 @@ def _decode_action(self, action_id):
         return self.actions[action_id]
 
     def get_perfect_information(self):
-        ''' Get the perfect information of the current state
+        """Get the perfect information of the current state
 
         Returns:
             (dict): A dictionary of all the perfect information of the current state
-        '''
+        """
         state = {}
         state['chips'] = [self.game.players[i].in_chips for i in range(self.num_players)]
         state['public_card'] = [c.get_index() for c in self.game.public_cards] if self.game.public_cards else None
diff --git a/rlcard/envs/mahjong.py b/rlcard/envs/mahjong.py
index 0c9fb350c..858bfcd12 100644
--- a/rlcard/envs/mahjong.py
+++ b/rlcard/envs/mahjong.py
@@ -6,9 +6,9 @@
 from rlcard.games.mahjong import Card
 from rlcard.games.mahjong.utils import card_encoding_dict, encode_cards, pile2list
 
+
 class MahjongEnv(Env):
-    ''' Mahjong Environment
-    '''
+    """Mahjong Environment"""
 
     def __init__(self, config):
         self.name = 'mahjong'
@@ -20,7 +20,7 @@ def __init__(self, config):
         self.action_shape = [None for _ in range(self.num_players)]
 
     def _extract_state(self, state):
-        ''' Encode state
+        """Encode state
 
         Args:
             state (dict): dict of original state
@@ -31,7 +31,7 @@ def _extract_state(self, state):
                              the union of the other two players' hand
                              the recent three actions
                              the union of all played cards
-        '''
+        """
         players_pile = state['players_pile']
         hand_rep = encode_cards(state['current_hand'])
         piles_rep = []
@@ -43,19 +43,22 @@ def _extract_state(self, state):
         rep.extend(piles_rep)
         obs = np.array(rep)
 
-        extracted_state = {'obs': obs, 'legal_actions': self._get_legal_actions()}
-        extracted_state['raw_obs'] = state
-        extracted_state['raw_legal_actions'] = [a for a in state['action_cards']]
-        extracted_state['action_record'] = self.action_recorder
+        extracted_state = {
+            'obs': obs,
+            'legal_actions': self._get_legal_actions(),
+            'raw_obs': state,
+            'raw_legal_actions': [a for a in state['action_cards']],
+            'action_record': self.action_recorder
+        }
 
         return extracted_state
 
     def get_payoffs(self):
-        ''' Get the payoffs of players. Must be implemented in the child class.
+        """Get the payoffs of players. Must be implemented in the child class.
 
         Returns:
             payoffs (list): a list of payoffs for each player
-        '''
+        """
         _, player, _ = self.game.judger.judge_game(self.game)
         if player == -1:
             payoffs = [0, 0, 0, 0]
@@ -65,14 +68,14 @@ def get_payoffs(self):
         return np.array(payoffs)
 
     def _decode_action(self, action_id):
-        ''' Action id -> the action in the game. Must be implemented in the child class.
+        """Action id -> the action in the game. Must be implemented in the child class.
 
         Args:
             action_id (int): the id of the action
 
         Returns:
             action (string): the action that will be passed to the game engine.
-        '''
+        """
         action = self.de_action_id[action_id]
         if action_id < 34:
             candidates = self.game.get_legal_actions(self.game.get_state(self.game.round.current_player))
@@ -83,7 +86,7 @@ def _decode_action(self, action_id):
         return action
 
     def _get_legal_actions(self):
-        ''' Get all legal actions for current state
+        """Get all legal actions for current state
 
         Returns:
         if type(legal_actions[0]) == Card:
@@ -91,7 +94,7 @@ def _get_legal_actions(self):
         else:
             print(legal_actions)
             legal_actions (list): a list of legal actions' id
-        '''
+        """
         legal_action_id = {}
         legal_actions = self.game.get_legal_actions(self.game.get_state(self.game.round.current_player))
         if legal_actions:
@@ -106,6 +109,6 @@ def _get_legal_actions(self):
             print(self.game.judger.judge_game(self.game))
             print(self.game.is_over())
             print([len(p.pile) for p in self.game.players])
-            #print(self.game.get_state(self.game.round.current_player))
-            #exit()
+            # print(self.game.get_state(self.game.round.current_player))
+            # exit()
         return OrderedDict(legal_action_id)
diff --git a/rlcard/envs/nolimitholdem.py b/rlcard/envs/nolimitholdem.py
index e7522cb44..fc4b6f0b0 100644
--- a/rlcard/envs/nolimitholdem.py
+++ b/rlcard/envs/nolimitholdem.py
@@ -9,18 +9,17 @@
 from rlcard.games.nolimitholdem.round import Action
 
 DEFAULT_GAME_CONFIG = {
-        'game_num_players': 2,
-        'chips_for_each': 100,
-        'dealer_id': None,
-        }
+    'game_num_players': 2,
+    'chips_for_each': 100,
+    'dealer_id': None,
+}
+
 
 class NolimitholdemEnv(Env):
-    ''' Limitholdem Environment
-    '''
+    """Nolimitholdem Environment"""
 
     def __init__(self, config):
-        ''' Initialize the Limitholdem environment
-        '''
+        """Initialize the Nolimitholdem environment"""
         self.name = 'no-limit-holdem'
         self.default_game_config = DEFAULT_GAME_CONFIG
         self.game = Game()
@@ -35,15 +34,15 @@ def __init__(self, config):
             self.card2index = json.load(file)
 
     def _get_legal_actions(self):
-        ''' Get all leagal actions
+        """Get all legal actions
 
         Returns:
             encoded_action_list (list): return encoded legal action list (from str to int)
-        '''
+        """
         return self.game.get_legal_actions()
 
     def _extract_state(self, state):
-        ''' Extract the state representation from state dictionary for agent
+        """Extract the state representation from state dictionary for agent
 
         Note: Currently the use the hand cards and the public cards. TODO: encode the states
 
@@ -52,7 +51,7 @@ def _extract_state(self, state):
 
         Returns:
             observation (list): combine the player's score and dealer's observable score for observation
-        '''
+        """
         extracted_state = {}
 
         legal_actions = OrderedDict({action.value: None for action in state['legal_actions']})
@@ -77,22 +76,22 @@ def _extract_state(self, state):
         return extracted_state
 
     def get_payoffs(self):
-        ''' Get the payoff of a game
+        """Get the payoff of a game
 
         Returns:
            payoffs (list): list of payoffs
-        '''
+        """
         return np.array(self.game.get_payoffs())
 
     def _decode_action(self, action_id):
-        ''' Decode the action for applying to the game
+        """Decode the action for applying to the game
 
         Args:
             action id (int): action id
 
         Returns:
             action (str): action for the game
-        '''
+        """
         legal_actions = self.game.get_legal_actions()
         if self.actions(action_id) not in legal_actions:
             if Action.CHECK in legal_actions:
@@ -103,11 +102,11 @@ def _decode_action(self, action_id):
         return self.actions(action_id)
 
     def get_perfect_information(self):
-        ''' Get the perfect information of the current state
+        """Get the perfect information of the current state
 
         Returns:
             (dict): A dictionary of all the perfect information of the current state
-        '''
+        """
         state = {}
         state['chips'] = [self.game.players[i].in_chips for i in range(self.num_players)]
         state['public_card'] = [c.get_index() for c in self.game.public_cards] if self.game.public_cards else None
@@ -115,5 +114,3 @@ def get_perfect_information(self):
         state['current_player'] = self.game.game_pointer
         state['legal_actions'] = self.game.get_legal_actions()
         return state
-
-
diff --git a/rlcard/envs/registration.py b/rlcard/envs/registration.py
index c27e06e67..8cd7122b7 100644
--- a/rlcard/envs/registration.py
+++ b/rlcard/envs/registration.py
@@ -7,82 +7,95 @@
         }
 
 class EnvSpec(object):
-    ''' A specification for a particular instance of the environment.
-    '''
+    """A specification for a particular instance of the environment."""
 
     def __init__(self, env_id, entry_point=None):
-        ''' Initilize
+        """
+        Initialize
 
         Args:
-            env_id (string): The name of the environent
-            entry_point (string): A string the indicates the location of the envronment class
-        '''
+            env_id (string): The name of the environment
+            entry_point (string): A string that indicates the location of the environment class
+        """
         self.env_id = env_id
         mod_name, class_name = entry_point.split(':')
         self._entry_point = getattr(importlib.import_module(mod_name), class_name)
 
-    def make(self, config=DEFAULT_CONFIG):
-        ''' Instantiates an instance of the environment
+    def make(self, config=None):
+        """
+        Instantiates an instance of the environment
 
         Returns:
-            env (Env): An instance of the environemnt
+            env (Env): An instance of the environment
             config (dict): A dictionary of the environment settings
-        '''
+        """
+        if config is None:
+            config = DEFAULT_CONFIG
         env = self._entry_point(config)
         return env
 
+
 class EnvRegistry(object):
-    ''' Register an environment (game) by ID
-    '''
+    """Register an environment (game) by ID"""
 
     def __init__(self):
-        ''' Initilize
-        '''
+        """Initialize"""
         self.env_specs = {}
 
     def register(self, env_id, entry_point):
-        ''' Register an environment
+        """
+        Register an environment
 
         Args:
-            env_id (string): The name of the environent
-            entry_point (string): A string the indicates the location of the envronment class
-        '''
+            env_id (string): The name of the environment
+            entry_point (string): A string that indicates the location of the environment class
+        """
         if env_id in self.env_specs:
             raise ValueError('Cannot re-register env_id: {}'.format(env_id))
         self.env_specs[env_id] = EnvSpec(env_id, entry_point)
 
-    def make(self, env_id, config=DEFAULT_CONFIG):
-        ''' Create and environment instance
+    def make(self, env_id, config=None):
+        """Create an environment instance
 
         Args:
             env_id (string): The name of the environment
             config (dict): A dictionary of the environment settings
-        '''
+        """
+        if config is None:
+            config = DEFAULT_CONFIG
+
         if env_id not in self.env_specs:
             raise ValueError('Cannot find env_id: {}'.format(env_id))
+
         return self.env_specs[env_id].make(config)
 
+
 # Have a global registry
 registry = EnvRegistry()
 
+
 def register(env_id, entry_point):
-    ''' Register an environment
+    """Register an environment
 
     Args:
-        env_id (string): The name of the environent
-        entry_point (string): A string the indicates the location of the envronment class
-    '''
+        env_id (string): The name of the environment
+        entry_point (string): A string that indicates the location of the environment class
+    """
     return registry.register(env_id, entry_point)
 
-def make(env_id, config={}):
-    ''' Create and environment instance
+
+def make(env_id, config=None):
+    """Create an environment instance
 
     Args:
         env_id (string): The name of the environment
         config (dict): A dictionary of the environment settings
-        env_num (int): The number of environments
-    '''
+    """
+    if config is None:
+        config = {}
+
     _config = DEFAULT_CONFIG.copy()
+
     for key in config:
         _config[key] = config[key]
 
diff --git a/rlcard/envs/uno.py b/rlcard/envs/uno.py
index b9d6d7ea1..a7b87c0ff 100644
--- a/rlcard/envs/uno.py
+++ b/rlcard/envs/uno.py
@@ -50,11 +50,11 @@ def _get_legal_actions(self):
         return OrderedDict(legal_ids)
 
     def get_perfect_information(self):
-        ''' Get the perfect information of the current state
+        """Get the perfect information of the current state
 
         Returns:
             (dict): A dictionary of all the perfect information of the current state
-        '''
+        """
         state = {}
         state['num_players'] = self.num_players
         state['hand_cards'] = [cards2list(player.hand)
diff --git a/rlcard/games/base.py b/rlcard/games/base.py
index 5424f4857..e5a77a607 100644
--- a/rlcard/games/base.py
+++ b/rlcard/games/base.py
@@ -1,25 +1,26 @@
-''' Game-related base classes
-'''
+"""Game-related base classes """
+
+
 class Card:
-    '''
+    """
     Card stores the suit and rank of a single card
 
     Note:
         The suit variable in a standard card game should be one of [S, H, D, C, BJ, RJ] meaning [Spades, Hearts, Diamonds, Clubs, Black Joker, Red Joker]
         Similarly the rank variable should be one of [A, 2, 3, 4, 5, 6, 7, 8, 9, T, J, Q, K]
-    '''
+    """
     suit = None
     rank = None
     valid_suit = ['S', 'H', 'D', 'C', 'BJ', 'RJ']
     valid_rank = ['A', '2', '3', '4', '5', '6', '7', '8', '9', 'T', 'J', 'Q', 'K']
 
     def __init__(self, suit, rank):
-        ''' Initialize the suit and rank of a card
+        """Initialize the suit and rank of a card
 
         Args:
             suit: string, suit of the card, should be one of valid_suit
             rank: string, rank of the card, should be one of valid_rank
-        '''
+        """
         self.suit = suit
         self.rank = rank
 
@@ -36,17 +37,17 @@ def __hash__(self):
         return rank_index + 100 * suit_index
 
     def __str__(self):
-        ''' Get string representation of a card.
+        """Get string representation of a card.
 
         Returns:
             string: the combination of rank and suit of a card. Eg: AS, 5H, JD, 3C, ...
-        '''
+        """
         return self.rank + self.suit
 
     def get_index(self):
-        ''' Get index of a card.
+        """Get index of a card.
 
         Returns:
             string: the combination of suit and rank of a card. Eg: 1S, 2H, AD, BJ, RJ...
-        '''
+        """
         return self.suit+self.rank
diff --git a/rlcard/games/blackjack/dealer.py b/rlcard/games/blackjack/dealer.py
index 88929b5e7..2f4efe186 100644
--- a/rlcard/games/blackjack/dealer.py
+++ b/rlcard/games/blackjack/dealer.py
@@ -1,11 +1,11 @@
 from rlcard.utils import init_standard_deck
 import numpy as np
 
+
 class BlackjackDealer:
 
     def __init__(self, np_random, num_decks=1):
-        ''' Initialize a Blackjack dealer class
-        '''
+        """Initialize a Blackjack dealer class """
         self.np_random = np_random
         self.num_decks = num_decks
         self.deck = init_standard_deck()
@@ -17,18 +17,17 @@ def __init__(self, np_random, num_decks=1):
         self.score = 0
 
     def shuffle(self):
-        ''' Shuffle the deck
-        '''
+        """Shuffle the deck """
         shuffle_deck = np.array(self.deck)
         self.np_random.shuffle(shuffle_deck)
         self.deck = list(shuffle_deck)
 
     def deal_card(self, player):
-        ''' Distribute one card to the player
+        """Distribute one card to the player
 
         Args:
             player_id (int): the target player's id
-        '''
+        """
         idx = self.np_random.choice(len(self.deck))
         card = self.deck[idx]
         if self.num_decks != 0:  # If infinite decks, do not pop card from deck
diff --git a/rlcard/games/blackjack/game.py b/rlcard/games/blackjack/game.py
index fa0cddac5..584c4cbac 100644
--- a/rlcard/games/blackjack/game.py
+++ b/rlcard/games/blackjack/game.py
@@ -5,27 +5,26 @@
 from rlcard.games.blackjack import Player
 from rlcard.games.blackjack import Judger
 
+
 class BlackjackGame:
 
     def __init__(self, allow_step_back=False):
-        ''' Initialize the class Blackjack Game
-        '''
+        """Initialize the class Blackjack Game"""
         self.allow_step_back = allow_step_back
         self.np_random = np.random.RandomState()
 
     def configure(self, game_config):
-        ''' Specifiy some game specific parameters, such as number of players
-        '''
+        """Specify some game specific parameters, such as number of players"""
         self.num_players = game_config['game_num_players']
         self.num_decks = game_config['game_num_decks']
 
     def init_game(self):
-        ''' Initialilze the game
+        """Initialize the game
 
         Returns:
             state (dict): the first state of the game
             player_id (int): current player's id
-        '''
+        """
         self.dealer = Dealer(self.np_random, self.num_decks)
 
         self.players = []
@@ -54,15 +53,15 @@ def init_game(self):
         return self.get_state(self.game_pointer), self.game_pointer
 
     def step(self, action):
-        ''' Get the next state
+        """Get the next state
 
         Args:
             action (str): a specific action of blackjack. (Hit or Stand)
 
-        Returns:/
+        Returns:
             dict: next player's state
             int: next plater's id
-        '''
+        """
         if self.allow_step_back:
             p = deepcopy(self.players[self.game_pointer])
             d = deepcopy(self.dealer)
@@ -82,13 +81,11 @@ def step(self, action):
                         self.dealer.deal_card(self.dealer)
                     self.dealer.status, self.dealer.score = self.judger.judge_round(self.dealer)
                     for i in range(self.num_players):
-                        self.judger.judge_game(self, i) 
+                        self.judger.judge_game(self, i)
                     self.game_pointer = 0
                 else:
                     self.game_pointer += 1
-
-                
-        elif action == "stand": # If stand, first try to pass the pointer, if it's the last player, dealer deal for himself, then judge game for everyone using a loop
+        elif action == "stand":  # If stand, first try to pass the pointer, if it's the last player, dealer deal for himself, then judge game for everyone using a loop
             self.players[self.game_pointer].status, self.players[self.game_pointer].score = self.judger.judge_round(
                 self.players[self.game_pointer])
             if self.game_pointer >= self.num_players - 1:
@@ -96,15 +93,11 @@ def step(self, action):
                     self.dealer.deal_card(self.dealer)
                 self.dealer.status, self.dealer.score = self.judger.judge_round(self.dealer)
                 for i in range(self.num_players):
-                    self.judger.judge_game(self, i) 
+                    self.judger.judge_game(self, i)
                 self.game_pointer = 0
             else:
                 self.game_pointer += 1
 
-
-            
-            
-
         hand = [card.get_index() for card in self.players[self.game_pointer].hand]
 
         if self.is_over():
@@ -118,62 +111,61 @@ def step(self, action):
         next_state['actions'] = ('hit', 'stand')
         next_state['state'] = (hand, dealer_hand)
 
-        
-
         return next_state, self.game_pointer
 
     def step_back(self):
-        ''' Return to the previous state of the game
+        """Return to the previous state of the game
 
         Returns:
             Status (bool): check if the step back is success or not
-        '''
-        #while len(self.history) > 0:
+        """
+        # while len(self.history) > 0:
         if len(self.history) > 0:
             self.dealer, self.players[self.game_pointer], self.winner = self.history.pop()
             return True
         return False
 
     def get_num_players(self):
-        ''' Return the number of players in blackjack
+        """Return the number of players in blackjack
 
         Returns:
             number_of_player (int): blackjack only have 1 player
-        '''
+        """
         return self.num_players
 
     @staticmethod
     def get_num_actions():
-        ''' Return the number of applicable actions
+        """Return the number of applicable actions
 
         Returns:
             number_of_actions (int): there are only two actions (hit and stand)
-        '''
+        """
         return 2
 
     def get_player_id(self):
-        ''' Return the current player's id
+        """Return the current player's id
 
         Returns:
             player_id (int): current player's id
-        '''
+        """
         return self.game_pointer
 
     def get_state(self, player_id):
-        ''' Return player's state
+        """Return player's state
 
         Args:
             player_id (int): player id
 
         Returns:
             state (dict): corresponding player's state
+        """
         '''
+            Before this change the state only had two keys (action, state),
+            but now it has more than 4 keys (action, state, player0 hand, player1 hand, ... , dealer hand).
+            Although key 'state' duplicates the information in the 'player hand' and 'dealer hand' keys, it could
+            not be removed because other code depends on it.
+            Removing it would probably also require changing the DQN agent.
         '''
-                before change state only have two keys (action, state)
-                but now have more than 4 keys (action, state, player0 hand, player1 hand, ... , dealer hand)
-                Although key 'state' have duplicated information with key 'player hand' and 'dealer hand', I couldn't remove it because of other codes
-                To remove it, we need to change dqn agent too in my opinion
-                '''
         state = {}
         state['actions'] = ('hit', 'stand')
         hand = [card.get_index() for card in self.players[player_id].hand]
@@ -190,11 +182,11 @@ def get_state(self, player_id):
         return state
 
     def is_over(self):
-        ''' Check if the game is over
+        """Check if the game is over
 
         Returns:
             status (bool): True/False
-        '''
+        """
         '''
                 I should change here because judger and self.winner is changed too
                 '''
diff --git a/rlcard/games/blackjack/judger.py b/rlcard/games/blackjack/judger.py
index af18b9cce..f18de34f9 100644
--- a/rlcard/games/blackjack/judger.py
+++ b/rlcard/games/blackjack/judger.py
@@ -1,13 +1,12 @@
-
 class BlackjackJudger:
     def __init__(self, np_random):
-        ''' Initialize a BlackJack judger class
-        '''
+        """Initialize a BlackJack judger class"""
         self.np_random = np_random
-        self.rank2score = {"A":11, "2":2, "3":3, "4":4, "5":5, "6":6, "7":7, "8":8, "9":9, "T":10, "J":10, "Q":10, "K":10}
+        self.rank2score = {"A": 11, "2": 2, "3": 3, "4": 4, "5": 5, "6": 6, "7": 7, "8": 8, "9": 9, "T": 10, "J": 10,
+                           "Q": 10, "K": 10}
 
     def judge_round(self, player):
-        ''' Judge the target player's status
+        """Judge the target player's status
 
         Args:
             player (int): target player's id
@@ -15,7 +14,7 @@ def judge_round(self, player):
         Returns:
             status (str): the status of the target player
             score (int): the current score of the player
-        '''
+        """
         score = self.judge_score(player.hand)
         if score <= 21:
             return "alive", score
@@ -23,21 +22,21 @@ def judge_round(self, player):
             return "bust", score
 
     def judge_game(self, game, game_pointer):
-        ''' Judge the winner of the game
+        """Judge the winner of the game
 
         Args:
             game (class): target game class
+        """
         '''
-        '''
-                game.winner['dealer'] doesn't need anymore if we change code like this
+            game.winner['dealer'] is no longer needed if we change the code like this
 
-                player bust (whether dealer bust or not) => game.winner[playerX] = -1
-                player and dealer tie => game.winner[playerX] = 1
-                dealer bust and player not bust => game.winner[playerX] = 2
-                player get higher score than dealer => game.winner[playerX] = 2
-                dealer get higher score than player => game.winner[playerX] = -1
-                game.winner[playerX] = 0 => the game is still ongoing
-                '''
+            player bust (whether dealer bust or not) => game.winner[playerX] = -1
+            player and dealer tie => game.winner[playerX] = 1
+            dealer bust and player not bust => game.winner[playerX] = 2
+            player get higher score than dealer => game.winner[playerX] = 2
+            dealer get higher score than player => game.winner[playerX] = -1
+            game.winner[playerX] = 0 => the game is still ongoing
+        '''
 
         if game.players[game_pointer].status == 'bust':
             game.winner['player' + str(game_pointer)] = -1
@@ -52,14 +51,14 @@ def judge_game(self, game, game_pointer):
                 game.winner['player' + str(game_pointer)] = 1
 
     def judge_score(self, cards):
-        ''' Judge the score of a given cards set
+        """Judge the score of a given cards set
 
         Args:
             cards (list): a list of cards
 
         Returns:
             score (int): the score of the given cards set
-        '''
+        """
         score = 0
         count_a = 0
         for card in cards:
diff --git a/rlcard/games/blackjack/player.py b/rlcard/games/blackjack/player.py
index 654a179ab..0f13a6636 100644
--- a/rlcard/games/blackjack/player.py
+++ b/rlcard/games/blackjack/player.py
@@ -2,11 +2,11 @@
 class BlackjackPlayer:
 
     def __init__(self, player_id, np_random):
-        ''' Initialize a Blackjack player class
+        """Initialize a Blackjack player class
 
         Args:
             player_id (int): id for the player
-        '''
+        """
         self.np_random = np_random
         self.player_id = player_id
         self.hand = []
@@ -14,6 +14,5 @@ def __init__(self, player_id, np_random):
         self.score = 0
 
     def get_player_id(self):
-        ''' Return player's id
-        '''
+        """Return player's id"""
         return self.player_id
diff --git a/rlcard/games/bridge/dealer.py b/rlcard/games/bridge/dealer.py
index 955ae52f9..504e582a7 100644
--- a/rlcard/games/bridge/dealer.py
+++ b/rlcard/games/bridge/dealer.py
@@ -1,8 +1,8 @@
-'''
+"""
     File name: bridge/dealer.py
     Author: William Hale
     Date created: 11/25/2021
-'''
+"""
 
 from typing import List
 
@@ -11,22 +11,20 @@
 
 
 class BridgeDealer:
-    ''' Initialize a BridgeDealer dealer class
-    '''
+    """Bridge dealer that shuffles the deck and deals cards from the stock pile"""
     def __init__(self, np_random):
-        ''' set shuffled_deck, set stock_pile
-        '''
+        """Set shuffled_deck and stock_pile"""
         self.np_random = np_random
         self.shuffled_deck: List[BridgeCard] = BridgeCard.get_deck()  # keep a copy of the shuffled cards at start of new hand
         self.np_random.shuffle(self.shuffled_deck)
         self.stock_pile: List[BridgeCard] = self.shuffled_deck.copy()
 
     def deal_cards(self, player: BridgePlayer, num: int):
-        ''' Deal some cards from stock_pile to one player
+        """Deal some cards from stock_pile to one player
 
         Args:
             player (BridgePlayer): The BridgePlayer object
             num (int): The number of cards to be dealt
-        '''
+        """
         for _ in range(num):
             player.hand.append(self.stock_pile.pop())
diff --git a/rlcard/games/bridge/game.py b/rlcard/games/bridge/game.py
index 038d2e954..03a8df305 100644
--- a/rlcard/games/bridge/game.py
+++ b/rlcard/games/bridge/game.py
@@ -1,8 +1,8 @@
-'''
+"""
     File name: bridge/game.py
     Author: William Hale
     Date created: 11/25/2021
-'''
+"""
 
 from typing import List
 
@@ -14,12 +14,10 @@
 
 
 class BridgeGame:
-    ''' Game class. This class will interact with outer environment.
-    '''
+    """Game class. This class will interact with the outer environment"""
 
     def __init__(self, allow_step_back=False):
-        '''Initialize the class BridgeGame
-        '''
+        """Initialize the class BridgeGame"""
         self.allow_step_back: bool = allow_step_back
         self.np_random = np.random.RandomState()
         self.judger: BridgeJudger = BridgeJudger(game=self)
@@ -28,8 +26,7 @@ def __init__(self, allow_step_back=False):
         self.num_players: int = 4
 
     def init_game(self):
-        ''' Initialize all characters in the game and start round 1
-        '''
+        """Initialize all characters in the game and start round 1"""
         board_id = self.np_random.choice([1, 2, 3, 4])
         self.actions: List[ActionEvent] = []
         self.round = BridgeRound(num_players=self.num_players, board_id=board_id, np_random=self.np_random)
@@ -41,8 +38,7 @@ def init_game(self):
         return state, current_player_id
 
     def step(self, action: ActionEvent):
-        ''' Perform game action and return next player number, and the state for next player
-        '''
+        """Perform game action and return next player number, and the state for next player"""
         if isinstance(action, CallActionEvent):
             self.round.make_call(action=action)
         elif isinstance(action, PlayCardAction):
@@ -55,32 +51,28 @@ def step(self, action: ActionEvent):
         return next_state, next_player_id
 
     def get_num_players(self) -> int:
-        ''' Return the number of players in the game
-        '''
+        """Return the number of players in the game"""
         return self.num_players
 
     @staticmethod
     def get_num_actions() -> int:
-        ''' Return the number of possible actions in the game
-        '''
+        """Return the number of possible actions in the game"""
         return ActionEvent.get_num_actions()
 
     def get_player_id(self):
-        ''' Return the current player that will take actions soon
-        '''
+        """Return the current player that will take actions soon"""
         return self.round.current_player_id
 
     def is_over(self) -> bool:
-        ''' Return whether the current game is over
-        '''
+        """Return whether the current game is over"""
         return self.round.is_over()
 
     def get_state(self, player_id: int):  # wch: not really used
-        ''' Get player's state
+        """Get player's state
 
         Return:
             state (dict): The information of the state
-        '''
+        """
         state = {}
         if not self.is_over():
             state['player_id'] = player_id
diff --git a/rlcard/games/bridge/judger.py b/rlcard/games/bridge/judger.py
index 4158ec7f4..9023e9989 100644
--- a/rlcard/games/bridge/judger.py
+++ b/rlcard/games/bridge/judger.py
@@ -1,8 +1,8 @@
-'''
+"""
     File name: bridge/judger.py
     Author: William Hale
     Date created: 11/25/2021
-'''
+"""
 
 from typing import List
 
@@ -17,15 +17,12 @@
 
 
 class BridgeJudger:
-
-    '''
-        Judger decides legal actions for current player
-    '''
+    """Judger decides legal actions for current player"""
 
     def __init__(self, game: 'BridgeGame'):
-        ''' Initialize the class BridgeJudger
+        """Initialize the class BridgeJudger
         :param game: BridgeGame
-        '''
+        """
         self.game: BridgeGame = game
 
     def get_legal_actions(self) -> List[ActionEvent]:
diff --git a/rlcard/games/bridge/player.py b/rlcard/games/bridge/player.py
index a4e4cff89..41c6fe2ec 100644
--- a/rlcard/games/bridge/player.py
+++ b/rlcard/games/bridge/player.py
@@ -1,8 +1,8 @@
-'''
+"""
     File name: bridge/player.py
     Author: William Hale
     Date created: 11/25/2021
-'''
+"""
 
 from typing import List
 
@@ -12,11 +12,11 @@
 class BridgePlayer:
 
     def __init__(self, player_id: int, np_random):
-        ''' Initialize a BridgePlayer player class
+        """Initialize a BridgePlayer player class
 
         Args:
             player_id (int): id for the player
-        '''
+        """
         if player_id < 0 or player_id > 3:
             raise Exception(f'BridgePlayer has invalid player_id: {player_id}')
         self.np_random = np_random
diff --git a/rlcard/games/bridge/round.py b/rlcard/games/bridge/round.py
index b7e3df4b6..ab1a7aaff 100644
--- a/rlcard/games/bridge/round.py
+++ b/rlcard/games/bridge/round.py
@@ -1,8 +1,8 @@
-'''
+"""
     File name: bridge/round.py
     Author: William Hale
     Date created: 11/25/2021
-'''
+"""
 
 from typing import List
 
@@ -39,7 +39,7 @@ def round_phase(self):
         return result
 
     def __init__(self, num_players: int, board_id: int, np_random):
-        ''' Initialize the round class
+        """Initialize the round class
 
             The round class maintains the following instances:
                 1) dealer: the dealer of the round; dealer has trick_pile
@@ -59,7 +59,7 @@ def __init__(self, num_players: int, board_id: int, np_random):
             num_players: int
             board_id: int
             np_random
-        '''
+        """
         tray = Tray(board_id=board_id)
         dealer_id = tray.dealer_id
         self.tray = tray
@@ -77,8 +77,7 @@ def __init__(self, num_players: int, board_id: int, np_random):
         self.move_sheet.append(DealHandMove(dealer=self.players[dealer_id], shuffled_deck=self.dealer.shuffled_deck))
 
     def is_bidding_over(self) -> bool:
-        ''' Return whether the current bidding is over
-        '''
+        """Return whether the current bidding is over"""
         is_bidding_over = True
         if len(self.move_sheet) < 5:
             is_bidding_over = False
@@ -97,8 +96,7 @@ def is_bidding_over(self) -> bool:
         return is_bidding_over
 
     def is_over(self) -> bool:
-        ''' Return whether the current game is over
-        '''
+        """Return whether the current game is over"""
         is_over = True
         if not self.is_bidding_over():
             is_over = False
diff --git a/rlcard/games/bridge/utils/action_event.py b/rlcard/games/bridge/utils/action_event.py
index 0700f2ae6..2f5d35c03 100644
--- a/rlcard/games/bridge/utils/action_event.py
+++ b/rlcard/games/bridge/utils/action_event.py
@@ -1,8 +1,8 @@
-'''
+"""
     File name: bridge/utils/action_event.py
     Author: William Hale
     Date created: 11/25/2021
-'''
+"""
 
 from .bridge_card import BridgeCard
 
@@ -57,8 +57,7 @@ def from_action_id(action_id: int):
 
     @staticmethod
     def get_num_actions():
-        ''' Return the number of possible actions in the game
-        '''
+        """Return the number of possible actions in the game"""
         return 1 + 35 + 3 + 52  # no_bid, 35 bids, pass, dbl, rdl, 52 play_card
 
 
diff --git a/rlcard/games/bridge/utils/bridge_card.py b/rlcard/games/bridge/utils/bridge_card.py
index f62268eca..2b0f0a417 100644
--- a/rlcard/games/bridge/utils/bridge_card.py
+++ b/rlcard/games/bridge/utils/bridge_card.py
@@ -1,8 +1,8 @@
-'''
+"""
     File name: bridge/utils/bridge_card.py
     Author: William Hale
     Date created: 11/25/2021
-'''
+"""
 
 from rlcard.games.base import Card
 
diff --git a/rlcard/games/bridge/utils/move.py b/rlcard/games/bridge/utils/move.py
index e3ff9eb9d..bd3053ef7 100644
--- a/rlcard/games/bridge/utils/move.py
+++ b/rlcard/games/bridge/utils/move.py
@@ -1,8 +1,8 @@
-'''
+"""
     File name: bridge/utils/move.py
     Author: William Hale
     Date created: 11/25/2021
-'''
+"""
 
 #
 #   These classes are used to keep a move_sheet history of the moves in a round.
diff --git a/rlcard/games/bridge/utils/tray.py b/rlcard/games/bridge/utils/tray.py
index 7af25f0a1..51118a879 100644
--- a/rlcard/games/bridge/utils/tray.py
+++ b/rlcard/games/bridge/utils/tray.py
@@ -1,8 +1,8 @@
-'''
+"""
     File name: bridge/utils/tray.py
     Author: William Hale
     Date created: 11/28/2021
-'''
+"""
 
 
 class Tray(object):
diff --git a/rlcard/games/bridge/utils/utils.py b/rlcard/games/bridge/utils/utils.py
index 02cf2c50d..bea8e3c4f 100644
--- a/rlcard/games/bridge/utils/utils.py
+++ b/rlcard/games/bridge/utils/utils.py
@@ -1,8 +1,8 @@
-'''
+"""
     File name: bridge/utils/utils.py
     Author: William Hale
     Date created: 11/26/2021
-'''
+"""
 
 from typing import List
 
diff --git a/rlcard/games/doudizhu/dealer.py b/rlcard/games/doudizhu/dealer.py
index b6c90515d..8461607c5 100644
--- a/rlcard/games/doudizhu/dealer.py
+++ b/rlcard/games/doudizhu/dealer.py
@@ -1,52 +1,51 @@
 # -*- coding: utf-8 -*-
-''' Implement Doudizhu Dealer class
-'''
+"""Implement Doudizhu Dealer class"""
 import functools
 
 from rlcard.utils import init_54_deck
 from rlcard.games.doudizhu.utils import cards2str, doudizhu_sort_card
 
+
 class DoudizhuDealer:
-    ''' Dealer will shuffle, deal cards, and determine players' roles
-    '''
+    """Dealer will shuffle, deal cards, and determine players' roles"""
+
     def __init__(self, np_random):
-        '''Give dealer the deck
+        """Give dealer the deck
 
         Notes:
             1. deck with 54 cards including black joker and red joker
-        '''
+        """
         self.np_random = np_random
         self.deck = init_54_deck()
         self.deck.sort(key=functools.cmp_to_key(doudizhu_sort_card))
         self.landlord = None
 
     def shuffle(self):
-        ''' Randomly shuffle the deck
-        '''
+        """Randomly shuffle the deck"""
         self.np_random.shuffle(self.deck)
 
     def deal_cards(self, players):
-        ''' Deal cards to players
+        """Deal cards to players
 
         Args:
             players (list): list of DoudizhuPlayer objects
-        '''
+        """
         hand_num = (len(self.deck) - 3) // len(players)
         for index, player in enumerate(players):
-            current_hand = self.deck[index*hand_num:(index+1)*hand_num]
+            current_hand = self.deck[index * hand_num:(index + 1) * hand_num]
             current_hand.sort(key=functools.cmp_to_key(doudizhu_sort_card))
             player.set_current_hand(current_hand)
             player.initial_hand = cards2str(player.current_hand)
 
     def determine_role(self, players):
-        ''' Determine landlord and peasants according to players' hand
+        """Determine landlord and peasants according to players' hand
 
         Args:
             players (list): list of DoudizhuPlayer objects
 
         Returns:
             int: landlord's player_id
-        '''
+        """
         # deal cards
         self.shuffle()
         self.deal_cards(players)
@@ -54,20 +53,20 @@ def determine_role(self, players):
         self.landlord = players[0]
         players[1].role = 'peasant'
         players[2].role = 'peasant'
-        #players[0].role = 'peasant'
-        #self.landlord = players[0]
+        # players[0].role = 'peasant'
+        # self.landlord = players[0]
 
         ## determine 'landlord'
-        #max_score = get_landlord_score(
+        # max_score = get_landlord_score(
         #    cards2str(self.landlord.current_hand))
-        #for player in players[1:]:
+        # for player in players[1:]:
         #    player.role = 'peasant'
         #    score = get_landlord_score(
         #        cards2str(player.current_hand))
         #    if score > max_score:
         #        max_score = score
         #        self.landlord = player
-        #self.landlord.role = 'landlord'
+        # self.landlord.role = 'landlord'
 
         # give the 'landlord' the  three cards
         self.landlord.current_hand.extend(self.deck[-3:])
diff --git a/rlcard/games/doudizhu/game.py b/rlcard/games/doudizhu/game.py
index 945235d5c..f66602ca5 100644
--- a/rlcard/games/doudizhu/game.py
+++ b/rlcard/games/doudizhu/game.py
@@ -1,6 +1,5 @@
 # -*- coding: utf-8 -*-
-''' Implement Doudizhu Game class
-'''
+"""Implement Doudizhu Game class"""
 import functools
 from heapq import merge
 import numpy as np
@@ -12,21 +11,20 @@
 
 
 class DoudizhuGame:
-    ''' Provide game APIs for env to run doudizhu and get corresponding state
-    information.
-    '''
+    """Provide game APIs for env to run doudizhu and get corresponding state information"""
+
     def __init__(self, allow_step_back=False):
         self.allow_step_back = allow_step_back
         self.np_random = np.random.RandomState()
         self.num_players = 3
 
     def init_game(self):
-        ''' Initialize players and state.
+        """Initialize players and state.
 
         Returns:
             dict: first state in one game
             int: current player's id
-        '''
+        """
         # initialize public variables
         self.winner_id = None
         self.history = []
@@ -36,8 +34,8 @@ def init_game(self):
                         for num in range(self.num_players)]
 
         # initialize round to deal cards and determine landlord
-        self.played_cards = [np.zeros((len(CARD_RANK_STR), ), dtype=np.int32)
-                                for _ in range(self.num_players)]
+        self.played_cards = [np.zeros((len(CARD_RANK_STR),), dtype=np.int32)
+                             for _ in range(self.num_players)]
         self.round = Round(self.np_random, self.played_cards)
         self.round.initiate(self.players)
 
@@ -51,7 +49,7 @@ def init_game(self):
         return self.state, player_id
 
     def step(self, action):
-        ''' Perform one draw of the game
+        """Perform one draw of the game
 
         Args:
             action (str): specific action of doudizhu. Eg: '33344'
@@ -59,19 +57,19 @@ def step(self, action):
         Returns:
             dict: next player's state
             int: next player's id
-        '''
+        """
         if self.allow_step_back:
             # TODO: don't record game.round, game.players, game.judger if allow_step_back not set
             pass
 
-        # perfrom action
+        # perform action
         player = self.players[self.round.current_player]
         self.round.proceed_round(player, action)
-        if (action != 'pass'):
+        if action != 'pass':
             self.judger.calc_playable_cards(player)
         if self.judger.judge_game(self.players, self.round.current_player):
             self.winner_id = self.round.current_player
-        next_id = (player.player_id+1) % len(self.players)
+        next_id = (player.player_id + 1) % len(self.players)
         self.round.current_player = next_id
 
         # get next state
@@ -81,41 +79,41 @@ def step(self, action):
         return state, next_id
 
     def step_back(self):
-        ''' Return to the previous state of the game
+        """Return to the previous state of the game
 
         Returns:
             (bool): True if the game steps back successfully
-        '''
+        """
         if not self.round.trace:
             return False
 
-        #winner_id will be always None no matter step_back from any case
+        # winner_id will be always None no matter step_back from any case
         self.winner_id = None
 
-        #reverse round
+        # reverse round
         player_id, cards = self.round.step_back(self.players)
 
-        #reverse player
-        if (cards != 'pass'):
+        # reverse player
+        if cards != 'pass':
             self.players[player_id].played_cards = self.round.find_last_played_cards_in_trace(player_id)
         self.players[player_id].play_back()
 
-        #reverse judger.played_cards if needed
-        if (cards != 'pass'):
+        # reverse judger.played_cards if needed
+        if cards != 'pass':
             self.judger.restore_playable_cards(player_id)
 
         self.state = self.get_state(self.round.current_player)
         return True
 
     def get_state(self, player_id):
-        ''' Return player's state
+        """Return player's state
 
         Args:
             player_id (int): player id
 
         Returns:
             (dict): The state of the player
-        '''
+        """
         player = self.players[player_id]
         others_hands = self._get_others_current_hand(player)
         num_cards_left = [len(self.players[i].current_hand) for i in range(self.num_players)]
@@ -129,41 +127,42 @@ def get_state(self, player_id):
 
     @staticmethod
     def get_num_actions():
-        ''' Return the total number of abstract acitons
+        """Return the total number of abstract actions
 
         Returns:
             int: the total number of abstract actions of doudizhu
-        '''
+        """
         return 27472
 
     def get_player_id(self):
-        ''' Return current player's id
+        """Return current player's id
 
         Returns:
             int: current player's id
-        '''
+        """
         return self.round.current_player
 
     def get_num_players(self):
-        ''' Return the number of players in doudizhu
+        """Return the number of players in doudizhu
 
         Returns:
             int: the number of players in doudizhu
-        '''
+        """
         return self.num_players
 
     def is_over(self):
-        ''' Judge whether a game is over
+        """Judge whether a game is over
 
         Returns:
             Bool: True(over) / False(not over)
-        '''
+        """
         if self.winner_id is None:
             return False
         return True
 
     def _get_others_current_hand(self, player):
-        player_up = self.players[(player.player_id+1) % len(self.players)]
-        player_down = self.players[(player.player_id-1) % len(self.players)]
-        others_hand = merge(player_up.current_hand, player_down.current_hand, key=functools.cmp_to_key(doudizhu_sort_card))
+        player_up = self.players[(player.player_id + 1) % len(self.players)]
+        player_down = self.players[(player.player_id - 1) % len(self.players)]
+        others_hand = merge(player_up.current_hand, player_down.current_hand,
+                            key=functools.cmp_to_key(doudizhu_sort_card))
         return cards2str(others_hand)
diff --git a/rlcard/games/doudizhu/judger.py b/rlcard/games/doudizhu/judger.py
index 375301b0b..b6e6b1fc7 100644
--- a/rlcard/games/doudizhu/judger.py
+++ b/rlcard/games/doudizhu/judger.py
@@ -1,6 +1,5 @@
 # -*- coding: utf-8 -*-
-''' Implement Doudizhu Judger class
-'''
+"""Implement Doudizhu Judger class"""
 import numpy as np
 import collections
 from itertools import combinations
@@ -10,13 +9,12 @@
 from rlcard.games.doudizhu.utils import cards2str, contains_cards
 
 
-
 class DoudizhuJudger:
-    ''' Determine what cards a player can play
-    '''
+    """Determine what cards a player can play"""
+
     @staticmethod
     def chain_indexes(indexes_list):
-        ''' Find chains for solos, pairs and trios by using indexes_list
+        """Find chains for solos, pairs and trios by using indexes_list
 
         Args:
             indexes_list: the indexes of cards those have the same count, the count could be 1, 2, or 3.
@@ -24,29 +22,29 @@ def chain_indexes(indexes_list):
         Returns:
             list of tuples: [(start_index1, length1), (start_index1, length1), ...]
 
-        '''
+        """
         chains = []
         prev_index = -100
         count = 0
         start = None
         for i in indexes_list:
-            if (i[0] >= 12): #no chains for '2BR'
+            if i[0] >= 12:  # no chains for '2BR'
                 break
-            if (i[0] == prev_index + 1):
+            if i[0] == prev_index + 1:
                 count += 1
             else:
-                if (count > 1):
+                if count > 1:
                     chains.append((start, count))
                 count = 1
                 start = i[0]
             prev_index = i[0]
-        if (count > 1):
+        if count > 1:
             chains.append((start, count))
         return chains
 
     @classmethod
     def solo_attachments(cls, hands, chain_start, chain_length, size):
-        ''' Find solo attachments for trio_chain_solo_x and four_two_solo
+        """Find solo attachments for trio_chain_solo_x and four_two_solo
 
         Args:
             hands:
@@ -59,21 +57,22 @@ def solo_attachments(cls, hands, chain_start, chain_length, size):
                             Each attachment has two elemnts,
                             the first one contains indexes of attached cards smaller than the index of chain_start,
                             the first one contains indexes of attached cards larger than the index of chain_start
-        '''
+        """
         attachments = set()
         candidates = []
         prev_card = None
         same_card_count = 0
         for card in hands:
-            #dont count those cards in the chain
-            if (CARD_RANK_STR_INDEX[card] >= chain_start and CARD_RANK_STR_INDEX[card] < chain_start + chain_length):
+            # don't count those cards in the chain
+            if CARD_RANK_STR_INDEX[card] >= chain_start and CARD_RANK_STR_INDEX[card] < chain_start + chain_length:
                 continue
-            if (card == prev_card):
-                #attachments can not have bomb
-                if (same_card_count == 3):
+            if card == prev_card:
+                # attachments cannot contain a bomb (four cards of the same rank)
+                if same_card_count == 3:
                     continue
-                #attachments can not have 3 same cards consecutive with the trio (except 3 cards of '222')
-                elif (same_card_count == 2 and (CARD_RANK_STR_INDEX[card] == chain_start - 1 or CARD_RANK_STR_INDEX[card] == chain_start + chain_length) and card != '2'):
+                # attachments cannot contain three cards of a rank adjacent to the trio chain (except '222')
+                elif same_card_count == 2 and (CARD_RANK_STR_INDEX[card] == chain_start - 1 or CARD_RANK_STR_INDEX[
+                    card] == chain_start + chain_length) and card != '2':
                     continue
                 else:
                     same_card_count += 1
@@ -82,7 +81,7 @@ def solo_attachments(cls, hands, chain_start, chain_length, size):
                 same_card_count = 1
             candidates.append(CARD_RANK_STR_INDEX[card])
         for attachment in combinations(candidates, size):
-            if (attachment[-1] == 14 and attachment[-2] == 13):
+            if attachment[-1] == 14 and attachment[-2] == 13:
                 continue
             i = bisect_left(attachment, chain_start)
             attachments.add((attachment[:i], attachment[i:]))
@@ -90,7 +89,7 @@ def solo_attachments(cls, hands, chain_start, chain_length, size):
 
     @classmethod
     def pair_attachments(cls, cards_count, chain_start, chain_length, size):
-        ''' Find pair attachments for trio_chain_pair_x and four_two_pair
+        """Find pair attachments for trio_chain_pair_x and four_two_pair
 
         Args:
             cards_count:
@@ -103,30 +102,30 @@ def pair_attachments(cls, cards_count, chain_start, chain_length, size):
                             Each attachment has two elemnts,
                             the first one contains indexes of attached cards smaller than the index of chain_start,
                             the first one contains indexes of attached cards larger than the index of chain_start
-        '''
+        """
         attachments = set()
         candidates = []
         for i, _ in enumerate(cards_count):
-            if (i >= chain_start and i < chain_start + chain_length):
+            if i >= chain_start and i < chain_start + chain_length:
                 continue
-            if (cards_count[i] == 2 or cards_count[i] == 3):
+            if cards_count[i] == 2 or cards_count[i] == 3:
                 candidates.append(i)
-            elif (cards_count[i] == 4):
+            elif cards_count[i] == 4:
                 candidates.append(i)
         for attachment in combinations(candidates, size):
-            if (attachment[-1] == 14 and attachment[-2] == 13):
+            if attachment[-1] == 14 and attachment[-2] == 13:
                 continue
             i = bisect_left(attachment, chain_start)
             attachments.add((attachment[:i], attachment[i:]))
         return list(attachments)
-        
+
     @staticmethod
     def playable_cards_from_hand(current_hand):
-        ''' Get playable cards from hand
+        """Get playable cards from hand
 
         Returns:
             set: set of string of playable cards
-        '''
+        """
         cards_dict = collections.defaultdict(int)
         for card in current_hand:
             cards_dict[card] += 1
@@ -137,13 +136,13 @@ def playable_cards_from_hand(current_hand):
         more_than_1_indexes = np.argwhere(cards_count > 1)
         more_than_2_indexes = np.argwhere(cards_count > 2)
         more_than_3_indexes = np.argwhere(cards_count > 3)
-        #solo
+        # solo
         for i in non_zero_indexes:
             playable_cards.add(CARD_RANK_STR[i[0]])
-        #pair
+        # pair
         for i in more_than_1_indexes:
             playable_cards.add(CARD_RANK_STR[i[0]] * 2)
-        #bomb, four_two_solo, four_two_pair
+        # bomb, four_two_solo, four_two_pair
         for i in more_than_3_indexes:
             cards = CARD_RANK_STR[i[0]] * 4
             playable_cards.add(cards)
@@ -164,73 +163,73 @@ def playable_cards_from_hand(current_hand):
                     post_attached += CARD_RANK_STR[j] * 2
                 playable_cards.add(pre_attached + cards + post_attached)
 
-        #solo_chain_5 -- #solo_chain_12
+        # solo_chain_5 -- solo_chain_12
         solo_chain_indexes = DoudizhuJudger.chain_indexes(non_zero_indexes)
         for (start_index, length) in solo_chain_indexes:
             s, l = start_index, length
-            while(l >= 5):
+            while l >= 5:
                 cards = ''
                 curr_index = s - 1
                 curr_length = 0
-                while (curr_length < l and curr_length < 12):
+                while curr_length < l and curr_length < 12:
                     curr_index += 1
                     curr_length += 1
                     cards += CARD_RANK_STR[curr_index]
-                    if (curr_length >= 5):
+                    if curr_length >= 5:
                         playable_cards.add(cards)
                 l -= 1
                 s += 1
 
-        #pair_chain_3 -- #pair_chain_10
+        # pair_chain_3 -- pair_chain_10
         pair_chain_indexes = DoudizhuJudger.chain_indexes(more_than_1_indexes)
         for (start_index, length) in pair_chain_indexes:
             s, l = start_index, length
-            while(l >= 3):
+            while l >= 3:
                 cards = ''
                 curr_index = s - 1
                 curr_length = 0
-                while (curr_length < l and curr_length < 10):
+                while curr_length < l and curr_length < 10:
                     curr_index += 1
                     curr_length += 1
                     cards += CARD_RANK_STR[curr_index] * 2
-                    if (curr_length >= 3):
+                    if curr_length >= 3:
                         playable_cards.add(cards)
                 l -= 1
                 s += 1
 
-        #trio, trio_solo and trio_pair
+        # trio, trio_solo and trio_pair
         for i in more_than_2_indexes:
             playable_cards.add(CARD_RANK_STR[i[0]] * 3)
             for j in non_zero_indexes:
-                if (j < i):
+                if j < i:
                     playable_cards.add(CARD_RANK_STR[j[0]] + CARD_RANK_STR[i[0]] * 3)
-                elif (j > i):
+                elif j > i:
                     playable_cards.add(CARD_RANK_STR[i[0]] * 3 + CARD_RANK_STR[j[0]])
             for j in more_than_1_indexes:
-                if (j < i):
+                if j < i:
                     playable_cards.add(CARD_RANK_STR[j[0]] * 2 + CARD_RANK_STR[i[0]] * 3)
-                elif (j > i):
+                elif j > i:
                     playable_cards.add(CARD_RANK_STR[i[0]] * 3 + CARD_RANK_STR[j[0]] * 2)
 
-        #trio_solo, trio_pair, #trio -- trio_chain_2 -- trio_chain_6; trio_solo_chain_2 -- trio_solo_chain_5; trio_pair_chain_2 -- trio_pair_chain_4
+        # trio_solo, trio_pair, #trio -- trio_chain_2 -- trio_chain_6; trio_solo_chain_2 -- trio_solo_chain_5; trio_pair_chain_2 -- trio_pair_chain_4
         trio_chain_indexes = DoudizhuJudger.chain_indexes(more_than_2_indexes)
         for (start_index, length) in trio_chain_indexes:
             s, l = start_index, length
-            while(l >= 2):
+            while l >= 2:
                 cards = ''
                 curr_index = s - 1
                 curr_length = 0
-                while (curr_length < l and curr_length < 6):
+                while curr_length < l and curr_length < 6:
                     curr_index += 1
                     curr_length += 1
                     cards += CARD_RANK_STR[curr_index] * 3
 
-                    #trio_chain_2 to trio_chain_6
-                    if (curr_length >= 2 and curr_length <= 6):
+                    # trio_chain_2 to trio_chain_6
+                    if curr_length >= 2 and curr_length <= 6:
                         playable_cards.add(cards)
 
-                    #trio_solo_chain_2 to trio_solo_chain_5
-                    if (curr_length >= 2 and curr_length <= 5):
+                    # trio_solo_chain_2 to trio_solo_chain_5
+                    if curr_length >= 2 and curr_length <= 5:
                         for left, right in DoudizhuJudger.solo_attachments(current_hand, s, curr_length, curr_length):
                             pre_attached = ''
                             for j in left:
@@ -240,8 +239,8 @@ def playable_cards_from_hand(current_hand):
                                 post_attached += CARD_RANK_STR[j]
                             playable_cards.add(pre_attached + cards + post_attached)
 
-                    #trio_pair_chain2 -- trio_pair_chain_4
-                    if (curr_length >= 2 and curr_length <= 4):
+                    # trio_pair_chain2 -- trio_pair_chain_4
+                    if curr_length >= 2 and curr_length <= 4:
                         for left, right in DoudizhuJudger.pair_attachments(cards_count, s, curr_length, curr_length):
                             pre_attached = ''
                             for j in left:
@@ -252,14 +251,13 @@ def playable_cards_from_hand(current_hand):
                             playable_cards.add(pre_attached + cards + post_attached)
                 l -= 1
                 s += 1
-        #rocket
-        if (cards_count[13] and cards_count[14]):
+        # rocket
+        if cards_count[13] and cards_count[14]:
             playable_cards.add(CARD_RANK_STR[13] + CARD_RANK_STR[14])
         return playable_cards
 
     def __init__(self, players, np_random):
-        ''' Initilize the Judger class for Dou Dizhu
-        '''
+        """Initialize the Judger class for Dou Dizhu"""
         self.playable_cards = [set() for _ in range(3)]
         self._recorded_removed_playable_cards = [[] for _ in range(3)]
         for player in players:
@@ -268,17 +266,16 @@ def __init__(self, players, np_random):
             self.playable_cards[player_id] = self.playable_cards_from_hand(current_hand)
 
     def calc_playable_cards(self, player):
-        ''' Recalculate all legal cards the player can play according to his
+        """Recalculate all legal cards the player can play according to his
         current hand.
 
         Args:
             player (DoudizhuPlayer object): object of DoudizhuPlayer
-            init_flag (boolean): For the first time, set it True to accelerate
-              the preocess.
+            init_flag (boolean): For the first time, set it True to accelerate the process.
 
         Returns:
             list: list of string of playable cards
-        '''
+        """
         removed_playable_cards = []
 
         player_id = player.player_id
@@ -293,7 +290,7 @@ def calc_playable_cards(self, player):
 
         if missed is not None:
             position = player.singles.find(missed)
-            player.singles = player.singles[position+1:]
+            player.singles = player.singles[position + 1:]
             for cards in playable_cards:
                 if missed in cards or (not contains_cards(current_hand, cards)):
                     removed_playable_cards.append(cards)
@@ -301,39 +298,37 @@ def calc_playable_cards(self, player):
         else:
             for cards in playable_cards:
                 if not contains_cards(current_hand, cards):
-                    #del self.playable_cards[player_id][cards]
+                    # del self.playable_cards[player_id][cards]
                     removed_playable_cards.append(cards)
                     self.playable_cards[player_id].remove(cards)
         self._recorded_removed_playable_cards[player_id].append(removed_playable_cards)
         return self.playable_cards[player_id]
 
     def restore_playable_cards(self, player_id):
-        ''' restore playable_cards for judger for game.step_back().
+        """Restore playable_cards for judger for game.step_back().
 
         Args:
             player_id: The id of the player whose playable_cards need to be restored
-        '''
+        """
         removed_playable_cards = self._recorded_removed_playable_cards[player_id].pop()
         self.playable_cards[player_id].update(removed_playable_cards)
 
     def get_playable_cards(self, player):
-        ''' Provide all legal cards the player can play according to his
+        """Provide all legal cards the player can play according to his
         current hand.
 
         Args:
             player (DoudizhuPlayer object): object of DoudizhuPlayer
-            init_flag (boolean): For the first time, set it True to accelerate
-              the preocess.
+            init_flag (boolean): For the first time, set it True to accelerate the process.
 
         Returns:
             list: list of string of playable cards
-        '''
+        """
         return self.playable_cards[player.player_id]
 
-
     @staticmethod
     def judge_game(players, player_id):
-        ''' Judge whether the game is over
+        """Judge whether the game is over
 
         Args:
             players (list): list of DoudizhuPlayer objects
@@ -341,7 +336,7 @@ def judge_game(players, player_id):
 
         Returns:
             (bool): True if the game is over
-        '''
+        """
         player = players[player_id]
         if not player.current_hand:
             return True
diff --git a/rlcard/games/doudizhu/player.py b/rlcard/games/doudizhu/player.py
index 6cfcf41b3..23879e4b9 100644
--- a/rlcard/games/doudizhu/player.py
+++ b/rlcard/games/doudizhu/player.py
@@ -1,6 +1,5 @@
 # -*- coding: utf-8 -*-
-''' Implement Doudizhu Player class
-'''
+"""Implement Doudizhu Player class"""
 import functools
 
 from rlcard.games.doudizhu.utils import get_gt_cards
@@ -8,12 +7,12 @@
 
 
 class DoudizhuPlayer:
-    ''' Player can store cards in the player's hand and the role,
+    """Player can store cards in the player's hand and the role,
     determine the actions can be made according to the rules,
-    and can perfrom corresponding action
-    '''
+    and can perform corresponding action
+    """
     def __init__(self, player_id, np_random):
-        ''' Give the player an id in one game
+        """Give the player an id in one game
 
         Args:
             player_id (int): the player_id of a player
@@ -23,7 +22,7 @@ def __init__(self, player_id, np_random):
             2. played_cards: The cards played in one round
             3. hand: Initial cards
             4. _current_hand: The rest of the cards after playing some of them
-        '''
+        """
         self.np_random = np_random
         self.player_id = player_id
         self.initial_hand = None
@@ -58,7 +57,7 @@ def get_state(self, public, others_hands, num_cards_left, actions):
         return state
 
     def available_actions(self, greater_player=None, judger=None):
-        ''' Get the actions can be made based on the rules
+        """Get the actions that can be made based on the rules
 
         Args:
             greater_player (DoudizhuPlayer object): player who played
@@ -67,7 +66,7 @@ def available_actions(self, greater_player=None, judger=None):
 
         Returns:
             list: list of string of actions. Eg: ['pass', '8', '9', 'T', 'J']
-        '''
+        """
         actions = []
         if greater_player is None or greater_player.player_id == self.player_id:
             actions = judger.get_playable_cards(self)
@@ -76,7 +75,7 @@ def available_actions(self, greater_player=None, judger=None):
         return actions
 
     def play(self, action, greater_player=None):
-        ''' Perfrom action
+        """Perform action
 
         Args:
             action (string): specific action
@@ -84,7 +83,7 @@ def play(self, action, greater_player=None):
 
         Returns:
             object of DoudizhuPlayer: If there is a new greater_player, return it, if not, return None
-        '''
+        """
         trans = {'B': 'BJ', 'R': 'RJ'}
         if action == 'pass':
             self._recorded_played_cards.append([])
@@ -108,8 +107,7 @@ def play(self, action, greater_player=None):
             return self
 
     def play_back(self):
-        ''' Restore recorded cards back to self._current_hand
-        '''
+        """Restore recorded cards back to self._current_hand"""
         removed_cards = self._recorded_played_cards.pop()
         self._current_hand.extend(removed_cards)
         self._current_hand.sort(key=functools.cmp_to_key(doudizhu_sort_card))
diff --git a/rlcard/games/doudizhu/round.py b/rlcard/games/doudizhu/round.py
index 8682d914f..abd1e3a0b 100644
--- a/rlcard/games/doudizhu/round.py
+++ b/rlcard/games/doudizhu/round.py
@@ -1,9 +1,7 @@
 # -*- coding: utf-8 -*-
-''' Implement Doudizhu Round class
-'''
+"""Implement Doudizhu Round class"""
 
 import functools
-import numpy as np
 
 from rlcard.games.doudizhu import Dealer
 from rlcard.games.doudizhu.utils import cards2str, doudizhu_sort_card
@@ -11,8 +9,7 @@
 
 
 class DoudizhuRound:
-    ''' Round can call other Classes' functions to keep the game running
-    '''
+    """Round can call other Classes' functions to keep the game running"""
     def __init__(self, np_random, played_cards):
         self.np_random = np_random
         self.played_cards = played_cards
@@ -23,11 +20,11 @@ def __init__(self, np_random, played_cards):
         self.deck_str = cards2str(self.dealer.deck)
 
     def initiate(self, players):
-        ''' Call dealer to deal cards and bid landlord.
+        """Call dealer to deal cards and bid landlord.
 
         Args:
             players (list): list of DoudizhuPlayer objects
-        '''
+        """
         landlord_id = self.dealer.determine_role(players)
         seen_cards = self.dealer.deck[-3:]
         seen_cards.sort(key=functools.cmp_to_key(doudizhu_sort_card))
@@ -50,11 +47,11 @@ def cards_ndarray_to_str(ndarray_cards):
         return result
 
     def update_public(self, action):
-        ''' Update public trace and played cards
+        """Update public trace and played cards
 
         Args:
             action(str): string of legal specific action
-        '''
+        """
         self.trace.append((self.current_player, action))
         if action != 'pass':
             for c in action:
@@ -65,7 +62,7 @@ def update_public(self, action):
             self.public['played_cards'] = self.cards_ndarray_to_str(self.played_cards)
 
     def proceed_round(self, player, action):
-        ''' Call other Classes's functions to keep one round running
+        """Call other classes' functions to keep one round running
 
         Args:
             player (object): object of DoudizhuPlayer
@@ -73,53 +70,53 @@ def proceed_round(self, player, action):
 
         Returns:
             object of DoudizhuPlayer: player who played current biggest cards.
-        '''
+        """
         self.update_public(action)
         self.greater_player = player.play(action, self.greater_player)
         return self.greater_player
 
     def step_back(self, players):
-        ''' Reverse the last action
+        """Reverse the last action
 
         Args:
             players (list): list of DoudizhuPlayer objects
         Returns:
             The last player id and the cards played
-        '''
+        """
         player_id, cards = self.trace.pop()
         self.current_player = player_id
-        if (cards != 'pass'):
+        if cards != 'pass':
             for card in cards:
                 # self.played_cards.remove(card)
                 self.played_cards[player_id][CARD_RANK_STR_INDEX[card]] -= 1
             self.public['played_cards'] = self.cards_ndarray_to_str(self.played_cards)
         greater_player_id = self.find_last_greater_player_id_in_trace()
-        if (greater_player_id is not None):
+        if greater_player_id is not None:
             self.greater_player = players[greater_player_id]
         else:
             self.greater_player = None
         return player_id, cards
 
     def find_last_greater_player_id_in_trace(self):
-        ''' Find the last greater_player's id in trace
+        """Find the last greater_player's id in trace
 
         Returns:
             The last greater_player's id in trace
-        '''
+        """
         for i in range(len(self.trace) - 1, -1, -1):
             _id, action = self.trace[i]
-            if (action != 'pass'):
+            if action != 'pass':
                 return _id
         return None
 
     def find_last_played_cards_in_trace(self, player_id):
-        ''' Find the player_id's last played_cards in trace
+        """Find the player_id's last played_cards in trace
 
         Returns:
             The player_id's last played_cards in trace
-        '''
+        """
         for i in range(len(self.trace) - 1, -1, -1):
             _id, action = self.trace[i]
-            if (_id == player_id and action != 'pass'):
+            if _id == player_id and action != 'pass':
                 return action
         return None
diff --git a/rlcard/games/doudizhu/utils.py b/rlcard/games/doudizhu/utils.py
index 7d697ccfb..d7362539f 100644
--- a/rlcard/games/doudizhu/utils.py
+++ b/rlcard/games/doudizhu/utils.py
@@ -1,5 +1,4 @@
-''' Doudizhu utils
-'''
+"""Doudizhu utils"""
 import os
 import json
 from collections import OrderedDict
@@ -15,7 +14,8 @@
         or not os.path.isfile(os.path.join(ROOT_PATH, 'games/doudizhu/jsondata/card_type.json')) \
         or not os.path.isfile(os.path.join(ROOT_PATH, 'games/doudizhu/jsondata/type_card.json')):
     import zipfile
-    with zipfile.ZipFile(os.path.join(ROOT_PATH, 'games/doudizhu/jsondata.zip'),"r") as zip_ref:
+
+    with zipfile.ZipFile(os.path.join(ROOT_PATH, 'games/doudizhu/jsondata.zip'), "r") as zip_ref:
         zip_ref.extractall(os.path.join(ROOT_PATH, 'games/doudizhu/'))
 
 # Action space
@@ -41,8 +41,8 @@
 CARD_RANK_STR = ['3', '4', '5', '6', '7', '8', '9', 'T', 'J', 'Q', 'K',
                  'A', '2', 'B', 'R']
 CARD_RANK_STR_INDEX = {'3': 0, '4': 1, '5': 2, '6': 3, '7': 4,
-            '8': 5, '9': 6, 'T': 7, 'J': 8, 'Q': 9,
-            'K': 10, 'A': 11, '2': 12, 'B': 13, 'R': 14}
+                       '8': 5, '9': 6, 'T': 7, 'J': 8, 'Q': 9,
+                       'K': 10, 'A': 11, '2': 12, 'B': 13, 'R': 14}
 # rank list
 CARD_RANK = ['3', '4', '5', '6', '7', '8', '9', 'T', 'J', 'Q', 'K',
              'A', '2', 'BJ', 'RJ']
@@ -54,7 +54,7 @@
 
 
 def doudizhu_sort_str(card_1, card_2):
-    ''' Compare the rank of two cards of str representation
+    """Compare the rank of two cards of str representation
 
     Args:
         card_1 (str): str representation of solo card
@@ -62,7 +62,7 @@ def doudizhu_sort_str(card_1, card_2):
 
     Returns:
         int: 1(card_1 > card_2) / 0(card_1 = card2) / -1(card_1 < card_2)
-    '''
+    """
     key_1 = CARD_RANK_STR.index(card_1)
     key_2 = CARD_RANK_STR.index(card_2)
     if key_1 > key_2:
@@ -73,12 +73,12 @@ def doudizhu_sort_str(card_1, card_2):
 
 
 def doudizhu_sort_card(card_1, card_2):
-    ''' Compare the rank of two cards of Card object
+    """Compare the rank of two cards of Card object
 
     Args:
         card_1 (object): object of Card
         card_2 (object): object of card
-    '''
+    """
     key = []
     for card in [card_1, card_2]:
         if card.rank == '':
@@ -93,15 +93,14 @@ def doudizhu_sort_card(card_1, card_2):
 
 
 def get_landlord_score(current_hand):
-    ''' Roughly judge the quality of the hand, and provide a score as basis to
-    bid landlord.
+    """Roughly judge the quality of the hand, and provide a score as basis to bid landlord.
 
     Args:
         current_hand (str): string of cards. Eg: '56888TTQKKKAA222R'
 
     Returns:
         int: score
-    '''
+    """
     score_map = {'A': 1, '2': 2, 'B': 3, 'R': 4}
     score = 0
     # rocket
@@ -112,7 +111,7 @@ def get_landlord_score(current_hand):
     i = 0
     while i < length:
         # bomb
-        if i <= (length - 4) and current_hand[i] == current_hand[i+3]:
+        if i <= (length - 4) and current_hand[i] == current_hand[i + 3]:
             score += 6
             i += 4
             continue
@@ -122,26 +121,28 @@ def get_landlord_score(current_hand):
         i += 1
     return score
 
+
 def cards2str_with_suit(cards):
-    ''' Get the corresponding string representation of cards with suit
+    """Get the corresponding string representation of cards with suit
 
     Args:
         cards (list): list of Card objects
 
     Returns:
         string: string representation of cards
-    '''
-    return ' '.join([card.suit+card.rank for card in cards])
+    """
+    return ' '.join([card.suit + card.rank for card in cards])
+
 
 def cards2str(cards):
-    ''' Get the corresponding string representation of cards
+    """Get the corresponding string representation of cards
 
     Args:
         cards (list): list of Card objects
 
     Returns:
         string: string representation of cards
-    '''
+    """
     response = ''
     for card in cards:
         if card.rank == '':
@@ -150,13 +151,17 @@ def cards2str(cards):
             response += card.rank
     return response
 
+
 class LocalObjs(threading.local):
     def __init__(self):
         self.cached_candidate_cards = None
+
+
 _local_objs = LocalObjs()
 
+
 def contains_cards(candidate, target):
-    ''' Check if cards of candidate contains cards of target.
+    """Check if cards of candidate contains cards of target.
 
     Args:
         candidate (string): A string representing the cards of candidate
@@ -164,7 +169,7 @@ def contains_cards(candidate, target):
 
     Returns:
         boolean
-    '''
+    """
     # In normal cases, most continuous calls of this function
     #   will test different targets against the same candidate.
     # So the cached counts of each card in candidate can speed up
@@ -176,29 +181,30 @@ def contains_cards(candidate, target):
             cards_dict[card] += 1
         _local_objs.cached_candidate_cards_dict = cards_dict
     cards_dict = _local_objs.cached_candidate_cards_dict
-    if (target == ''):
+    if target == '':
         return True
     curr_card = target[0]
     curr_count = 1
     for card in target[1:]:
-        if (card != curr_card):
-            if (cards_dict[curr_card] < curr_count):
+        if card != curr_card:
+            if cards_dict[curr_card] < curr_count:
                 return False
             curr_card = card
             curr_count = 1
         else:
             curr_count += 1
-    if (cards_dict[curr_card] < curr_count):
+    if cards_dict[curr_card] < curr_count:
         return False
     return True
 
+
 def encode_cards(plane, cards):
-    ''' Encode cards and represerve it into plane.
+    """Encode cards and store the encoding in plane.
 
     Args:
         cards (list or str): list or str of cards, every entry is a
     character of solo representation of card
-    '''
+    """
     if not cards:
         return None
     layer = 1
@@ -210,10 +216,10 @@ def encode_cards(plane, cards):
         for index, card in enumerate(cards):
             if index == 0:
                 continue
-            if card == cards[index-1]:
+            if card == cards[index - 1]:
                 layer += 1
             else:
-                rank = CARD_RANK_STR.index(cards[index-1])
+                rank = CARD_RANK_STR.index(cards[index - 1])
                 plane[layer][rank] = 1
                 layer = 1
                 plane[0][rank] = 0
@@ -223,7 +229,7 @@ def encode_cards(plane, cards):
 
 
 def get_gt_cards(player, greater_player):
-    ''' Provide player's cards which are greater than the ones played by
+    """Provide player's cards which are greater than the ones played by
     previous player in one round
 
     Args:
@@ -235,9 +241,8 @@ def get_gt_cards(player, greater_player):
 
     Note:
         1. return value contains 'pass'
-    '''
-    # add 'pass' to legal actions
-    gt_cards = ['pass']
+    """
+    gt_cards = ['pass']  # add 'pass' to legal actions
     current_hand = cards2str(player.current_hand)
     target_cards = greater_player.played_cards
     target_types = CARD_TYPE[0][target_cards]
diff --git a/rlcard/games/gin_rummy/dealer.py b/rlcard/games/gin_rummy/dealer.py
index c14713147..f792ef890 100644
--- a/rlcard/games/gin_rummy/dealer.py
+++ b/rlcard/games/gin_rummy/dealer.py
@@ -1,19 +1,17 @@
-'''
+"""
     File name: gin_rummy/dealer.py
     Author: William Hale
     Date created: 2/12/2020
-'''
+"""
 
 from .player import GinRummyPlayer
 from .utils import utils as utils
 
 
 class GinRummyDealer:
-    ''' Initialize a GinRummy dealer class
-    '''
+    """Initialize a GinRummy dealer class"""
     def __init__(self, np_random):
-        ''' Empty discard_pile, set shuffled_deck, set stock_pile
-        '''
+        """Empty discard_pile, set shuffled_deck, set stock_pile"""
         self.np_random = np_random
         self.discard_pile = []  # type: List[Card]
         self.shuffled_deck = utils.get_deck()  # keep a copy of the shuffled cards at start of new hand
@@ -21,12 +19,12 @@ def __init__(self, np_random):
         self.stock_pile = self.shuffled_deck.copy()  # type: List[Card]
 
     def deal_cards(self, player: GinRummyPlayer, num: int):
-        ''' Deal some cards from stock_pile to one player
+        """Deal some cards from stock_pile to one player
 
         Args:
             player (GinRummyPlayer): The GinRummyPlayer object
             num (int): The number of cards to be dealt
-        '''
+        """
         for _ in range(num):
             player.hand.append(self.stock_pile.pop())
         player.did_populate_hand()
diff --git a/rlcard/games/gin_rummy/game.py b/rlcard/games/gin_rummy/game.py
index 4666b8ccf..d85a221dc 100644
--- a/rlcard/games/gin_rummy/game.py
+++ b/rlcard/games/gin_rummy/game.py
@@ -1,8 +1,8 @@
-'''
+"""
     File name: gin_rummy/game.py
     Author: William Hale
     Date created: 2/12/2020
-'''
+"""
 
 import numpy as np
 
@@ -15,12 +15,10 @@
 
 
 class GinRummyGame:
-    ''' Game class. This class will interact with outer environment.
-    '''
+    """Game class. This class will interact with outer environment"""
 
     def __init__(self, allow_step_back=False):
-        '''Initialize the class GinRummyGame
-        '''
+        """Initialize the class GinRummyGame"""
         self.allow_step_back = allow_step_back
         self.np_random = np.random.RandomState()
         self.judge = GinRummyJudge(game=self)
@@ -30,8 +28,7 @@ def __init__(self, allow_step_back=False):
         self.num_players = 2
 
     def init_game(self):
-        ''' Initialize all characters in the game and start round 1
-        '''
+        """Initialize all characters in the game and start round 1"""
         dealer_id = self.np_random.choice([0, 1])
         if self.settings.dealer_for_round == DealerForRound.North:
             dealer_id = 0
@@ -48,8 +45,7 @@ def init_game(self):
         return state, current_player_id
 
     def step(self, action: ActionEvent):
-        ''' Perform game action and return next player number, and the state for next player
-        '''
+        """Perform game action and return next player number, and the state for next player"""
         if isinstance(action, ScoreNorthPlayerAction):
             self.round.score_player_0(action)
         elif isinstance(action, ScoreSouthPlayerAction):
@@ -74,28 +70,23 @@ def step(self, action: ActionEvent):
         return next_state, next_player_id
 
     def step_back(self):
-        ''' Takes one step backward and restore to the last state
-        '''
+        """Take one step backward and restore the last state"""
         raise NotImplementedError
 
     def get_num_players(self):
-        ''' Return the number of players in the game
-        '''
+        """Return the number of players in the game"""
         return 2
 
     def get_num_actions(self):
-        ''' Return the number of possible actions in the game
-        '''
+        """Return the number of possible actions in the game"""
         return ActionEvent.get_num_actions()
 
     def get_player_id(self):
-        ''' Return the current player that will take actions soon
-        '''
+        """Return the current player that will take actions soon"""
         return self.round.current_player_id
 
     def is_over(self):
-        ''' Return whether the current game is over
-        '''
+        """Return whether the current game is over"""
         return self.round.is_over
 
     def get_current_player(self) -> GinRummyPlayer or None:
@@ -105,11 +96,11 @@ def get_last_action(self) -> ActionEvent or None:
         return self.actions[-1] if self.actions and len(self.actions) > 0 else None
 
     def get_state(self, player_id: int):
-        ''' Get player's state
+        """Get player's state
 
         Return:
             state (dict): The information of the state
-        '''
+        """
         state = {}
         if not self.is_over():
             discard_pile = self.round.dealer.discard_pile
@@ -132,12 +123,12 @@ def get_state(self, player_id: int):
 
     @staticmethod
     def decode_action(action_id) -> ActionEvent:  # FIXME 200213 should return str
-        ''' Action id -> the action_event in the game.
+        """Action id -> the action_event in the game.
 
         Args:
             action_id (int): the id of the action
 
         Returns:
             action (ActionEvent): the action that will be passed to the game engine.
-        '''
+        """
         return ActionEvent.decode_action(action_id=action_id)
diff --git a/rlcard/games/gin_rummy/judge.py b/rlcard/games/gin_rummy/judge.py
index bd47fa887..efa19c1bf 100644
--- a/rlcard/games/gin_rummy/judge.py
+++ b/rlcard/games/gin_rummy/judge.py
@@ -1,8 +1,8 @@
-'''
+"""
     File name: gin_rummy/judge.py
     Author: William Hale
     Date created: 2/12/2020
-'''
+"""
 
 from typing import TYPE_CHECKING
 if TYPE_CHECKING:
@@ -19,15 +19,12 @@
 
 
 class GinRummyJudge:
-
-    '''
-        Judge decides legal actions for current player
-    '''
+    """Judge decides legal actions for current player"""
 
     def __init__(self, game: 'GinRummyGame'):
-        ''' Initialize the class GinRummyJudge
+        """Initialize the class GinRummyJudge
         :param game: GinRummyGame
-        '''
+        """
         self.game = game
         self.scorer = GinRummyScorer()
 
@@ -98,11 +95,11 @@ def get_legal_actions(self) -> List[ActionEvent]:
 
 
 def get_going_out_cards(hand: List[Card], going_out_deadwood_count: int) -> Tuple[List[Card], List[Card]]:
-    '''
+    """
     :param hand: List[Card] -- must have 11 cards
     :param going_out_deadwood_count: int
     :return List[Card], List[Card: cards in hand that be knocked, cards in hand that can be ginned
-    '''
+    """
     if not len(hand) == 11:
         raise GinRummyProgramError("len(hand) is {}: should be 11.".format(len(hand)))
     meld_clusters = melding.get_meld_clusters(hand=hand)
@@ -112,19 +109,15 @@ def get_going_out_cards(hand: List[Card], going_out_deadwood_count: int) -> Tupl
     return list(knock_cards), list(gin_cards)
 
 
-#
-# private methods
-#
-
 def _get_going_out_cards(meld_clusters: List[List[List[Card]]],
                          hand: List[Card],
                          going_out_deadwood_count: int) -> Tuple[List[Card], List[Card]]:
-    '''
+    """
     :param meld_clusters
     :param hand: List[Card] -- must have 11 cards
     :param going_out_deadwood_count: int
     :return List[Card], List[Card: cards in hand that be knocked, cards in hand that can be ginned
-    '''
+    """
     if not len(hand) == 11:
         raise GinRummyProgramError("len(hand) is {}: should be 11.".format(len(hand)))
     knock_cards = set()
diff --git a/rlcard/games/gin_rummy/player.py b/rlcard/games/gin_rummy/player.py
index 570977bc3..5801e59fe 100644
--- a/rlcard/games/gin_rummy/player.py
+++ b/rlcard/games/gin_rummy/player.py
@@ -1,8 +1,8 @@
-'''
+"""
     File name: gin_rummy/player.py
     Author: William Hale
     Date created: 2/12/2020
-'''
+"""
 
 from typing import List
 
@@ -16,11 +16,11 @@
 class GinRummyPlayer:
 
     def __init__(self, player_id: int, np_random):
-        ''' Initialize a GinRummy player class
+        """Initialize a GinRummy player class
 
         Args:
             player_id (int): id for the player
-        '''
+        """
         self.np_random = np_random
         self.player_id = player_id
         self.hand = []  # type: List[Card]
@@ -30,8 +30,7 @@ def __init__(self, player_id: int, np_random):
         self.meld_run_by_suit_id = [[] for _ in range(4)]  # type: List[List[List[Card]]]
 
     def get_player_id(self) -> int:
-        ''' Return player's id
-        '''
+        """Return player's id"""
         return self.player_id
 
     def get_meld_clusters(self) -> List[List[List[Card]]]:
diff --git a/rlcard/games/gin_rummy/round.py b/rlcard/games/gin_rummy/round.py
index 02426d9c4..d27b51c19 100644
--- a/rlcard/games/gin_rummy/round.py
+++ b/rlcard/games/gin_rummy/round.py
@@ -1,8 +1,8 @@
-'''
+"""
     File name: gin_rummy/round.py
     Author: William Hale
     Date created: 2/12/2020
-'''
+"""
 from typing import TYPE_CHECKING
 if TYPE_CHECKING:
     from .utils.move import GinRummyMove
@@ -32,7 +32,7 @@
 class GinRummyRound:
 
     def __init__(self, dealer_id: int, np_random):
-        ''' Initialize the round class
+        """Initialize the round class
 
             The round class maintains the following instances:
                 1) dealer: the dealer of the round; dealer has stock_pile and discard_pile
@@ -51,7 +51,7 @@ def __init__(self, dealer_id: int, np_random):
 
         Args:
             dealer_id: int
-        '''
+        """
         self.np_random = np_random
         self.dealer_id = dealer_id
         self.dealer = GinRummyDealer(self.np_random)
diff --git a/rlcard/games/gin_rummy/utils/action_event.py b/rlcard/games/gin_rummy/utils/action_event.py
index 3126712c5..3ab9db1b1 100644
--- a/rlcard/games/gin_rummy/utils/action_event.py
+++ b/rlcard/games/gin_rummy/utils/action_event.py
@@ -1,8 +1,8 @@
-'''
+"""
     File name: gin_rummy/action_event.py
     Author: William Hale
     Date created: 2/12/2020
-'''
+"""
 
 from rlcard.games.base import Card
 
@@ -43,20 +43,19 @@ def __eq__(self, other):
 
     @staticmethod
     def get_num_actions():
-        ''' Return the number of possible actions in the game
-        '''
+        """Return the number of possible actions in the game"""
         return knock_action_id + 52  # FIXME: sensitive to code changes 200213
 
     @staticmethod
     def decode_action(action_id) -> 'ActionEvent':
-        ''' Action id -> the action_event in the game.
+        """Action id -> the action_event in the game.
 
         Args:
             action_id (int): the id of the action
 
         Returns:
             action (ActionEvent): the action that will be passed to the game engine.
-        '''
+        """
         if action_id == score_player_0_action_id:
             action_event = ScoreNorthPlayerAction()
         elif action_id == score_player_1_action_id:
diff --git a/rlcard/games/gin_rummy/utils/gin_rummy_error.py b/rlcard/games/gin_rummy/utils/gin_rummy_error.py
index ef38a45da..5da56c826 100644
--- a/rlcard/games/gin_rummy/utils/gin_rummy_error.py
+++ b/rlcard/games/gin_rummy/utils/gin_rummy_error.py
@@ -1,9 +1,9 @@
-'''
+"""
     Project: Gin Rummy
     File name: gin_rummy/utils/gin_rummy_error.py
     Author: William Hale
     Date created: 4/29/2020
-'''
+"""
 
 
 class GinRummyError(Exception):
diff --git a/rlcard/games/gin_rummy/utils/melding.py b/rlcard/games/gin_rummy/utils/melding.py
index dea15ebf1..559812167 100644
--- a/rlcard/games/gin_rummy/utils/melding.py
+++ b/rlcard/games/gin_rummy/utils/melding.py
@@ -1,8 +1,8 @@
-'''
+"""
     File name: gin_rummy/melding.py
     Author: William Hale
     Date created: 2/12/2020
-'''
+"""
 
 from typing import List
 
diff --git a/rlcard/games/gin_rummy/utils/move.py b/rlcard/games/gin_rummy/utils/move.py
index 78d028ea3..cdc5605d1 100644
--- a/rlcard/games/gin_rummy/utils/move.py
+++ b/rlcard/games/gin_rummy/utils/move.py
@@ -1,8 +1,8 @@
-'''
+"""
     File name: gin_rummy/move.py
     Author: William Hale
     Date created: 2/16/2020
-'''
+"""
 
 from typing import List
 
diff --git a/rlcard/games/gin_rummy/utils/scorers.py b/rlcard/games/gin_rummy/utils/scorers.py
index 7e8a84ba7..2b3edb958 100644
--- a/rlcard/games/gin_rummy/utils/scorers.py
+++ b/rlcard/games/gin_rummy/utils/scorers.py
@@ -1,8 +1,8 @@
-'''
+"""
     File name: gin_rummy/scorers.py
     Author: William Hale
     Date created: 2/15/2020
-'''
+"""
 
 from typing import TYPE_CHECKING
 if TYPE_CHECKING:
@@ -35,11 +35,11 @@ def get_payoffs(self, game: 'GinRummyGame'):
 
 
 def get_payoff_gin_rummy_v0(player: GinRummyPlayer, game: 'GinRummyGame') -> int:
-    ''' Get the payoff of player: deadwood_count of player
+    """Get the payoff of player: deadwood_count of player
 
     Returns:
         payoff (int or float): payoff for player (lower is better)
-    '''
+    """
     moves = game.round.move_sheet
     if player.player_id == 0:
         score_player_move = moves[-2]
@@ -54,14 +54,14 @@ def get_payoff_gin_rummy_v0(player: GinRummyPlayer, game: 'GinRummyGame') -> int
 
 
 def get_payoff_gin_rummy_v1(player: GinRummyPlayer, game: 'GinRummyGame') -> float:
-    ''' Get the payoff of player:
+    """Get the payoff of player:
             a) 1.0 if player gins
             b) 0.2 if player knocks
             c) -deadwood_count / 100 otherwise
 
     Returns:
         payoff (int or float): payoff for player (higher is better)
-    '''
+    """
     # payoff is 1.0 if player gins
     # payoff is 0.2 if player knocks
     # payoff is -deadwood_count / 100 if otherwise
diff --git a/rlcard/games/gin_rummy/utils/settings.py b/rlcard/games/gin_rummy/utils/settings.py
index f232e20d2..7a4914c93 100644
--- a/rlcard/games/gin_rummy/utils/settings.py
+++ b/rlcard/games/gin_rummy/utils/settings.py
@@ -1,8 +1,8 @@
-'''
+"""
     File name: gin_rummy/settings.py
     Author: William Hale
     Date created: 2/16/2020
-'''
+"""
 
 from typing import Dict, Any
 
diff --git a/rlcard/games/gin_rummy/utils/thinker.py b/rlcard/games/gin_rummy/utils/thinker.py
index 5e8ffe0a4..c49319384 100644
--- a/rlcard/games/gin_rummy/utils/thinker.py
+++ b/rlcard/games/gin_rummy/utils/thinker.py
@@ -1,8 +1,8 @@
-'''
+"""
     File name: gin_rummy/thinker.py
     Author: William Hale
     Date created: 3/28/2020
-'''
+"""
 
 from typing import List
 
diff --git a/rlcard/games/gin_rummy/utils/utils.py b/rlcard/games/gin_rummy/utils/utils.py
index 66413ed79..bbbb70bbe 100644
--- a/rlcard/games/gin_rummy/utils/utils.py
+++ b/rlcard/games/gin_rummy/utils/utils.py
@@ -1,8 +1,8 @@
-'''
+"""
     File name: gin_rummy/utils.py
     Author: William Hale
     Date created: 2/12/2020
-'''
+"""
 
 from typing import List, Iterable
 
@@ -20,11 +20,11 @@
 
 
 def card_from_card_id(card_id: int) -> Card:
-    ''' Make card from its card_id
+    """Make card from its card_id
 
     Args:
         card_id: int in range(0, 52)
-     '''
+    """
     if not (0 <= card_id < 52):
         raise GinRummyProgramError("card_id is {}: should be 0 <= card_id < 52.".format(card_id))
     rank_id = card_id % 13
diff --git a/rlcard/games/leducholdem/dealer.py b/rlcard/games/leducholdem/dealer.py
index 6af8ff63f..7fbdf7fec 100644
--- a/rlcard/games/leducholdem/dealer.py
+++ b/rlcard/games/leducholdem/dealer.py
@@ -1,11 +1,11 @@
 from rlcard.games.base import Card
 from rlcard.games.limitholdem import Dealer
 
+
 class LeducholdemDealer(Dealer):
 
     def __init__(self, np_random):
-        ''' Initialize a leducholdem dealer class
-        '''
+        """Initialize a leduc holdem dealer class """
         self.np_random = np_random
         self.deck = [Card('S', 'J'), Card('H', 'J'), Card('S', 'Q'), Card('H', 'Q'), Card('S', 'K'), Card('H', 'K')]
         self.shuffle()
diff --git a/rlcard/games/leducholdem/game.py b/rlcard/games/leducholdem/game.py
index fe31b9fdd..ebd4142f6 100644
--- a/rlcard/games/leducholdem/game.py
+++ b/rlcard/games/leducholdem/game.py
@@ -8,15 +8,15 @@
 
 from rlcard.games.limitholdem import Game
 
+
 class LeducholdemGame(Game):
 
     def __init__(self, allow_step_back=False, num_players=2):
-        ''' Initialize the class leducholdem Game
-        '''
+        """Initialize the class leducholdem Game"""
         self.allow_step_back = allow_step_back
         self.np_random = np.random.RandomState()
         ''' No big/small blind
-        # Some configarations of the game
+        # Some configurations of the game
         # These arguments are fixed in Leduc Hold'em Game
 
         # Raise amount and allowed times
@@ -25,7 +25,7 @@ def __init__(self, allow_step_back=False, num_players=2):
 
         self.num_players = 2
         '''
-        # Some configarations of the game
+        # Some configurations of the game
         # These arguments can be specified for creating new games
 
         # Small blind and big blind
@@ -39,12 +39,11 @@ def __init__(self, allow_step_back=False, num_players=2):
         self.num_players = num_players
 
     def configure(self, game_config):
-        ''' Specifiy some game specific parameters, such as number of players
-        '''
+        """Specify some game specific parameters, such as number of players"""
         self.num_players = game_config['game_num_players']
 
     def init_game(self):
-        ''' Initialilze the game of Limit Texas Hold'em
+        """Initialize the game of Limit Texas Hold'em
 
         This version supports two-player limit texas hold'em
 
@@ -53,11 +52,11 @@ def init_game(self):
 
                 (dict): The first state of the game
                 (int): Current player's id
-        '''
-        # Initilize a dealer that can deal cards
+        """
+        # Initialize a dealer that can deal cards
         self.dealer = Dealer(self.np_random)
 
-        # Initilize two players to play the game
+        # Initialize two players to play the game
         self.players = [Player(i, self.np_random) for i in range(self.num_players)]
 
         # Initialize a judger class which will decide who wins in the end
@@ -75,7 +74,7 @@ def init_game(self):
         # The player with small blind plays the first
         self.game_pointer = s
 
-        # Initilize a bidding round, in the first round, the big blind and the small blind needs to
+        # Initialize a bidding round. In the first round, the big blind and the small blind need to
         # be passed to the round for processing.
         self.round = Round(raise_amount=self.raise_amount,
                            allowed_raise_num=self.allowed_raise_num,
@@ -87,7 +86,7 @@ def init_game(self):
         # Count the round. There are 2 rounds in each game.
         self.round_counter = 0
 
-        # Save the hisory for stepping back to the last state.
+        # Save the history for stepping back to the last state.
         self.history = []
 
         state = self.get_state(self.game_pointer)
@@ -95,7 +94,7 @@ def init_game(self):
         return state, self.game_pointer
 
     def step(self, action):
-        ''' Get the next state
+        """Get the next state
 
         Args:
             action (str): a specific action. (call, raise, fold, or check)
@@ -105,7 +104,7 @@ def step(self, action):
 
                 (dict): next player's state
                 (int): next plater's id
-        '''
+        """
         if self.allow_step_back:
             # First snapshot the current state
             r = copy(self.round)
@@ -136,14 +135,14 @@ def step(self, action):
         return state, self.game_pointer
 
     def get_state(self, player):
-        ''' Return player's state
+        """Return player's state
 
         Args:
             player_id (int): player id
 
         Returns:
             (dict): The state of the player
-        '''
+        """
         chips = [self.players[i].in_chips for i in range(self.num_players)]
         legal_actions = self.get_legal_actions()
         state = self.players[player].get_state(self.public_card, chips, legal_actions)
@@ -152,37 +151,37 @@ def get_state(self, player):
         return state
 
     def is_over(self):
-        ''' Check if the game is over
+        """Check if the game is over
 
         Returns:
             (boolean): True if the game is over
-        '''
-        alive_players = [1 if p.status=='alive' else 0 for p in self.players]
+        """
+        alive_players = [1 if p.status == 'alive' else 0 for p in self.players]
         # If only one player is alive, the game is over.
         if sum(alive_players) == 1:
             return True
 
-        # If all rounds are finshed
+        # If all rounds are finished
         if self.round_counter >= 2:
             return True
         return False
 
     def get_payoffs(self):
-        ''' Return the payoffs of the game
+        """Return the payoffs of the game
 
         Returns:
             (list): Each entry corresponds to the payoff of one player
-        '''
+        """
         chips_payoffs = self.judger.judge_game(self.players, self.public_card)
-        payoffs = np.array(chips_payoffs) / (self.big_blind)
+        payoffs = np.array(chips_payoffs) / self.big_blind
         return payoffs
 
     def step_back(self):
-        ''' Return to the previous state of the game
+        """Return to the previous state of the game
 
         Returns:
             (bool): True if the game steps back successfully
-        '''
+        """
         if len(self.history) > 0:
             self.round, r_raised, self.game_pointer, self.round_counter, d_deck, self.public_card, self.players, ps_hand = self.history.pop()
             self.round.raised = r_raised
diff --git a/rlcard/games/leducholdem/judger.py b/rlcard/games/leducholdem/judger.py
index dc24d779f..24bf43cd9 100644
--- a/rlcard/games/leducholdem/judger.py
+++ b/rlcard/games/leducholdem/judger.py
@@ -1,16 +1,16 @@
 from rlcard.utils.utils import rank2int
 
+
 class LeducholdemJudger:
-    ''' The Judger class for Leduc Hold'em
-    '''
+    """The Judger class for Leduc Hold'em"""
+
     def __init__(self, np_random):
-        ''' Initialize a judger class
-        '''
+        """Initialize a judger class"""
         self.np_random = np_random
 
     @staticmethod
     def judge_game(players, public_card):
-        ''' Judge the winner of the game.
+        """Judge the winner of the game.
 
         Args:
             players (list): The list of players who play the game
@@ -18,7 +18,7 @@ def judge_game(players, public_card):
 
         Returns:
             (list): Each entry of the list corresponds to one entry of the
-        '''
+        """
         # Judge who are the winners
         winners = [0] * len(players)
         fold_count = 0
@@ -27,20 +27,20 @@ def judge_game(players, public_card):
         for idx, player in enumerate(players):
             ranks.append(rank2int(player.hand.rank))
             if player.status == 'folded':
-               fold_count += 1
+                fold_count += 1
             elif player.status == 'alive':
                 alive_idx = idx
         if fold_count == (len(players) - 1):
             winners[alive_idx] = 1
-        
+
         # If any of the players matches the public card wins
         if sum(winners) < 1:
             for idx, player in enumerate(players):
                 if player.hand.rank == public_card.rank:
                     winners[idx] = 1
                     break
-        
-        # If non of the above conditions, the winner player is the one with the highest card rank
+
+        # If none of the above conditions holds, the winner is the player with the highest card rank
         if sum(winners) < 1:
             max_rank = max(ranks)
             max_index = [i for i, j in enumerate(ranks) if j == max_rank]
diff --git a/rlcard/games/leducholdem/player.py b/rlcard/games/leducholdem/player.py
index 1ec66c339..f667772ae 100644
--- a/rlcard/games/leducholdem/player.py
+++ b/rlcard/games/leducholdem/player.py
@@ -1,11 +1,11 @@
 class LeducholdemPlayer:
 
     def __init__(self, player_id, np_random):
-        ''' Initilize a player.
+        """Initialize a player.
 
         Args:
             player_id (int): The id of the player
-        '''
+        """
         self.np_random = np_random
         self.player_id = player_id
         self.status = 'alive'
@@ -15,7 +15,7 @@ def __init__(self, player_id, np_random):
         self.in_chips = 0
 
     def get_state(self, public_card, all_chips, legal_actions):
-        ''' Encode the state for the player
+        """Encode the state for the player
 
         Args:
             public_card (object): The public card that seen by all the players
@@ -23,7 +23,7 @@ def get_state(self, public_card, all_chips, legal_actions):
 
         Returns:
             (dict): The state of the player
-        '''
+        """
         state = {}
         state['hand'] = self.hand.get_index()
         state['public_card'] = public_card.get_index() if public_card else None
@@ -33,6 +33,5 @@ def get_state(self, public_card, all_chips, legal_actions):
         return state
 
     def get_player_id(self):
-        ''' Return the id of the player
-        '''
+        """Return the id of the player"""
         return self.player_id
diff --git a/rlcard/games/leducholdem/round.py b/rlcard/games/leducholdem/round.py
index 3a0459607..deb5c06cf 100644
--- a/rlcard/games/leducholdem/round.py
+++ b/rlcard/games/leducholdem/round.py
@@ -1,19 +1,17 @@
 # -*- coding: utf-8 -*-
-''' Implement Leduc Hold'em Round class
-'''
+"""Implement Leduc Hold'em Round class"""
 
 from rlcard.games.limitholdem import Round
 
 class LeducholdemRound(Round):
-    ''' Round can call other Classes' functions to keep the game running
-    '''
+    """Round can call other Classes' functions to keep the game running"""
 
     def __init__(self, raise_amount, allowed_raise_num, num_players, np_random):
-        ''' Initilize the round class
+        """Initialize the round class
 
         Args:
             raise_amount (int): the raise amount for each raise
             allowed_raise_num (int): The number of allowed raise num
             num_players (int): The number of players
-        '''
+        """
         super(LeducholdemRound, self).__init__(raise_amount, allowed_raise_num, num_players, np_random=np_random)
diff --git a/rlcard/games/limitholdem/utils.py b/rlcard/games/limitholdem/utils.py
index 2443ed1d2..094875362 100644
--- a/rlcard/games/limitholdem/utils.py
+++ b/rlcard/games/limitholdem/utils.py
@@ -1,42 +1,41 @@
-import numpy as np
-
 class Hand:
     def __init__(self, all_cards):
-        self.all_cards = all_cards # two hand cards + five public cards
+        self.all_cards = all_cards  # two hand cards + five public cards
         self.category = 0
-        #type of a players' best five cards, greater combination has higher number eg: 0:"Not_Yet_Evaluated" 1: "High_Card" , 9:"Straight_Flush"
+        # category of the player's best five cards; a stronger combination gets a higher number,
+        # e.g. 0: "Not_Yet_Evaluated", 1: "High_Card", 9: "Straight_Flush"
         self.best_five = []
-        #the largest combination of five cards in all the seven cards
+        # the largest combination of five cards in all the seven cards
         self.flush_cards = []
-        #cards with same suit
+        # cards with same suit
         self.cards_by_rank = []
-        #cards after sort
+        # cards after sort
         self.product = 1
-        #cards’ type indicator
+        # cards’ type indicator
         self.RANK_TO_STRING = {2: "2", 3: "3", 4: "4", 5: "5", 6: "6",
                                7: "7", 8: "8", 9: "9", 10: "T", 11: "J", 12: "Q", 13: "K", 14: "A"}
-        self.STRING_TO_RANK = {v:k for k, v in self.RANK_TO_STRING.items()}
+        self.STRING_TO_RANK = {v: k for k, v in self.RANK_TO_STRING.items()}
         self.RANK_LOOKUP = "23456789TJQKA"
         self.SUIT_LOOKUP = "SCDH"
 
     def get_hand_five_cards(self):
-        '''
+        """
         Get the best five cards of a player
         Returns:
             (list): the best five cards among the seven cards of a player
-        '''
+        """
         return self.best_five
 
     def _sort_cards(self):
-        '''
+        """
         Sort all the seven cards ascendingly according to RANK_LOOKUP
-        '''
+        """
         self.all_cards = sorted(
             self.all_cards, key=lambda card: self.RANK_LOOKUP.index(card[1]))
 
-    def evaluateHand(self):
+    def evaluate_hand(self):
         """
-        Evaluate all the seven cards, get the best combination catagory
+        Evaluate all the seven cards, get the best combination category
         And pick the best five cards (for comparing in case 2 hands have the same Category) .
         """
         if len(self.all_cards) != 7:
@@ -49,47 +48,47 @@ def evaluateHand(self):
 
         if self._has_straight_flush():
             self.category = 9
-            #Straight Flush
+            # Straight Flush
         elif self._has_four():
             self.category = 8
-            #Four of a Kind
+            # Four of a Kind
             self.best_five = self._get_Four_of_a_kind_cards()
         elif self._has_fullhouse():
             self.category = 7
-            #Full house
+            # Full house
             self.best_five = self._get_Fullhouse_cards()
         elif self._has_flush():
             self.category = 6
-            #Flush
+            # Flush
             i = len(self.flush_cards)
-            self.best_five = [card for card in self.flush_cards[i-5:i]]
+            self.best_five = [card for card in self.flush_cards[i - 5:i]]
         elif self._has_straight(self.all_cards):
             self.category = 5
-            #Straight
+            # Straight
         elif self._has_three():
             self.category = 4
-            #Three of a Kind
+            # Three of a Kind
             self.best_five = self._get_Three_of_a_kind_cards()
         elif self._has_two_pairs():
             self.category = 3
-            #Two Pairs
+            # Two Pairs
             self.best_five = self._get_Two_Pair_cards()
         elif self._has_pair():
             self.category = 2
-            #One Pair
+            # One Pair
             self.best_five = self._get_One_Pair_cards()
         elif self._has_high_card():
             self.category = 1
-            #High Card
+            # High Card
             self.best_five = self._get_High_cards()
 
     def _has_straight_flush(self):
-        '''
+        """
         Check the existence of straight_flush cards
         Returns:
             True: exist
             False: not exist
-        '''
+        """
         self.flush_cards = self._getflush_cards()
         if len(self.flush_cards) > 0:
             straightflush_cards = self._get_straightflush_cards()
@@ -99,20 +98,20 @@ def _has_straight_flush(self):
         return False
 
     def _get_straightflush_cards(self):
-        '''
+        """
         Pick straight_flush cards
         Returns:
             (list): the straightflush cards
-        '''
+        """
         straightflush_cards = self._get_straight_cards(self.flush_cards)
         return straightflush_cards
 
     def _getflush_cards(self):
-        '''
+        """
         Pick flush cards
         Returns:
             (list): the flush cards
-        '''
+        """
         card_string = ''.join(self.all_cards)
         for suit in self.SUIT_LOOKUP:
             suit_count = card_string.count(suit)
@@ -123,52 +122,52 @@ def _getflush_cards(self):
         return []
 
     def _has_flush(self):
-        '''
+        """
         Check the existence of flush cards
         Returns:
             True: exist
             False: not exist
-        '''
+        """
         if len(self.flush_cards) > 0:
             return True
         else:
             return False
 
     def _has_straight(self, all_cards):
-        '''
+        """
         Check the existence of straight cards
         Returns:
             True: exist
             False: not exist
-        '''
+        """
         diff_rank_cards = self._get_different_rank_list(all_cards)
         self.best_five = self._get_straight_cards(diff_rank_cards)
         if len(self.best_five) != 0:
             return True
         else:
             return False
+
     @classmethod
-    def _get_different_rank_list(self, all_cards):
-        '''
+    def _get_different_rank_list(cls, all_cards):
+        """
         Get cards with different ranks, that is to say, remove duplicate-ranking cards, for picking straight cards' use
         Args:
             (list): two hand cards + five public cards
         Returns:
             (list): a list of cards with duplicate-ranking cards removed
-        '''
-        different_rank_list = []
-        different_rank_list.append(all_cards[0])
+        """
+        different_rank_list = [all_cards[0]]
         for card in all_cards:
-            if(card[1] != different_rank_list[-1][1]):
+            if card[1] != different_rank_list[-1][1]:
                 different_rank_list.append(card)
         return different_rank_list
 
     def _get_straight_cards(self, Cards):
-        '''
+        """
         Pick straight cards
         Returns:
             (list): the straight cards
-        '''
+        """
         ranks = [self.STRING_TO_RANK[c[1]] for c in Cards]
 
         highest_card = Cards[-1]
@@ -177,19 +176,19 @@ def _get_straight_cards(self, Cards):
             ranks.insert(0, 1)
 
         for i_last in range(len(ranks) - 1, 3, -1):
-            if ranks[i_last-4] + 4 == ranks[i_last]:  # works because ranks are unique and sorted in ascending order
-                return Cards[i_last-4:i_last+1]
+            if ranks[i_last - 4] + 4 == ranks[i_last]:  # works because ranks are unique and sorted in ascending order
+                return Cards[i_last - 4:i_last + 1]
         return []
 
     def _getcards_by_rank(self, all_cards):
-        '''
+        """
         Get cards by rank
         Args:
             (list): # two hand cards + five public cards
         Return:
             card_group(list): cards after sort
             product(int):cards‘ type indicator
-        '''
+        """
         card_group = []
         card_group_element = []
         product = 1
@@ -218,8 +217,7 @@ def _getcards_by_rank(self, all_cards):
                 card_group.append(card_group_element)
                 # reset counting
                 count = 1
-                card_group_element = []
-                card_group_element.append(card)
+                card_group_element = [card]
                 current_rank = rank
         # the For Loop misses operation for the last card
         # These 3 lines below to compensate that
@@ -231,83 +229,83 @@ def _getcards_by_rank(self, all_cards):
         return card_group, product
 
     def _has_four(self):
-        '''
+        """
         Check the existence of four cards
         Returns:
             True: exist
             False: not exist
-        '''
+        """
         if self.product == 5 or self.product == 10 or self.product == 15:
             return True
         else:
             return False
 
     def _has_fullhouse(self):
-        '''
+        """
         Check the existence of fullhouse cards
         Returns:
             True: exist
             False: not exist
-        '''
+        """
         if self.product == 6 or self.product == 9 or self.product == 12:
             return True
         else:
             return False
 
     def _has_three(self):
-        '''
+        """
         Check the existence of three cards
         Returns:
             True: exist
             False: not exist
-        '''
+        """
         if self.product == 3:
             return True
         else:
             return False
 
     def _has_two_pairs(self):
-        '''
+        """
         Check the existence of 2 pair cards
         Returns:
             True: exist
             False: not exist
-        '''
+        """
         if self.product == 4 or self.product == 8:
             return True
         else:
             return False
 
     def _has_pair(self):
-        '''
+        """
         Check the existence of 1 pair cards
         Returns:
             True: exist
             False: not exist
-        '''
+        """
         if self.product == 2:
             return True
         else:
             return False
 
     def _has_high_card(self):
-        '''
+        """
         Check the existence of high cards
         Returns:
             True: exist
             False: not exist
-        '''
+        """
         if self.product == 1:
             return True
         else:
             return False
 
     def _get_Four_of_a_kind_cards(self):
-        '''
+        """
         Get the four of a kind cards among a player's cards
         Returns:
             (list): best five hand cards after sort
-        '''
+        """
         Four_of_a_Kind = []
         cards_by_rank = self.cards_by_rank
         cards_len = len(cards_by_rank)
@@ -322,12 +320,11 @@ def _get_Four_of_a_kind_cards(self):
         return Four_of_a_Kind
 
     def _get_Fullhouse_cards(self):
-        '''
+        """
         Get the fullhouse cards among a player's cards
         Returns:
             (list): best five hand cards after sort
-        '''
-        Fullhouse = []
+        """
         cards_by_rank = self.cards_by_rank
         cards_len = len(cards_by_rank)
         for i in reversed(range(cards_len)):
@@ -342,11 +339,11 @@ def _get_Fullhouse_cards(self):
         return Fullhouse
 
     def _get_Three_of_a_kind_cards(self):
-        '''
+        """
         Get the three of a kind cards among a player's cards
         Returns:
             (list): best five hand cards after sort
-        '''
+        """
         Trip_cards = []
         cards_by_rank = self.cards_by_rank
         cards_len = len(cards_by_rank)
@@ -361,11 +358,11 @@ def _get_Three_of_a_kind_cards(self):
         return Trip_cards
 
     def _get_Two_Pair_cards(self):
-        '''
+        """
         Get the two pair cards among a player's cards
         Returns:
             (list): best five hand cards after sort
-        '''
+        """
         Two_Pair_cards = []
         cards_by_rank = self.cards_by_rank
         cards_len = len(cards_by_rank)
@@ -378,11 +375,11 @@ def _get_Two_Pair_cards(self):
         return Two_Pair_cards
 
     def _get_One_Pair_cards(self):
-        '''
+        """
         Get the one pair cards among a player's cards
         Returns:
             (list): best five hand cards after sort
-        '''
+        """
         One_Pair_cards = []
         cards_by_rank = self.cards_by_rank
         cards_len = len(cards_by_rank)
@@ -398,16 +395,17 @@ def _get_One_Pair_cards(self):
         return One_Pair_cards
 
     def _get_High_cards(self):
-        '''
+        """
         Get the high cards among a player's cards
         Returns:
             (list): best five hand cards after sort
-        '''
+        """
         High_cards = self.all_cards[2:7]
         return High_cards
 
+
 def compare_ranks(position, hands, winner):
-    '''
+    """
     Compare cards in same position of plays' five handcards
     Args:
         position(int): the position of a card in a sorted handcard
@@ -421,19 +419,19 @@ def compare_ranks(position, hands, winner):
         [1, 1, 1]: draw
         [1, 1, 0]: player1 and player0 draws
 
-    '''
+    """
     assert len(hands) == len(winner)
     RANKS = '23456789TJQKA'
-    cards_figure_all_players = [None]*len(hands)  #cards without suit
+    cards_figure_all_players = [None] * len(hands)  # cards without suit
     for i, hand in enumerate(hands):
         if winner[i]:
             cards = hands[i].get_hand_five_cards()
-            if len(cards[0]) != 1:# remove suit
+            if len(cards[0]) != 1:  # remove suit
                 for p in range(5):
                     cards[p] = cards[p][1:]
             cards_figure_all_players[i] = cards
 
-    rival_ranks = [] # ranks of rival_figures
+    rival_ranks = []  # ranks of rival_figures
     for i, cards_figure in enumerate(cards_figure_all_players):
         if winner[i]:
             rank = cards_figure_all_players[i][position]
@@ -446,23 +444,23 @@ def compare_ranks(position, hands, winner):
             new_winner[i] = 0
     return new_winner
 
+
 def determine_winner(key_index, hands, all_players, potential_winner_index):
-    '''
-    Find out who wins in the situation of having players with same highest hand_catagory
+    """
+    Find out who wins in the situation of having players with same highest hand_category
     Args:
         key_index(int): the position of a card in a sorted handcard
-        hands(list): cards of those players with same highest hand_catagory.
+        hands(list): cards of those players with same highest hand_category.
         e.g. hands = [['CT', 'ST', 'H9', 'B9', 'C2', 'C8', 'C7'], ['CJ', 'SJ', 'H9', 'B9', 'C2', 'C8', 'C7'], ['CT', 'ST', 'H9', 'B9', 'C2', 'C8', 'C7']]
         all_players(list): all the players in this round, 0 for losing and 1 for winning or draw
-        potential_winner_index(list): the positions of those players with same highest hand_catagory in all_players
+        potential_winner_index(list): the positions of those players with same highest hand_category in all_players
     Returns:
         [0, 1, 0]: player1 wins
         [1, 0, 0]: player0 wins
         [1, 1, 1]: draw
         [1, 1, 0]: player1 and player0 draws
-
-    '''
-    winner = [1]*len(hands)
+    """
+    winner = [1] * len(hands)
     i_index = 0
     while i_index < len(key_index) and sum(winner) > 1:
         index_break_tie = key_index[i_index]
@@ -473,20 +471,21 @@ def determine_winner(key_index, hands, all_players, potential_winner_index):
             all_players[potential_winner_index[i]] = 1
     return all_players
 
+
 def determine_winner_straight(hands, all_players, potential_winner_index):
-    '''
+    """
     Find out who wins in the situation of having players all having a straight or straight flush
     Args:
         key_index(int): the position of a card in a sorted handcard
         hands(list): cards of those players which all have a straight or straight flush
         all_players(list): all the players in this round, 0 for losing and 1 for winning or draw
-        potential_winner_index(list): the positions of those players with same highest hand_catagory in all_players
+        potential_winner_index(list): the positions of those players with same highest hand_category in all_players
     Returns:
         [0, 1, 0]: player1 wins
         [1, 0, 0]: player0 wins
         [1, 1, 1]: draw
         [1, 1, 0]: player1 and player0 draws
-    '''
+    """
     highest_ranks = []
     for hand in hands:
         highest_rank = hand.STRING_TO_RANK[hand.best_five[-1][1]]  # cards are sorted in ascending order
@@ -497,21 +496,22 @@ def determine_winner_straight(hands, all_players, potential_winner_index):
             all_players[potential_winner_index[i_player]] = 1
     return all_players
 
+
 def determine_winner_four_of_a_kind(hands, all_players, potential_winner_index):
-    '''
+    """
     Find out who wins in the situation of having players which all have a four of a kind
     Args:
         key_index(int): the position of a card in a sorted handcard
         hands(list): cards of those players with a four of a kind
         e.g. hands = [['CT', 'ST', 'H9', 'B9', 'C2', 'C8', 'C7'], ['CJ', 'SJ', 'H9', 'B9', 'C2', 'C8', 'C7'], ['CT', 'ST', 'H9', 'B9', 'C2', 'C8', 'C7']]
         all_players(list): all the players in this round, 0 for losing and 1 for winning or draw
-        potential_winner_index(list): the positions of those players with same highest hand_catagory in all_players
+        potential_winner_index(list): the positions of those players with same highest hand_category in all_players
     Returns:
         [0, 1, 0]: player1 wins
         [1, 0, 0]: player0 wins
         [1, 1, 1]: draw
         [1, 1, 0]: player1 and player0 draws
-    '''
+    """
     ranks = []
     for hand in hands:
         rank_1 = hand.STRING_TO_RANK[hand.best_five[-1][1]]  # rank of the four of a kind
@@ -523,11 +523,12 @@ def determine_winner_four_of_a_kind(hands, all_players, potential_winner_index):
             all_players[potential_winner_index[i]] = 1
     return all_players
 
+
 def compare_hands(hands):
-    '''
+    """
     Compare all palyer's all seven cards
     Args:
-        hands(list): cards of those players with same highest hand_catagory.
+        hands(list): cards of those players with same highest hand_category.
         e.g. hands = [['CT', 'ST', 'H9', 'B9', 'C2', 'C8', 'C7'], ['CJ', 'SJ', 'H9', 'B9', 'C2', 'C8', 'C7'], ['CT', 'ST', 'H9', 'B9', 'C2', 'C8', 'C7']]
     Returns:
         [0, 1, 0]: player1 wins
@@ -539,9 +540,9 @@ def compare_hands(hands):
         return [0, 1]
     elif hands[1] == None:
         return [1, 0]
-    '''
-    hand_category = [] #such as high_card, straight_flush, etc
-    all_players = [0]*len(hands) #all the players in this round, 0 for losing and 1 for winning or draw
+    """
+    hand_category = []  # such as high_card, straight_flush, etc
+    all_players = [0] * len(hands)  # all the players in this round, 0 for losing and 1 for winning or draw
     if None in hands:
         fold_players = [i for i, j in enumerate(hands) if j is None]
         if len(fold_players) == len(all_players) - 1:
@@ -555,26 +556,28 @@ def compare_hands(hands):
             for _ in enumerate(hands):
                 if hands[_[0]] is not None:
                     hand = Hand(hands[_[0]])
-                    hand.evaluateHand()
+                    hand.evaluate_hand()
                     hand_category.append(hand.category)
                 elif hands[_[0]] is None:
                     hand_category.append(0)
     else:
-            for i in enumerate(hands):
-                hand = Hand(hands[i[0]])
-                hand.evaluateHand()
-                hand_category.append(hand.category)
-    potential_winner_index = [i for i, j in enumerate(hand_category) if j == max(hand_category)]# potential winner are those with same max card_catagory
+        for i in enumerate(hands):
+            hand = Hand(hands[i[0]])
+            hand.evaluate_hand()
+            hand_category.append(hand.category)
+    potential_winner_index = [i for i, j in enumerate(hand_category) if
+                              j == max(hand_category)]  # potential winners are those with the same max hand_category
 
     return final_compare(hands, potential_winner_index, all_players)
 
+
 def final_compare(hands, potential_winner_index, all_players):
-    '''
+    """
     Find out the winners from those who didn't fold
     Args:
-        hands(list): cards of those players with same highest hand_catagory.
+        hands(list): cards of those players with same highest hand_category.
         e.g. hands = [['CT', 'ST', 'H9', 'B9', 'C2', 'C8', 'C7'], ['CJ', 'SJ', 'H9', 'B9', 'C2', 'C8', 'C7'], ['CT', 'ST', 'H9', 'B9', 'C2', 'C8', 'C7']]
-        potential_winner_index(list): index of those with same max card_catagory in all_players
+        potential_winner_index(list): index of those with same max card_category in all_players
         all_players(list): a list of all the player's win/lose situation, 0 for lose and 1 for win
     Returns:
         [0, 1, 0]: player1 wins
@@ -586,7 +589,7 @@ def final_compare(hands, potential_winner_index, all_players):
         return [0, 1]
     elif hands[1] == None:
         return [1, 0]
-    '''
+    """
     if len(potential_winner_index) == 1:
         all_players[potential_winner_index[0]] = 1
         return all_players
@@ -595,7 +598,7 @@ def final_compare(hands, potential_winner_index, all_players):
         equal_hands = []
         for _ in potential_winner_index:
             hand = Hand(hands[_])
-            hand.evaluateHand()
+            hand.evaluate_hand()
             equal_hands.append(hand)
         hand = equal_hands[0]
         if hand.category == 8:
diff --git a/rlcard/games/mahjong/card.py b/rlcard/games/mahjong/card.py
index 38cb20da3..13ed9cc6c 100644
--- a/rlcard/games/mahjong/card.py
+++ b/rlcard/games/mahjong/card.py
@@ -1,28 +1,27 @@
-
 class MahjongCard:
-
-    info = {'type':  ['dots', 'bamboo', 'characters', 'dragons', 'winds'],
-            'trait': ['1', '2', '3', '4', '5', '6', '7', '8', '9', 'green', 'red', 'white', 'east', 'west', 'north', 'south']
-            }
+    info = {
+        'type':  ['dots', 'bamboo', 'characters', 'dragons', 'winds'],
+        'trait': ['1', '2', '3', '4', '5', '6', '7', '8', '9', 'green', 'red', 'white', 'east', 'west', 'north', 'south']
+    }
 
     def __init__(self, card_type, trait):
-        ''' Initialize the class of MahjongCard
+        """Initialize the class of MahjongCard
 
         Args:
             card_type (str): The type of card
             trait (str): The trait of card
-        '''
+        """
         self.type = card_type
         self.trait = trait
         self.index_num = 0
 
     def get_str(self):
-        ''' Get the string representation of card
+        """Get the string representation of card
 
         Return:
             (str): The string of card's color and trait
-        '''
-        return self.type+ '-'+ self.trait
+        """
+        return self.type + '-' + self.trait
 
     def set_index_num(self, index_num):
 
diff --git a/rlcard/games/mahjong/dealer.py b/rlcard/games/mahjong/dealer.py
index b3266aaf2..1fdd72549 100644
--- a/rlcard/games/mahjong/dealer.py
+++ b/rlcard/games/mahjong/dealer.py
@@ -2,8 +2,7 @@
 
 
 class MahjongDealer:
-    ''' Initialize a mahjong dealer class
-    '''
+    """Initialize a mahjong dealer class """
     def __init__(self, np_random):
         self.np_random = np_random
         self.deck = init_deck()
@@ -11,17 +10,16 @@ def __init__(self, np_random):
         self.table = []
 
     def shuffle(self):
-        ''' Shuffle the deck
-        '''
+        """Shuffle the deck """
         self.np_random.shuffle(self.deck)
 
     def deal_cards(self, player, num):
-        ''' Deal some cards from deck to one player
+        """Deal some cards from deck to one player
 
         Args:
             player (object): The object of DoudizhuPlayer
             num (int): The number of cards to be dealed
-        '''
+        """
         for _ in range(num):
             player.hand.append(self.deck.pop())
 
diff --git a/rlcard/games/mahjong/game.py b/rlcard/games/mahjong/game.py
index 0aa574db1..f67eebc6c 100644
--- a/rlcard/games/mahjong/game.py
+++ b/rlcard/games/mahjong/game.py
@@ -6,17 +6,17 @@
 from rlcard.games.mahjong import Round
 from rlcard.games.mahjong import Judger
 
+
 class MahjongGame:
 
     def __init__(self, allow_step_back=False):
-        '''Initialize the class MajongGame
-        '''
+        """Initialize the class MahjongGame"""
         self.allow_step_back = allow_step_back
         self.np_random = np.random.RandomState()
         self.num_players = 4
 
     def init_game(self):
-        ''' Initialilze the game of Mahjong
+        """Initialize the game of Mahjong
 
         This version supports two-player Mahjong
 
@@ -25,7 +25,7 @@ def init_game(self):
 
                 (dict): The first state of the game
                 (int): Current player's id
-        '''
+        """
         # Initialize a dealer that can deal cards
         self.dealer = Dealer(self.np_random)
 
@@ -48,7 +48,7 @@ def init_game(self):
         return state, self.round.current_player
 
     def step(self, action):
-        ''' Get the next state
+        """Get the next state
 
         Args:
             action (str): a specific action. (call, raise, fold, or check)
@@ -58,7 +58,7 @@ def step(self, action):
 
                 (dict): next player's state
                 (int): next plater's id
-        '''
+        """
         # First snapshot the current state
         if self.allow_step_back:
             hist_dealer = deepcopy(self.dealer)
@@ -71,35 +71,35 @@ def step(self, action):
         return state, self.round.current_player
 
     def step_back(self):
-        ''' Return to the previous state of the game
+        """Return to the previous state of the game
 
         Returns:
             (bool): True if the game steps back successfully
-        '''
+        """
         if not self.history:
             return False
         self.dealer, self.players, self.round = self.history.pop()
         return True
 
     def get_state(self, player_id):
-        ''' Return player's state
+        """Return player's state
 
         Args:
             player_id (int): player id
 
         Returns:
             (dict): The state of the player
-        '''
+        """
         state = self.round.get_state(self.players, player_id)
         return state
 
     @staticmethod
     def get_legal_actions(state):
-        ''' Return the legal actions for current player
+        """Return the legal actions for current player
 
         Returns:
             (list): A list of legal actions
-        '''
+        """
         if state['valid_act'] == ['play']:
             state['valid_act'] = state['action_cards']
             return state['action_cards']
@@ -108,40 +108,40 @@ def get_legal_actions(state):
 
     @staticmethod
     def get_num_actions():
-        ''' Return the number of applicable actions
+        """Return the number of applicable actions
 
         Returns:
             (int): The number of actions. There are 4 actions (call, raise, check and fold)
-        '''
+        """
         return 38
 
     def get_num_players(self):
-        ''' return the number of players in Mahjong
+        """Return the number of players in Mahjong
 
         returns:
             (int): the number of players in the game
-        '''
+        """
         return self.num_players
 
     def get_player_id(self):
-        ''' return the id of current player in Mahjong
+        """Return the id of current player in Mahjong
 
         returns:
             (int): the number of players in the game
-        '''
+        """
         return self.round.current_player
 
     def is_over(self):
-        ''' Check if the game is over
+        """Check if the game is over
 
         Returns:
             (boolean): True if the game is over
-        '''
+        """
         win, player, _ = self.judger.judge_game(self)
-        #pile =[sorted([c.get_str() for c in s ]) for s in self.players[player].pile if self.players[player].pile != None]
-        #cards = sorted([c.get_str() for c in self.players[player].hand])
-        #count = len(cards) + sum([len(p) for p in pile])
+        # pile =[sorted([c.get_str() for c in s ]) for s in self.players[player].pile if self.players[player].pile != None]
+        # cards = sorted([c.get_str() for c in self.players[player].hand])
+        # count = len(cards) + sum([len(p) for p in pile])
         self.winner = player
-        #print(win, player, players_val)
-        #print(win, self.round.current_player, player, cards, pile, count)
+        # print(win, player, players_val)
+        # print(win, self.round.current_player, player, cards, pile, count)
         return win
diff --git a/rlcard/games/mahjong/judger.py b/rlcard/games/mahjong/judger.py
index 057645441..67da62174 100644
--- a/rlcard/games/mahjong/judger.py
+++ b/rlcard/games/mahjong/judger.py
@@ -1,52 +1,50 @@
 # -*- coding: utf-8 -*-
-''' Implement Mahjong Judger class
-'''
+"""Implement Mahjong Judger class"""
 from collections import defaultdict
 import numpy as np
 
+
 class MahjongJudger:
-    ''' Determine what cards a player can play
-    '''
+    """Determine what cards a player can play"""
 
     def __init__(self, np_random):
-        ''' Initilize the Judger class for Mahjong
-        '''
+        """Initialize the Judger class for Mahjong"""
         self.np_random = np_random
 
     @staticmethod
     def judge_pong_gong(dealer, players, last_player):
-        ''' Judge which player has pong/gong
+        """Judge which player has pong/gong
         Args:
             dealer (object): The dealer object.
             players (list): List of all players
             last_player (int): The player id of last player
 
-        '''
+        """
         last_card = dealer.table[-1]
         last_card_str = last_card.get_str()
-        #last_card_value = last_card_str.split("-")[-1]
-        #last_card_type = last_card_str.split("-")[0]
+        # last_card_value = last_card_str.split("-")[-1]
+        # last_card_type = last_card_str.split("-")[0]
         for player in players:
             hand = [card.get_str() for card in player.hand]
             hand_dict = defaultdict(list)
             for card in hand:
                 hand_dict[card.split("-")[0]].append(card.split("-")[1])
-            #pile = player.pile
+            # pile = player.pile
             # check gong
             if hand.count(last_card_str) == 3 and last_player != player.player_id:
-                return 'gong', player, [last_card]*4
+                return 'gong', player, [last_card] * 4
             # check pong
             if hand.count(last_card_str) == 2 and last_player != player.player_id:
-                return 'pong', player, [last_card]*3
+                return 'pong', player, [last_card] * 3
         return False, None, None
 
     def judge_chow(self, dealer, players, last_player):
-        ''' Judge which player has chow
+        """Judge which player has chow
         Args:
             dealer (object): The dealer object.
             players (list): List of all players
             last_player (int): The player id of last player
-        '''
+        """
 
         last_card = dealer.table[-1]
         last_card_str = last_card.get_str()
@@ -54,31 +52,31 @@ def judge_chow(self, dealer, players, last_player):
         last_card_index = last_card.index_num
         for player in players:
             if last_card_type != "dragons" and last_card_type != "winds" and last_player == player.get_player_id() - 1:
-                # Create 9 dimensional vector where each dimension represent a specific card with the type same as last_card_type
-                # Numbers in each dimension represent how many of that card the player has it in hand
-                # If the last_card_type is 'characters' for example, and the player has cards: characters_3, characters_6, characters_3,
-                # The hand_list vector looks like: [0,0,2,0,0,1,0,0,0]
+                # Create a 9-dimensional vector where each dimension represents a specific card of the same type as
+                # last_card_type. The number in each dimension is how many of that card the player has in hand.
+                # If the last_card_type is 'characters', for example, and the player has cards: characters_3,
+                # characters_6, characters_3, the hand_list vector looks like: [0,0,2,0,0,1,0,0,0]
                 hand_list = np.zeros(9)
 
                 for card in player.hand:
                     if card.get_str().split("-")[0] == last_card_type:
-                        hand_list[card.index_num] = hand_list[card.index_num]+1
+                        hand_list[card.index_num] = hand_list[card.index_num] + 1
 
-                #pile = player.pile
-                #check chow
+                # pile = player.pile
+                # check chow
                 test_cases = []
                 if last_card_index == 0:
-                    if hand_list[last_card_index+1] > 0 and hand_list[last_card_index+2] > 0:
-                        test_cases.append([last_card_index+1, last_card_index+2])
+                    if hand_list[last_card_index + 1] > 0 and hand_list[last_card_index + 2] > 0:
+                        test_cases.append([last_card_index + 1, last_card_index + 2])
                 elif last_card_index < 9:
-                    if hand_list[last_card_index-2] > 0 and hand_list[last_card_index-1] > 0:
-                        test_cases.append([last_card_index-2, last_card_index-1])
+                    if hand_list[last_card_index - 2] > 0 and hand_list[last_card_index - 1] > 0:
+                        test_cases.append([last_card_index - 2, last_card_index - 1])
                 else:
-                    if hand_list[last_card_index-1] > 0 and hand_list[last_card_index+1] > 0:
-                        test_cases.append([last_card_index-1, last_card_index+1])
+                    if hand_list[last_card_index - 1] > 0 and hand_list[last_card_index + 1] > 0:
+                        test_cases.append([last_card_index - 1, last_card_index + 1])
 
                 if not test_cases:
-                    continue        
+                    continue
 
                 for l in test_cases:
                     cards = []
@@ -92,12 +90,10 @@ def judge_chow(self, dealer, players, last_player):
         return False, None, None
 
     def judge_game(self, game):
-        ''' Judge which player has win the game
+        """Judge which player has won the game
         Args:
-            dealer (object): The dealer object.
-            players (list): List of all players
-            last_player (int): The player id of last player
-        '''
+            game (Game): The game object
+        """
         players_val = []
         win_player = -1
         for player in game.players:
@@ -108,18 +104,18 @@ def judge_game(self, game):
         if win_player != -1 or len(game.dealer.deck) == 0:
             return True, win_player, players_val
         else:
-            #player_id = players_val.index(max(players_val))
+            # player_id = players_val.index(max(players_val))
             return False, win_player, players_val
 
     def judge_hu(self, player):
-        ''' Judge whether the player has win the game
+        """Judge whether the player has won the game
         Args:
             player (object): Target player
 
         Return:
             Result (bool): Win or not
             Maximum_score (int): Set count score of the player
-        '''
+        """
         set_count = 0
         hand = [card.get_str() for card in player.hand]
         count_dict = {card: hand.count(card) for card in hand}
@@ -141,36 +137,36 @@ def judge_hu(self, player):
                 if tmp_set_count + set_count > maximum:
                     maximum = tmp_set_count + set_count
                 if tmp_set_count + set_count >= 4:
-                    #print(player.get_player_id(), sorted([card.get_str() for card in player.hand]))
-                    #print([[c.get_str() for c in s] for s in player.pile])
-                    #print(len(player.hand), sum([len(s) for s in player.pile]))
-                    #exit()
+                    # print(player.get_player_id(), sorted([card.get_str() for card in player.hand]))
+                    # print([[c.get_str() for c in s] for s in player.pile])
+                    # print(len(player.hand), sum([len(s) for s in player.pile]))
+                    # exit()
                     return True, maximum
         return False, maximum
 
     @staticmethod
     def check_consecutive(_list):
-        ''' Check if list is consecutive
+        """Check if list is consecutive
         Args:
             _list (list): The target list
 
         Return:
             Result (bool): consecutive or not
-        '''
+        """
         l = list(map(int, _list))
-        if sorted(l) == list(range(min(l), max(l)+1)):
+        if sorted(l) == list(range(min(l), max(l) + 1)):
             return True
         return False
 
     def cal_set(self, cards):
-        ''' Calculate the set for given cards
+        """Calculate the set for given cards
         Args:
-            Cards (list): List of cards.
+            cards (list): List of cards.
 
         Return:
             Set_count (int):
             Sets (list): List of cards that has been pop from user's hand
-        '''
+        """
         tmp_cards = cards.copy()
         sets = []
         set_count = 0
@@ -182,7 +178,7 @@ def cal_set(self, cards):
                 for _ in range(_dict[each]):
                     tmp_cards.pop(tmp_cards.index(each))
 
-        # get all of the traits of each type in hand (except dragons and winds)
+        # get all the traits of each type in hand (except dragons and winds)
         _dict_by_type = defaultdict(list)
         for card in tmp_cards:
             _type = card.split("-")[0]
@@ -196,22 +192,22 @@ def cal_set(self, cards):
             if len(values) > 2:
                 for index, _ in enumerate(values):
                     if index == 0:
-                        test_case = [values[index], values[index+1], values[index+2]]
-                    elif index == len(values)-1:
-                        test_case = [values[index-2], values[index-1], values[index]]
+                        test_case = [values[index], values[index + 1], values[index + 2]]
+                    elif index == len(values) - 1:
+                        test_case = [values[index - 2], values[index - 1], values[index]]
                     else:
-                        test_case = [values[index-1], values[index], values[index+1]]
+                        test_case = [values[index - 1], values[index], values[index + 1]]
                     if self.check_consecutive(test_case):
                         set_count += 1
                         for each in test_case:
                             values.pop(values.index(each))
-                            c = _type+"-"+str(each)
+                            c = _type + "-" + str(each)
                             sets.append(c)
                             if c in tmp_cards:
                                 tmp_cards.pop(tmp_cards.index(c))
         return set_count, sets
 
-#if __name__ == "__main__":
+# if __name__ == "__main__":
 #    judger = MahjongJudger()
 #    player = Player(0)
 #    card_info = Card.info
diff --git a/rlcard/games/mahjong/player.py b/rlcard/games/mahjong/player.py
index 9f6bb5799..3e4317f06 100644
--- a/rlcard/games/mahjong/player.py
+++ b/rlcard/games/mahjong/player.py
@@ -2,47 +2,44 @@
 class MahjongPlayer:
 
     def __init__(self, player_id, np_random):
-        ''' Initilize a player.
+        """Initialize a player.
 
         Args:
             player_id (int): The id of the player
-        '''
+        """
         self.np_random = np_random
         self.player_id = player_id
         self.hand = []
         self.pile = []
 
     def get_player_id(self):
-        ''' Return the id of the player
-        '''
+        """Return the id of the player"""
 
         return self.player_id
 
     def print_hand(self):
-        ''' Print the cards in hand in string.
-        '''
+        """Print the cards in hand in string."""
         print([c.get_str() for c in self.hand])
 
     def print_pile(self):
-        ''' Print the cards in pile of the player in string.
-        '''
+        """Print the cards in pile of the player in string."""
         print([[c.get_str() for c in s]for s in self.pile])
 
     def play_card(self, dealer, card):
-        ''' Play one card
+        """Play one card
         Args:
             dealer (object): Dealer
-            Card (object): The card to be play.
-        '''
+            card (object): The card to be played.
+        """
         card = self.hand.pop(self.hand.index(card))
         dealer.table.append(card)
 
     def chow(self, dealer, cards):
-        ''' Perform Chow
+        """Perform Chow
         Args:
             dealer (object): Dealer
             Cards (object): The cards to be Chow.
-        '''
+        """
         last_card = dealer.table.pop(-1)
         for card in cards:
             if card in self.hand and card != last_card:
@@ -50,22 +47,22 @@ def chow(self, dealer, cards):
         self.pile.append(cards)
 
     def gong(self, dealer, cards):
-        ''' Perform Gong
+        """Perform Gong
         Args:
             dealer (object): Dealer
             Cards (object): The cards to be Gong.
-        '''
+        """
         for card in cards:
             if card in self.hand:
                 self.hand.pop(self.hand.index(card))
         self.pile.append(cards)
 
     def pong(self, dealer, cards):
-        ''' Perform Pong
+        """Perform Pong
         Args:
             dealer (object): Dealer
             Cards (object): The cards to be Pong.
-        '''
+        """
         for card in cards:
             if card in self.hand:
                 self.hand.pop(self.hand.index(card))
diff --git a/rlcard/games/mahjong/round.py b/rlcard/games/mahjong/round.py
index 9db69f4ef..2f3f60275 100644
--- a/rlcard/games/mahjong/round.py
+++ b/rlcard/games/mahjong/round.py
@@ -2,13 +2,13 @@
 class MahjongRound:
 
     def __init__(self, judger, dealer, num_players, np_random):
-        ''' Initialize the round class
+        """Initialize the round class
 
         Args:
             judger (object): the object of MahjongJudger
             dealer (object): the object of MahjongDealer
             num_players (int): the number of players in game
-        '''
+        """
         self.np_random = np_random
         self.judger = judger
         self.dealer = dealer
@@ -25,12 +25,12 @@ def __init__(self, judger, dealer, num_players, np_random):
         self.last_cards = []
 
     def proceed_round(self, players, action):
-        ''' Call other Classes's functions to keep one round running
+        """Call other classes' functions to keep one round running
 
         Args:
             player (object): object of UnoPlayer
             action (str): string of legal action
-        '''
+        """
         #hand_len = [len(p.hand) for p in players]
         #pile_len = [sum([len([c for c in p]) for p in pp.pile]) for pp in players]
         #total_len = [i + j for i, j in zip(hand_len, pile_len)]
@@ -82,14 +82,14 @@ def proceed_round(self, players, action):
         #total_len = [i + j for i, j in zip(hand_len, pile_len)]
 
     def get_state(self, players, player_id):
-        ''' Get player's state
+        """Get player's state
 
         Args:
             players (list): The list of MahjongPlayer
             player_id (int): The id of the player
         Return:
             state (dict): The information of the state
-        '''
+        """
         state = {}
         #(valid_act, player, cards) = self.judger.judge_pong_gong(self.dealer, players, self.last_player)
         if self.valid_act: # PONG/GONG/CHOW
diff --git a/rlcard/games/nolimitholdem/round.py b/rlcard/games/nolimitholdem/round.py
index 01d4cf875..7d891ce73 100644
--- a/rlcard/games/nolimitholdem/round.py
+++ b/rlcard/games/nolimitholdem/round.py
@@ -8,7 +8,7 @@
 class Action(Enum):
     FOLD = 0
     CHECK_CALL = 1
-    #CALL = 2
+    # CALL = 2
     # RAISE_3BB = 3
     RAISE_HALF_POT = 2
     RAISE_POT = 3
diff --git a/rlcard/games/uno/card.py b/rlcard/games/uno/card.py
index f2a6c5e5c..98f8b2e5a 100644
--- a/rlcard/games/uno/card.py
+++ b/rlcard/games/uno/card.py
@@ -1,43 +1,42 @@
 from termcolor import colored
 
-class UnoCard:
 
-    info = {'type':  ['number', 'action', 'wild'],
-            'color': ['r', 'g', 'b', 'y'],
-            'trait': ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
-                      'skip', 'reverse', 'draw_2', 'wild', 'wild_draw_4']
-            }
+class UnoCard:
+    info = {
+        'type':  ['number', 'action', 'wild'],
+        'color': ['r', 'g', 'b', 'y'],
+        'trait': ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'skip', 'reverse', 'draw_2', 'wild', 'wild_draw_4']
+    }
 
     def __init__(self, card_type, color, trait):
-        ''' Initialize the class of UnoCard
+        """Initialize the class of UnoCard
 
         Args:
             card_type (str): The type of card
             color (str): The color of card
             trait (str): The trait of card
-        '''
+        """
         self.type = card_type
         self.color = color
         self.trait = trait
         self.str = self.get_str()
 
     def get_str(self):
-        ''' Get the string representation of card
+        """Get the string representation of card
 
         Return:
             (str): The string of card's color and trait
-        '''
+        """
         return self.color + '-' + self.trait
 
-
     @staticmethod
     def print_cards(cards, wild_color=False):
-        ''' Print out card in a nice form
+        """Print out card in a nice form
 
         Args:
-            card (str or list): The string form or a list of a UNO card
-            wild_color (boolean): True if assign collor to wild cards
-        '''
+            cards (str or list): The string form or a list of a UNO card
+            wild_color (boolean): True if assign color to wild cards
+        """
         if isinstance(cards, str):
             cards = [cards]
         for i, card in enumerate(cards):
diff --git a/rlcard/games/uno/dealer.py b/rlcard/games/uno/dealer.py
index 771ed51c6..4cda5e7da 100644
--- a/rlcard/games/uno/dealer.py
+++ b/rlcard/games/uno/dealer.py
@@ -3,34 +3,32 @@
 
 
 class UnoDealer:
-    ''' Initialize a uno dealer class
-    '''
+    """Initialize a uno dealer class """
     def __init__(self, np_random):
         self.np_random = np_random
         self.deck = init_deck()
         self.shuffle()
 
     def shuffle(self):
-        ''' Shuffle the deck
-        '''
+        """Shuffle the deck """
         self.np_random.shuffle(self.deck)
 
     def deal_cards(self, player, num):
-        ''' Deal some cards from deck to one player
+        """Deal some cards from deck to one player
 
         Args:
             player (object): The object of DoudizhuPlayer
             num (int): The number of cards to be dealed
-        '''
+        """
         for _ in range(num):
             player.hand.append(self.deck.pop())
 
     def flip_top_card(self):
-        ''' Flip top card when a new game starts
+        """Flip top card when a new game starts
 
         Returns:
             (object): The object of UnoCard at the top of the deck
-        '''
+        """
         top_card = self.deck.pop()
         while top_card.trait == 'wild_draw_4':
             self.deck.append(top_card)
diff --git a/rlcard/games/uno/game.py b/rlcard/games/uno/game.py
index 53c1a1f8c..cf41d050c 100644
--- a/rlcard/games/uno/game.py
+++ b/rlcard/games/uno/game.py
@@ -15,20 +15,19 @@ def __init__(self, allow_step_back=False, num_players=2):
         self.payoffs = [0 for _ in range(self.num_players)]
 
     def configure(self, game_config):
-        ''' Specifiy some game specific parameters, such as number of players
-        '''
+        """Specify some game specific parameters, such as number of players"""
         self.num_players = game_config['game_num_players']
 
     def init_game(self):
-        ''' Initialize players and state
+        """Initialize players and state
 
         Returns:
             (tuple): Tuple containing:
 
                 (dict): The first state in one game
                 (int): Current player's id
-        '''
-        # Initalize payoffs
+        """
+        # Initialize payoffs
         self.payoffs = [0 for _ in range(self.num_players)]
 
         # Initialize a dealer that can deal cards
@@ -44,11 +43,11 @@ def init_game(self):
         # Initialize a Round
         self.round = Round(self.dealer, self.num_players, self.np_random)
 
-        # flip and perfrom top card
+        # flip and perform top card
         top_card = self.round.flip_top_card()
         self.round.perform_top_card(self.players, top_card)
 
-        # Save the hisory for stepping back to the last state.
+        # Save the history for stepping back to the last state.
         self.history = []
 
         player_id = self.round.current_player
@@ -56,7 +55,7 @@ def init_game(self):
         return state, player_id
 
     def step(self, action):
-        ''' Get the next state
+        """Get the next state
 
         Args:
             action (str): A specific action
@@ -66,7 +65,7 @@ def step(self, action):
 
                 (dict): next player's state
                 (int): next plater's id
-        '''
+        """
 
         if self.allow_step_back:
             # First snapshot the current state
@@ -81,36 +80,36 @@ def step(self, action):
         return state, player_id
 
     def step_back(self):
-        ''' Return to the previous state of the game
+        """Return to the previous state of the game
 
         Returns:
             (bool): True if the game steps back successfully
-        '''
+        """
         if not self.history:
             return False
         self.dealer, self.players, self.round = self.history.pop()
         return True
 
     def get_state(self, player_id):
-        ''' Return player's state
+        """Return player's state
 
         Args:
             player_id (int): player id
 
         Returns:
             (dict): The state of the player
-        '''
+        """
         state = self.round.get_state(self.players, player_id)
         state['num_players'] = self.get_num_players()
         state['current_player'] = self.round.current_player
         return state
 
     def get_payoffs(self):
-        ''' Return the payoffs of the game
+        """Return the payoffs of the game
 
         Returns:
             (list): Each entry corresponds to the payoff of one player
-        '''
+        """
         winner = self.round.winner
         if winner is not None and len(winner) == 1:
             self.payoffs[winner[0]] = 1
@@ -118,43 +117,43 @@ def get_payoffs(self):
         return self.payoffs
 
     def get_legal_actions(self):
-        ''' Return the legal actions for current player
+        """Return the legal actions for current player
 
         Returns:
             (list): A list of legal actions
-        '''
+        """
 
         return self.round.get_legal_actions(self.players, self.round.current_player)
 
     def get_num_players(self):
-        ''' Return the number of players in Limit Texas Hold'em
+        """Return the number of players in Uno
 
         Returns:
             (int): The number of players in the game
-        '''
+        """
         return self.num_players
 
     @staticmethod
     def get_num_actions():
-        ''' Return the number of applicable actions
+        """Return the number of applicable actions
 
         Returns:
             (int): The number of actions. There are 61 actions
-        '''
+        """
         return 61
 
     def get_player_id(self):
-        ''' Return the current player's id
+        """Return the current player's id
 
         Returns:
             (int): current player's id
-        '''
+        """
         return self.round.current_player
 
     def is_over(self):
-        ''' Check if the game is over
+        """Check if the game is over
 
         Returns:
             (boolean): True if the game is over
-        '''
+        """
         return self.round.is_over
diff --git a/rlcard/games/uno/judger.py b/rlcard/games/uno/judger.py
index 62a6375e8..bd2d9a087 100644
--- a/rlcard/games/uno/judger.py
+++ b/rlcard/games/uno/judger.py
@@ -3,14 +3,14 @@ class UnoJudger:
 
     @staticmethod
     def judge_winner(players, np_random):
-        ''' Judge the winner of the game
+        """Judge the winner of the game
 
         Args:
             players (list): The list of players who play the game
 
         Returns:
             (list): The player id of the winner
-        '''
+        """
         self.np_random = np_random
         count_1 = len(players[0].hand)
         count_2 = len(players[1].hand)
diff --git a/rlcard/games/uno/player.py b/rlcard/games/uno/player.py
index 26507467e..86294b634 100644
--- a/rlcard/games/uno/player.py
+++ b/rlcard/games/uno/player.py
@@ -2,18 +2,17 @@
 class UnoPlayer:
 
     def __init__(self, player_id, np_random):
-        ''' Initilize a player.
+        """Initialize a player.
 
         Args:
             player_id (int): The id of the player
-        '''
+        """
         self.np_random = np_random
         self.player_id = player_id
         self.hand = []
         self.stack = []
 
     def get_player_id(self):
-        ''' Return the id of the player
-        '''
+        """Return the id of the player"""
 
         return self.player_id
diff --git a/rlcard/games/uno/round.py b/rlcard/games/uno/round.py
index 1a81387b2..cdcabfe08 100644
--- a/rlcard/games/uno/round.py
+++ b/rlcard/games/uno/round.py
@@ -5,12 +5,12 @@
 class UnoRound:
 
     def __init__(self, dealer, num_players, np_random):
-        ''' Initialize the round class
+        """Initialize the round class
 
         Args:
             dealer (object): the object of UnoDealer
             num_players (int): the number of players in game
-        '''
+        """
         self.np_random = np_random
         self.dealer = dealer
         self.target = None
@@ -22,12 +22,12 @@ def __init__(self, dealer, num_players, np_random):
         self.winner = None
 
     def flip_top_card(self):
-        ''' Flip the top card of the card pile
+        """Flip the top card of the card pile
 
         Returns:
             (object of UnoCard): the top card in game
 
-        '''
+        """
         top = self.dealer.flip_top_card()
         if top.trait == 'wild':
             top.color = self.np_random.choice(UnoCard.info['color'])
@@ -36,12 +36,12 @@ def flip_top_card(self):
         return top
 
     def perform_top_card(self, players, top_card):
-        ''' Perform the top card
+        """Perform the top card
 
         Args:
             players (list): list of UnoPlayer objects
             top_card (object): object of UnoCard
-        '''
+        """
         if top_card.trait == 'skip':
             self.current_player = 1
         elif top_card.trait == 'reverse':
@@ -52,12 +52,12 @@ def perform_top_card(self, players, top_card):
             self.dealer.deal_cards(player, 2)
 
     def proceed_round(self, players, action):
-        ''' Call other Classes' functions to keep one round running
+        """Call other Classes' functions to keep one round running
 
         Args:
             player (object): object of UnoPlayer
             action (str): string of legal action
-        '''
+        """
         if action == 'draw':
             self._perform_draw_action(players)
             return None
@@ -135,12 +135,12 @@ def get_legal_actions(self, players, player_id):
         return legal_actions
 
     def get_state(self, players, player_id):
-        ''' Get player's state
+        """Get player's state
 
         Args:
             players (list): The list of UnoPlayer
             player_id (int): The id of the player
-        '''
+        """
         state = {}
         player = players[player_id]
         state['hand'] = cards2list(player.hand)
@@ -153,8 +153,7 @@ def get_state(self, players, player_id):
         return state
 
     def replace_deck(self):
-        ''' Add cards have been played to deck
-        '''
+        """Add cards that have been played back to the deck"""
         self.dealer.deck.extend(self.played_cards)
         self.dealer.shuffle()
         self.played_cards = []
diff --git a/rlcard/games/uno/utils.py b/rlcard/games/uno/utils.py
index 6ba5a9de6..5d18550d1 100644
--- a/rlcard/games/uno/utils.py
+++ b/rlcard/games/uno/utils.py
@@ -29,8 +29,7 @@
 
 
 def init_deck():
-    ''' Generate uno deck of 108 cards
-    '''
+    """Generate uno deck of 108 cards"""
     deck = []
     card_info = Card.info
     for color in card_info['color']:
@@ -53,28 +52,28 @@ def init_deck():
 
 
 def cards2list(cards):
-    ''' Get the corresponding string representation of cards
+    """Get the corresponding string representation of cards
 
     Args:
         cards (list): list of UnoCards objects
 
     Returns:
         (string): string representation of cards
-    '''
+    """
     cards_list = []
     for card in cards:
         cards_list.append(card.get_str())
     return cards_list
 
 def hand2dict(hand):
-    ''' Get the corresponding dict representation of hand
+    """Get the corresponding dict representation of hand
 
     Args:
         hand (list): list of string of hand's card
 
     Returns:
         (dict): dict of hand
-    '''
+    """
     hand_dict = {}
     for card in hand:
         if card not in hand_dict:
@@ -84,7 +83,7 @@ def hand2dict(hand):
     return hand_dict
 
 def encode_hand(plane, hand):
-    ''' Encode hand and represerve it into plane
+    """Encode hand and store it in the plane
 
     Args:
         plane (array): 3*4*15 numpy array
@@ -92,7 +91,7 @@ def encode_hand(plane, hand):
 
     Returns:
         (array): 3*4*15 numpy array
-    '''
+    """
     # plane = np.zeros((3, 4, 15), dtype=int)
     plane[0] = np.ones((4, 15), dtype=int)
     hand = hand2dict(hand)
@@ -111,7 +110,7 @@ def encode_hand(plane, hand):
     return plane
 
 def encode_target(plane, target):
-    ''' Encode target and represerve it into plane
+    """Encode target and store it in the plane
 
     Args:
         plane (array): 1*4*15 numpy array
@@ -119,7 +118,7 @@ def encode_target(plane, target):
 
     Returns:
         (array): 1*4*15 numpy array
-    '''
+    """
     target_info = target.split('-')
     color = COLOR_MAP[target_info[0]]
     trait = TRAIT_MAP[target_info[1]]
diff --git a/rlcard/models/__init__.py b/rlcard/models/__init__.py
index f772a17ac..285c46487 100644
--- a/rlcard/models/__init__.py
+++ b/rlcard/models/__init__.py
@@ -1,5 +1,4 @@
-''' Register rule-based models or pre-trianed models
-'''
+"""Register rule-based models or pre-trained models"""
 from rlcard.models.registration import register, load
 
 register(
diff --git a/rlcard/models/bridge_rule_models.py b/rlcard/models/bridge_rule_models.py
index 43710688b..4cf174c41 100644
--- a/rlcard/models/bridge_rule_models.py
+++ b/rlcard/models/bridge_rule_models.py
@@ -1,10 +1,10 @@
-'''
+"""
     File name: models/bridge_rule_models.py
     Author: William Hale
     Date created: 11/27/2021
 
     Bridge rule models
-'''
+"""
 
 import numpy as np
 
@@ -12,16 +12,14 @@
 
 
 class BridgeDefenderNoviceRuleAgent(object):
-    '''
-        Agent always passes during bidding
-    '''
+    """Agent always passes during bidding"""
 
     def __init__(self):
         self.use_raw = False
 
     @staticmethod
     def step(state) -> int:
-        ''' Predict the action given the current state.
+        """Predict the action given the current state.
             Defender Novice strategy:
                 Case during make call:
                     Always choose PassAction.
@@ -29,11 +27,11 @@ def step(state) -> int:
                     Choose a random action.
 
         Args:
-            state (numpy.array): an numpy array that represents the current state
+            state (numpy.array): a numpy array that represents the current state
 
         Returns:
             action_id (int): the action_id predicted
-        '''
+        """
         legal_action_ids = state['raw_legal_actions']
         if ActionEvent.pass_action_id in legal_action_ids:
             selected_action_id = ActionEvent.pass_action_id
@@ -42,15 +40,15 @@ def step(state) -> int:
         return selected_action_id
 
     def eval_step(self, state):
-        ''' Predict the action given the current state for evaluation.
+        """Predict the action given the current state for evaluation.
             Since the agents is not trained, this function is equivalent to step function.
 
         Args:
-            state (numpy.array): an numpy array that represents the current state
+            state (numpy.array): a numpy array that represents the current state
 
         Returns:
             action_id (int): the action_id predicted by the agent
             probabilities (list): The list of action probabilities
-        '''
+        """
         probabilities = []
         return self.step(state), probabilities
diff --git a/rlcard/models/doudizhu_rule_models.py b/rlcard/models/doudizhu_rule_models.py
index 59d230a8f..5b628544d 100644
--- a/rlcard/models/doudizhu_rule_models.py
+++ b/rlcard/models/doudizhu_rule_models.py
@@ -1,5 +1,4 @@
-''' Dou Dizhu rule models
-'''
+"""Dou Dizhu rule models"""
 
 import numpy as np
 
@@ -8,20 +7,19 @@
 from rlcard.models.model import Model
 
 class DouDizhuRuleAgentV1(object):
-    ''' Dou Dizhu Rule agent version 1
-    '''
+    """Dou Dizhu Rule agent version 1"""
 
     def __init__(self):
         self.use_raw = True
 
     def step(self, state):
-        ''' Predict the action given raw state. A naive rule.
+        """Predict the action given raw state. A naive rule.
         Args:
             state (dict): Raw state from the game
 
         Returns:
             action (str): Predicted action
-        '''
+        """
         state = state['raw_obs']
         trace = state['trace']
         # the rule of leading round
@@ -55,13 +53,11 @@ def step(self, state):
             return np.random.choice(state['actions'])
 
     def eval_step(self, state):
-        ''' Step for evaluation. The same to step
-        '''
+        """Step for evaluation. The same to step"""
         return self.step(state), []
 
     def combine_cards(self, hand):
-        '''Get optimal combinations of cards in hand
-        '''
+        """Get optimal combinations of cards in hand"""
         comb = {'rocket': [], 'bomb': [], 'trio': [], 'trio_chain': [],
                 'solo_chain': [], 'pair_chain': [], 'pair': [], 'solo': []}
         # 1. pick rocket
@@ -152,16 +148,14 @@ def pick_chain(hand_list, count):
                             chains.append(str_chain)
                 add += len(chain)
         hand_list = [int(card) for card in hand_list]
-        return (chains, hand_list)
+        return chains, hand_list
 
 
 class DouDizhuRuleModelV1(Model):
-    ''' Dou Dizhu Rule Model version 1
-    '''
+    """Dou Dizhu Rule Model version 1"""
 
     def __init__(self):
-        ''' Load pretrained model
-        '''
+        """Load pretrained model"""
         env = rlcard.make('doudizhu')
 
         rule_agent = DouDizhuRuleAgentV1()
@@ -169,12 +163,12 @@ def __init__(self):
 
     @property
     def agents(self):
-        ''' Get a list of agents for each position in a the game
+        """Get a list of agents for each position in a game
 
         Returns:
             agents (list): A list of agents
 
         Note: Each agent should be just like RL agent with step and eval_step
               functioning well.
-        '''
+        """
         return self.rule_agents
diff --git a/rlcard/models/gin_rummy_rule_models.py b/rlcard/models/gin_rummy_rule_models.py
index 2d5468684..e6c224431 100644
--- a/rlcard/models/gin_rummy_rule_models.py
+++ b/rlcard/models/gin_rummy_rule_models.py
@@ -1,10 +1,10 @@
-'''
+"""
     File name: models/gin_rummy_rule_models.py
     Author: William Hale
     Date created: 2/12/2020
 
     Gin Rummy rule models
-'''
+"""
 
 from typing import TYPE_CHECKING
 from collections import OrderedDict
@@ -27,16 +27,14 @@
 
 
 class GinRummyNoviceRuleAgent(object):
-    '''
-        Agent always discards highest deadwood value card
-    '''
+    """Agent always discards highest deadwood value card"""
 
     def __init__(self):
         self.use_raw = False  # FIXME: should this be True ?
 
     @staticmethod
     def step(state):
-        ''' Predict the action given the current state.
+        """Predict the action given the current state.
             Novice strategy:
                 Case where can gin:
                     Choose one of the gin actions.
@@ -45,17 +43,17 @@ def step(state):
                 Case where can discard:
                     Gin if can. Knock if can.
                     Otherwise, put aside cards in some best meld cluster.
-                    Choose one of the remaining cards with highest deadwood value.
+                    Choose one of the remaining cards with the highest deadwood value.
                     Discard that card.
                 Case otherwise:
                     Choose a random action.
 
         Args:
-            state (numpy.array): an numpy array that represents the current state
+            state (numpy.array): a numpy array that represents the current state
 
         Returns:
             action (int): the action predicted
-        '''
+        """
         legal_actions = state['legal_actions']
         actions = legal_actions.copy()
         legal_action_events = [ActionEvent.decode_action(x) for x in legal_actions]
@@ -76,16 +74,16 @@ def step(state):
         return np.random.choice(actions)
 
     def eval_step(self, state):
-        ''' Predict the action given the current state for evaluation.
+        """Predict the action given the current state for evaluation.
             Since the agents is not trained, this function is equivalent to step function.
 
         Args:
-            state (numpy.array): an numpy array that represents the current state
+            state (numpy.array): a numpy array that represents the current state
 
         Returns:
             action (int): the action predicted by the agent
             probabilities (list): The list of action probabilities
-        '''
+        """
         probabilities = []
         return self.step(state), probabilities
 
@@ -114,12 +112,10 @@ def _get_best_discards(discard_action_events, state) -> List[Card]:
 
 
 class GinRummyNoviceRuleModel(Model):
-    ''' Gin Rummy Rule Model
-    '''
+    """Gin Rummy Rule Model"""
 
     def __init__(self):
-        ''' Load pre-trained model
-        '''
+        """Load pre-trained model"""
         super().__init__()
         env = rlcard.make('gin-rummy')
         rule_agent = GinRummyNoviceRuleAgent()
@@ -127,12 +123,12 @@ def __init__(self):
 
     @property
     def agents(self):
-        ''' Get a list of agents for each position in a the game
+        """Get a list of agents for each position in the game
 
         Returns:
             agents (list): A list of agents
 
         Note: Each agent should be just like RL agent with step and eval_step
               functioning well.
-        '''
+        """
         return self.rule_agents
diff --git a/rlcard/models/leducholdem_rule_models.py b/rlcard/models/leducholdem_rule_models.py
index 9cb01b429..0c0f10b07 100644
--- a/rlcard/models/leducholdem_rule_models.py
+++ b/rlcard/models/leducholdem_rule_models.py
@@ -1,23 +1,23 @@
-''' Leduc Hold 'em rule model
-'''
+"""Leduc Hold 'em rule model"""
 import rlcard
 from rlcard.models.model import Model
 
+
 class LeducHoldemRuleAgentV1(object):
-    ''' Leduc Hold 'em Rule agent version 1
-    '''
+    """Leduc Hold 'em Rule agent version 1"""
+
     def __init__(self):
         self.use_raw = True
 
     @staticmethod
     def step(state):
-        ''' Predict the action when given raw state. A simple rule-based AI.
+        """Predict the action when given raw state. A simple rule-based AI.
         Args:
             state (dict): Raw state from the game
 
         Returns:
             action (str): Predicted action
-        '''
+        """
         legal_actions = state['raw_legal_actions']
         # Aggressively play 'raise' and 'call'
         if 'raise' in legal_actions:
@@ -30,25 +30,25 @@ def step(state):
             return 'fold'
 
     def eval_step(self, state):
-        ''' Step for evaluation. The same to step
-        '''
+        """Step for evaluation. The same to step"""
         return self.step(state), []
 
+
 class LeducHoldemRuleAgentV2(object):
-    ''' Leduc Hold 'em Rule agent version 2
-    '''
+    """Leduc Hold 'em Rule agent version 2"""
+
     def __init__(self):
         self.use_raw = True
 
     @staticmethod
     def step(state):
-        ''' Predict the action when given raw state. A simple rule-based AI.
+        """Predict the action when given raw state. A simple rule-based AI.
         Args:
             state (dict): Raw state from the game
 
         Returns:
             action (str): Predicted action
-        '''
+        """
         legal_actions = state['raw_legal_actions']
         state = state['raw_obs']
         hand = state['hand']
@@ -74,7 +74,7 @@ def step(state):
             else:
                 action = 'fold'
 
-        #return action
+        # return action
         if action in legal_actions:
             return action
         else:
@@ -88,52 +88,50 @@ def step(state):
                 return action
 
     def eval_step(self, state):
-        ''' Step for evaluation. The same to step
-        '''
+        """Step for evaluation. The same to step"""
         return self.step(state), []
 
+
 class LeducHoldemRuleModelV1(Model):
-    ''' Leduc holdem Rule Model version 1
-    '''
+    """Leduc holdem Rule Model version 1"""
 
     def __init__(self):
-        ''' Load pretrained model
-        '''
+        """Load pretrained model
+        """
         env = rlcard.make('leduc-holdem')
         rule_agent = LeducHoldemRuleAgentV1()
         self.rule_agents = [rule_agent for _ in range(env.num_players)]
 
     @property
     def agents(self):
-        ''' Get a list of agents for each position in a the game
+        """Get a list of agents for each position in the game
 
         Returns:
             agents (list): A list of agents
 
         Note: Each agent should be just like RL agent with step and eval_step
               functioning well.
-        '''
+        """
         return self.rule_agents
 
+
 class LeducHoldemRuleModelV2(Model):
-    ''' Leduc holdem Rule Model version 2
-    '''
+    """Leduc holdem Rule Model version 2"""
 
     def __init__(self):
-        ''' Load pretrained model
-        '''
+        """Load pretrained model"""
         env = rlcard.make('leduc-holdem')
         rule_agent = LeducHoldemRuleAgentV2()
         self.rule_agents = [rule_agent for _ in range(env.num_players)]
 
     @property
     def agents(self):
-        ''' Get a list of agents for each position in a the game
+        """Get a list of agents for each position in the game
 
         Returns:
             agents (list): A list of agents
 
         Note: Each agent should be just like RL agent with step and eval_step
               functioning well.
-        '''
+        """
         return self.rule_agents
diff --git a/rlcard/models/limitholdem_rule_models.py b/rlcard/models/limitholdem_rule_models.py
index d9049daf6..c8d5cc620 100644
--- a/rlcard/models/limitholdem_rule_models.py
+++ b/rlcard/models/limitholdem_rule_models.py
@@ -1,24 +1,23 @@
-''' Limit Hold 'em rule model
-'''
+"""Limit Hold 'em rule model"""
 import rlcard
 from rlcard.models.model import Model
 
+
 class LimitholdemRuleAgentV1(object):
-    ''' Limit Hold 'em Rule agent version 1
-    '''
+    """Limit Hold 'em Rule agent version 1"""
 
     def __init__(self):
         self.use_raw = True
 
     @staticmethod
     def step(state):
-        ''' Predict the action when given raw state. A simple rule-based AI.
+        """Predict the action when given raw state. A simple rule-based AI.
         Args:
             state (dict): Raw state from the game
 
         Returns:
             action (str): Predicted action
-        '''
+        """
         legal_actions = state['raw_legal_actions']
         state = state['raw_obs']
         hand = state['hand']
@@ -32,10 +31,12 @@ def step(state):
         # KQ, KJ, QJ, JT
         # Fold all hand types except those mentioned above to save money
         if len(public_cards) == 0:
-            if hand[0][1] == hand [1][1]:
+            if hand[0][1] == hand[1][1]:
                 action = 'raise'
             elif hand[0][1] == 'A' or hand[1][1] == 'A':
-                if 'K' in [hand[0][1], hand[1][1]] or 'Q' in [hand[0][1], hand[1][1]] or 'J' in [hand[0][1], hand[1][1]] or 'T' in [hand[0][1], hand[1][1]]:
+                if 'K' in [hand[0][1], hand[1][1]] or 'Q' in [hand[0][1], hand[1][1]] or 'J' in [hand[0][1],
+                                                                                                 hand[1][1]] or 'T' in [
+                    hand[0][1], hand[1][1]]:
                     action = 'raise'
                 elif hand[0][0] == hand[1][0]:
                     action = 'raise'
@@ -48,12 +49,14 @@ def step(state):
             for i, _ in enumerate(public_cards):
                 public_cards_ranks[i] = public_cards[i][1]
                 public_cards_flush[i] = public_cards[i][0]
-            if hand[0][1] == hand [1][1]:
-            # if the player already have a pair, raise when public cards have card same as the pair
+            if hand[0][1] == hand[1][1]:
+                # if the player already have a pair, raise when public cards have card same as the pair
                 if hand[0][1] in public_cards_ranks:
                     action = 'raise'
             elif hand[0][1] == 'A' or hand[1][1] == 'A':
-                if 'K' in [hand[0][1], hand[1][1]] or 'Q' in [hand[0][1], hand[1][1]] or 'J' in [hand[0][1], hand[1][1]] or 'T' in [hand[0][1], hand[1][1]]:
+                if 'K' in [hand[0][1], hand[1][1]] or 'Q' in [hand[0][1], hand[1][1]] or 'J' in [hand[0][1],
+                                                                                                 hand[1][1]] or 'T' in [
+                    hand[0][1], hand[1][1]]:
                     # For AK, AQ, AJ, AT types, if public cards have A, K, Q, J, T, raise, because the chance of getting a straight greatly increases
                     if 'A' in public_cards_ranks or 'K' in public_cards_ranks or 'Q' in public_cards_ranks or 'J' in public_cards_ranks or 'T' in public_cards_ranks:
                         action = 'raise'
@@ -61,12 +64,13 @@ def step(state):
                 elif hand[0][0] == hand[1][0]:
                     if hand[0][0] in public_cards_flush:
                         action = 'raise'
-            elif max(public_cards_ranks) in ['5', '4' ,'3', '2']: # for KQ, KJ, QJ, JT, check when having no cards higher than 5
+            elif max(public_cards_ranks) in ['5', '4', '3',
+                                             '2']:  # for KQ, KJ, QJ, JT, check when having no cards higher than 5
                 action = 'check'
             else:
                 action = 'call'
 
-        if len(public_cards) == 5 or len(public_cards) == 4 :
+        if len(public_cards) == 5 or len(public_cards) == 4:
             public_cards_ranks = []
             public_cards_flush = []
             for i, _ in enumerate(public_cards):
@@ -74,12 +78,14 @@ def step(state):
                 public_cards_flush.append('S')
                 public_cards_ranks[i] = public_cards[i][1]
                 public_cards_flush[i] = public_cards[i][0]
-            if hand[0][1] == hand [1][1]:
-            # if the player already have a pair, raise when public cards have card same as the pair
+            if hand[0][1] == hand[1][1]:
+                # if the player already have a pair, raise when public cards have card same as the pair
                 if hand[0][1] in public_cards_ranks:
                     action = 'raise'
             elif hand[0][1] == 'A' or hand[1][1] == 'A':
-                if 'K' in [hand[0][1], hand[1][1]] or 'Q' in [hand[0][1], hand[1][1]] or 'J' in [hand[0][1], hand[1][1]] or 'T' in [hand[0][1], hand[1][1]]:
+                if 'K' in [hand[0][1], hand[1][1]] or 'Q' in [hand[0][1], hand[1][1]] or 'J' in [hand[0][1],
+                                                                                                 hand[1][1]] or 'T' in [
+                    hand[0][1], hand[1][1]]:
                     # For AK, AQ, AJ, AT types, if public cards have A, K, Q, J, T, raise, because the chance of getting a straight greatly increases
                     if 'A' in public_cards_ranks or 'K' in public_cards_ranks or 'Q' in public_cards_ranks or 'J' in public_cards_ranks or 'T' in public_cards_ranks:
                         action = 'raise'
@@ -87,12 +93,13 @@ def step(state):
                 elif hand[0][0] == hand[1][0]:
                     if hand[0][0] in public_cards_flush:
                         action = 'raise'
-            elif max(public_cards_ranks) in ['5', '4', '3', '2']: # for KQ, KJ, QJ, JT, fold when having no cards higher than 5
+            elif max(public_cards_ranks) in ['5', '4', '3',
+                                             '2']:  # for KQ, KJ, QJ, JT, fold when having no cards higher than 5
                 action = 'fold'
             else:
                 action = 'call'
 
-        #return action
+        # return action
         if action in legal_actions:
             return action
         else:
@@ -106,17 +113,15 @@ def step(state):
                 return action
 
     def eval_step(self, state):
-        ''' Step for evaluation. The same to step
-        '''
+        """Step for evaluation. The same to step"""
         return self.step(state), []
 
+
 class LimitholdemRuleModelV1(Model):
-    ''' Limitholdem Rule Model version 1
-    '''
+    """Limitholdem Rule Model version 1"""
 
     def __init__(self):
-        ''' Load pretrained model
-        '''
+        """Load pretrained model"""
         env = rlcard.make('limit-holdem')
 
         rule_agent = LimitholdemRuleAgentV1()
@@ -124,21 +129,21 @@ def __init__(self):
 
     @property
     def agents(self):
-        ''' Get a list of agents for each position in a the game
+        """Get a list of agents for each position in the game
 
         Returns:
             agents (list): A list of agents
 
         Note: Each agent should be just like RL agent with step and eval_step
               functioning well.
-        '''
+        """
         return self.rule_agents
 
     @property
     def use_raw(self):
-        ''' Indicate whether use raw state and action
+        """Indicate whether we use raw state and action
 
         Returns:
             use_raw (boolean): True if using raw state and action
-        '''
+        """
         return True
diff --git a/rlcard/models/model.py b/rlcard/models/model.py
index 00ce64b0c..1ee6987ed 100644
--- a/rlcard/models/model.py
+++ b/rlcard/models/model.py
@@ -1,21 +1,18 @@
-
 class Model(object):
-    ''' The base model class
-    '''
+    """The base model class"""
 
     def __init__(self):
-        ''' Load the model here
-        '''
+        """Load the model here"""
         pass
 
     @property
     def agents(self):
-        ''' Get a list of agents for each position in a the game
+        """Get a list of agents for each position in the game
 
         Returns:
             agents (list): A list of agents
 
         Note: Each agent should be just like RL agent with step and eval_step
               functioning well.
-        '''
+        """
         raise NotImplementedError
diff --git a/rlcard/models/pretrained_models.py b/rlcard/models/pretrained_models.py
index 107293f40..aa84c0caa 100644
--- a/rlcard/models/pretrained_models.py
+++ b/rlcard/models/pretrained_models.py
@@ -1,5 +1,4 @@
-''' Wrrapers of pretrained models.
-'''
+"""Wrappers of pretrained models."""
 import os
 
 import rlcard
@@ -10,23 +9,21 @@
 ROOT_PATH = os.path.join(rlcard.__path__[0], 'models/pretrained')
 
 class LeducHoldemCFRModel(Model):
-    ''' A pretrained model on Leduc Holdem with CFR (chance sampling)
-    '''
+    """A pretrained model on Leduc Holdem with CFR (chance sampling)"""
     def __init__(self):
-        ''' Load pretrained model
-        '''
+        """Load pretrained model"""
         env = rlcard.make('leduc-holdem')
         self.agent = CFRAgent(env, model_path=os.path.join(ROOT_PATH, 'leduc_holdem_cfr'))
         self.agent.load()
     @property
     def agents(self):
-        ''' Get a list of agents for each position in a the game
+        """Get a list of agents for each position in a game
 
         Returns:
             agents (list): A list of agents
 
         Note: Each agent should be just like RL agent with step and eval_step
               functioning well.
-        '''
+        """
         return [self.agent, self.agent]
 
diff --git a/rlcard/models/registration.py b/rlcard/models/registration.py
index 5f7965b94..8c7a57e11 100644
--- a/rlcard/models/registration.py
+++ b/rlcard/models/registration.py
@@ -1,76 +1,75 @@
 import importlib
 
+
 class ModelSpec(object):
-    ''' A specification for a particular Model.
-    '''
+    """A specification for a particular Model."""
     def __init__(self, model_id, entry_point=None):
-        ''' Initilize
+        """Initialize
 
         Args:
             model_id (string): the name of the model
             entry_point (string): a string that indicates the location of the model class
-        '''
+        """
         self.model_id = model_id
         mod_name, class_name = entry_point.split(':')
         self._entry_point = getattr(importlib.import_module(mod_name), class_name)
 
     def load(self):
-        ''' Instantiates an instance of the model
+        """Instantiates an instance of the model
 
         Returns:
             Model (Model): an instance of the Model
-        '''
+        """
         model = self._entry_point()
         return model
 
 
 class ModelRegistry(object):
-    ''' Register a model by ID
-    '''
+    """Register a model by ID"""
 
     def __init__(self):
-        ''' Initilize
-        '''
+        """Initialize"""
         self.model_specs = {}
 
     def register(self, model_id, entry_point):
-        ''' Register an model
+        """Register a model
 
         Args:
             model_id (string): the name of the model
             entry_point (string): a string the indicates the location of the model class
-        '''
+        """
         if model_id in self.model_specs:
             raise ValueError('Cannot re-register model_id: {}'.format(model_id))
         self.model_specs[model_id] = ModelSpec(model_id, entry_point)
 
     def load(self, model_id):
-        ''' Create a model instance
+        """Create a model instance
 
         Args:
             model_id (string): the name of the model
-        '''
+        """
         if model_id not in self.model_specs:
             raise ValueError('Cannot find model_id: {}'.format(model_id))
         return self.model_specs[model_id].load()
 
+
 # Have a global registry
 model_registry = ModelRegistry()
 
 
 def register(model_id, entry_point):
-    ''' Register a model
+    """Register a model
 
     Args:
         model_id (string): the name of the model
         entry_point (string): a string the indicates the location of the model class
-    '''
+    """
     return model_registry.register(model_id, entry_point)
 
 def load(model_id):
-    ''' Create and model instance
+    """Create a model instance
 
     Args:
         model_id (string): the name of the model
-    '''
+    """
     return model_registry.load(model_id)
diff --git a/rlcard/models/uno_rule_models.py b/rlcard/models/uno_rule_models.py
index 42185853d..4718c41d2 100644
--- a/rlcard/models/uno_rule_models.py
+++ b/rlcard/models/uno_rule_models.py
@@ -1,5 +1,4 @@
-''' UNO rule models
-'''
+"""UNO rule models"""
 
 import numpy as np
 
@@ -7,14 +6,13 @@
 from rlcard.models.model import Model
 
 class UNORuleAgentV1(object):
-    ''' UNO Rule agent version 1
-    '''
+    """UNO Rule agent version 1"""
 
     def __init__(self):
         self.use_raw = True
 
     def step(self, state):
-        ''' Predict the action given raw state. A naive rule. Choose the color
+        """Predict the action given raw state. A naive rule. Choose the color
             that appears least in the hand from legal actions. Try to keep wild
             cards as long as it can.
 
@@ -23,7 +21,7 @@ def step(self, state):
 
         Returns:
             action (str): Predicted action
-        '''
+        """
 
         legal_actions = state['raw_legal_actions']
         state = state['raw_obs']
@@ -44,20 +42,19 @@ def step(self, state):
         return action
 
     def eval_step(self, state):
-        ''' Step for evaluation. The same to step
-        '''
+        """Step for evaluation. The same as step"""
         return self.step(state), []
 
     @staticmethod
     def filter_wild(hand):
-        ''' Filter the wild cards. If all are wild cards, we do not filter
+        """Filter the wild cards. If all are wild cards, we do not filter
 
         Args:
             hand (list): A list of UNO card string
 
         Returns:
             filtered_hand (list): A filtered list of UNO string
-        '''
+        """
         filtered_hand = []
         for card in hand:
             if not card[2:6] == 'wild':
@@ -70,14 +67,14 @@ def filter_wild(hand):
 
     @staticmethod
     def count_colors(hand):
-        ''' Count the number of cards in each color in hand
+        """Count the number of cards in each color in hand
 
         Args:
             hand (list): A list of UNO card string
 
         Returns:
             color_nums (dict): The number cards of each color
-        '''
+        """
         color_nums = {}
         for card in hand:
             color = card[0]
@@ -88,12 +85,10 @@ def count_colors(hand):
         return color_nums
 
 class UNORuleModelV1(Model):
-    ''' UNO Rule Model version 1
-    '''
+    """UNO Rule Model version 1"""
 
     def __init__(self):
-        ''' Load pretrained model
-        '''
+        """Load pretrained model"""
         env = rlcard.make('uno')
 
         rule_agent = UNORuleAgentV1()
@@ -101,23 +96,23 @@ def __init__(self):
 
     @property
     def agents(self):
-        ''' Get a list of agents for each position in a the game
+        """Get a list of agents for each position in the game
 
         Returns:
             agents (list): A list of agents
 
         Note: Each agent should be just like RL agent with step and eval_step
               functioning well.
-        '''
+        """
         return self.rule_agents
 
     @property
     def use_raw(self):
-        ''' Indicate whether use raw state and action
+        """Indicate whether we use raw state and action
 
         Returns:
             use_raw (boolean): True if using raw state and action
-        '''
+        """
         return True
 
 
diff --git a/rlcard/utils/logger.py b/rlcard/utils/logger.py
index e62499493..4ed393aa3 100644
--- a/rlcard/utils/logger.py
+++ b/rlcard/utils/logger.py
@@ -1,16 +1,16 @@
 import os
 import csv
 
+
 class Logger(object):
-    ''' Logger saves the running results and helps make plots from the results
-    '''
+    """Logger saves the running results and helps make plots from the results"""
 
     def __init__(self, log_dir):
-        ''' Initialize the labels, legend and paths of the plot and log file.
+        """Initialize the labels, legend and paths of the plot and log file.
 
         Args:
-            log_path (str): The path the log files
-        '''
+            log_dir (str): The log directory for the log files
+        """
         self.log_dir = log_dir
 
     def __enter__(self):
@@ -30,20 +30,20 @@ def __enter__(self):
         return self
 
     def log(self, text):
-        ''' Write the text to log file then print it.
+        """Write the text to log file then print it.
         Args:
             text(string): text to log
-        '''
-        self.txt_file.write(text+'\n')
+        """
+        self.txt_file.write(text + '\n')
         self.txt_file.flush()
         print(text)
 
     def log_performance(self, episode, reward):
-        ''' Log a point in the curve
+        """Log a point in the curve
         Args:
             episode (int): the episode of the current point
             reward (float): the reward of the current point
-        '''
+        """
         self.writer.writerow({'episode': episode, 'reward': reward})
         print('')
         self.log('----------------------------------------')
diff --git a/rlcard/utils/pettingzoo_utils.py b/rlcard/utils/pettingzoo_utils.py
index f6c0b1a62..97f0c762e 100644
--- a/rlcard/utils/pettingzoo_utils.py
+++ b/rlcard/utils/pettingzoo_utils.py
@@ -38,15 +38,15 @@ def run_game_pettingzoo(env, agents, is_training=False):
 
 
 def reorganize_pettingzoo(trajectories):
-    ''' Reorganize the trajectory to make it RL friendly
+    """Reorganize the trajectory to make it RL friendly
 
     Args:
-        trajectory (list): A list of trajectories
+        trajectories (list): A list of trajectories
 
     Returns:
         (list): A new trajectories that can be fed into RL algorithms.
 
-    '''
+    """
     new_trajectories = defaultdict(list)
     for agent_name, trajectory in trajectories.items():
         for i in range(0, len(trajectory)-2, 2):
diff --git a/rlcard/utils/utils.py b/rlcard/utils/utils.py
index 0bfebb031..a41164e78 100644
--- a/rlcard/utils/utils.py
+++ b/rlcard/utils/utils.py
@@ -2,6 +2,7 @@
 
 from rlcard.games.base import Card
 
+
 def set_seed(seed):
     if seed is not None:
         import subprocess
@@ -17,6 +18,7 @@ def set_seed(seed):
         import random
         random.seed(seed)
 
+
 def get_device():
     import torch
     if torch.backends.mps.is_available():
@@ -29,25 +31,27 @@ def get_device():
         device = torch.device("cpu")
         print("--> Running on the CPU")
 
-    return device    
+    return device
+
 
 def init_standard_deck():
-    ''' Initialize a standard deck of 52 cards
+    """Initialize a standard deck of 52 cards
 
     Returns:
         (list): A list of Card object
-    '''
+    """
     suit_list = ['S', 'H', 'D', 'C']
     rank_list = ['A', '2', '3', '4', '5', '6', '7', '8', '9', 'T', 'J', 'Q', 'K']
     res = [Card(suit, rank) for suit in suit_list for rank in rank_list]
     return res
 
+
 def init_54_deck():
-    ''' Initialize a standard deck of 52 cards, BJ and RJ
+    """Initialize a standard deck of 52 cards, BJ and RJ
 
     Returns:
         (list): Alist of Card object
-    '''
+    """
     suit_list = ['S', 'H', 'D', 'C']
     rank_list = ['A', '2', '3', '4', '5', '6', '7', '8', '9', 'T', 'J', 'Q', 'K']
     res = [Card(suit, rank) for suit in suit_list for rank in rank_list]
@@ -55,8 +59,9 @@ def init_54_deck():
     res.append(Card('RJ', ''))
     return res
 
+
 def rank2int(rank):
-    ''' Get the coresponding number of a rank.
+    """Get the corresponding number of a rank.
 
     Args:
         rank(str): rank stored in Card object
@@ -67,11 +72,11 @@ def rank2int(rank):
     Note:
         1. If the input rank is an empty string, the function will return -1.
         2. If the input rank is not valid, the function will return None.
-    '''
+    """
     if rank == '':
         return -1
     elif rank.isdigit():
-        if int(rank) >= 2 and int(rank) <= 10:
+        if 2 <= int(rank) <= 10:
             return int(rank)
         else:
             return None
@@ -87,26 +92,28 @@ def rank2int(rank):
         return 13
     return None
 
-def elegent_form(card):
-    ''' Get a elegent form of a card string
+
+def elegant_form(card):
+    """Get an elegant form of a card string
 
     Args:
         card (string): A card string
 
     Returns:
-        elegent_card (string): A nice form of card
-    '''
-    suits = {'S': '♠', 'H': '♥', 'D': '♦', 'C': '♣','s': '♠', 'h': '♥', 'd': '♦', 'c': '♣' }
+        elegant_card (string): A nice form of card
+    """
+    suits = {'S': '♠', 'H': '♥', 'D': '♦', 'C': '♣', 's': '♠', 'h': '♥', 'd': '♦', 'c': '♣'}
     rank = '10' if card[1] == 'T' else card[1]
 
     return suits[card[0]] + rank
 
+
 def print_card(cards):
-    ''' Nicely print a card or list of cards
+    """Nicely print a card or list of cards
 
     Args:
-        card (string or list): The card(s) to be printed
-    '''
+        cards (string or list): The card(s) to be printed
+    """
     if cards is None:
         cards = [None]
     if isinstance(cards, str):
@@ -127,13 +134,13 @@ def print_card(cards):
             lines[8].append('└─────────┘')
         else:
             if isinstance(card, Card):
-                elegent_card = elegent_form(card.suit + card.rank)
+                elegant_card = elegant_form(card.suit + card.rank)
             else:
-                elegent_card = elegent_form(card)
-            suit = elegent_card[0]
-            rank = elegent_card[1]
-            if len(elegent_card) == 3:
-                space = elegent_card[2]
+                elegant_card = elegant_form(card)
+            suit = elegant_card[0]
+            rank = elegant_card[1]
+            if len(elegant_card) == 3:
+                space = elegant_card[2]
             else:
                 space = ' '
 
@@ -148,47 +155,49 @@ def print_card(cards):
             lines[8].append('└─────────┘')
 
     for line in lines:
-        print ('   '.join(line))
+        print('   '.join(line))
+
 
 def reorganize(trajectories, payoffs):
-    ''' Reorganize the trajectory to make it RL friendly
+    """Reorganize the trajectory to make it RL friendly
 
     Args:
-        trajectory (list): A list of trajectories
+        trajectories (list): A list of trajectories
         payoffs (list): A list of payoffs for the players. Each entry corresponds to one player
 
     Returns:
         (list): A new trajectories that can be fed into RL algorithms.
 
-    '''
+    """
     num_players = len(trajectories)
     new_trajectories = [[] for _ in range(num_players)]
 
     for player in range(num_players):
-        for i in range(0, len(trajectories[player])-2, 2):
-            if i ==len(trajectories[player])-3:
+        for i in range(0, len(trajectories[player]) - 2, 2):
+            if i == len(trajectories[player]) - 3:
                 reward = payoffs[player]
-                done =True
+                done = True
             else:
                 reward, done = 0, False
-            transition = trajectories[player][i:i+3].copy()
+            transition = trajectories[player][i:i + 3].copy()
             transition.insert(2, reward)
             transition.append(done)
 
             new_trajectories[player].append(transition)
     return new_trajectories
 
+
 def remove_illegal(action_probs, legal_actions):
-    ''' Remove illegal actions and normalize the
+    """Remove illegal actions and normalize the
         probability vector
 
     Args:
-        action_probs (numpy.array): A 1 dimention numpy array.
+        action_probs (numpy.array): A 1 dimensional numpy array.
         legal_actions (list): A list of indices of legal actions.
 
     Returns:
-        probd (numpy.array): A normalized vector without legal actions.
-    '''
+        probs (numpy.array): A normalized vector without illegal actions.
+    """
     probs = np.zeros(action_probs.shape[0])
     probs[legal_actions] = action_probs[legal_actions]
     if np.sum(probs) == 0:
@@ -197,16 +206,17 @@ def remove_illegal(action_probs, legal_actions):
         probs /= sum(probs)
     return probs
 
+
 def tournament(env, num):
-    ''' Evaluate he performance of the agents in the environment
+    """Evaluate the performance of the agents in the environment
 
     Args:
         env (Env class): The environment to be evaluated.
         num (int): The number of games to play.
 
     Returns:
-        A list of avrage payoffs for each player
-    '''
+        A list of average payoffs for each player
+    """
     payoffs = [0 for _ in range(env.num_players)]
     counter = 0
     while counter < num:
@@ -224,9 +234,9 @@ def tournament(env, num):
         payoffs[i] /= counter
     return payoffs
 
+
 def plot_curve(csv_path, save_path, algorithm):
-    ''' Read data from csv file and plot the results
-    '''
+    """Read data from csv file and plot the results"""
     import os
     import csv
     import matplotlib.pyplot as plt
@@ -248,4 +258,3 @@ def plot_curve(csv_path, save_path, algorithm):
             os.makedirs(save_dir)
 
         fig.savefig(save_path)
-
diff --git a/tests/envs/determism_util.py b/tests/envs/determism_util.py
index 5c5da092e..34515cb04 100644
--- a/tests/envs/determism_util.py
+++ b/tests/envs/determism_util.py
@@ -1,9 +1,9 @@
 import rlcard
-from rlcard.agents.random_agent import RandomAgent
 import random
 import numpy as np
 
-def hash_obsevation(obs):
+
+def hash_observation(obs):
     try:
         val = hash(obs.tobytes())
         return val
@@ -11,14 +11,16 @@ def hash_obsevation(obs):
         try:
             return hash(obs)
         except TypeError:
-            warnings.warn("Observation not an int or an Numpy array")
+            warnings.warn("Observation not an int or a Numpy array")
             return 0
 
+
 def rand_iter(n):
     for x in range(n+1):
         random.randint(0, 1000)
         np.random.normal(size=100)
 
+
 def gather_observations(env, actions, num_rand_steps):
     rand_iter(num_rand_steps)
     state, player_id = env.reset()
@@ -44,6 +46,7 @@ def gather_observations(env, actions, num_rand_steps):
 
     return observations
 
+
 def is_deterministic(env_name):
     env = rlcard.make(env_name)
 
@@ -55,6 +58,6 @@ def is_deterministic(env_name):
     for rand_iters in range(2):
         env = rlcard.make(env_name,config={'seed':base_seed})
 
-        hashes.append(hash(tuple([hash_obsevation(obs['obs']) for obs in gather_observations(env,actions,rand_iters)])))
+        hashes.append(hash(tuple([hash_observation(obs['obs']) for obs in gather_observations(env, actions, rand_iters)])))
 
     return hashes[0] == hashes[1]
diff --git a/tests/envs/test_gin_rummy_env.py b/tests/envs/test_gin_rummy_env.py
index ff01147f6..6f34ea826 100644
--- a/tests/envs/test_gin_rummy_env.py
+++ b/tests/envs/test_gin_rummy_env.py
@@ -1,8 +1,8 @@
-'''
+"""
     File name: tests/envs/test_gin_rummy_env.py
     Author: William Hale
     Date created: 4/20/2020
-'''
+"""
 
 import unittest
 import numpy as np
diff --git a/tests/games/test_bridge_game.py b/tests/games/test_bridge_game.py
index 88c3b9977..861f6e37e 100644
--- a/tests/games/test_bridge_game.py
+++ b/tests/games/test_bridge_game.py
@@ -1,8 +1,8 @@
-'''
+"""
     File name: test_bridge_game.py
     Author: William Hale
     Date created: 11/25/2021
-'''
+"""
 
 import unittest
 import numpy as np
diff --git a/tests/games/test_gin_rummy_game.py b/tests/games/test_gin_rummy_game.py
index a784d604d..6fa93f254 100644
--- a/tests/games/test_gin_rummy_game.py
+++ b/tests/games/test_gin_rummy_game.py
@@ -1,8 +1,8 @@
-'''
+"""
     File name: test_gin_rummy_game.py
     Author: William Hale
     Date created: 3/11/2020
-'''
+"""
 
 import unittest
 import numpy as np
diff --git a/tests/games/test_nolimitholdem_judger.py b/tests/games/test_nolimitholdem_judger.py
index 5f320aa78..4a0c81d16 100644
--- a/tests/games/test_nolimitholdem_judger.py
+++ b/tests/games/test_nolimitholdem_judger.py
@@ -27,10 +27,10 @@ def get_hands(self, player_hands, public_card):
     
     def test_judge_with_4_players(self):
 
-        '''
+        """
         suit_list = ['S', 'H', 'D', 'C']
         rank_list = ['A', '2', '3', '4', '5', '6', '7', '8', '9', 'T', 'J', 'Q', 'K']
-        '''
+        """
         players = self.get_players(4)
         
         
diff --git a/tests/utils/test_holdem_utils.py b/tests/utils/test_holdem_utils.py
index 9572f2321..cef203e36 100644
--- a/tests/utils/test_holdem_utils.py
+++ b/tests/utils/test_holdem_utils.py
@@ -23,7 +23,7 @@ def test_evaluate_hand_exception(self):
 
         hand = Hand(['CJ', 'CT', 'CQ', 'CK', 'C9', 'C8'])
         with self.assertRaises(Exception):
-            hand.evaluateHand()
+            hand.evaluate_hand()
 
     def test_has_high_card_false(self):
 
diff --git a/tests/utils/test_utils.py b/tests/utils/test_utils.py
index 2594ee2bb..aa03f87ca 100644
--- a/tests/utils/test_utils.py
+++ b/tests/utils/test_utils.py
@@ -1,6 +1,6 @@
 import unittest
 import numpy as np
-from rlcard.utils.utils import init_54_deck, init_standard_deck, rank2int, print_card, elegent_form, reorganize, tournament
+from rlcard.utils.utils import init_54_deck, init_standard_deck, rank2int, print_card, elegant_form, reorganize, tournament
 import rlcard
 from rlcard.agents.random_agent import RandomAgent
 
@@ -24,8 +24,8 @@ def test_rank2int(self):
         self.assertEqual(rank2int('K'), 13)
 
     def test_print_cards(self):
-        self.assertEqual(len(elegent_form('S9')), 2)
-        self.assertEqual(len(elegent_form('ST')), 3)
+        self.assertEqual(len(elegant_form('S9')), 2)
+        self.assertEqual(len(elegant_form('ST')), 3)
 
         print_card(None)
         print_card('S9')