rf_flyer2d_gemma2

Generation 0
import numpy as np

def reward(action, obs):
    """
    Score one step of the 2D Flyer task: dense shaping plus a proximity bonus.

    The target is the all-zero state. The shaping term penalizes distance
    from the origin (weight 0.1), the absolute angle (weight 1) and the
    magnitude of the remaining state entries (weight 0.1). A bonus is then
    added depending on how close the flyer is to the target.

    Args:
        action (np.ndarray): Control input. Accepted for interface
            compatibility; this version of the reward does not read it.
        obs (np.ndarray): Current state. ``obs[:2]`` is used as the position,
            ``obs[2]`` as the angle and ``obs[3:]`` as the velocity terms.

    Returns:
        float: Shaping reward, plus 100 when the position error is below 0.1
        and the absolute angle is below 0.5, or plus 1 when only the position
        error is below 0.5.
    """
    origin = np.zeros(6)  # target state: every component at zero

    dist = np.linalg.norm(obs[:2] - origin[:2])
    tilt = np.abs(obs[2])
    speed_cost = 0.1 * np.linalg.norm(obs[3:])

    # Dense term that pulls the state towards the origin.
    shaped = -0.1 * dist - tilt - speed_cost

    # Large bonus: close to the target and roughly upright.
    if dist < 0.1 and tilt < 0.5:
        return shaped + 100.0

    # Small bonus: in the wider neighbourhood of the target.
    if dist < 0.5:
        return shaped + 1.

    return shaped

Generation 1
import numpy as np

def reward(action, obs):
    """
    Score one step of the 2D Flyer task with a pi-scaled angle term.

    The target is the all-zero state. The shaping term is the negative sum of
    the position error, the absolute angle divided by pi, and 0.1 times the
    magnitude of the velocity terms. A bonus is added near the target.

    Args:
        action (np.ndarray): Control input. Accepted for interface
            compatibility; this version of the reward does not read it.
        obs (np.ndarray): Current state. ``obs[:2]`` is used as the position,
            ``obs[2]`` as the angle and ``obs[3:]`` as the velocity terms.

    Returns:
        float: Shaping reward, plus 100 when the position error is below 0.1
        and the absolute angle is below 0.5, or plus 1 when only the position
        error is below 0.3.
    """
    origin = np.zeros(6)  # target state: every component at zero

    dist = np.linalg.norm(obs[:2] - origin[:2])
    tilt = np.abs(obs[2])  # raw absolute angle; scaled by pi only in the shaping term
    speed_cost = 0.1 * np.linalg.norm(obs[3:])

    shaped = -dist - tilt / np.pi - speed_cost

    # The angle threshold below is applied to the unscaled angle.
    at_goal = dist < 0.1 and tilt < 0.5
    if at_goal:
        return shaped + 100.0

    # Narrower "near goal" band than a 0.5 radius, to reward staying close.
    if dist < 0.3:
        return shaped + 1.

    return shaped

Generation 2
import numpy as np

def reward(action, obs):
    """
    Score one step of the 2D Flyer task with a quadratic angle penalty.

    The target is the all-zero state. The shaping term is the negative sum of
    the position error, the squared angle, and 0.1 times the magnitude of the
    velocity terms. A single bonus is granted when the flyer is at the target.

    Args:
        action (np.ndarray): Control input. Accepted for interface
            compatibility; this version of the reward does not read it.
        obs (np.ndarray): Current state. ``obs[:2]`` is used as the position,
            ``obs[2]`` as the angle and ``obs[3:]`` as the velocity terms.

    Returns:
        float: Shaping reward, plus 100 when the position error is below 0.1
        and the squared angle is below 0.25.
    """
    origin = np.zeros(6)  # target state: every component at zero

    dist = np.linalg.norm(obs[:2] - origin[:2])
    tilt_sq = obs[2] ** 2  # squared angle, used both as penalty and in the goal test
    speed_cost = 0.1 * np.linalg.norm(obs[3:])

    shaped = -dist - tilt_sq - speed_cost

    # The 0.25 threshold is compared against the squared angle, not the angle.
    at_goal = dist < 0.1 and abs(tilt_sq) < 0.25
    return shaped + 100.0 if at_goal else shaped

Generation 3
import numpy as np

def reward(action, obs):
    """
    Score one step of the 2D Flyer task, including a control-effort penalty.

    The target is the all-zero state. The shaping term is the negative sum of
    the position error, the squared angle, 0.1 times the magnitude of the
    velocity terms, and 0.01 times the magnitude of the action. A bonus is
    granted when the flyer is at the target.

    Args:
        action (np.ndarray): Control input; its Euclidean norm is penalized.
        obs (np.ndarray): Current state. ``obs[:2]`` is used as the position,
            ``obs[2]`` as the angle and ``obs[3:]`` as the velocity terms.

    Returns:
        float: Shaping reward, plus 100 when the position error is below 0.05
        and the squared angle is below 0.1.
    """
    origin = np.zeros(6)  # target state: every component at zero

    dist = np.linalg.norm(obs[:2] - origin[:2])
    tilt_sq = obs[2] ** 2  # squared angle, used both as penalty and in the goal test
    speed_cost = 0.1 * np.linalg.norm(obs[3:])
    effort_cost = 0.01 * np.linalg.norm(action)  # discourages large control inputs

    shaped = -dist - tilt_sq - speed_cost - effort_cost

    # The 0.1 threshold is compared against the squared angle, not the angle.
    if dist < 0.05 and abs(tilt_sq) < 0.1:
        return shaped + 100.0

    return shaped

Generation 4
import numpy as np

def reward(action, obs):
    """
    Score one step of the 2D Flyer task with a distance-scaled goal bonus.

    The target is the all-zero state. The shaping term is the negative sum of
    the position error, the squared angle, 0.1 times the magnitude of the
    velocity terms, and 0.01 times the magnitude of the action. Inside the
    goal region an extra bonus grows as the position error shrinks.

    Args:
        action (np.ndarray): Control input; its Euclidean norm is penalized.
        obs (np.ndarray): Current state. ``obs[:2]`` is used as the position,
            ``obs[2]`` as the angle and ``obs[3:]`` as the velocity terms.

    Returns:
        float: Shaping reward, plus ``5 * (1 - position_error)`` when the
        position error is below 0.05 and the squared angle is below 0.1.
    """
    origin = np.zeros(6)  # target state: every component at zero

    dist = np.linalg.norm(obs[:2] - origin[:2])
    tilt_sq = obs[2] ** 2  # squared angle, used both as penalty and in the goal test
    speed_cost = 0.1 * np.linalg.norm(obs[3:])
    effort_cost = 0.01 * np.linalg.norm(action)  # discourages large control inputs

    shaped = -dist - tilt_sq - speed_cost - effort_cost

    # Outside the goal region only the shaping term applies.
    inside_goal = dist < 0.05 and abs(tilt_sq) < 0.1
    if not inside_goal:
        return shaped

    # Bonus approaches 5 as the flyer gets closer to the origin.
    return shaped + 5 * (1 - dist)