PR: allow curriculum learning in grocery ground goal task #82

Merged
merged 8 commits into from Oct 11, 2019
40 changes: 35 additions & 5 deletions python/social_bot/envs/grocery_ground.py
@@ -75,6 +75,12 @@ def __init__(self,
fail_distance_thresh=3,
random_range=10.0,
random_goal=False,
use_curriculum_training=False,
start_range=0,
increase_range_by_percent=50.,
reward_thresh_to_increase_range=0.4,
percent_full_range_in_curriculum=0.1,
max_reward_q_length=100,
reward_weight=1.0):
"""
Args:
@@ -85,6 +91,18 @@ def __init__(self,
it's considered a failure and is given reward -1
random_range (float): the goal's random position range
random_goal (bool): if true, teacher will randomly select a goal from the object list each episode
use_curriculum_training (bool): when true, use curriculum in goal task training
start_range (float): for curriculum learning, the starting random_range to set the goal
Enables curriculum learning if start_range >= 1.2 * success_distance_thresh.
NOTE: Because curriculum learning is implemented by the teacher inside the environment,
the teacher's status is currently not stored in model checkpoints, so resuming is not supported.
Contributor: As the curriculum range is increased automatically according to the parameter reward_thresh_to_increase_range, is it supposed to support resuming to some extent?

Contributor: This is described in Issue #79

increase_range_by_percent (float): for curriculum learning, how much to increase the random range
each time the agent reaches the specified amount of reward.
reward_thresh_to_increase_range (float): for curriculum learning, the average reward to reach
before the teacher increases the random range.
percent_full_range_in_curriculum (float): if above 0, randomly throw in this fraction of training
examples where random_range is the full range instead of the easier ones from the curriculum.
max_reward_q_length (int): how many recent rewards to consider when estimating agent accuracy.
"""
assert goal_name is not None, "Goal name needs to be set, not None."
GoalTask.__init__(
@@ -93,7 +111,13 @@ def __init__(self,
goal_name=goal_name,
success_distance_thresh=success_distance_thresh,
fail_distance_thresh=fail_distance_thresh,
random_range=random_range)
random_range=random_range,
use_curriculum_training=use_curriculum_training,
start_range=start_range,
increase_range_by_percent=increase_range_by_percent,
reward_thresh_to_increase_range=reward_thresh_to_increase_range,
percent_full_range_in_curriculum=percent_full_range_in_curriculum,
max_reward_q_length=max_reward_q_length)
GroceryGroundTaskBase.__init__(self)
self._random_goal = random_goal
self._objects_in_world = [
@@ -104,8 +128,14 @@ def __init__(self,
'coke_can', 'table', 'bookshelf', 'car_wheel', 'plastic_cup',
'beer', 'hammer'
]
logging.info("goal_name %s, random_goal %d, fail_distance_thresh %f",
self._goals = self._objects_to_insert
if self._random_goal:
self._goals = self._goal_name.split(',')
logging.info("goal_name %s, random_goal %d, fail_distance_thresh %f,",
self._goal_name, self._random_goal, fail_distance_thresh)
if GoalTask.should_use_curriculum_training(self):
logging.info("start_range %f, reward_thresh_to_increase_range %f",
self._start_range, self._reward_thresh_to_increase_range)
self._pos_list = list(itertools.product(range(-5, 5), range(-5, 5)))
self._pos_list.remove((0, 0))
self.reward_weight = reward_weight
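
For orientation, here is a minimal configuration sketch of the constructor above. The values are illustrative, and the import path and class name are assumptions based on this file, not taken from the PR:

```python
from social_bot.envs.grocery_ground import GroceryGroundGoalTask

# Illustrative values only; the kwargs mirror the constructor in this diff.
task = GroceryGroundGoalTask(
    goal_name='table,car_wheel,plastic_cup',  # with random_goal=True, split on ','
    random_goal=True,                  # pick one of the goals each episode
    success_distance_thresh=0.5,
    random_range=10.0,                 # the final (full) goal range
    use_curriculum_training=True,
    start_range=2.0,                   # needs >= 1.2 * success_distance_thresh
    increase_range_by_percent=50.,     # grow the range by 50% per curriculum step
    reward_thresh_to_increase_range=0.4,
    percent_full_range_in_curriculum=0.1,
    max_reward_q_length=100)
```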
@@ -121,8 +151,8 @@ def setup(self, world, agent_name):
def run(self, agent, world):
self._random_move_objects()
if self._random_goal:
random_id = random.randrange(len(self._objects_to_insert))
self.set_goal_name(self._objects_to_insert[random_id])
random_id = random.randrange(len(self._goals))
self.set_goal_name(self._goals[random_id])
yield from GoalTask.run(self, agent, world)

def _insert_objects(self, object_list):
@@ -572,7 +602,7 @@ def __init__(self,
elif task_name == 'kickball':
main_task = GroceryGroundKickBallTask(step_time=step_time)
else:
logging.debug("upsupported task name: " + task_name)
logging.debug("unsupported task name: " + task_name)

main_task_group = TaskGroup()
main_task_group.add_task(main_task)
66 changes: 63 additions & 3 deletions python/social_bot/teacher_tasks.py
@@ -15,6 +15,7 @@
A variety of teacher tasks.
"""

from collections import deque
import math
import numpy as np
import os
@@ -41,7 +42,13 @@ def __init__(self,
goal_name="goal",
success_distance_thresh=0.5,
fail_distance_thresh=0.5,
random_range=2.0):
random_range=2.0,
use_curriculum_training=False,
start_range=0,
increase_range_by_percent=50.,
reward_thresh_to_increase_range=0.4,
percent_full_range_in_curriculum=0.1,
max_reward_q_length=100):
"""
Args:
max_steps (int): episode will end if the goal is not reached in this many steps
@@ -50,15 +57,58 @@
fail_distance_thresh (float): if the agent moves away from the goal more than this distance,
it's considered a failure and is given reward -1
random_range (float): the goal's random position range
use_curriculum_training (bool): when true, use curriculum in goal task training
start_range (float): for curriculum learning, the starting random_range to set the goal
increase_range_by_percent (float): for curriculum learning, how much to increase the random range
each time the agent reaches the specified amount of reward.
reward_thresh_to_increase_range (float): for curriculum learning, the average reward to reach
before the teacher increases the random range.
percent_full_range_in_curriculum (float): if above 0, randomly throw in this fraction of training
examples where random_range is the full range instead of the easier ones from the curriculum.
max_reward_q_length (int): how many recent rewards to consider when estimating agent accuracy.
"""
super().__init__()
self._goal_name = goal_name
self._success_distance_thresh = success_distance_thresh
self._fail_distance_thresh = fail_distance_thresh
self._max_steps = max_steps
self._random_range = random_range
self._use_curriculum_training = use_curriculum_training
self._start_range = start_range
self._is_full_range_in_curriculum = False
if self.should_use_curriculum_training():
logging.info("Setting random_range to %f", self._start_range)
self._orig_random_range = random_range
self._random_range = start_range
self._max_reward_q_length = max_reward_q_length
self._q = deque(maxlen=max_reward_q_length)
self._reward_thresh_to_increase_range = reward_thresh_to_increase_range
self._increase_range_by_percent = increase_range_by_percent
self._percent_full_range_in_curriculum = percent_full_range_in_curriculum
else:
self._random_range = random_range
self.task_vocab = ['hello', 'goal', 'well', 'done', 'failed', 'to']

def should_use_curriculum_training(self):
return (self._use_curriculum_training and
self._start_range >= self._success_distance_thresh * 1.2)
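
As a quick worked check of this guard (a standalone sketch, not PR code): with the default success_distance_thresh of 0.5, curriculum training only activates when start_range is at least 0.6.

```python
def should_use_curriculum(use_curriculum_training, start_range,
                          success_distance_thresh=0.5):
    # Mirrors the guard above: the starting range must leave some slack
    # beyond the success radius, or the first stage would be trivial.
    return (use_curriculum_training and
            start_range >= success_distance_thresh * 1.2)

assert should_use_curriculum(True, 0.6)        # 0.6 >= 0.5 * 1.2
assert not should_use_curriculum(True, 0.5)    # too close to success radius
assert not should_use_curriculum(False, 10.0)  # flag disabled
```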

def _push_reward_queue(self, value):
if (not self.should_use_curriculum_training() or
self._is_full_range_in_curriculum):
return
self._q.append(value)
if (value > 0 and len(self._q) == self._max_reward_q_length and
Contributor: I think "len(self._q) == self._max_reward_q_length" can be removed. It's unlikely to exceed the reward_thresh without the queue being full.

Contributor (author): Sounds good.

Contributor (author): Oh, actually after the curriculum advances we clear the deque, and it's very likely the agent can get a few successful episodes (because it can already pass the earlier level of the curriculum) and so pass the next level by accident.

sum(self._q) >= self._max_reward_q_length *
self._reward_thresh_to_increase_range):
self._random_range *= 1. + self._increase_range_by_percent / 100.
if self._random_range > self._orig_random_range:
self._random_range = self._orig_random_range
logging.info("Raising random_range to %f", self._random_range)
self._q.clear()
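
To make the advancement rule concrete, here is a standalone simulation of _push_reward_queue's behavior (a sketch assuming "percent" semantics for the growth factor; names and values are illustrative, and the full-queue check discussed above is kept in):

```python
from collections import deque

# Standalone sketch of the advancement rule; values are illustrative.
q_len, thresh, grow_percent = 100, 0.4, 50.0
random_range, orig_range = 2.0, 10.0
q = deque(maxlen=q_len)

def push_reward(value):
    global random_range
    q.append(value)
    # Advance only on a success, once the queue is full and the mean
    # reward over the window has reached the threshold.
    if value > 0 and len(q) == q_len and sum(q) >= q_len * thresh:
        random_range = min(random_range * (1. + grow_percent / 100.),
                           orig_range)
        q.clear()  # restart the accuracy estimate at the new difficulty

for _ in range(60):
    push_reward(1)       # successes
for _ in range(40):
    push_reward(0)       # failures; queue now full with mean reward 0.6
push_reward(1)           # a success while the full queue is above threshold
print(random_range)      # 3.0 -- the 2.0 range grew by 50%
```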

def get_random_range(self):
return self._random_range

def run(self, agent, world):
"""
Start a teaching episode for this task.
@@ -86,6 +136,7 @@ def run(self, agent, world):
goal_dir = (goal_loc[0:2] - loc[0:2]) / dist
dot = sum(dir * goal_dir)
if dot > 0.707:
self._push_reward_queue(1)
# within 45 degrees of the agent direction
logging.debug("loc: " + str(loc) + " goal: " +
str(goal_loc) + "dist: " + str(dist))
@@ -96,17 +147,26 @@
else:
agent_sentence = yield TeacherAction()
elif dist > self._initial_dist + self._fail_distance_thresh:
self._push_reward_queue(0)
logging.debug("loc: " + str(loc) + " goal: " + str(goal_loc) +
"dist: " + str(dist))
yield TeacherAction(reward=-1.0, sentence="failed", done=True)
else:
agent_sentence = yield TeacherAction(sentence=self._goal_name)
logging.debug("loc: " + str(loc) + " goal: " + str(goal_loc) +
"dist: " + str(dist))
self._push_reward_queue(0)
yield TeacherAction(reward=-1.0, sentence="failed", done=True)
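
run() is a coroutine-style protocol: the environment advances the generator, receives a TeacherAction (reward, sentence, done) at each yield, and sends the agent's sentence back in. A hypothetical driver loop, using only the fields visible in this diff:

```python
# Hypothetical harness, not part of the PR: drives a generator task
# such as GoalTask.run(agent, world).
def drive_episode(task, agent, world, max_ticks=1000):
    steps = task.run(agent, world)
    action = next(steps)            # prime the generator
    for _ in range(max_ticks):
        if action.done:
            return action.reward    # episode finished
        agent_sentence = ""         # the agent's reply would go here
        action = steps.send(agent_sentence)
    return 0.0
```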

def _move_goal(self, goal, agent_loc):
range = self._random_range
if (self.should_use_curriculum_training() and
self._percent_full_range_in_curriculum > 0 and
random.random() < self._percent_full_range_in_curriculum):
range = self._orig_random_range
self._is_full_range_in_curriculum = True
else:
range = self._random_range
self._is_full_range_in_curriculum = False
while True:
loc = (random.random() * range - range / 2,
random.random() * range - range / 2, 0)
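
The diff is truncated here, but the sampling pattern is clear enough to sketch end to end: with probability percent_full_range_in_curriculum the goal uses the full original range (so hard examples stay in the mix), otherwise the current curriculum range, with positions drawn uniformly from a centered square. The retry condition at the end is an assumption, since the loop is cut off:

```python
import math
import random

def sample_goal_loc(cur_range, orig_range, percent_full_range,
                    use_curriculum, agent_loc, min_dist=0.5):
    # Mix in full-range examples during the curriculum, as in _move_goal.
    if (use_curriculum and percent_full_range > 0
            and random.random() < percent_full_range):
        r = orig_range
    else:
        r = cur_range
    while True:
        loc = (random.random() * r - r / 2,
               random.random() * r - r / 2, 0)
        # Assumed retry condition (the diff cuts off here): avoid
        # spawning the goal on top of the agent.
        if math.dist(loc[:2], agent_loc[:2]) > min_dist:
            return loc

# e.g. sample_goal_loc(2.0, 10.0, 0.1, True, agent_loc=(0, 0, 0))
```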