diff --git a/myGym/__init__.py b/myGym/__init__.py index e19bcdf1..08d10e0d 100644 --- a/myGym/__init__.py +++ b/myGym/__init__.py @@ -1,3 +1,3 @@ # This file was generated by the setup.py and will be rewritten at the next build -__version__ = "2.2.2.dev279+g20c5920" +__version__ = "2.2.2.dev316+gb2cc0b1.d20241118" __name__ = "myGym" \ No newline at end of file diff --git a/myGym/atari-ddppo.yaml b/myGym/atari-ddppo.yaml deleted file mode 100644 index e437a5d8..00000000 --- a/myGym/atari-ddppo.yaml +++ /dev/null @@ -1,33 +0,0 @@ -# Basically the same as atari-ppo, but adapted for DDPPO. Note that DDPPO -# isn't actually any more efficient on Atari, since the network size is -# relatively small and the env doesn't require a GPU. -atari-ddppo: - env: ALE/Breakout-v5 - run: DDPPO - config: - # DDPPO only supports PyTorch so far. - framework: torch - env_config: - frameskip: 1 # no frameskip - # Worker config: 10 workers, each of which requires a GPU. - num_workers: 10 - num_gpus_per_worker: 1 - # Each worker will sample 100 * 5 envs per worker steps = 500 steps - # per optimization round. This is 5000 steps summed across workers. - train_batch_size: 500 - num_envs_per_worker: 5 - # Each worker will take a minibatch of 50. There are 10 workers total, - # so the effective minibatch size will be 500. - sgd_minibatch_size: 50 - num_sgd_iter: 10 - # Params from standard PPO Atari config: - lambda: 0.95 - kl_coeff: 0.5 - clip_rewards: True - clip_param: 0.1 - vf_clip_param: 10.0 - entropy_coeff: 0.01 - batch_mode: truncate_episodes - observation_filter: NoFilter - model: - vf_share_layers: true diff --git a/myGym/condaenv.e218cqgt.requirements.txt b/myGym/condaenv.e218cqgt.requirements.txt deleted file mode 100644 index b8829734..00000000 --- a/myGym/condaenv.e218cqgt.requirements.txt +++ /dev/null @@ -1,86 +0,0 @@ -absl-py==2.0.0 -aiosignal==1.3.1 -ale-py==0.8.1 -astor==0.8.1 -attrs==23.1.0 -cachetools==5.3.1 -charset-normalizer==3.3.0 -click==8.1.3 -commentjson==0.9.0 -cycler==0.11.0 -dm-tree==0.1.8 -filelock==3.12.2 -flatbuffers==23.5.26 -fonttools==4.38.0 -fsspec==2023.1.0 -future==0.18.3 -google-auth==2.23.2 -google-auth-oauthlib==0.4.6 -gputils==1.0.6 -gputil==1.4.0 -grpcio==1.51.3 -gym==0.26.2 -gym-notices==0.0.8 -gymnasium==0.26.3 -gymnasium-notices==0.0.1 -h5py==3.8.0 -imageio==2.31.2 -importlib-metadata==6.7.0 -importlib-resources==5.12.0 -joblib==1.3.2 -jsonschema==4.17.3 -keras==2.11.0 -kiwisolver==1.4.5 -libclang==16.0.6 -lz4==4.3.2 -markdown==3.4.4 -markdown-it-py==2.2.0 -markupsafe==2.1.3 -matplotlib==3.5.3 -mdurl==0.1.2 -msgpack==1.0.5 -networkx==2.6.3 -numpy==1.18.5 -nvidia-cublas-cu11==11.10.3.66 -nvidia-cuda-nvrtc-cu11==11.7.99 -nvidia-cuda-runtime-cu11==11.7.99 -nvidia-cudnn-cu11==8.5.0.96 -oauthlib==3.2.2 -opencv-python==4.8.1.78 -packaging==23.2 -pandas==1.3.5 -pillow==9.5.0 -pkgutil-resolve-name==1.3.10 -protobuf==3.19.6 -pyarrow==12.0.1 -pyasn1==0.5.0 -pyasn1-modules==0.3.0 -pybullet==2.6.6 -pygame==2.1.0 -pygments==2.16.1 -pyparsing==3.1.1 -pyquaternion==0.9.9 -pyrsistent==0.19.3 -python-dateutil==2.8.2 -pytz==2023.3 -pywavelets==1.3.0 -pyyaml==6.0.1 -requests==2.31.0 -requests-oauthlib==1.3.1 -rich==13.4.2 -rsa==4.9 -scikit-image==0.19.3 -stable-baselines==2.10.2 -stable-baselines3==1.2.0 -tensorflow-addons==0.19.0 -tensorflow-io-gcs-filesystem==0.34.0 -tifffile==2021.11.2 -torch==1.13.1 -torchvision==0.14.1 -typeguard==4.1.2 -typer==0.9.0 -typing-extensions==4.7.1 -urllib3==2.0.3 -werkzeug==2.2.3 -wrapt==1.15.0 -zipp==3.15.0 \ No newline at end of file diff --git 
a/myGym/configs/rllib_debug.json b/myGym/configs/rllib_debug.json index 968beae0..06b7079d 100644 --- a/myGym/configs/rllib_debug.json +++ b/myGym/configs/rllib_debug.json @@ -19,7 +19,7 @@ "task_type" :"reach", "natural_language" :0, "task_objects" :[{"init":{"obj_name":"null"}, - "goal":{"obj_name":"cube_holes","fixed":1,"rand_rot":0, "sampling_area":[-0.5, 0.2, 0.3, 0.6, 0.1, 0.4]}}], + "goal":{"obj_name":"apple_target","fixed":1,"rand_rot":0, "sampling_area":[-0.5, 0.2, 0.3, 0.6, 0.1, 0.4]}}], "used_objects" :{"num_range":[0,0], "obj_list":[]}, // Observation // actual_state options (pick one): "endeff_xyz", "endeff_6D" (robot end effector), "obj_xyz", "obj_6D", "vae", "yolact", "voxel" or "dope" diff --git a/myGym/configs/train_rddl.json b/myGym/configs/train_rddl.json new file mode 100644 index 00000000..35743eb0 --- /dev/null +++ b/myGym/configs/train_rddl.json @@ -0,0 +1,44 @@ +{ + #Environment + "env_name" :"Gym-v0", + "workspace" :"table", + "engine" :"pybullet", + "render" :"opengl", + "camera" :1, + "gui" :1, + "visualize" :1, + "visgym" :0, + #Robot + "robot" :"kuka", + "robot_action" :"step", + "robot_init" :[0.0, 0.4, 0.4], + "max_velocity" :1, + "max_force" :70, + "action_repeat" :1, + #Task + "natural_language" :0, + "rddl": {"protoactions":["Approach", "Grasp", "Move", "Drop"], "num_task_range":[1,3], "allowed_objects":["Apple", "Tuna", "Banana"], "allowed_predicates":["Near"]}, + "observation" : {"actual_state":"obj_xyz", "goal_state":"obj_xyz"}, # this is ignored + #Distractor + "distractors" : {"list":null, "moveable":1, "constant_speed":0, "movement_dims":3, + "movement_endpoints":[-0.3, 0.3, 0.4, 0.7, 0.1, 0.3]}, + #Reward + "vae_path" :null, + "yolact_path" :null, + "yolact_config" :null, + #Train + "train_framework" :"pytorch", + "algo" :"ppo", + "num_networks" : 1, + "max_episode_steps" :1024, + "algo_steps" :1024, + "steps" :200000, + "pretrained_model" :null, + "multiprocessing" :false, + #Evaluation + "eval_freq" :10000, + "eval_episodes" :10, + #Saving and logging + "logdir" :"trained_models/test", + "record" :0 + } \ No newline at end of file diff --git a/myGym/env.txt b/myGym/env.txt index b44abc94..9439ce65 100644 --- a/myGym/env.txt +++ b/myGym/env.txt @@ -57,7 +57,6 @@ matplotlib 3.1.3 mdurl 0.1.2 mpi4py 3.1.4 msgpack 1.0.5 -mujoco-py 2.1.2.14 myGym 2.2.2.dev32+gd0bbd2d /home/michal/code/myGym networkx 2.6.3 numpy 1.18.5 @@ -221,7 +220,6 @@ mistune 0.8.4 mpi4py 3.1.4 mpmath 1.3.0 msgpack 1.0.5 -mujoco-py 2.1.2.14 myGym 2.2.2.dev32+gd0bbd2d /home/michal/code/myGym networkx 2.6.3 nicomotion 1.0 diff --git a/myGym/envs/__init__.py b/myGym/envs/__init__.py index 883514e9..db7e47c6 100755 --- a/myGym/envs/__init__.py +++ b/myGym/envs/__init__.py @@ -1,4 +1,4 @@ -from gym.envs.registration import register +from gymnasium.envs.registration import register register( id="CrowWorkspaceEnv-v0", diff --git a/myGym/envs/base_env.py b/myGym/envs/base_env.py index b7718307..173e6bfb 100644 --- a/myGym/envs/base_env.py +++ b/myGym/envs/base_env.py @@ -4,14 +4,13 @@ import pybullet_utils.bullet_client as bc import time import numpy as np -from gym.utils import seeding -import gym +from gymnasium.utils import seeding +import gymnasium as gym import inspect from myGym.envs.camera import Camera -import pkg_resources -currentdir = pkg_resources.resource_filename("myGym", "envs") -repodir = pkg_resources.resource_filename("myGym", "./") - +import importlib.resources as resources +currentdir = resources.files("myGym").joinpath("envs") +repodir = resources.files("myGym").parent 
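# The hunks above and below replace pkg_resources with importlib.resources throughout the package.
# A minimal sketch of the pattern (resolve_asset() is a hypothetical helper, not part of this diff):
# resources.files() returns a pathlib-like Traversable rather than a str, which is why the pybullet
# loaders (loadURDF, loadSDF, loadTexture) further down wrap the resulting path in str().
import importlib.resources as resources

def resolve_asset(relative_path: str) -> str:
    """Resolve a file shipped inside the myGym package to a plain string path for pybullet."""
    return str(resources.files("myGym") / "envs" / relative_path)

# assumed usage, mirroring how the environment code loads workspace assets:
# p.loadURDF(resolve_asset("rooms/plane.urdf"), useFixedBase=True)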
class BaseEnv(gym.Env): """ @@ -30,7 +29,7 @@ class BaseEnv(gym.Env): def __init__(self, gui_on=True, - objects_dir_path=pkg_resources.resource_filename("myGym", "envs/"), + objects_dir_path=resources.files("myGym").joinpath("envs"), max_steps=1024, show_bounding_boxes_gui=False, changing_light_gui=False, @@ -73,6 +72,7 @@ def __init__(self, self._set_physics() self._setup_scene() self._set_action_space() + self.obsdim = self.check_obs_template() self._set_observation_space() @@ -197,7 +197,7 @@ def _restart_episode(self): self.episode_reward = 0.0 self.episode_steps = 0 - def reset(self, hard=False): + def reset(self, hard=False,options=None): """ Reset the state of the environment """ diff --git a/myGym/envs/env_object.py b/myGym/envs/env_object.py index 87c5dc7f..c3c11c85 100644 --- a/myGym/envs/env_object.py +++ b/myGym/envs/env_object.py @@ -9,11 +9,11 @@ import numpy as np import sys, shutil from datetime import datetime -import pkg_resources -currentdir = pkg_resources.resource_filename("myGym", "envs") +import importlib.resources as resources +currentdir = resources.files("myGym").joinpath("envs") +from myGym.envs.vision_module import VisionModule - -class EnvObject: +class EnvObject(VisionModule): """ Env object class for dynamic object in PyBullet environment @@ -24,23 +24,25 @@ class EnvObject: :param fixed: (bool) Whether the object should have fixed position and orientation :param pybullet_client: Which pybullet client the environment should refere to in case of parallel existence of multiple instances of this environment """ - def __init__(self, urdf_path, position=[0, 0, 0], + def __init__(self, urdf_path, env, position=[0, 0, 0], orientation=[0, 0, 0, 0], fixed=False, - pybullet_client=None): + pybullet_client=None, observation="ground_truth", vae_path=None, yolact_path=None, yolact_config=None, is_robot=False, rgba=None): self.p = pybullet_client self.urdf_path = urdf_path self.init_position = position self.init_orientation = orientation self.fixed = fixed - self.name = os.path.splitext(os.path.basename(self.urdf_path))[0] - self.virtual = True if "virtual" in self.name else False + self.is_robot = is_robot + self.o_name = os.path.splitext(os.path.basename(self.urdf_path))[0] + self.virtual = True if "virtual" in self.o_name else False self.object_ldamping = 1 self.object_adamping = 1 self.object_lfriction = 100 self.object_rfriction = 100 self.object_mass = 10 - self. 
object_stiffness = 1 - if not self.virtual: + self.color_rgba = rgba + self.object_stiffness = 1 + if not self.virtual and not self.is_robot: self.uid = self.load() self.bounding_box = self.get_bounding_box() self.centroid = self.get_centroid() @@ -50,6 +52,8 @@ def __init__(self, urdf_path, position=[0, 0, 0], self.centroid = None self.debug_line_ids = [] self.cuboid_dimensions = None + super(EnvObject, self).__init__(observation=observation, env=env, + vae_path=vae_path, yolact_path=yolact_path, yolact_config = yolact_config) def set_color(self, color): """ @@ -144,7 +148,8 @@ def get_position_and_orientation(self): :return orientation: (array) Position ([x,y,z]) Orientation of object (quaternion [x,y,z,w]) """ if not self.virtual: - return self.p.getBasePositionAndOrientation(self.uid) + # return self.p.getBasePositionAndOrientation(self.uid) + return tuple(np.array(a) for a in self.p.getBasePositionAndOrientation(self.uid)) else: return (self.init_position, self.init_orientation) @@ -208,7 +213,7 @@ def load(self): Returns: :return self.uid: (int) ID of loaded object """ - self.uid = self.p.loadURDF(self.urdf_path, self.init_position, self.init_orientation, useFixedBase=self.fixed, flags=self.p.URDF_USE_SELF_COLLISION) + self.uid = self.p.loadURDF(str(self.urdf_path), self.init_position, self.init_orientation, useFixedBase=self.fixed, flags=self.p.URDF_USE_SELF_COLLISION) if self.fixed: self.p.changeDynamics(self.uid, 0, collisionMargin=0., contactProcessingThreshold=0.0, ccdSweptSphereRadius=0) else: @@ -305,9 +310,9 @@ def get_name(self): Get object's name. Uses data from URDF. Returns: - :return self.name: (string) Object's name + :return self.o_name: (string) Object's name """ - return self.name + return self.o_name def get_uid(self): """ @@ -317,23 +322,60 @@ def get_uid(self): :return self.uid: Object's unique ID """ return self.uid + + def get_obj_position_for_obs(self, img=None, depth=None): + """ + Get object position in world coordinates of environment for observation (i.e. from Yolact) + + Parameters: + :param obj: (object) Object to find its mask and centroid + :param img: (array) 2D input image to inference of vision model + :param depth: (array) Depth input image to inference of vision model + Returns: + :return position: (list) Centroid of object in world coordinates + """ + return super().get_obj_position_for_obs(self, img, depth) + + def get_obj_orientation_for_obs(self, img=None): + """ + Get object orientation in world coordinates of environment for observation (i.e. from Yolact) + + Parameters: + :param obj: (object) Object to find its mask and centroid + :param img: (array) 2D input image to inference of vision model + Returns: + :return orientation: (list) Orientation of object in world coordinates + """ + return super().get_obj_orientation_for_obs(self, img) + + def get_obj_bbox_for_obs(self, img=None): + """ + Get bounding box of an object for observation (i.e. 
from Yolact) + + Parameters: + :param obj: (object) Object to find its bounding box + :param img: (array) 2D input image to inference of vision model + Returns: + :return bbox: (list) Bounding box of object + """ + return super().get_obj_bbox_for_obs(self, img) @staticmethod - def get_random_object_position(boarders): + def get_random_object_position(borders): """ Generate random position in defined volume Parameters: - :param boarders: (list) Volume, where position may be generated ([x,x,y,y,z,z]) + :param borders: (list) Volume, where position may be generated ([x,x,y,y,z,z]) Returns: :return pos: (list) Position in specified volume ([x,y,z]) """ - if any(isinstance(i, list) for i in boarders): - boarders = boarders[random.randint(0,len(boarders)-1)] + if any(isinstance(i, list) for i in borders): + borders = borders[random.randint(0,len(borders)-1)] pos = [] - pos.append(random.uniform(boarders[0], boarders[1])) #x - pos.append(random.uniform(boarders[2], boarders[3])) #y - pos.append(random.uniform(boarders[4], boarders[5])) #z + pos.append(random.uniform(borders[0], borders[1])) #x + pos.append(random.uniform(borders[2], borders[3])) #y + pos.append(random.uniform(borders[4], borders[5])) #z return pos @staticmethod diff --git a/myGym/envs/gym_env.py b/myGym/envs/gym_env.py index 1708c794..04c12df4 100644 --- a/myGym/envs/gym_env.py +++ b/myGym/envs/gym_env.py @@ -14,13 +14,15 @@ import random from myGym.utils.helpers import get_workspace_dict -import pkg_resources +import importlib.resources as resources +currentdir = resources.files("myGym").joinpath("envs") from myGym.envs.human import Human import myGym.utils.colors as cs -from myGym.envs.vision_module import get_module_type +from myGym.utils.helpers import get_module_type from myGym.envs.natural_language import NaturalLanguage +from myGym.envs.task import TaskModule + -currentdir = pkg_resources.resource_filename("myGym", "envs") # used to exclude these colors for other objects, in the order of goal, init, done, else COLORS_RESERVED_FOR_HIGHLIGHTING = ["dark green", "green", "blue", "gray"] @@ -31,8 +33,8 @@ class GymEnv(CameraEnv): Environment class for particular environment based on myGym basic environment classes Parameters: - :param task_objects: (list of strings) Objects that are relevant for performing the task :param observation: (dict) Template dictionary of what should be part of the network observation + :param rddl: (dict) Template dictionary for task creation via rddl :param workspace: (string) Workspace in gym, where training takes place (collabtable, maze, drawer, ...) 
:param dimension_velocity: (float) Maximum allowed velocity for robot movements in individual x,y,z axis :param used_objects: (list of strings) Names of extra objects (not task objects) that can appear in the scene @@ -61,12 +63,11 @@ class GymEnv(CameraEnv): """ def __init__(self, - task_objects, observation, + rddl={}, framework="ray", workspace="table", dimension_velocity=0.05, - used_objects=None, action_repeat=1, color_dict={}, robot='kuka', @@ -78,8 +79,6 @@ def __init__(self, num_networks=1, network_switcher="gt", distractors=None, - reward='distance', - distance_type='euclidean', active_cameras=None, dataset=False, obs_space=None, @@ -106,12 +105,12 @@ def __init__(self, self.action_repeat = action_repeat self.dimension_velocity = dimension_velocity self.active_cameras = active_cameras - self.used_objects = used_objects self.action_repeat = action_repeat self.color_dict = color_dict self.task_type = task_type - self.task_objects_dict = task_objects + self.rddl_config = rddl self.framework = framework + self.vision_source = get_module_type(self.obs_type) self.task_objects = [] self.env_objects = [] self.vae_path = vae_path @@ -119,17 +118,18 @@ def __init__(self, self.yolact_config = yolact_config self.has_distractor = distractors["list"] != None self.distractors = distractors - self.distance_type = distance_type self.objects_area_borders = None - self.reward = reward + self.reachable_borders = None self.dist = d.DistractorModule(distractors["moveable"], distractors["movement_endpoints"], distractors["constant_speed"], distractors["movement_dims"], env=self) self.dataset = dataset self.obs_space = obs_space self.visualize = visualize self.visgym = visgym + self.check_obs_template() self.logdir = logdir self.workspace_dict = get_workspace_dict() + self.robot = None if not hasattr(self, "task"): self.task = None @@ -145,6 +145,7 @@ def __init__(self, self.nl_mode = natural_language if self.nl_mode: self.nl = NaturalLanguage(self) + self.use_magnet = True # @TODO provisory solution, used when _gripper in self.robot_action. 
We should make it adjustable in the config whether to use actual gripper control or magnet self.nl_text_id = None self.training = training if self.nl_mode: @@ -154,41 +155,31 @@ # # just some dummy settings so that _set_observation_space() doesn't throw exceptions at the beginning # self.num_networks = 3 self.rng = np.random.default_rng(seed=0) - self.task_objects_were_given_as_list = isinstance(self.task_objects_dict, list) - self.n_subtasks = len(self.task_objects_dict) if self.task_objects_were_given_as_list else 1 if self.reach_gesture and not self.nl_mode: raise Exception("Reach gesture task can't be started without natural language mode") super(GymEnv, self).__init__(active_cameras=active_cameras, **kwargs) def _init_task_and_reward(self): - if self.reward == 'distractor': - self.has_distractor = True - self.distractor = ['bus'] if not self.distractors["list"] else self.distractors["list"] - reward_classes = { - "1-network": {"F": F, "A": A, "switch": SwitchRewardNew, "turn": TurnRewardNew, "FM": FaM, "FMR": FaMaR,"FROM": FaROaM, "FMOR": FaMOaR, "FMOT": FaMOaT, "FROT": FaROaT, - "FMOM": FaMOaM, "FMLFR": FaMaLaFaR}, - "2-network": {"switch": SwitchRewardNew, "turn": TurnRewardNew, "FM": FaM, "AG": AaG}, - "3-network": {"FMR": FaMaR,"FROM": FaROaM, "FMOR": FaMOaR, "FMOT": FaMOaT, "FROT": FaROaT, - "FMOM": FaMOaM, "AGM": AaGaM}, - "4-network": {"FMLFR": FaMaLaFaR, "AGMD" : AaGaMaD}, - "5-network": {"AGMDW" : AaGaMaDaW}} - - scheme = "{}-network".format(str(self.num_networks)) - assert self.reward in reward_classes[scheme].keys(), "Failed to find the right reward class. Check reward_classes in gym_env.py" - self.task = t.TaskModule(task_type=self.task_type, - observation=self.obs_type, - vae_path=self.vae_path, - yolact_path=self.yolact_path, - yolact_config=self.yolact_config, - distance_type=self.distance_type, - number_tasks=len(self.task_objects_dict), - env=self) - self.reward = reward_classes[scheme][self.reward](env=self, task=self.task) + """Main communicator with rddl. Passes arguments from the config to RDDLWorld and tells rddl to make a task sequence and to build + a scene accordingly, including the robot. Work in progress""" + self.task = TaskModule(self, self.rddl_config["num_task_range"], self.rddl_config["protoactions"], self.rddl_config["allowed_objects"], self.rddl_config["allowed_predicates"], self.p) + # generates the task sequence and initializes the scene with objects accordingly. The first action is set as self.task.current_task + self.task.build_scene_for_task_sequence() # it also loads the robot; this must be done this way so that rddl knows about the robot + self.reward = self.task.current_action.reward # reward class + + print(f"Initial condition is: {self.task.rddl_task.current_action.initial.decide()}") + print(f"Goal condition is: {self.task.rddl_task.current_action.goal.decide()}") + + self.compute_reward = self.task.current_action.reward # function that computes the reward (no inputs needed, already bound to the objects) + obs_entities = self.reward.get_relevant_entities() # does not work yet, must be done in rddl + self.robot = self.task.rddl_robot # robot class as we know it + + def _setup_scene(self): """ - Set-up environment scene. Load static objects, apply textures. Load robot. + Set-up environment scene. Load static objects, apply textures.
""" self._add_scene_object_uid(self._load_static_scene_urdf(path="rooms/plane.urdf", name="floor"), "floor") if self.visgym: @@ -203,25 +194,26 @@ def _setup_scene(self): if ws_texture: self._change_texture(self.workspace, self._load_texture(ws_texture)) self._change_texture("floor", self._load_texture("parquet1.jpg")) self.objects_area_borders = self.workspace_dict[self.workspace]['borders'] - kwargs = {"position": self.workspace_dict[self.workspace]['robot']['position'], + self.reachable_borders = self.workspace_dict[self.workspace]['reachable_borders'] + self.robot_kwargs = {"position": self.workspace_dict[self.workspace]['robot']['position'], "orientation": self.workspace_dict[self.workspace]['robot']['orientation'], "init_joint_poses": self.robot_init_joint_poses, "max_velocity": self.max_velocity, "max_force": self.max_force, "dimension_velocity": self.dimension_velocity, - "pybullet_client": self.p} - self.robot = robot.Robot(self.robot_type, robot_action=self.robot_action, task_type=self.task_type, **kwargs) + "pybullet_client": self.p, "env": self, "observation":self.vision_source, "vae_path":self.vae_path, + "yolact_path":self.yolact_path, "yolact_config": self.yolact_config} if self.workspace == 'collabtable': self.human = Human(model_name='human', pybullet_client=self.p) def _load_urdf(self, path, fixedbase=True, maxcoords=True): transform = self.workspace_dict[self.workspace]['transform'] - return self.p.loadURDF(pkg_resources.resource_filename("myGym", os.path.join("envs", path)), + return self.p.loadURDF(resources.files("myGym").joinpath("envs",path), transform['position'], self.p.getQuaternionFromEuler(transform['orientation']), useFixedBase=fixedbase, useMaximalCoordinates=maxcoords) - + def _load_static_scene_urdf(self, path, name, fixedbase=True): transform = self.workspace_dict[self.workspace]['transform'] - object = env_object.EnvObject(pkg_resources.resource_filename("myGym", os.path.join("envs", path)), transform['position'], self.p.getQuaternionFromEuler(transform['orientation']), pybullet_client=self.p, fixed=fixedbase) + object = env_object.EnvObject(resources.files("myGym").joinpath("envs",path), self, transform['position'], self.p.getQuaternionFromEuler(transform['orientation']), pybullet_client=self.p, fixed=fixedbase, observation=self.vision_source, vae_path=self.vae_path, yolact_path=self.yolact_path, yolact_config=self.yolact_config) self.static_scene_objects[name] = object return object.uid @@ -231,7 +223,7 @@ def _change_texture(self, name, texture_id): def _load_texture(self, name): - return self.p.loadTexture(pkg_resources.resource_filename("myGym", "/envs/textures/{}".format(name))) + return self.p.loadTexture(str(resources.files("myGym").joinpath("envs/textures/",name))) def _set_observation_space(self): @@ -241,16 +233,15 @@ def _set_observation_space(self): if self.framework == "ray": from gymnasium import spaces else: - from gym import spaces - self._init_task_and_reward() + from gymnasium import spaces if self.obs_space == "dict": - goaldim = int(self.task.obsdim / 2) if self.task.obsdim % 2 == 0 else int(self.task.obsdim / 3) + goaldim = int(self.obsdim / 2) if self.obsdim % 2 == 0 else int(self.obsdim / 3) self.observation_space = spaces.Dict( - {"observation": spaces.Box(low=-10, high=10, shape=(self.task.obsdim,)), + {"observation": spaces.Box(low=-10, high=10, shape=(self.obsdim,)), "achieved_goal": spaces.Box(low=-10, high=10, shape=(goaldim,)), "desired_goal": spaces.Box(low=-10, high=10, shape=(goaldim,))}) else: - observationDim = 
self.task.obsdim + observationDim = self.obsdim observation_high = np.array([100] * observationDim) self.observation_space = spaces.Box(-observation_high, observation_high) @@ -259,10 +250,11 @@ def _set_action_space(self): """ Set action space dimensions and range """ + self._init_task_and_reward() # we first need rddl to make a robot if self.framework == "ray": from gymnasium import spaces else: - from gym import spaces + from gymnasium import spaces action_dim = self.robot.get_action_dimension() if "step" in self.robot_action: self.action_low = np.array([-1] * action_dim) @@ -271,7 +263,6 @@ def _set_action_space(self): # self.action_low = np.insert(self.action_low, action_dim, self.robot.gjoints_limits[0][1]) # self.action_high = np.insert(self.action_high, action_dim,self.robot.gjoints_limits[1][1]) - elif "absolute" in self.robot_action: if any(isinstance(i, list) for i in self.objects_area_borders): borders_max = np.max(self.objects_area_borders, 0) @@ -304,7 +295,7 @@ def _rescale_action(self, action): """ return [(sub + 1) * (h - l) / 2 + l for sub, l, h in zip(action, self.action_low, self.action_high)] - def reset(self, random_pos=True, hard=False, random_robot=False, only_subtask=False): + def reset(self, random_pos=True, hard=False, random_robot=False, only_subtask=False, options=None, seed=None): """ Environment reset called at the beginning of an episode. Reset state of objects, robot, task and reward. @@ -317,100 +308,11 @@ def reset(self, random_pos=True, hard=False, random_robot=False, only_subtask=Fa :return self._observation: (list) Observation data of the environment """ if not only_subtask: - self.robot.reset(random_robot=random_robot) - super().reset(hard=hard) - - if not self.nl_mode: - other_objects = [] - if self.task_objects_were_given_as_list: - task_objects_dict = copy.deepcopy(self.task_objects_dict) - else: - if not self.reach_gesture: - init = self.rng.choice(self.task_objects_dict["init"]) - goal = self.rng.choice(self.task_objects_dict["goal"]) - objects = self.task_objects_dict["init"] + self.task_objects_dict["goal"] - task_objects_dict = [{"init": init, "goal": goal}] - other_objects = self._randomly_place_objects({"obj_list": [o for o in objects if o != init and o != goal]}) - else: - goal = self.rng.choice(self.task_objects_dict["goal"]) - task_objects_dict = [{"init": {"obj_name":"null"}, "goal": goal}] - other_objects = self._randomly_place_objects({"obj_list": [o for o in self.task_objects_dict["goal"] if o != goal]}) - - all_subtask_objects = [x for i, x in enumerate(task_objects_dict) if i != self.task.current_task] - subtasks_processed = [list(x.values()) for x in all_subtask_objects] - subtask_objects = self._randomly_place_objects({"obj_list": list(chain.from_iterable(subtasks_processed))}) - self.env_objects = {"env_objects": self._randomly_place_objects(self.used_objects)} - if self.task_objects_were_given_as_list: - self.env_objects["env_objects"] += other_objects - self.task_objects = self._randomly_place_objects(task_objects_dict[self.task.current_task]) - self.task_objects = dict(ChainMap(*self.task_objects)) - if subtask_objects: - self.task_objects["distractor"] = subtask_objects - else: - init_objects = [] - if not self.reach_gesture: - init_objects = self._randomly_place_objects({"obj_list": self.task_objects_dict["init"]}) - for i, c in enumerate(cs.draw_random_rgba(size=len(init_objects), excluding=COLORS_RESERVED_FOR_HIGHLIGHTING)): - init_objects[i].set_color(c) - goal_objects = self._randomly_place_objects({"obj_list": 
self.task_objects_dict["goal"]}) - for i, c in enumerate(cs.draw_random_rgba(size=len(goal_objects), transparent=self.task_type != "reach", excluding=COLORS_RESERVED_FOR_HIGHLIGHTING)): - goal_objects[i].set_color(c) - - if self.training or (not self.training and self.reach_gesture) and self.nl_mode: - # setting the objects and generating a description based on them - self.nl.get_venv().set_objects(init_goal_objects=(init_objects, goal_objects)) - self.nl.generate_subtask_with_random_description() - - # resetting the objects to remove the knowledge about whether an object is an init or a goal - self.nl.get_venv().set_objects(all_objects=init_objects + goal_objects) - self.task_type, self.reward, self.num_networks, init, goal = self.nl.extract_subtask_info_from_description(self.nl.get_previously_generated_subtask_description()) - else: - success = False - i = 0 - - while (not success): - try: - if i > 0: - print("Unknown task description format. Actual format is very strict. " - "All articles must be included. Examples of valid subtask descriptions in general:") - print("\"reach the cyan cube\"") - print("\"reach the transparent pink cube left to the gray cube\"") - print("\"pick the orange cube and place it to the same position as the pink cube\"") - print("Pay attention to the fact that colors, task and objects in your case can be different!") - print("To leave the program use Ctrl + Z!") - if self.nl: - self.nl.set_current_subtask_description(input("Enter a subtask description in the natural language based on what you see:")) - # resetting the objects to remove the knowledge about whether an object is an init or a goal - self.nl.get_venv().set_objects(all_objects=init_objects + goal_objects) - self.task_type, self.reward, self.num_networks, init, goal = self.nl.extract_subtask_info_from_description(self.nl.get_previously_generated_subtask_description()) - success = True - break - except: - pass - i += 1 - - self.task_objects = {"actual_state": init if init is not None else self.robot, "goal_state": goal} - other_objects = [o for o in init_objects + goal_objects if o != init and o != goal] - self.env_objects = {"env_objects": other_objects + self._randomly_place_objects(self.used_objects)} - - # will set the task and the reward - self._set_observation_space() - if only_subtask: - if self.task.current_task < (len(self.task_objects_dict)) and not self.nl_mode: - self.shift_next_subtask() - if self.has_distractor: - distrs = [] - if self.distractors["list"]: - for distractor in self.distractors["list"]: - distrs.append( - self.dist.place_distractor(distractor, self.p, self.task_objects["goal_state"].get_position())) - if self.task_objects["distractor"]: - self.task_objects["distractor"].extend(distrs) - else: - self.task_objects["distractor"] = distrs - self.env_objects = {**self.task_objects, **self.env_objects} + self.task.rddl_robot.reset(random_robot=random_robot) + super().reset(hard=hard, options=options) + # @TODO I removed support of nl_mode, which is dependent on the old structure. 
We need to add nl_support again in later phases self.task.reset_task() - self.reward.reset() + #self.reward.reset() self.p.stepSimulation() self._observation = self.get_observation() @@ -420,24 +322,8 @@ def reset(self, random_pos=True, hard=False, random_robot=False, only_subtask=Fa self.nl_text_id = self.p.addUserDebugText(self.nl.get_previously_generated_subtask_description(), [2, 0, 1], textSize=1) if only_subtask and self.nl_text_id is not None: self.p.removeUserDebugItem(self.nl_text_id) - - return self.flatten_obs(self._observation.copy()) - - def shift_next_subtask(self): - # put current init and goal back in env_objects - self.env_objects["distractor"].extend([self.env_objects["actual_state"], self.env_objects["goal_state"]]) - # set the next subtask objects as the actual and goal state and remove them from env_objects - self.env_objects["actual_state"] = self.env_objects["distractor"][0] - self.env_objects["goal_state"] = self.env_objects["distractor"][1] - del self.env_objects["distractor"][:2] - self.task_objects["distractor"] = self.env_objects["distractor"].copy() - # copy the state to task_objects and change colors - self.task_objects["actual_state"] = self.env_objects["actual_state"] - self.task_objects["goal_state"] = self.env_objects["goal_state"] - self.highlight_active_object(self.env_objects["actual_state"], "init") - self.highlight_active_object(self.env_objects["goal_state"], "goal") - for o in self.env_objects["distractor"][-2:]: - self.highlight_active_object(o, "done") + info = {'d': 0.9, 'f': 0, 'o': self._observation} + return (np.asarray(self._observation.copy(), dtype="float32"), info) def flatten_obs(self, obs): """ Returns the input obs dict as flattened list """ @@ -458,18 +344,25 @@ def _set_cameras(self): def get_observation(self): """ - Get observation data from the environment + Get observation data from the environment. Probably should be moved to task.py Returns: :return observation: (array) Represented position of task relevant objects """ - self.observation["task_objects"] = self.task.get_observation() + ## !!!! This is a provisory solution until self.task.current_task.reward.get_relevant_entities() returns the relevant objects. 
+ ## Now we just take the position and orientation of every object in the scene, including the gripper, but that is valid in only a few cases + obs = [] + relevant_entities = self.reward.get_relevant_entities() + for e in relevant_entities: + obs += list(e.get_position()) + obs += list(e.get_orientation()) + if "gripper" in self.robot_action: + obs.append(int(self.robot.gripper_active)) + self.observation = obs if self.dataset: - self.observation["camera_data"] = self.render(mode="rgb_array") - self.observation["objects"] = self.env_objects - self.observation["additional_obs"] = {} - return self.observation - return self.observation["task_objects"] + raise NotImplementedError # @TODO one day + return self.observation + def step(self, action): """ @@ -484,27 +377,43 @@ :return info: (dict) Additional information about step """ self._apply_action_robot(action) + print("robot action: {}".format(action), end="\r") if self.has_distractor: [self.dist.execute_distractor_step(d) for d in self.distractors["list"]] self._observation = self.get_observation() if self.dataset: reward, done, info = 0, False, {} else: - reward = self.reward.compute(observation=self._observation) + reward = self.compute_reward() # this uses the rddl protoaction, no arguments needed + print("Reward by RDDL: {}".format(reward), end="\r") self.episode_reward += reward - # self.task.check_goal() - done = self.episode_over - info = {'d': self.task.last_distance / self.task.init_distance, 'f': int(self.episode_failed), - 'o': self._observation} - if done: self.successful_finish(info) + done = self.episode_over # @TODO replace with actual is_done value from RDDL + info = {'d': 0.9, 'f': int(self.episode_failed), + 'o': self._observation} # @TODO replace 'd' with actual distance values obtained from rddl or make our own implementation + if done is True: self.successful_finish(info) if self.task.subtask_over: self.reset(only_subtask=True) - # return self._observation, reward, done, info - return self.flatten_obs(self._observation.copy()), reward, done, info + # return self._observation, reward, done, truncated, info + truncated = False # not sure when to use this + return np.asarray(self._observation.copy(), dtype="float32"), reward, done, truncated, info - def compute_reward(self, achieved_goal, desired_goal, info): - # @TODO: Reward computation for HER, argument for .compute() - reward = self.reward.compute(np.append(achieved_goal, desired_goal)) - return reward + + def get_linkstates_unpacked(self): + o = [] + [[o.append(x) for x in z] for z in self.robot.observe_all_links()] + return o + + + def check_obs_template(self): + """ + @TODO Add smart and variable observation space constructor based on config?
+ + Returns: + :return obsdim: (int) Dimensionality of observation + """ + obsdim = 14 # 7 values for actual state (object or gripper pose and orientation) and same for goal state + if "gripper" in self.robot_action: + obsdim += 1 # binary value for gripper close or open + return obsdim def successful_finish(self, info): """ @@ -524,7 +433,6 @@ def _apply_action_robot(self, action): Parameters: :param action: (list) Action data returned by trained model """ - use_magnet = self.reward.get_magnetization_status() for i in range(self.action_repeat): objects = self.env_objects self.robot.apply_action(action, env_objects=objects) @@ -568,58 +476,6 @@ def draw_bounding_boxes(self): for object in self.env_objects: object.draw_bounding_box() - def _place_object(self, obj_info): - fixed = True if obj_info["fixed"] == 1 else False - pos = env_object.EnvObject.get_random_object_position(obj_info["sampling_area"]) - orn = env_object.EnvObject.get_random_z_rotation() if obj_info["rand_rot"] == 1 else [0, 0, 0, 1] - object = env_object.EnvObject(obj_info["urdf"], pos, orn, pybullet_client=self.p, fixed=fixed) - if self.color_dict: object.set_color(self.color_of_object(object)) - return object - - def _randomly_place_objects(self, object_dict): - """ - Place dynamic objects to the scene randomly - - Parameters: - :param n: (int) Number of objects to place in the scene - :param object_names: (list of strings) Objects that may be placed to the scene - :param random_pos: (bool) Whether to place object to random positions in the scene - Returns: - :return env_objects: (list of objects) Objects that are present in the current scene - """ - env_objects = [] - if not "init" in object_dict.keys(): # solves used_objects - for idx, o in enumerate(object_dict["obj_list"]): - if o["obj_name"] != "null": - urdf = self._get_urdf_filename(o["obj_name"]) - if urdf: - object_dict["obj_list"][idx]["urdf"] = urdf - else: - del object_dict["obj_list"][idx] - if "num_range" in object_dict.keys(): - for x in range(random.randint(object_dict["num_range"][0], object_dict["num_range"][1])): - env_o = self._place_object(random.choice(object_dict["obj_list"])) - self.highlight_active_object(env_o, "other") - env_objects.append(env_o) - else: - for o in object_dict["obj_list"]: - if o["obj_name"] != "null": - env_o = self._place_object(o) - self.highlight_active_object(env_o, "other") - env_objects.append(env_o) - else: # solves task_objects - for o in ['init', 'goal']: - d = object_dict[o] - if d["obj_name"] != "null": - d["urdf"] = self._get_urdf_filename(d["obj_name"]) - n = "actual_state" if o == "init" else "goal_state" - env_o = self._place_object(d) - self.highlight_active_object(env_o, o) - env_objects.append({n: env_o}) - elif d["obj_name"] == "null" and o == "init": - env_objects.append({"actual_state": self.robot}) - return env_objects - def highlight_active_object(self, env_o, obj_role): if obj_role == "goal": env_o.set_color(cs.name_to_rgba("transparent green")) @@ -630,21 +486,9 @@ def highlight_active_object(self, env_o, obj_role): else: env_o.set_color(cs.name_to_rgba("gray")) - def color_of_object(self, object): - """ - Set object's color - Parameters: - :param object: (object) Object - Returns: - :return color: (list) RGB color - """ - if object.name not in self.color_dict: - return cs.draw_random_rgba() - else: - color_name = random.sample(self.color_dict[object.name], 1)[0] - color = cs.name_to_rgba(color_name) - return color + def get_random_color(self): + return cs.draw_random_rgba() def get_task_objects(self, 
with_none=False) -> List[EnvObject]: objects = [self.task_objects["actual_state"], self.task_objects["goal_state"]] diff --git a/myGym/envs/human.py b/myGym/envs/human.py index ca294820..f1f1baae 100644 --- a/myGym/envs/human.py +++ b/myGym/envs/human.py @@ -1,7 +1,7 @@ from typing import List, Tuple import numpy as np -import pkg_resources +import importlib.resources as resources from myGym.envs.env_object import EnvObject from myGym.utils.helpers import get_robot_dict @@ -56,7 +56,7 @@ def _load_model(self, model_name): :param model_name: (string) Model name in the get_robot_dict() dictionary """ path, position, orientation = get_robot_dict()[model_name].values() - path = pkg_resources.resource_filename("myGym", path) + path = resources.files("myGym").joinpath(path) orientation = self.p.getQuaternionFromEuler(orientation) if path[-3:] == 'sdf': diff --git a/myGym/envs/predicates.py b/myGym/envs/predicates.py index e3189b70..7fe0e5eb 100644 --- a/myGym/envs/predicates.py +++ b/myGym/envs/predicates.py @@ -26,12 +26,24 @@ def get_value(self, obj1, obj2): base2 = obj2.get_bounding_box()[-1][-1] return obj2.uid in overlapping and base1 > base2 + +class IsReachable(): + def set_value(self, obj, robot): + raise NotImplementedError() + + def get_value(self, obj, robot): + pass + + def get_scale_from_urdf(pth): with open(pth) as f: lines = f.readlines() - scale = float([x for x in lines if "scale" in x][0].split("scale=\"")[1].split(" ")[0]) + if len([x for x in lines if "scale" in x]) > 0: + scale = float([x for x in lines if "scale" in x][0].split("scale=\"")[1].split(" ")[0]) + else: + scale = 1 return scale @@ -39,44 +51,43 @@ def get_scale_from_urdf(pth): if __name__ == '__main__': - import gym + import gymnasium as gym from myGym.train import configure_env, get_parser, get_arguments from myGym import envs parser = get_parser() arg_dict = get_arguments(parser) arg_dict["gui"] = 1 env = configure_env(arg_dict) - #env.reset() table = env.env.env.static_scene_objects["table"] touching = Touching() on_top = OnTop() - pth1 = "./envs/objects/geometric/urdf/cube.urdf" - pos = env_object.EnvObject.get_random_object_position([-0.5, 0.5, 0.4, 0.6, 0.1, 0.1]) - pan1 = env_object.EnvObject(pth1, pos, [0, 0, 0, 1], pybullet_client=p, fixed=False) - pan1_info = p.getVisualShapeData(pan1.get_uid())[0] - pan1_scale = get_scale_from_urdf(pth1) - objpth = pan1_info[4].decode("utf-8") + + # load tuna model and check the scale + pth1 = "./envs/objects/household/urdf/tuna_can.urdf" + pos = env_object.EnvObject.get_random_object_position([-0.5, 0.5, 0.4, 0.6, 0.07, 0.07]) + obj1 = env_object.EnvObject(pth1, env, pos, [0, 0, 0, 1], pybullet_client=p, fixed=False) + obj1_info = p.getVisualShapeData(obj1.get_uid())[0] + obj1_scale = get_scale_from_urdf(pth1) + objpth = obj1_info[4].decode("utf-8") + + # voxelize o3model = o3d.io.read_triangle_model(objpth) mesh = o3model.meshes[0].mesh - mesh = mesh.scale(pan1_scale, center=mesh.get_center()) + mesh = mesh.scale(obj1_scale, center=mesh.get_center()) vm = VolumeMesh(mesh) orig = vm.duplicate() orig.paint(np.array([0, 1, 0])) voxel_grid = vm.voxelgrid - o3d.visualization.draw_geometries([voxel_grid, orig.voxelgrid]) - pan1.paint([0,1,0,1]) - print("Green pan touching table:") - print(touching.get_value(pan1, table)) - print("Green pan on top of table:") - print(on_top.get_value(pan1, table)) - print("Table on top of green pan:") - print(on_top.get_value(table, pan1)) - - pos = env_object.EnvObject.get_random_object_position([-0.5, 0.5, 0.4, 0.6, 0.5, 0.55]) - pan2 = 
env_object.EnvObject("myGym/envs/objects/household/urdf/pan.urdf", pos, [0, 0, 0, 1], pybullet_client=p, fixed=False) - pan2.set_color([1,0,0,1]) - print("Red pan touching table:") - print(touching.get_value(pan2, table)) - print("") + # visualize geometry + #o3d.visualization.draw_geometries([voxel_grid, orig.voxelgrid]) + + + print("Tuna touching table:") + print(touching.get_value(obj1, table)) + print("Tuna on top of table:") + print(on_top.get_value(obj1, table)) + print("Table on top of tuna:") + print(on_top.get_value(table, obj1)) + diff --git a/myGym/envs/randomizers.py b/myGym/envs/randomizers.py index 3eac3b99..0c897003 100644 --- a/myGym/envs/randomizers.py +++ b/myGym/envs/randomizers.py @@ -2,8 +2,8 @@ import numpy as np import os, glob, random import pybullet as p -import pkg_resources -repodir = pkg_resources.resource_filename("myGym", "") +import importlib.resources as resources +repodir = resources.files("myGym").parent diff --git a/myGym/envs/robot.py b/myGym/envs/robot.py index 079bb1a6..820a3a0c 100644 --- a/myGym/envs/robot.py +++ b/myGym/envs/robot.py @@ -1,15 +1,16 @@ from re import S -import pkg_resources from myGym.utils.vector import Vector import numpy as np import math from myGym.utils.helpers import get_robot_dict +from myGym.envs.env_object import EnvObject -currentdir = pkg_resources.resource_filename("myGym", "envs") -repodir = pkg_resources.resource_filename("myGym", "") +import importlib.resources as resources +currentdir = resources.files("myGym").joinpath("envs") +repodir = resources.files("myGym") -class Robot: +class Robot(EnvObject): """ Robot class for control of robot environment interaction @@ -40,8 +41,13 @@ def __init__(self, dimension_velocity = 0.5, max_velocity = None, #1., max_force = None, #50., - pybullet_client=None): - + pybullet_client=None, + env = None, + observation="ground_truth", + vae_path=None, + yolact_path=None, + yolact_config=None): + self.debug = False self.p = pybullet_client self.robot_dict = get_robot_dict() self.robot_path = self.robot_dict[robot]['path'] @@ -69,47 +75,46 @@ def __init__(self, self.task_type = task_type self.magnetized_objects = {} self.gripper_active = False + super(Robot, self).__init__(self.robot_path, env, position=position, + orientation=orientation, fixed=False, + pybullet_client=self.p, observation=observation, vae_path=vae_path, yolact_path=yolact_path, yolact_config=yolact_config, is_robot=True) self._load_robot() - self.num_joints = self.p.getNumJoints(self.robot_uid) + self.num_joints = self.p.getNumJoints(self.uid) self._set_motors() self.joints_limits, self.joints_ranges, self.joints_rest_poses, self.joints_max_force, self.joints_max_velo = self.get_joints_limits(self.motor_indices) if self.gripper_names: self.gjoints_limits, self.gjoints_ranges, self.gjoints_rest_poses, self.gjoints_max_force, self.gjoints_max_velo = self.get_joints_limits(self.gripper_indices) - #TODO Clean code (test and gym_env) to initialize just from coordinates - #if self.robot_action != "joints": self.init_joint_poses = list(self._calculate_accurate_IK(init_joint_poses[:3])) - #else: - #self.init_joint_poses = np.zeros((len(self.motor_names))) - #self.reset() def _load_robot(self): """ Load SDF or URDF model of specified robot and place it in the environment to specified position and orientation """ + if self.robot_path.startswith('/'): # leading slash indicates absolute path + self.robot_path = self.robot_path[1:] + if self.robot_path[-3:] == 'sdf': objects = self.p.loadSDF( - 
pkg_resources.resource_filename("myGym", - self.robot_path)) - self.robot_uid = objects[0] - self.p.resetBasePositionAndOrientation(self.robot_uid, self.position, + str(resources.files("myGym").joinpath(self.robot_path))) + self.uid = objects[0] + self.p.resetBasePositionAndOrientation(self.uid, self.position, self.orientation) else: - self.robot_uid = self.p.loadURDF( - pkg_resources.resource_filename("myGym", - self.robot_path), + self.uid = self.p.loadURDF( + str(resources.files("myGym").joinpath(self.robot_path)), self.position, self.orientation, useFixedBase=True, flags=(self.p.URDF_USE_SELF_COLLISION)) - for jid in range(self.p.getNumJoints(self.robot_uid)): - self.p.changeDynamics(self.robot_uid, jid, collisionMargin=0., contactProcessingThreshold=0.0, ccdSweptSphereRadius=0) + for jid in range(self.p.getNumJoints(self.uid)): + self.p.changeDynamics(self.uid, jid, collisionMargin=0., contactProcessingThreshold=0.0, ccdSweptSphereRadius=0) # if 'jaco' in self.name: #@TODO jaco gripper has closed loop between finger and finger_tip that is not respected by the simulator - # self.p.createConstraint(self.robot_uid, 11, self.robot_uid, 15, self.p.JOINT_FIXED, [0, 0, 0], [0, 0, 0], [0, 0, 0]) - # self.p.createConstraint(self.robot_uid, 13, self.robot_uid, 17, self.p.JOINT_FIXED, [0, 0, 0], [0, 0, 0], [0, 0, 0]) + # self.p.createConstraint(self.uid, 11, self.uid, 15, self.p.JOINT_FIXED, [0, 0, 0], [0, 0, 0], [0, 0, 0]) + # self.p.createConstraint(self.uid, 13, self.uid, 17, self.p.JOINT_FIXED, [0, 0, 0], [0, 0, 0], [0, 0, 0]) def _set_motors(self): """ Identify motors among all joints (fixed joints aren't motors). Identify index of gripper and end-effector link among all links. Uses data from robot model. """ for i in range(self.num_joints): - joint_info = self.p.getJointInfo(self.robot_uid, i) + joint_info = self.p.getJointInfo(self.uid, i) joint_name = joint_info[1] q_index = joint_info[3] link_name = joint_info[12] @@ -122,22 +127,25 @@ def _set_motors(self): if q_index > -1 and "rjoint" in joint_name.decode("utf-8"): # Fixed joints have q_index -1 self.motor_names.append(str(joint_name)) self.motor_indices.append(i) - self.motor_positions.append(self.p.getJointState(self.robot_uid,i)[0]) + self.motor_positions.append(self.p.getJointState(self.uid,i)[0]) if q_index > -1 and "gjoint" in joint_name.decode("utf-8"): self.gripper_names.append(str(joint_name)) self.gripper_indices.append(i) - self.gripper_positions.append(self.p.getJointState(self.robot_uid,i)[0]) - - print("Robot summary") - print("--------------") - print("Links:") - print("\n".join(map(str,self.link_names))) - print("Joints:") - print("\n".join(map(str,self.motor_names))) - print("Gripper joints:") - print("\n".join(map(str,self.gripper_names))) - print("Gripper index is: " + str(self.gripper_index)) - print("End effector index is: " + str(self.end_effector_index)) + self.gripper_positions.append(self.p.getJointState(self.uid,i)[0]) + + if self.debug: + print("Robot summary") + print("--------------") + print("Links:") + print("\n".join(map(str,self.link_names))) + print("Joints:") + print("\n".join(map(str,self.motor_names))) + print("Gripper joints:") + print("\n".join(map(str,self.gripper_names))) + print("Gripper index is: " + str(self.gripper_index)) + print("End effector index is: " + str(self.end_effector_index)) + else: + print("Robot initialized") self.joints_num = len(self.motor_names) self.gjoints_num = len(self.gripper_names) @@ -160,7 +168,7 @@ def _set_motors(self): #print(f"Gripper 
active:{self.gripper_active}") def touch_sensors_active(self, target_object): - contact_points = self.p.getContactPoints(self.robot_uid, target_object.uid) + contact_points = self.p.getContactPoints(self.uid, target_object.uid) if len(contact_points)> 0: return True return False @@ -204,7 +212,7 @@ def reset_joints(self, joint_poses): # joint_poses = self._calculate_joint_poses(joint_poses) joint_poses = np.clip(joint_poses, self.joints_limits[0], self.joints_limits[1]) for jid in range(len(self.motor_indices)): - self.p.resetJointState(self.robot_uid, self.motor_indices[jid], joint_poses[jid]) + self.p.resetJointState(self.uid, self.motor_indices[jid], joint_poses[jid]) self._run_motors(joint_poses) def get_joints_limits(self,indices): @@ -218,7 +226,7 @@ def get_joints_limits(self,indices): """ joints_limits_l, joints_limits_u, joints_ranges, joints_rest_poses, joints_max_force, joints_max_velo = [], [], [], [], [], [] for jid in indices: - joint_info = self.p.getJointInfo(self.robot_uid, jid) + joint_info = self.p.getJointInfo(self.uid, jid) joints_limits_l.append(joint_info[8]) joints_limits_u.append(joint_info[9]) joints_ranges.append(joint_info[9] - joint_info[8]) @@ -244,7 +252,7 @@ def observe_all_links(self): """ Returns the cartesian world position of all robot's links """ - return [self.p.getLinkState(self.robot_uid, link)[0] for link in range(self.num_joints)] + return [self.p.getLinkState(self.uid, link)[0] for link in range(self.num_joints)] def get_joints_states(self): """ @@ -252,7 +260,7 @@ def get_joints_states(self): """ joints = [] for link in range(self.joints_num): - joints.append(self.p.getJointState(self.robot_uid,link)[0]) + joints.append(self.p.getJointState(self.uid,link)[0]) return joints def get_observation_dimension(self): @@ -272,7 +280,7 @@ def get_observation(self): :return observation: (list) Position of end-effector link (center of mass) """ observation = [] - state = self.p.getLinkState(self.robot_uid, self.end_effector_index) + state = self.p.getLinkState(self.uid, self.end_effector_index) pos = state[0] orn = self.p.getEulerFromQuaternion(state[1]) @@ -291,7 +299,7 @@ def get_links_observation(self, num): if "kuka" in self.name: for link in range(self.gripper_index-num, self.gripper_index): # for link in range(4, self.gripper_index): - state = self.p.getLinkState(self.robot_uid, link) + state = self.p.getLinkState(self.uid, link) pos = state[0] observation.extend(list(pos)) else: @@ -300,7 +308,7 @@ def get_links_observation(self, num): self.observed_links_num = num return observation - def get_position(self): + def get_position(self) -> np.ndarray: """ Get position of robot's end-effector link @@ -308,9 +316,9 @@ def get_position(self): :return position: (list) Position of end-effector link (center of mass) """ #return self.get_accurate_gripper_position() - #print(self.p.getLinkState(self.robot_uid, self.end_effector_index)[0]) - return self.p.getLinkState(self.robot_uid, self.end_effector_index)[0] - + #print(self.p.getLinkState(self.uid, self.end_effector_index)[0]) + return np.array(self.p.getLinkState(self.uid, self.end_effector_index)[0]) + def get_orientation(self): """ Get orientation of robot's end-effector link @@ -318,7 +326,7 @@ def get_orientation(self): Returns: :return orientation: (list) Orientation of end-effector link (center of mass) """ - return self.p.getLinkState(self.robot_uid, self.end_effector_index)[1] + return self.p.getLinkState(self.uid, self.end_effector_index)[1] def _run_motors(self, joint_poses): """ @@ -330,7 +338,7 @@ 
def _run_motors(self, joint_poses): joint_poses = np.clip(joint_poses, self.joints_limits[0], self.joints_limits[1]) self.joints_state = [] for i in range(len(self.motor_indices)): - self.p.setJointMotorControl2(bodyUniqueId=self.robot_uid, + self.p.setJointMotorControl2(bodyUniqueId=self.uid, jointIndex=self.motor_indices[i], controlMode=self.p.POSITION_CONTROL, targetPosition=joint_poses[i], @@ -339,10 +347,10 @@ def _run_motors(self, joint_poses): positionGain=0.7, velocityGain=0.3) - self.end_effector_pos = self.p.getLinkState(self.robot_uid, self.end_effector_index)[0] - self.end_effector_orn = self.p.getLinkState(self.robot_uid, self.end_effector_index)[1] - self.gripper_pos = self.p.getLinkState(self.robot_uid, self.gripper_index)[0] - self.gripper_orn = self.p.getLinkState(self.robot_uid, self.gripper_index)[1] + self.end_effector_pos = self.p.getLinkState(self.uid, self.end_effector_index)[0] + self.end_effector_orn = self.p.getLinkState(self.uid, self.end_effector_index)[1] + self.gripper_pos = self.p.getLinkState(self.uid, self.gripper_index)[0] + self.gripper_orn = self.p.getLinkState(self.uid, self.gripper_index)[1] def _move_gripper(self, action): """ @@ -352,7 +360,7 @@ def _move_gripper(self, action): :param joint_poses: (list) Desired poses of individual joints """ for i in range(len(self.gripper_indices)): - self.p.setJointMotorControl2(bodyUniqueId=self.robot_uid, + self.p.setJointMotorControl2(bodyUniqueId=self.uid, jointIndex=self.gripper_indices[i], controlMode=self.p.POSITION_CONTROL, targetPosition=action[i], @@ -373,7 +381,7 @@ def _calculate_joint_poses(self, end_effector_pos): :return joint_poses: (list) Calculated joint poses corresponding to desired end-effector position """ if (self.use_fixed_end_effector_orn): - joint_poses = self.p.calculateInverseKinematics(self.robot_uid, + joint_poses = self.p.calculateInverseKinematics(self.uid, self.end_effector_index, end_effector_pos, self.fixed_end_effector_orn, @@ -382,7 +390,7 @@ def _calculate_joint_poses(self, end_effector_pos): jointRanges=self.joints_ranges, restPoses=self.joints_rest_poses) else: - joint_poses = self.p.calculateInverseKinematics(self.robot_uid, + joint_poses = self.p.calculateInverseKinematics(self.uid, self.end_effector_index, end_effector_pos) joint_poses = np.clip(joint_poses[:len(self.motor_indices)], self.joints_limits[0], self.joints_limits[1]) @@ -404,12 +412,12 @@ def _calculate_accurate_IK(self, end_effector_pos): iter = 0 while (not closeEnough and iter < maxIter): if (self.use_fixed_end_effector_orn): - joint_poses = self.p.calculateInverseKinematics(self.robot_uid, + joint_poses = self.p.calculateInverseKinematics(self.uid, self.end_effector_index, end_effector_pos, self.fixed_end_effector_orn) else: - joint_poses = self.p.calculateInverseKinematics(self.robot_uid, + joint_poses = self.p.calculateInverseKinematics(self.uid, self.end_effector_index, end_effector_pos, lowerLimits=self.joints_limits[0], @@ -420,9 +428,9 @@ def _calculate_accurate_IK(self, end_effector_pos): joint_poses = np.clip(joint_poses, self.joints_limits[0], self.joints_limits[1]) #reset the joint state (ignoring all dynamics, not recommended to use during simulation) for jid in range(len(self.motor_indices)): - self.p.resetJointState(self.robot_uid, self.motor_indices[jid], joint_poses[jid]) + self.p.resetJointState(self.uid, self.motor_indices[jid], joint_poses[jid]) - ls = self.p.getLinkState(self.robot_uid, self.end_effector_index) + ls = self.p.getLinkState(self.uid, self.end_effector_index) newPos = ls[4] 
#world position of the URDF link frame newOrn = ls[5] #world orientation of the URDF link frame diffPos = np.linalg.norm(np.asarray(end_effector_pos)-np.asarray(newPos)) @@ -513,7 +521,7 @@ def apply_action_torque_control(self, action): # Desired task-space force using PD law F = np.vstack([P_pos * (delta_pos), P_ori * (delta_ori)]) - \ np.vstack([D_pos * (curr_vel), D_ori * (curr_omg)]) - Jt, Jr = self.p.calculateJacobian(self.robot_uid, self.end_effector_index, [0,0,0], joint_states, joint_velocities, [10]*(len(joint_states))) + Jt, Jr = self.p.calculateJacobian(self.uid, self.end_effector_index, [0,0,0], joint_states, joint_velocities, [10]*(len(joint_states))) J = np.array([Jt,Jr]).reshape(-1, len(joint_states)) tau = np.dot(J.T, F) # + robot.coriolis_comp().reshape(7,1) @@ -606,7 +614,7 @@ def apply_action(self, action, env_objects=None): #self.end_effector_prev_ori = self.end_effector_ori #if 'gripper' not in self.robot_action: # for joint_index in range(self.gripper_index, self.end_effector_index + 1): - # self.p.resetJointState(self.robot_uid, joint_index, self.p.getJointInfo(self.robot_uid, joint_index)[9]) + # self.p.resetJointState(self.uid, joint_index, self.p.getJointInfo(self.uid, joint_index)[9]) def magnetize_object(self, object, distance_threshold=.1): if len(self.magnetized_objects) == 0 : @@ -644,8 +652,8 @@ def get_accurate_gripper_position(self): """ Returns the position of the tip of the pointy gripper. Tested on Kuka only """ - gripper_position = self.p.getLinkState(self.robot_uid, self.end_effector_index)[0] - gripper_orientation = self.p.getLinkState(self.robot_uid, self.end_effector_index)[1] + gripper_position = self.p.getLinkState(self.uid, self.end_effector_index)[0] + gripper_orientation = self.p.getLinkState(self.uid, self.end_effector_index)[1] gripper_matrix = self.p.getMatrixFromQuaternion(gripper_orientation) direction_vector = Vector([0,0,0], [0.0, 0, 0.14]) m = np.array([[gripper_matrix[0], gripper_matrix[1], gripper_matrix[2]], [gripper_matrix[3], gripper_matrix[4], gripper_matrix[5]], [gripper_matrix[6], gripper_matrix[7], gripper_matrix[8]]]) @@ -670,5 +678,5 @@ def get_uid(self): Returns: :return self.uid: Robot's unique ID """ - return self.robot_uid + return self.uid diff --git a/myGym/envs/scene_objects.py b/myGym/envs/scene_objects.py new file mode 100644 index 00000000..b2e4b1a4 --- /dev/null +++ b/myGym/envs/scene_objects.py @@ -0,0 +1,181 @@ +from time import time +from typing import Callable, Optional, TypeVar + +import numpy as np +import pytest +import os + +from rddl import Operand, Variable +from myGym.envs.env_object import EnvObject +from myGym.envs.robot import Robot +import importlib.resources as resources +currentdir = resources.files("myGym").joinpath("envs") + +def time_function(f: Callable, *args, **kwargs): + start = time() + result = f(*args, **kwargs) + end = time() + return end - start, result + + +def euclidean_distance(point_A: np.ndarray, point_B: np.ndarray) -> float: + return np.linalg.norm(np.array(point_A) - np.array(point_B)) + + +def is_holding(gripper, obj) -> bool: + return bool(np.linalg.norm(gripper.location - obj.location) < NEAR_THRESHOLD) + + +NEAR_THRESHOLD = 0.1 + +mapping = { + "is_holding": is_holding, + "euclidean_distance": euclidean_distance, + "is_reachable": lambda g, o: True, + "near_threshold": NEAR_THRESHOLD, + "gripper_at": lambda g, o: all(g.location == o.location), + "gripper_open": lambda g: np.random.random() < 0.5, + "object_at": lambda g, o: g.location == o.location, + "exists": lambda e: 
True, + "on_top" : lambda e: True +} + +Operand.set_mapping(mapping) + +from rddl import AtomicAction, Entity, Reward +from rddl.actions import Approach, Grasp, Drop, Move, Follow +from rddl.entities import GraspableObject, Gripper, Location, ObjectEntity +from rddl.operators import NotOp +from rddl.predicates import IsHolding, Near, OnTop + +# CONSTANTS + +# END OF CUSTOM FUNCTIONS & VARIABLES + + +class EnvObjectProxy: # This should be replaced by EnvObject from myGym + + def __init__(self, **kwds): + super().__init__() + self._name = self.reference + self._position = np.random.randn(3) + self._orientation = np.random.randn(3) + + def set_position(self, position: np.ndarray): + self._position = position + + def set_orientation(self, orientation: np.ndarray): + self._orientation = orientation + + def get_position(self) -> np.ndarray: + return self._position + + def get_orientation(self) -> np.ndarray: + return self._orientation + + def get_position_and_orientation(self) -> tuple[np.ndarray, np.ndarray]: + return self.get_position(), self.get_orientation() + + def get_name(self) -> str: + return self._name + + def get_uuid(self) -> str: + return self.name + "_uuid" + + +class EnvSimulator: # This should be replaced by actual env from myGym + """Simulates the simulation environment""" + + def __init__(self, list_of_objects: list[EnvObjectProxy]): + self._objects = list_of_objects + + def reset(self): + pass + + def step(self, action): + # generate observation + obs = {} + for obj in self._objects: + obj.set_position(np.random.randn(3)) # randomly jiggle the object about + obj.set_orientation(np.random.randn(3)) + obs[obj.get_name()] = obj + return obs + + +class Observer: # This class serves merely as a container & memory for current observation + + def __init__(self): + self.obs = None + + def set_observation(self, obs): + self.obs = obs + + def get_observation(self): + return self.obs + + +Entity.set_observation_getter(lambda self: self) +Location.monkey_patch(Location._get_location, lambda self: self.get_position()) + + +class Apple(GraspableObject, EnvObject): + + def __init__(self, reference: Optional[str] = None, kind: str = "RedDelicious", **kw): + self.urdf = "objects/household/urdf/apple.urdf" + self.rgba = (0.54, 0.0, 0.0, 1.0) + kw["rgba"] = self.rgba + kw["urdf_path"] = resources.files("myGym").joinpath("envs",self.urdf) + super().__init__(self._get_generic_reference() if reference is None else reference, "apple", **kw) + self._kind = kind + + +class Banana(GraspableObject, EnvObject): + + def __init__(self, reference: Optional[str] = None, kind: str = "", **kw): + self.urdf = "objects/household/urdf/banana.urdf" + self.rgba = (1.0, 0.94, 0.0, 1.0) + kw["urdf_path"] = resources.files("myGym").joinpath("envs",self.urdf) + kw["rgba"] = self.rgba + super().__init__(self._get_generic_reference() if reference is None else reference, "banana", **kw) + self._kind = kind + + + +class Tuna(GraspableObject, EnvObject): + + def __init__(self, reference: Optional[str] = None, kind: str = "", **kw): + self.urdf = "objects/household/urdf/tuna_can.urdf" + self.rgba = None + kw["urdf_path"] = resources.files("myGym").joinpath("envs",self.urdf) + kw["rgba"] = self.rgba + super().__init__(self._get_generic_reference() if reference is None else reference, "tuna_can", **kw) + self._kind = kind + + +class TiagoGripper(Gripper, Robot): + + def __init__(self, reference: Optional[str] = None, **kw): + super().__init__("gripper_tiago" if reference is None else reference, **kw) + # 
self._is_holding_predicate = IsHolding(self) + + + +# @pytest.fixture +def create_approach_action() -> AtomicAction: + a = Approach() + return a + + +# @pytest.fixture +def create_gripper_and_apple() -> dict[str, Entity]: + gripper_name = "tiago_gripper" + apple_name = "apple_01" + + t_gripper = TiagoGripper(gripper_name) + apple = Apple(apple_name) + objects_for_approach = { + "gripper": t_gripper, + "object": apple + } + + return objects_for_approach \ No newline at end of file diff --git a/myGym/envs/task.py b/myGym/envs/task.py index 144cc160..8729ebdc 100644 --- a/myGym/envs/task.py +++ b/myGym/envs/task.py @@ -1,145 +1,135 @@ -from myGym.envs.vision_module import VisionModule -import pybullet as p -import warnings -import time -import numpy as np -import pkg_resources -import cv2 import random -from scipy.spatial.distance import cityblock -from scipy.spatial.transform import Rotation -from pyquaternion import Quaternion -currentdir = pkg_resources.resource_filename("myGym", "envs") - +import myGym.envs.scene_objects # allows binding with rddl +from myGym.envs.scene_objects import TiagoGripper +from rddl.rddl_sampler import RDDLWorld +from rddl.task import RDDLTask +from rddl.entities import Gripper, ObjectEntity class TaskModule(): """ - Task module class for task management + Task module class for task management with RDDL Parameters: - :param task_type: (string) Type of learned task (reach, push, ...) - :param task_objects: (list of strings) Objects that are relevant for performing the task - :param distance_type: (string) Way of calculating distances (euclidean, manhattan) - :param logdir: (string) Directory for logging :param env: (object) Environment, where the training takes place + :param task_num_range: (list of ints) range of allowed number of subtasks + :param allowed_objects: (list of strings) names of allowed object classes + :param allowed_predicates: (list of strings) names of allowed predicates + :param pybullet_client: (object) pybullet instance """ - def __init__(self, task_type='reach', observation={}, - vae_path=None, yolact_path=None, yolact_config=None, distance_type='euclidean', - logdir=currentdir, env=None, number_tasks=None): - self.task_type = task_type - print("task_type", self.task_type) - self.distance_type = distance_type - self.number_tasks = number_tasks - self.current_task = 0 - self.subtask_over = False - self.logdir = logdir + def __init__(self, env=None, num_task_range=[], allowed_protoactions=[], allowed_objects=[], allowed_predicates=[], pybullet_client=None): self.env = env - self.image = None - self.depth = None - self.last_distance = None - self.init_distance = None - self.current_norm_distance = None - self.current_norm_rotation = None - self.stored_observation = [] - self.obs_template = observation - self.vision_module = VisionModule(observation=observation, env=env, vae_path=vae_path, yolact_path=yolact_path, yolact_config=yolact_config) - self.obsdim = self.check_obs_template() - self.vision_src = self.vision_module.src - self.writebool = False - - def reset_task(self): - """ - Reset task relevant data and statistics - """ - self.last_distance = None - self.init_distance = None + self.p = pybullet_client + self.number_tasks_range = num_task_range + self.allowed_protoactions = allowed_protoactions + self.allowed_objects = allowed_objects + self.allowed_predicates = allowed_predicates + self.current_task_length = None + self.current_task_sequence = None self.subtask_over = False - self.current_norm_distance = None - self.vision_module.mask = 
{} - self.vision_module.centroid = {} - self.vision_module.centroid_transformed = {} - self.env.task_objects["robot"] = self.env.robot - if self.vision_src == "vae": - self.generate_new_goal(self.env.objects_area_boarders, self.env.active_cameras) - - def render_images(self): - render_info = self.env.render(mode="rgb_array", camera_id=self.env.active_cameras) - self.image = render_info[self.env.active_cameras]["image"] - self.depth = render_info[self.env.active_cameras]["depth"] - if self.env.visualize == 1 and self.vision_src != "vae": - cv2.imshow("Vision input", cv2.cvtColor(self.image, cv2.COLOR_RGB2BGR)) - cv2.waitKey(1) - - def visualize_vae(self, recons): - imsize = self.vision_module.vae_imsize - actual_img, goal_img = [(lambda a: cv2.resize(a[60:390, 160:480], (imsize, imsize)))(a) for a in - [self.image, self.goal_image]] - images = [] - for idx, im in enumerate([actual_img, recons[0], goal_img, recons[1]]): - im = cv2.copyMakeBorder(im, 30, 10, 10, 20, cv2.BORDER_CONSTANT, value=[255, 255, 255]) - cv2.putText(im, ["actual", "actual rec", "goal", "goal rec"][idx], (10, 20), cv2.FONT_HERSHEY_SIMPLEX, .5, - (0, 0, 0), 1, 0) - images.append(cv2.cvtColor(im, cv2.COLOR_RGB2BGR)) - fig = np.vstack((np.hstack((images[0], images[1])), np.hstack((images[2], images[3])))) - cv2.imshow("Scene", fig) - cv2.waitKey(1) - - def get_linkstates_unpacked(self): - o = [] - [[o.append(x) for x in z] for z in self.env.robot.observe_all_links()] - return o - - def get_additional_obs(self, d, robot): - info = d.copy() - info["additional_obs"] = {} - for key in d["additional_obs"]: - if key == "joints_xyz": - o = [] - info["additional_obs"]["joints_xyz"] = self.get_linkstates_unpacked() - elif key == "joints_angles": - info["additional_obs"]["joints_angles"] = self.env.robot.get_joints_states() - elif key == "endeff_xyz": - info["additional_obs"]["endeff_xyz"] = self.vision_module.get_obj_position(robot, self.image, self.depth)[:3] - elif key == "endeff_6D": - info["additional_obs"]["endeff_6D"] = list(self.vision_module.get_obj_position(robot, self.image, self.depth)) \ - + list(self.vision_module.get_obj_orientation(robot)) - elif key == "touch": - if hasattr(self.env.env_objects["actual_state"], "magnetized_objects"): - obj_touch = self.env.env_objects["goal_state"] - else: - obj_touch = self.env.env_objects["actual_state"] - touch = self.env.robot.touch_sensors_active(obj_touch) or len(self.env.robot.magnetized_objects)>0 - info["additional_obs"]["touch"] = [1] if touch else [0] - elif key == "distractor": - poses = [self.vision_module.get_obj_position(self.env.task_objects["distractor"][x],\ - self.image, self.depth) for x in range(len(self.env.task_objects["distractor"]))] - info["additional_obs"]["distractor"] = [p for sublist in poses for p in sublist] - return info + self.rddl_robot = None + self.current_task = None + self.current_action = None + self.scene_entities = [] + self.scene_objects = [] + self.rddl_world = RDDLWorld(allowed_actions=self.cfg_strings_to_classes(self.allowed_protoactions), + allowed_entities=self.cfg_strings_to_classes(self.allowed_objects) + [TiagoGripper], + allowed_initial_actions=self.cfg_strings_to_classes(self.allowed_protoactions)) + #self.rddl_task = RDDLTask() + + def sample_num_subtasks(self): + '''Whenever a new sequence of actions is desired, this function chooses a random + length of the sequence based on the range from config. 
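(For example, a configured range of [1, 3] yields 1, 2 or 3 subtasks; note that random.randint is inclusive at both ends.)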
''' + self.current_task_length = random.randint(*self.number_tasks_range) + return self.current_task_length + + def cfg_strings_to_classes(self, cfg:list): + """Looks up the classes corresponding to names in cfg and returns a list of them. + Args: + cfg (list): list of strings with names of entities, e.g., ["Approach", "Drop"] or ["Banana", "Tuna"] + Raises: + Exception: when the class is not found, which usually means it is not defined in scene_objects.py + Returns: + list: list of entity classes + """ + list_of_classes = [] + for n in cfg: + n_fixed = n[0].upper() + n[1:].lower() + try: + cl = getattr(myGym.envs.scene_objects, n_fixed) + list_of_classes.append(cl) + except AttributeError: + raise Exception("Entity {} not found by RDDL! Check scene_objects.py for this class".format(n_fixed)) + return list_of_classes + + def create_new_task_sequence(self): + '''Calls rddl to make a new sequence of actions (subtasks).''' + n_samples = self.sample_num_subtasks() + # task_sequence = self.rddl_world.sample_generator(n_samples) + self.rddl_task = self.rddl_world.sample_world(n_samples) + self.current_task_sequence = self.rddl_task.get_generator() + print("Generated a new action sequence") + + def get_next_task(self): + '''When the previous action (subtask) is finished, this function will jump to the next one.''' + if self.current_task_sequence is None: + # if there is no action sequence to follow, first make a new one + self.create_new_task_sequence() + self.current_action = next(self.current_task_sequence) + print(f"Current task: {self.current_action}") + print("Desired world state after action:") + self.rddl_task.show_current_state() + + def build_scene_for_task_sequence(self): + '''After a new sequence of actions (subtasks) is created, this function will create the physical scene according to the + symbolic template. 
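Gripper entities are bound to the environment's robot; every other unbound entity is spawned as an EnvObject at a random position and z-rotation within the reachable workspace and bound to its symbolic counterpart.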
''' + if self.current_action is None: + # if there is no action sequence, first make a new one + self.get_next_task() + # scene_entities = self.rddl_world.get_created_variables() + scene_entities = self.rddl_task.gather_objects() + for entity in scene_entities: + if issubclass(entity.type, Gripper) and not entity.is_bound(): + robot = entity.type(robot=self.env.robot_type, robot_action=self.env.robot_action, task_type=self.env.task_type, **self.env.robot_kwargs) + entity.bind(robot) + self.rddl_robot = entity + self.scene_objects.append(robot) + elif not entity.is_bound(): + pos = entity.type.get_random_object_position(self.env.reachable_borders) # @TODO needs to be constrained by predicates + orn = entity.type.get_random_z_rotation() #@TODO needs to be constrained by predicates + + kw = {"env": self.env, "position":pos, "orientation":orn, "pybullet_client":self.p, "fixed":False, "observation":self.env.vision_source, + "vae_path":self.env.vae_path, "yolact_path":self.env.yolact_path, "yolact_config":self.env.yolact_config} + self.spawn_object_for_rddl(entity, **kw) - def get_observation(self): - """ - Get task relevant observation data based on reward signal source + - Returns: - :return self._observation: (array) Task relevant observation data, positions of task objects - """ - info_dict = self.obs_template.copy() - self.render_images() if "ground_truth" not in self.vision_src else None - if self.vision_src == "vae": - [info_dict["actual_state"], info_dict["goal_state"]], recons = (self.vision_module.encode_with_vae( - imgs=[self.image, self.goal_image], task=self.task_type, decode=self.env.visualize)) - self.visualize_vae(recons) if self.env.visualize == 1 else None + def spawn_object_for_rddl(self, rddl_entity, env, position, orientation, pybullet_client, + fixed=False, observation="ground_truth", vae_path=None, yolact_path=None, yolact_config=None): + """Initializes EnvObject and adds it to the scene. The object will be bound to the symbolic entity provided as rddl_entity. 
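An illustrative sketch (not part of this patch) of how the RDDL-driven TaskModule appears intended to be driven; the actual call sites live in the gym env outside this hunk, so the argument values below are placeholders assembled from identifiers defined earlier in the patch:

task = TaskModule(env=env,  # running gym env instance (hypothetical handle)
                  num_task_range=[1, 3],
                  allowed_protoactions=["Approach", "Grasp", "Move", "Drop"],
                  allowed_objects=["Apple", "Tuna", "Banana"],
                  allowed_predicates=["Near"],
                  pybullet_client=p)  # the env's pybullet client
task.create_new_task_sequence()        # sample a random subtask sequence via RDDLWorld
task.build_scene_for_task_sequence()   # spawn and bind the required objects; picks the first subtask if none is active
# ... later, once the current subtask is judged finished:
task.get_next_task()                   # advance to the next action in the sequence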
+ + Args: + rddl_entity (Variable): unbound rddl entity to bind with + env (class instance): env instance + position (list): position of the object [x, y, z] + orientation (tuple): orientation of the object in quaternions (x1, x2, x3, x4) + pybullet_client (object): instance of pybullet simulator + fixed (bool): whether object has fixed base or not + observation (str): vision source ("ground_truth", "yolact", "vae") + vae_path (str): path to vae model, if used + yolact_path (str): path to yolact model, if used + yolact_config (str): path to saved yolact config + """ + kw = {"env": env, "position":position, "orientation":orientation, "pybullet_client":pybullet_client, "fixed":fixed, + "observation":observation, "vae_path":vae_path, "yolact_path":yolact_path, "yolact_config":yolact_config} + o = rddl_entity.type(**kw) # initialize EnvObject and add to scene + rddl_entity.bind(o) # bind spawned object to the symbolic one + if o.rgba is None: # if no colour bound to object, assign random rgb + o.set_color(self.env.get_random_color()) else: - for key in ["actual_state", "goal_state"]: - if "endeff" in info_dict[key]: - xyz = self.vision_module.get_obj_position(self.env.task_objects["robot"], self.image, self.depth) - xyz = xyz[:3] if "xyz" in info_dict else xyz - else: - xyz = self.vision_module.get_obj_position(self.env.task_objects[key],self.image,self.depth) - info_dict[key] = xyz - self._observation = self.get_additional_obs(info_dict, self.env.task_objects["robot"]) - return self._observation - + o.set_color(o.get_color_rgba()) # assign correct colour to object as defined in scene_objects.py + self.scene_objects.append(o) + self.scene_entities.append(rddl_entity) + def get_world_state(self): """ @@ -150,457 +140,26 @@ def get_world_state(self): """ pass - def check_vision_failure(self): - """ - Check if YOLACT vision model fails repeatedly during episode - - Returns: - :return: (bool) - """ - self.stored_observation.append(self._observation["actual_state"]) - self.stored_observation.append(self._observation["goal_state"]) - if len(self.stored_observation) > 9: - self.stored_observation.pop(0) - if self.vision_src == "yolact": # Yolact assigns 10 to not detected objects - if all(10 in obs for obs in self.stored_observation): - return True - return False - - def check_time_exceeded(self): - """ - Check if maximum episode time was exceeded - - Returns: - :return: (bool) - """ - if (time.time() - self.env.episode_start_time) > self.env.episode_max_time: - self.env.episode_info = "Episode maximum time {} s exceeded".format(self.env.episode_max_time) - return True - return False - - def check_object_moved(self, object, threshold=0.3): - """ - Check if object moved more than allowed threshold - - Parameters: - :param object: (object) Object to check - :param threshold: (float) Maximum allowed object movement - Returns: - :return: (bool) - """ - if self.vision_src != "vae": - object_position = object.get_position() - pos_diff = np.array(object_position[:2]) - np.array(object.init_position[:2]) - distance = np.linalg.norm(pos_diff) - if distance > threshold: - self.env.episode_info = "The object has moved {:.2f} m, limit is {:.2f}".format(distance, threshold) - return True - return False - - def check_turn_threshold(self, desired_angle=57): - turned = self.env.reward.get_angle() - if turned >= desired_angle: - return True - elif turned <= - desired_angle: - return -1 - return False - - def check_distance_threshold(self, observation, threshold=0.1): - """ - Check if the distance between relevant 
task objects is under threshold for successful task completion - Returns: - :return: (bool) - """ - self.current_norm_distance = self.calc_distance(observation["goal_state"], observation["actual_state"]) - return self.current_norm_distance < threshold - - def check_distrot_threshold(self, observation, threshold=0.1): - """ - Check if the distance between relevant task objects is under threshold for successful task completion - Returns: - :return: (bool) - """ - self.current_norm_distance = self.calc_distance(observation["goal_state"], observation["actual_state"]) - self.current_norm_rotation = self.calc_rot_quat(observation["goal_state"], observation["actual_state"]) - - if self.current_norm_distance < threshold and self.current_norm_rotation < threshold: - return True - return False - - - def get_dice_value(self, quaternion): - def noramalize(q): - return q/np.linalg.norm(q) - - faces = np.array([ - [0,0,1], - [0,1,0], - [1,0,0], - [0,0,-1], - [0,-1,0], - [-1,0,0], - ]) - - rot_mtx = Rotation.from_quat(noramalize(quaternion)).as_matrix() - - rotated_faces = np.dot(rot_mtx, faces.T).T - top_face_index = np.argmax(rotated_faces[:,2]) - - #face_nums = [2, 5, 1, 4, 6, 3] #states that first face has number 2 on it, second 5 and so on... - #return face_nums[top_face_index] - return top_face_index+1 - - def check_dice_moving(self, observation, threshold=0.1): - def calc_still(o1, o2): - result = 0 - for i in range(len(o1)): - result+= np.power(o1[i]-o2[i],2) - result = np.sqrt(result) - - return result < 0.000005 - #print(observation["goal_state"]) - if len(observation)<3: - print("Invalid",observation) - x = np.array(observation["actual_state"][3:]) - - #print(observation) - - if not self.check_distance_threshold(self._observation) and self.env.episode_steps > 25: - if calc_still(observation["actual_state"], self.stored_observation): - if (self.stored_observation == observation["actual_state"]): - return 0 - else: - if self.writebool: - print(self.get_dice_value(x)) - print(observation) - self.writebool = False - if self.get_dice_value(x) == 6: - return 2 - return 1 - - else: - self.stored_observation = observation["actual_state"] - return 0 - else: - self.stored_observation = observation["actual_state"] - self.writebool = True - #print(self.get_dice_value(x)) - return 0 - - def check_points_distance_threshold(self, threshold=0.1): - o1 = self.env.task_objects["actual_state"] - if (self.task_type == 'pnp') and (self.env.robot_action != 'joints_gripper') and (len(self.env.robot.magnetized_objects) == 0): - o2 = self.env.robot - closest_points = self.env.p.getClosestPoints(o2.get_uid(), o1.get_uid(), threshold, - o2.end_effector_index, -1) - else: - o2 = self.env.task_objects["goal_state"] - idx = -1 if o1 != self.env.robot else self.env.robot.end_effector_index - closest_points = self.env.p.getClosestPoints(o1.get_uid(), o2.get_uid(), threshold, idx, -1) - return closest_points if len(closest_points) > 0 else False - - def drop_magnetic(self): - """ - Release the object if required point was reached and controls if task was compleated. 
- Returns: - :return: (bool) - """ - if self.env.reward.point_was_reached: - if not self.env.reward.was_dropped: - self.env.episode_over = False - self.env.robot.release_all_objects() - self.env.task.subtask_over = True - self.current_task = 0 - self.env.reward.was_dropped = True - # print("drop episode", self.env.reward.drop_episode, "episode steps", self.env.episode_steps) - if self.env.reward.drop_episode and self.env.reward.drop_episode + 35 < self.env.episode_steps: - self.end_episode_success() - return True - else: - return False def check_goal(self): """ Check if goal of the task was completed successfully """ - - finished = None - if self.task_type in ['reach', 'poke', 'pnp', 'pnpbgrip', 'FMOT', 'FROM', 'FROT', 'FMOM', 'FM','F','A','AG','AGM','AGMD','AGMDW']: #all tasks ending with R (FMR) have to have distrot checker - finished = self.check_distance_threshold(self._observation) - if self.task_type in ['pnprot','pnpswipe','FMR', 'FMOR', 'FMLFR', 'compositional']: - finished = self.check_distrot_threshold(self._observation) - if self.task_type in ["dropmag"]: #FMOT should be compositional - self.check_distance_threshold(self._observation) - finished = self.drop_magnetic() - if self.task_type in ['push', 'throw']: - self.check_distance_threshold(self._observation) - finished = self.check_points_distance_threshold() - if self.task_type == "switch": - self.check_distance_threshold(self._observation) - finished = abs(self.env.reward.get_angle()) >= 18 - if self.task_type == "press": - self.check_distance_threshold(self._observation) - finished = self.env.reward.get_angle() >= 1.71 - if self.task_type == "dice_throw": - finished = self.check_dice_moving(self._observation) - - if self.task_type == "turn": - self.check_distance_threshold(self._observation) - finished = self.check_turn_threshold() - self.last_distance = self.current_norm_distance - if self.init_distance is None: - self.init_distance = self.current_norm_distance - #if self.task_type == 'pnp' and self.env.robot_action != 'joints_gripper' and finished: - # if len(self.env.robot.magnetized_objects) == 0 and self.env.episode_steps > 5: - # self.end_episode_success() - # else: - # self.env.episode_over = False - if finished: - if self.task_type == "dice_throw": - - if finished == 1: - self.end_episode_fail("Finished with wrong dice result thrown") - return finished - self.end_episode_success() - if self.check_time_exceeded() or self.env.episode_steps == self.env.max_steps: - self.end_episode_fail("Max amount of steps reached") - if "ground_truth" not in self.vision_src and (self.check_vision_failure()): - self.stored_observation = [] - self.end_episode_fail("Vision fails repeatedly") - - def end_episode_fail(self, message): - self.env.episode_over = True - self.env.episode_failed = True - self.env.episode_info = message - self.env.robot.release_all_objects() - - def end_episode_success(self): - #print("Finished subtask {}".format(self.current_task)) - if self.current_task == (self.number_tasks-1): - self.env.episode_over = True - self.env.robot.release_all_objects() - self.current_task = 0 - if self.env.episode_steps == 1: - self.env.episode_info = "Task completed in initial configuration" - else: - self.env.episode_info = "Task completed successfully" - else: - self.env.episode_over = False - self.env.robot.release_all_objects() - self.subtask_over = True - self.current_task += 1 - - def calc_distance(self, obj1, obj2): - """ - Calculate distance between two objects - - Parameters: - :param obj1: (float array) First object 
position representation - :param obj2: (float array) Second object position representation - Returns: - :return dist: (float) Distance between 2 float arrays - """ - #print("gripper_position:", obj1[:3]) - #print("goal_position_offset:", obj2[:3]) - #TODO - - if self.distance_type == "euclidean": - dist = np.linalg.norm(np.asarray(obj1[:3]) - np.asarray(obj2[:3])) - #print("Calculated distance:", dist) - elif self.distance_type == "manhattan": - dist = cityblock(obj1, obj2) - return dist - - def calc_height_diff(self, obj1, obj2): - """ - Calculate height difference between objects - - Parameters: - :param obj1: (float array) First object position representation - :param obj2: (float array) Second object position representation - Returns: - :return dist: (float) Distance iz Z axis - """ - #TODO - dist = abs(obj1[2] - obj2[2]) - return dist - - def calc_rot_quat(self, obj1, obj2): - """ - Calculate difference between two quaternions - - Parameters: - :param obj1: (float array) First object quaternion - :param obj2: (float array) Second object object quaternion - Returns: - :return dist: (float) Distance between 2 float arrays - """ - #TODO - #tran = np.linalg.norm(np.asarray(obj1[:3]) - np.asarray(obj2[:3])) - rot = Quaternion.distance(Quaternion(obj1[3:]), Quaternion(obj2[3:])) - #print(obj1[3:]) - #print(obj2[3:]) - #print(rot) - return rot + pass - def calc_rotation_diff(self, obj1, obj2): + def check_end_episode(self): """ - Calculate diffrence between orientation of two objects - - Parameters: - :param obj1: (float array) First object orientation (Euler angles) - :param obj2: (float array) Second object orientation (Euler angles) - Returns: - :return diff: (float) Distance between 2 float arrays + Check if episode should finish based on fulfilled goal or exceeded number of steps """ - if self.distance_type == "euclidean": - diff = np.linalg.norm(np.asarray(obj1) - np.asarray(obj2)) - elif self.distance_type == "manhattan": - diff = cityblock(obj1, obj2) - return diff - - def trajectory_distance(self, trajectory, point, last_nearest_index, n=10): - """Compute the distance of point from a trajectory, n points around the last nearest point""" - index1, index2 = last_nearest_index - (int(n / 2)), last_nearest_index + int(n / 2) - print("indexes:", index1, index2) - if (index1) < 0: # index can't be less than zero - index1 -= last_nearest_index - (int(n / 2)) - index2 -= last_nearest_index - (int(n / 2)) - if (index2 > np.shape(trajectory)[1]): - index2 =np.shape(trajectory)[1] - trajectory_points = np.transpose( - trajectory[:, index1:index2]) # select n points around last nearest point of trajectory - distances = [] - for i in range(index2-index1): - distances.append(self.calc_distance(point, trajectory_points[i])) - min_idx = int(np.argwhere(distances == np.min(distances))) - min = np.min(distances) - nearest_index = min_idx + index1 - return min, nearest_index - - def create_trajectory(self, fx, fy, fz, t): - """General trajectory creator. 
Pass functions fx, fy, fz and parametric vector t to create any 3D trajectory""" - trajectory = np.asarray([fx(t), fy(t), fz(t)]) - return trajectory - - - def create_line(self, point1, point2, step=0.01): - """Creates line from point1 to point2 -> vectors of length 1/step""" - t = np.arange(0, 1, step) - - def linemaker_x(t): - "axis: 0 = x, 1 = y, 2 = z" - return (point2[0] - point1[0]) * t + point1[0] - - def linemaker_y(t): - return (point2[1] - point1[1]) * t + point1[1] - - def linemaker_z(t): - return (point2[2] - point1[2]) * t + point1[2] - - return self.create_trajectory(fx=linemaker_x, fy=linemaker_y, fz=linemaker_z, t=t) - - - def create_circular_trajectory(self, center, radius, rot_vector = [0.0, 0.0, 0.0], arc=np.pi, step=0.01, direction = 1): - """Creates a 2D circular trajectory in 3D space. - params: center ([x,y,z]), radius (float): self-explanatory - rot_vector ([x,y,z]): Axis of rotation. Angle of rotation is norm of rot_vector - arc (radians): 2pi means full circle, 1 pi means half a circle etc... - """ - phi = np.arange(0, arc, step) - v = np.asarray(rot_vector) - theta = np.linalg.norm(v) - - # creation of non-rotated circle of given radius located at [0,0,0] - base_circle = np.array([np.cos(phi) * radius, np.sin(phi) * radius, [0] * len(phi)])*direction - rotation = np.eye(3) - print(theta) - if theta != 0.0: - normalized_v = v * (1 / theta) - # Rodrigues' formula: - rotation = (np.eye(3) + np.sin(theta) * self.skew_symmetric(normalized_v) + - (1 - np.cos(theta)) * np.matmul(self.skew_symmetric(normalized_v), - self.skew_symmetric(normalized_v))) - rotated = np.asarray([[], [], []]) - for i in range(len(phi)): - rotated_v = np.asarray([np.matmul(rotation, base_circle[:3, i])]) - rotated = np.append(rotated, np.transpose(rotated_v), axis=1) - # moving circle to its center - move = np.asarray(center) - final_circle = np.transpose(np.transpose(rotated) + move) - return final_circle - - - def create_circular_trajectory_v2(self, point1, point2, radius, direction): pass - def generate_new_goal(self, object_area_borders, camera_id): - """ - Generate an image of new goal for VEA vision model. This function is supposed to be called from env workspace. 
- - Parameters: - :param object_area_borders: (list) Volume in space where task objects can be located - :param camera_id: (int) ID of environment camera active for image rendering - """ - if self.task_type == "push": - random_pos = self.env.task_objects[0].get_random_object_position(object_area_borders) - random_rot = self.env.task_objects[0].get_random_object_orientation() - self.env.robot.reset_up() - self.env.task_objects[0].set_position(random_pos) - self.env.task_objects[0].set_orientation(random_rot) - self.env.task_objects[1].set_position(random_pos) - self.env.task_objects[1].set_orientation(random_rot) - render_info = self.env.render(mode="rgb_array", camera_id = self.env.active_cameras) - self.goal_image = render_info[self.env.active_cameras]["image"] - random_pos = self.env.task_objects[0].get_random_object_position(object_area_borders) - random_rot = self.env.task_objects[0].get_random_object_orientation() - self.env.task_objects[0].set_position(random_pos) - self.env.task_objects[0].set_orientation(random_rot) - elif self.task_type == "reach": - bounded_action = [random.uniform(-3,-2.4) for x in range(2)] - action = [random.uniform(-2.9,2.9) for x in range(6)] - self.env.robot.reset_joints(bounded_action + action) - self.goal_image = self.env.render(mode="rgb_array", camera_id=self.env.active_cameras)[self.env.active_cameras]['image'] - self.env.robot.reset_up() - #self.goal_image = self.vision_module.vae_generate_sample() - def check_obs_template(self): + def reset_task(self): """ - Checks if observations are set according to rules and computes observation dim - - Returns: - :return obsdim: (int) Dimensionality of observation + Start a new episode with a new setup """ - t = self.obs_template - assert "actual_state" and "goal_state" in t.keys(), \ - "Observation setup in config must contain actual_state and goal_state" - if t["additional_obs"]: - assert [x in ["joints_xyz", "joints_angles", "endeff_xyz", "endeff_6D", "touch", "distractor"] for x in - t["additional_obs"]], "Failed to parse some of the additional_obs in config" - assert t["actual_state"] in ["endeff_xyz", "endeff_6D", "obj_xyz", "obj_6D", "vae", "yolact", "voxel", "dope"],\ - "failed to parse actual_state in Observation config" - assert t["goal_state"] in ["obj_xyz", "obj_6D", "vae", "yolact", "voxel" or "dope"],\ - "failed to parse goal_state in Observation config" - if "endeff" not in t["actual_state"]: - assert t["actual_state"] == t["goal_state"], \ - "actual_state and goal_state in Observation must have the same format" - else: - assert t["actual_state"].split("_")[-1] == t["goal_state"] .split("_")[-1], "Actual state and goal state must " \ - "have the same number of dimensions!" - if "endeff_xyz" in t["additional_obs"] or "endeff_6D" in t["additional_obs"]: - warnings.warn("Observation config: endeff_xyz already in actual_state, no need to have it in additional_obs. 
Removing it") - [self.obs_template["additional_obs"].remove(x) for x in t["additional_obs"] if "endeff" in x] - obsdim = 0 - for x in [t["actual_state"], t["goal_state"]]: - get_datalen = {"joints_xyz":len(self.get_linkstates_unpacked()), - "joints_angles":len(self.env.robot.get_joints_states()), - "endeff_xyz":len(self.vision_module.get_obj_position(self.env.robot, self.image, self.depth)[:3]), - "endeff_6D":len(list(self.vision_module.get_obj_position(self.env.robot, self.image, self.depth)) \ - + list(self.vision_module.get_obj_orientation(self.env.robot))), - "dope":7, "obj_6D":7, "distractor": 3, "touch":1, "yolact":3, "voxel":3, "obj_xyz":3, - "vae":self.vision_module.obsdim} - obsdim += get_datalen[x] - for x in t["additional_obs"]: - obsdim += get_datalen[x] - return obsdim \ No newline at end of file + self.current_task_length = None + self.current_task_sequence = None + self.subtask_over = False + self.current_action = None \ No newline at end of file diff --git a/myGym/envs/vision_module.py b/myGym/envs/vision_module.py index c6209d68..23a1684d 100644 --- a/myGym/envs/vision_module.py +++ b/myGym/envs/vision_module.py @@ -6,33 +6,15 @@ import numpy as np import cv2 import random -import pkg_resources -currentdir = pkg_resources.resource_filename("myGym", "envs") +import importlib.resources as resources +currentdir = resources.files("myGym").joinpath("envs") # import vision models YOLACT, VAE -sys.path.append(pkg_resources.resource_filename("myGym", "yolact_vision")) #may be moved somewhere else -try: - from inference_tool import InfTool -except: - print("Problem importing YOLACT.") +sys.path.append(resources.files("myGym").joinpath("yolact_vision")) #may be moved somewhere else from myGym.vae.vis_helpers import load_checkpoint from myGym.vae import sample -def get_module_type(observation): - """ - Get source of the information from environment (ground_truth, yolact, vae) - - Returns: - :return source: (string) Source of information - """ - if observation["actual_state"] not in ["vae", "yolact", "voxel", "dope"]: - src = "ground_truth_6D" if "6D" in observation["actual_state"] else "ground_truth" - else: - src = observation["actual_state"] - return src - - class VisionModule: """ Vision class that retrieves information from environment based on a visual subsystem (YOLACT, VAE) or ground truth @@ -44,8 +26,8 @@ class VisionModule: :param yolact_path: (string) Path to a trained Yolact in 3dvu reward type :param yolact_config: (string) Path to saved Yolact config obj or name of an existing one in the data/Config script or None for autodetection """ - def __init__(self, observation={}, env=None, vae_path=None, yolact_path=None, yolact_config=None): - self.src = get_module_type(observation) + def __init__(self, observation="ground_truth", env=None, vae_path=None, yolact_path=None, yolact_config=None): + self.src = observation self.env = env self.vae_embedder = None self.vae_imsize = None @@ -112,7 +94,7 @@ def get_obj_pixel_position(self, obj=None, img=None): else: raise Exception("You need to provide image argument for segmentation") - def get_obj_bbox(self, obj=None, img=None): + def get_obj_bbox_for_obs(self, obj=None, img=None): """ Get bounding box of an object from 2D image @@ -145,7 +127,7 @@ def get_obj_bbox(self, obj=None, img=None): else: raise Exception("{} module does not provide bounding boxes!".format(self.src)) - def get_obj_position(self, obj=None, img=None, depth=None): + def get_obj_position_for_obs(self, obj=None, img=None, depth=None): """ Get object position in 
world coordinates of environment from 2D and depth image @@ -186,7 +168,7 @@ def get_obj_position(self, obj=None, img=None, depth=None): raise Exception("You need to provide image argument to infer object position") return - def get_obj_orientation(self, obj=None, img=None): + def get_obj_orientation_for_obs(self, obj=None, img=None): """ Get object orientation in world coordinates of environment from 2D image @@ -209,7 +191,7 @@ def get_obj_orientation(self, obj=None, img=None): raise Exception("You need to provide image argument to infer orientation") return - def vae_generate_sample(self): + def vae_generate_sample_for_obs(self): """ Generate image as a sample of VAE latent representation @@ -222,7 +204,7 @@ def vae_generate_sample(self): dec_img = np.asarray((img * 255).cpu().detach(), dtype="uint8") return dec_img - def encode_with_vae(self, imgs, task="reach", decode=0): + def encode_with_vae_for_obs(self, imgs, task="reach", decode=0): """ Encode the input image into an n-dimensional latent variable using VAE model @@ -274,7 +256,7 @@ def _initialize_network(self, network): :param network: (string) Source of information from environment (yolact, vae) """ if network == "vae": - weights_pth = pkg_resources.resource_filename("myGym", self.vae_path) + weights_pth = resources.files("myGym").joinpath(self.vae_path) try: self.vae_embedder, imsize = load_checkpoint(weights_pth, use_cuda=True) except: @@ -282,10 +264,14 @@ def _initialize_network(self, network): self.vae_imsize = imsize self.obsdim = (2*self.vae_embedder.n_latents) + 3 elif network == "yolact": - weights = pkg_resources.resource_filename("myGym", self.yolact_path) + weights = resources.files("myGym").joinpath(self.yolact_path) if ".obj" in self.yolact_config: - config = pkg_resources.resource_filename("myGym", self.yolact_config) + config = resources.files("myGym").joinpath(self.yolact_config) try: + try: + from inference_tool import InfTool + except: + print("Problem importing YOLACT.") self.yolact_cnn = InfTool(weights=weights, config=config, score_threshold=0.2) except: raise Exception("For yolact observations, you need to download pre-trained vision model and specify its path in config. Specified {} and {} not found.".format(self.yolact_path, self.yolact_config)) diff --git a/myGym/envs/visualize_trajectory.py b/myGym/envs/visualize_trajectory.py index da59920b..73b6473a 100644 --- a/myGym/envs/visualize_trajectory.py +++ b/myGym/envs/visualize_trajectory.py @@ -1,7 +1,7 @@ from mpl_toolkits import mplot3d import numpy as np import matplotlib.pyplot as plt -from stable_baselines import results_plotter +#from stable_baselines import results_plotter import os import math from math import sqrt, fabs, exp, pi, asin diff --git a/myGym/envs/wrappers.py b/myGym/envs/wrappers.py index a80a2168..e08b4e38 100644 --- a/myGym/envs/wrappers.py +++ b/myGym/envs/wrappers.py @@ -1,4 +1,4 @@ -import gym +import gymnasium as gym import commentjson diff --git a/myGym/generate_dataset.py b/myGym/generate_dataset.py index 5837ddb5..407cc368 100644 --- a/myGym/generate_dataset.py +++ b/myGym/generate_dataset.py @@ -1,6 +1,6 @@ ## script to generate train/test sets from the simulator in COCO or DOPE format. Used for vision training. 
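The pkg_resources to importlib.resources swap in the hunk below repeats a pattern used throughout this patch; a minimal sketch of the replacement (illustrative only), with the caveat that files() returns a Traversable rather than a plain string:

import importlib.resources as resources

# replacement for pkg_resources.resource_filename("myGym", "configs/dataset_coco.json")
cfg = resources.files("myGym").joinpath("configs/dataset_coco.json")

# on a regular (non-zipped) install this is a pathlib.Path; APIs that require a
# plain string, such as sys.path entries, may need str(cfg) or os.fspath(cfg)
print(str(cfg))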
-import gym +import gymnasium as gym from myGym import envs from matplotlib.pyplot import imshow, show import cv2 @@ -16,17 +16,18 @@ import pybullet as p from bbox import BBox3D from myGym.envs.wrappers import RandomizedEnvWrapper -import pkg_resources +import importlib.resources as resources + # config, specify here or pass as an input argument -CONFIG_DEFAULT = pkg_resources.resource_filename("myGym", 'configs/dataset_coco.json') +CONFIG_DEFAULT = resources.files("myGym").joinpath("configs/dataset_coco.json") # helper functions: def color_names_to_rgb(): """ Assign RGB colors to objects by name as specified in the training config file """ - with open(pkg_resources.resource_filename("myGym", 'utils/rgbcolors.json'), "r") as read_file: + with open(resources.files("myGym").joinpath("utils/rgbcolors.json"), "r") as read_file: clr = json.load(read_file) #json file with suggested colors new_dict = {} for key, value in config['object_colors'].items(): @@ -498,7 +499,7 @@ def collect_data(self, steps): config_path = CONFIG_DEFAULT print('No config.json passed in argument. Loading default config: {}'.format(CONFIG_DEFAULT)) else: - config_path = pkg_resources.resource_filename("myGym", sys.argv[1]) + config_path = resources.files("myGym").joinpath(sys.argv[1]) with open(config_path) as file: config = commentjson.load(file) diff --git a/myGym/halfcheetah-appo.yaml b/myGym/halfcheetah-appo.yaml deleted file mode 100644 index b423e0ed..00000000 --- a/myGym/halfcheetah-appo.yaml +++ /dev/null @@ -1,37 +0,0 @@ -# This can reach 9k reward in 2 hours on a Titan XP GPU -# with 16 workers and 8 envs per worker. -halfcheetah-appo: - env: HalfCheetah-v2 - run: APPO - stop: - time_total_s: 10800 - config: - # Works for both torch and tf. - framework: torch - vtrace: True - gamma: 0.99 - lambda: 0.95 - rollout_fragment_length: 512 - train_batch_size: 4096 - num_workers: 16 - num_gpus: 1 - broadcast_interval: 1 - max_sample_requests_in_flight_per_worker: 1 - num_multi_gpu_tower_stacks: 1 - num_envs_per_worker: 32 - minibatch_buffer_size: 16 - num_sgd_iter: 32 - clip_param: 0.2 - lr_schedule: [ - [0, 0.0005], - [150000000, 0.000001], - ] - vf_loss_coeff: 0.5 - entropy_coeff: 0.01 - grad_clip: 0.5 - batch_mode: truncate_episodes - use_kl_loss: True - kl_coeff: 1.0 - kl_target: 0.04 - observation_filter: MeanStdFilter - diff --git a/myGym/humanoid-ppo.yaml b/myGym/humanoid-ppo.yaml deleted file mode 100644 index fadd7e96..00000000 --- a/myGym/humanoid-ppo.yaml +++ /dev/null @@ -1,23 +0,0 @@ -humanoid-ppo: - env: Humanoid-v4 - run: PPO - stop: - sampler_results/episode_reward_mean: 6000 - timesteps_total: 40000000 - config: - # Works for both torch and tf. 
- framework: torch - gamma: 0.995 - kl_coeff: 1.0 - num_sgd_iter: 20 - lr: .0001 - sgd_minibatch_size: 32768 - train_batch_size: 320000 - model: - free_log_std: true - use_gae: false - num_workers: 10 - num_envs_per_worker: 20 - num_gpus: 1 - batch_mode: complete_episodes - observation_filter: MeanStdFilter diff --git a/myGym/learnability.py b/myGym/learnability.py deleted file mode 100644 index 7d4081ea..00000000 --- a/myGym/learnability.py +++ /dev/null @@ -1,55 +0,0 @@ -import os -import re -import threading -import subprocess -from sklearn.model_selection import ParameterGrid -import json -import time - -parameters = { - "robot_action": ["step", "joints","joints_gripper"], - "task_type": ["reach", "push","pnp"], - "max_episode_steps": [1024, 512, 256] -} -parameter_grid = ParameterGrid(parameters) -configfile = 'configs/multi.json' -evaluation_results_paths = [None] * len(parameter_grid) -threaded = True -last_eval_results = {} - - -def train(params, i): - if "task_type" in params and params["task_type"] != "reach": - params["task_objects"] = "cube_holes target" - params["reward"] = "complex_distance" - print((" ".join(f"--{key} {value}" for key, value in params.items())).split()) - command = 'python train.py --config {configfile} '.format(configfile=configfile) + " ".join(f"--{key} {value}" for key, value in params.items()) - output = subprocess.check_output(command.split()) - evaluation_results_paths[i] = output.splitlines()[-1].decode('UTF-8') + "/evaluation_results.json" - with open(evaluation_results_paths[i]) as f: - data = json.load(f) - last_eval_results[str(list(params.values()))] = list(data.values())[-1] - print(last_eval_results) - with open("trained_models/multi_evaluation_results.json", 'w') as f: - json.dump(last_eval_results, f, indent=4) - - # os.system('python train.py --config {configfile} '.format(configfile=configfile) - # + " ".join(f"--{key} {value}" for key, value in params.items())) - - -if __name__ == '__main__': - threads = [] - starttime=time.time() - for i, params in enumerate(parameter_grid): - if threaded: - thread = threading.Thread(target=train, args=(params, i)) - thread.start() - threads.append(thread) - else: - train(params.copy(), i) - - if threaded: - for thread in threads: - thread.join() - endtime=time.time() - print (endtime-starttime) diff --git a/myGym/multi_evaluation_task.py b/myGym/multi_evaluation_task.py deleted file mode 100644 index 3eb3a461..00000000 --- a/myGym/multi_evaluation_task.py +++ /dev/null @@ -1,60 +0,0 @@ -import os -import re -import threading -import subprocess -from sklearn.model_selection import ParameterGrid -import json -import time -import argparse - - -parser = argparse.ArgumentParser() -parser.add_argument("-cfg", "--config", type=str, help="config file for evaluation") -parser.add_argument("-out", "--output", type=str, help="output file") - -args = parser.parse_args() - -parameters = { - "robot": ["kuka", "panda",'ur5'], - "robot_action": ["step", "joints","absolute"], - "algo": ["ppo", "ppo2","sac"], -} -parameter_grid = ParameterGrid(parameters) -configfile = args.config -evaluation_results_paths = [None] * len(parameter_grid) -threaded = True -last_eval_results = {} - - -def train(params, i): - print((" ".join(f"--{key} {value}" for key, value in params.items())).split()) - command = 'python train.py --config {configfile} '.format(configfile=configfile) + " ".join(f"--{key} {value}" for key, value in params.items()) - output = subprocess.check_output(command.split()) - evaluation_results_paths[i] = 
output.splitlines()[-1].decode('UTF-8') + "/evaluation_results.json" - with open(evaluation_results_paths[i]) as f: - data = json.load(f) - last_eval_results[str(list(params.values()))] = list(data.values())[-1] - print(last_eval_results) - with open(args.output, 'w') as f: - json.dump(last_eval_results, f, indent=4) - - # os.system('python train.py --config {configfile} '.format(configfile=configfile) - # + " ".join(f"--{key} {value}" for key, value in params.items())) - - -if __name__ == '__main__': - threads = [] - starttime=time.time() - for i, params in enumerate(parameter_grid): - if threaded: - thread = threading.Thread(target=train, args=(params, i)) - thread.start() - threads.append(thread) - else: - train(params.copy(), i) - - if threaded: - for thread in threads: - thread.join() - endtime=time.time() - print (endtime-starttime) diff --git a/myGym/mygymrl_ppc_.txt b/myGym/mygymrl_ppc_.txt deleted file mode 100644 index 79783b9d..00000000 --- a/myGym/mygymrl_ppc_.txt +++ /dev/null @@ -1,24 +0,0 @@ -# This file may be used to create an environment using: -# $ conda create --name --file -# platform: linux-64 -@EXPLICIT -https://repo.anaconda.com/pkgs/main/linux-64/_libgcc_mutex-0.1-main.conda -https://repo.anaconda.com/pkgs/main/linux-64/ca-certificates-2023.05.30-h06a4308_0.conda -https://repo.anaconda.com/pkgs/main/linux-64/ld_impl_linux-64-2.38-h1181459_1.conda -https://repo.anaconda.com/pkgs/main/linux-64/libstdcxx-ng-11.2.0-h1234567_1.conda -https://repo.anaconda.com/pkgs/main/linux-64/libgomp-11.2.0-h1234567_1.conda -https://repo.anaconda.com/pkgs/main/linux-64/_openmp_mutex-5.1-1_gnu.conda -https://repo.anaconda.com/pkgs/main/linux-64/libgcc-ng-11.2.0-h1234567_1.conda -https://repo.anaconda.com/pkgs/main/linux-64/libffi-3.4.4-h6a678d5_0.conda -https://repo.anaconda.com/pkgs/main/linux-64/ncurses-6.4-h6a678d5_0.conda -https://repo.anaconda.com/pkgs/main/linux-64/openssl-1.1.1u-h7f8727e_0.conda -https://repo.anaconda.com/pkgs/main/linux-64/xz-5.4.2-h5eee18b_0.conda -https://repo.anaconda.com/pkgs/main/linux-64/zlib-1.2.13-h5eee18b_0.conda -https://repo.anaconda.com/pkgs/main/linux-64/readline-8.2-h5eee18b_0.conda -https://repo.anaconda.com/pkgs/main/linux-64/tk-8.6.12-h1ccaba5_0.conda -https://repo.anaconda.com/pkgs/main/linux-64/sqlite-3.41.2-h5eee18b_0.conda -https://repo.anaconda.com/pkgs/main/linux-64/python-3.7.16-h7a1cb2a_0.conda -https://repo.anaconda.com/pkgs/main/linux-64/certifi-2022.12.7-py37h06a4308_0.conda -https://repo.anaconda.com/pkgs/main/linux-64/wheel-0.38.4-py37h06a4308_0.conda -https://repo.anaconda.com/pkgs/main/linux-64/setuptools-65.6.3-py37h06a4308_0.conda -https://repo.anaconda.com/pkgs/main/linux-64/pip-22.3.1-py37h06a4308_0.conda diff --git a/myGym/pendulum-ddppo.yaml b/myGym/pendulum-ddppo.yaml deleted file mode 100644 index 1c3f5708..00000000 --- a/myGym/pendulum-ddppo.yaml +++ /dev/null @@ -1,20 +0,0 @@ -pendulum-ddppo: - env: Pendulum-v1 - run: DDPPO - stop: - sampler_results/episode_reward_mean: -300 - timesteps_total: 1500000 - config: - framework: torch - train_batch_size: 2500 # per worker - num_gpus_per_worker: 1 - num_workers: 4 - num_envs_per_worker: 10 - observation_filter: MeanStdFilter - gamma: 0.95 - sgd_minibatch_size: 50 - num_sgd_iter: 5 - clip_param: 0.3 - vf_clip_param: 10.0 - lambda: 0.1 - lr: 0.00015 diff --git a/myGym/pendulum-ppo.yaml b/myGym/pendulum-ppo.yaml deleted file mode 100644 index f129279f..00000000 --- a/myGym/pendulum-ppo.yaml +++ /dev/null @@ -1,23 +0,0 @@ -# Can expect improvement to -140 reward in ~300-500k 
timesteps. -pendulum-ppo: - env: Pendulum-v1 - run: PPO - stop: - sampler_results/episode_reward_mean: -400 - timesteps_total: 400000 - config: - # Works for both torch and tf. - framework: torch - train_batch_size: 512 - vf_clip_param: 10.0 - num_gpus: 1 - num_workers: 3 - num_envs_per_worker: 20 - lambda: 0.1 - gamma: 0.95 - lr: 0.0003 - sgd_minibatch_size: 64 - observation_filter: MeanStdFilter - model: - fcnet_activation: relu - enable_connectors: true diff --git a/myGym/sim2real.py b/myGym/sim2real.py deleted file mode 100644 index 2547d968..00000000 --- a/myGym/sim2real.py +++ /dev/null @@ -1,541 +0,0 @@ -from ast import arg -import gym -from myGym import envs -import cv2 -from myGym.train import get_parser, get_arguments, configure_implemented_combos, configure_env -import os, imageio -import numpy as np -import time -from numpy import matrix -import pybullet as p -import pybullet_data -import pkg_resources -import random -import getkey -import signal -from utils.nicomotors import NicoMotors - - -clear = lambda: os.system('clear') - -AVAILABLE_SIMULATION_ENGINES = ["mujoco", "pybullet"] -AVAILABLE_TRAINING_FRAMEWORKS = ["tensorflow", "pytorch"] - -def visualize_sampling_area(arg_dict): - rx = (arg_dict["task_objects"][0]["goal"]["sampling_area"][0] - arg_dict["task_objects"][0]["goal"]["sampling_area"][1])/2 - ry = (arg_dict["task_objects"][0]["goal"]["sampling_area"][2] - arg_dict["task_objects"][0]["goal"]["sampling_area"][3])/2 - rz = (arg_dict["task_objects"][0]["goal"]["sampling_area"][4] - arg_dict["task_objects"][0]["goal"]["sampling_area"][5])/2 - - visual = p.createVisualShape(shapeType=p.GEOM_BOX, halfExtents=[rx,ry,rz], rgbaColor=[1,0,0,.2]) - collision = -1 - - sampling = p.createMultiBody( - baseVisualShapeIndex=visual, - baseCollisionShapeIndex=collision, - baseMass=0, - basePosition=[arg_dict["task_objects"][0]["goal"]["sampling_area"][0]-rx, arg_dict["task_objects"][0]["goal"]["sampling_area"][2]-ry,arg_dict["task_objects"][0]["goal"]["sampling_area"][4]-rz], - ) - -def visualize_trajectories (info,action): - - visualo = p.createVisualShape(shapeType=p.GEOM_SPHERE, radius=0.01, rgbaColor=[0,0,1,.3]) - collision = -1 - p.createMultiBody( - baseVisualShapeIndex=visualo, - baseCollisionShapeIndex=collision, - baseMass=0, - basePosition=info['o']['actual_state'], - ) - - #visualr = p.createVisualShape(shapeType=p.GEOM_SPHERE, radius=0.01, rgbaColor=[0,1,0,.5]) - #p.createMultiBody( - # baseVisualShapeIndex=visualr, - # baseCollisionShapeIndex=collision, - # baseMass=0, - # basePosition=info['o']['additional_obs']['endeff_xyz'], - #) - - visuala = p.createVisualShape(shapeType=p.GEOM_SPHERE, radius=0.01, rgbaColor=[1,0,0,.3]) - p.createMultiBody( - baseVisualShapeIndex=visuala, - baseCollisionShapeIndex=collision, - baseMass=0, - basePosition=action[:3], - ) - -def visualize_goal(info): - visualg = p.createVisualShape(shapeType=p.GEOM_SPHERE, radius=0.01, rgbaColor=[1,0,0,.5]) - collision = -1 - p.createMultiBody( - baseVisualShapeIndex=visualg, - baseCollisionShapeIndex=collision, - baseMass=0, - basePosition=info['o']['goal_state'], - ) -def change_dynamics(cubex,lfriction,rfriction,ldamping,adamping): - p.changeDynamics(cubex, -1, lateralFriction=p.readUserDebugParameter(lfriction)) - p.changeDynamics(cubex,-1,rollingFriction=p.readUserDebugParameter(rfriction)) - p.changeDynamics(cubex, -1, linearDamping=p.readUserDebugParameter(ldamping)) - p.changeDynamics(cubex, -1, angularDamping=p.readUserDebugParameter(adamping)) - - #visualrobot = 
p.createVisualShape(shapeType=p.GEOM_SPHERE, radius=1, rgbaColor=[0,1,0,.2]) - #collisionrobot = -1 - #sampling = p.createMultiBody( - # baseVisualShapeIndex=visualrobot, - # baseCollisionShapeIndex=collisionrobot, - # baseMass=0, - # basePosition=[0,0,0.3], - #) - -def visualize_infotext(action, env, info): - p.addUserDebugText(f"Episode:{env.env.episode_number}", - [.65, 1., 0.45], textSize=1.0, lifeTime=0.5, textColorRGB=[0.4, 0.2, .3]) - p.addUserDebugText(f"Step:{env.env.episode_steps}", - [.67, 1, .40], textSize=1.0, lifeTime=0.5, textColorRGB=[0.2, 0.8, 1]) - p.addUserDebugText(f"Subtask:{env.env.task.current_task}", - [.69, 1, 0.35], textSize=1.0, lifeTime=0.5, textColorRGB=[0.4, 0.2, 1]) - p.addUserDebugText(f"Network:{env.env.reward.current_network}", - [.71, 1, 0.3], textSize=1.0, lifeTime=0.5, textColorRGB=[0.0, 0.0, 1]) - p.addUserDebugText(f"Action (Gripper):{matrix(np.around(np.array(action),2))}", - [.73, 1, 0.25], textSize=1.0, lifeTime=0.5, textColorRGB=[1, 0, 0]) - p.addUserDebugText(f"Actual_state:{matrix(np.around(np.array(env.env.observation['task_objects']['actual_state'][:3]),2))}", - [.75, 1, 0.2], textSize=1.0, lifeTime=0.5, textColorRGB=[0.0, 1, 0.0]) - p.addUserDebugText(f"End_effector:{matrix(np.around(np.array(env.env.robot.end_effector_pos),2))}", - [.77, 1, 0.15], textSize=1.0, lifeTime=0.5, textColorRGB=[0.0, 1, 0.0]) - p.addUserDebugText(f" Object:{matrix(np.around(np.array(info['o']['actual_state']),2))}", - [.8, 1, 0.10], textSize=1.0, lifeTime=0.5, textColorRGB=[0.0, 0.0, 1]) - p.addUserDebugText(f"Velocity:{env.env.max_velocity}", - [.79, 1, 0.05], textSize=1.0, lifeTime=0.5, textColorRGB=[0.6, 0.8, .3]) - p.addUserDebugText(f"Force:{env.env.max_force}", - [.81, 1, 0.00], textSize=1.0, lifeTime=0.5, textColorRGB=[0.3, 0.2, .4]) - -def detect_key(keypress,arg_dict,action): - - if 97 in keypress.keys() and keypress[97] == 1: # A - action[2] += .03 - print(action) - if 122 in keypress.keys() and keypress[122] == 1: # Z/Y - action[2] -= .03 - print(action) - if 65297 in keypress.keys() and keypress[65297] == 1: # ARROW UP - action[1] -= .03 - print(action) - if 65298 in keypress.keys() and keypress[65298] == 1: # ARROW DOWN - action[1] += .03 - print(action) - if 65295 in keypress.keys() and keypress[65295] == 1: # ARROW LEFT - action[0] += .03 - print(action) - if 65296 in keypress.keys() and keypress[65296] == 1: # ARROW RIGHT - action[0] -= .03 - print(action) - if 120 in keypress.keys() and keypress[120] == 1: # X - action[3] -= .03 - action[4] -= .03 - print(action) - if 99 in keypress.keys() and keypress[99] == 1: # C - action[3] += .03 - action[4] += .03 - print(action) - # if 100 in keypress.keys() and keypress[100] == 1: - # cube[cubecount] = p.loadURDF(pkg_resources.resource_filename("myGym", os.path.join("envs", "objects/assembly/urdf/cube_holes.urdf")), [action[0], action[1],action[2]-0.2 ]) - # change_dynamics(cube[cubecount],lfriction,rfriction,ldamping,adamping) - # cubecount +=1 - if "step" in arg_dict["robot_action"]: - action[:3] = np.multiply(action [:3],10) - elif "joints" in arg_dict["robot_action"]: - print("Robot action: Joints - KEYBOARD CONTROL UNDER DEVELOPMENT") - quit() - #for i in range (env.action_space.shape[0]): - # env.env.robot.joints_max_velo[i] = p.readUserDebugParameter(maxvelo) - # env.env.robot.joints_max_force[i] = p.readUserDebugParameter(maxforce) - return action - -def test_env(env, arg_dict): - real_robot = True - motors = NicoMotors() - dofs = motors.dofs() - try: - motors.open() - except: - print('motors are 
not operational') - - for k in dofs: - motors.enableTorque(k) - torque = True - - - arg_dict["vsampling"] = 0 - arg_dict["vinfo"] = 0 - spawn_objects = False - env.render("human") - #env.reset() - #Prepare names for sliders - joints = ['Joint1','Joint2','Joint3','Joint4','Joint5','Joint6','Joint7','Joint 8','Joint 9', 'Joint10', 'Joint11','Joint12','Joint13','Joint14','Joint15','Joint16','Joint17','Joint 18','Joint 19'] - jointparams = ['Jnt1','Jnt2','Jnt3','Jnt4','Jnt5','Jnt6','Jnt7','Jnt 8','Jnt 9', 'Jnt10', 'Jnt11','Jnt12','Jnt13','Jnt14','Jnt15','Jnt16','Jnt17','Jnt 18','Jnt 19'] - cube = ['Cube1','Cube2','Cube3','Cube4','Cube5','Cube6','Cube7','Cube8','Cube9','Cube10','Cube11','Cube12','Cube13','Cube14','Cube15','Cube16','Cube17','Cube18','Cube19'] - cubecount = 0 - - if arg_dict["gui"] == 0: - print ("Add --gui 1 parameter to visualize environment") - quit() - - p.configureDebugVisualizer(p.COV_ENABLE_GUI, 0) - - p.resetDebugVisualizerCamera(1.2, 180, -30, [0.0, 0.5, 0.05]) - p.setAdditionalSearchPath(pybullet_data.getDataPath()) - #newobject = p.loadURDF("cube.urdf", [3.1,3.7,0.1]) - #p.changeDynamics(newobject, -1, lateralFriction=1.00) - #p.setRealTimeSimulation(1) - if arg_dict["control"] == "slider": - p.configureDebugVisualizer(p.COV_ENABLE_GUI, 1) - if "joints" in arg_dict["robot_action"]: - if 'gripper' in arg_dict["robot_action"]: - print ("gripper is present") - for i in range (env.action_space.shape[0]): - if i < (env.action_space.shape[0] - len(env.env.robot.gjoints_rest_poses)): - joints[i] = p.addUserDebugParameter(joints[i], env.action_space.low[i], env.action_space.high[i], env.env.robot.init_joint_poses[i]) - else: - joints[i] = p.addUserDebugParameter(joints[i], env.action_space.low[i], env.action_space.high[i], .02) - else: - for i in range (env.action_space.shape[0]): - joints[i] = p.addUserDebugParameter(joints[i],env.action_space.low[i], env.action_space.high[i], env.env.robot.init_joint_poses[i]) - elif "absolute" in arg_dict["robot_action"]: - if 'gripper' in arg_dict["robot_action"]: - print ("gripper is present") - for i in range (env.action_space.shape[0]): - if i < (env.action_space.shape[0] - len(env.env.robot.gjoints_rest_poses)): - joints[i] = p.addUserDebugParameter(joints[i], -1, 1, arg_dict["robot_init"][i]) - else: - joints[i] = p.addUserDebugParameter(joints[i], -1, 1, .02) - else: - for i in range (env.action_space.shape[0]): - joints[i] = p.addUserDebugParameter(joints[i], -1, 1, arg_dict["robot_init"][i]) - elif "step" in arg_dict["robot_action"]: - if 'gripper' in arg_dict["robot_action"]: - print ("gripper is present") - for i in range (env.action_space.shape[0]): - if i < (env.action_space.shape[0] - len(env.env.robot.gjoints_rest_poses)): - joints[i] = p.addUserDebugParameter(joints[i], -1, 1, 0) - else: - joints[i] = p.addUserDebugParameter(joints[i], -1, 1, .02) - else: - for i in range (env.action_space.shape[0]): - joints[i] = p.addUserDebugParameter(joints[i], -1, 1, 0) - - - #maxvelo = p.addUserDebugParameter("Max Velocity", 0.1, 50, env.env.robot.joints_max_velo[0]) - #maxforce = p.addUserDebugParameter("Max Force", 0.1, 300, env.env.robot.joints_max_force[0]) - lfriction = p.addUserDebugParameter("Lateral Friction", 0, 100, 0) - rfriction = p.addUserDebugParameter("Spinning Friction", 0, 100, 0) - ldamping = p.addUserDebugParameter("Linear Damping", 0, 100, 0) - adamping = p.addUserDebugParameter("Angular Damping", 0, 100, 0) - #action.append(jointparams[i]) - if arg_dict["vsampling"] == True: - visualize_sampling_area(arg_dict) - - 
#visualgr = p.createVisualShape(shapeType=p.GEOM_SPHERE, radius=.005, rgbaColor=[0,0,1,.1]) - - if arg_dict["control"] == "random": - action = env.action_space.sample() - if arg_dict["control"] == "keyboard": - action = arg_dict["robot_init"] - if "gripper" in arg_dict["robot_action"]: - action.append(.1) - action.append(.1) - if arg_dict["control"] == "slider": - action = [] - for i in range (env.action_space.shape[0]): - jointparams[i] = p.readUserDebugParameter(joints[i]) - action.append(jointparams[i]) - - for e in range(50): - env.reset() - #if spawn_objects: - # cube[e] = p.loadURDF(pkg_resources.resource_filename("myGym", os.path.join("envs", "objects/assembly/urdf/cube_holes.urdf")), [0, 0.5, .1]) - - #if visualize_traj: - # visualize_goal(info) - trajectory=[] - degdiff = [] - actiondiff = [] - - - - - for t in range(arg_dict["max_episode_steps"]): - - - if arg_dict["control"] == "slider": - action = [] - for i in range (env.action_space.shape[0]): - jointparams[i] = p.readUserDebugParameter(joints[i]) - action.append(jointparams[i]) - #env.env.robot.joints_max_velo[i] = p.readUserDebugParameter(maxvelo) - #env.env.robot.joints_max_force[i] = p.readUserDebugParameter(maxforce) - - - if arg_dict["control"] == "observation": - if t == 0: - action = env.action_space.sample() - else: - - if "joints" in arg_dict["robot_action"]: - action = info['o']["additional_obs"]["joints_angles"] #n - elif "absolute" in arg_dict["robot_action"]: - action = info['o']["actual_state"] - else: - action = [0,0,0] - - if arg_dict["control"] == "oraculum": - if t == 0: - action = env.action_space.sample() - else: - - if "absolute" in arg_dict["robot_action"]: - action = info['o']["goal_state"] - else: - print("ERROR - Oraculum mode only works for absolute actions") - quit() - - - elif arg_dict["control"] == "keyboard": - keypress = p.getKeyboardEvents() - #print(action) - action = detect_key(keypress,arg_dict,action) - elif arg_dict["control"] == "random": - action = env.action_space.sample() - - prejointaction = env.env.robot.get_joints_states() - - - jointaction = env.env.robot.get_joints_states() - predeg = np.rad2deg(prejointaction) - actiondeg = np.rad2deg(env.env.robot.joint_poses) - - deg = np.rad2deg(jointaction) - #print("Prejointaction: ", predeg) - #print("Actiondeg: ", actiondeg) - #print("Jointaction: ", deg) - degdifference = deg - predeg - actiondifference = actiondeg - deg - if arg_dict["ik_solver"]: - print("Step: ", t,end="") - for i in range(len(REALJOINTS)): - print(" - {}:{:.2f}, ".format(REALJOINTS[i],degdifference[i]), end="") - print(" ") - print(actiondifference) - degdiff.append(degdifference) - actiondiff.append(actiondifference) - #time.sleep(0.06) - - #Execute action on real robot - if arg_dict["real_robot"]: - #jointaction = info['o']["additional_obs"]["joints_angles"] - for i,realjoint in enumerate(REALJOINTS): - robot.setAngle(realjoint,deg[i],DEFAULT_SPEED) - time.sleep(SIMREALDELAY) - print("Step:" + str(t)) - #if t % 10 == 0: - # print('Temperature:', str(robot.getTemperature('r_shoulder_y'))) - # print('Current:', str(robot.getCurrent('r_shoulder_y'))) - - print (f"Action:{action}") - observation, reward, done, info = env.step(action) - if real_robot: - for k in dofs: - motors.setPositionDg(k,int(values[k])) - - if arg_dict["vtrajectory"] == True: - visualize_trajectories(info,action) - if arg_dict["vinfo"] == True: - visualize_infotext(action, env, info) - - - #visualize_goal(info) - #if debug_mode: - #print("Reward is {}, observation is {}".format(reward, 
observation)) - #if t>=1: - #action = matrix(np.around(np.array(action),5)) - #oaction = env.env.robot.get_joints_states() - #oaction = matrix(np.around(np.array(oaction[0:action.shape[0]]),5)) - #diff = matrix(np.around(np.array(action-oaction),5)) - #print(env.env.robot.get_joints_states()) - #print(f"Step:{t}") - #print (f"RAction:{action}") - #print(f"OAction:{oaction}") - #print(f"DAction:{diff}") - #p.addUserDebugText(f"DAction:{diff}", - # [1, 1, 0.1], textSize=1.0, lifeTime=0.05, textColorRGB=[0.6, 0.0, 0.6]) - #time.sleep(.4) - #clear() - - #if action_control == "slider": - # action=[] - if "step" in arg_dict["robot_action"]: - action[:3] = [0,0,0] - - if arg_dict["visualize"]: - visualizations = [[],[]] - env.render("human") - for camera_id in range(len(env.cameras)): - camera_render = env.render(mode="rgb_array", camera_id=camera_id) - image = cv2.cvtColor(camera_render[camera_id]["image"], cv2.COLOR_RGB2BGR) - depth = camera_render[camera_id]["depth"] - image = cv2.copyMakeBorder(image, 30, 10, 10, 20, cv2.BORDER_CONSTANT, value=[255, 255, 255]) - cv2.putText(image, 'Camera {}'.format(camera_id), (10, 20), cv2.FONT_HERSHEY_SIMPLEX, .5, - (0, 0, 0), 1, 0) - visualizations[0].append(image) - depth = cv2.copyMakeBorder(depth, 30, 10, 10, 20, cv2.BORDER_CONSTANT, value=[255, 255, 255]) - cv2.putText(depth, 'Camera {}'.format(camera_id), (10, 20), cv2.FONT_HERSHEY_SIMPLEX, .5, - (0, 0, 0), 1, 0) - visualizations[1].append(depth) - - if len(visualizations[0])%2 !=0: - visualizations[0].append(255*np.ones(visualizations[0][0].shape, dtype=np.uint8)) - visualizations[1].append(255*np.ones(visualizations[1][0].shape, dtype=np.float32)) - fig_rgb = np.vstack((np.hstack((visualizations[0][0::2])),np.hstack((visualizations[0][1::2])))) - fig_depth = np.vstack((np.hstack((visualizations[1][0::2])),np.hstack((visualizations[1][1::2])))) - cv2.imshow('Camera RGB renders', fig_rgb) - cv2.imshow('Camera depthrenders', fig_depth) - cv2.waitKey(1) - - if done: - print("Episode finished after {} timesteps".format(t + 1)) - if arg_dict["ik_solver"]: - file = open(os.path.join("trajectory.txt"), 'a') - for value in trajectory: - file.write(f"{value}\n") - file.write("\n") - file.close() - #Plot actiondiff in graph - fig, axs = plt.subplots(2) - axs[0].plot(actiondiff) - axs[0].set_title('Difference between Action and Sim position') - axs[0].set_xlabel('Steps') - axs[0].set_ylabel('Difference in degrees') - axs[1].plot(degdiff) - axs[1].set_title('Difference between SimPreaction and SimPostaction') - axs[1].set_xlabel('Steps') - axs[1].set_ylabel('Difference in degrees') - plt.tight_layout() - plt.show() - input("Press key to continue...") - plt.close - if arg_dict["real_robot"]: - time.sleep(FINISHDELAY) - reset_robot(robot) - - break - -def test_model(env, model=None, implemented_combos=None, arg_dict=None, model_logdir=None, deterministic=False): - - try: - if "multi" in arg_dict["algo"]: - model_args = implemented_combos[arg_dict["algo"]][arg_dict["train_framework"]][1] - model = implemented_combos[arg_dict["algo"]][arg_dict["train_framework"]][0].load(arg_dict["model_path"], env=model_args[1].env) - else: - model = implemented_combos[arg_dict["algo"]][arg_dict["train_framework"]][0].load(arg_dict["model_path"]) - except: - if (arg_dict["algo"] in implemented_combos.keys()) and (arg_dict["train_framework"] not in list(implemented_combos[arg_dict["algo"]].keys())): - err = "{} is only implemented with {}".format(arg_dict["algo"],list(implemented_combos[arg_dict["algo"]].keys())[0]) - elif 
arg_dict["algo"] not in implemented_combos.keys(): - err = "{} algorithm is not implemented.".format(arg_dict["algo"]) - else: - err = "invalid model_path argument" - raise Exception(err) - - images = [] # Empty list for gif images - success_episodes_num = 0 - distance_error_sum = 0 - vel= arg_dict["max_velocity"] - force = arg_dict["max_force"] - steps_sum = 0 - p.resetDebugVisualizerCamera(1.2, 180, -30, [0.0, 0.5, 0.05]) - #p.setRealTimeSimulation(1) - #p.setTimeStep(0.01) - - for e in range(arg_dict["eval_episodes"]): - done = False - obs = env.reset() - is_successful = 0 - distance_error = 0 - step_sum = 0 - while not done: - steps_sum += 1 - action, _state = model.predict(obs, deterministic=deterministic) - obs, reward, done, info = env.step(action) - is_successful = not info['f'] - distance_error = info['d'] - if arg_dict["vinfo"] == True: - visualize_infotext(action, env, info) - - - if (arg_dict["record"] > 0) and (len(images) < 8000): - render_info = env.render(mode="rgb_array", camera_id = arg_dict["camera"]) - image = render_info[arg_dict["camera"]]["image"] - images.append(image) - print(f"appending image: total size: {len(images)}]") - - success_episodes_num += is_successful - distance_error_sum += distance_error - - mean_distance_error = distance_error_sum / arg_dict["eval_episodes"] - mean_steps_num = steps_sum // arg_dict["eval_episodes"] - - print("#---------Evaluation-Summary---------#") - print("{} of {} episodes ({} %) were successful".format(success_episodes_num, arg_dict["eval_episodes"], success_episodes_num / arg_dict["eval_episodes"]*100)) - print("Mean distance error is {:.2f}%".format(mean_distance_error * 100)) - print("Mean number of steps {}".format(mean_steps_num)) - print("#------------------------------------#") - model_name = arg_dict["algo"] + '_' + str(arg_dict["steps"]) - file = open(os.path.join(model_logdir, "train_" + model_name + ".txt"), 'a') - file.write("\n") - file.write("#Evaluation results: \n") - file.write("#{} of {} episodes were successful \n".format(success_episodes_num, arg_dict["eval_episodes"])) - file.write("#Mean distance error is {:.2f}% \n".format(mean_distance_error * 100)) - file.write("#Mean number of steps {}\n".format(mean_steps_num)) - file.close() - - if arg_dict["record"] == 1: - gif_path = os.path.join(model_logdir, "train_" + model_name + ".gif") - imageio.mimsave(gif_path, [np.array(img) for i, img in enumerate(images) if i%2 == 0], fps=15) - os.system('./utils/gifopt -O3 --lossy=5 -o {dest} {source}'.format(source=gif_path, dest=gif_path)) - print("Record saved to " + gif_path) - elif arg_dict["record"] == 2: - video_path = os.path.join(model_logdir, "train_" + model_name + ".avi") - height, width, layers = image.shape - out = cv2.VideoWriter(video_path,cv2.VideoWriter_fourcc(*'XVID'), 30, (width,height)) - for img in images: - out.write(cv2.cvtColor(img, cv2.COLOR_RGB2BGR)) - out.release() - print("Record saved to " + video_path) - - -def main(): - parser = get_parser() - parser.add_argument("-ct", "--control", default="slider", help="How to control robot during testing. Valid arguments: keyboard, observation, random, oraculum, slider") - parser.add_argument("-vs", "--vsampling", action="store_true", help="Visualize sampling area.") - parser.add_argument("-vt", "--vtrajectory", action="store_true", help="Visualize gripper trajectgory.") - parser.add_argument("-vn", "--vinfo", action="store_true", help="Visualize info. 
Valid arguments: True, False") - parser.add_argument("-nl", "--natural_language", default=False, help="NL Valid arguments: True, False") - arg_dict = get_arguments(parser) - model_logdir = os.path.dirname(arg_dict.get("model_path","")) - # Check if we chose one of the existing engines - if arg_dict["engine"] not in AVAILABLE_SIMULATION_ENGINES: - print(f"Invalid simulation engine. Valid arguments: --engine {AVAILABLE_SIMULATION_ENGINES}.") - return - if arg_dict.get("model_path") is None: - print("Path to the model using --model_path argument not specified. Testing random actions in selected environment.") - arg_dict["gui"] = 1 - env = configure_env(arg_dict, model_logdir, for_train=0) - test_env(env, arg_dict) - - else: - env = configure_env(arg_dict, model_logdir, for_train=0) - implemented_combos = configure_implemented_combos(env, model_logdir, arg_dict) - test_model(env, None, implemented_combos, arg_dict, model_logdir) - - -if __name__ == "__main__": - main() diff --git a/myGym/stable_baselines_mygym/TorchPPO.py b/myGym/stable_baselines_mygym/TorchPPO.py index d0247050..0e84dc2d 100644 --- a/myGym/stable_baselines_mygym/TorchPPO.py +++ b/myGym/stable_baselines_mygym/TorchPPO.py @@ -2,7 +2,7 @@ import numpy as np import torch as th -from gym import spaces +from gymnasium import spaces from torch.nn import functional as F from stable_baselines3.common import logger diff --git a/myGym/stable_baselines_mygym/algo.py b/myGym/stable_baselines_mygym/algo.py index 0ec97b90..0aaac838 100644 --- a/myGym/stable_baselines_mygym/algo.py +++ b/myGym/stable_baselines_mygym/algo.py @@ -1,6 +1,6 @@ import time -import gym +import gymnasium as gym import numpy as np import tensorflow as tf diff --git a/myGym/stable_baselines_mygym/multi_acktr.py b/myGym/stable_baselines_mygym/multi_acktr.py index 2b7aa7d9..19858c2c 100644 --- a/myGym/stable_baselines_mygym/multi_acktr.py +++ b/myGym/stable_baselines_mygym/multi_acktr.py @@ -2,7 +2,7 @@ import warnings import tensorflow as tf -from gym.spaces import Box, Discrete +from gymnasium.spaces import Box, Discrete from stable_baselines import logger from stable_baselines.a2c.a2c import A2CRunner @@ -17,7 +17,7 @@ from stable_baselines.common.misc_util import set_global_seeds import commentjson import numpy as np -import gym +import gymnasium as gym import os from collections import deque diff --git a/myGym/stable_baselines_mygym/multi_ppo2.py b/myGym/stable_baselines_mygym/multi_ppo2.py index df73e39b..f1a43f8d 100644 --- a/myGym/stable_baselines_mygym/multi_ppo2.py +++ b/myGym/stable_baselines_mygym/multi_ppo2.py @@ -1,7 +1,7 @@ from pickle import NONE import time import os -import gym +import gymnasium as gym import numpy as np from numpy.lib.function_base import append from myGym.utils.callbacks import SaveOnTopRewardCallback diff --git a/myGym/stable_baselines_mygym/policies.py b/myGym/stable_baselines_mygym/policies.py index f1fa2c6f..31171d33 100644 --- a/myGym/stable_baselines_mygym/policies.py +++ b/myGym/stable_baselines_mygym/policies.py @@ -4,7 +4,7 @@ import numpy as np import tensorflow as tf -from gym.spaces import Discrete +from gymnasium.spaces import Discrete from stable_baselines.common.tf_util import batch_to_seq, seq_to_batch from stable_baselines.common.tf_layers import conv, linear, conv_to_fc, lstm diff --git a/myGym/stable_baselines_mygym/ppo2.py b/myGym/stable_baselines_mygym/ppo2.py index 6e31c0d8..9d3c125d 100644 --- a/myGym/stable_baselines_mygym/ppo2.py +++ b/myGym/stable_baselines_mygym/ppo2.py @@ -1,6 +1,6 @@ import time -import 
gym +import gymnasium as gym import numpy as np import tensorflow as tf diff --git a/myGym/stable_baselines_mygym/reference.py b/myGym/stable_baselines_mygym/reference.py index a04da18c..20b1eb62 100644 --- a/myGym/stable_baselines_mygym/reference.py +++ b/myGym/stable_baselines_mygym/reference.py @@ -1,6 +1,6 @@ import time -import gym +import gymnasium as gym import numpy as np import tensorflow as tf diff --git a/myGym/stable_baselines_mygym/testalgo.py b/myGym/stable_baselines_mygym/testalgo.py index 2fffe28f..03ec4524 100644 --- a/myGym/stable_baselines_mygym/testalgo.py +++ b/myGym/stable_baselines_mygym/testalgo.py @@ -1,6 +1,6 @@ import time -import gym +import gymnasium as gym import numpy as np import tensorflow as tf diff --git a/myGym/stable_baselines_mygym/testpolicies.py b/myGym/stable_baselines_mygym/testpolicies.py index f1fa2c6f..31171d33 100644 --- a/myGym/stable_baselines_mygym/testpolicies.py +++ b/myGym/stable_baselines_mygym/testpolicies.py @@ -4,7 +4,7 @@ import numpy as np import tensorflow as tf -from gym.spaces import Discrete +from gymnasium.spaces import Discrete from stable_baselines.common.tf_util import batch_to_seq, seq_to_batch from stable_baselines.common.tf_layers import conv, linear, conv_to_fc, lstm diff --git a/myGym/test.py b/myGym/test.py deleted file mode 100644 index ad266732..00000000 --- a/myGym/test.py +++ /dev/null @@ -1,462 +0,0 @@ -from ast import arg -import gym -from myGym import envs -import cv2 -from myGym.train import get_parser, get_arguments, configure_implemented_combos, configure_env -import os, imageio -import numpy as np -import time -from numpy import matrix -import pybullet as p -import pybullet_data -import pkg_resources -import random -import getkey - - - -clear = lambda: os.system('clear') - -AVAILABLE_SIMULATION_ENGINES = ["mujoco", "pybullet"] -AVAILABLE_TRAINING_FRAMEWORKS = ["tensorflow", "pytorch"] - -def visualize_sampling_area(arg_dict): - rx = (arg_dict["task_objects"][0]["goal"]["sampling_area"][0] - arg_dict["task_objects"][0]["goal"]["sampling_area"][1])/2 - ry = (arg_dict["task_objects"][0]["goal"]["sampling_area"][2] - arg_dict["task_objects"][0]["goal"]["sampling_area"][3])/2 - rz = (arg_dict["task_objects"][0]["goal"]["sampling_area"][4] - arg_dict["task_objects"][0]["goal"]["sampling_area"][5])/2 - - visual = p.createVisualShape(shapeType=p.GEOM_BOX, halfExtents=[rx,ry,rz], rgbaColor=[1,0,0,.2]) - collision = -1 - - sampling = p.createMultiBody( - baseVisualShapeIndex=visual, - baseCollisionShapeIndex=collision, - baseMass=0, - basePosition=[arg_dict["task_objects"][0]["goal"]["sampling_area"][0]-rx, arg_dict["task_objects"][0]["goal"]["sampling_area"][2]-ry,arg_dict["task_objects"][0]["goal"]["sampling_area"][4]-rz], - ) - -def visualize_trajectories (info,action): - - visualo = p.createVisualShape(shapeType=p.GEOM_SPHERE, radius=0.01, rgbaColor=[0,0,1,.3]) - collision = -1 - p.createMultiBody( - baseVisualShapeIndex=visualo, - baseCollisionShapeIndex=collision, - baseMass=0, - basePosition=info['o']['actual_state'], - ) - - #visualr = p.createVisualShape(shapeType=p.GEOM_SPHERE, radius=0.01, rgbaColor=[0,1,0,.5]) - #p.createMultiBody( - # baseVisualShapeIndex=visualr, - # baseCollisionShapeIndex=collision, - # baseMass=0, - # basePosition=info['o']['additional_obs']['endeff_xyz'], - #) - - visuala = p.createVisualShape(shapeType=p.GEOM_SPHERE, radius=0.01, rgbaColor=[1,0,0,.3]) - p.createMultiBody( - baseVisualShapeIndex=visuala, - baseCollisionShapeIndex=collision, - baseMass=0, - basePosition=action[:3], - 
) - -def visualize_goal(info): - visualg = p.createVisualShape(shapeType=p.GEOM_SPHERE, radius=0.01, rgbaColor=[1,0,0,.5]) - collision = -1 - p.createMultiBody( - baseVisualShapeIndex=visualg, - baseCollisionShapeIndex=collision, - baseMass=0, - basePosition=info['o']['goal_state'], - ) -def change_dynamics(cubex,lfriction,rfriction,ldamping,adamping): - p.changeDynamics(cubex, -1, lateralFriction=p.readUserDebugParameter(lfriction)) - p.changeDynamics(cubex,-1,rollingFriction=p.readUserDebugParameter(rfriction)) - p.changeDynamics(cubex, -1, linearDamping=p.readUserDebugParameter(ldamping)) - p.changeDynamics(cubex, -1, angularDamping=p.readUserDebugParameter(adamping)) - - #visualrobot = p.createVisualShape(shapeType=p.GEOM_SPHERE, radius=1, rgbaColor=[0,1,0,.2]) - #collisionrobot = -1 - #sampling = p.createMultiBody( - # baseVisualShapeIndex=visualrobot, - # baseCollisionShapeIndex=collisionrobot, - # baseMass=0, - # basePosition=[0,0,0.3], - #) - -def visualize_infotext(action, env, info): - p.addUserDebugText(f"Episode:{env.env.episode_number}", - [.65, 1., 0.45], textSize=1.0, lifeTime=0.5, textColorRGB=[0.4, 0.2, .3]) - p.addUserDebugText(f"Step:{env.env.episode_steps}", - [.67, 1, .40], textSize=1.0, lifeTime=0.5, textColorRGB=[0.2, 0.8, 1]) - p.addUserDebugText(f"Subtask:{env.env.task.current_task}", - [.69, 1, 0.35], textSize=1.0, lifeTime=0.5, textColorRGB=[0.4, 0.2, 1]) - p.addUserDebugText(f"Network:{env.env.reward.current_network}", - [.71, 1, 0.3], textSize=1.0, lifeTime=0.5, textColorRGB=[0.0, 0.0, 1]) - p.addUserDebugText(f"Action (Gripper):{matrix(np.around(np.array(action),2))}", - [.73, 1, 0.25], textSize=1.0, lifeTime=0.5, textColorRGB=[1, 0, 0]) - p.addUserDebugText(f"Actual_state:{matrix(np.around(np.array(env.env.observation['task_objects']['actual_state'][:3]),2))}", - [.75, 1, 0.2], textSize=1.0, lifeTime=0.5, textColorRGB=[0.0, 1, 0.0]) - p.addUserDebugText(f"End_effector:{matrix(np.around(np.array(env.env.robot.end_effector_pos),2))}", - [.77, 1, 0.15], textSize=1.0, lifeTime=0.5, textColorRGB=[0.0, 1, 0.0]) - p.addUserDebugText(f" Object:{matrix(np.around(np.array(info['o']['actual_state']),2))}", - [.8, 1, 0.10], textSize=1.0, lifeTime=0.5, textColorRGB=[0.0, 0.0, 1]) - p.addUserDebugText(f"Velocity:{env.env.max_velocity}", - [.79, 1, 0.05], textSize=1.0, lifeTime=0.5, textColorRGB=[0.6, 0.8, .3]) - p.addUserDebugText(f"Force:{env.env.max_force}", - [.81, 1, 0.00], textSize=1.0, lifeTime=0.5, textColorRGB=[0.3, 0.2, .4]) - -def detect_key(keypress,arg_dict,action): - - if 97 in keypress.keys() and keypress[97] == 1: # A - action[2] += .03 - print(action) - if 122 in keypress.keys() and keypress[122] == 1: # Z/Y - action[2] -= .03 - print(action) - if 65297 in keypress.keys() and keypress[65297] == 1: # ARROW UP - action[1] -= .03 - print(action) - if 65298 in keypress.keys() and keypress[65298] == 1: # ARROW DOWN - action[1] += .03 - print(action) - if 65295 in keypress.keys() and keypress[65295] == 1: # ARROW LEFT - action[0] += .03 - print(action) - if 65296 in keypress.keys() and keypress[65296] == 1: # ARROW RIGHT - action[0] -= .03 - print(action) - if 120 in keypress.keys() and keypress[120] == 1: # X - action[3] -= .03 - action[4] -= .03 - print(action) - if 99 in keypress.keys() and keypress[99] == 1: # C - action[3] += .03 - action[4] += .03 - print(action) - # if 100 in keypress.keys() and keypress[100] == 1: - # cube[cubecount] = p.loadURDF(pkg_resources.resource_filename("myGym", os.path.join("envs", "objects/assembly/urdf/cube_holes.urdf")), 
[action[0], action[1],action[2]-0.2 ]) - # change_dynamics(cube[cubecount],lfriction,rfriction,ldamping,adamping) - # cubecount +=1 - if "step" in arg_dict["robot_action"]: - action[:3] = np.multiply(action [:3],10) - elif "joints" in arg_dict["robot_action"]: - print("Robot action: Joints - KEYBOARD CONTROL UNDER DEVELOPMENT") - quit() - #for i in range (env.action_space.shape[0]): - # env.env.robot.joints_max_velo[i] = p.readUserDebugParameter(maxvelo) - # env.env.robot.joints_max_force[i] = p.readUserDebugParameter(maxforce) - return action - -def test_env(env, arg_dict): - arg_dict["vsampling"] = 0 - arg_dict["vinfo"] = 0 - spawn_objects = False - #env.render("human") - #env.reset() - #Prepare names for sliders - joints = ['Joint1','Joint2','Joint3','Joint4','Joint5','Joint6','Joint7','Joint 8','Joint 9', 'Joint10', 'Joint11','Joint12','Joint13','Joint14','Joint15','Joint16','Joint17','Joint 18','Joint 19'] - jointparams = ['Jnt1','Jnt2','Jnt3','Jnt4','Jnt5','Jnt6','Jnt7','Jnt 8','Jnt 9', 'Jnt10', 'Jnt11','Jnt12','Jnt13','Jnt14','Jnt15','Jnt16','Jnt17','Jnt 18','Jnt 19'] - cube = ['Cube1','Cube2','Cube3','Cube4','Cube5','Cube6','Cube7','Cube8','Cube9','Cube10','Cube11','Cube12','Cube13','Cube14','Cube15','Cube16','Cube17','Cube18','Cube19'] - cubecount = 0 - - if arg_dict["gui"] == 0: - print ("Add --gui 1 parameter to visualize environment") - quit() - - p.configureDebugVisualizer(p.COV_ENABLE_GUI, 0) - - p.resetDebugVisualizerCamera(1.2, 180, -30, [0.0, 0.5, 0.05]) - p.setAdditionalSearchPath(pybullet_data.getDataPath()) - #newobject = p.loadURDF("cube.urdf", [3.1,3.7,0.1]) - #p.changeDynamics(newobject, -1, lateralFriction=1.00) - #p.setRealTimeSimulation(1) - if arg_dict["control"] == "slider": - p.configureDebugVisualizer(p.COV_ENABLE_GUI, 1) - if "joints" in arg_dict["robot_action"]: - if 'gripper' in arg_dict["robot_action"]: - print ("gripper is present") - for i in range (env.action_space.shape[0]): - if i < (env.action_space.shape[0] - len(env.env.robot.gjoints_rest_poses)): - joints[i] = p.addUserDebugParameter(joints[i], env.action_space.low[i], env.action_space.high[i], env.env.robot.init_joint_poses[i]) - else: - joints[i] = p.addUserDebugParameter(joints[i], env.action_space.low[i], env.action_space.high[i], .02) - else: - for i in range (env.action_space.shape[0]): - joints[i] = p.addUserDebugParameter(joints[i],env.action_space.low[i], env.action_space.high[i], env.env.robot.init_joint_poses[i]) - elif "absolute" in arg_dict["robot_action"]: - if 'gripper' in arg_dict["robot_action"]: - print ("gripper is present") - for i in range (env.action_space.shape[0]): - if i < (env.action_space.shape[0] - len(env.env.robot.gjoints_rest_poses)): - joints[i] = p.addUserDebugParameter(joints[i], -1, 1, arg_dict["robot_init"][i]) - else: - joints[i] = p.addUserDebugParameter(joints[i], -1, 1, .02) - else: - for i in range (env.action_space.shape[0]): - joints[i] = p.addUserDebugParameter(joints[i], -1, 1, arg_dict["robot_init"][i]) - elif "step" in arg_dict["robot_action"]: - if 'gripper' in arg_dict["robot_action"]: - print ("gripper is present") - for i in range (env.action_space.shape[0]): - if i < (env.action_space.shape[0] - len(env.env.robot.gjoints_rest_poses)): - joints[i] = p.addUserDebugParameter(joints[i], -1, 1, 0) - else: - joints[i] = p.addUserDebugParameter(joints[i], -1, 1, .02) - else: - for i in range (env.action_space.shape[0]): - joints[i] = p.addUserDebugParameter(joints[i], -1, 1, 0) - - - #maxvelo = p.addUserDebugParameter("Max Velocity", 0.1, 50, 
env.env.robot.joints_max_velo[0]) - #maxforce = p.addUserDebugParameter("Max Force", 0.1, 300, env.env.robot.joints_max_force[0]) - lfriction = p.addUserDebugParameter("Lateral Friction", 0, 100, 0) - rfriction = p.addUserDebugParameter("Spinning Friction", 0, 100, 0) - ldamping = p.addUserDebugParameter("Linear Damping", 0, 100, 0) - adamping = p.addUserDebugParameter("Angular Damping", 0, 100, 0) - #action.append(jointparams[i]) - if arg_dict["vsampling"] == True: - visualize_sampling_area(arg_dict) - - #visualgr = p.createVisualShape(shapeType=p.GEOM_SPHERE, radius=.005, rgbaColor=[0,0,1,.1]) - - if arg_dict["control"] == "random": - action = env.action_space.sample() - if arg_dict["control"] == "keyboard": - action = arg_dict["robot_init"] - if "gripper" in arg_dict["robot_action"]: - action.append(.1) - action.append(.1) - if arg_dict["control"] == "slider": - action = [] - for i in range (env.action_space.shape[0]): - jointparams[i] = p.readUserDebugParameter(joints[i]) - action.append(jointparams[i]) - - for e in range(50): - env.reset() - #if spawn_objects: - # cube[e] = p.loadURDF(pkg_resources.resource_filename("myGym", os.path.join("envs", "objects/assembly/urdf/cube_holes.urdf")), [0, 0.5, .1]) - - #if visualize_traj: - # visualize_goal(info) - - for t in range(arg_dict["max_episode_steps"]): - - - if arg_dict["control"] == "slider": - action = [] - for i in range (env.action_space.shape[0]): - jointparams[i] = p.readUserDebugParameter(joints[i]) - action.append(jointparams[i]) - #env.env.robot.joints_max_velo[i] = p.readUserDebugParameter(maxvelo) - #env.env.robot.joints_max_force[i] = p.readUserDebugParameter(maxforce) - - - if arg_dict["control"] == "observation": - if t == 0: - action = env.action_space.sample() - else: - - if "joints" in arg_dict["robot_action"]: - action = info['o']["additional_obs"]["joints_angles"] #n - elif "absolute" in arg_dict["robot_action"]: - action = info['o']["actual_state"] - else: - action = [0,0,0] - - if arg_dict["control"] == "oraculum": - if t == 0: - action = env.action_space.sample() - else: - - if "absolute" in arg_dict["robot_action"]: - action = info['o']["goal_state"] - else: - print("ERROR - Oraculum mode only works for absolute actions") - quit() - - - elif arg_dict["control"] == "keyboard": - keypress = p.getKeyboardEvents() - #print(action) - action = detect_key(keypress,arg_dict,action) - elif arg_dict["control"] == "random": - action = env.action_space.sample() - - - - print (f"Action:{action}") - observation, reward, done, info = env.step(action) - - if arg_dict["vtrajectory"] == True: - visualize_trajectories(info,action) - if arg_dict["vinfo"] == True: - visualize_infotext(action, env, info) - - - #visualize_goal(info) - #if debug_mode: - #print("Reward: {} \n Observation: {} \n EnvObservation: {}".format(reward, observation, env.env.observation)) - #if t>=1: - #action = matrix(np.around(np.array(action),5)) - #oaction = env.env.robot.get_joints_states() - #oaction = matrix(np.around(np.array(oaction[0:action.shape[0]]),5)) - #diff = matrix(np.around(np.array(action-oaction),5)) - #print(env.env.robot.get_joints_states()) - #print(f"Step:{t}") - #print (f"RAction:{action}") - #print(f"OAction:{oaction}") - #print(f"DAction:{diff}") - #p.addUserDebugText(f"DAction:{diff}", - # [1, 1, 0.1], textSize=1.0, lifeTime=0.05, textColorRGB=[0.6, 0.0, 0.6]) - #time.sleep(.4) - #clear() - - #if action_control == "slider": - # action=[] - if "step" in arg_dict["robot_action"]: - action[:3] = [0,0,0] - - if arg_dict["visualize"]: - 
visualizations = [[],[]] - env.render("human") - for camera_id in range(len(env.cameras)): - camera_render = env.render(mode="rgb_array", camera_id=camera_id) - image = cv2.cvtColor(camera_render[camera_id]["image"], cv2.COLOR_RGB2BGR) - depth = camera_render[camera_id]["depth"] - image = cv2.copyMakeBorder(image, 30, 10, 10, 20, cv2.BORDER_CONSTANT, value=[255, 255, 255]) - cv2.putText(image, 'Camera {}'.format(camera_id), (10, 20), cv2.FONT_HERSHEY_SIMPLEX, .5, - (0, 0, 0), 1, 0) - visualizations[0].append(image) - depth = cv2.copyMakeBorder(depth, 30, 10, 10, 20, cv2.BORDER_CONSTANT, value=[255, 255, 255]) - cv2.putText(depth, 'Camera {}'.format(camera_id), (10, 20), cv2.FONT_HERSHEY_SIMPLEX, .5, - (0, 0, 0), 1, 0) - visualizations[1].append(depth) - - if len(visualizations[0])%2 !=0: - visualizations[0].append(255*np.ones(visualizations[0][0].shape, dtype=np.uint8)) - visualizations[1].append(255*np.ones(visualizations[1][0].shape, dtype=np.float32)) - fig_rgb = np.vstack((np.hstack((visualizations[0][0::2])),np.hstack((visualizations[0][1::2])))) - fig_depth = np.vstack((np.hstack((visualizations[1][0::2])),np.hstack((visualizations[1][1::2])))) - cv2.imshow('Camera RGB renders', fig_rgb) - cv2.imshow('Camera depthrenders', fig_depth) - cv2.waitKey(1) - - if done: - print("Episode finished after {} timesteps".format(t + 1)) - break - -def test_model(env, model=None, implemented_combos=None, arg_dict=None, model_logdir=None, deterministic=False): - - try: - if "multi" in arg_dict["algo"]: - model_args = implemented_combos[arg_dict["algo"]][arg_dict["train_framework"]][1] - model = implemented_combos[arg_dict["algo"]][arg_dict["train_framework"]][0].load(arg_dict["model_path"], env=model_args[1].env) - else: - model = implemented_combos[arg_dict["algo"]][arg_dict["train_framework"]][0].load(arg_dict["model_path"]) - except: - if (arg_dict["algo"] in implemented_combos.keys()) and (arg_dict["train_framework"] not in list(implemented_combos[arg_dict["algo"]].keys())): - err = "{} is only implemented with {}".format(arg_dict["algo"],list(implemented_combos[arg_dict["algo"]].keys())[0]) - elif arg_dict["algo"] not in implemented_combos.keys(): - err = "{} algorithm is not implemented.".format(arg_dict["algo"]) - else: - err = "invalid model_path argument" - raise Exception(err) - - images = [] # Empty list for gif images - success_episodes_num = 0 - distance_error_sum = 0 - vel= arg_dict["max_velocity"] - force = arg_dict["max_force"] - steps_sum = 0 - p.resetDebugVisualizerCamera(1.2, 180, -30, [0.0, 0.5, 0.05]) - #p.setRealTimeSimulation(1) - #p.setTimeStep(0.01) - - for e in range(arg_dict["eval_episodes"]): - done = False - obs = env.reset() - is_successful = 0 - distance_error = 0 - step_sum = 0 - while not done: - steps_sum += 1 - action, _state = model.predict(obs, deterministic=deterministic) - obs, reward, done, info = env.step(action) - is_successful = not info['f'] - distance_error = info['d'] - if arg_dict["vinfo"] == True: - visualize_infotext(action, env, info) - - - if (arg_dict["record"] > 0) and (len(images) < 8000): - render_info = env.render(mode="rgb_array", camera_id = arg_dict["camera"]) - image = render_info[arg_dict["camera"]]["image"] - images.append(image) - print(f"appending image: total size: {len(images)}]") - - success_episodes_num += is_successful - distance_error_sum += distance_error - - mean_distance_error = distance_error_sum / arg_dict["eval_episodes"] - mean_steps_num = steps_sum // arg_dict["eval_episodes"] - - 
print("#---------Evaluation-Summary---------#") - print("{} of {} episodes ({} %) were successful".format(success_episodes_num, arg_dict["eval_episodes"], success_episodes_num / arg_dict["eval_episodes"]*100)) - print("Mean distance error is {:.2f}%".format(mean_distance_error * 100)) - print("Mean number of steps {}".format(mean_steps_num)) - print("#------------------------------------#") - model_name = arg_dict["algo"] + '_' + str(arg_dict["steps"]) - file = open(os.path.join(model_logdir, "train_" + model_name + ".txt"), 'a') - file.write("\n") - file.write("#Evaluation results: \n") - file.write("#{} of {} episodes were successful \n".format(success_episodes_num, arg_dict["eval_episodes"])) - file.write("#Mean distance error is {:.2f}% \n".format(mean_distance_error * 100)) - file.write("#Mean number of steps {}\n".format(mean_steps_num)) - file.close() - - if arg_dict["record"] == 1: - gif_path = os.path.join(model_logdir, "train_" + model_name + ".gif") - imageio.mimsave(gif_path, [np.array(img) for i, img in enumerate(images) if i%2 == 0], fps=15) - os.system('./utils/gifopt -O3 --lossy=5 -o {dest} {source}'.format(source=gif_path, dest=gif_path)) - print("Record saved to " + gif_path) - elif arg_dict["record"] == 2: - video_path = os.path.join(model_logdir, "train_" + model_name + ".avi") - height, width, layers = image.shape - out = cv2.VideoWriter(video_path,cv2.VideoWriter_fourcc(*'XVID'), 30, (width,height)) - for img in images: - out.write(cv2.cvtColor(img, cv2.COLOR_RGB2BGR)) - out.release() - print("Record saved to " + video_path) - - -def main(): - parser = get_parser() - parser.add_argument("-ct", "--control", default="slider", help="How to control robot during testing. Valid arguments: keyboard, observation, random, oraculum, slider") - parser.add_argument("-vs", "--vsampling", action="store_true", help="Visualize sampling area.") - parser.add_argument("-vt", "--vtrajectory", action="store_true", help="Visualize gripper trajectgory.") - parser.add_argument("-vn", "--vinfo", action="store_true", help="Visualize info. Valid arguments: True, False") - parser.add_argument("-nl", "--natural_language", default=False, help="NL Valid arguments: True, False") - arg_dict = get_arguments(parser) - model_logdir = os.path.dirname(arg_dict.get("model_path","")) - # Check if we chose one of the existing engines - if arg_dict["engine"] not in AVAILABLE_SIMULATION_ENGINES: - print(f"Invalid simulation engine. Valid arguments: --engine {AVAILABLE_SIMULATION_ENGINES}.") - return - if arg_dict.get("model_path") is None: - print("Path to the model using --model_path argument not specified. 
Testing random actions in selected environment.") - arg_dict["gui"] = 1 - env = configure_env(arg_dict, model_logdir, for_train=0) - test_env(env, arg_dict) - - else: - env = configure_env(arg_dict, model_logdir, for_train=0) - implemented_combos = configure_implemented_combos(env, model_logdir, arg_dict) - test_model(env, None, implemented_combos, arg_dict, model_logdir) - - -if __name__ == "__main__": - main() diff --git a/myGym/test.urdf b/myGym/test.urdf deleted file mode 100644 index e69de29b..00000000 diff --git a/myGym/train.py b/myGym/train.py deleted file mode 100644 index 148ad428..00000000 --- a/myGym/train.py +++ /dev/null @@ -1,352 +0,0 @@ -import random -import copy - -import pkg_resources -import os, sys, time, yaml -import argparse -import numpy as np -import matplotlib.pyplot as plt -import json, commentjson -import gym -from myGym import envs -import myGym.utils.cfg_comparator as cfg -#os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' -#from stable_baselines.common.policies import MlpPolicy -#from stable_baselines.common import make_vec_env -#from stable_baselines.common.vec_env import DummyVecEnv -#from stable_baselines.bench import Monitor -#from stable_baselines import results_plotter -#from stable_baselines.her import GoalSelectionStrategy, HERGoalEnvWrapper -# For now I am importing both with slightly modified names P-PyTorch T-TensorFlow -#from stable_baselines import PPO1 as PPO1_T, PPO2 as PPO2_T, HER as HER_T, SAC as SAC_T, DDPG as DDPG_T -#from stable_baselines import TD3 as TD3_T, A2C as A2C_T, ACKTR as ACKTR_T, TRPO as TRPO_T, GAIL as GAIL_T -#try: -# from stable_baselines3 import PPO as PPO_P, A2C as A2C_P, SAC as SAC_P, TD3 as TD3_P -#except: -# print("Torch isn't probably installed correctly") - -#from myGym.stable_baselines_mygym.algo import MyAlgo -#from myGym.stable_baselines_mygym.reference import REFER -#from myGym.stable_baselines_mygym.multi_ppo2 import MultiPPO2 -#from myGym.stable_baselines_mygym.multi_acktr import MultiACKTR -#from myGym.stable_baselines_mygym.policies import MyMlpPolicy -#from myGym.stable_baselines_mygym.TorchPPO import TorchPPO -#from myGym.stable_baselines_mygym.TorchPPOpolicies import TorchMlpPolicy - - -#from stable_baselines.gail import ExpertDataset, generate_expert_traj -#from stable_baselines.sac.policies import MlpPolicy as MlpPolicySAC -#from stable_baselines.ddpg.policies import MlpPolicy as MlpPolicyDDPG -#from stable_baselines.td3.policies import MlpPolicy as MlpPolicyTD3 - -# Import helper classes and functions for monitoring -#from myGym.utils.callbacks import ProgressBarManager, SaveOnBestTrainingRewardCallback, PlottingCallback, CustomEvalCallback -from myGym.envs.natural_language import NaturalLanguage - -# This is global variable for the type of engine we are working with -AVAILABLE_SIMULATION_ENGINES = ["mujoco", "pybullet"] -AVAILABLE_TRAINING_FRAMEWORKS = ["tensorflow", "pytorch"] - - -def save_results(arg_dict, model_name, env, model_logdir=None, show=False): - if model_logdir is None: - model_logdir = arg_dict["logdir"] - print(f"model_logdir: {model_logdir}") - - #results_plotter.EPISODES_WINDOW = 100 - #results_plotter.plot_results([model_logdir], arg_dict["steps"], results_plotter.X_TIMESTEPS, arg_dict["algo"] + " " + arg_dict["env_name"] + " reward") - #plt.gcf().set_size_inches(8, 6) - #plt.savefig(os.path.join(model_logdir, model_name) + '_reward_results.png') - #plot_extended_results(model_logdir, 'd', results_plotter.X_TIMESTEPS, arg_dict["algo"] + " " + arg_dict["env_name"] + " distance", "Episode 
Distances") - #plt.gcf().set_size_inches(8, 6) - #plt.savefig(os.path.join(model_logdir, model_name) + '_distance_results.png') - #plt.close() - #plt.close() - #if isinstance(env, HERGoalEnvWrapper): - # results_plotter.plot_curves([(np.arange(len(env.env.episode_final_distance)),np.asarray(env.env.episode_final_distance))],'episodes',arg_dict["algo"] + " " + arg_dict["env_name"] + ' final step distance') - #else: - # results_plotter.plot_curves([(np.arange(len(env.unwrapped.episode_final_distance)),np.asarray(env.unwrapped.episode_final_distance))],'episodes',arg_dict["algo"] + " " + arg_dict["env_name"] + ' final step distance') - #plt.gcf().set_size_inches(8, 6) - #plt.ylabel("Step Distances") - #plt.savefig(os.path.join(model_logdir, model_name) + "_final_distance_results.png") - #plt.close() - print("Congratulations! Training with {} timesteps succeed!".format(arg_dict["steps"])) - #if show: - # plt.show() - -def configure_env(arg_dict, model_logdir=None, for_train=True): - env_arguments = {"render_on": True, "visualize": arg_dict["visualize"], "workspace": arg_dict["workspace"], "framework":"stable_baselines", - "robot": arg_dict["robot"], "robot_init_joint_poses": arg_dict["robot_init"], - "robot_action": arg_dict["robot_action"],"max_velocity": arg_dict["max_velocity"], - "max_force": arg_dict["max_force"],"task_type": arg_dict["task_type"], - "action_repeat": arg_dict["action_repeat"], - "task_objects":arg_dict["task_objects"], "observation":arg_dict["observation"], "distractors":arg_dict["distractors"], - "num_networks":arg_dict.get("num_networks", 1), "network_switcher":arg_dict.get("network_switcher", "gt"), - "distance_type": arg_dict["distance_type"], "used_objects": arg_dict["used_objects"], - "active_cameras": arg_dict["camera"], "color_dict":arg_dict.get("color_dict", {}), - "max_steps": arg_dict["max_episode_steps"], "visgym":arg_dict["visgym"], - "reward": arg_dict["reward"], "logdir": arg_dict["logdir"], "vae_path": arg_dict["vae_path"], - "yolact_path": arg_dict["yolact_path"], "yolact_config": arg_dict["yolact_config"], - "natural_language": bool(arg_dict["natural_language"]), - "training": bool(for_train) - } - if for_train: - env_arguments["gui_on"] = arg_dict["gui"] - else: - env_arguments["gui_on"] = arg_dict["gui"] - - if arg_dict["algo"] == "her": - env = gym.make(arg_dict["env_name"], **env_arguments, obs_space="dict") # her needs obs as a dict - else: - env = gym.make(arg_dict["env_name"], **env_arguments) - if for_train: - if arg_dict["engine"] == "mujoco": - env = VecMonitor(env, model_logdir) if arg_dict["multiprocessing"] else Monitor(env, model_logdir) - elif arg_dict["engine"] == "pybullet": - try: - env = Monitor(env, model_logdir, info_keywords=tuple('d')) - except: - pass - - if arg_dict["algo"] == "her": - env = HERGoalEnvWrapper(env) - return env - - -def configure_implemented_combos(env, model_logdir, arg_dict): - implemented_combos = {"ppo2": {"tensorflow": [PPO2_T, (MlpPolicy, env), {"n_steps": arg_dict["algo_steps"], "verbose": 1, "tensorboard_log": model_logdir}]}, - "ppo": {"tensorflow": [PPO1_T, (MlpPolicy, env), {"verbose": 1, "tensorboard_log": model_logdir}],}, - "her": {"tensorflow": [HER_T, (MlpPolicyDDPG, env, DDPG_T), {"goal_selection_strategy": 'future', "verbose": 1,"tensorboard_log": model_logdir}]}, - "sac": {"tensorflow": [SAC_T, (MlpPolicySAC, env), {"verbose": 1, "tensorboard_log": model_logdir}],}, - "ddpg": {"tensorflow": [DDPG_T, (MlpPolicyDDPG, env),{"verbose": 1, "tensorboard_log": model_logdir}]}, - "td3": {"tensorflow": 
[TD3_T, (MlpPolicyTD3, env), {"verbose": 1, "tensorboard_log": model_logdir}],}, - "acktr": {"tensorflow": [ACKTR_T, (MlpPolicy, env), {"n_steps": arg_dict["algo_steps"], "verbose": 1, "tensorboard_log": model_logdir}]}, - "trpo": {"tensorflow": [TRPO_T, (MlpPolicy, env), {"verbose": 1, "tensorboard_log": model_logdir}]}, - "gail": {"tensorflow": [GAIL_T, (MlpPolicy, env), {"verbose": 1, "tensorboard_log": model_logdir}]}, - "a2c": {"tensorflow": [A2C_T, (MlpPolicy, env), {"n_steps": arg_dict["algo_steps"], "verbose": 1, "tensorboard_log": model_logdir}],}, - "torchppo": {"tensorflow": [TorchPPO, (TorchMlpPolicy, env), {"n_steps": arg_dict["algo_steps"], "verbose": 1, "tensorboard_log": model_logdir}]}, - "myalgo": {"tensorflow": [MyAlgo, (MyMlpPolicy, env), {"n_steps": arg_dict["algo_steps"], "verbose": 1, "tensorboard_log": model_logdir}]}, - "ref": {"tensorflow": [REFER, (MlpPolicy, env), {"n_steps": arg_dict["algo_steps"], "verbose": 1, "tensorboard_log": model_logdir}]}, - "multippo2": {"tensorflow": [MultiPPO2, (MlpPolicy, env), {"n_steps": arg_dict["algo_steps"],"n_models": arg_dict["num_networks"], "verbose": 1, "tensorboard_log": model_logdir}]}, - "multiacktr": {"tensorflow": [MultiACKTR, (MlpPolicy, env), {"n_steps": arg_dict["algo_steps"],"n_models": arg_dict["num_networks"], "verbose": 1, "tensorboard_log": model_logdir}]}} - - if "PPO_P" in sys.modules: - implemented_combos["ppo"]["pytorch"] = [PPO_P, ('MlpPolicy', env), {"n_steps": 1024, "verbose": 1, "tensorboard_log": model_logdir}] - implemented_combos["sac"]["pytorch"] = [SAC_P, ('MlpPolicy', env), {"verbose": 1, "tensorboard_log": model_logdir}] - implemented_combos["td3"]["pytorch"] = [TD3_P, ('MlpPolicy', env), {"verbose": 1, "tensorboard_log": model_logdir}] - implemented_combos["a2c"]["pytorch"] = [A2C_P, ('MlpPolicy', env), {"n_steps": arg_dict["algo_steps"], "verbose": 1, "tensorboard_log": model_logdir}] - - return implemented_combos - - -def train(env, implemented_combos, model_logdir, arg_dict, pretrained_model=None): - model_name = arg_dict["algo"] + '_' + str(arg_dict["steps"]) - conf_pth = os.path.join(model_logdir, "train.json") - model_path = os.path.join(model_logdir, "best_model.zip") - arg_dict["model_path"] = model_path - seed = arg_dict.get("seed", None) - with open(conf_pth, "w") as f: - json.dump(arg_dict, f, indent=4) - - model_args = implemented_combos[arg_dict["algo"]][arg_dict["train_framework"]][1] - model_kwargs = implemented_combos[arg_dict["algo"]][arg_dict["train_framework"]][2] - if seed is not None: - np.random.seed(seed) - random.seed(seed) - model_kwargs["seed"] = seed - if pretrained_model: - if not os.path.isabs(pretrained_model): - pretrained_model = pkg_resources.resource_filename("myGym", pretrained_model) - env = model_args[1] - vec_env = DummyVecEnv([lambda: env]) - model = implemented_combos[arg_dict["algo"]][arg_dict["train_framework"]][0].load(pretrained_model, vec_env) - else: - model = implemented_combos[arg_dict["algo"]][arg_dict["train_framework"]][0](*model_args, **model_kwargs) - - if arg_dict["algo"] == "gail": - # Multi processing: (using MPI) - if arg_dict["train_framework"] == 'tensorflow': - # Generate expert trajectories (train expert) - generate_expert_traj(model, model_name, n_timesteps=3000, n_episodes=100) - # Load the expert dataset - dataset = ExpertDataset(expert_path=model_name+'.npz', traj_limitation=10, verbose=1) - kwargs = {"verbose":1} - if seed is not None: - kwargs["seed"] = seed - model = GAIL_T('MlpPolicy', model_name, dataset, **kwargs) - # Note: 
in practice, you need to train for 1M steps to have a working policy - - start_time = time.time() - callbacks_list = [] - if pretrained_model: - model_logdir = pretrained_model.split('/') - model_logdir = model_logdir[:-1] - model_logdir = "/".join(model_logdir) - auto_save_callback = SaveOnBestTrainingRewardCallback(check_freq=1024, logdir=model_logdir, env=env, engine=arg_dict["engine"], multiprocessing=arg_dict["multiprocessing"]) - else: - auto_save_callback = SaveOnBestTrainingRewardCallback(check_freq=1024, logdir=model_logdir, env=env, engine=arg_dict["engine"], multiprocessing=arg_dict["multiprocessing"]) - callbacks_list.append(auto_save_callback) - if arg_dict["eval_freq"]: - #eval_env = configure_env(arg_dict, model_logdir, for_train=False) - eval_env = env - eval_callback = CustomEvalCallback(eval_env, log_path=model_logdir, - eval_freq=arg_dict["eval_freq"], - algo_steps=arg_dict["algo_steps"], - n_eval_episodes=arg_dict["eval_episodes"], - record=arg_dict["record"], - camera_id=arg_dict["camera"]) - callbacks_list.append(eval_callback) - #callbacks_list.append(PlottingCallback(model_logdir)) - #with ProgressBarManager(total_timesteps=arg_dict["steps"]) as progress_callback: - # callbacks_list.append(progress_callback) - model.learn(total_timesteps=arg_dict["steps"], callback=callbacks_list) - model.save(os.path.join(model_logdir, model_name)) - print("Training time: {:.2f} s".format(time.time() - start_time)) - print("Training steps: {:} s".format(model.num_timesteps)) - - # info_keywords in monitor class above is neccessary for pybullet to save_results - # when using the info_keywords for mujoco we get an error - if arg_dict["engine"] == "pybullet": - save_results(arg_dict, model_name, env, model_logdir) - return model - -def get_parser(): - parser = argparse.ArgumentParser() - #Envinronment - parser.add_argument("-cfg", "--config", default="./configs/train_A_RDDL.json", help="Can be passed instead of all arguments") - parser.add_argument("-n", "--env_name", type=str, help="The name of environment") - parser.add_argument("-ws", "--workspace", type=str, help="The name of workspace") - parser.add_argument("-p", "--engine", type=str, help="Name of the simulation engine you want to use") - parser.add_argument("-sd", "--seed", type=int, help="Seed number") - parser.add_argument("-d", "--render", type=str, help="Type of rendering: opengl, opencv") - parser.add_argument("-c", "--camera", type=int, help="The number of camera used to render and record") - parser.add_argument("-vi", "--visualize", type=int, help="Whether visualize camera render and vision in/out or not: 1 or 0") - parser.add_argument("-vg", "--visgym", type=int, help="Whether visualize gym background: 1 or 0") - parser.add_argument("-g", "--gui", type=int, help="Wether the GUI of the simulation should be used or not: 1 or 0") - #Robot - parser.add_argument("-b", "--robot", type=str, help="Robot to train: kuka, panda, jaco ...") - parser.add_argument("-bi", "--robot_init", nargs="*", type=float, help="Initial robot's end-effector position") - parser.add_argument("-ba", "--robot_action", type=str, help="Robot's action control: step - end-effector relative position, absolute - end-effector absolute position, joints - joints' coordinates") - parser.add_argument("-mv", "--max_velocity", type=float, help="Maximum velocity of robotic arm") - parser.add_argument("-mf", "--max_force", type=float, help="Maximum force of robotic arm") - parser.add_argument("-ar", "--action_repeat", type=int, help="Substeps of simulation without 
action from env") - #Task - parser.add_argument("-tt", "--task_type", type=str, help="Type of task to learn: reach, push, throw, pick_and_place") - parser.add_argument("-to", "--task_objects", nargs="*", type=str, help="Object (for reach) or a pair of objects (for other tasks) to manipulate with") - parser.add_argument("-u", "--used_objects", nargs="*", type=str, help="List of extra objects to randomly appear in the scene") - #Distractors - parser.add_argument("-di", "--distractors", type=str, help="Object (for reach) to evade") - parser.add_argument("-dm", "--distractor_moveable", type=int, help="can distractor move (0/1)") - parser.add_argument("-ds", "--distractor_constant_speed", type=int, help="is speed of distractor constant (0/1)") - parser.add_argument("-dd", "--distractor_movement_dimensions", type=int, help="in how many directions can the distractor move (1/2/3)") - parser.add_argument("-de", "--distractor_movement_endpoints", nargs="*", type=float, help="2 coordinates (starting point and ending point)") - parser.add_argument("-no", "--observed_links_num", type=int, help="number of robot links in observation space") - #Reward - parser.add_argument("-re", "--reward", type=str, help="Defines how to compute the reward") - parser.add_argument("-dt", "--distance_type", type=str, help="Type of distance metrics: euclidean, manhattan") - #Train - parser.add_argument("-w", "--train_framework", type=str, help="Name of the training framework you want to use: {tensorflow, pytorch}") - parser.add_argument("-a", "--algo", type=str, help="The learning algorithm to be used (ppo2 or her)") - parser.add_argument("-s", "--steps", type=int, help="The number of steps to train") - parser.add_argument("-ms", "--max_episode_steps", type=int, help="The maximum number of steps per episode") - parser.add_argument("-ma", "--algo_steps", type=int, help="The number of steps per for algo training (PPO2,A2C)") - #Evaluation - parser.add_argument("-ef", "--eval_freq", type=int, help="Evaluate the agent every eval_freq steps") - parser.add_argument("-e", "--eval_episodes", type=int, help="Number of episodes to evaluate performance of the robot") - #Saving and Logging - parser.add_argument("-l", "--logdir", type=str, help="Where to save results of training and trained models") - parser.add_argument("-r", "--record", type=int, help="1: make a gif of model perfomance, 2: make a video of model performance, 0: don't record") - #Mujoco - parser.add_argument("-i", "--multiprocessing", type=int, help="True: multiprocessing on (specify also the number of vectorized environemnts), False: multiprocessing off") - parser.add_argument("-v", "--vectorized_envs", type=int, help="The number of vectorized environments to run at once (mujoco multiprocessing only)") - #Paths - parser.add_argument("-m", "--model_path", type=str, help="Path to the the trained model to test") - parser.add_argument("-vp", "--vae_path", type=str, help="Path to a trained VAE in 2dvu reward type") - parser.add_argument("-yp", "--yolact_path", type=str, help="Path to a trained Yolact in 3dvu reward type") - parser.add_argument("-yc", "--yolact_config", type=str, help="Path to saved config obj or name of an existing one in the data/Config script (e.g. 
'yolact_base_config') or None for autodetection") - parser.add_argument('-ptm', "--pretrained_model", type=str, help="Path to a model that you want to continue training") - #Language - # parser.add_argument("-nl", "--natural_language", type=str, default="", - # help="If passed, instead of training the script will produce a natural language output " - # "of the given type, save it to the predefined file (for communication with other scripts) " - # "and exit the program (without the actual training taking place). Expected values are \"description\" " - # "(generate a task description) or \"new_tasks\" (generate new tasks)") - return parser - -def get_arguments(parser): - args = parser.parse_args() - with open(args.config, "r") as f: - arg_dict = commentjson.load(f) - for key, value in vars(args).items(): - if value is not None and key is not "config": - if key in ["robot_init"]: - arg_dict[key] = [float(arg_dict[key][i]) for i in range(len(arg_dict[key]))] - elif key in ["task_objects"]: - arg_dict[key] = task_objects_replacement(value, arg_dict[key], arg_dict["task_type"]) - else: - arg_dict[key] = value - return arg_dict - - -def task_objects_replacement(task_objects_new, task_objects_old, task_type): - """ - If task_objects is given as a parameter, this method converts string into a proper format depending on task_type (null init for task_type reach) - - [{"init":{"obj_name":"null"}, "goal":{"obj_name":"cube_holes","fixed":1,"rand_rot":0, "sampling_area":[-0.5, 0.2, 0.3, 0.6, 0.1, 0.4]}}] - """ - ret = copy.deepcopy(task_objects_old) - if len(task_objects_new) > len(task_objects_old): - msg = "More objects given than there are subtasks." - raise Exception(msg) - dest = "" #init or goal - if task_type == "reach": - dest = "goal" - else: - dest = "init" - for i in range(len(task_objects_new)): - ret[i][dest]["obj_name"] = task_objects_new[i] - return ret - - - - -def process_natural_language_command(cmd, env, output_relative_path=os.path.join("envs", "examples", "natural_language.txt")): - env.reset() - nl = NaturalLanguage(env) - - if cmd in ["description", "new_tasks"]: - with open(os.path.join(os.path.dirname(os.path.realpath(__file__)), output_relative_path), "w") as file: - file.write(nl.generate_task_description() if cmd == "description" else "\n".join(nl.generate_new_tasks())) - else: - msg = f"Unknown natural language command: {cmd}" - raise Exception(msg) - -def main(): - parser = get_parser() - arg_dict = get_arguments(parser) - - # Check if we chose one of the existing engines - if arg_dict["engine"] not in AVAILABLE_SIMULATION_ENGINES: - print(f"Invalid simulation engine. 
Valid arguments: --engine {AVAILABLE_SIMULATION_ENGINES}.") - return - if not os.path.isabs(arg_dict["logdir"]): - arg_dict["logdir"] = os.path.join("./", arg_dict["logdir"]) - os.makedirs(arg_dict["logdir"], exist_ok=True) - model_logdir_ori = os.path.join(arg_dict["logdir"], "_".join((arg_dict["task_type"],arg_dict["workspace"],arg_dict["robot"],arg_dict["robot_action"],arg_dict["algo"]))) - model_logdir = model_logdir_ori - add = 2 - while True: - try: - os.makedirs(model_logdir, exist_ok=False) - break - except: - model_logdir = "_".join((model_logdir_ori, str(add))) - add += 1 - - env = configure_env(arg_dict, model_logdir, for_train=1) - implemented_combos = configure_implemented_combos(env, model_logdir, arg_dict) - train(env, implemented_combos, model_logdir, arg_dict, arg_dict["pretrained_model"]) - print(model_logdir) - -if __name__ == "__main__": - main() diff --git a/myGym/train_checker.sh b/myGym/train_checker.sh deleted file mode 100644 index 3be75cb4..00000000 --- a/myGym/train_checker.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/sh -#This is minimal unit test to check, whether the current version of myGym is able to train. -# It will do simle train and evaluate in 2 minutes. If success rate is above 30 percdents, it works. -rm -r ./trained_models/check -python train.py --config ./configs/train_reach.json --gui 1 --algo ppo2 -b kuka -ba joints --max_episode_steps 256 --algo_steps 256 --steps 20000 --eval_episodes 10 --logdir ./trained_models/check -#python test.py -cfg ./trained_models/check/reach_table_kuka_joints_ppo2/train.json -#rm -r ./trained_models/check \ No newline at end of file diff --git a/myGym/train_multithread.py b/myGym/train_multithread.py deleted file mode 100644 index 536dc47b..00000000 --- a/myGym/train_multithread.py +++ /dev/null @@ -1,217 +0,0 @@ -import random -import pkg_resources -import os, sys, time, yaml -import argparse -import numpy as np -import json, commentjson -from myGym import envs -import ray -from ray.rllib.algorithms.ppo import PPOConfig -from ray.rllib.algorithms.sac.sac import SACConfig -from ray.rllib.algorithms.appo import APPOConfig -from ray.rllib.algorithms.ddpg import DDPGConfig -from ray.rllib.algorithms.marwil import MARWILConfig -from ray.rllib.evaluation import Episode, RolloutWorker -from ray.rllib.policy import Policy -from ray.rllib.policy.sample_batch import SampleBatch -from ray.rllib.env.base_env import BaseEnv -from ray.tune.logger import pretty_print -from train import get_parser, get_arguments, AVAILABLE_SIMULATION_ENGINES -from gymnasium.wrappers import EnvCompatibility -from ray.tune.registry import register_env -from myGym.envs.gym_env import GymEnv - -def save_results(arg_dict, model_name, env, model_logdir=None, show=False): - if model_logdir is None: - model_logdir = arg_dict["logdir"] - print(f"model_logdir: {model_logdir}") - - #results_plotter.EPISODES_WINDOW = 100 - #results_plotter.plot_results([model_logdir], arg_dict["steps"], results_plotter.X_TIMESTEPS, arg_dict["algo"] + " " + arg_dict["env_name"] + " reward") - #plt.gcf().set_size_inches(8, 6) - #plt.savefig(os.path.join(model_logdir, model_name) + '_reward_results.png') - #plot_extended_results(model_logdir, 'd', results_plotter.X_TIMESTEPS, arg_dict["algo"] + " " + arg_dict["env_name"] + " distance", "Episode Distances") - #plt.gcf().set_size_inches(8, 6) - #plt.savefig(os.path.join(model_logdir, model_name) + '_distance_results.png') - #plt.close() - #plt.close() - #if isinstance(env, HERGoalEnvWrapper): - # 
results_plotter.plot_curves([(np.arange(len(env.env.episode_final_distance)),np.asarray(env.env.episode_final_distance))],'episodes',arg_dict["algo"] + " " + arg_dict["env_name"] + ' final step distance') - #else: - # results_plotter.plot_curves([(np.arange(len(env.unwrapped.episode_final_distance)),np.asarray(env.unwrapped.episode_final_distance))],'episodes',arg_dict["algo"] + " " + arg_dict["env_name"] + ' final step distance') - #plt.gcf().set_size_inches(8, 6) - #plt.ylabel("Step Distances") - #plt.savefig(os.path.join(model_logdir, model_name) + "_final_distance_results.png") - #plt.close() - print("Congratulations! Training with {} timesteps succeed!".format(arg_dict["steps"])) - #if show: - # plt.show() - -def configure_env(arg_dict, for_train=True): - env_arguments = {"render_on": True, "visualize": arg_dict["visualize"], "workspace": arg_dict["workspace"], - "robot": arg_dict["robot"], "robot_init_joint_poses": arg_dict["robot_init"], - "robot_action": arg_dict["robot_action"],"max_velocity": arg_dict["max_velocity"], - "max_force": arg_dict["max_force"],"task_type": arg_dict["task_type"], - "action_repeat": arg_dict["action_repeat"], - "task_objects":arg_dict["task_objects"], "observation":arg_dict["observation"], "distractors":arg_dict["distractors"], - "num_networks":arg_dict.get("num_networks", 1), "network_switcher":arg_dict.get("network_switcher", "gt"), - "distance_type": arg_dict["distance_type"], "used_objects": arg_dict["used_objects"], - "active_cameras": arg_dict["camera"], "color_dict":arg_dict.get("color_dict", {}), - "max_steps": arg_dict["max_episode_steps"], "visgym":arg_dict["visgym"], - "reward": arg_dict["reward"], "logdir": arg_dict["logdir"], "vae_path": arg_dict["vae_path"], - "yolact_path": arg_dict["yolact_path"], "yolact_config": arg_dict["yolact_config"], "algo":arg_dict["algo"]} - if for_train: - env_arguments["gui_on"] = arg_dict["gui"] - else: - env_arguments["gui_on"] = arg_dict["gui"] - return env_arguments - -def env_creator(env_config): - return EnvCompatibility(GymEnv(**env_config)) - -def configure_implemented_combos(arg_dict): - implemented_combos = {"ppo": PPOConfig, "sac": SACConfig, "marwil": MARWILConfig, "appo":APPOConfig, "ddpg":DDPGConfig} - return implemented_combos[arg_dict["algo"]] - -def get_parser(): - parser = argparse.ArgumentParser() - #Envinronment - parser.add_argument("-cfg", "--config", default="./configs/rllib_debug.json", help="Can be passed instead of all arguments") - parser.add_argument("-rt", "--ray_tune", action='store_true', help="Whether to train with ray grid search") - parser.add_argument("-n", "--env_name", type=str, help="The name of environment") - parser.add_argument("-ws", "--workspace", type=str, help="The name of workspace") - parser.add_argument("-p", "--engine", type=str, help="Name of the simulation engine you want to use") - parser.add_argument("-sd", "--seed", type=int, help="Seed number") - parser.add_argument("-d", "--render", type=str, help="Type of rendering: opengl, opencv") - parser.add_argument("-c", "--camera", type=int, help="The number of camera used to render and record") - parser.add_argument("-vi", "--visualize", type=int, help="Whether visualize camera render and vision in/out or not: 1 or 0") - parser.add_argument("-vg", "--visgym", type=int, help="Whether visualize gym background: 1 or 0") - parser.add_argument("-g", "--gui", type=int, help="Wether the GUI of the simulation should be used or not: 1 or 0") - #Robot - parser.add_argument("-b", "--robot", type=str, help="Robot to train: 
kuka, panda, jaco ...") - parser.add_argument("-bi", "--robot_init", nargs="*", type=float, help="Initial robot's end-effector position") - parser.add_argument("-ba", "--robot_action", type=str, help="Robot's action control: step - end-effector relative position, absolute - end-effector absolute position, joints - joints' coordinates") - parser.add_argument("-mv", "--max_velocity", type=float, help="Maximum velocity of robotic arm") - parser.add_argument("-mf", "--max_force", type=float, help="Maximum force of robotic arm") - parser.add_argument("-ar", "--action_repeat", type=int, help="Substeps of simulation without action from env") - #Task - parser.add_argument("-tt", "--task_type", type=str, help="Type of task to learn: reach, push, throw, pick_and_place") - parser.add_argument("-to", "--task_objects", nargs="*", type=str, help="Object (for reach) or a pair of objects (for other tasks) to manipulate with") - parser.add_argument("-u", "--used_objects", nargs="*", type=str, help="List of extra objects to randomly appear in the scene") - #Distractors - parser.add_argument("-di", "--distractors", type=str, help="Object (for reach) to evade") - parser.add_argument("-dm", "--distractor_moveable", type=int, help="can distractor move (0/1)") - parser.add_argument("-ds", "--distractor_constant_speed", type=int, help="is speed of distractor constant (0/1)") - parser.add_argument("-dd", "--distractor_movement_dimensions", type=int, help="in how many directions can the distractor move (1/2/3)") - parser.add_argument("-de", "--distractor_movement_endpoints", nargs="*", type=float, help="2 coordinates (starting point and ending point)") - parser.add_argument("-no", "--observed_links_num", type=int, help="number of robot links in observation space") - #Reward - parser.add_argument("-re", "--reward", type=str, help="Defines how to compute the reward") - parser.add_argument("-dt", "--distance_type", type=str, help="Type of distance metrics: euclidean, manhattan") - #Train - parser.add_argument("-w", "--train_framework", type=str, help="Name of the training framework you want to use: {tensorflow, pytorch}") - parser.add_argument("-a", "--algo", type=str, help="The learning algorithm to be used (ppo2 or her)") - parser.add_argument("-s", "--steps", type=int, help="The number of steps to train") - parser.add_argument("-ms", "--max_episode_steps", type=int, help="The maximum number of steps per episode") - parser.add_argument("-ma", "--algo_steps", type=int, help="The number of steps per for algo training (PPO2,A2C)") - #Evaluation - parser.add_argument("-ef", "--eval_freq", type=int, help="Evaluate the agent every eval_freq steps") - parser.add_argument("-e", "--eval_episodes", type=int, help="Number of episodes to evaluate performance of the robot") - #Saving and Logging - parser.add_argument("-l", "--logdir", type=str, help="Where to save results of training and trained models") - parser.add_argument("-r", "--record", type=int, help="1: make a gif of model perfomance, 2: make a video of model performance, 0: don't record") - #Mujoco - parser.add_argument("-i", "--multiprocessing", type=int, help="True: multiprocessing on (specify also the number of vectorized environemnts), False: multiprocessing off") - parser.add_argument("-v", "--vectorized_envs", type=int, help="The number of vectorized environments to run at once (mujoco multiprocessing only)") - #Paths - parser.add_argument("-m", "--model_path", type=str, help="Path to the the trained model to test") - parser.add_argument("-vp", "--vae_path", 
type=str, help="Path to a trained VAE in 2dvu reward type") - parser.add_argument("-yp", "--yolact_path", type=str, help="Path to a trained Yolact in 3dvu reward type") - parser.add_argument("-yc", "--yolact_config", type=str, help="Path to saved config obj or name of an existing one in the data/Config script (e.g. 'yolact_base_config') or None for autodetection") - parser.add_argument('-ptm', "--pretrained_model", type=str, help="Path to a model that you want to continue training") - return parser - -def get_arguments(parser): - args = parser.parse_args() - with open(args.config, "r") as f: - arg_dict = commentjson.load(f) - for key, value in vars(args).items(): - if value is not None and key != "config": - if key in ["robot_init"]: - arg_dict[key] = [float(arg_dict[key][i]) for i in range(len(arg_dict[key]))] - else: - arg_dict[key] = value - return arg_dict, args - -def train(args, arg_dict, algorithm, num_steps, algo_steps): - if not args.ray_tune: - # manual training with train loop using PPO and fixed learning rate - print("Running manual train loop without Ray Tune.") - # use fixed learning rate instead of grid search (needs tune) - algo = ( - algorithm() - .rollouts(num_rollout_workers=10, batch_mode="complete_episodes") # You can try to increase or decrease based on your systems specs - .resources(num_gpus=1, num_gpus_per_worker=0.1) # You can try to increase or decrease based on your systems specs - .environment(env='GymEnv-v0', env_config=arg_dict) - .training(train_batch_size=256) - .build() - ) - # run manual training loop and print results after each iteration - for _ in range(int(num_steps/algo_steps)): - result = algo.train() # Runs one logical iteration of training. - print(pretty_print(result)) - # stop training of the target train steps or reward are reached - if result["timesteps_total"] >= num_steps: - break - algo.stop() - else: - print("Not Implemented, TODO") - # automated run with Tune and grid search and TensorBoard - # tuner = tune.Tuner( - # args.run, - # param_space=config.to_dict(), - # run_config=air.RunConfig(stop=stop), - # ) - # results = tuner.fit() - - # if args.as_test: - # print("Checking if learning goals were achieved") - # check_learning_achieved(results, args.stop_reward) - - -def main(): - parser = get_parser() - arg_dict, args = get_arguments(parser) - ray.init(num_gpus=1, num_cpus=10) - - # Check if we chose one of the existing engines - if arg_dict["engine"] not in AVAILABLE_SIMULATION_ENGINES: - print(f"Invalid simulation engine. 
Valid arguments: --engine {AVAILABLE_SIMULATION_ENGINES}.") - return - if not os.path.isabs(arg_dict["logdir"]): - arg_dict["logdir"] = os.path.join("./", arg_dict["logdir"]) - os.makedirs(arg_dict["logdir"], exist_ok=True) - model_logdir_ori = os.path.join(arg_dict["logdir"], "_".join((arg_dict["task_type"],arg_dict["workspace"],arg_dict["robot"],arg_dict["robot_action"],arg_dict["algo"]))) - model_logdir = model_logdir_ori - add = 2 - while True: - try: - os.makedirs(model_logdir, exist_ok=False) - break - except: - model_logdir = "_".join((model_logdir_ori, str(add))) - add += 1 - - num_steps = arg_dict["steps"] - algo_steps = arg_dict["algo_steps"] - arg_dict = configure_env(arg_dict, for_train=1) - register_env('GymEnv-v0', env_creator) - if not args.ray_tune: - assert arg_dict["algo"] == "ppo", "Training without ray tune only works with PPO (rllib limitation)" - algorithm = configure_implemented_combos(arg_dict) - arg_dict.pop("algo") - train(args, arg_dict, algorithm, num_steps, algo_steps) - - -if __name__ == "__main__": - main() diff --git a/myGym/train_parallel.py b/myGym/train_parallel.py deleted file mode 100644 index fcb05aca..00000000 --- a/myGym/train_parallel.py +++ /dev/null @@ -1,28 +0,0 @@ -import os -import threading - -algos = [' ppo ',' ppo2 ','sac','trpo'] -api = '~/myGym/myGym' -configfile = 'configs/train.json' -script_path = api + '/train.py' -def train(algo): - os.system('cd {api};python {script_path} --config {configfile} --algo {algos}'.format(script_path=script_path, api=api, configfile=configfile, algos=algo)) - - - -if __name__ == '__main__': - threads = [] - for i, algo in enumerate(algos): - thread = threading.Thread(target=train, args=(algo,)) - thread.start() - threads.append(thread) - - for thread in threads: - thread.join() - - - - - -# os.system("./train.py --task_type reach --algo ppo2 --reward_type gt --robot kuka -ba step --env_name CrowWorkspaceEnv-v0 --steps 300000 -to hammer -w tensorflow -g 0 -p pybullet -r 0 -c 6 -dt euclidian -i 0 -t 1 -f 0 -d opengl") \ No newline at end of file diff --git a/myGym/traintf2.py b/myGym/trainprag.py similarity index 52% rename from myGym/traintf2.py rename to myGym/trainprag.py index 5a64ba92..2aaf89b5 100644 --- a/myGym/traintf2.py +++ b/myGym/trainprag.py @@ -1,42 +1,26 @@ -import pkg_resources +import importlib.resources as resources import os, sys, time, yaml import argparse import numpy as np import matplotlib.pyplot as plt import json, commentjson -import gym +import gymnasium as gym from myGym import envs import myGym.utils.cfg_comparator as cfg -from stable_baselines.common.policies import MlpPolicy -from stable_baselines.common import make_vec_env -from stable_baselines.common.vec_env import DummyVecEnv -from stable_baselines.bench import Monitor -from stable_baselines import results_plotter -from stable_baselines.her import GoalSelectionStrategy, HERGoalEnvWrapper -# For now I am importing both with slightly modified names P-PyTorch T-TensorFlow -from stable_baselines import PPO1 as PPO1_T, PPO2 as PPO2_T, HER as HER_T, SAC as SAC_T -from stable_baselines import TD3 as TD3_T, A2C as A2C_T, ACKTR as ACKTR_T, TRPO as TRPO_T +from stable_baselines3.common.vec_env import DummyVecEnv +from stable_baselines3.common.monitor import Monitor # Monitor is still called in configure_env below +from stable_baselines3.her import GoalSelectionStrategy #, HERGoalEnvWrapper try: from stable_baselines3 import PPO as PPO_P, A2C as A2C_P, SAC as SAC_P, TD3 as TD3_P except: print("Torch isn't probably installed correctly") -from myGym.stable_baselines_mygym.algo import MyAlgo -from myGym.stable_baselines_mygym.policies 
import MyMlpPolicy -from myGym.stable_baselines_mygym.TorchPPO import TorchPPO -from myGym.stable_baselines_mygym.TorchPPOpolicies import TorchMlpPolicy - - -#from stable_baselines.gail import ExpertDataset, generate_expert_traj -from stable_baselines.sac.policies import MlpPolicy as MlpPolicySAC -#from stable_baselines.ddpg.policies import MlpPolicy as MlpPolicyDDPG -from stable_baselines.td3.policies import MlpPolicy as MlpPolicyTD3 - # Import helper classes and functions for monitoring -from myGym.utils.callbackstf2 import ProgressBarManager, SaveOnBestTrainingRewardCallback, PlottingCallback, CustomEvalCallback +from myGym.utils.callbackstf2 import ProgressBarManager, SaveOnBestTrainingRewardCallback, CustomEvalCallback +import myGym + # This is global variable for the type of engine we are working with -AVAILABLE_SIMULATION_ENGINES = ["mujoco", "pybullet"] AVAILABLE_TRAINING_FRAMEWORKS = ["tensorflow", "pytorch"] @@ -45,8 +28,8 @@ def save_results(arg_dict, model_name, env, model_logdir=None, show=False): model_logdir = arg_dict["logdir"] print(f"model_logdir: {model_logdir}") - results_plotter.EPISODES_WINDOW = 100 - results_plotter.plot_results([model_logdir], arg_dict["steps"], results_plotter.X_TIMESTEPS, arg_dict["algo"] + " " + arg_dict["env_name"] + " reward") + #results_plotter.EPISODES_WINDOW = 100 + #results_plotter.plot_results([model_logdir], arg_dict["steps"], results_plotter.X_TIMESTEPS, arg_dict["algo"] + " " + arg_dict["env_name"] + " reward") plt.gcf().set_size_inches(8, 6) plt.savefig(os.path.join(model_logdir, model_name) + '_reward_results.png') #plot_extended_results(model_logdir, 'd', results_plotter.X_TIMESTEPS, arg_dict["algo"] + " " + arg_dict["env_name"] + " distance", "Episode Distances") @@ -54,10 +37,7 @@ def save_results(arg_dict, model_name, env, model_logdir=None, show=False): plt.savefig(os.path.join(model_logdir, model_name) + '_distance_results.png') plt.close() plt.close() - if isinstance(env, HERGoalEnvWrapper): - results_plotter.plot_curves([(np.arange(len(env.env.episode_final_distance)),np.asarray(env.env.episode_final_distance))],'episodes',arg_dict["algo"] + " " + arg_dict["env_name"] + ' final step distance') - else: - results_plotter.plot_curves([(np.arange(len(env.unwrapped.episode_final_distance)),np.asarray(env.unwrapped.episode_final_distance))],'episodes',arg_dict["algo"] + " " + arg_dict["env_name"] + ' final step distance') + #results_plotter.plot_curves([(np.arange(len(env.unwrapped.episode_final_distance)),np.asarray(env.unwrapped.episode_final_distance))],'episodes',arg_dict["algo"] + " " + arg_dict["env_name"] + ' final step distance') plt.gcf().set_size_inches(8, 6) plt.ylabel("Step Distances") plt.savefig(os.path.join(model_logdir, model_name) + "_final_distance_results.png") @@ -67,45 +47,31 @@ def save_results(arg_dict, model_name, env, model_logdir=None, show=False): plt.show() def configure_env(arg_dict, model_logdir=None, for_train=True): - if arg_dict["engine"] == "pybullet": - env_arguments = {"render_on": True, "visualize": arg_dict["visualize"], "workspace": arg_dict["workspace"], - "robot": arg_dict["robot"], "robot_init_joint_poses": arg_dict["robot_init"], - "robot_action": arg_dict["robot_action"], "task_type": arg_dict["task_type"], "num_subgoals": arg_dict["num_subgoals"], - "task_objects":arg_dict["task_objects"], "distractors":arg_dict["distractors"], - "distractor_moveable":arg_dict["distractor_moveable"], - "distractor_constant_speed":arg_dict["distractor_constant_speed"], - 
"distractor_movement_dimensions":arg_dict["distractor_movement_dimensions"], - "distractor_movement_endpoints":arg_dict["distractor_movement_endpoints"], - "coefficient_kd":arg_dict["coefficient_kd"], - "coefficient_kw":arg_dict["coefficient_kw"], - "coefficient_ka":arg_dict["coefficient_ka"], - "observed_links_num":arg_dict["observed_links_num"], - "reward_type": arg_dict["reward_type"], - "distance_type": arg_dict["distance_type"], "used_objects": arg_dict["used_objects"], - "object_sampling_area": arg_dict["object_sampling_area"], "active_cameras": arg_dict["camera"], - "max_steps": arg_dict["max_episode_steps"], "visgym":arg_dict["visgym"], - "reward": arg_dict["reward"], "logdir": arg_dict["logdir"], "vae_path": arg_dict["vae_path"], - "yolact_path": arg_dict["yolact_path"], "yolact_config": arg_dict["yolact_config"]} - if for_train: - env_arguments["gui_on"] = False - else: - env_arguments["gui_on"] = arg_dict["gui"] + env_arguments = {"render_on": True, "visualize": arg_dict["visualize"], "workspace": arg_dict["workspace"], "framework":"stable_baselines", + "robot": arg_dict["robot"], "robot_init_joint_poses": arg_dict["robot_init"], + "robot_action": arg_dict["robot_action"],"max_velocity": arg_dict["max_velocity"], + "max_force": arg_dict["max_force"], + "action_repeat": arg_dict["action_repeat"], "rddl": arg_dict["rddl"], + "observation":arg_dict["observation"], "distractors":arg_dict["distractors"], + "num_networks":arg_dict.get("num_networks", 1), "network_switcher":arg_dict.get("network_switcher", "gt"), + "active_cameras": arg_dict["camera"], "color_dict":arg_dict.get("color_dict", {}), + "max_steps": arg_dict["max_episode_steps"], "visgym":arg_dict["visgym"], + "logdir": arg_dict["logdir"], "vae_path": arg_dict["vae_path"], + "yolact_path": arg_dict["yolact_path"], "yolact_config": arg_dict["yolact_config"], + "natural_language": bool(arg_dict["natural_language"]), + "training": bool(for_train) + } + env_arguments["gui_on"] = arg_dict["gui"] - if arg_dict["algo"] == "her": - env = gym.make(arg_dict["env_name"], **env_arguments, obs_space="dict") # her needs obs as a dict - else: - env = gym.make(arg_dict["env_name"], **env_arguments) - elif arg_dict["engine"] == "mujoco": - if arg_dict["multiprocessing"]: - # ACKTR, PPO2, A2C, DDPG can use vectorized environments, but the only way to display the results (for me) is using CV2 imshow. 
-(TensorFlow comment) - env = make_vec_env(arg_dict["env_name"], n_envs=arg_dict["vectorized_envs"]) - else: - env = gym.make(arg_dict["env_name"]) + if arg_dict["algo"] == "her": + env = gym.make(arg_dict["env_name"], **env_arguments, obs_space="dict") # her needs obs as a dict + else: + env = gym.make(arg_dict["env_name"], **env_arguments) if for_train: - if arg_dict["engine"] == "mujoco": - env = VecMonitor(env, model_logdir) if arg_dict["multiprocessing"] else Monitor(env, model_logdir) - elif arg_dict["engine"] == "pybullet": + try: env = Monitor(env, model_logdir, info_keywords=tuple('d')) + except: + pass if arg_dict["algo"] == "her": env = HERGoalEnvWrapper(env) @@ -113,22 +79,11 @@ def configure_env(arg_dict, model_logdir=None, for_train=True): def configure_implemented_combos(env, model_logdir, arg_dict): - implemented_combos = {"ppo2": {"tensorflow": [PPO2_T, (MlpPolicy, env), {"n_steps": arg_dict["algo_steps"], "verbose": 1}]}, - "ppo": {"tensorflow": [PPO1_T, (MlpPolicy, env), {"verbose": 1}],}, - "sac": {"tensorflow": [SAC_T, (MlpPolicySAC, env), {"verbose": 1}],}, - "td3": {"tensorflow": [TD3_T, (MlpPolicyTD3, env), {"verbose": 1}],}, - "acktr": {"tensorflow": [ACKTR_T, (MlpPolicy, env), {"n_steps": arg_dict["algo_steps"], "verbose": 1}]}, - "trpo": {"tensorflow": [TRPO_T, (MlpPolicy, env), {"verbose": 1}]}, - "a2c": {"tensorflow": [A2C_T, (MlpPolicy, env), {"n_steps": arg_dict["algo_steps"], "verbose": 1}],}, - "torchppo": {"tensorflow": [TorchPPO, (TorchMlpPolicy, env), {"n_steps": arg_dict["algo_steps"], "verbose": 1}]}, - "myalgo": {"tensorflow": [MyAlgo, (MyMlpPolicy, env), {"n_steps": arg_dict["algo_steps"], "verbose": 1}]}, - "dual": {"tensorflow": [PPO2_T, (MlpPolicy, env), {"n_steps": arg_dict["algo_steps"], "verbose": 1}]}} - - if "PPO_P" in sys.modules: - implemented_combos["ppo"]["pytorch"] = [PPO_P, ('MlpPolicy', env), {"n_steps": 1024, "verbose": 1, "tensorboard_log": model_logdir}] - implemented_combos["sac"]["pytorch"] = [SAC_P, ('MlpPolicy', env), {"verbose": 1, "tensorboard_log": model_logdir}] - implemented_combos["td3"]["pytorch"] = [TD3_P, ('MlpPolicy', env), {"verbose": 1, "tensorboard_log": model_logdir}] - implemented_combos["a2c"]["pytorch"] = [A2C_P, ('MlpPolicy', env), {"n_steps": arg_dict["algo_steps"], "verbose": 1, "tensorboard_log": model_logdir}] + implemented_combos = {"ppo":{}, "sac":{}, "td3":{}, "a2c":{}} + implemented_combos["ppo"]["pytorch"] = [PPO_P, ('MlpPolicy', env), {"n_steps": 1024, "verbose": 1, "tensorboard_log": model_logdir}] + implemented_combos["sac"]["pytorch"] = [SAC_P, ('MlpPolicy', env), {"verbose": 1, "tensorboard_log": model_logdir}] + implemented_combos["td3"]["pytorch"] = [TD3_P, ('MlpPolicy', env), {"verbose": 1, "tensorboard_log": model_logdir}] + implemented_combos["a2c"]["pytorch"] = [A2C_P, ('MlpPolicy', env), {"n_steps": arg_dict["algo_steps"], "verbose": 1, "tensorboard_log": model_logdir}] return implemented_combos @@ -141,23 +96,16 @@ def train(env, implemented_combos, model_logdir, arg_dict, pretrained_model=None with open(conf_pth, "w") as f: json.dump(arg_dict, f, indent=4) - try: - model_args = implemented_combos[arg_dict["algo"]][arg_dict["train_framework"]][1] - model_kwargs = implemented_combos[arg_dict["algo"]][arg_dict["train_framework"]][2] - if pretrained_model: - if not os.path.isabs(pretrained_model): - pretrained_model = pkg_resources.resource_filename("myGym", pretrained_model) - env = model_args[1] - vec_env = DummyVecEnv([lambda: env]) - model = 
implemented_combos[arg_dict["algo"]][arg_dict["train_framework"]][0].load(pretrained_model, vec_env) - else: - model = implemented_combos[arg_dict["algo"]][arg_dict["train_framework"]][0](*model_args, **model_kwargs) - except: - if arg_dict["algo"] in implemented_combos.keys(): - err = "{} is only implemented with {}".format(arg_dict["algo"], list(implemented_combos[arg_dict["algo"]].keys())[0]) - else: - err = "{} algorithm is not implemented.".format(arg_dict["algo"]) - raise Exception(err) + model_args = implemented_combos[arg_dict["algo"]][arg_dict["train_framework"]][1] + model_kwargs = implemented_combos[arg_dict["algo"]][arg_dict["train_framework"]][2] + if pretrained_model: + if not os.path.isabs(pretrained_model): + pretrained_model = resources.files("myGym").joinpath(pretrained_model) + env = model_args[1] + vec_env = DummyVecEnv([lambda: env]) + model = implemented_combos[arg_dict["algo"]][arg_dict["train_framework"]][0].load(pretrained_model, vec_env) + else: + model = implemented_combos[arg_dict["algo"]][arg_dict["train_framework"]][0](*model_args, **model_kwargs) if arg_dict["algo"] == "gail": # Multi processing: (using MPI) @@ -171,8 +119,7 @@ def train(env, implemented_combos, model_logdir, arg_dict, pretrained_model=None start_time = time.time() callbacks_list = [] - auto_save_callback = SaveOnBestTrainingRewardCallback(check_freq=1024, logdir=model_logdir, env=env, engine=arg_dict["engine"], multiprocessing=arg_dict["multiprocessing"]) - callbacks_list.append(auto_save_callback) + if arg_dict["eval_freq"]: eval_env = configure_env(arg_dict, model_logdir, for_train=False) eval_callback = CustomEvalCallback(eval_env, log_path=model_logdir, @@ -181,23 +128,17 @@ def train(env, implemented_combos, model_logdir, arg_dict, pretrained_model=None record=arg_dict["record"], camera_id=arg_dict["camera"]) callbacks_list.append(eval_callback) - # plotting_callback = PlottingCallback(model_logdir) - with ProgressBarManager(total_timesteps=arg_dict["steps"]) as progress_callback: - callbacks_list.append(progress_callback) - model.learn(total_timesteps=arg_dict["steps"], callback=callbacks_list) + + model.learn(total_timesteps=arg_dict["steps"], callback=callbacks_list) model.save(os.path.join(model_logdir, model_name)) print("Training time: {:.2f} s".format(time.time() - start_time)) - # info_keywords in monitor class above is neccessary for pybullet to save_results - # when using the info_keywords for mujoco we get an error - if arg_dict["engine"] == "pybullet": - save_results(arg_dict, model_name, env, model_logdir) return model def get_parser(): parser = argparse.ArgumentParser() #Envinronment - parser.add_argument("-cfg", "--config", default="configs/train.json", help="Can be passed instead of all arguments") + parser.add_argument("-cfg", "--config", default="configs/train_rddl.json", help="Can be passed instead of all arguments") parser.add_argument("-n", "--env_name", type=str, help="The name of environment") parser.add_argument("-ws", "--workspace", type=str, help="The name of workspace") parser.add_argument("-p", "--engine", type=str, help="Name of the simulation engine you want to use") @@ -241,7 +182,6 @@ def get_parser(): parser.add_argument("-r", "--record", type=int, help="1: make a gif of model perfomance, 2: make a video of model performance, 0: don't record") #Mujoco parser.add_argument("-i", "--multiprocessing", type=int, help="True: multiprocessing on (specify also the number of vectorized environemnts), False: multiprocessing off") - parser.add_argument("-v", 
"--vectorized_envs", type=int, help="The number of vectorized environments to run at once (mujoco multiprocessing only)") #Paths parser.add_argument("-m", "--model_path", type=str, help="Path to the the trained model to test") parser.add_argument("-vp", "--vae_path", type=str, help="Path to a trained VAE in 2dvu reward type") @@ -252,10 +192,15 @@ def get_parser(): def get_arguments(parser): args = parser.parse_args() + if not os.path.exists(args.config): + # try looking in the package main dir + args.config = os.path.join(myGym.__path__[0], args.config) + if not os.path.exists(args.config): + raise ValueError("Could not find config file: {}".format(args.config)) with open(args.config, "r") as f: arg_dict = commentjson.load(f) for key, value in vars(args).items(): - if value is not None and key is not "config": + if value != None and key != "config": if key in ["robot_init", "object_sampling_area"]: arg_dict[key] = [float(arg_dict[key][i]) for i in range(len(arg_dict[key]))] else: @@ -268,13 +213,10 @@ def main(): arg_dict = get_arguments(parser) # Check if we chose one of the existing engines - if arg_dict["engine"] not in AVAILABLE_SIMULATION_ENGINES: - print(f"Invalid simulation engine. Valid arguments: --engine {AVAILABLE_SIMULATION_ENGINES}.") - return if not os.path.isabs(arg_dict["logdir"]): - arg_dict["logdir"] = pkg_resources.resource_filename("myGym", arg_dict["logdir"]) + arg_dict["logdir"] = os.path.join("./", arg_dict["logdir"]) os.makedirs(arg_dict["logdir"], exist_ok=True) - model_logdir_ori = os.path.join(arg_dict["logdir"], "_".join((arg_dict["task_type"],arg_dict["workspace"],arg_dict["robot"],arg_dict["robot_action"],arg_dict["reward_type"],arg_dict["algo"]))) + model_logdir_ori = os.path.join(arg_dict["logdir"], "_".join((arg_dict["workspace"],arg_dict["robot"],arg_dict["robot_action"],arg_dict["algo"]))) model_logdir = model_logdir_ori add = 2 while True: @@ -290,5 +232,6 @@ def main(): train(env, implemented_combos, model_logdir, arg_dict, arg_dict["pretrained_model"]) print(model_logdir) + if __name__ == "__main__": main() diff --git a/myGym/trainray.py b/myGym/trainray.py deleted file mode 100644 index 7b0b57c4..00000000 --- a/myGym/trainray.py +++ /dev/null @@ -1,9 +0,0 @@ -from ray.rllib.algorithms.ppo import PPOConfig -config = PPOConfig() -config = config.training(gamma=0.9, lr=0.01, kl_coeff=0.3) -config = config.resources(num_gpus=1) -config = config.rollouts(num_rollout_workers=4) -print(config.to_dict()) -# Build a Algorithm object from the config and run 1 training iteration. 
-algo = config.build(env="CartPole-v1") -algo.train() \ No newline at end of file diff --git a/myGym/trainray_ppo_reach.py b/myGym/trainray_ppo_reach.py deleted file mode 100644 index 648776f4..00000000 --- a/myGym/trainray_ppo_reach.py +++ /dev/null @@ -1,208 +0,0 @@ -from ray.rllib.algorithms.ppo import PPOConfig -from ray.rllib.algorithms.callbacks import DefaultCallbacks -from ray.rllib.evaluation import Episode, RolloutWorker -from ray.rllib.policy import Policy -from ray.rllib.policy.sample_batch import SampleBatch -from ray.rllib.env.base_env import BaseEnv -from ray.tune.logger import pretty_print -from ray import tune -from train import get_parser, get_arguments, AVAILABLE_SIMULATION_ENGINES -import os -import gymnasium as gym -from gymnasium.wrappers import EnvCompatibility -from ray.tune.registry import register_env -from myGym.envs.gym_env import GymEnv -import numpy as np -import time - -from typing import Dict, Tuple -import argparse -import gymnasium as gym - - -class MyCallbacks(DefaultCallbacks): - def on_episode_start( - self, - *, - worker: RolloutWorker, - base_env: BaseEnv, - policies: Dict[str, Policy], - episode: Episode, - env_index: int, - **kwargs, - ): - # Make sure this episode has just been started (only initial obs - # logged so far). - assert episode.length == 0, ( - "ERROR: `on_episode_start()` callback should be called right " - "after env reset!" - ) - # Create lists to store angles in - episode.user_data["pole_angles"] = [] - episode.hist_data["pole_angles"] = [] - - def on_episode_step( - self, - *, - worker: RolloutWorker, - base_env: BaseEnv, - policies: Dict[str, Policy], - episode: Episode, - env_index: int, - **kwargs, - ): - # Make sure this episode is ongoing. - assert episode.length > 0, ( - "ERROR: `on_episode_step()` callback should not be called right " - "after env reset!" - ) - pole_angle = abs(episode.last_observation_for()[2]) - raw_angle = abs(episode.last_raw_obs_for()[2]) - assert pole_angle == raw_angle - episode.user_data["pole_angles"].append(pole_angle) - - # Sometimes our pole is moving fast. We can look at the latest velocity - # estimate from our environment and log high velocities. - if np.abs(episode.last_info_for()["pole_angle_vel"]) > 0.25: - print("This is a fast pole!") - - def on_episode_end( - self, - *, - worker: RolloutWorker, - base_env: BaseEnv, - policies: Dict[str, Policy], - episode: Episode, - env_index: int, - **kwargs, - ): - # Check if there are multiple episodes in a batch, i.e. - # "batch_mode": "truncate_episodes". - if worker.config.batch_mode == "truncate_episodes": - # Make sure this episode is really done. - assert episode.batch_builder.policy_collectors["default_policy"].batches[ - -1 - ]["dones"][-1], ( - "ERROR: `on_episode_end()` should only be called " - "after episode is done!" - ) - pole_angle = np.mean(episode.user_data["pole_angles"]) - episode.custom_metrics["pole_angle"] = pole_angle - episode.hist_data["pole_angles"] = episode.user_data["pole_angles"] - - def on_sample_end(self, *, worker: RolloutWorker, samples: SampleBatch, **kwargs): - # We can also do our own sanity checks here. - assert ( - samples.count == 2000 - ), f"I was expecting 2000 here, but got {samples.count}!" - - def on_train_result(self, *, algorithm, result: dict, **kwargs): - # you can mutate the result dict to add new fields to return - result["callback_ok"] = True - - # Normally, RLlib would aggregate any custom metric into a mean, max and min - # of the given metric. 
- # For the sake of this example, we will instead compute the variance and mean - # of the pole angle over the evaluation episodes. - pole_angle = result["custom_metrics"]["pole_angle"] - var = np.var(pole_angle) - mean = np.mean(pole_angle) - result["custom_metrics"]["pole_angle_var"] = var - result["custom_metrics"]["pole_angle_mean"] = mean - # We are not interested in these original values - del result["custom_metrics"]["pole_angle"] - del result["custom_metrics"]["num_batches"] - - def on_learn_on_batch( - self, *, policy: Policy, train_batch: SampleBatch, result: dict, **kwargs - ) -> None: - result["sum_actions_in_train_batch"] = train_batch["actions"].sum() - # Log the sum of actions in the train batch. - print( - "policy.learn_on_batch() result: {} -> sum actions: {}".format( - policy, result["sum_actions_in_train_batch"] - ) - ) - - def on_postprocess_trajectory( - self, - *, - worker: RolloutWorker, - episode: Episode, - agent_id: str, - policy_id: str, - policies: Dict[str, Policy], - postprocessed_batch: SampleBatch, - original_batches: Dict[str, Tuple[Policy, SampleBatch]], - **kwargs, - ): - if "num_batches" not in episode.custom_metrics: - episode.custom_metrics["num_batches"] = 0 - episode.custom_metrics["num_batches"] += 1 - - -def main(): - #Start time counter and print it - start_time = time.time() - print(start_time) - parser = get_parser() - arg_dict = get_arguments(parser) - - if not os.path.isabs(arg_dict["logdir"]): - arg_dict["logdir"] = os.path.join("./", arg_dict["logdir"]) - os.makedirs(arg_dict["logdir"], exist_ok=True) - model_logdir_ori = os.path.join(arg_dict["logdir"], "_".join((arg_dict["task_type"],arg_dict["workspace"],arg_dict["robot"],arg_dict["robot_action"],arg_dict["algo"]))) - model_logdir = model_logdir_ori - add = 2 - while True: - try: - os.makedirs(model_logdir, exist_ok=False) - break - except: - model_logdir = "_".join((model_logdir_ori, str(add))) - add += 1 - - # Set env arguments - env_arguments = {"render_on": False, "visualize": arg_dict["visualize"], "workspace": arg_dict["workspace"], - "robot": arg_dict["robot"], "robot_init_joint_poses": arg_dict["robot_init"], - "robot_action": arg_dict["robot_action"],"max_velocity": arg_dict["max_velocity"], - "max_force": arg_dict["max_force"],"task_type": arg_dict["task_type"], - "action_repeat": arg_dict["action_repeat"], - "task_objects":arg_dict["task_objects"], "observation":arg_dict["observation"], "distractors":arg_dict["distractors"], - "num_networks":arg_dict.get("num_networks", 1), "network_switcher":arg_dict.get("network_switcher", "gt"), - "distance_type": arg_dict["distance_type"], "used_objects": arg_dict["used_objects"], - "active_cameras": arg_dict["camera"], "color_dict":arg_dict.get("color_dict", {}), - "max_steps": arg_dict["max_episode_steps"], "visgym":arg_dict["visgym"], - "reward": arg_dict["reward"], "logdir": arg_dict["logdir"], "vae_path": arg_dict["vae_path"], - "yolact_path": arg_dict["yolact_path"], "yolact_config": arg_dict["yolact_config"]} - - env_arguments["gui_on"] = False - - # Register OpenAI gym env in gymnasium - def env_creator(env_config): - return EnvCompatibility(GymEnv(**env_config)) - register_env('GymEnv-v0', env_creator) - - # Set up algo - algo = ( - PPOConfig() - .rollouts(num_rollout_workers=12) # You can try to increase or decrease based on your systems specs - .resources(num_gpus=1) # You can try to increase or decrease based on your systems specs - .environment(env='GymEnv-v0', env_config=env_arguments) - .build() - ) - - #Train - for i in 
range(200): - result = algo.train() - print(pretty_print(result)) - - if i % 100 == 0: - #action = algo.compute_single_action(obs) - #obs, reward, terminated, truncated, info = env.step(action) - checkpoint_dir = algo.save() - print(f"Checkpoint saved in directory {checkpoint_dir}") - #Print start time minus end time - print("--- %s seconds ---" % (time.time() - start_time)) -if __name__ == "__main__": - main() diff --git a/myGym/trainrayppo.py b/myGym/trainrayppo.py deleted file mode 100644 index 47b79f55..00000000 --- a/myGym/trainrayppo.py +++ /dev/null @@ -1,19 +0,0 @@ -from ray.rllib.algorithms.ppo import PPOConfig -from ray.tune.logger import pretty_print - - -algo = ( - PPOConfig() - .rollouts(num_rollout_workers=10) - .resources(num_gpus=1) - .environment(env="CartPole-v1") - .build() -) - -for i in range(10): - result = algo.train() - print(pretty_print(result)) - - if i % 5 == 0: - checkpoint_dir = algo.save() - print(f"Checkpoint saved in directory {checkpoint_dir}") \ No newline at end of file diff --git a/myGym/trajectory.txt b/myGym/trajectory.txt deleted file mode 100644 index c186c647..00000000 --- a/myGym/trajectory.txt +++ /dev/null @@ -1,340 +0,0 @@ -[ 13.63947071 46.49404337 34.98897878 154.40660495 -8.43316019 - 8.75505647 0. ] -[ 23.03022382 52.9324022 10.26970663 112.64983594 3.3500255 - -4.11878627 -3.19748864] -[ 23.83120954 53.17292871 12.69943217 112.71191671 6.4846686 - -5.70568116 -5.00478868] -[ 24.72088475 53.40316953 15.04529638 112.70807287 9.36316623 - -7.28295541 -6.02350917] -[ 25.76633313 53.51639673 17.27593977 112.61602464 11.85630538 - -9.09920692 -4.77974021] -[ 26.882498 53.58348956 19.50573275 112.48776724 14.225859 - -10.96198333 -2.96720977] -[ 27.99026135 53.85817431 21.69865119 112.5866467 16.63596035 - -13.00688196 -1.20832591] -[ 29.09910952 54.34645002 23.82666771 112.91138525 19.08409007 - -14.89854166 -0.21009449] -[ 30.28514499 54.77586352 25.93350029 113.20282977 21.43471072 - -16.22740888 0. ] -[ 31.52547203 55.16245197 28.02814389 113.4717309 23.72266285 - -17.31672281 0. ] - -[ 35.17233607 56.882108 50.03606026 171.40176078 -8.35708235 - 6.60459623 0. ] -[ 23.03022382 52.9324022 10.26970663 112.64983594 3.3500255 - -4.11878627 -3.19748864] -[ 23.83120954 53.17292871 12.69943217 112.71191671 6.4846686 - -5.70568116 -5.00478868] -[ 24.72088475 53.40316953 15.04529638 112.70807287 9.36316623 - -7.28295541 -6.02350917] -[ 25.76633313 53.51639673 17.27593977 112.61602464 11.85630538 - -9.09920692 -4.77974021] -[ 26.882498 53.58348956 19.50573275 112.48776724 14.225859 - -10.96198333 -2.96720977] -[ 27.99026135 53.85817431 21.69865119 112.5866467 16.63596035 - -13.00688196 -1.20832591] -[ 29.09910952 54.34645002 23.82666771 112.91138525 19.08409007 - -14.89854166 -0.21009449] -[ 30.28514499 54.77586352 25.93350029 113.20282977 21.43471072 - -16.22740888 0. ] -[ 31.52547203 55.16245197 28.02814389 113.4717309 23.72266285 - -17.31672281 0. 
] - -[ 55.06680045 36.23442066 12.52413422 130.23721056 17.25970869 - -14.96050798 -6.3933651 ] -[ 21.93234962 53.64374994 11.19624587 112.73470817 10.28428424 - -8.53039365 -5.74159576] -[ 22.8296819 53.95476189 13.45793531 112.672769 13.11268535 - -9.99127093 -6.95387006] -[ 23.8857069 54.12360863 15.59245058 112.53540778 15.52109033 - -11.72898384 -5.69393502] -[ 25.01696652 54.23397831 17.7206604 112.36615181 17.79311531 - -13.54786236 -3.70743207] -[ 26.19981059 54.31517212 19.88144033 112.17725194 20.01566549 - -15.37259418 -1.59154829] -[ 27.3862054 54.69648575 21.96902841 112.35516157 22.29075241 - -17.04744247 -0.17509974] -[ 28.5977246 55.15229457 24.03942953 112.61249171 24.57905903 - -18.28756357 0. ] -[ 29.86297885 55.56051318 26.09872822 112.84994598 26.8058054 - -19.29243524 0. ] -[ 31.16612578 55.94029701 28.15384476 113.07425872 28.9984629 - -20.27102218 0. ] - -[ 30.80651705 34.81330784 15.15328846 131.81201321 9.9057741 - -6.91257465 -3.66080344] -[ 21.93234962 53.64374994 11.19624587 112.73470817 10.28428424 - -8.53039365 -5.74159576] -[ 22.8296819 53.95476189 13.45793531 112.672769 13.11268535 - -9.99127093 -6.95387006] -[ 23.8857069 54.12360863 15.59245058 112.53540778 15.52109033 - -11.72898384 -5.69393502] -[ 25.01696652 54.23397831 17.7206604 112.36615181 17.79311531 - -13.54786236 -3.70743207] -[ 26.19981059 54.31517212 19.88144033 112.17725194 20.01566549 - -15.37259418 -1.59154829] -[ 27.3862054 54.69648575 21.96902841 112.35516157 22.29075241 - -17.04744247 -0.17509974] -[ 28.5977246 55.15229457 24.03942953 112.61249171 24.57905903 - -18.28756357 0. ] -[ 29.86297885 55.56051318 26.09872822 112.84994598 26.8058054 - -19.29243524 0. ] -[ 31.16612578 55.94029701 28.15384476 113.07425872 28.9984629 - -20.27102218 0. ] - -[ 43.66754056 59.38762511 54.53285684 175.22040931 -7.67350031 - 5.74375543 0. ] -[ 23.03022382 52.9324022 10.26970663 112.64983594 3.3500255 - -4.11878627 -3.19748864] -[ 23.83120954 53.17292871 12.69943217 112.71191671 6.4846686 - -5.70568116 -5.00478868] -[ 24.72088475 53.40316953 15.04529638 112.70807287 9.36316623 - -7.28295541 -6.02350917] -[ 25.76633313 53.51639673 17.27593977 112.61602464 11.85630538 - -9.09920692 -4.77974021] -[ 26.882498 53.58348956 19.50573275 112.48776724 14.225859 - -10.96198333 -2.96720977] -[ 27.99026135 53.85817431 21.69865119 112.5866467 16.63596035 - -13.00688196 -1.20832591] -[ 29.09910952 54.34645002 23.82666771 112.91138525 19.08409007 - -14.89854166 -0.21009449] -[ 30.28514499 54.77586352 25.93350029 113.20282977 21.43471072 - -16.22740888 0. ] -[ 31.52547203 55.16245197 28.02814389 113.4717309 23.72266285 - -17.31672281 0. ] - -[ 60.48958311 61.91817165 55.04372464 176.90363438 -1.15329069 - 4.20244993 0. ] -[ 22.85398006 53.01226526 10.36746474 112.62187142 5.61425724 - -4.58182503 -3.392914 ] -[ 23.66714208 53.26776935 12.77573327 112.64838505 8.71351727 - -6.13742572 -5.18691076] -[ 24.54708409 53.53480814 15.1271719 112.62440686 11.62396454 - -7.62146732 -6.68892836] -[ 25.59124677 53.67301693 17.35662769 112.51511504 14.12939298 - -9.3646357 -5.77452605] -[ 26.72118046 53.74445835 19.57226133 112.36516203 16.47474061 - -11.20056414 -3.96906869] -[ 27.83797492 54.05309244 21.74422092 112.47407417 18.873036 - -13.25286415 -2.22030575] -[ 28.93400846 54.59271365 23.88271345 112.78256969 21.36056905 - -15.41104757 -1.12555719] -[ 30.10504616 55.08884569 26.00144843 113.0618512 23.75349985 - -17.2898791 -0.27351956] -[ 31.35930642 55.49755398 28.0884849 113.31275202 26.02649678 - -18.59180028 0. 
] - -[ 14.07201469 52.27759173 47.05772404 166.26703175 -12.33084162 - 9.25262087 0. ] -[ 23.03022382 52.9324022 10.26970663 112.64983594 3.3500255 - -4.11878627 -3.19748864] -[ 23.83120954 53.17292871 12.69943217 112.71191671 6.4846686 - -5.70568116 -5.00478868] -[ 24.72088475 53.40316953 15.04529638 112.70807287 9.36316623 - -7.28295541 -6.02350917] -[ 25.76633313 53.51639673 17.27593977 112.61602464 11.85630538 - -9.09920692 -4.77974021] -[ 26.882498 53.58348956 19.50573275 112.48776724 14.225859 - -10.96198333 -2.96720977] -[ 27.99026135 53.85817431 21.69865119 112.5866467 16.63596035 - -13.00688196 -1.20832591] -[ 29.09910952 54.34645002 23.82666771 112.91138525 19.08409007 - -14.89854166 -0.21009449] -[ 30.28514499 54.77586352 25.93350029 113.20282977 21.43471072 - -16.22740888 0. ] -[ 31.52547203 55.16245197 28.02814389 113.4717309 23.72266285 - -17.31672281 0. ] - -[ 64.95232478 58.50308888 40.17768839 167.40993306 7.65502604 - 1.54838064 0. ] -[ 22.63183159 53.12238553 10.48973951 112.56379054 8.53353439 - -5.1811641 -3.64157638] -[ 23.46252657 53.39529534 12.86966459 112.54610013 11.58556877 - -6.69601396 -5.41829635] -[ 24.35119568 53.68766488 15.20724839 112.48656569 14.48340533 - -8.11222831 -7.14249378] -[ 25.38191585 53.87535358 17.44835775 112.35967864 17.04049134 - -9.71973689 -6.95917921] -[ 26.52814057 53.95393275 19.64834253 112.18412466 19.36116594 - -11.51451033 -5.21415609] -[ 27.65680919 54.3068743 21.79438987 112.30746737 21.74630508 - -13.57475694 -3.50072405] -[ 28.77810364 54.85064663 23.91642857 112.58582176 24.20116093 - -15.71157142 -2.4098174 ] -[ 29.94843472 55.39490109 26.03750772 112.84075053 26.61535923 - -17.84231094 -1.44620457] -[ 31.18618173 55.89541257 28.1379666 113.07509103 28.9421511 - -19.79973245 -0.5402476 ] - -[ 80.00209693 64.75992481 57.8368658 179.32432717 6.36778728 - 2.82108446 0. ] -[ 22.63183159 53.12238553 10.48973951 112.56379054 8.53353439 - -5.1811641 -3.64157638] -[ 23.46252657 53.39529534 12.86966459 112.54610013 11.58556877 - -6.69601396 -5.41829635] -[ 24.35119568 53.68766488 15.20724839 112.48656569 14.48340533 - -8.11222831 -7.14249378] -[ 25.38191585 53.87535358 17.44835775 112.35967864 17.04049134 - -9.71973689 -6.95917921] -[ 26.52814057 53.95393275 19.64834253 112.18412466 19.36116594 - -11.51451033 -5.21415609] -[ 27.65680919 54.3068743 21.79438987 112.30746737 21.74630508 - -13.57475694 -3.50072405] -[ 28.77810364 54.85064663 23.91642857 112.58582176 24.20116093 - -15.71157142 -2.4098174 ] -[ 29.94843472 55.39490109 26.03750772 112.84075053 26.61535923 - -17.84231094 -1.44620457] -[ 31.18618173 55.89541257 28.1379666 113.07509103 28.9421511 - -19.79973245 -0.5402476 ] - -[ 51.33380635 55.43864475 37.69848379 163.83643658 4.72213294 - 2.51080027 0. 
] -[ 22.63183159 53.12238553 10.48973951 112.56379054 8.53353439 - -5.1811641 -3.64157638] -[ 23.46252657 53.39529534 12.86966459 112.54610013 11.58556877 - -6.69601396 -5.41829635] -[ 24.35119568 53.68766488 15.20724839 112.48656569 14.48340533 - -8.11222831 -7.14249378] -[ 25.38191585 53.87535358 17.44835775 112.35967864 17.04049134 - -9.71973689 -6.95917921] -[ 26.52814057 53.95393275 19.64834253 112.18412466 19.36116594 - -11.51451033 -5.21415609] -[ 27.65680919 54.3068743 21.79438987 112.30746737 21.74630508 - -13.57475694 -3.50072405] -[ 28.77810364 54.85064663 23.91642857 112.58582176 24.20116093 - -15.71157142 -2.4098174 ] -[ 29.94843472 55.39490109 26.03750772 112.84075053 26.61535923 - -17.84231094 -1.44620457] -[ 31.18618173 55.89541257 28.1379666 113.07509103 28.9421511 - -19.79973245 -0.5402476 ] - -[ 57.65079851 54.15196691 32.6200129 159.77221184 9.17932293 - -0.55001958 0. ] -[ 22.39841407 53.27429778 10.71663228 112.61549421 9.08139949 - -6.66257719 -3.44821121] -[ 23.24156095 53.56892133 13.06642928 112.58436825 12.08446189 - -8.13865027 -5.20710911] -[ 24.15464957 53.86565248 15.36799544 112.50927443 14.90939737 - -9.55792832 -6.67340488] -[ 25.23086849 54.01743215 17.55823011 112.3623276 17.33952575 - -11.26218864 -5.66515416] -[ 26.39218651 54.09665937 19.74170338 112.18077151 19.6194065 - -13.07004462 -3.76024201] -[ 27.5255863 54.48898469 21.87644306 112.32918266 21.9983143 - -15.15606812 -2.06921091] -[ 28.65951447 55.04484052 23.99597791 112.60277837 24.43817877 - -17.29956498 -0.98911784] -[ 29.87685154 55.53240146 26.08738476 112.85024833 26.76139137 - -19.06121218 -0.16577415] -[ 31.16797522 55.935525 28.15108839 113.07672318 28.9791304 - -20.24601014 0. ] - -[ 64.83735606 54.03117125 30.52631813 157.96374997 13.79527764 - -3.84333133 0. ] -[ 22.06619114 53.51343571 11.04189533 112.68751643 9.89178841 - -8.81294143 -3.159954 ] -[ 22.92807565 53.83745407 13.34880052 112.63780549 12.82411109 - -10.23426617 -4.89374218] -[ 23.90610646 54.09730974 15.56914513 112.53093419 15.45929133 - -11.75652488 -5.29628663] -[ 25.02119179 54.22874096 17.7140026 112.36740542 17.77703676 - -13.52553488 -3.67463218] -[ 26.2013959 54.31318827 19.87726745 112.17920161 20.00661003 - -15.34228061 -1.61668244] -[ 27.38602446 54.69602957 21.9664359 112.35649251 22.28542078 - -17.03383551 -0.19028211] -[ 28.59708715 55.15246865 24.03715483 112.61395397 24.57458789 - -18.28397907 0. ] -[ 29.86222196 55.560885 26.09655007 112.85148606 26.80160371 - -19.28971326 0. ] -[ 31.16533887 55.94070893 28.15170271 113.07584573 28.99433663 - -20.26839318 0. ] - -[ 80.00209693 64.46981706 50.30532624 175.57235224 15.12032428 - 0.46626288 0. 
] -[ 22.5213138 53.19267812 10.59691099 112.5883235 8.79144207 - -5.8793521 -3.55107159] -[ 23.35783276 53.47597464 12.9625778 112.56422659 11.82036775 - -7.37581582 -5.31933762] -[ 24.25518997 53.77417557 15.28593395 112.4983134 14.6914882 - -8.78475031 -6.98484637] -[ 25.30988742 53.94215396 17.5004734 112.36083648 17.18225459 - -10.44457384 -6.36019345] -[ 26.46350688 54.02065397 19.69233962 112.18226399 19.48290621 - -12.24640291 -4.53283607] -[ 27.59450998 54.39179703 21.83282899 112.31725941 21.86458759 - -14.31904987 -2.82586094] -[ 28.72168662 54.94135891 23.95371648 112.59337269 24.3124838 - -16.45888361 -1.7409444 ] -[ 29.898588 55.48996451 26.07293673 112.84626219 26.71828057 - -18.59018671 -0.77690756] -[ 3.11754031e+01 5.59187049e+01 2.81455713e+01 1.13075901e+02 - 2.89632891e+01 -2.01071007e+01 -9.78944316e-02] - -[ 33.12632999 59.70995423 58.0478631 177.02865875 -6.57008903 - 6.26912983 -0.54837016] -[ 22.19420359 52.77960534 7.96044938 112.57129678 0.52827677 - -2.46426973 -2.61610008] -[ 23.00523578 52.95701493 10.38752009 112.65369427 3.68299262 - -4.25322616 -3.61848687] -[ 23.87545568 53.14571633 12.73963736 112.67812295 6.62653568 - -5.97282868 -4.19010357] -[ 24.80522397 53.3312602 15.04772622 112.6534682 9.40147029 - -7.65061118 -4.41112826] -[ 25.84571276 53.45645008 17.29573011 112.57378389 11.9338627 - -8.98310054 -4.35603669] -[ 27.0003678 53.49405757 19.50908587 112.44935806 14.25591318 - -9.91346599 -4.11617556] -[ 28.19054647 53.65902944 21.63579505 112.53257594 16.51191862 - -10.65189207 -3.90251969] -[ 29.26347297 54.0173019 23.49655508 112.84702306 18.78699998 - -11.4272626 -3.73170965] -[ 30.22671677 54.44829364 25.11822618 113.23448629 21.0584698 - -12.18900707 -3.61629061] -[ 31.17256498 54.90087093 26.65680967 113.64296208 23.3200012 - -12.94080297 -3.53080778] -[ 32.12277812 55.35994769 28.1481875 114.05923413 25.56834466 - -13.67850173 -3.46417003] -[ 33.08429277 55.82110149 29.60268683 114.48206141 27.80207914 - -14.39569971 -3.41254614] -[ 34.05867192 56.2833412 31.02196629 114.91383871 30.0208477 - -15.0855936 -3.37458172] -[ 35.05279538 56.74402492 32.41744628 115.35581247 32.22505736 - -15.745129 -3.34758066] -[ 36.07105567 57.20140494 33.79678175 115.80999234 34.41555769 - -16.37171029 -3.32909486] -[ 37.11508775 57.65484489 35.16272931 116.27978809 36.5933962 - -16.96035012 -3.3174486 ] -[ 38.18669821 58.10430994 36.51872311 116.7689966 38.76025388 - -17.50812875 -3.31058887] -[ 39.28694332 58.55044768 37.86701786 117.28192439 40.91819013 - -18.01236684 -3.30672445] -[ 40.41745203 58.99400477 39.2103872 117.82349837 43.06950545 - -18.46879507 -3.30469864] -[ 41.57985882 59.43471441 40.55333694 118.40165967 45.1853799 - -18.87253778 -3.30232815] -[ 42.77213148 59.86495343 41.90789295 119.04612557 47.01236062 - -19.20816803 -3.29295444] -[ 43.99797528 60.28025177 43.28702415 119.76008756 48.53146061 - -19.46744447 -3.27336939] -[ 45.25697445 60.68380607 44.68799603 120.52987225 49.88211509 - -19.65477301 -3.2440274 ] -[ 46.54420629 61.07978893 46.10796839 121.36093796 51.04062542 - -19.77454915 -3.20551038] -[ 47.87169328 61.46843222 47.56880895 122.26356789 51.95960922 - -19.83101459 -3.15872246] -[ 49.17927412 61.86214404 48.98903937 123.21769459 52.67390849 - -19.83260771 -3.10482615] - - - - - - - - - - - - - diff --git a/myGym/utils/callbackstf2.py b/myGym/utils/callbackstf2.py index 738600d5..817793c3 100644 --- a/myGym/utils/callbackstf2.py +++ b/myGym/utils/callbackstf2.py @@ -1,6 +1,5 @@ -from stable_baselines.results_plotter import 
load_results, ts2xy -from stable_baselines.common.callbacks import BaseCallback, EvalCallback -from stable_baselines import results_plotter +#from stable_baselines3.results_plotter import load_results, ts2xy +from stable_baselines3.common.callbacks import BaseCallback, EvalCallback import os import matplotlib.pyplot as plt @@ -11,13 +10,13 @@ from typing import Union, List, Dict, Any, Optional from tqdm.auto import tqdm import numpy as np -import tensorflow as tf -import gym +#import tensorflow as tf +import gymnasium as gym import warnings -from stable_baselines.common.vec_env import VecEnv, sync_envs_normalization, DummyVecEnv -from stable_baselines.common.evaluation import evaluate_policy +from stable_baselines3.common.vec_env import VecEnv, sync_envs_normalization, DummyVecEnv +from stable_baselines3.common.evaluation import evaluate_policy class CustomEvalCallback(EvalCallback): @@ -50,7 +49,7 @@ def __init__(self, eval_env: Union[gym.Env, VecEnv], gui_on = True, record=False, camera_id=0, - record_steps_limit=256): # pybullet or mujoco + record_steps_limit=256): super(EvalCallback, self).__init__(callback_on_new_best, verbose=verbose) self.n_eval_episodes = n_eval_episodes self.eval_freq = eval_freq @@ -123,9 +122,6 @@ def evaluate_policy( images.append(image) print(f"appending image: total size: {len(images)}]") - if self.physics_engine == "mujoco" and self.gui_on: # Rendering for mujoco engine - self.eval_env.render() - episode_rewards.append(episode_reward) success_episodes_num += is_successful distance_error_sum += distance_error @@ -259,31 +255,6 @@ def _on_step(self) -> bool: print("Saving new best model to {}".format(self.save_path)) self.model.save(self.save_path) - if self.engine == "mujoco": # Mujoco has additional prints - # Temporal workaround multiprocessing - if not self.multiprocessing and self.verbose > 0: - # Current success rate over the last 'self.STATS_EVERY' episodes - current_success_rate = np.mean(self.env.successfull_failed_episodes[-self.STATS_EVERY:]) - print(f"Best average reward: {self.best_average_reward:.2f} \ - - Last average reward: {average_reward:.2f} \ - and current success rate: {current_success_rate:.2f} \ - over the last {self.STATS_EVERY} episodes \ - - Current touch threshold: {self.env.sensor_goal_threshold:.2f} - \ - Current episode: {episode}") - - if self.engine == "mujoco": - # Save graph of success rate every 'self.save_success_graph_every_steps'. 
- if self.n_calls % self.save_success_graph_every_steps == 0 and not self.multiprocessing: - # Save graph - generate_and_save_mean_graph_from_1_or_2arrays( - data_array_1=self.env.successfull_failed_episodes, - data_array_2=None, - save_dir=self.logdir, - average_x_axis_span=self.success_graph_mean_past_episodes , - axis_x_name="episodes", - axis_y_names=["success_rate"]) - return True - class ProgressBarCallback(BaseCallback): """ diff --git a/myGym/utils/helpers.py b/myGym/utils/helpers.py index 80f9951d..0f13ee2f 100644 --- a/myGym/utils/helpers.py +++ b/myGym/utils/helpers.py @@ -6,49 +6,49 @@ def get_workspace_dict(): 'robot': {'position': [0.0, 0.0, 0.0], 'orientation': [0.0, 0.0, 0.5*np.pi]}, 'camera': {'position': [[0.56, -1.71, 0.6], [-1.3, 3.99, 0.6], [-3.43, 0.67, 1.0], [2.76, 2.68, 1.0], [-0.54, 1.19, 3.4]], 'target': [[0.53, -1.62, 0.59], [-1.24, 3.8, 0.55], [-2.95, 0.83, 0.8], [2.28, 2.53, 0.8], [-0.53, 1.2, 3.2]]}, - 'borders':[-0.7, 0.7, 0.3, 1.3, -0.9, -0.9]}, + 'borders':[-0.7, 0.7, 0.3, 1.3, -0.9, -0.9], 'reachable_borders':[-0.7, 0.7, 0.3, 1.3, -0.9, -0.9]}, 'collabtable': {'urdf': 'collabtable.urdf', 'texture': 'collabtable.jpg', 'transform': {'position':[0.45, -5.1, -1.05], 'orientation':[0.0, 0.0, -0.35*np.pi]}, 'robot': {'position': [0.0, 0.0, 0.0], 'orientation': [0.0, 0.0, 0.5*np.pi]}, 'camera': {'position': [[-0.25, 3.24, 1.2], [-0.44, -1.34, 1.0], [-1.5, 2.6, 1.0], [1.35, -1.0, 1.0], [-0.1, 1.32, 1.4]], 'target': [[-0.0, 0.56, 0.6], [-0.27, 0.42, 0.7], [-1, 2.21, 0.8], [-0.42, 2.03, 0.2], [-0.1, 1.2, 0.7]]}, - 'borders':[-0.7, 0.7, 0.5, 1.2, 0.2, 0.2]}, + 'borders':[-0.7, 0.7, 0.5, 1.2, 0.2, 0.2], 'reachable_borders':[-0.7, 0.7, 0.5, 1.2, 0.2, 0.2]}, 'darts': {'urdf': 'darts.urdf', 'texture': 'darts.jpg', 'transform': {'position':[-1.4, -6.7, -1.05], 'orientation':[0.0, 0.0, -1.0*np.pi]}, 'robot': {'position': [0.0, 0.0, 0.0], 'orientation': [0.0, 0.0, 0.5*np.pi]}, 'camera': {'position': [[-0.0, 2.1, 1.0], [0.0, -1.5, 1.2], [2.3, 0.5, 1.0], [-2.6, 0.5, 1.0], [-0.0, 1.1, 4.9]], 'target': [[0.0, 0.0, 0.7], [-0.0, 1.3, 0.6], [1.0, 0.9, 0.9], [-1.6, 0.9, 0.9], [-0.0, 1.2, 3.1]]}, - 'borders':[-0.7, 0.7, 0.3, 1.3, -0.9, -0.9]}, + 'borders':[-0.7, 0.7, 0.3, 1.3, -0.9, -0.9], 'reachable_borders':[-0.7, 0.7, 0.3, 1.3, -0.9, -0.9]}, 'drawer': {'urdf': 'drawer.urdf', 'texture': 'drawer.jpg', 'transform': {'position':[-4.81, 1.75, -1.05], 'orientation':[0.0, 0.0, 0.0*np.pi]}, 'robot': {'position': [0.0, 0.2, 0.0], 'orientation': [0, 0, 0.5*np.pi]}, 'camera': {'position': [[-0.14, -1.63, 1.0], [-0.14, 3.04, 1.0], [-1.56, -0.92, 1.0], [1.2, -1.41, 1.0], [-0.18, 0.88, 2.5]], 'target': [[-0.14, -0.92, 0.8], [-0.14, 2.33, 0.8], [-0.71, -0.35, 0.7], [0.28, -0.07, 0.6], [-0.18, 0.84, 2.1]]}, - 'borders':[-0.7, 0.7, 0.4, 1.3, 0.8, 0.1]}, + 'borders':[-0.7, 0.7, 0.4, 1.3, 0.8, 0.1], 'reachable_borders':[-0.7, 0.7, 0.4, 1.3, 0.8, 0.1]}, 'football': {'urdf': 'football.urdf', 'texture': 'football.jpg', 'transform': {'position':[4.2, -5.4, -1.05], 'orientation':[0.0, 0.0, -1.0*np.pi]}, 'robot': {'position': [0.0, 0.0, 0.0], 'orientation': [0.0, 0.0, 0.5*np.pi]}, 'camera': {'position': [[-0.0, 2.1, 1.0], [0.0, -1.7, 1.2], [3.5, -0.6, 1.0], [-3.5, -0.7, 1.0], [-0.0, 2.0, 4.9]], 'target': [[0.0, 0.0, 0.7], [-0.0, 1.3, 0.2], [3.05, -0.2, 0.9], [-2.9, -0.2, 0.9], [-0.0, 2.1, 3.6]]}, - 'borders':[-0.7, 0.7, 0.3, 1.3, -0.9, -0.9]}, + 'borders':[-0.7, 0.7, 0.3, 1.3, -0.9, -0.9], 'reachable_borders':[-0.7, 0.7, 0.3, 1.3, -0.9, -0.9]}, 'fridge': {'urdf': 'fridge.urdf', 'texture': 'fridge.jpg', 
'transform': {'position':[1.6, -5.95, -1.05], 'orientation':[0.0, 0.0, 0*np.pi]}, 'robot': {'position': [0.0, 0.0, 0.0], 'orientation': [0.0, 0.0, 0.5*np.pi]}, 'camera': {'position': [[0.0, -1.3, 1.0], [0.0, 2.35, 1.2], [-1.5, 0.85, 1.0], [1.4, 0.85, 1.0], [0.0, 0.55, 2.5]], 'target': [[0.0, 0.9, 0.7], [0.0, 0.9, 0.6], [0.0, 0.55, 0.5], [0.4, 0.55, 0.7], [0.0, 0.45, 1.8]]}, - 'borders':[-0.7, 0.7, 0.3, 0.5, -0.9, -0.9]}, + 'borders':[-0.7, 0.7, 0.3, 0.5, -0.9, -0.9], 'reachable_borders':[-0.7, 0.7, 0.3, 0.5, -0.9, -0.9]}, 'maze': {'urdf': 'maze.urdf', 'texture': 'maze.jpg', 'transform': {'position':[6.7, -3.1, 0.0], 'orientation':[0.0, 0.0, -0.5*np.pi]}, 'robot': {'position': [0.0, 0.0, 0.1], 'orientation': [0.0, 0.0, 0.5*np.pi]}, 'camera': {'position': [[0.0, -1.4, 2.3], [-0.0, 5.9, 1.9], [4.7, 2.7, 2.0], [-3.2, 2.7, 2.0], [-0.0, 3.7, 5.0]], 'target': [[0.0, -1.0, 1.9], [-0.0, 5.6, 1.7], [3.0, 2.7, 1.5], [-2.9, 2.7, 1.7], [-0.0, 3.65, 4.8]]}, - 'borders':[-2.5, 2.2, 0.7, 4.7, 0.05, 0.05]}, + 'borders':[-2.5, 2.2, 0.7, 4.7, 0.05, 0.05], 'reachable_borders':[-2.5, 2.2, 0.7, 4.7, 0.05, 0.05]}, 'stairs': {'urdf': 'stairs.urdf', 'texture': 'stairs.jpg', 'transform': {'position':[-5.5, -0.08, -1.05], 'orientation':[0.0, 0.0, -0.20*np.pi]}, 'robot': {'position': [0.0, 0.0, 0.0], 'orientation': [0.0, 0.0, 0.5*np.pi]}, 'camera': {'position': [[0.04, -1.64, 1.0], [0.81, 3.49, 1.0], [-2.93, 1.76, 1.0], [4.14, 0.33, 1.0], [2.2, 1.24, 3.2]], 'target': [[0.18, -1.12, 0.85], [0.81, 2.99, 0.8], [-1.82, 1.57, 0.7], [3.15, 0.43, 0.55], [2.17, 1.25, 3.1]]}, - 'borders':[-0.5, 2.5, 0.8, 1.6, 0.1, 0.1]}, + 'borders':[-0.5, 2.5, 0.8, 1.6, 0.1, 0.1], 'reachable_borders':[-0.5, 2.5, 0.8, 1.6, 0.1, 0.1]}, 'table': {'urdf': 'table.urdf', 'texture': 'table.jpg', 'transform': {'position':[-0.0, -0.0, -1.05], 'orientation':[0.0, 0.0, 0*np.pi]}, 'robot': {'position': [0.0, 0.0, 0.0], 'orientation': [0.0, 0.0, 0.5*np.pi]}, @@ -56,7 +56,7 @@ def get_workspace_dict(): [0.0, 1.6, 0.8], [-0.0, -0.5, 0.8], [0.8, 0.9, 0.6], [-0.8, 0.9, 0.8], [0.0, 0.9, 1.]], 'target': [[0.0, 2.1, 0.9], [-0.0, -0.8, 0.9], [1.4, 0.9, 0.88], [-1.4, 0.9, 0.88], [0.0, 0.80, 1.], [0.0, 1.3, 0.5], [-0.0, -0.0, 0.6], [0.6, 0.9, 0.4], [-0.6, 0.9, 0.5], [0.0, 0.898, 0.8]]}, - 'borders':[-0.7, 0.7, 0.5, 1.3, 0.1, 0.1]}, + 'borders':[-0.7, 0.7, 0.5, 1.3, 0.1, 0.1], 'reachable_borders':[-0.5, 0.5, 0.5, 0.9, 0.05, 0.4]}, 'table_tiago': {'urdf': 'table_tiago.urdf', 'texture': 'table.jpg', 'transform': {'position':[-0.0, -0.0, -1.05], 'orientation':[0.0, 0.0, 0*np.pi]}, 'robot': {'position': [0.0, 0.3, -0.85], 'orientation': [0.0, 0.0, 0.5*np.pi]}, @@ -64,7 +64,7 @@ def get_workspace_dict(): [0.0, 1.6, 0.8], [-0.0, -0.5, 0.8], [0.8, 0.9, 0.6], [-0.8, 0.9, 0.8], [0.0, 0.9, 1.]], 'target': [[0.0, 2.1, 0.9], [-0.0, -0.8, 0.9], [1.4, 0.9, 0.88], [-1.4, 0.9, 0.88], [0.0, 0.80, 1.], [0.0, 1.3, 0.5], [-0.0, -0.0, 0.6], [0.6, 0.9, 0.4], [-0.6, 0.9, 0.5], [0.0, 0.898, 0.8]]}, - 'borders':[-0.7, 0.7, 0.5, 1.3, 0.1, 0.1]}, + 'borders':[-0.7, 0.7, 0.5, 1.3, 0.1, 0.1], 'reachable_borders':[-0.5, 0.5, 0.5, 0.9, 0.05, 0.4]}, 'table_nico': {'urdf': 'table_nico.urdf', 'texture': 'table.jpg', 'transform': {'position':[-0.0, -0.0, -1.05], 'orientation':[0.0, 0.0, 0*np.pi]}, 'robot': {'position': [0.0, 0.0, 0.0], 'orientation': [0.0, 0.0, 0.5*np.pi]}, @@ -72,19 +72,19 @@ def get_workspace_dict(): [0.0, 1.6, 0.8], [-0.0, -0.5, 0.8], [0.8, 0.9, 0.6], [-0.8, 0.9, 0.8], [0.0, 0.9, 1.]], 'target': [[0.0, 2.1, 0.9], [-0.0, -0.8, 0.9], [1.4, 0.9, 0.88], [-1.4, 0.9, 0.88], [0.0, 0.80, 
1.], [0.0, 1.3, 0.5], [-0.0, -0.0, 0.6], [0.6, 0.9, 0.4], [-0.6, 0.9, 0.5], [0.0, 0.898, 0.8]]}, - 'borders':[-0.7, 0.7, 0.5, 1.3, 0.1, 0.1]}, + 'borders':[-0.7, 0.7, 0.5, 1.3, 0.1, 0.1], 'reachable_borders':[-0.5, 0.5, 0.5, 0.9, 0.05, 0.4]}, 'verticalmaze': {'urdf': 'verticalmaze.urdf', 'texture': 'verticalmaze.jpg', 'transform': {'position':[-5.7, -7.55, -1.05], 'orientation':[0.0, 0.0, 0.5*np.pi]}, 'robot': {'position': [0.0, 0.0, 0.0], 'orientation': [0.0, 0.0, 0.5*np.pi]}, 'camera': {'position': [[-0.0, -1.25, 1.0], [0.0, 1.35, 1.3], [1.7, -1.25, 1.0], [-1.6, -1.25, 1.0], [0.0, 0.05, 2.5]], 'target': [[-0.0, -1.05, 1.0], [0.0, 0.55, 1.3], [1.4, -0.75, 0.9], [-1.3, -0.75, 0.9], [0.0, 0.15, 2.1]]}, - 'borders':[-0.7, 0.8, 0.65, 0.65, 0.7, 1.4]}, + 'borders':[-0.7, 0.8, 0.65, 0.65, 0.7, 1.4], 'reachable_borders':[-0.7, 0.8, 0.65, 0.65, 0.7, 1.4]}, 'modularmaze': {'urdf': 'modularmaze.urdf', 'texture': 'verticalmaze.jpg', 'transform': {'position':[-7, 8, 0.0], 'orientation':[0.0, 0.0, 0.0]}, 'robot': {'position': [0.0, -0.5, 0.05], 'orientation': [0.0, 0.0, 0.5*np.pi]}, 'camera': {'position': [[-0.0, -1.25, 1.0], [0.0, 1.35, 1.3], [1.7, -1.25, 1.0], [-1.6, -1.25, 1.0], [0.0, 0.7, 2.1], [-0.0, -0.3, 0.2]], 'target': [[-0.0, -1.05, 0.9], [0.0, 0.55, 1.3], [1.4, -0.75, 0.9], [-1.3, -0.75, 0.9], [0.0, 0.71, 1.8], [-0.0, -0.25, 0.199]]}, - 'borders':[-0.7, 0.8, 0.65, 0.65, 0.7, 1.4]}} + 'borders':[-0.7, 0.8, 0.65, 0.65, 0.7, 1.4], 'reachable_borders':[-0.7, 0.8, 0.65, 0.65, 0.7, 1.4]}} return ws_dict @@ -127,3 +127,17 @@ def get_robot_dict(): 'hsr': {'path': '/envs/robots/hsr/hsrb4s.urdf', 'position': np.array([0.0, -0.15, -0.4]), 'orientation': [0.0, 0.0, 0 * np.pi]}, } return r_dict + + +def get_module_type(observation): + """ + Get source of the information from environment (ground_truth, yolact, vae) + + Returns: + :return source: (string) Source of information + """ + if observation["actual_state"] not in ["vae", "yolact", "voxel", "dope"]: + src = "ground_truth_6D" if "6D" in observation["actual_state"] else "ground_truth" + else: + src = observation["actual_state"] + return src diff --git a/myGym/utils/modder.py b/myGym/utils/modder.py deleted file mode 100644 index 3f833b7e..00000000 --- a/myGym/utils/modder.py +++ /dev/null @@ -1,457 +0,0 @@ -""" -Utilites for changing textures and materials after creating a -MuJoCo simulation. This allows for super fast scene generation. 
-""" -from collections import defaultdict -import numpy as np -from mujoco_py import cymj - - -class BaseModder(): - - def __init__(self, sim, random_state=None): - self.sim = sim - if random_state is None: - self.random_state = np.random.RandomState() - elif isinstance(random_state, int): - # random_state assumed to be an int - self.random_state = np.random.RandomState(random_state) - else: - self.random_state = random_state - - @property - def model(self): - # Available for quick convenience access - return self.sim.model - - -class LightModder(BaseModder): - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - def set_pos(self, name, value): - lightid = self.get_lightid(name) - assert lightid > -1, "Unkwnown light %s" % name - - value = list(value) - assert len(value) == 3, "Expected 3-dim value, got %s" % value - - self.model.light_pos[lightid] = value - - def set_dir(self, name, value): - lightid = self.get_lightid(name) - assert lightid > -1, "Unkwnown light %s" % name - - value = list(value) - assert len(value) == 3, "Expected 3-dim value, got %s" % value - - self.model.light_dir[lightid] = value - - def set_active(self, name, value): - lightid = self.get_lightid(name) - assert lightid > -1, "Unkwnown light %s" % name - - self.model.light_active[lightid] = value - - def set_specular(self, name, value): - lightid = self.get_lightid(name) - assert lightid > -1, "Unkwnown light %s" % name - - value = list(value) - assert len(value) == 3, "Expected 3-dim value, got %s" % value - - self.model.light_specular[lightid] = value - - def set_ambient(self, name, value): - lightid = self.get_lightid(name) - assert lightid > -1, "Unkwnown light %s" % name - - value = list(value) - assert len(value) == 3, "Expected 3-dim value, got %s" % value - - self.model.light_ambient[lightid] = value - - def set_diffuse(self, name, value): - lightid = self.get_lightid(name) - assert lightid > -1, "Unkwnown light %s" % name - - value = list(value) - assert len(value) == 3, "Expected 3-dim value, got %s" % value - - self.model.light_diffuse[lightid] = value - - def set_castshadow(self, name, value): - lightid = self.get_lightid(name) - assert lightid > -1, "Unkwnown light %s" % name - self.model.light_castshadow[lightid] = value - - def get_lightid(self, name): - return self.model.light_name2id(name) - - -class CameraModder(BaseModder): - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - def set_fovy(self, name, value): - camid = self.get_camid(name) - assert 0 < value < 180 - assert camid > -1, "Unknown camera %s" % name - self.model.cam_fovy[camid] = value - - def get_quat(self, name): - camid = self.get_camid(name) - assert camid > -1, "Unknown camera %s" % name - return self.model.cam_quat[camid] - - def set_quat(self, name, value): - value = list(value) - assert len(value) == 4, ( - "Expectd value of length 3, instead got %s" % value) - camid = self.get_camid(name) - assert camid > -1, "Unknown camera %s" % name - self.model.cam_quat[camid] = value - - def get_pos(self, name): - camid = self.get_camid(name) - assert camid > -1, "Unknown camera %s" % name - return self.model.cam_pos[camid] - - def set_pos(self, name, value): - value = list(value) - assert len(value) == 3, ( - "Expected value of length 3, instead got %s" % value) - camid = self.get_camid(name) - assert camid > -1 - self.model.cam_pos[camid] = value - - def get_camid(self, name): - return self.model.camera_name2id(name) - - -class MaterialModder(BaseModder): - """ - Modify material 
properties of a model. Example use: - - sim = MjSim(...) - modder = MaterialModder(sim) - modder.set_specularity('some_geom', 0.5) - .rand_all('another_geom') - - """ - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - def set_specularity(self, name, value): - assert 0 <= value <= 1.0 - mat_id = self.get_mat_id(name) - self.model.mat_specular[mat_id] = value - - def set_shininess(self, name, value): - assert 0 <= value <= 1.0 - mat_id = self.get_mat_id(name) - self.model.mat_shininess[mat_id] = value - - def set_reflectance(self, name, value): - assert 0 <= value <= 1.0 - mat_id = self.get_mat_id(name) - self.model.mat_reflectance[mat_id] = value - - def set_texrepeat(self, name, repeat_x, repeat_y): - mat_id = self.get_mat_id(name) - # ensure the following is set to false, so that repeats are - # relative to the extent of the body. - self.model.mat_texuniform[mat_id] = 0 - self.model.mat_texrepeat[mat_id, :] = [repeat_x, repeat_y] - - def rand_all(self, name): - self.rand_specularity(name) - self.rand_shininess(name) - self.rand_reflectance(name) - - def rand_specularity(self, name): - value = 0.1 + 0.2 * self.random_state.uniform() - self.set_specularity(name, value) - - def rand_shininess(self, name): - value = 0.1 + 0.5 * self.random_state.uniform() - self.set_shininess(name, value) - - def rand_reflectance(self, name): - value = 0.1 + 0.5 * self.random_state.uniform() - self.set_reflectance(name, value) - - def rand_texrepeat(self, name, max_repeat=5): - repeat_x = self.random_state.randint(0, max_repeat) + 1 - repeat_y = self.random_state.randint(0, max_repeat) + 1 - self.set_texrepeat(name, repeat_x, repeat_y) - - def get_mat_id(self, name): - """ Returns the material id based on the geom name. """ - geom_id = self.model.geom_name2id(name) - return self.model.geom_matid[geom_id] - - -class TextureModder(BaseModder): - """ - Modify textures in model. Example use: - - sim = MjSim(...) - modder = TextureModder(sim) - modder.whiten_materials() # ensures materials won't impact colors - modder.set_checker('some_geom', (255, 0, 0), (0, 0, 0)) - modder.rand_all('another_geom') - - Note: in order for the textures to take full effect, you'll need to set - the rgba values for all materials to [1, 1, 1, 1], otherwise the texture - colors will be modulated by the material colors. Call the - `whiten_materials` helper method to set all material colors to white. 
- """ - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - self.textures = [Texture(self.model, i) - for i in range(self.model.ntex)] - self._build_tex_geom_map() - - # These matrices will be used to rapidly synthesize - # checker pattern bitmaps - self._cache_checker_matrices() - - def get_texture(self, name): - if name == 'skybox': - tex_id = -1 - for i in range(self.model.ntex): - # TODO: Don't hardcode this - skybox_textype = 2 - if self.model.tex_type[i] == skybox_textype: - tex_id = i - assert tex_id >= 0, "Model has no skybox" - else: - geom_id = self.model.geom_name2id(name) - mat_id = self.model.geom_matid[geom_id] - assert mat_id >= 0, "Geom has no assigned material" - tex_id = self.model.mat_texid[mat_id] - assert tex_id >= 0, "Material has no assigned texture" - - texture = self.textures[tex_id] - - return texture - - def get_checker_matrices(self, name): - if name == 'skybox': - return self._skybox_checker_mat - else: - geom_id = self.model.geom_name2id(name) - return self._geom_checker_mats[geom_id] - - def set_checker(self, name, rgb1, rgb2): - bitmap = self.get_texture(name).bitmap - cbd1, cbd2 = self.get_checker_matrices(name) - - rgb1 = np.asarray(rgb1).reshape([1, 1, -1]) - rgb2 = np.asarray(rgb2).reshape([1, 1, -1]) - bitmap[:] = rgb1 * cbd1 + rgb2 * cbd2 - - self.upload_texture(name) - return bitmap - - def set_gradient(self, name, rgb1, rgb2, vertical=True): - """ - Creates a linear gradient from rgb1 to rgb2. - - Args: - - rgb1 (array): start color - - rgb2 (array): end color - - vertical (bool): if True, the gradient in the positive - y-direction, if False it's in the positive x-direction. - """ - # NOTE: MuJoCo's gradient uses a sigmoid. Here we simplify - # and just use a linear gradient... We could change this - # to just use a tanh-sigmoid if needed. 
- bitmap = self.get_texture(name).bitmap - h, w = bitmap.shape[:2] - if vertical: - p = np.tile(np.linspace(0, 1, h)[:, None], (1, w)) - else: - p = np.tile(np.linspace(0, 1, w), (h, 1)) - - for i in range(3): - bitmap[..., i] = rgb2[i] * p + rgb1[i] * (1.0 - p) - - self.upload_texture(name) - return bitmap - - def set_rgb(self, name, rgb): - bitmap = self.get_texture(name).bitmap - bitmap[..., :] = np.asarray(rgb) - - self.upload_texture(name) - return bitmap - - def set_noise(self, name, rgb1, rgb2, fraction=0.9): - """ - Args: - - name (str): name of geom - - rgb1 (array): background color - - rgb2 (array): color of random noise foreground color - - fraction (float): fraction of pixels with foreground color - """ - bitmap = self.get_texture(name).bitmap - h, w = bitmap.shape[:2] - mask = self.random_state.uniform(size=(h, w)) < fraction - - bitmap[..., :] = np.asarray(rgb1) - bitmap[mask, :] = np.asarray(rgb2) - - self.upload_texture(name) - return bitmap - - def randomize(self): - for name in self.sim.model.geom_names: - self.rand_all(name) - - def rand_all(self, name): - choices = [ - self.rand_checker, - self.rand_gradient, - self.rand_rgb, - self.rand_noise, - ] - choice = self.random_state.randint(len(choices)) - return choices[choice](name) - - def rand_checker(self, name): - rgb1, rgb2 = self.get_rand_rgb(2) - return self.set_checker(name, rgb1, rgb2) - - def rand_gradient(self, name): - rgb1, rgb2 = self.get_rand_rgb(2) - vertical = bool(self.random_state.uniform() > 0.5) - return self.set_gradient(name, rgb1, rgb2, vertical=vertical) - - def rand_rgb(self, name): - rgb = self.get_rand_rgb() - return self.set_rgb(name, rgb) - - def rand_noise(self, name): - fraction = 0.1 + self.random_state.uniform() * 0.8 - rgb1, rgb2 = self.get_rand_rgb(2) - return self.set_noise(name, rgb1, rgb2, fraction) - - def upload_texture(self, name): - """ - Uploads the texture to the GPU so it's available in the rendering. - """ - texture = self.get_texture(name) - if not self.sim.render_contexts: - cymj.MjRenderContextOffscreen(self.sim) - for render_context in self.sim.render_contexts: - render_context.upload_texture(texture.id) - - def whiten_materials(self, geom_names=None): - """ - Helper method for setting all material colors to white, otherwise - the texture modifications won't take full effect. - - Args: - - geom_names (list): list of geom names whose materials should be - set to white. If omitted, all materials will be changed. - """ - geom_names = geom_names or [] - if geom_names: - for name in geom_names: - geom_id = self.model.geom_name2id(name) - mat_id = self.model.geom_matid[geom_id] - self.model.mat_rgba[mat_id, :] = 1.0 - else: - self.model.mat_rgba[:] = 1.0 - - def get_rand_rgb(self, n=1): - def _rand_rgb(): - return np.array(self.random_state.uniform(size=3) * 255, - dtype=np.uint8) - - if n == 1: - return _rand_rgb() - else: - return tuple(_rand_rgb() for _ in range(n)) - - def _build_tex_geom_map(self): - # Build a map from tex_id to geom_ids, so we can check - # for collisions. - self._geom_ids_by_tex_id = defaultdict(list) - for geom_id in range(self.model.ngeom): - mat_id = self.model.geom_matid[geom_id] - if mat_id >= 0: - tex_id = self.model.mat_texid[mat_id] - if tex_id >= 0: - self._geom_ids_by_tex_id[tex_id].append(geom_id) - - def _cache_checker_matrices(self): - """ - Cache two matrices of the form [[1, 0, 1, ...], - [0, 1, 0, ...], - ...] - and [[0, 1, 0, ...], - [1, 0, 1, ...], - ...] - for each texture. 
To use for fast creation of checkerboard patterns - """ - self._geom_checker_mats = [] - for geom_id in range(self.model.ngeom): - mat_id = self.model.geom_matid[geom_id] - tex_id = self.model.mat_texid[mat_id] - texture = self.textures[tex_id] - h, w = texture.bitmap.shape[:2] - self._geom_checker_mats.append(self._make_checker_matrices(h, w)) - - # add skybox - skybox_tex_id = -1 - for tex_id in range(self.model.ntex): - skybox_textype = 2 - if self.model.tex_type[tex_id] == skybox_textype: - skybox_tex_id = tex_id - if skybox_tex_id >= 0: - texture = self.textures[skybox_tex_id] - h, w = texture.bitmap.shape[:2] - self._skybox_checker_mat = self._make_checker_matrices(h, w) - else: - self._skybox_checker_mat = None - - def _make_checker_matrices(self, h, w): - re = np.r_[((w + 1) // 2) * [0, 1]] - ro = np.r_[((w + 1) // 2) * [1, 0]] - cbd1 = np.expand_dims(np.row_stack(((h + 1) // 2) * [re, ro]), -1)[:h, :w] - cbd2 = np.expand_dims(np.row_stack(((h + 1) // 2) * [ro, re]), -1)[:h, :w] - return cbd1, cbd2 - - -# From mjtTexture -MJT_TEXTURE_ENUM = ['2d', 'cube', 'skybox'] - - -class Texture(): - """ - Helper class for operating on the MuJoCo textures. - """ - - __slots__ = ['id', 'type', 'height', 'width', 'tex_adr', 'tex_rgb'] - - def __init__(self, model, tex_id): - self.id = tex_id - self.type = MJT_TEXTURE_ENUM[model.tex_type[tex_id]] - self.height = model.tex_height[tex_id] - self.width = model.tex_width[tex_id] - self.tex_adr = model.tex_adr[tex_id] - self.tex_rgb = model.tex_rgb - - @property - def bitmap(self): - size = self.height * self.width * 3 - data = self.tex_rgb[self.tex_adr:self.tex_adr + size] - return data.reshape((self.height, self.width, 3)) diff --git a/myGym/utils/results_plotter.py b/myGym/utils/results_plotter.py new file mode 100644 index 00000000..9df80687 --- /dev/null +++ b/myGym/utils/results_plotter.py @@ -0,0 +1,340 @@ +import numpy as np +import matplotlib +import matplotlib.pyplot as plt +import pandas, json +import os +from glob import glob +import time, gym, csv +from typing import Tuple, Dict, Any, List, Optional + +# matplotlib.use('TkAgg') # Can change to 'Agg' for non-interactive mode +plt.rcParams['svg.fonttype'] = 'none' + +X_TIMESTEPS = 'timesteps' +X_EPISODES = 'episodes' +X_WALLTIME = 'walltime_hrs' +POSSIBLE_X_AXES = [X_TIMESTEPS, X_EPISODES, X_WALLTIME] +EPISODES_WINDOW = 100 +COLORS = ['blue', 'green', 'red', 'cyan', 'magenta', 'yellow', 'black', 'purple', 'pink', + 'brown', 'orange', 'teal', 'coral', 'lightblue', 'lime', 'lavender', 'turquoise', + 'darkgreen', 'tan', 'salmon', 'gold', 'lightpurple', 'darkred', 'darkblue'] + +class LoadMonitorResultsError(Exception): + """ + Raised when loading the monitor log fails. + """ + pass + +class Monitor(gym.Wrapper): + """ + A monitor wrapper for Gym environments, it is used to know the episode reward, length, time and other data. 
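+    Per-episode statistics are appended to a ``*.monitor.csv`` file when ``filename`` is given, and are also kept in memory in ``episode_rewards``, ``episode_lengths`` and ``episode_times``.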
+ + :param env: (gym.Env) The environment + :param filename: (Optional[str]) the location to save a log file, can be None for no log + :param allow_early_resets: (bool) allows the reset of the environment before it is done + :param reset_keywords: (tuple) extra keywords for the reset call, if extra parameters are needed at reset + :param info_keywords: (tuple) extra information to log, from the information return of environment.step + """ + EXT = "monitor.csv" + file_handler = None + + def __init__(self, + env: gym.Env, + filename: Optional[str], + allow_early_resets: bool = True, + reset_keywords=(), + info_keywords=()): + super(Monitor, self).__init__(env=env) + self.t_start = time.time() + if filename is None: + self.file_handler = None + self.logger = None + else: + if not filename.endswith(Monitor.EXT): + if os.path.isdir(filename): + filename = os.path.join(filename, Monitor.EXT) + else: + filename = filename + "." + Monitor.EXT + self.file_handler = open(filename, "wt") + self.file_handler.write('#%s\n' % json.dumps({"t_start": self.t_start, 'env_id': env.spec and env.spec.id})) + self.logger = csv.DictWriter(self.file_handler, + fieldnames=('r', 'l', 't') + reset_keywords + info_keywords) + self.logger.writeheader() + self.file_handler.flush() + + self.reset_keywords = reset_keywords + self.info_keywords = info_keywords + self.allow_early_resets = allow_early_resets + self.rewards = None + self.needs_reset = True + self.episode_rewards = [] + self.episode_lengths = [] + self.episode_times = [] + self.total_steps = 0 + self.current_reset_info = {} # extra info about the current episode, that was passed in during reset() + + def reset(self, **kwargs) -> np.ndarray: + """ + Calls the Gym environment reset. Can only be called if the environment is over, or if allow_early_resets is True + + :param kwargs: Extra keywords saved for the next episode. only if defined by reset_keywords + :return: (np.ndarray) the first observation of the environment + """ + if not self.allow_early_resets and not self.needs_reset: + raise RuntimeError("Tried to reset an environment before done. 
If you want to allow early resets, " + "wrap your env with Monitor(env, path, allow_early_resets=True)") + self.rewards = [] + self.needs_reset = False + for key in self.reset_keywords: + value = kwargs.get(key) + if value is None: + raise ValueError('Expected you to pass kwarg {} into reset'.format(key)) + self.current_reset_info[key] = value + return self.env.reset(**kwargs) + + def step(self, action: np.ndarray) -> Tuple[np.ndarray, float, bool, Dict[Any, Any]]: + """ + Step the environment with the given action + + :param action: (np.ndarray) the action + :return: (Tuple[np.ndarray, float, bool, Dict[Any, Any]]) observation, reward, done, information + """ + if self.needs_reset: + raise RuntimeError("Tried to step environment that needs reset") + observation, reward, done, info = self.env.step(action) + self.rewards.append(reward) + if done: + self.needs_reset = True + ep_rew = sum(self.rewards) + eplen = len(self.rewards) + ep_info = {"r": round(ep_rew, 6), "l": eplen, "t": round(time.time() - self.t_start, 6)} + for key in self.info_keywords: + ep_info[key] = info[key] + self.episode_rewards.append(ep_rew) + self.episode_lengths.append(eplen) + self.episode_times.append(time.time() - self.t_start) + ep_info.update(self.current_reset_info) + if self.logger: + self.logger.writerow(ep_info) + self.file_handler.flush() + info['episode'] = ep_info + self.total_steps += 1 + return observation, reward, done, info + + def close(self): + """ + Closes the environment + """ + super(Monitor, self).close() + if self.file_handler is not None: + self.file_handler.close() + + def get_total_steps(self) -> int: + """ + Returns the total number of timesteps + + :return: (int) + """ + return self.total_steps + + def get_episode_rewards(self) -> List[float]: + """ + Returns the rewards of all the episodes + + :return: ([float]) + """ + return self.episode_rewards + + def get_episode_lengths(self) -> List[int]: + """ + Returns the number of timesteps of all the episodes + + :return: ([int]) + """ + return self.episode_lengths + + def get_episode_times(self) -> List[float]: + """ + Returns the runtime in seconds of all the episodes + + :return: ([float]) + """ + return self.episode_times + +def get_monitor_files(path: str) -> List[str]: + """ + get all the monitor files in the given path + + :param path: (str) the logging folder + :return: ([str]) the log files + """ + return glob(os.path.join(path, "*" + Monitor.EXT)) + +def load_results(path: str) -> pandas.DataFrame: + """ + Load all Monitor logs from a given directory path matching ``*monitor.csv`` and ``*monitor.json`` + + :param path: (str) the directory path containing the log file(s) + :return: (pandas.DataFrame) the logged data + """ + # get both csv and (old) json files + monitor_files = (glob(os.path.join(path, "*monitor.json")) + get_monitor_files(path)) + if not monitor_files: + raise LoadMonitorResultsError("no monitor files of the form *%s found in %s" % (Monitor.EXT, path)) + data_frames = [] + headers = [] + for file_name in monitor_files: + with open(file_name, 'rt') as file_handler: + if file_name.endswith('csv'): + first_line = file_handler.readline() + assert first_line[0] == '#' + header = json.loads(first_line[1:]) + data_frame = pandas.read_csv(file_handler, index_col=None) + headers.append(header) + elif file_name.endswith('json'): # Deprecated json format + episodes = [] + lines = file_handler.readlines() + header = json.loads(lines[0]) + headers.append(header) + for line in lines[1:]: + episode = json.loads(line) + 
episodes.append(episode) + data_frame = pandas.DataFrame(episodes) + else: + assert 0, 'unreachable' + data_frame['t'] += header['t_start'] + data_frames.append(data_frame) + data_frame = pandas.concat(data_frames) + data_frame.sort_values('t', inplace=True) + data_frame.reset_index(inplace=True) + data_frame['t'] -= min(header['t_start'] for header in headers) + # data_frame.headers = headers # HACK to preserve backwards compatibility + return data_frame + +def rolling_window(array, window): + """ + apply a rolling window to a np.ndarray + + :param array: (np.ndarray) the input Array + :param window: (int) length of the rolling window + :return: (np.ndarray) rolling window on the input array + """ + shape = array.shape[:-1] + (array.shape[-1] - window + 1, window) + strides = array.strides + (array.strides[-1],) + return np.lib.stride_tricks.as_strided(array, shape=shape, strides=strides) + + +def window_func(var_1, var_2, window, func): + """ + apply a function to the rolling window of 2 arrays + + :param var_1: (np.ndarray) variable 1 + :param var_2: (np.ndarray) variable 2 + :param window: (int) length of the rolling window + :param func: (numpy function) function to apply on the rolling window on variable 2 (such as np.mean) + :return: (np.ndarray, np.ndarray) the rolling output with applied function + """ + var_2_window = rolling_window(var_2, window) + function_on_var2 = func(var_2_window, axis=-1) + return var_1[window - 1:], function_on_var2 + + +def ts2xy(timesteps, xaxis): + """ + Decompose a timesteps variable to x and y values + + :param timesteps: (Pandas DataFrame) the input data + :param xaxis: (str) the axis for the x and y output + (can be X_TIMESTEPS='timesteps', X_EPISODES='episodes' or X_WALLTIME='walltime_hrs') + :return: (np.ndarray, np.ndarray) the x and y output + """ + if xaxis == X_TIMESTEPS: + x_var = np.cumsum(timesteps.l.values) + y_var = timesteps.r.values + elif xaxis == X_EPISODES: + x_var = np.arange(len(timesteps)) + y_var = timesteps.r.values + elif xaxis == X_WALLTIME: + x_var = timesteps.t.values / 3600. + y_var = timesteps.r.values + else: + raise NotImplementedError + return x_var, y_var + + +def plot_curves(xy_list, xaxis, title): + """ + plot the curves + + :param xy_list: ([(np.ndarray, np.ndarray)]) the x and y coordinates to plot + :param xaxis: (str) the axis for the x and y output + (can be X_TIMESTEPS='timesteps', X_EPISODES='episodes' or X_WALLTIME='walltime_hrs') + :param title: (str) the title of the plot + """ + + plt.figure(figsize=(8, 2)) + maxx = max(xy[0][-1] for xy in xy_list) + minx = 0 + for (i, (x, y)) in enumerate(xy_list): + color = COLORS[i] + plt.scatter(x, y, s=2) + # Do not plot the smoothed curve at all if the timeseries is shorter than window size.
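+ # The raw per-episode rewards are already drawn as a scatter above; only the rolling mean over EPISODES_WINDOW (100 episodes) is skipped for short runs.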
+ if x.shape[0] >= EPISODES_WINDOW: + # Compute and plot rolling mean with window of size EPISODE_WINDOW + x, y_mean = window_func(x, y, EPISODES_WINDOW, np.mean) + plt.plot(x, y_mean, color=color) + plt.xlim(minx, maxx) + plt.title(title) + plt.xlabel(xaxis) + plt.ylabel("Episode Rewards") + plt.tight_layout() + + +def plot_results(dirs, num_timesteps, xaxis, task_name): + """ + plot the results + + :param dirs: ([str]) the save location of the results to plot + :param num_timesteps: (int or None) only plot the points below this value + :param xaxis: (str) the axis for the x and y output + (can be X_TIMESTEPS='timesteps', X_EPISODES='episodes' or X_WALLTIME='walltime_hrs') + :param task_name: (str) the title of the task to plot + """ + + tslist = [] + for folder in dirs: + timesteps = load_results(folder) + if num_timesteps is not None: + timesteps = timesteps[timesteps.l.cumsum() <= num_timesteps] + tslist.append(timesteps) + xy_list = [ts2xy(timesteps_item, xaxis) for timesteps_item in tslist] + plot_curves(xy_list, xaxis, task_name) + + +def main(): + """ + Example usage in jupyter-notebook + + .. code-block:: python + + from stable_baselines import results_plotter + %matplotlib inline + results_plotter.plot_results(["./log"], 10e6, results_plotter.X_TIMESTEPS, "Breakout") + + Here ./log is a directory containing the monitor.csv files + """ + import argparse + import os + parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument('--dirs', help='List of log directories', nargs='*', default=['./log']) + parser.add_argument('--num_timesteps', type=int, default=int(10e6)) + parser.add_argument('--xaxis', help='Varible on X-axis', default=X_TIMESTEPS) + parser.add_argument('--task_name', help='Title of plot', default='Breakout') + args = parser.parse_args() + args.dirs = [os.path.abspath(folder) for folder in args.dirs] + plot_results(args.dirs, args.num_timesteps, args.xaxis, args.task_name) + plt.show() + + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/myGym/vae/train_vae.py b/myGym/vae/train_vae.py index 5b326586..8ad6014f 100644 --- a/myGym/vae/train_vae.py +++ b/myGym/vae/train_vae.py @@ -12,8 +12,8 @@ from datetime import datetime from myGym.vae import sample from configparser import ConfigParser -import pkg_resources -from myGym.vae.vis_helpers import load_checkpoint +import importlib.resources as resources +currentdir = resources.files("myGym").joinpath("envs") SEED = 1111 random.seed(SEED) @@ -219,7 +219,7 @@ def load_images_all(path, imsize): help='vae config') args = parser.parse_args() config = ConfigParser() - config.read(pkg_resources.resource_filename("myGym", '/vae/{}'.format(args.config))) + config.read(resources.files("myGym").joinpath("vae", args.config)) with open(os.path.join(SAVE_DIR,'config.ini'), 'w') as f: config.write(f) diff --git a/myGym/visualize_multieval.py b/myGym/visualize_multieval.py deleted file mode 100644 index 8ee2361a..00000000 --- a/myGym/visualize_multieval.py +++ /dev/null @@ -1,158 +0,0 @@ -import os -import re -import threading -import subprocess -from sklearn.model_selection import ParameterGrid -import json -import matplotlib.pyplot as plt -import matplotlib.colors as colors -import pandas as pd -from mpl_toolkits.mplot3d import Axes3D -import numpy as np -import pkg_resources -import numpy as np -from matplotlib.lines import Line2D -import operator - -currentdir = pkg_resources.resource_filename("myGym", "trained_models") -algo_1_results_path = 
os.path.join(currentdir, "multi_reach.json") -algo_2_results_path = os.path.join(currentdir, "multi_reach.json") -algo_3_results_path = os.path.join(currentdir, "multi_evaluation_results_ppo.json") -algo_4_results_path = os.path.join(currentdir, "multi_evaluation_results_td3.json") -algo_5_results_path = os.path.join(currentdir, "multi_evaluation_results_trpo.json") -algo_6_results_path = os.path.join(currentdir, "multi_evaluation_results_ddpg.json") -algo_7_results_path = os.path.join(currentdir, "multi_evaluation_results_acktr.json") -algo_8_results_path = os.path.join(currentdir, "multi_evaluation_results_a2c.json") -selected_metric = "success_rate" - - -def load_algo_results(results_path): - with open(results_path) as f: - results = json.load(f) - list_results = [json.loads(key.replace('\'', '"')) - + [float(value[selected_metric])] for key, value - in results.items()]# if str(fixed_parameter_value) in key] - #list_results = list(map(lambda x: [v for v in x if v != fixed_parameter_value], list_results)) - df = pd.DataFrame(list_results) - order2=['reach', 'push', 'pnp'] - order1=['step', 'joints', 'joints_gripper'] - order3=[1024, 512, 256] - df[1] = pd.Categorical(df[1], order1) - df[2] = pd.Categorical(df[2], order2) - df[0] = pd.Categorical(df[0], order3) - df = df.sort_values([2,1,0]) - list_results = df.values.tolist() - return list_results - -def plot_algo_results(ax, list_results, label, c): - w_labels = [result[0] for result in list_results] - w_set = sorted(set(w_labels), key=w_labels.index) - w = np.asarray([w_set.index(label) for label in w_labels]) - - x_labels = [result[1] for result in list_results] - #x_labels = [x.replace('joints_gripper', 'gripper') for x in x_labels] - x_set = sorted(set(x_labels), key=x_labels.index) - x = np.asarray([x_set.index(label) for label in x_labels]) - - y_labels = [result[2] for result in list_results] - y_set = sorted(set(y_labels), key=y_labels.index) - y = np.asarray([y_set.index(label) for label in y_labels]) - - z = [result[3] for result in list_results] - z += z[:1] - - # Make data: I have 3 groups and 7 subgroups - group_names = y_set - n_groups = len(group_names) - group_size = [100/n_groups]*n_groups - - subgroup_names = x_set*n_groups - n_subgroups = len(subgroup_names) - subgroup_size = [100/n_subgroups]*n_subgroups - - subsubgroup_names = w_set*n_subgroups - n_subsubgroups = len(subsubgroup_names) - subsubgroup_size = [100/n_subsubgroups]*n_subsubgroups - - # Create colors - r, g, b=[plt.cm.Reds, plt.cm.Greens, plt.cm.Blues] - - # First Ring (outside) - ax.set(aspect="equal") - mypie, txt = ax.pie(group_size, radius=1.6, labels=group_names, startangle=0, pctdistance=-5.125, colors=[r(0.4), r(0.6), r(0.8)], rotatelabels=True) - plt.setp(mypie, width=0.12, edgecolor='white') - - # Second Ring (Inside) - mypie2, txt2 = ax.pie(subgroup_size, radius=1.6-0.12, labels=subgroup_names, startangle=0, colors=[b(0.4), b(0.6), b(0.8)]*n_groups, rotatelabels=True) - plt.setp(mypie2, width=0.12, edgecolor='white') - - # Third Ring - mypie3, txt3 = ax.pie(subsubgroup_size, radius=1.6-0.24, labels=subsubgroup_names, startangle=0, colors=[g(0.4), g(0.6), g(0.8)]*n_subgroups, rotatelabels=True) - plt.setp(mypie3, width=0.12, edgecolor='white') - plt.margins(0,0) - - for t in range(len(txt)): - txt[t]._x = np.cos((0.5+t)*group_size[0]/100*np.pi*2)*(1.49) - txt[t]._y = np.sin((0.5+t)*group_size[0]/100*np.pi*2)*(1.49) - txt[t]._rotation -= 90*np.sign(txt[t]._x) - for t in range(len(txt2)): - txt2[t]._x = 
np.cos((0.5+t)*subgroup_size[0]/100*np.pi*2)*(1.35) - txt2[t]._y = np.sin((0.5+t)*subgroup_size[0]/100*np.pi*2)*(1.35) - txt2[t]._rotation -= 90*np.sign(txt2[t]._x) - for t in range(len(txt3)): - txt3[t]._x = np.cos((0.5+t)*subsubgroup_size[0]/100*np.pi*2)*(1.25) - txt3[t]._y = np.sin((0.5+t)*subsubgroup_size[0]/100*np.pi*2)*(1.25) - txt3[t]._rotation -= 90*np.sign(txt3[t]._x) - - #plt.show() - - categories=w_labels - N = len(categories) - angles = [n / float(N) * 2 * np.pi + 0.5 / float(N) * 2 * np.pi for n in range(N)] - angles += angles[:1] - - # Initialise the spider plot - radar = fig.add_subplot(111, polar=True) - - # Draw one axe per variable + add labels labels yet - plt.xticks(angles[:-1], categories, color='None') - - # Draw ylabels - yticks = np.arange(0,101,10) - yticks_labels = [str(tick)+' %' for tick in yticks] - radar.set_rlabel_position(90) - plt.yticks(yticks, labels=yticks_labels, color="black", size=8) - plt.ylim(0,110) - - # Plot data - radar.plot(angles, z, c, linewidth=1, linestyle='solid') - radar.fill(angles, z, c, alpha=0.3) - - #plt.show() - learnability = np.mean(z) - print("Learnability for {}: {}".format(label, learnability)) - return learnability - -if __name__ == '__main__': - algo_1_list_results = load_algo_results(algo_1_results_path) - algo_2_list_results = load_algo_results(algo_1_results_path) - - fig = plt.figure() - ax = fig.subplots() - - learnability_ppo2 = plot_algo_results(ax, algo_1_list_results, "ppo2", "r") - learnability_ppo = plot_algo_results(ax, algo_2_list_results, "ppo2", "g") - - - txt = plt.title('Learnability', fontsize=18) - txt._x=1.07 - txt._y=1.08 - legend_elements = [Line2D([0], [1], linewidth=2, color='r', label='PPO2, {:.2f} %'.format(learnability_ppo2)), - Line2D([0], [1], linewidth=2, color='m', label='TRPO, {:.2f} %'.format(learnability_ppo))] - #Line2D([0], [1], linewidth=2, color='c', label='TD3, {:.2f} %'.format(learnability_td3)), - #Line2D([0], [1], linewidth=2, color='y', label='DDPG, {:.2f} %'.format(learnability_ddpg)), - #Line2D([0], [1], linewidth=2, color='r', label='A2C, {:.2f} %'.format(learnability_a2c))] - ax.legend(handles=legend_elements, loc=(0.97, 0.85), prop={'size': 16}) - plt.savefig(os.path.join(currentdir, "multieval_visualization.png")) - - plt.show() diff --git a/myGym/walker2d-ppo.yaml b/myGym/walker2d-ppo.yaml deleted file mode 100644 index 8f16acd6..00000000 --- a/myGym/walker2d-ppo.yaml +++ /dev/null @@ -1,15 +0,0 @@ -walker2d-v1-ppo: - env: Walker2d-v4 - run: PPO - config: - # Works for both torch and tf. 
- framework: torch - kl_coeff: 1.0 - num_sgd_iter: 20 - lr: .0001 - sgd_minibatch_size: 32768 - train_batch_size: 320000 - num_workers: 64 - num_gpus: 1 - batch_mode: complete_episodes - observation_filter: MeanStdFilter diff --git a/myGym/yolact_vision/inference_tool.py b/myGym/yolact_vision/inference_tool.py index b5054477..b4650706 100644 --- a/myGym/yolact_vision/inference_tool.py +++ b/myGym/yolact_vision/inference_tool.py @@ -9,7 +9,6 @@ import torch import cv2 import numpy as np -import pkg_resources import dill import numpy as np diff --git a/pyproject.toml b/pyproject.toml index 08eced32..443b4d08 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "myGym" -version = "0.1.0" +version = "3.10.0" description = "myGym" authors = [ {name = "Incognite group CIIRC CVUT", email = "michal.vavrecka@cvut.cz"}, @@ -22,21 +22,22 @@ dependencies = [ "opencv-python==4.9.0.80", "pybullet==3.2.6", "open3d==0.18.0", - "gym==0.26.2", - "tensorflow>=2.11.0", - "torch<2.5,>=2.2", + "stable-baselines3==2.4.0", + "tensorboard>=2.18.0", + "torch==2.5.1", "torchvision>=0.17.1", "torchaudio>=2.2.1", "matplotlib>=3.8.3", "pyquaternion>=0.9.9", "commentjson>=0.9.0", - "ray[rllib,tune]>=2.9.3", - "gymnasium>=0.28.1", + "gymnasium>=1.0.0", "protobuf<4,>3", + "stable-baselines3>=2.4.0", + "shimmy==0.2.1", ] requires-python = ">=3.10,<3.11" readme = "README.md" -license = {text = "MIT License\n\nCopyright (c) 2020 CIIRC, CTU in Prague\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof this software and associated documentation files (the \"Software\"), to deal\nin the Software without restriction, including without limitation the rights\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Software, and to permit persons to whom the Software is\nfurnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all\ncopies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\nSOFTWARE.\n\n"} +license = {text = "MIT License\n\nCopyright (c) 2024 CIIRC, CTU in Prague\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof this software and associated documentation files (the \"Software\"), to deal\nin the Software without restriction, including without limitation the rights\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Software, and to permit persons to whom the Software is\nfurnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all\ncopies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\nSOFTWARE.\n\n"} maintainers = [ {email = "michal.vavrecka@cvut.cz"}, ] diff --git a/rllib.yml b/rllib.yml index 19e965be..1c0d41d1 100644 --- a/rllib.yml +++ b/rllib.yml @@ -14,14 +14,15 @@ dependencies: - tensorflow-base==2.11.1 - pip: - commentjson==0.9.0 - - ray==2.7.2 - - gym==0.24.0 - - gymnasium==0.29.1 + - ray==2.23.0 + - gym==0.26.2 + - gymnasium==0.28.1 - gymnasium-notices==0.0.1 - h5py==3.10.0 - imageio==2.33.1 - google==3.0.0 - google-cloud==0.34.0 + - memory-profiler==0.61.0 - matplotlib==3.8.2 - numpy==1.26.4 - opencv-python==4.9.0.80 @@ -39,10 +40,12 @@ dependencies: - pywavelets==1.5.0 - pyyaml==6.0.1 - scikit-image==0.22.0 + - Shimmy==2.0.0 - tensorflow-addons==0.23.0 - tensorflow-io-gcs-filesystem==0.36.0 - torch==2.2.0 - - torchvision==0.17.0 + - torchvision==0.17.2 - urllib3==2.2.0 + - stable-baselines3==2.3.2 prefix: /home/sejnogab/.conda/envs/mygymrl
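Usage sketch (assumes the patch above is applied; the log directory, plot title and step limit are placeholders): the new get_module_type() helper in myGym/utils/helpers.py and the vendored plotting utilities in myGym/utils/results_plotter.py can be exercised roughly as follows. plot_results() expects *monitor.csv files written by the Monitor wrapper during training, so the call raises LoadMonitorResultsError on an empty directory.

import matplotlib.pyplot as plt

from myGym.utils.helpers import get_module_type
from myGym.utils import results_plotter

# Map an observation config to the label of its information source
print(get_module_type({"actual_state": "obj_xyz", "goal_state": "obj_xyz"}))   # ground_truth
print(get_module_type({"actual_state": "endeff_6D", "goal_state": "obj_6D"}))  # ground_truth_6D
print(get_module_type({"actual_state": "yolact", "goal_state": "obj_xyz"}))    # yolact

# Plot episode rewards against cumulative timesteps from the monitor logs
# of one training run (placeholder directory, title and timestep cutoff)
results_plotter.plot_results(["trained_models/test"], int(2e5), results_plotter.X_TIMESTEPS, "reach")
plt.show()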