diff --git a/myGym/__init__.py b/myGym/__init__.py index e19bcdf1..08d10e0d 100644 --- a/myGym/__init__.py +++ b/myGym/__init__.py @@ -1,3 +1,3 @@ # This file was generated by the setup.py and will be rewritten at the next build -__version__ = "2.2.2.dev279+g20c5920" +__version__ = "2.2.2.dev316+gb2cc0b1.d20241118" __name__ = "myGym" \ No newline at end of file diff --git a/myGym/atari-ddppo.yaml b/myGym/atari-ddppo.yaml deleted file mode 100644 index e437a5d8..00000000 --- a/myGym/atari-ddppo.yaml +++ /dev/null @@ -1,33 +0,0 @@ -# Basically the same as atari-ppo, but adapted for DDPPO. Note that DDPPO -# isn't actually any more efficient on Atari, since the network size is -# relatively small and the env doesn't require a GPU. -atari-ddppo: - env: ALE/Breakout-v5 - run: DDPPO - config: - # DDPPO only supports PyTorch so far. - framework: torch - env_config: - frameskip: 1 # no frameskip - # Worker config: 10 workers, each of which requires a GPU. - num_workers: 10 - num_gpus_per_worker: 1 - # Each worker will sample 100 * 5 envs per worker steps = 500 steps - # per optimization round. This is 5000 steps summed across workers. - train_batch_size: 500 - num_envs_per_worker: 5 - # Each worker will take a minibatch of 50. There are 10 workers total, - # so the effective minibatch size will be 500. - sgd_minibatch_size: 50 - num_sgd_iter: 10 - # Params from standard PPO Atari config: - lambda: 0.95 - kl_coeff: 0.5 - clip_rewards: True - clip_param: 0.1 - vf_clip_param: 10.0 - entropy_coeff: 0.01 - batch_mode: truncate_episodes - observation_filter: NoFilter - model: - vf_share_layers: true diff --git a/myGym/condaenv.e218cqgt.requirements.txt b/myGym/condaenv.e218cqgt.requirements.txt deleted file mode 100644 index b8829734..00000000 --- a/myGym/condaenv.e218cqgt.requirements.txt +++ /dev/null @@ -1,86 +0,0 @@ -absl-py==2.0.0 -aiosignal==1.3.1 -ale-py==0.8.1 -astor==0.8.1 -attrs==23.1.0 -cachetools==5.3.1 -charset-normalizer==3.3.0 -click==8.1.3 -commentjson==0.9.0 -cycler==0.11.0 -dm-tree==0.1.8 -filelock==3.12.2 -flatbuffers==23.5.26 -fonttools==4.38.0 -fsspec==2023.1.0 -future==0.18.3 -google-auth==2.23.2 -google-auth-oauthlib==0.4.6 -gputils==1.0.6 -gputil==1.4.0 -grpcio==1.51.3 -gym==0.26.2 -gym-notices==0.0.8 -gymnasium==0.26.3 -gymnasium-notices==0.0.1 -h5py==3.8.0 -imageio==2.31.2 -importlib-metadata==6.7.0 -importlib-resources==5.12.0 -joblib==1.3.2 -jsonschema==4.17.3 -keras==2.11.0 -kiwisolver==1.4.5 -libclang==16.0.6 -lz4==4.3.2 -markdown==3.4.4 -markdown-it-py==2.2.0 -markupsafe==2.1.3 -matplotlib==3.5.3 -mdurl==0.1.2 -msgpack==1.0.5 -networkx==2.6.3 -numpy==1.18.5 -nvidia-cublas-cu11==11.10.3.66 -nvidia-cuda-nvrtc-cu11==11.7.99 -nvidia-cuda-runtime-cu11==11.7.99 -nvidia-cudnn-cu11==8.5.0.96 -oauthlib==3.2.2 -opencv-python==4.8.1.78 -packaging==23.2 -pandas==1.3.5 -pillow==9.5.0 -pkgutil-resolve-name==1.3.10 -protobuf==3.19.6 -pyarrow==12.0.1 -pyasn1==0.5.0 -pyasn1-modules==0.3.0 -pybullet==2.6.6 -pygame==2.1.0 -pygments==2.16.1 -pyparsing==3.1.1 -pyquaternion==0.9.9 -pyrsistent==0.19.3 -python-dateutil==2.8.2 -pytz==2023.3 -pywavelets==1.3.0 -pyyaml==6.0.1 -requests==2.31.0 -requests-oauthlib==1.3.1 -rich==13.4.2 -rsa==4.9 -scikit-image==0.19.3 -stable-baselines==2.10.2 -stable-baselines3==1.2.0 -tensorflow-addons==0.19.0 -tensorflow-io-gcs-filesystem==0.34.0 -tifffile==2021.11.2 -torch==1.13.1 -torchvision==0.14.1 -typeguard==4.1.2 -typer==0.9.0 -typing-extensions==4.7.1 -urllib3==2.0.3 -werkzeug==2.2.3 -wrapt==1.15.0 -zipp==3.15.0 \ No newline at end of file diff --git 
a/myGym/configs/rllib_debug.json b/myGym/configs/rllib_debug.json index 968beae0..06b7079d 100644 --- a/myGym/configs/rllib_debug.json +++ b/myGym/configs/rllib_debug.json @@ -19,7 +19,7 @@ "task_type" :"reach", "natural_language" :0, "task_objects" :[{"init":{"obj_name":"null"}, - "goal":{"obj_name":"cube_holes","fixed":1,"rand_rot":0, "sampling_area":[-0.5, 0.2, 0.3, 0.6, 0.1, 0.4]}}], + "goal":{"obj_name":"apple_target","fixed":1,"rand_rot":0, "sampling_area":[-0.5, 0.2, 0.3, 0.6, 0.1, 0.4]}}], "used_objects" :{"num_range":[0,0], "obj_list":[]}, // Observation // actual_state options (pick one): "endeff_xyz", "endeff_6D" (robot end effector), "obj_xyz", "obj_6D", "vae", "yolact", "voxel" or "dope" diff --git a/myGym/configs/train_rddl.json b/myGym/configs/train_rddl.json new file mode 100644 index 00000000..35743eb0 --- /dev/null +++ b/myGym/configs/train_rddl.json @@ -0,0 +1,44 @@ +{ + #Environment + "env_name" :"Gym-v0", + "workspace" :"table", + "engine" :"pybullet", + "render" :"opengl", + "camera" :1, + "gui" :1, + "visualize" :1, + "visgym" :0, + #Robot + "robot" :"kuka", + "robot_action" :"step", + "robot_init" :[0.0, 0.4, 0.4], + "max_velocity" :1, + "max_force" :70, + "action_repeat" :1, + #Task + "natural_language" :0, + "rddl": {"protoactions":["Approach", "Grasp", "Move", "Drop"], "num_task_range":[1,3], "allowed_objects":["Apple", "Tuna", "Banana"], "allowed_predicates":["Near"]}, + "observation" : {"actual_state":"obj_xyz", "goal_state":"obj_xyz"}, # this is ignored + #Distractor + "distractors" : {"list":null, "moveable":1, "constant_speed":0, "movement_dims":3, + "movement_endpoints":[-0.3, 0.3, 0.4, 0.7, 0.1, 0.3]}, + #Reward + "vae_path" :null, + "yolact_path" :null, + "yolact_config" :null, + #Train + "train_framework" :"pytorch", + "algo" :"ppo", + "num_networks" : 1, + "max_episode_steps" :1024, + "algo_steps" :1024, + "steps" :200000, + "pretrained_model" :null, + "multiprocessing" :false, + #Evaluation + "eval_freq" :10000, + "eval_episodes" :10, + #Saving and logging + "logdir" :"trained_models/test", + "record" :0 + } \ No newline at end of file diff --git a/myGym/env.txt b/myGym/env.txt index b44abc94..9439ce65 100644 --- a/myGym/env.txt +++ b/myGym/env.txt @@ -57,7 +57,6 @@ matplotlib 3.1.3 mdurl 0.1.2 mpi4py 3.1.4 msgpack 1.0.5 -mujoco-py 2.1.2.14 myGym 2.2.2.dev32+gd0bbd2d /home/michal/code/myGym networkx 2.6.3 numpy 1.18.5 @@ -221,7 +220,6 @@ mistune 0.8.4 mpi4py 3.1.4 mpmath 1.3.0 msgpack 1.0.5 -mujoco-py 2.1.2.14 myGym 2.2.2.dev32+gd0bbd2d /home/michal/code/myGym networkx 2.6.3 nicomotion 1.0 diff --git a/myGym/envs/__init__.py b/myGym/envs/__init__.py index 883514e9..db7e47c6 100755 --- a/myGym/envs/__init__.py +++ b/myGym/envs/__init__.py @@ -1,4 +1,4 @@ -from gym.envs.registration import register +from gymnasium.envs.registration import register register( id="CrowWorkspaceEnv-v0", diff --git a/myGym/envs/base_env.py b/myGym/envs/base_env.py index b7718307..173e6bfb 100644 --- a/myGym/envs/base_env.py +++ b/myGym/envs/base_env.py @@ -4,14 +4,13 @@ import pybullet_utils.bullet_client as bc import time import numpy as np -from gym.utils import seeding -import gym +from gymnasium.utils import seeding +import gymnasium as gym import inspect from myGym.envs.camera import Camera -import pkg_resources -currentdir = pkg_resources.resource_filename("myGym", "envs") -repodir = pkg_resources.resource_filename("myGym", "./") - +import importlib.resources as resources +currentdir = resources.files("myGym").joinpath("envs") +repodir = resources.files("myGym").parent 
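# The hunks above and below replace pkg_resources with importlib.resources throughout the package.
# A minimal sketch of the pattern (resolve_asset() is a hypothetical helper, not part of this diff):
# resources.files() returns a pathlib-like Traversable rather than a str, which is why the pybullet
# loaders (loadURDF, loadSDF, loadTexture) further down wrap the resulting path in str().
import importlib.resources as resources

def resolve_asset(relative_path: str) -> str:
    """Resolve a file shipped inside the myGym package to a plain string path for pybullet."""
    return str(resources.files("myGym") / "envs" / relative_path)

# assumed usage, mirroring how the environment code loads workspace assets:
# p.loadURDF(resolve_asset("rooms/plane.urdf"), useFixedBase=True)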
class BaseEnv(gym.Env): """ @@ -30,7 +29,7 @@ class BaseEnv(gym.Env): def __init__(self, gui_on=True, - objects_dir_path=pkg_resources.resource_filename("myGym", "envs/"), + objects_dir_path=resources.files("myGym").joinpath("envs"), max_steps=1024, show_bounding_boxes_gui=False, changing_light_gui=False, @@ -73,6 +72,7 @@ def __init__(self, self._set_physics() self._setup_scene() self._set_action_space() + self.obsdim = self.check_obs_template() self._set_observation_space() @@ -197,7 +197,7 @@ def _restart_episode(self): self.episode_reward = 0.0 self.episode_steps = 0 - def reset(self, hard=False): + def reset(self, hard=False,options=None): """ Reset the state of the environment """ diff --git a/myGym/envs/env_object.py b/myGym/envs/env_object.py index 87c5dc7f..c3c11c85 100644 --- a/myGym/envs/env_object.py +++ b/myGym/envs/env_object.py @@ -9,11 +9,11 @@ import numpy as np import sys, shutil from datetime import datetime -import pkg_resources -currentdir = pkg_resources.resource_filename("myGym", "envs") +import importlib.resources as resources +currentdir = resources.files("myGym").joinpath("envs") +from myGym.envs.vision_module import VisionModule - -class EnvObject: +class EnvObject(VisionModule): """ Env object class for dynamic object in PyBullet environment @@ -24,23 +24,25 @@ class EnvObject: :param fixed: (bool) Whether the object should have fixed position and orientation :param pybullet_client: Which pybullet client the environment should refere to in case of parallel existence of multiple instances of this environment """ - def __init__(self, urdf_path, position=[0, 0, 0], + def __init__(self, urdf_path, env, position=[0, 0, 0], orientation=[0, 0, 0, 0], fixed=False, - pybullet_client=None): + pybullet_client=None, observation="ground_truth", vae_path=None, yolact_path=None, yolact_config=None, is_robot=False, rgba=None): self.p = pybullet_client self.urdf_path = urdf_path self.init_position = position self.init_orientation = orientation self.fixed = fixed - self.name = os.path.splitext(os.path.basename(self.urdf_path))[0] - self.virtual = True if "virtual" in self.name else False + self.is_robot = is_robot + self.o_name = os.path.splitext(os.path.basename(self.urdf_path))[0] + self.virtual = True if "virtual" in self.o_name else False self.object_ldamping = 1 self.object_adamping = 1 self.object_lfriction = 100 self.object_rfriction = 100 self.object_mass = 10 - self. 
object_stiffness = 1 - if not self.virtual: + self.color_rgba = rgba + self.object_stiffness = 1 + if not self.virtual and not self.is_robot: self.uid = self.load() self.bounding_box = self.get_bounding_box() self.centroid = self.get_centroid() @@ -50,6 +52,8 @@ def __init__(self, urdf_path, position=[0, 0, 0], self.centroid = None self.debug_line_ids = [] self.cuboid_dimensions = None + super(EnvObject, self).__init__(observation=observation, env=env, + vae_path=vae_path, yolact_path=yolact_path, yolact_config = yolact_config) def set_color(self, color): """ @@ -144,7 +148,8 @@ def get_position_and_orientation(self): :return orientation: (array) Position ([x,y,z]) Orientation of object (quaternion [x,y,z,w]) """ if not self.virtual: - return self.p.getBasePositionAndOrientation(self.uid) + # return self.p.getBasePositionAndOrientation(self.uid) + return tuple(np.array(a) for a in self.p.getBasePositionAndOrientation(self.uid)) else: return (self.init_position, self.init_orientation) @@ -208,7 +213,7 @@ def load(self): Returns: :return self.uid: (int) ID of loaded object """ - self.uid = self.p.loadURDF(self.urdf_path, self.init_position, self.init_orientation, useFixedBase=self.fixed, flags=self.p.URDF_USE_SELF_COLLISION) + self.uid = self.p.loadURDF(str(self.urdf_path), self.init_position, self.init_orientation, useFixedBase=self.fixed, flags=self.p.URDF_USE_SELF_COLLISION) if self.fixed: self.p.changeDynamics(self.uid, 0, collisionMargin=0., contactProcessingThreshold=0.0, ccdSweptSphereRadius=0) else: @@ -305,9 +310,9 @@ def get_name(self): Get object's name. Uses data from URDF. Returns: - :return self.name: (string) Object's name + :return self.o_name: (string) Object's name """ - return self.name + return self.o_name def get_uid(self): """ @@ -317,23 +322,60 @@ def get_uid(self): :return self.uid: Object's unique ID """ return self.uid + + def get_obj_position_for_obs(self, img=None, depth=None): + """ + Get object position in world coordinates of environment for observation (i.e. from Yolact) + + Parameters: + :param obj: (object) Object to find its mask and centroid + :param img: (array) 2D input image to inference of vision model + :param depth: (array) Depth input image to inference of vision model + Returns: + :return position: (list) Centroid of object in world coordinates + """ + return super().get_obj_position_for_obs(self, img, depth) + + def get_obj_orientation_for_obs(self, img=None): + """ + Get object orientation in world coordinates of environment for observation (i.e. from Yolact) + + Parameters: + :param obj: (object) Object to find its mask and centroid + :param img: (array) 2D input image to inference of vision model + Returns: + :return orientation: (list) Orientation of object in world coordinates + """ + return super().get_obj_orientation_for_obs(self, img) + + def get_obj_bbox_for_obs(self, img=None): + """ + Get bounding box of an object for observation (i.e. 
from Yolact) + + Parameters: + :param obj: (object) Object to find its bounding box + :param img: (array) 2D input image to inference of vision model + Returns: + :return bbox: (list) Bounding box of object + """ + return super().get_obj_bbox_for_obs(self, img) @staticmethod - def get_random_object_position(boarders): + def get_random_object_position(borders): """ Generate random position in defined volume Parameters: - :param boarders: (list) Volume, where position may be generated ([x,x,y,y,z,z]) + :param borders: (list) Volume, where position may be generated ([x,x,y,y,z,z]) Returns: :return pos: (list) Position in specified volume ([x,y,z]) """ - if any(isinstance(i, list) for i in boarders): - boarders = boarders[random.randint(0,len(boarders)-1)] + if any(isinstance(i, list) for i in borders): + borders = borders[random.randint(0,len(borders)-1)] pos = [] - pos.append(random.uniform(boarders[0], boarders[1])) #x - pos.append(random.uniform(boarders[2], boarders[3])) #y - pos.append(random.uniform(boarders[4], boarders[5])) #z + pos.append(random.uniform(borders[0], borders[1])) #x + pos.append(random.uniform(borders[2], borders[3])) #y + pos.append(random.uniform(borders[4], borders[5])) #z return pos @staticmethod diff --git a/myGym/envs/gym_env.py b/myGym/envs/gym_env.py index 1708c794..04c12df4 100644 --- a/myGym/envs/gym_env.py +++ b/myGym/envs/gym_env.py @@ -14,13 +14,15 @@ import random from myGym.utils.helpers import get_workspace_dict -import pkg_resources +import importlib.resources as resources +currentdir = resources.files("myGym").joinpath("envs") from myGym.envs.human import Human import myGym.utils.colors as cs -from myGym.envs.vision_module import get_module_type +from myGym.utils.helpers import get_module_type from myGym.envs.natural_language import NaturalLanguage +from myGym.envs.task import TaskModule + -currentdir = pkg_resources.resource_filename("myGym", "envs") # used to exclude these colors for other objects, in the order of goal, init, done, else COLORS_RESERVED_FOR_HIGHLIGHTING = ["dark green", "green", "blue", "gray"] @@ -31,8 +33,8 @@ class GymEnv(CameraEnv): Environment class for particular environment based on myGym basic environment classes Parameters: - :param task_objects: (list of strings) Objects that are relevant for performing the task :param observation: (dict) Template dictionary of what should be part of the network observation + :param rddl: (dict) Template dictionary for task creation via rddl :param workspace: (string) Workspace in gym, where training takes place (collabtable, maze, drawer, ...) 
:param dimension_velocity: (float) Maximum allowed velocity for robot movements in individual x,y,z axis :param used_objects: (list of strings) Names of extra objects (not task objects) that can appear in the scene @@ -61,12 +63,11 @@ class GymEnv(CameraEnv): """ def __init__(self, - task_objects, observation, + rddl={}, framework="ray", workspace="table", dimension_velocity=0.05, - used_objects=None, action_repeat=1, color_dict={}, robot='kuka', @@ -78,8 +79,6 @@ def __init__(self, num_networks=1, network_switcher="gt", distractors=None, - reward='distance', - distance_type='euclidean', active_cameras=None, dataset=False, obs_space=None, @@ -106,12 +105,12 @@ def __init__(self, self.action_repeat = action_repeat self.dimension_velocity = dimension_velocity self.active_cameras = active_cameras - self.used_objects = used_objects self.action_repeat = action_repeat self.color_dict = color_dict self.task_type = task_type - self.task_objects_dict = task_objects + self.rddl_config = rddl self.framework = framework + self.vision_source = get_module_type(self.obs_type) self.task_objects = [] self.env_objects = [] self.vae_path = vae_path @@ -119,17 +118,18 @@ def __init__(self, self.yolact_config = yolact_config self.has_distractor = distractors["list"] != None self.distractors = distractors - self.distance_type = distance_type self.objects_area_borders = None - self.reward = reward + self.reachable_borders = None self.dist = d.DistractorModule(distractors["moveable"], distractors["movement_endpoints"], distractors["constant_speed"], distractors["movement_dims"], env=self) self.dataset = dataset self.obs_space = obs_space self.visualize = visualize self.visgym = visgym + self.check_obs_template() self.logdir = logdir self.workspace_dict = get_workspace_dict() + self.robot = None if not hasattr(self, "task"): self.task = None @@ -145,6 +145,7 @@ def __init__(self, self.nl_mode = natural_language if self.nl_mode: self.nl = NaturalLanguage(self) + self.use_magnet = True # @TODO provisory solution, used when _gripper in self.robot_action. 
We should make it adjustable in the config whether to use actual gripper control or magnet self.nl_text_id = None self.training = training if self.nl_mode: @@ -154,41 +155,31 @@ # # just some dummy settings so that _set_observation_space() doesn't throw exceptions at the beginning # self.num_networks = 3 self.rng = np.random.default_rng(seed=0) - self.task_objects_were_given_as_list = isinstance(self.task_objects_dict, list) - self.n_subtasks = len(self.task_objects_dict) if self.task_objects_were_given_as_list else 1 if self.reach_gesture and not self.nl_mode: raise Exception("Reach gesture task can't be started without natural language mode") super(GymEnv, self).__init__(active_cameras=active_cameras, **kwargs) def _init_task_and_reward(self): - if self.reward == 'distractor': - self.has_distractor = True - self.distractor = ['bus'] if not self.distractors["list"] else self.distractors["list"] - reward_classes = { - "1-network": {"F": F, "A": A, "switch": SwitchRewardNew, "turn": TurnRewardNew, "FM": FaM, "FMR": FaMaR,"FROM": FaROaM, "FMOR": FaMOaR, "FMOT": FaMOaT, "FROT": FaROaT, - "FMOM": FaMOaM, "FMLFR": FaMaLaFaR}, - "2-network": {"switch": SwitchRewardNew, "turn": TurnRewardNew, "FM": FaM, "AG": AaG}, - "3-network": {"FMR": FaMaR,"FROM": FaROaM, "FMOR": FaMOaR, "FMOT": FaMOaT, "FROT": FaROaT, - "FMOM": FaMOaM, "AGM": AaGaM}, - "4-network": {"FMLFR": FaMaLaFaR, "AGMD" : AaGaMaD}, - "5-network": {"AGMDW" : AaGaMaDaW}} - - scheme = "{}-network".format(str(self.num_networks)) - assert self.reward in reward_classes[scheme].keys(), "Failed to find the right reward class. Check reward_classes in gym_env.py" - self.task = t.TaskModule(task_type=self.task_type, - observation=self.obs_type, - vae_path=self.vae_path, - yolact_path=self.yolact_path, - yolact_config=self.yolact_config, - distance_type=self.distance_type, - number_tasks=len(self.task_objects_dict), - env=self) - self.reward = reward_classes[scheme][self.reward](env=self, task=self.task) + """Main communicator with rddl. Passes arguments from the config to RDDLWorld and tells rddl to make a task sequence and to build + a scene accordingly, including the robot. Work in progress""" + self.task = TaskModule(self, self.rddl_config["num_task_range"], self.rddl_config["protoactions"], self.rddl_config["allowed_objects"], self.rddl_config["allowed_predicates"], self.p) + # generates the task sequence and initializes the scene with objects accordingly. The first action is set as self.task.current_task + self.task.build_scene_for_task_sequence() # it also loads the robot; this must be done this way so that rddl knows about the robot + self.reward = self.task.current_action.reward # reward class + + print(f"Initial condition is: {self.task.rddl_task.current_action.initial.decide()}") + print(f"Goal condition is: {self.task.rddl_task.current_action.goal.decide()}") + + self.compute_reward = self.task.current_action.reward # function that computes the reward (no inputs needed, already bound to the objects) + obs_entities = self.reward.get_relevant_entities() # does not work yet, must be done in rddl + self.robot = self.task.rddl_robot # robot class as we know it + + def _setup_scene(self): """ - Set-up environment scene. Load static objects, apply textures. Load robot. + Set-up environment scene. Load static objects, apply textures.
""" self._add_scene_object_uid(self._load_static_scene_urdf(path="rooms/plane.urdf", name="floor"), "floor") if self.visgym: @@ -203,25 +194,26 @@ def _setup_scene(self): if ws_texture: self._change_texture(self.workspace, self._load_texture(ws_texture)) self._change_texture("floor", self._load_texture("parquet1.jpg")) self.objects_area_borders = self.workspace_dict[self.workspace]['borders'] - kwargs = {"position": self.workspace_dict[self.workspace]['robot']['position'], + self.reachable_borders = self.workspace_dict[self.workspace]['reachable_borders'] + self.robot_kwargs = {"position": self.workspace_dict[self.workspace]['robot']['position'], "orientation": self.workspace_dict[self.workspace]['robot']['orientation'], "init_joint_poses": self.robot_init_joint_poses, "max_velocity": self.max_velocity, "max_force": self.max_force, "dimension_velocity": self.dimension_velocity, - "pybullet_client": self.p} - self.robot = robot.Robot(self.robot_type, robot_action=self.robot_action, task_type=self.task_type, **kwargs) + "pybullet_client": self.p, "env": self, "observation":self.vision_source, "vae_path":self.vae_path, + "yolact_path":self.yolact_path, "yolact_config": self.yolact_config} if self.workspace == 'collabtable': self.human = Human(model_name='human', pybullet_client=self.p) def _load_urdf(self, path, fixedbase=True, maxcoords=True): transform = self.workspace_dict[self.workspace]['transform'] - return self.p.loadURDF(pkg_resources.resource_filename("myGym", os.path.join("envs", path)), + return self.p.loadURDF(resources.files("myGym").joinpath("envs",path), transform['position'], self.p.getQuaternionFromEuler(transform['orientation']), useFixedBase=fixedbase, useMaximalCoordinates=maxcoords) - + def _load_static_scene_urdf(self, path, name, fixedbase=True): transform = self.workspace_dict[self.workspace]['transform'] - object = env_object.EnvObject(pkg_resources.resource_filename("myGym", os.path.join("envs", path)), transform['position'], self.p.getQuaternionFromEuler(transform['orientation']), pybullet_client=self.p, fixed=fixedbase) + object = env_object.EnvObject(resources.files("myGym").joinpath("envs",path), self, transform['position'], self.p.getQuaternionFromEuler(transform['orientation']), pybullet_client=self.p, fixed=fixedbase, observation=self.vision_source, vae_path=self.vae_path, yolact_path=self.yolact_path, yolact_config=self.yolact_config) self.static_scene_objects[name] = object return object.uid @@ -231,7 +223,7 @@ def _change_texture(self, name, texture_id): def _load_texture(self, name): - return self.p.loadTexture(pkg_resources.resource_filename("myGym", "/envs/textures/{}".format(name))) + return self.p.loadTexture(str(resources.files("myGym").joinpath("envs/textures/",name))) def _set_observation_space(self): @@ -241,16 +233,15 @@ def _set_observation_space(self): if self.framework == "ray": from gymnasium import spaces else: - from gym import spaces - self._init_task_and_reward() + from gymnasium import spaces if self.obs_space == "dict": - goaldim = int(self.task.obsdim / 2) if self.task.obsdim % 2 == 0 else int(self.task.obsdim / 3) + goaldim = int(self.obsdim / 2) if self.obsdim % 2 == 0 else int(self.obsdim / 3) self.observation_space = spaces.Dict( - {"observation": spaces.Box(low=-10, high=10, shape=(self.task.obsdim,)), + {"observation": spaces.Box(low=-10, high=10, shape=(self.obsdim,)), "achieved_goal": spaces.Box(low=-10, high=10, shape=(goaldim,)), "desired_goal": spaces.Box(low=-10, high=10, shape=(goaldim,))}) else: - observationDim = 
self.task.obsdim + observationDim = self.obsdim observation_high = np.array([100] * observationDim) self.observation_space = spaces.Box(-observation_high, observation_high) @@ -259,10 +250,11 @@ def _set_action_space(self): """ Set action space dimensions and range """ + self._init_task_and_reward() # we first need rddl to make a robot if self.framework == "ray": from gymnasium import spaces else: - from gym import spaces + from gymnasium import spaces action_dim = self.robot.get_action_dimension() if "step" in self.robot_action: self.action_low = np.array([-1] * action_dim) @@ -271,7 +263,6 @@ def _set_action_space(self): # self.action_low = np.insert(self.action_low, action_dim, self.robot.gjoints_limits[0][1]) # self.action_high = np.insert(self.action_high, action_dim,self.robot.gjoints_limits[1][1]) - elif "absolute" in self.robot_action: if any(isinstance(i, list) for i in self.objects_area_borders): borders_max = np.max(self.objects_area_borders, 0) @@ -304,7 +295,7 @@ def _rescale_action(self, action): """ return [(sub + 1) * (h - l) / 2 + l for sub, l, h in zip(action, self.action_low, self.action_high)] - def reset(self, random_pos=True, hard=False, random_robot=False, only_subtask=False): + def reset(self, random_pos=True, hard=False, random_robot=False, only_subtask=False, options=None, seed=None): """ Environment reset called at the beginning of an episode. Reset state of objects, robot, task and reward. @@ -317,100 +308,11 @@ def reset(self, random_pos=True, hard=False, random_robot=False, only_subtask=Fa :return self._observation: (list) Observation data of the environment """ if not only_subtask: - self.robot.reset(random_robot=random_robot) - super().reset(hard=hard) - - if not self.nl_mode: - other_objects = [] - if self.task_objects_were_given_as_list: - task_objects_dict = copy.deepcopy(self.task_objects_dict) - else: - if not self.reach_gesture: - init = self.rng.choice(self.task_objects_dict["init"]) - goal = self.rng.choice(self.task_objects_dict["goal"]) - objects = self.task_objects_dict["init"] + self.task_objects_dict["goal"] - task_objects_dict = [{"init": init, "goal": goal}] - other_objects = self._randomly_place_objects({"obj_list": [o for o in objects if o != init and o != goal]}) - else: - goal = self.rng.choice(self.task_objects_dict["goal"]) - task_objects_dict = [{"init": {"obj_name":"null"}, "goal": goal}] - other_objects = self._randomly_place_objects({"obj_list": [o for o in self.task_objects_dict["goal"] if o != goal]}) - - all_subtask_objects = [x for i, x in enumerate(task_objects_dict) if i != self.task.current_task] - subtasks_processed = [list(x.values()) for x in all_subtask_objects] - subtask_objects = self._randomly_place_objects({"obj_list": list(chain.from_iterable(subtasks_processed))}) - self.env_objects = {"env_objects": self._randomly_place_objects(self.used_objects)} - if self.task_objects_were_given_as_list: - self.env_objects["env_objects"] += other_objects - self.task_objects = self._randomly_place_objects(task_objects_dict[self.task.current_task]) - self.task_objects = dict(ChainMap(*self.task_objects)) - if subtask_objects: - self.task_objects["distractor"] = subtask_objects - else: - init_objects = [] - if not self.reach_gesture: - init_objects = self._randomly_place_objects({"obj_list": self.task_objects_dict["init"]}) - for i, c in enumerate(cs.draw_random_rgba(size=len(init_objects), excluding=COLORS_RESERVED_FOR_HIGHLIGHTING)): - init_objects[i].set_color(c) - goal_objects = self._randomly_place_objects({"obj_list": 
self.task_objects_dict["goal"]}) - for i, c in enumerate(cs.draw_random_rgba(size=len(goal_objects), transparent=self.task_type != "reach", excluding=COLORS_RESERVED_FOR_HIGHLIGHTING)): - goal_objects[i].set_color(c) - - if self.training or (not self.training and self.reach_gesture) and self.nl_mode: - # setting the objects and generating a description based on them - self.nl.get_venv().set_objects(init_goal_objects=(init_objects, goal_objects)) - self.nl.generate_subtask_with_random_description() - - # resetting the objects to remove the knowledge about whether an object is an init or a goal - self.nl.get_venv().set_objects(all_objects=init_objects + goal_objects) - self.task_type, self.reward, self.num_networks, init, goal = self.nl.extract_subtask_info_from_description(self.nl.get_previously_generated_subtask_description()) - else: - success = False - i = 0 - - while (not success): - try: - if i > 0: - print("Unknown task description format. Actual format is very strict. " - "All articles must be included. Examples of valid subtask descriptions in general:") - print("\"reach the cyan cube\"") - print("\"reach the transparent pink cube left to the gray cube\"") - print("\"pick the orange cube and place it to the same position as the pink cube\"") - print("Pay attention to the fact that colors, task and objects in your case can be different!") - print("To leave the program use Ctrl + Z!") - if self.nl: - self.nl.set_current_subtask_description(input("Enter a subtask description in the natural language based on what you see:")) - # resetting the objects to remove the knowledge about whether an object is an init or a goal - self.nl.get_venv().set_objects(all_objects=init_objects + goal_objects) - self.task_type, self.reward, self.num_networks, init, goal = self.nl.extract_subtask_info_from_description(self.nl.get_previously_generated_subtask_description()) - success = True - break - except: - pass - i += 1 - - self.task_objects = {"actual_state": init if init is not None else self.robot, "goal_state": goal} - other_objects = [o for o in init_objects + goal_objects if o != init and o != goal] - self.env_objects = {"env_objects": other_objects + self._randomly_place_objects(self.used_objects)} - - # will set the task and the reward - self._set_observation_space() - if only_subtask: - if self.task.current_task < (len(self.task_objects_dict)) and not self.nl_mode: - self.shift_next_subtask() - if self.has_distractor: - distrs = [] - if self.distractors["list"]: - for distractor in self.distractors["list"]: - distrs.append( - self.dist.place_distractor(distractor, self.p, self.task_objects["goal_state"].get_position())) - if self.task_objects["distractor"]: - self.task_objects["distractor"].extend(distrs) - else: - self.task_objects["distractor"] = distrs - self.env_objects = {**self.task_objects, **self.env_objects} + self.task.rddl_robot.reset(random_robot=random_robot) + super().reset(hard=hard, options=options) + # @TODO I removed support of nl_mode, which is dependent on the old structure. 
We need to add nl_support again in later phases self.task.reset_task() - self.reward.reset() + #self.reward.reset() self.p.stepSimulation() self._observation = self.get_observation() @@ -420,24 +322,8 @@ def reset(self, random_pos=True, hard=False, random_robot=False, only_subtask=Fa self.nl_text_id = self.p.addUserDebugText(self.nl.get_previously_generated_subtask_description(), [2, 0, 1], textSize=1) if only_subtask and self.nl_text_id is not None: self.p.removeUserDebugItem(self.nl_text_id) - - return self.flatten_obs(self._observation.copy()) - - def shift_next_subtask(self): - # put current init and goal back in env_objects - self.env_objects["distractor"].extend([self.env_objects["actual_state"], self.env_objects["goal_state"]]) - # set the next subtask objects as the actual and goal state and remove them from env_objects - self.env_objects["actual_state"] = self.env_objects["distractor"][0] - self.env_objects["goal_state"] = self.env_objects["distractor"][1] - del self.env_objects["distractor"][:2] - self.task_objects["distractor"] = self.env_objects["distractor"].copy() - # copy the state to task_objects and change colors - self.task_objects["actual_state"] = self.env_objects["actual_state"] - self.task_objects["goal_state"] = self.env_objects["goal_state"] - self.highlight_active_object(self.env_objects["actual_state"], "init") - self.highlight_active_object(self.env_objects["goal_state"], "goal") - for o in self.env_objects["distractor"][-2:]: - self.highlight_active_object(o, "done") + info = {'d': 0.9, 'f': 0, 'o': self._observation} + return (np.asarray(self._observation.copy(), dtype="float32"), info) def flatten_obs(self, obs): """ Returns the input obs dict as flattened list """ @@ -458,18 +344,25 @@ def _set_cameras(self): def get_observation(self): """ - Get observation data from the environment + Get observation data from the environment. Probably should be moved to task.py Returns: :return observation: (array) Represented position of task relevant objects """ - self.observation["task_objects"] = self.task.get_observation() + ## !!!! This is a provisory solution until self.task.current_task.reward.get_relevant_entities() returns the relevant objects. 
+ ## Now we just take the position and orientation of every object in the scene, including the gripper, but that is valid in only a few cases + obs = [] + relevant_entities = self.reward.get_relevant_entities() + for e in relevant_entities: + obs += list(e.get_position()) + obs += list(e.get_orientation()) + if "gripper" in self.robot_action: + obs.append(int(self.robot.gripper_active)) + self.observation = obs if self.dataset: - self.observation["camera_data"] = self.render(mode="rgb_array") - self.observation["objects"] = self.env_objects - self.observation["additional_obs"] = {} - return self.observation - return self.observation["task_objects"] + raise NotImplementedError # @TODO one day + return self.observation + def step(self, action): """ @@ -484,27 +377,43 @@ :return info: (dict) Additional information about step """ self._apply_action_robot(action) + print("robot action: {}".format(action), end="\r") if self.has_distractor: [self.dist.execute_distractor_step(d) for d in self.distractors["list"]] self._observation = self.get_observation() if self.dataset: reward, done, info = 0, False, {} else: - reward = self.reward.compute(observation=self._observation) + reward = self.compute_reward() # this uses the rddl protoaction, no arguments needed + print("Reward by RDDL: {}".format(reward), end="\r") self.episode_reward += reward - # self.task.check_goal() - done = self.episode_over - info = {'d': self.task.last_distance / self.task.init_distance, 'f': int(self.episode_failed), - 'o': self._observation} - if done: self.successful_finish(info) + done = self.episode_over # @TODO replace with actual is_done value from RDDL + info = {'d': 0.9, 'f': int(self.episode_failed), + 'o': self._observation} # @TODO replace 'd' with actual distance values obtained from rddl or make our own implementation + if done is True: self.successful_finish(info) if self.task.subtask_over: self.reset(only_subtask=True) - # return self._observation, reward, done, info - return self.flatten_obs(self._observation.copy()), reward, done, info + # return self._observation, reward, done, truncated, info + truncated = False # not sure when to use this + return np.asarray(self._observation.copy(), dtype="float32"), reward, done, truncated, info - def compute_reward(self, achieved_goal, desired_goal, info): - # @TODO: Reward computation for HER, argument for .compute() - reward = self.reward.compute(np.append(achieved_goal, desired_goal)) - return reward + + def get_linkstates_unpacked(self): + o = [] + [[o.append(x) for x in z] for z in self.robot.observe_all_links()] + return o + + + def check_obs_template(self): + """ + @TODO Add smart and variable observation space constructor based on config?
+ + Returns: + :return obsdim: (int) Dimensionality of observation + """ + obsdim = 14 # 7 values for actual state (object or gripper pose and orientation) and same for goal state + if "gripper" in self.robot_action: + obsdim += 1 # binary value for gripper close or open + return obsdim def successful_finish(self, info): """ @@ -524,7 +433,6 @@ def _apply_action_robot(self, action): Parameters: :param action: (list) Action data returned by trained model """ - use_magnet = self.reward.get_magnetization_status() for i in range(self.action_repeat): objects = self.env_objects self.robot.apply_action(action, env_objects=objects) @@ -568,58 +476,6 @@ def draw_bounding_boxes(self): for object in self.env_objects: object.draw_bounding_box() - def _place_object(self, obj_info): - fixed = True if obj_info["fixed"] == 1 else False - pos = env_object.EnvObject.get_random_object_position(obj_info["sampling_area"]) - orn = env_object.EnvObject.get_random_z_rotation() if obj_info["rand_rot"] == 1 else [0, 0, 0, 1] - object = env_object.EnvObject(obj_info["urdf"], pos, orn, pybullet_client=self.p, fixed=fixed) - if self.color_dict: object.set_color(self.color_of_object(object)) - return object - - def _randomly_place_objects(self, object_dict): - """ - Place dynamic objects to the scene randomly - - Parameters: - :param n: (int) Number of objects to place in the scene - :param object_names: (list of strings) Objects that may be placed to the scene - :param random_pos: (bool) Whether to place object to random positions in the scene - Returns: - :return env_objects: (list of objects) Objects that are present in the current scene - """ - env_objects = [] - if not "init" in object_dict.keys(): # solves used_objects - for idx, o in enumerate(object_dict["obj_list"]): - if o["obj_name"] != "null": - urdf = self._get_urdf_filename(o["obj_name"]) - if urdf: - object_dict["obj_list"][idx]["urdf"] = urdf - else: - del object_dict["obj_list"][idx] - if "num_range" in object_dict.keys(): - for x in range(random.randint(object_dict["num_range"][0], object_dict["num_range"][1])): - env_o = self._place_object(random.choice(object_dict["obj_list"])) - self.highlight_active_object(env_o, "other") - env_objects.append(env_o) - else: - for o in object_dict["obj_list"]: - if o["obj_name"] != "null": - env_o = self._place_object(o) - self.highlight_active_object(env_o, "other") - env_objects.append(env_o) - else: # solves task_objects - for o in ['init', 'goal']: - d = object_dict[o] - if d["obj_name"] != "null": - d["urdf"] = self._get_urdf_filename(d["obj_name"]) - n = "actual_state" if o == "init" else "goal_state" - env_o = self._place_object(d) - self.highlight_active_object(env_o, o) - env_objects.append({n: env_o}) - elif d["obj_name"] == "null" and o == "init": - env_objects.append({"actual_state": self.robot}) - return env_objects - def highlight_active_object(self, env_o, obj_role): if obj_role == "goal": env_o.set_color(cs.name_to_rgba("transparent green")) @@ -630,21 +486,9 @@ def highlight_active_object(self, env_o, obj_role): else: env_o.set_color(cs.name_to_rgba("gray")) - def color_of_object(self, object): - """ - Set object's color - Parameters: - :param object: (object) Object - Returns: - :return color: (list) RGB color - """ - if object.name not in self.color_dict: - return cs.draw_random_rgba() - else: - color_name = random.sample(self.color_dict[object.name], 1)[0] - color = cs.name_to_rgba(color_name) - return color + def get_random_color(self): + return cs.draw_random_rgba() def get_task_objects(self, 
with_none=False) -> List[EnvObject]: objects = [self.task_objects["actual_state"], self.task_objects["goal_state"]] diff --git a/myGym/envs/human.py b/myGym/envs/human.py index ca294820..f1f1baae 100644 --- a/myGym/envs/human.py +++ b/myGym/envs/human.py @@ -1,7 +1,7 @@ from typing import List, Tuple import numpy as np -import pkg_resources +import importlib.resources as resources from myGym.envs.env_object import EnvObject from myGym.utils.helpers import get_robot_dict @@ -56,7 +56,7 @@ def _load_model(self, model_name): :param model_name: (string) Model name in the get_robot_dict() dictionary """ path, position, orientation = get_robot_dict()[model_name].values() - path = pkg_resources.resource_filename("myGym", path) + path = resources.files("myGym").joinpath(path) orientation = self.p.getQuaternionFromEuler(orientation) if path[-3:] == 'sdf': diff --git a/myGym/envs/predicates.py b/myGym/envs/predicates.py index e3189b70..7fe0e5eb 100644 --- a/myGym/envs/predicates.py +++ b/myGym/envs/predicates.py @@ -26,12 +26,24 @@ def get_value(self, obj1, obj2): base2 = obj2.get_bounding_box()[-1][-1] return obj2.uid in overlapping and base1 > base2 + +class IsReachable(): + def set_value(self, obj, robot): + raise NotImplementedError() + + def get_value(self, obj, robot): + pass + + def get_scale_from_urdf(pth): with open(pth) as f: lines = f.readlines() - scale = float([x for x in lines if "scale" in x][0].split("scale=\"")[1].split(" ")[0]) + if len([x for x in lines if "scale" in x]) > 0: + scale = float([x for x in lines if "scale" in x][0].split("scale=\"")[1].split(" ")[0]) + else: + scale = 1 return scale @@ -39,44 +51,43 @@ def get_scale_from_urdf(pth): if __name__ == '__main__': - import gym + import gymnasium as gym from myGym.train import configure_env, get_parser, get_arguments from myGym import envs parser = get_parser() arg_dict = get_arguments(parser) arg_dict["gui"] = 1 env = configure_env(arg_dict) - #env.reset() table = env.env.env.static_scene_objects["table"] touching = Touching() on_top = OnTop() - pth1 = "./envs/objects/geometric/urdf/cube.urdf" - pos = env_object.EnvObject.get_random_object_position([-0.5, 0.5, 0.4, 0.6, 0.1, 0.1]) - pan1 = env_object.EnvObject(pth1, pos, [0, 0, 0, 1], pybullet_client=p, fixed=False) - pan1_info = p.getVisualShapeData(pan1.get_uid())[0] - pan1_scale = get_scale_from_urdf(pth1) - objpth = pan1_info[4].decode("utf-8") + + # load tuna model and check the scale + pth1 = "./envs/objects/household/urdf/tuna_can.urdf" + pos = env_object.EnvObject.get_random_object_position([-0.5, 0.5, 0.4, 0.6, 0.07, 0.07]) + obj1 = env_object.EnvObject(pth1, env, pos, [0, 0, 0, 1], pybullet_client=p, fixed=False) + obj1_info = p.getVisualShapeData(obj1.get_uid())[0] + obj1_scale = get_scale_from_urdf(pth1) + objpth = obj1_info[4].decode("utf-8") + + # voxelize o3model = o3d.io.read_triangle_model(objpth) mesh = o3model.meshes[0].mesh - mesh = mesh.scale(pan1_scale, center=mesh.get_center()) + mesh = mesh.scale(obj1_scale, center=mesh.get_center()) vm = VolumeMesh(mesh) orig = vm.duplicate() orig.paint(np.array([0, 1, 0])) voxel_grid = vm.voxelgrid - o3d.visualization.draw_geometries([voxel_grid, orig.voxelgrid]) - pan1.paint([0,1,0,1]) - print("Green pan touching table:") - print(touching.get_value(pan1, table)) - print("Green pan on top of table:") - print(on_top.get_value(pan1, table)) - print("Table on top of green pan:") - print(on_top.get_value(table, pan1)) - - pos = env_object.EnvObject.get_random_object_position([-0.5, 0.5, 0.4, 0.6, 0.5, 0.55]) - pan2 = 
env_object.EnvObject("myGym/envs/objects/household/urdf/pan.urdf", pos, [0, 0, 0, 1], pybullet_client=p, fixed=False) - pan2.set_color([1,0,0,1]) - print("Red pan touching table:") - print(touching.get_value(pan2, table)) - print("") + # visualize geometry + #o3d.visualization.draw_geometries([voxel_grid, orig.voxelgrid]) + + + print("Tuna touching table:") + print(touching.get_value(obj1, table)) + print("Tuna on top of table:") + print(on_top.get_value(obj1, table)) + print("Table on top of tuna:") + print(on_top.get_value(table, obj1)) + diff --git a/myGym/envs/randomizers.py b/myGym/envs/randomizers.py index 3eac3b99..0c897003 100644 --- a/myGym/envs/randomizers.py +++ b/myGym/envs/randomizers.py @@ -2,8 +2,8 @@ import numpy as np import os, glob, random import pybullet as p -import pkg_resources -repodir = pkg_resources.resource_filename("myGym", "") +import importlib.resources as resources +repodir = resources.files("myGym").parent diff --git a/myGym/envs/robot.py b/myGym/envs/robot.py index 079bb1a6..820a3a0c 100644 --- a/myGym/envs/robot.py +++ b/myGym/envs/robot.py @@ -1,15 +1,16 @@ from re import S -import pkg_resources from myGym.utils.vector import Vector import numpy as np import math from myGym.utils.helpers import get_robot_dict +from myGym.envs.env_object import EnvObject -currentdir = pkg_resources.resource_filename("myGym", "envs") -repodir = pkg_resources.resource_filename("myGym", "") +import importlib.resources as resources +currentdir = resources.files("myGym").joinpath("envs") +repodir = resources.files("myGym") -class Robot: +class Robot(EnvObject): """ Robot class for control of robot environment interaction @@ -40,8 +41,13 @@ def __init__(self, dimension_velocity = 0.5, max_velocity = None, #1., max_force = None, #50., - pybullet_client=None): - + pybullet_client=None, + env = None, + observation="ground_truth", + vae_path=None, + yolact_path=None, + yolact_config=None): + self.debug = False self.p = pybullet_client self.robot_dict = get_robot_dict() self.robot_path = self.robot_dict[robot]['path'] @@ -69,47 +75,46 @@ def __init__(self, self.task_type = task_type self.magnetized_objects = {} self.gripper_active = False + super(Robot, self).__init__(self.robot_path, env, position=position, + orientation=orientation, fixed=False, + pybullet_client=self.p, observation=observation, vae_path=vae_path, yolact_path=yolact_path, yolact_config=yolact_config, is_robot=True) self._load_robot() - self.num_joints = self.p.getNumJoints(self.robot_uid) + self.num_joints = self.p.getNumJoints(self.uid) self._set_motors() self.joints_limits, self.joints_ranges, self.joints_rest_poses, self.joints_max_force, self.joints_max_velo = self.get_joints_limits(self.motor_indices) if self.gripper_names: self.gjoints_limits, self.gjoints_ranges, self.gjoints_rest_poses, self.gjoints_max_force, self.gjoints_max_velo = self.get_joints_limits(self.gripper_indices) - #TODO Clean code (test and gym_env) to initialize just from coordinates - #if self.robot_action != "joints": self.init_joint_poses = list(self._calculate_accurate_IK(init_joint_poses[:3])) - #else: - #self.init_joint_poses = np.zeros((len(self.motor_names))) - #self.reset() def _load_robot(self): """ Load SDF or URDF model of specified robot and place it in the environment to specified position and orientation """ + if self.robot_path.startswith('/'): # leading slash indicates absolute path + self.robot_path = self.robot_path[1:] + if self.robot_path[-3:] == 'sdf': objects = self.p.loadSDF( - 
pkg_resources.resource_filename("myGym", - self.robot_path)) - self.robot_uid = objects[0] - self.p.resetBasePositionAndOrientation(self.robot_uid, self.position, + str(resources.files("myGym").joinpath(self.robot_path))) + self.uid = objects[0] + self.p.resetBasePositionAndOrientation(self.uid, self.position, self.orientation) else: - self.robot_uid = self.p.loadURDF( - pkg_resources.resource_filename("myGym", - self.robot_path), + self.uid = self.p.loadURDF( + str(resources.files("myGym").joinpath(self.robot_path)), self.position, self.orientation, useFixedBase=True, flags=(self.p.URDF_USE_SELF_COLLISION)) - for jid in range(self.p.getNumJoints(self.robot_uid)): - self.p.changeDynamics(self.robot_uid, jid, collisionMargin=0., contactProcessingThreshold=0.0, ccdSweptSphereRadius=0) + for jid in range(self.p.getNumJoints(self.uid)): + self.p.changeDynamics(self.uid, jid, collisionMargin=0., contactProcessingThreshold=0.0, ccdSweptSphereRadius=0) # if 'jaco' in self.name: #@TODO jaco gripper has closed loop between finger and finger_tip that is not respected by the simulator - # self.p.createConstraint(self.robot_uid, 11, self.robot_uid, 15, self.p.JOINT_FIXED, [0, 0, 0], [0, 0, 0], [0, 0, 0]) - # self.p.createConstraint(self.robot_uid, 13, self.robot_uid, 17, self.p.JOINT_FIXED, [0, 0, 0], [0, 0, 0], [0, 0, 0]) + # self.p.createConstraint(self.uid, 11, self.uid, 15, self.p.JOINT_FIXED, [0, 0, 0], [0, 0, 0], [0, 0, 0]) + # self.p.createConstraint(self.uid, 13, self.uid, 17, self.p.JOINT_FIXED, [0, 0, 0], [0, 0, 0], [0, 0, 0]) def _set_motors(self): """ Identify motors among all joints (fixed joints aren't motors). Identify index of gripper and end-effector link among all links. Uses data from robot model. """ for i in range(self.num_joints): - joint_info = self.p.getJointInfo(self.robot_uid, i) + joint_info = self.p.getJointInfo(self.uid, i) joint_name = joint_info[1] q_index = joint_info[3] link_name = joint_info[12] @@ -122,22 +127,25 @@ def _set_motors(self): if q_index > -1 and "rjoint" in joint_name.decode("utf-8"): # Fixed joints have q_index -1 self.motor_names.append(str(joint_name)) self.motor_indices.append(i) - self.motor_positions.append(self.p.getJointState(self.robot_uid,i)[0]) + self.motor_positions.append(self.p.getJointState(self.uid,i)[0]) if q_index > -1 and "gjoint" in joint_name.decode("utf-8"): self.gripper_names.append(str(joint_name)) self.gripper_indices.append(i) - self.gripper_positions.append(self.p.getJointState(self.robot_uid,i)[0]) - - print("Robot summary") - print("--------------") - print("Links:") - print("\n".join(map(str,self.link_names))) - print("Joints:") - print("\n".join(map(str,self.motor_names))) - print("Gripper joints:") - print("\n".join(map(str,self.gripper_names))) - print("Gripper index is: " + str(self.gripper_index)) - print("End effector index is: " + str(self.end_effector_index)) + self.gripper_positions.append(self.p.getJointState(self.uid,i)[0]) + + if self.debug: + print("Robot summary") + print("--------------") + print("Links:") + print("\n".join(map(str,self.link_names))) + print("Joints:") + print("\n".join(map(str,self.motor_names))) + print("Gripper joints:") + print("\n".join(map(str,self.gripper_names))) + print("Gripper index is: " + str(self.gripper_index)) + print("End effector index is: " + str(self.end_effector_index)) + else: + print("Robot initialized") self.joints_num = len(self.motor_names) self.gjoints_num = len(self.gripper_names) @@ -160,7 +168,7 @@ def _set_motors(self): #print(f"Gripper 
active:{self.gripper_active}") def touch_sensors_active(self, target_object): - contact_points = self.p.getContactPoints(self.robot_uid, target_object.uid) + contact_points = self.p.getContactPoints(self.uid, target_object.uid) if len(contact_points)> 0: return True return False @@ -204,7 +212,7 @@ def reset_joints(self, joint_poses): # joint_poses = self._calculate_joint_poses(joint_poses) joint_poses = np.clip(joint_poses, self.joints_limits[0], self.joints_limits[1]) for jid in range(len(self.motor_indices)): - self.p.resetJointState(self.robot_uid, self.motor_indices[jid], joint_poses[jid]) + self.p.resetJointState(self.uid, self.motor_indices[jid], joint_poses[jid]) self._run_motors(joint_poses) def get_joints_limits(self,indices): @@ -218,7 +226,7 @@ def get_joints_limits(self,indices): """ joints_limits_l, joints_limits_u, joints_ranges, joints_rest_poses, joints_max_force, joints_max_velo = [], [], [], [], [], [] for jid in indices: - joint_info = self.p.getJointInfo(self.robot_uid, jid) + joint_info = self.p.getJointInfo(self.uid, jid) joints_limits_l.append(joint_info[8]) joints_limits_u.append(joint_info[9]) joints_ranges.append(joint_info[9] - joint_info[8]) @@ -244,7 +252,7 @@ def observe_all_links(self): """ Returns the cartesian world position of all robot's links """ - return [self.p.getLinkState(self.robot_uid, link)[0] for link in range(self.num_joints)] + return [self.p.getLinkState(self.uid, link)[0] for link in range(self.num_joints)] def get_joints_states(self): """ @@ -252,7 +260,7 @@ def get_joints_states(self): """ joints = [] for link in range(self.joints_num): - joints.append(self.p.getJointState(self.robot_uid,link)[0]) + joints.append(self.p.getJointState(self.uid,link)[0]) return joints def get_observation_dimension(self): @@ -272,7 +280,7 @@ def get_observation(self): :return observation: (list) Position of end-effector link (center of mass) """ observation = [] - state = self.p.getLinkState(self.robot_uid, self.end_effector_index) + state = self.p.getLinkState(self.uid, self.end_effector_index) pos = state[0] orn = self.p.getEulerFromQuaternion(state[1]) @@ -291,7 +299,7 @@ def get_links_observation(self, num): if "kuka" in self.name: for link in range(self.gripper_index-num, self.gripper_index): # for link in range(4, self.gripper_index): - state = self.p.getLinkState(self.robot_uid, link) + state = self.p.getLinkState(self.uid, link) pos = state[0] observation.extend(list(pos)) else: @@ -300,7 +308,7 @@ def get_links_observation(self, num): self.observed_links_num = num return observation - def get_position(self): + def get_position(self) -> np.ndarray: """ Get position of robot's end-effector link @@ -308,9 +316,9 @@ def get_position(self): :return position: (list) Position of end-effector link (center of mass) """ #return self.get_accurate_gripper_position() - #print(self.p.getLinkState(self.robot_uid, self.end_effector_index)[0]) - return self.p.getLinkState(self.robot_uid, self.end_effector_index)[0] - + #print(self.p.getLinkState(self.uid, self.end_effector_index)[0]) + return np.array(self.p.getLinkState(self.uid, self.end_effector_index)[0]) + def get_orientation(self): """ Get orientation of robot's end-effector link @@ -318,7 +326,7 @@ def get_orientation(self): Returns: :return orientation: (list) Orientation of end-effector link (center of mass) """ - return self.p.getLinkState(self.robot_uid, self.end_effector_index)[1] + return self.p.getLinkState(self.uid, self.end_effector_index)[1] def _run_motors(self, joint_poses): """ @@ -330,7 +338,7 @@ 
def _run_motors(self, joint_poses): joint_poses = np.clip(joint_poses, self.joints_limits[0], self.joints_limits[1]) self.joints_state = [] for i in range(len(self.motor_indices)): - self.p.setJointMotorControl2(bodyUniqueId=self.robot_uid, + self.p.setJointMotorControl2(bodyUniqueId=self.uid, jointIndex=self.motor_indices[i], controlMode=self.p.POSITION_CONTROL, targetPosition=joint_poses[i], @@ -339,10 +347,10 @@ def _run_motors(self, joint_poses): positionGain=0.7, velocityGain=0.3) - self.end_effector_pos = self.p.getLinkState(self.robot_uid, self.end_effector_index)[0] - self.end_effector_orn = self.p.getLinkState(self.robot_uid, self.end_effector_index)[1] - self.gripper_pos = self.p.getLinkState(self.robot_uid, self.gripper_index)[0] - self.gripper_orn = self.p.getLinkState(self.robot_uid, self.gripper_index)[1] + self.end_effector_pos = self.p.getLinkState(self.uid, self.end_effector_index)[0] + self.end_effector_orn = self.p.getLinkState(self.uid, self.end_effector_index)[1] + self.gripper_pos = self.p.getLinkState(self.uid, self.gripper_index)[0] + self.gripper_orn = self.p.getLinkState(self.uid, self.gripper_index)[1] def _move_gripper(self, action): """ @@ -352,7 +360,7 @@ def _move_gripper(self, action): :param joint_poses: (list) Desired poses of individual joints """ for i in range(len(self.gripper_indices)): - self.p.setJointMotorControl2(bodyUniqueId=self.robot_uid, + self.p.setJointMotorControl2(bodyUniqueId=self.uid, jointIndex=self.gripper_indices[i], controlMode=self.p.POSITION_CONTROL, targetPosition=action[i], @@ -373,7 +381,7 @@ def _calculate_joint_poses(self, end_effector_pos): :return joint_poses: (list) Calculated joint poses corresponding to desired end-effector position """ if (self.use_fixed_end_effector_orn): - joint_poses = self.p.calculateInverseKinematics(self.robot_uid, + joint_poses = self.p.calculateInverseKinematics(self.uid, self.end_effector_index, end_effector_pos, self.fixed_end_effector_orn, @@ -382,7 +390,7 @@ def _calculate_joint_poses(self, end_effector_pos): jointRanges=self.joints_ranges, restPoses=self.joints_rest_poses) else: - joint_poses = self.p.calculateInverseKinematics(self.robot_uid, + joint_poses = self.p.calculateInverseKinematics(self.uid, self.end_effector_index, end_effector_pos) joint_poses = np.clip(joint_poses[:len(self.motor_indices)], self.joints_limits[0], self.joints_limits[1]) @@ -404,12 +412,12 @@ def _calculate_accurate_IK(self, end_effector_pos): iter = 0 while (not closeEnough and iter < maxIter): if (self.use_fixed_end_effector_orn): - joint_poses = self.p.calculateInverseKinematics(self.robot_uid, + joint_poses = self.p.calculateInverseKinematics(self.uid, self.end_effector_index, end_effector_pos, self.fixed_end_effector_orn) else: - joint_poses = self.p.calculateInverseKinematics(self.robot_uid, + joint_poses = self.p.calculateInverseKinematics(self.uid, self.end_effector_index, end_effector_pos, lowerLimits=self.joints_limits[0], @@ -420,9 +428,9 @@ def _calculate_accurate_IK(self, end_effector_pos): joint_poses = np.clip(joint_poses, self.joints_limits[0], self.joints_limits[1]) #reset the joint state (ignoring all dynamics, not recommended to use during simulation) for jid in range(len(self.motor_indices)): - self.p.resetJointState(self.robot_uid, self.motor_indices[jid], joint_poses[jid]) + self.p.resetJointState(self.uid, self.motor_indices[jid], joint_poses[jid]) - ls = self.p.getLinkState(self.robot_uid, self.end_effector_index) + ls = self.p.getLinkState(self.uid, self.end_effector_index) newPos = ls[4] 
#world position of the URDF link frame newOrn = ls[5] #world orientation of the URDF link frame diffPos = np.linalg.norm(np.asarray(end_effector_pos)-np.asarray(newPos)) @@ -513,7 +521,7 @@ def apply_action_torque_control(self, action): # Desired task-space force using PD law F = np.vstack([P_pos * (delta_pos), P_ori * (delta_ori)]) - \ np.vstack([D_pos * (curr_vel), D_ori * (curr_omg)]) - Jt, Jr = self.p.calculateJacobian(self.robot_uid, self.end_effector_index, [0,0,0], joint_states, joint_velocities, [10]*(len(joint_states))) + Jt, Jr = self.p.calculateJacobian(self.uid, self.end_effector_index, [0,0,0], joint_states, joint_velocities, [10]*(len(joint_states))) J = np.array([Jt,Jr]).reshape(-1, len(joint_states)) tau = np.dot(J.T, F) # + robot.coriolis_comp().reshape(7,1) @@ -606,7 +614,7 @@ def apply_action(self, action, env_objects=None): #self.end_effector_prev_ori = self.end_effector_ori #if 'gripper' not in self.robot_action: # for joint_index in range(self.gripper_index, self.end_effector_index + 1): - # self.p.resetJointState(self.robot_uid, joint_index, self.p.getJointInfo(self.robot_uid, joint_index)[9]) + # self.p.resetJointState(self.uid, joint_index, self.p.getJointInfo(self.uid, joint_index)[9]) def magnetize_object(self, object, distance_threshold=.1): if len(self.magnetized_objects) == 0 : @@ -644,8 +652,8 @@ def get_accurate_gripper_position(self): """ Returns the position of the tip of the pointy gripper. Tested on Kuka only """ - gripper_position = self.p.getLinkState(self.robot_uid, self.end_effector_index)[0] - gripper_orientation = self.p.getLinkState(self.robot_uid, self.end_effector_index)[1] + gripper_position = self.p.getLinkState(self.uid, self.end_effector_index)[0] + gripper_orientation = self.p.getLinkState(self.uid, self.end_effector_index)[1] gripper_matrix = self.p.getMatrixFromQuaternion(gripper_orientation) direction_vector = Vector([0,0,0], [0.0, 0, 0.14]) m = np.array([[gripper_matrix[0], gripper_matrix[1], gripper_matrix[2]], [gripper_matrix[3], gripper_matrix[4], gripper_matrix[5]], [gripper_matrix[6], gripper_matrix[7], gripper_matrix[8]]]) @@ -670,5 +678,5 @@ def get_uid(self): Returns: :return self.uid: Robot's unique ID """ - return self.robot_uid + return self.uid diff --git a/myGym/envs/scene_objects.py b/myGym/envs/scene_objects.py new file mode 100644 index 00000000..b2e4b1a4 --- /dev/null +++ b/myGym/envs/scene_objects.py @@ -0,0 +1,181 @@ +from time import time +from typing import Callable, Optional, TypeVar + +import numpy as np +import pytest +import os + +from rddl import Operand, Variable +from myGym.envs.env_object import EnvObject +from myGym.envs.robot import Robot +import importlib.resources as resources +currentdir = resources.files("myGym").joinpath("envs") + +def time_function(f: Callable, *args, **kwargs): + start = time() + result = f(*args, **kwargs) + end = time() + return end - start, result + + +def euclidean_distance(point_A: np.ndarray, point_B: np.ndarray) -> float: + return np.linalg.norm(np.array(point_A) - np.array(point_B)) + + +def is_holding(gripper, obj) -> bool: + return bool(np.linalg.norm(gripper.location - obj.location) < NEAR_THRESHOLD) + + +NEAR_THRESHOLD = 0.1 + +mapping = { + "is_holding": is_holding, + "euclidean_distance": euclidean_distance, + "is_reachable": lambda g, o: True, + "near_threshold": NEAR_THRESHOLD, + "gripper_at": lambda g, o: all(g.location == o.location), + "gripper_open": lambda g: np.random.random() < 0.5, + "object_at": lambda g, o: g.location == o.location, + "exists": lambda e: 
True, + "on_top" : lambda e: True +} + +Operand.set_mapping(mapping) + +from rddl import AtomicAction, Entity, Reward +from rddl.actions import Approach, Grasp, Drop, Move, Follow +from rddl.entities import GraspableObject, Gripper, Location, ObjectEntity +from rddl.operators import NotOp +from rddl.predicates import IsHolding, Near, OnTop + +# CONSTANTS + +# END OF CUSTOM FUNCTIONS & VARIABLES + + +class EnvObjectProxy: # This should be replaced by EnvObject from myGym + + def __init__(self, **kwds): + super().__init__() + self._name = self.reference + self._position = np.random.randn(3) + self._orientation = np.random.randn(3) + + def set_position(self, position: np.ndarray): + self._position = position + + def set_orientation(self, orientation: np.ndarray): + self._orientation = orientation + + def get_position(self) -> np.ndarray: + return self._position + + def get_orientation(self) -> np.ndarray: + return self._orientation + + def get_position_and_orientation(self) -> tuple[np.ndarray, np.ndarray]: + return self.get_position(), self.get_orientation() + + def get_name(self) -> str: + return self._name + + def get_uuid(self) -> str: + return self.name + "_uuid" + + +class EnvSimulator: # This should be replaced by actual env from myGym + """Simulates the simulation environment""" + + def __init__(self, list_of_objects: list[EnvObjectProxy]): + self._objects = list_of_objects + + def reset(self): + pass + + def step(self, action): + # generate observation + obs = {} + for obj in self._objects: + obj.set_position(np.random.randn(3)) # randomly jiggle the object about + obj.set_orientation(np.random.randn(3)) + obs[obj.get_name()] = obj + return obs + + +class Observer: # This class serves merely as a container & memory for current observation + + def __init__(self): + self.obs = None + + def set_observation(self, obs): + self.obs = obs + + def get_observation(self): + return self.obs + + +Entity.set_observation_getter(lambda self: self) +Location.monkey_patch(Location._get_location, lambda self: self.get_position()) + + +class Apple(GraspableObject, EnvObject): + + def __init__(self, reference: Optional[str] = None, kind: str = "RedDelicious", **kw): + self.urdf = "objects/household/urdf/apple.urdf" + self.rgba = (0.54, 0.0, 0.0, 1.0) + kw["rgba"] = self.rgba + kw["urdf_path"] = resources.files("myGym").joinpath("envs",self.urdf) + super().__init__(self._get_generic_reference() if reference is None else reference, "apple", **kw) + self._kind = kind + + +class Banana(GraspableObject, EnvObject): + + def __init__(self, reference: Optional[str] = None, kind: str = "", **kw): + self.urdf = "objects/household/urdf/banana.urdf" + self.rgba = (1.0, 0.94, 0.0, 1.0) + kw["urdf_path"] = resources.files("myGym").joinpath("envs",self.urdf) + kw["rgba"] = self.rgba + super().__init__(self._get_generic_reference() if reference is None else reference, "banana", **kw) + self._kind = kind + + + +class Tuna(GraspableObject, EnvObject): + + def __init__(self, reference: Optional[str] = None, kind: str = "", **kw): + self.urdf = "objects/household/urdf/tuna_can.urdf" + self.rgba = None + kw["urdf_path"] = resources.files("myGym").joinpath("envs",self.urdf) + kw["rgba"] = self.rgba + super().__init__(self._get_generic_reference() if reference is None else reference, "tuna_can", **kw) + self._kind = kind + + +class TiagoGripper(Gripper, Robot): + + def __init__(self, reference: Optional[str] = None, **kw): + super().__init__("gripper_tiago" if reference is None else reference, **kw) + # 
self._is_holding_predicate = IsHolding(self) + + + +# @pytest.fixture +def create_approach_action() -> AtomicAction: + a = Approach() + return a + + +# @pytest.fixture +def create_gripper_and_apple() -> dict[str, Entity]: + gripper_name = "tiago_gripper" + apple_name = "apple_01" + + t_gripper = TiagoGripper(gripper_name) + apple = Apple(apple_name) + objects_for_approach = { + "gripper": t_gripper, + "object": apple + } + + return objects_for_approach \ No newline at end of file diff --git a/myGym/envs/task.py b/myGym/envs/task.py index 144cc160..8729ebdc 100644 --- a/myGym/envs/task.py +++ b/myGym/envs/task.py @@ -1,145 +1,135 @@ -from myGym.envs.vision_module import VisionModule -import pybullet as p -import warnings -import time -import numpy as np -import pkg_resources -import cv2 import random -from scipy.spatial.distance import cityblock -from scipy.spatial.transform import Rotation -from pyquaternion import Quaternion -currentdir = pkg_resources.resource_filename("myGym", "envs") - +import myGym.envs.scene_objects # allows binding with rddl +from myGym.envs.scene_objects import TiagoGripper +from rddl.rddl_sampler import RDDLWorld +from rddl.task import RDDLTask +from rddl.entities import Gripper, ObjectEntity class TaskModule(): """ - Task module class for task management + Task module class for task management with RDDL Parameters: - :param task_type: (string) Type of learned task (reach, push, ...) - :param task_objects: (list of strings) Objects that are relevant for performing the task - :param distance_type: (string) Way of calculating distances (euclidean, manhattan) - :param logdir: (string) Directory for logging :param env: (object) Environment, where the training takes place + :param task_num_range: (list of ints) range of allowed number of subtasks + :param allowed_objects: (list of strings) names of allowed object classes + :param allowed_predicates: (list of strings) names of allowed predicates + :param pybullet_client: (object) pybullet instance """ - def __init__(self, task_type='reach', observation={}, - vae_path=None, yolact_path=None, yolact_config=None, distance_type='euclidean', - logdir=currentdir, env=None, number_tasks=None): - self.task_type = task_type - print("task_type", self.task_type) - self.distance_type = distance_type - self.number_tasks = number_tasks - self.current_task = 0 - self.subtask_over = False - self.logdir = logdir + def __init__(self, env=None, num_task_range=[], allowed_protoactions=[], allowed_objects=[], allowed_predicates=[], pybullet_client=None): self.env = env - self.image = None - self.depth = None - self.last_distance = None - self.init_distance = None - self.current_norm_distance = None - self.current_norm_rotation = None - self.stored_observation = [] - self.obs_template = observation - self.vision_module = VisionModule(observation=observation, env=env, vae_path=vae_path, yolact_path=yolact_path, yolact_config=yolact_config) - self.obsdim = self.check_obs_template() - self.vision_src = self.vision_module.src - self.writebool = False - - def reset_task(self): - """ - Reset task relevant data and statistics - """ - self.last_distance = None - self.init_distance = None + self.p = pybullet_client + self.number_tasks_range = num_task_range + self.allowed_protoactions = allowed_protoactions + self.allowed_objects = allowed_objects + self.allowed_predicates = allowed_predicates + self.current_task_length = None + self.current_task_sequence = None self.subtask_over = False - self.current_norm_distance = None - self.vision_module.mask = 
{} - self.vision_module.centroid = {} - self.vision_module.centroid_transformed = {} - self.env.task_objects["robot"] = self.env.robot - if self.vision_src == "vae": - self.generate_new_goal(self.env.objects_area_boarders, self.env.active_cameras) - - def render_images(self): - render_info = self.env.render(mode="rgb_array", camera_id=self.env.active_cameras) - self.image = render_info[self.env.active_cameras]["image"] - self.depth = render_info[self.env.active_cameras]["depth"] - if self.env.visualize == 1 and self.vision_src != "vae": - cv2.imshow("Vision input", cv2.cvtColor(self.image, cv2.COLOR_RGB2BGR)) - cv2.waitKey(1) - - def visualize_vae(self, recons): - imsize = self.vision_module.vae_imsize - actual_img, goal_img = [(lambda a: cv2.resize(a[60:390, 160:480], (imsize, imsize)))(a) for a in - [self.image, self.goal_image]] - images = [] - for idx, im in enumerate([actual_img, recons[0], goal_img, recons[1]]): - im = cv2.copyMakeBorder(im, 30, 10, 10, 20, cv2.BORDER_CONSTANT, value=[255, 255, 255]) - cv2.putText(im, ["actual", "actual rec", "goal", "goal rec"][idx], (10, 20), cv2.FONT_HERSHEY_SIMPLEX, .5, - (0, 0, 0), 1, 0) - images.append(cv2.cvtColor(im, cv2.COLOR_RGB2BGR)) - fig = np.vstack((np.hstack((images[0], images[1])), np.hstack((images[2], images[3])))) - cv2.imshow("Scene", fig) - cv2.waitKey(1) - - def get_linkstates_unpacked(self): - o = [] - [[o.append(x) for x in z] for z in self.env.robot.observe_all_links()] - return o - - def get_additional_obs(self, d, robot): - info = d.copy() - info["additional_obs"] = {} - for key in d["additional_obs"]: - if key == "joints_xyz": - o = [] - info["additional_obs"]["joints_xyz"] = self.get_linkstates_unpacked() - elif key == "joints_angles": - info["additional_obs"]["joints_angles"] = self.env.robot.get_joints_states() - elif key == "endeff_xyz": - info["additional_obs"]["endeff_xyz"] = self.vision_module.get_obj_position(robot, self.image, self.depth)[:3] - elif key == "endeff_6D": - info["additional_obs"]["endeff_6D"] = list(self.vision_module.get_obj_position(robot, self.image, self.depth)) \ - + list(self.vision_module.get_obj_orientation(robot)) - elif key == "touch": - if hasattr(self.env.env_objects["actual_state"], "magnetized_objects"): - obj_touch = self.env.env_objects["goal_state"] - else: - obj_touch = self.env.env_objects["actual_state"] - touch = self.env.robot.touch_sensors_active(obj_touch) or len(self.env.robot.magnetized_objects)>0 - info["additional_obs"]["touch"] = [1] if touch else [0] - elif key == "distractor": - poses = [self.vision_module.get_obj_position(self.env.task_objects["distractor"][x],\ - self.image, self.depth) for x in range(len(self.env.task_objects["distractor"]))] - info["additional_obs"]["distractor"] = [p for sublist in poses for p in sublist] - return info + self.rddl_robot = None + self.current_task = None + self.current_action = None + self.scene_entities = [] + self.scene_objects = [] + self.rddl_world = RDDLWorld(allowed_actions=self.cfg_strings_to_classes(self.allowed_protoactions), + allowed_entities=self.cfg_strings_to_classes(self.allowed_objects) + [TiagoGripper], + allowed_initial_actions=self.cfg_strings_to_classes(self.allowed_protoactions)) + #self.rddl_task = RDDLTask() + + def sample_num_subtasks(self): + '''Whenever a new sequence of actions is desired, this function chooses a random + length of the sequence based on the range from config. 
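(For example, a configured range of [1, 3] yields 1, 2 or 3 subtasks; note that random.randint is inclusive at both ends.)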
''' + self.current_task_length = random.randint(*self.number_tasks_range) + return self.current_task_length + + def cfg_strings_to_classes(self, cfg:list): + """Looks up the classes corresponding to names in cfg and returns a list of them. + Args: + cfg (list): list of strings with names of entities, e.g., ["Approach", "Drop"] or ["Banana", "Tuna"] + Raises: + Exception: when the class is not found, which usually means it is not defined in scene_objects.py + Returns: + list: list of entity classes + """ + list_of_classes = [] + for n in cfg: + n_fixed = n[0].upper() + n[1:].lower() + try: + cl = getattr(myGym.envs.scene_objects, n_fixed) + list_of_classes.append(cl) + except AttributeError: + raise Exception("Entity {} not found by RDDL! Check scene_objects.py for this class".format(n_fixed)) + return list_of_classes + + def create_new_task_sequence(self): + '''Calls rddl to make a new sequence of actions (subtasks).''' + n_samples = self.sample_num_subtasks() + # task_sequence = self.rddl_world.sample_generator(n_samples) + self.rddl_task = self.rddl_world.sample_world(n_samples) + self.current_task_sequence = self.rddl_task.get_generator() + print("Generated a new action sequence") + + def get_next_task(self): + '''When the previous action (subtask) is finished, this function will jump to the next one.''' + if self.current_task_sequence is None: + # if there is no action sequence to follow, first make a new one + self.create_new_task_sequence() + self.current_action = next(self.current_task_sequence) + print(f"Current task: {self.current_action}") + print("Desired world state after action:") + self.rddl_task.show_current_state() + + def build_scene_for_task_sequence(self): + '''After a new sequence of actions (subtasks) is created, this function will create the physical scene according to the + symbolic template. 
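Gripper entities are bound to the environment's robot; every other unbound entity is spawned as an EnvObject at a random position and z-rotation within the reachable workspace and bound to its symbolic counterpart.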
''' + if self.current_action is None: + # if there is no action sequence, first make a new one + self.get_next_task() + # scene_entities = self.rddl_world.get_created_variables() + scene_entities = self.rddl_task.gather_objects() + for entity in scene_entities: + if issubclass(entity.type, Gripper) and not entity.is_bound(): + robot = entity.type(robot=self.env.robot_type, robot_action=self.env.robot_action, task_type=self.env.task_type, **self.env.robot_kwargs) + entity.bind(robot) + self.rddl_robot = entity + self.scene_objects.append(robot) + elif not entity.is_bound(): + pos = entity.type.get_random_object_position(self.env.reachable_borders) # @TODO needs to be constrained by predicates + orn = entity.type.get_random_z_rotation() #@TODO needs to be constrained by predicates + + kw = {"env": self.env, "position":pos, "orientation":orn, "pybullet_client":self.p, "fixed":False, "observation":self.env.vision_source, + "vae_path":self.env.vae_path, "yolact_path":self.env.yolact_path, "yolact_config":self.env.yolact_config} + self.spawn_object_for_rddl(entity, **kw) - def get_observation(self): - """ - Get task relevant observation data based on reward signal source + - Returns: - :return self._observation: (array) Task relevant observation data, positions of task objects - """ - info_dict = self.obs_template.copy() - self.render_images() if "ground_truth" not in self.vision_src else None - if self.vision_src == "vae": - [info_dict["actual_state"], info_dict["goal_state"]], recons = (self.vision_module.encode_with_vae( - imgs=[self.image, self.goal_image], task=self.task_type, decode=self.env.visualize)) - self.visualize_vae(recons) if self.env.visualize == 1 else None + def spawn_object_for_rddl(self, rddl_entity, env, position, orientation, pybullet_client, + fixed=False, observation="ground_truth", vae_path=None, yolact_path=None, yolact_config=None): + """Initializes EnvObject and adds it to the scene. The object will be bound to the symbolic entity provided as rddl_entity. 
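An illustrative sketch (not part of this patch) of how the RDDL-driven TaskModule appears intended to be driven; the actual call sites live in the gym env outside this hunk, so the argument values below are placeholders assembled from identifiers defined earlier in the patch:

task = TaskModule(env=env,  # running gym env instance (hypothetical handle)
                  num_task_range=[1, 3],
                  allowed_protoactions=["Approach", "Grasp", "Move", "Drop"],
                  allowed_objects=["Apple", "Tuna", "Banana"],
                  allowed_predicates=["Near"],
                  pybullet_client=p)  # the env's pybullet client
task.create_new_task_sequence()        # sample a random subtask sequence via RDDLWorld
task.build_scene_for_task_sequence()   # spawn and bind the required objects; picks the first subtask if none is active
# ... later, once the current subtask is judged finished:
task.get_next_task()                   # advance to the next action in the sequence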
+ + Args: + rddl_entity (Variable): unbound rddl entity to bind with + env (class instance): env instance + position (list): position of the object [x, y, z] + orientation (tuple): orientation of the object in quaternions (x1, x2, x3, x4) + pybullet_client (object): instance of pybullet simulator + fixed (bool): whether object has fixed base or not + observation (str): vision source ("ground_truth", "yolact", "vae") + vae_path (str): path to vae model, if used + yolact_path (str): path to yolact model, if used + yolact_config (str): path to saved yolact config + """ + kw = {"env": env, "position":position, "orientation":orientation, "pybullet_client":pybullet_client, "fixed":fixed, + "observation":observation, "vae_path":vae_path, "yolact_path":yolact_path, "yolact_config":yolact_config} + o = rddl_entity.type(**kw) # initialize EnvObject and add to scene + rddl_entity.bind(o) # bind spawned object to the symbolic one + if o.rgba is None: # if no colour bound to object, assign random rgb + o.set_color(self.env.get_random_color()) else: - for key in ["actual_state", "goal_state"]: - if "endeff" in info_dict[key]: - xyz = self.vision_module.get_obj_position(self.env.task_objects["robot"], self.image, self.depth) - xyz = xyz[:3] if "xyz" in info_dict else xyz - else: - xyz = self.vision_module.get_obj_position(self.env.task_objects[key],self.image,self.depth) - info_dict[key] = xyz - self._observation = self.get_additional_obs(info_dict, self.env.task_objects["robot"]) - return self._observation - + o.set_color(o.get_color_rgba()) # assign correct colour to object as defined in scene_objects.py + self.scene_objects.append(o) + self.scene_entities.append(rddl_entity) + def get_world_state(self): """ @@ -150,457 +140,26 @@ def get_world_state(self): """ pass - def check_vision_failure(self): - """ - Check if YOLACT vision model fails repeatedly during episode - - Returns: - :return: (bool) - """ - self.stored_observation.append(self._observation["actual_state"]) - self.stored_observation.append(self._observation["goal_state"]) - if len(self.stored_observation) > 9: - self.stored_observation.pop(0) - if self.vision_src == "yolact": # Yolact assigns 10 to not detected objects - if all(10 in obs for obs in self.stored_observation): - return True - return False - - def check_time_exceeded(self): - """ - Check if maximum episode time was exceeded - - Returns: - :return: (bool) - """ - if (time.time() - self.env.episode_start_time) > self.env.episode_max_time: - self.env.episode_info = "Episode maximum time {} s exceeded".format(self.env.episode_max_time) - return True - return False - - def check_object_moved(self, object, threshold=0.3): - """ - Check if object moved more than allowed threshold - - Parameters: - :param object: (object) Object to check - :param threshold: (float) Maximum allowed object movement - Returns: - :return: (bool) - """ - if self.vision_src != "vae": - object_position = object.get_position() - pos_diff = np.array(object_position[:2]) - np.array(object.init_position[:2]) - distance = np.linalg.norm(pos_diff) - if distance > threshold: - self.env.episode_info = "The object has moved {:.2f} m, limit is {:.2f}".format(distance, threshold) - return True - return False - - def check_turn_threshold(self, desired_angle=57): - turned = self.env.reward.get_angle() - if turned >= desired_angle: - return True - elif turned <= - desired_angle: - return -1 - return False - - def check_distance_threshold(self, observation, threshold=0.1): - """ - Check if the distance between relevant 
task objects is under threshold for successful task completion - Returns: - :return: (bool) - """ - self.current_norm_distance = self.calc_distance(observation["goal_state"], observation["actual_state"]) - return self.current_norm_distance < threshold - - def check_distrot_threshold(self, observation, threshold=0.1): - """ - Check if the distance between relevant task objects is under threshold for successful task completion - Returns: - :return: (bool) - """ - self.current_norm_distance = self.calc_distance(observation["goal_state"], observation["actual_state"]) - self.current_norm_rotation = self.calc_rot_quat(observation["goal_state"], observation["actual_state"]) - - if self.current_norm_distance < threshold and self.current_norm_rotation < threshold: - return True - return False - - - def get_dice_value(self, quaternion): - def noramalize(q): - return q/np.linalg.norm(q) - - faces = np.array([ - [0,0,1], - [0,1,0], - [1,0,0], - [0,0,-1], - [0,-1,0], - [-1,0,0], - ]) - - rot_mtx = Rotation.from_quat(noramalize(quaternion)).as_matrix() - - rotated_faces = np.dot(rot_mtx, faces.T).T - top_face_index = np.argmax(rotated_faces[:,2]) - - #face_nums = [2, 5, 1, 4, 6, 3] #states that first face has number 2 on it, second 5 and so on... - #return face_nums[top_face_index] - return top_face_index+1 - - def check_dice_moving(self, observation, threshold=0.1): - def calc_still(o1, o2): - result = 0 - for i in range(len(o1)): - result+= np.power(o1[i]-o2[i],2) - result = np.sqrt(result) - - return result < 0.000005 - #print(observation["goal_state"]) - if len(observation)<3: - print("Invalid",observation) - x = np.array(observation["actual_state"][3:]) - - #print(observation) - - if not self.check_distance_threshold(self._observation) and self.env.episode_steps > 25: - if calc_still(observation["actual_state"], self.stored_observation): - if (self.stored_observation == observation["actual_state"]): - return 0 - else: - if self.writebool: - print(self.get_dice_value(x)) - print(observation) - self.writebool = False - if self.get_dice_value(x) == 6: - return 2 - return 1 - - else: - self.stored_observation = observation["actual_state"] - return 0 - else: - self.stored_observation = observation["actual_state"] - self.writebool = True - #print(self.get_dice_value(x)) - return 0 - - def check_points_distance_threshold(self, threshold=0.1): - o1 = self.env.task_objects["actual_state"] - if (self.task_type == 'pnp') and (self.env.robot_action != 'joints_gripper') and (len(self.env.robot.magnetized_objects) == 0): - o2 = self.env.robot - closest_points = self.env.p.getClosestPoints(o2.get_uid(), o1.get_uid(), threshold, - o2.end_effector_index, -1) - else: - o2 = self.env.task_objects["goal_state"] - idx = -1 if o1 != self.env.robot else self.env.robot.end_effector_index - closest_points = self.env.p.getClosestPoints(o1.get_uid(), o2.get_uid(), threshold, idx, -1) - return closest_points if len(closest_points) > 0 else False - - def drop_magnetic(self): - """ - Release the object if required point was reached and controls if task was compleated. 
- Returns: - :return: (bool) - """ - if self.env.reward.point_was_reached: - if not self.env.reward.was_dropped: - self.env.episode_over = False - self.env.robot.release_all_objects() - self.env.task.subtask_over = True - self.current_task = 0 - self.env.reward.was_dropped = True - # print("drop episode", self.env.reward.drop_episode, "episode steps", self.env.episode_steps) - if self.env.reward.drop_episode and self.env.reward.drop_episode + 35 < self.env.episode_steps: - self.end_episode_success() - return True - else: - return False def check_goal(self): """ Check if goal of the task was completed successfully """ - - finished = None - if self.task_type in ['reach', 'poke', 'pnp', 'pnpbgrip', 'FMOT', 'FROM', 'FROT', 'FMOM', 'FM','F','A','AG','AGM','AGMD','AGMDW']: #all tasks ending with R (FMR) have to have distrot checker - finished = self.check_distance_threshold(self._observation) - if self.task_type in ['pnprot','pnpswipe','FMR', 'FMOR', 'FMLFR', 'compositional']: - finished = self.check_distrot_threshold(self._observation) - if self.task_type in ["dropmag"]: #FMOT should be compositional - self.check_distance_threshold(self._observation) - finished = self.drop_magnetic() - if self.task_type in ['push', 'throw']: - self.check_distance_threshold(self._observation) - finished = self.check_points_distance_threshold() - if self.task_type == "switch": - self.check_distance_threshold(self._observation) - finished = abs(self.env.reward.get_angle()) >= 18 - if self.task_type == "press": - self.check_distance_threshold(self._observation) - finished = self.env.reward.get_angle() >= 1.71 - if self.task_type == "dice_throw": - finished = self.check_dice_moving(self._observation) - - if self.task_type == "turn": - self.check_distance_threshold(self._observation) - finished = self.check_turn_threshold() - self.last_distance = self.current_norm_distance - if self.init_distance is None: - self.init_distance = self.current_norm_distance - #if self.task_type == 'pnp' and self.env.robot_action != 'joints_gripper' and finished: - # if len(self.env.robot.magnetized_objects) == 0 and self.env.episode_steps > 5: - # self.end_episode_success() - # else: - # self.env.episode_over = False - if finished: - if self.task_type == "dice_throw": - - if finished == 1: - self.end_episode_fail("Finished with wrong dice result thrown") - return finished - self.end_episode_success() - if self.check_time_exceeded() or self.env.episode_steps == self.env.max_steps: - self.end_episode_fail("Max amount of steps reached") - if "ground_truth" not in self.vision_src and (self.check_vision_failure()): - self.stored_observation = [] - self.end_episode_fail("Vision fails repeatedly") - - def end_episode_fail(self, message): - self.env.episode_over = True - self.env.episode_failed = True - self.env.episode_info = message - self.env.robot.release_all_objects() - - def end_episode_success(self): - #print("Finished subtask {}".format(self.current_task)) - if self.current_task == (self.number_tasks-1): - self.env.episode_over = True - self.env.robot.release_all_objects() - self.current_task = 0 - if self.env.episode_steps == 1: - self.env.episode_info = "Task completed in initial configuration" - else: - self.env.episode_info = "Task completed successfully" - else: - self.env.episode_over = False - self.env.robot.release_all_objects() - self.subtask_over = True - self.current_task += 1 - - def calc_distance(self, obj1, obj2): - """ - Calculate distance between two objects - - Parameters: - :param obj1: (float array) First object 
position representation - :param obj2: (float array) Second object position representation - Returns: - :return dist: (float) Distance between 2 float arrays - """ - #print("gripper_position:", obj1[:3]) - #print("goal_position_offset:", obj2[:3]) - #TODO - - if self.distance_type == "euclidean": - dist = np.linalg.norm(np.asarray(obj1[:3]) - np.asarray(obj2[:3])) - #print("Calculated distance:", dist) - elif self.distance_type == "manhattan": - dist = cityblock(obj1, obj2) - return dist - - def calc_height_diff(self, obj1, obj2): - """ - Calculate height difference between objects - - Parameters: - :param obj1: (float array) First object position representation - :param obj2: (float array) Second object position representation - Returns: - :return dist: (float) Distance iz Z axis - """ - #TODO - dist = abs(obj1[2] - obj2[2]) - return dist - - def calc_rot_quat(self, obj1, obj2): - """ - Calculate difference between two quaternions - - Parameters: - :param obj1: (float array) First object quaternion - :param obj2: (float array) Second object object quaternion - Returns: - :return dist: (float) Distance between 2 float arrays - """ - #TODO - #tran = np.linalg.norm(np.asarray(obj1[:3]) - np.asarray(obj2[:3])) - rot = Quaternion.distance(Quaternion(obj1[3:]), Quaternion(obj2[3:])) - #print(obj1[3:]) - #print(obj2[3:]) - #print(rot) - return rot + pass - def calc_rotation_diff(self, obj1, obj2): + def check_end_episode(self): """ - Calculate diffrence between orientation of two objects - - Parameters: - :param obj1: (float array) First object orientation (Euler angles) - :param obj2: (float array) Second object orientation (Euler angles) - Returns: - :return diff: (float) Distance between 2 float arrays + Check if episode should finish based on fulfilled goal or exceeded number of steps """ - if self.distance_type == "euclidean": - diff = np.linalg.norm(np.asarray(obj1) - np.asarray(obj2)) - elif self.distance_type == "manhattan": - diff = cityblock(obj1, obj2) - return diff - - def trajectory_distance(self, trajectory, point, last_nearest_index, n=10): - """Compute the distance of point from a trajectory, n points around the last nearest point""" - index1, index2 = last_nearest_index - (int(n / 2)), last_nearest_index + int(n / 2) - print("indexes:", index1, index2) - if (index1) < 0: # index can't be less than zero - index1 -= last_nearest_index - (int(n / 2)) - index2 -= last_nearest_index - (int(n / 2)) - if (index2 > np.shape(trajectory)[1]): - index2 =np.shape(trajectory)[1] - trajectory_points = np.transpose( - trajectory[:, index1:index2]) # select n points around last nearest point of trajectory - distances = [] - for i in range(index2-index1): - distances.append(self.calc_distance(point, trajectory_points[i])) - min_idx = int(np.argwhere(distances == np.min(distances))) - min = np.min(distances) - nearest_index = min_idx + index1 - return min, nearest_index - - def create_trajectory(self, fx, fy, fz, t): - """General trajectory creator. 
Pass functions fx, fy, fz and parametric vector t to create any 3D trajectory""" - trajectory = np.asarray([fx(t), fy(t), fz(t)]) - return trajectory - - - def create_line(self, point1, point2, step=0.01): - """Creates line from point1 to point2 -> vectors of length 1/step""" - t = np.arange(0, 1, step) - - def linemaker_x(t): - "axis: 0 = x, 1 = y, 2 = z" - return (point2[0] - point1[0]) * t + point1[0] - - def linemaker_y(t): - return (point2[1] - point1[1]) * t + point1[1] - - def linemaker_z(t): - return (point2[2] - point1[2]) * t + point1[2] - - return self.create_trajectory(fx=linemaker_x, fy=linemaker_y, fz=linemaker_z, t=t) - - - def create_circular_trajectory(self, center, radius, rot_vector = [0.0, 0.0, 0.0], arc=np.pi, step=0.01, direction = 1): - """Creates a 2D circular trajectory in 3D space. - params: center ([x,y,z]), radius (float): self-explanatory - rot_vector ([x,y,z]): Axis of rotation. Angle of rotation is norm of rot_vector - arc (radians): 2pi means full circle, 1 pi means half a circle etc... - """ - phi = np.arange(0, arc, step) - v = np.asarray(rot_vector) - theta = np.linalg.norm(v) - - # creation of non-rotated circle of given radius located at [0,0,0] - base_circle = np.array([np.cos(phi) * radius, np.sin(phi) * radius, [0] * len(phi)])*direction - rotation = np.eye(3) - print(theta) - if theta != 0.0: - normalized_v = v * (1 / theta) - # Rodrigues' formula: - rotation = (np.eye(3) + np.sin(theta) * self.skew_symmetric(normalized_v) + - (1 - np.cos(theta)) * np.matmul(self.skew_symmetric(normalized_v), - self.skew_symmetric(normalized_v))) - rotated = np.asarray([[], [], []]) - for i in range(len(phi)): - rotated_v = np.asarray([np.matmul(rotation, base_circle[:3, i])]) - rotated = np.append(rotated, np.transpose(rotated_v), axis=1) - # moving circle to its center - move = np.asarray(center) - final_circle = np.transpose(np.transpose(rotated) + move) - return final_circle - - - def create_circular_trajectory_v2(self, point1, point2, radius, direction): pass - def generate_new_goal(self, object_area_borders, camera_id): - """ - Generate an image of new goal for VEA vision model. This function is supposed to be called from env workspace. 
- - Parameters: - :param object_area_borders: (list) Volume in space where task objects can be located - :param camera_id: (int) ID of environment camera active for image rendering - """ - if self.task_type == "push": - random_pos = self.env.task_objects[0].get_random_object_position(object_area_borders) - random_rot = self.env.task_objects[0].get_random_object_orientation() - self.env.robot.reset_up() - self.env.task_objects[0].set_position(random_pos) - self.env.task_objects[0].set_orientation(random_rot) - self.env.task_objects[1].set_position(random_pos) - self.env.task_objects[1].set_orientation(random_rot) - render_info = self.env.render(mode="rgb_array", camera_id = self.env.active_cameras) - self.goal_image = render_info[self.env.active_cameras]["image"] - random_pos = self.env.task_objects[0].get_random_object_position(object_area_borders) - random_rot = self.env.task_objects[0].get_random_object_orientation() - self.env.task_objects[0].set_position(random_pos) - self.env.task_objects[0].set_orientation(random_rot) - elif self.task_type == "reach": - bounded_action = [random.uniform(-3,-2.4) for x in range(2)] - action = [random.uniform(-2.9,2.9) for x in range(6)] - self.env.robot.reset_joints(bounded_action + action) - self.goal_image = self.env.render(mode="rgb_array", camera_id=self.env.active_cameras)[self.env.active_cameras]['image'] - self.env.robot.reset_up() - #self.goal_image = self.vision_module.vae_generate_sample() - def check_obs_template(self): + def reset_task(self): """ - Checks if observations are set according to rules and computes observation dim - - Returns: - :return obsdim: (int) Dimensionality of observation + Start a new episode with a new setup """ - t = self.obs_template - assert "actual_state" and "goal_state" in t.keys(), \ - "Observation setup in config must contain actual_state and goal_state" - if t["additional_obs"]: - assert [x in ["joints_xyz", "joints_angles", "endeff_xyz", "endeff_6D", "touch", "distractor"] for x in - t["additional_obs"]], "Failed to parse some of the additional_obs in config" - assert t["actual_state"] in ["endeff_xyz", "endeff_6D", "obj_xyz", "obj_6D", "vae", "yolact", "voxel", "dope"],\ - "failed to parse actual_state in Observation config" - assert t["goal_state"] in ["obj_xyz", "obj_6D", "vae", "yolact", "voxel" or "dope"],\ - "failed to parse goal_state in Observation config" - if "endeff" not in t["actual_state"]: - assert t["actual_state"] == t["goal_state"], \ - "actual_state and goal_state in Observation must have the same format" - else: - assert t["actual_state"].split("_")[-1] == t["goal_state"] .split("_")[-1], "Actual state and goal state must " \ - "have the same number of dimensions!" - if "endeff_xyz" in t["additional_obs"] or "endeff_6D" in t["additional_obs"]: - warnings.warn("Observation config: endeff_xyz already in actual_state, no need to have it in additional_obs. 
Removing it") - [self.obs_template["additional_obs"].remove(x) for x in t["additional_obs"] if "endeff" in x] - obsdim = 0 - for x in [t["actual_state"], t["goal_state"]]: - get_datalen = {"joints_xyz":len(self.get_linkstates_unpacked()), - "joints_angles":len(self.env.robot.get_joints_states()), - "endeff_xyz":len(self.vision_module.get_obj_position(self.env.robot, self.image, self.depth)[:3]), - "endeff_6D":len(list(self.vision_module.get_obj_position(self.env.robot, self.image, self.depth)) \ - + list(self.vision_module.get_obj_orientation(self.env.robot))), - "dope":7, "obj_6D":7, "distractor": 3, "touch":1, "yolact":3, "voxel":3, "obj_xyz":3, - "vae":self.vision_module.obsdim} - obsdim += get_datalen[x] - for x in t["additional_obs"]: - obsdim += get_datalen[x] - return obsdim \ No newline at end of file + self.current_task_length = None + self.current_task_sequence = None + self.subtask_over = False + self.current_action = None \ No newline at end of file diff --git a/myGym/envs/vision_module.py b/myGym/envs/vision_module.py index c6209d68..23a1684d 100644 --- a/myGym/envs/vision_module.py +++ b/myGym/envs/vision_module.py @@ -6,33 +6,15 @@ import numpy as np import cv2 import random -import pkg_resources -currentdir = pkg_resources.resource_filename("myGym", "envs") +import importlib.resources as resources +currentdir = resources.files("myGym").joinpath("envs") # import vision models YOLACT, VAE -sys.path.append(pkg_resources.resource_filename("myGym", "yolact_vision")) #may be moved somewhere else -try: - from inference_tool import InfTool -except: - print("Problem importing YOLACT.") +sys.path.append(resources.files("myGym").joinpath("yolact_vision")) #may be moved somewhere else from myGym.vae.vis_helpers import load_checkpoint from myGym.vae import sample -def get_module_type(observation): - """ - Get source of the information from environment (ground_truth, yolact, vae) - - Returns: - :return source: (string) Source of information - """ - if observation["actual_state"] not in ["vae", "yolact", "voxel", "dope"]: - src = "ground_truth_6D" if "6D" in observation["actual_state"] else "ground_truth" - else: - src = observation["actual_state"] - return src - - class VisionModule: """ Vision class that retrieves information from environment based on a visual subsystem (YOLACT, VAE) or ground truth @@ -44,8 +26,8 @@ class VisionModule: :param yolact_path: (string) Path to a trained Yolact in 3dvu reward type :param yolact_config: (string) Path to saved Yolact config obj or name of an existing one in the data/Config script or None for autodetection """ - def __init__(self, observation={}, env=None, vae_path=None, yolact_path=None, yolact_config=None): - self.src = get_module_type(observation) + def __init__(self, observation="ground_truth", env=None, vae_path=None, yolact_path=None, yolact_config=None): + self.src = observation self.env = env self.vae_embedder = None self.vae_imsize = None @@ -112,7 +94,7 @@ def get_obj_pixel_position(self, obj=None, img=None): else: raise Exception("You need to provide image argument for segmentation") - def get_obj_bbox(self, obj=None, img=None): + def get_obj_bbox_for_obs(self, obj=None, img=None): """ Get bounding box of an object from 2D image @@ -145,7 +127,7 @@ def get_obj_bbox(self, obj=None, img=None): else: raise Exception("{} module does not provide bounding boxes!".format(self.src)) - def get_obj_position(self, obj=None, img=None, depth=None): + def get_obj_position_for_obs(self, obj=None, img=None, depth=None): """ Get object position in 
world coordinates of environment from 2D and depth image @@ -186,7 +168,7 @@ def get_obj_position(self, obj=None, img=None, depth=None): raise Exception("You need to provide image argument to infer object position") return - def get_obj_orientation(self, obj=None, img=None): + def get_obj_orientation_for_obs(self, obj=None, img=None): """ Get object orientation in world coordinates of environment from 2D image @@ -209,7 +191,7 @@ def get_obj_orientation(self, obj=None, img=None): raise Exception("You need to provide image argument to infer orientation") return - def vae_generate_sample(self): + def vae_generate_sample_for_obs(self): """ Generate image as a sample of VAE latent representation @@ -222,7 +204,7 @@ def vae_generate_sample(self): dec_img = np.asarray((img * 255).cpu().detach(), dtype="uint8") return dec_img - def encode_with_vae(self, imgs, task="reach", decode=0): + def encode_with_vae_for_obs(self, imgs, task="reach", decode=0): """ Encode the input image into an n-dimensional latent variable using VAE model @@ -274,7 +256,7 @@ def _initialize_network(self, network): :param network: (string) Source of information from environment (yolact, vae) """ if network == "vae": - weights_pth = pkg_resources.resource_filename("myGym", self.vae_path) + weights_pth = resources.files("myGym").joinpath(self.vae_path) try: self.vae_embedder, imsize = load_checkpoint(weights_pth, use_cuda=True) except: @@ -282,10 +264,14 @@ def _initialize_network(self, network): self.vae_imsize = imsize self.obsdim = (2*self.vae_embedder.n_latents) + 3 elif network == "yolact": - weights = pkg_resources.resource_filename("myGym", self.yolact_path) + weights = resources.files("myGym").joinpath(self.yolact_path) if ".obj" in self.yolact_config: - config = pkg_resources.resource_filename("myGym", self.yolact_config) + config = resources.files("myGym").joinpath(self.yolact_config) try: + try: + from inference_tool import InfTool + except: + print("Problem importing YOLACT.") self.yolact_cnn = InfTool(weights=weights, config=config, score_threshold=0.2) except: raise Exception("For yolact observations, you need to download pre-trained vision model and specify its path in config. Specified {} and {} not found.".format(self.yolact_path, self.yolact_config)) diff --git a/myGym/envs/visualize_trajectory.py b/myGym/envs/visualize_trajectory.py index da59920b..73b6473a 100644 --- a/myGym/envs/visualize_trajectory.py +++ b/myGym/envs/visualize_trajectory.py @@ -1,7 +1,7 @@ from mpl_toolkits import mplot3d import numpy as np import matplotlib.pyplot as plt -from stable_baselines import results_plotter +#from stable_baselines import results_plotter import os import math from math import sqrt, fabs, exp, pi, asin diff --git a/myGym/envs/wrappers.py b/myGym/envs/wrappers.py index a80a2168..e08b4e38 100644 --- a/myGym/envs/wrappers.py +++ b/myGym/envs/wrappers.py @@ -1,4 +1,4 @@ -import gym +import gymnasium as gym import commentjson diff --git a/myGym/generate_dataset.py b/myGym/generate_dataset.py index 5837ddb5..407cc368 100644 --- a/myGym/generate_dataset.py +++ b/myGym/generate_dataset.py @@ -1,6 +1,6 @@ ## script to generate train/test sets from the simulator in COCO or DOPE format. Used for vision training. 
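The pkg_resources to importlib.resources swap in the hunk below repeats a pattern used throughout this patch; a minimal sketch of the replacement (illustrative only), with the caveat that files() returns a Traversable rather than a plain string:

import importlib.resources as resources

# replacement for pkg_resources.resource_filename("myGym", "configs/dataset_coco.json")
cfg = resources.files("myGym").joinpath("configs/dataset_coco.json")

# on a regular (non-zipped) install this is a pathlib.Path; APIs that require a
# plain string, such as sys.path entries, may need str(cfg) or os.fspath(cfg)
print(str(cfg))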
-import gym +import gymnasium as gym from myGym import envs from matplotlib.pyplot import imshow, show import cv2 @@ -16,17 +16,18 @@ import pybullet as p from bbox import BBox3D from myGym.envs.wrappers import RandomizedEnvWrapper -import pkg_resources +import importlib.resources as resources + # config, specify here or pass as an input argument -CONFIG_DEFAULT = pkg_resources.resource_filename("myGym", 'configs/dataset_coco.json') +CONFIG_DEFAULT = resources.files("myGym").joinpath("configs/dataset_coco.json") # helper functions: def color_names_to_rgb(): """ Assign RGB colors to objects by name as specified in the training config file """ - with open(pkg_resources.resource_filename("myGym", 'utils/rgbcolors.json'), "r") as read_file: + with open(resources.files("myGym").joinpath("utils/rgbcolors.json"), "r") as read_file: clr = json.load(read_file) #json file with suggested colors new_dict = {} for key, value in config['object_colors'].items(): @@ -498,7 +499,7 @@ def collect_data(self, steps): config_path = CONFIG_DEFAULT print('No config.json passed in argument. Loading default config: {}'.format(CONFIG_DEFAULT)) else: - config_path = pkg_resources.resource_filename("myGym", sys.argv[1]) + config_path = resources.files("myGym").joinpath(sys.argv[1]) with open(config_path) as file: config = commentjson.load(file) diff --git a/myGym/halfcheetah-appo.yaml b/myGym/halfcheetah-appo.yaml deleted file mode 100644 index b423e0ed..00000000 --- a/myGym/halfcheetah-appo.yaml +++ /dev/null @@ -1,37 +0,0 @@ -# This can reach 9k reward in 2 hours on a Titan XP GPU -# with 16 workers and 8 envs per worker. -halfcheetah-appo: - env: HalfCheetah-v2 - run: APPO - stop: - time_total_s: 10800 - config: - # Works for both torch and tf. - framework: torch - vtrace: True - gamma: 0.99 - lambda: 0.95 - rollout_fragment_length: 512 - train_batch_size: 4096 - num_workers: 16 - num_gpus: 1 - broadcast_interval: 1 - max_sample_requests_in_flight_per_worker: 1 - num_multi_gpu_tower_stacks: 1 - num_envs_per_worker: 32 - minibatch_buffer_size: 16 - num_sgd_iter: 32 - clip_param: 0.2 - lr_schedule: [ - [0, 0.0005], - [150000000, 0.000001], - ] - vf_loss_coeff: 0.5 - entropy_coeff: 0.01 - grad_clip: 0.5 - batch_mode: truncate_episodes - use_kl_loss: True - kl_coeff: 1.0 - kl_target: 0.04 - observation_filter: MeanStdFilter - diff --git a/myGym/humanoid-ppo.yaml b/myGym/humanoid-ppo.yaml deleted file mode 100644 index fadd7e96..00000000 --- a/myGym/humanoid-ppo.yaml +++ /dev/null @@ -1,23 +0,0 @@ -humanoid-ppo: - env: Humanoid-v4 - run: PPO - stop: - sampler_results/episode_reward_mean: 6000 - timesteps_total: 40000000 - config: - # Works for both torch and tf. 
- framework: torch - gamma: 0.995 - kl_coeff: 1.0 - num_sgd_iter: 20 - lr: .0001 - sgd_minibatch_size: 32768 - train_batch_size: 320000 - model: - free_log_std: true - use_gae: false - num_workers: 10 - num_envs_per_worker: 20 - num_gpus: 1 - batch_mode: complete_episodes - observation_filter: MeanStdFilter diff --git a/myGym/learnability.py b/myGym/learnability.py deleted file mode 100644 index 7d4081ea..00000000 --- a/myGym/learnability.py +++ /dev/null @@ -1,55 +0,0 @@ -import os -import re -import threading -import subprocess -from sklearn.model_selection import ParameterGrid -import json -import time - -parameters = { - "robot_action": ["step", "joints","joints_gripper"], - "task_type": ["reach", "push","pnp"], - "max_episode_steps": [1024, 512, 256] -} -parameter_grid = ParameterGrid(parameters) -configfile = 'configs/multi.json' -evaluation_results_paths = [None] * len(parameter_grid) -threaded = True -last_eval_results = {} - - -def train(params, i): - if "task_type" in params and params["task_type"] != "reach": - params["task_objects"] = "cube_holes target" - params["reward"] = "complex_distance" - print((" ".join(f"--{key} {value}" for key, value in params.items())).split()) - command = 'python train.py --config {configfile} '.format(configfile=configfile) + " ".join(f"--{key} {value}" for key, value in params.items()) - output = subprocess.check_output(command.split()) - evaluation_results_paths[i] = output.splitlines()[-1].decode('UTF-8') + "/evaluation_results.json" - with open(evaluation_results_paths[i]) as f: - data = json.load(f) - last_eval_results[str(list(params.values()))] = list(data.values())[-1] - print(last_eval_results) - with open("trained_models/multi_evaluation_results.json", 'w') as f: - json.dump(last_eval_results, f, indent=4) - - # os.system('python train.py --config {configfile} '.format(configfile=configfile) - # + " ".join(f"--{key} {value}" for key, value in params.items())) - - -if __name__ == '__main__': - threads = [] - starttime=time.time() - for i, params in enumerate(parameter_grid): - if threaded: - thread = threading.Thread(target=train, args=(params, i)) - thread.start() - threads.append(thread) - else: - train(params.copy(), i) - - if threaded: - for thread in threads: - thread.join() - endtime=time.time() - print (endtime-starttime) diff --git a/myGym/multi_evaluation_task.py b/myGym/multi_evaluation_task.py deleted file mode 100644 index 3eb3a461..00000000 --- a/myGym/multi_evaluation_task.py +++ /dev/null @@ -1,60 +0,0 @@ -import os -import re -import threading -import subprocess -from sklearn.model_selection import ParameterGrid -import json -import time -import argparse - - -parser = argparse.ArgumentParser() -parser.add_argument("-cfg", "--config", type=str, help="config file for evaluation") -parser.add_argument("-out", "--output", type=str, help="output file") - -args = parser.parse_args() - -parameters = { - "robot": ["kuka", "panda",'ur5'], - "robot_action": ["step", "joints","absolute"], - "algo": ["ppo", "ppo2","sac"], -} -parameter_grid = ParameterGrid(parameters) -configfile = args.config -evaluation_results_paths = [None] * len(parameter_grid) -threaded = True -last_eval_results = {} - - -def train(params, i): - print((" ".join(f"--{key} {value}" for key, value in params.items())).split()) - command = 'python train.py --config {configfile} '.format(configfile=configfile) + " ".join(f"--{key} {value}" for key, value in params.items()) - output = subprocess.check_output(command.split()) - evaluation_results_paths[i] = 
output.splitlines()[-1].decode('UTF-8') + "/evaluation_results.json" - with open(evaluation_results_paths[i]) as f: - data = json.load(f) - last_eval_results[str(list(params.values()))] = list(data.values())[-1] - print(last_eval_results) - with open(args.output, 'w') as f: - json.dump(last_eval_results, f, indent=4) - - # os.system('python train.py --config {configfile} '.format(configfile=configfile) - # + " ".join(f"--{key} {value}" for key, value in params.items())) - - -if __name__ == '__main__': - threads = [] - starttime=time.time() - for i, params in enumerate(parameter_grid): - if threaded: - thread = threading.Thread(target=train, args=(params, i)) - thread.start() - threads.append(thread) - else: - train(params.copy(), i) - - if threaded: - for thread in threads: - thread.join() - endtime=time.time() - print (endtime-starttime) diff --git a/myGym/mygymrl_ppc_.txt b/myGym/mygymrl_ppc_.txt deleted file mode 100644 index 79783b9d..00000000 --- a/myGym/mygymrl_ppc_.txt +++ /dev/null @@ -1,24 +0,0 @@ -# This file may be used to create an environment using: -# $ conda create --name --file -# platform: linux-64 -@EXPLICIT -https://repo.anaconda.com/pkgs/main/linux-64/_libgcc_mutex-0.1-main.conda -https://repo.anaconda.com/pkgs/main/linux-64/ca-certificates-2023.05.30-h06a4308_0.conda -https://repo.anaconda.com/pkgs/main/linux-64/ld_impl_linux-64-2.38-h1181459_1.conda -https://repo.anaconda.com/pkgs/main/linux-64/libstdcxx-ng-11.2.0-h1234567_1.conda -https://repo.anaconda.com/pkgs/main/linux-64/libgomp-11.2.0-h1234567_1.conda -https://repo.anaconda.com/pkgs/main/linux-64/_openmp_mutex-5.1-1_gnu.conda -https://repo.anaconda.com/pkgs/main/linux-64/libgcc-ng-11.2.0-h1234567_1.conda -https://repo.anaconda.com/pkgs/main/linux-64/libffi-3.4.4-h6a678d5_0.conda -https://repo.anaconda.com/pkgs/main/linux-64/ncurses-6.4-h6a678d5_0.conda -https://repo.anaconda.com/pkgs/main/linux-64/openssl-1.1.1u-h7f8727e_0.conda -https://repo.anaconda.com/pkgs/main/linux-64/xz-5.4.2-h5eee18b_0.conda -https://repo.anaconda.com/pkgs/main/linux-64/zlib-1.2.13-h5eee18b_0.conda -https://repo.anaconda.com/pkgs/main/linux-64/readline-8.2-h5eee18b_0.conda -https://repo.anaconda.com/pkgs/main/linux-64/tk-8.6.12-h1ccaba5_0.conda -https://repo.anaconda.com/pkgs/main/linux-64/sqlite-3.41.2-h5eee18b_0.conda -https://repo.anaconda.com/pkgs/main/linux-64/python-3.7.16-h7a1cb2a_0.conda -https://repo.anaconda.com/pkgs/main/linux-64/certifi-2022.12.7-py37h06a4308_0.conda -https://repo.anaconda.com/pkgs/main/linux-64/wheel-0.38.4-py37h06a4308_0.conda -https://repo.anaconda.com/pkgs/main/linux-64/setuptools-65.6.3-py37h06a4308_0.conda -https://repo.anaconda.com/pkgs/main/linux-64/pip-22.3.1-py37h06a4308_0.conda diff --git a/myGym/pendulum-ddppo.yaml b/myGym/pendulum-ddppo.yaml deleted file mode 100644 index 1c3f5708..00000000 --- a/myGym/pendulum-ddppo.yaml +++ /dev/null @@ -1,20 +0,0 @@ -pendulum-ddppo: - env: Pendulum-v1 - run: DDPPO - stop: - sampler_results/episode_reward_mean: -300 - timesteps_total: 1500000 - config: - framework: torch - train_batch_size: 2500 # per worker - num_gpus_per_worker: 1 - num_workers: 4 - num_envs_per_worker: 10 - observation_filter: MeanStdFilter - gamma: 0.95 - sgd_minibatch_size: 50 - num_sgd_iter: 5 - clip_param: 0.3 - vf_clip_param: 10.0 - lambda: 0.1 - lr: 0.00015 diff --git a/myGym/pendulum-ppo.yaml b/myGym/pendulum-ppo.yaml deleted file mode 100644 index f129279f..00000000 --- a/myGym/pendulum-ppo.yaml +++ /dev/null @@ -1,23 +0,0 @@ -# Can expect improvement to -140 reward in ~300-500k 
timesteps. -pendulum-ppo: - env: Pendulum-v1 - run: PPO - stop: - sampler_results/episode_reward_mean: -400 - timesteps_total: 400000 - config: - # Works for both torch and tf. - framework: torch - train_batch_size: 512 - vf_clip_param: 10.0 - num_gpus: 1 - num_workers: 3 - num_envs_per_worker: 20 - lambda: 0.1 - gamma: 0.95 - lr: 0.0003 - sgd_minibatch_size: 64 - observation_filter: MeanStdFilter - model: - fcnet_activation: relu - enable_connectors: true diff --git a/myGym/sim2real.py b/myGym/sim2real.py deleted file mode 100644 index 2547d968..00000000 --- a/myGym/sim2real.py +++ /dev/null @@ -1,541 +0,0 @@ -from ast import arg -import gym -from myGym import envs -import cv2 -from myGym.train import get_parser, get_arguments, configure_implemented_combos, configure_env -import os, imageio -import numpy as np -import time -from numpy import matrix -import pybullet as p -import pybullet_data -import pkg_resources -import random -import getkey -import signal -from utils.nicomotors import NicoMotors - - -clear = lambda: os.system('clear') - -AVAILABLE_SIMULATION_ENGINES = ["mujoco", "pybullet"] -AVAILABLE_TRAINING_FRAMEWORKS = ["tensorflow", "pytorch"] - -def visualize_sampling_area(arg_dict): - rx = (arg_dict["task_objects"][0]["goal"]["sampling_area"][0] - arg_dict["task_objects"][0]["goal"]["sampling_area"][1])/2 - ry = (arg_dict["task_objects"][0]["goal"]["sampling_area"][2] - arg_dict["task_objects"][0]["goal"]["sampling_area"][3])/2 - rz = (arg_dict["task_objects"][0]["goal"]["sampling_area"][4] - arg_dict["task_objects"][0]["goal"]["sampling_area"][5])/2 - - visual = p.createVisualShape(shapeType=p.GEOM_BOX, halfExtents=[rx,ry,rz], rgbaColor=[1,0,0,.2]) - collision = -1 - - sampling = p.createMultiBody( - baseVisualShapeIndex=visual, - baseCollisionShapeIndex=collision, - baseMass=0, - basePosition=[arg_dict["task_objects"][0]["goal"]["sampling_area"][0]-rx, arg_dict["task_objects"][0]["goal"]["sampling_area"][2]-ry,arg_dict["task_objects"][0]["goal"]["sampling_area"][4]-rz], - ) - -def visualize_trajectories (info,action): - - visualo = p.createVisualShape(shapeType=p.GEOM_SPHERE, radius=0.01, rgbaColor=[0,0,1,.3]) - collision = -1 - p.createMultiBody( - baseVisualShapeIndex=visualo, - baseCollisionShapeIndex=collision, - baseMass=0, - basePosition=info['o']['actual_state'], - ) - - #visualr = p.createVisualShape(shapeType=p.GEOM_SPHERE, radius=0.01, rgbaColor=[0,1,0,.5]) - #p.createMultiBody( - # baseVisualShapeIndex=visualr, - # baseCollisionShapeIndex=collision, - # baseMass=0, - # basePosition=info['o']['additional_obs']['endeff_xyz'], - #) - - visuala = p.createVisualShape(shapeType=p.GEOM_SPHERE, radius=0.01, rgbaColor=[1,0,0,.3]) - p.createMultiBody( - baseVisualShapeIndex=visuala, - baseCollisionShapeIndex=collision, - baseMass=0, - basePosition=action[:3], - ) - -def visualize_goal(info): - visualg = p.createVisualShape(shapeType=p.GEOM_SPHERE, radius=0.01, rgbaColor=[1,0,0,.5]) - collision = -1 - p.createMultiBody( - baseVisualShapeIndex=visualg, - baseCollisionShapeIndex=collision, - baseMass=0, - basePosition=info['o']['goal_state'], - ) -def change_dynamics(cubex,lfriction,rfriction,ldamping,adamping): - p.changeDynamics(cubex, -1, lateralFriction=p.readUserDebugParameter(lfriction)) - p.changeDynamics(cubex,-1,rollingFriction=p.readUserDebugParameter(rfriction)) - p.changeDynamics(cubex, -1, linearDamping=p.readUserDebugParameter(ldamping)) - p.changeDynamics(cubex, -1, angularDamping=p.readUserDebugParameter(adamping)) - - #visualrobot = 
p.createVisualShape(shapeType=p.GEOM_SPHERE, radius=1, rgbaColor=[0,1,0,.2]) - #collisionrobot = -1 - #sampling = p.createMultiBody( - # baseVisualShapeIndex=visualrobot, - # baseCollisionShapeIndex=collisionrobot, - # baseMass=0, - # basePosition=[0,0,0.3], - #) - -def visualize_infotext(action, env, info): - p.addUserDebugText(f"Episode:{env.env.episode_number}", - [.65, 1., 0.45], textSize=1.0, lifeTime=0.5, textColorRGB=[0.4, 0.2, .3]) - p.addUserDebugText(f"Step:{env.env.episode_steps}", - [.67, 1, .40], textSize=1.0, lifeTime=0.5, textColorRGB=[0.2, 0.8, 1]) - p.addUserDebugText(f"Subtask:{env.env.task.current_task}", - [.69, 1, 0.35], textSize=1.0, lifeTime=0.5, textColorRGB=[0.4, 0.2, 1]) - p.addUserDebugText(f"Network:{env.env.reward.current_network}", - [.71, 1, 0.3], textSize=1.0, lifeTime=0.5, textColorRGB=[0.0, 0.0, 1]) - p.addUserDebugText(f"Action (Gripper):{matrix(np.around(np.array(action),2))}", - [.73, 1, 0.25], textSize=1.0, lifeTime=0.5, textColorRGB=[1, 0, 0]) - p.addUserDebugText(f"Actual_state:{matrix(np.around(np.array(env.env.observation['task_objects']['actual_state'][:3]),2))}", - [.75, 1, 0.2], textSize=1.0, lifeTime=0.5, textColorRGB=[0.0, 1, 0.0]) - p.addUserDebugText(f"End_effector:{matrix(np.around(np.array(env.env.robot.end_effector_pos),2))}", - [.77, 1, 0.15], textSize=1.0, lifeTime=0.5, textColorRGB=[0.0, 1, 0.0]) - p.addUserDebugText(f" Object:{matrix(np.around(np.array(info['o']['actual_state']),2))}", - [.8, 1, 0.10], textSize=1.0, lifeTime=0.5, textColorRGB=[0.0, 0.0, 1]) - p.addUserDebugText(f"Velocity:{env.env.max_velocity}", - [.79, 1, 0.05], textSize=1.0, lifeTime=0.5, textColorRGB=[0.6, 0.8, .3]) - p.addUserDebugText(f"Force:{env.env.max_force}", - [.81, 1, 0.00], textSize=1.0, lifeTime=0.5, textColorRGB=[0.3, 0.2, .4]) - -def detect_key(keypress,arg_dict,action): - - if 97 in keypress.keys() and keypress[97] == 1: # A - action[2] += .03 - print(action) - if 122 in keypress.keys() and keypress[122] == 1: # Z/Y - action[2] -= .03 - print(action) - if 65297 in keypress.keys() and keypress[65297] == 1: # ARROW UP - action[1] -= .03 - print(action) - if 65298 in keypress.keys() and keypress[65298] == 1: # ARROW DOWN - action[1] += .03 - print(action) - if 65295 in keypress.keys() and keypress[65295] == 1: # ARROW LEFT - action[0] += .03 - print(action) - if 65296 in keypress.keys() and keypress[65296] == 1: # ARROW RIGHT - action[0] -= .03 - print(action) - if 120 in keypress.keys() and keypress[120] == 1: # X - action[3] -= .03 - action[4] -= .03 - print(action) - if 99 in keypress.keys() and keypress[99] == 1: # C - action[3] += .03 - action[4] += .03 - print(action) - # if 100 in keypress.keys() and keypress[100] == 1: - # cube[cubecount] = p.loadURDF(pkg_resources.resource_filename("myGym", os.path.join("envs", "objects/assembly/urdf/cube_holes.urdf")), [action[0], action[1],action[2]-0.2 ]) - # change_dynamics(cube[cubecount],lfriction,rfriction,ldamping,adamping) - # cubecount +=1 - if "step" in arg_dict["robot_action"]: - action[:3] = np.multiply(action [:3],10) - elif "joints" in arg_dict["robot_action"]: - print("Robot action: Joints - KEYBOARD CONTROL UNDER DEVELOPMENT") - quit() - #for i in range (env.action_space.shape[0]): - # env.env.robot.joints_max_velo[i] = p.readUserDebugParameter(maxvelo) - # env.env.robot.joints_max_force[i] = p.readUserDebugParameter(maxforce) - return action - -def test_env(env, arg_dict): - real_robot = True - motors = NicoMotors() - dofs = motors.dofs() - try: - motors.open() - except: - print('motors are 
not operational') - - for k in dofs: - motors.enableTorque(k) - torque = True - - - arg_dict["vsampling"] = 0 - arg_dict["vinfo"] = 0 - spawn_objects = False - env.render("human") - #env.reset() - #Prepare names for sliders - joints = ['Joint1','Joint2','Joint3','Joint4','Joint5','Joint6','Joint7','Joint 8','Joint 9', 'Joint10', 'Joint11','Joint12','Joint13','Joint14','Joint15','Joint16','Joint17','Joint 18','Joint 19'] - jointparams = ['Jnt1','Jnt2','Jnt3','Jnt4','Jnt5','Jnt6','Jnt7','Jnt 8','Jnt 9', 'Jnt10', 'Jnt11','Jnt12','Jnt13','Jnt14','Jnt15','Jnt16','Jnt17','Jnt 18','Jnt 19'] - cube = ['Cube1','Cube2','Cube3','Cube4','Cube5','Cube6','Cube7','Cube8','Cube9','Cube10','Cube11','Cube12','Cube13','Cube14','Cube15','Cube16','Cube17','Cube18','Cube19'] - cubecount = 0 - - if arg_dict["gui"] == 0: - print ("Add --gui 1 parameter to visualize environment") - quit() - - p.configureDebugVisualizer(p.COV_ENABLE_GUI, 0) - - p.resetDebugVisualizerCamera(1.2, 180, -30, [0.0, 0.5, 0.05]) - p.setAdditionalSearchPath(pybullet_data.getDataPath()) - #newobject = p.loadURDF("cube.urdf", [3.1,3.7,0.1]) - #p.changeDynamics(newobject, -1, lateralFriction=1.00) - #p.setRealTimeSimulation(1) - if arg_dict["control"] == "slider": - p.configureDebugVisualizer(p.COV_ENABLE_GUI, 1) - if "joints" in arg_dict["robot_action"]: - if 'gripper' in arg_dict["robot_action"]: - print ("gripper is present") - for i in range (env.action_space.shape[0]): - if i < (env.action_space.shape[0] - len(env.env.robot.gjoints_rest_poses)): - joints[i] = p.addUserDebugParameter(joints[i], env.action_space.low[i], env.action_space.high[i], env.env.robot.init_joint_poses[i]) - else: - joints[i] = p.addUserDebugParameter(joints[i], env.action_space.low[i], env.action_space.high[i], .02) - else: - for i in range (env.action_space.shape[0]): - joints[i] = p.addUserDebugParameter(joints[i],env.action_space.low[i], env.action_space.high[i], env.env.robot.init_joint_poses[i]) - elif "absolute" in arg_dict["robot_action"]: - if 'gripper' in arg_dict["robot_action"]: - print ("gripper is present") - for i in range (env.action_space.shape[0]): - if i < (env.action_space.shape[0] - len(env.env.robot.gjoints_rest_poses)): - joints[i] = p.addUserDebugParameter(joints[i], -1, 1, arg_dict["robot_init"][i]) - else: - joints[i] = p.addUserDebugParameter(joints[i], -1, 1, .02) - else: - for i in range (env.action_space.shape[0]): - joints[i] = p.addUserDebugParameter(joints[i], -1, 1, arg_dict["robot_init"][i]) - elif "step" in arg_dict["robot_action"]: - if 'gripper' in arg_dict["robot_action"]: - print ("gripper is present") - for i in range (env.action_space.shape[0]): - if i < (env.action_space.shape[0] - len(env.env.robot.gjoints_rest_poses)): - joints[i] = p.addUserDebugParameter(joints[i], -1, 1, 0) - else: - joints[i] = p.addUserDebugParameter(joints[i], -1, 1, .02) - else: - for i in range (env.action_space.shape[0]): - joints[i] = p.addUserDebugParameter(joints[i], -1, 1, 0) - - - #maxvelo = p.addUserDebugParameter("Max Velocity", 0.1, 50, env.env.robot.joints_max_velo[0]) - #maxforce = p.addUserDebugParameter("Max Force", 0.1, 300, env.env.robot.joints_max_force[0]) - lfriction = p.addUserDebugParameter("Lateral Friction", 0, 100, 0) - rfriction = p.addUserDebugParameter("Spinning Friction", 0, 100, 0) - ldamping = p.addUserDebugParameter("Linear Damping", 0, 100, 0) - adamping = p.addUserDebugParameter("Angular Damping", 0, 100, 0) - #action.append(jointparams[i]) - if arg_dict["vsampling"] == True: - visualize_sampling_area(arg_dict) - - 
#visualgr = p.createVisualShape(shapeType=p.GEOM_SPHERE, radius=.005, rgbaColor=[0,0,1,.1]) - - if arg_dict["control"] == "random": - action = env.action_space.sample() - if arg_dict["control"] == "keyboard": - action = arg_dict["robot_init"] - if "gripper" in arg_dict["robot_action"]: - action.append(.1) - action.append(.1) - if arg_dict["control"] == "slider": - action = [] - for i in range (env.action_space.shape[0]): - jointparams[i] = p.readUserDebugParameter(joints[i]) - action.append(jointparams[i]) - - for e in range(50): - env.reset() - #if spawn_objects: - # cube[e] = p.loadURDF(pkg_resources.resource_filename("myGym", os.path.join("envs", "objects/assembly/urdf/cube_holes.urdf")), [0, 0.5, .1]) - - #if visualize_traj: - # visualize_goal(info) - trajectory=[] - degdiff = [] - actiondiff = [] - - - - - for t in range(arg_dict["max_episode_steps"]): - - - if arg_dict["control"] == "slider": - action = [] - for i in range (env.action_space.shape[0]): - jointparams[i] = p.readUserDebugParameter(joints[i]) - action.append(jointparams[i]) - #env.env.robot.joints_max_velo[i] = p.readUserDebugParameter(maxvelo) - #env.env.robot.joints_max_force[i] = p.readUserDebugParameter(maxforce) - - - if arg_dict["control"] == "observation": - if t == 0: - action = env.action_space.sample() - else: - - if "joints" in arg_dict["robot_action"]: - action = info['o']["additional_obs"]["joints_angles"] #n - elif "absolute" in arg_dict["robot_action"]: - action = info['o']["actual_state"] - else: - action = [0,0,0] - - if arg_dict["control"] == "oraculum": - if t == 0: - action = env.action_space.sample() - else: - - if "absolute" in arg_dict["robot_action"]: - action = info['o']["goal_state"] - else: - print("ERROR - Oraculum mode only works for absolute actions") - quit() - - - elif arg_dict["control"] == "keyboard": - keypress = p.getKeyboardEvents() - #print(action) - action = detect_key(keypress,arg_dict,action) - elif arg_dict["control"] == "random": - action = env.action_space.sample() - - prejointaction = env.env.robot.get_joints_states() - - - jointaction = env.env.robot.get_joints_states() - predeg = np.rad2deg(prejointaction) - actiondeg = np.rad2deg(env.env.robot.joint_poses) - - deg = np.rad2deg(jointaction) - #print("Prejointaction: ", predeg) - #print("Actiondeg: ", actiondeg) - #print("Jointaction: ", deg) - degdifference = deg - predeg - actiondifference = actiondeg - deg - if arg_dict["ik_solver"]: - print("Step: ", t,end="") - for i in range(len(REALJOINTS)): - print(" - {}:{:.2f}, ".format(REALJOINTS[i],degdifference[i]), end="") - print(" ") - print(actiondifference) - degdiff.append(degdifference) - actiondiff.append(actiondifference) - #time.sleep(0.06) - - #Execute action on real robot - if arg_dict["real_robot"]: - #jointaction = info['o']["additional_obs"]["joints_angles"] - for i,realjoint in enumerate(REALJOINTS): - robot.setAngle(realjoint,deg[i],DEFAULT_SPEED) - time.sleep(SIMREALDELAY) - print("Step:" + str(t)) - #if t % 10 == 0: - # print('Temperature:', str(robot.getTemperature('r_shoulder_y'))) - # print('Current:', str(robot.getCurrent('r_shoulder_y'))) - - print (f"Action:{action}") - observation, reward, done, info = env.step(action) - if real_robot: - for k in dofs: - motors.setPositionDg(k,int(values[k])) - - if arg_dict["vtrajectory"] == True: - visualize_trajectories(info,action) - if arg_dict["vinfo"] == True: - visualize_infotext(action, env, info) - - - #visualize_goal(info) - #if debug_mode: - #print("Reward is {}, observation is {}".format(reward, 
observation)) - #if t>=1: - #action = matrix(np.around(np.array(action),5)) - #oaction = env.env.robot.get_joints_states() - #oaction = matrix(np.around(np.array(oaction[0:action.shape[0]]),5)) - #diff = matrix(np.around(np.array(action-oaction),5)) - #print(env.env.robot.get_joints_states()) - #print(f"Step:{t}") - #print (f"RAction:{action}") - #print(f"OAction:{oaction}") - #print(f"DAction:{diff}") - #p.addUserDebugText(f"DAction:{diff}", - # [1, 1, 0.1], textSize=1.0, lifeTime=0.05, textColorRGB=[0.6, 0.0, 0.6]) - #time.sleep(.4) - #clear() - - #if action_control == "slider": - # action=[] - if "step" in arg_dict["robot_action"]: - action[:3] = [0,0,0] - - if arg_dict["visualize"]: - visualizations = [[],[]] - env.render("human") - for camera_id in range(len(env.cameras)): - camera_render = env.render(mode="rgb_array", camera_id=camera_id) - image = cv2.cvtColor(camera_render[camera_id]["image"], cv2.COLOR_RGB2BGR) - depth = camera_render[camera_id]["depth"] - image = cv2.copyMakeBorder(image, 30, 10, 10, 20, cv2.BORDER_CONSTANT, value=[255, 255, 255]) - cv2.putText(image, 'Camera {}'.format(camera_id), (10, 20), cv2.FONT_HERSHEY_SIMPLEX, .5, - (0, 0, 0), 1, 0) - visualizations[0].append(image) - depth = cv2.copyMakeBorder(depth, 30, 10, 10, 20, cv2.BORDER_CONSTANT, value=[255, 255, 255]) - cv2.putText(depth, 'Camera {}'.format(camera_id), (10, 20), cv2.FONT_HERSHEY_SIMPLEX, .5, - (0, 0, 0), 1, 0) - visualizations[1].append(depth) - - if len(visualizations[0])%2 !=0: - visualizations[0].append(255*np.ones(visualizations[0][0].shape, dtype=np.uint8)) - visualizations[1].append(255*np.ones(visualizations[1][0].shape, dtype=np.float32)) - fig_rgb = np.vstack((np.hstack((visualizations[0][0::2])),np.hstack((visualizations[0][1::2])))) - fig_depth = np.vstack((np.hstack((visualizations[1][0::2])),np.hstack((visualizations[1][1::2])))) - cv2.imshow('Camera RGB renders', fig_rgb) - cv2.imshow('Camera depthrenders', fig_depth) - cv2.waitKey(1) - - if done: - print("Episode finished after {} timesteps".format(t + 1)) - if arg_dict["ik_solver"]: - file = open(os.path.join("trajectory.txt"), 'a') - for value in trajectory: - file.write(f"{value}\n") - file.write("\n") - file.close() - #Plot actiondiff in graph - fig, axs = plt.subplots(2) - axs[0].plot(actiondiff) - axs[0].set_title('Difference between Action and Sim position') - axs[0].set_xlabel('Steps') - axs[0].set_ylabel('Difference in degrees') - axs[1].plot(degdiff) - axs[1].set_title('Difference between SimPreaction and SimPostaction') - axs[1].set_xlabel('Steps') - axs[1].set_ylabel('Difference in degrees') - plt.tight_layout() - plt.show() - input("Press key to continue...") - plt.close - if arg_dict["real_robot"]: - time.sleep(FINISHDELAY) - reset_robot(robot) - - break - -def test_model(env, model=None, implemented_combos=None, arg_dict=None, model_logdir=None, deterministic=False): - - try: - if "multi" in arg_dict["algo"]: - model_args = implemented_combos[arg_dict["algo"]][arg_dict["train_framework"]][1] - model = implemented_combos[arg_dict["algo"]][arg_dict["train_framework"]][0].load(arg_dict["model_path"], env=model_args[1].env) - else: - model = implemented_combos[arg_dict["algo"]][arg_dict["train_framework"]][0].load(arg_dict["model_path"]) - except: - if (arg_dict["algo"] in implemented_combos.keys()) and (arg_dict["train_framework"] not in list(implemented_combos[arg_dict["algo"]].keys())): - err = "{} is only implemented with {}".format(arg_dict["algo"],list(implemented_combos[arg_dict["algo"]].keys())[0]) - elif 
arg_dict["algo"] not in implemented_combos.keys(): - err = "{} algorithm is not implemented.".format(arg_dict["algo"]) - else: - err = "invalid model_path argument" - raise Exception(err) - - images = [] # Empty list for gif images - success_episodes_num = 0 - distance_error_sum = 0 - vel= arg_dict["max_velocity"] - force = arg_dict["max_force"] - steps_sum = 0 - p.resetDebugVisualizerCamera(1.2, 180, -30, [0.0, 0.5, 0.05]) - #p.setRealTimeSimulation(1) - #p.setTimeStep(0.01) - - for e in range(arg_dict["eval_episodes"]): - done = False - obs = env.reset() - is_successful = 0 - distance_error = 0 - step_sum = 0 - while not done: - steps_sum += 1 - action, _state = model.predict(obs, deterministic=deterministic) - obs, reward, done, info = env.step(action) - is_successful = not info['f'] - distance_error = info['d'] - if arg_dict["vinfo"] == True: - visualize_infotext(action, env, info) - - - if (arg_dict["record"] > 0) and (len(images) < 8000): - render_info = env.render(mode="rgb_array", camera_id = arg_dict["camera"]) - image = render_info[arg_dict["camera"]]["image"] - images.append(image) - print(f"appending image: total size: {len(images)}]") - - success_episodes_num += is_successful - distance_error_sum += distance_error - - mean_distance_error = distance_error_sum / arg_dict["eval_episodes"] - mean_steps_num = steps_sum // arg_dict["eval_episodes"] - - print("#---------Evaluation-Summary---------#") - print("{} of {} episodes ({} %) were successful".format(success_episodes_num, arg_dict["eval_episodes"], success_episodes_num / arg_dict["eval_episodes"]*100)) - print("Mean distance error is {:.2f}%".format(mean_distance_error * 100)) - print("Mean number of steps {}".format(mean_steps_num)) - print("#------------------------------------#") - model_name = arg_dict["algo"] + '_' + str(arg_dict["steps"]) - file = open(os.path.join(model_logdir, "train_" + model_name + ".txt"), 'a') - file.write("\n") - file.write("#Evaluation results: \n") - file.write("#{} of {} episodes were successful \n".format(success_episodes_num, arg_dict["eval_episodes"])) - file.write("#Mean distance error is {:.2f}% \n".format(mean_distance_error * 100)) - file.write("#Mean number of steps {}\n".format(mean_steps_num)) - file.close() - - if arg_dict["record"] == 1: - gif_path = os.path.join(model_logdir, "train_" + model_name + ".gif") - imageio.mimsave(gif_path, [np.array(img) for i, img in enumerate(images) if i%2 == 0], fps=15) - os.system('./utils/gifopt -O3 --lossy=5 -o {dest} {source}'.format(source=gif_path, dest=gif_path)) - print("Record saved to " + gif_path) - elif arg_dict["record"] == 2: - video_path = os.path.join(model_logdir, "train_" + model_name + ".avi") - height, width, layers = image.shape - out = cv2.VideoWriter(video_path,cv2.VideoWriter_fourcc(*'XVID'), 30, (width,height)) - for img in images: - out.write(cv2.cvtColor(img, cv2.COLOR_RGB2BGR)) - out.release() - print("Record saved to " + video_path) - - -def main(): - parser = get_parser() - parser.add_argument("-ct", "--control", default="slider", help="How to control robot during testing. Valid arguments: keyboard, observation, random, oraculum, slider") - parser.add_argument("-vs", "--vsampling", action="store_true", help="Visualize sampling area.") - parser.add_argument("-vt", "--vtrajectory", action="store_true", help="Visualize gripper trajectgory.") - parser.add_argument("-vn", "--vinfo", action="store_true", help="Visualize info. 
Valid arguments: True, False") - parser.add_argument("-nl", "--natural_language", default=False, help="NL Valid arguments: True, False") - arg_dict = get_arguments(parser) - model_logdir = os.path.dirname(arg_dict.get("model_path","")) - # Check if we chose one of the existing engines - if arg_dict["engine"] not in AVAILABLE_SIMULATION_ENGINES: - print(f"Invalid simulation engine. Valid arguments: --engine {AVAILABLE_SIMULATION_ENGINES}.") - return - if arg_dict.get("model_path") is None: - print("Path to the model using --model_path argument not specified. Testing random actions in selected environment.") - arg_dict["gui"] = 1 - env = configure_env(arg_dict, model_logdir, for_train=0) - test_env(env, arg_dict) - - else: - env = configure_env(arg_dict, model_logdir, for_train=0) - implemented_combos = configure_implemented_combos(env, model_logdir, arg_dict) - test_model(env, None, implemented_combos, arg_dict, model_logdir) - - -if __name__ == "__main__": - main() diff --git a/myGym/stable_baselines_mygym/TorchPPO.py b/myGym/stable_baselines_mygym/TorchPPO.py index d0247050..0e84dc2d 100644 --- a/myGym/stable_baselines_mygym/TorchPPO.py +++ b/myGym/stable_baselines_mygym/TorchPPO.py @@ -2,7 +2,7 @@ import numpy as np import torch as th -from gym import spaces +from gymnasium import spaces from torch.nn import functional as F from stable_baselines3.common import logger diff --git a/myGym/stable_baselines_mygym/algo.py b/myGym/stable_baselines_mygym/algo.py index 0ec97b90..0aaac838 100644 --- a/myGym/stable_baselines_mygym/algo.py +++ b/myGym/stable_baselines_mygym/algo.py @@ -1,6 +1,6 @@ import time -import gym +import gymnasium as gym import numpy as np import tensorflow as tf diff --git a/myGym/stable_baselines_mygym/multi_acktr.py b/myGym/stable_baselines_mygym/multi_acktr.py index 2b7aa7d9..19858c2c 100644 --- a/myGym/stable_baselines_mygym/multi_acktr.py +++ b/myGym/stable_baselines_mygym/multi_acktr.py @@ -2,7 +2,7 @@ import warnings import tensorflow as tf -from gym.spaces import Box, Discrete +from gymnasium.spaces import Box, Discrete from stable_baselines import logger from stable_baselines.a2c.a2c import A2CRunner @@ -17,7 +17,7 @@ from stable_baselines.common.misc_util import set_global_seeds import commentjson import numpy as np -import gym +import gymnasium as gym import os from collections import deque diff --git a/myGym/stable_baselines_mygym/multi_ppo2.py b/myGym/stable_baselines_mygym/multi_ppo2.py index df73e39b..f1a43f8d 100644 --- a/myGym/stable_baselines_mygym/multi_ppo2.py +++ b/myGym/stable_baselines_mygym/multi_ppo2.py @@ -1,7 +1,7 @@ from pickle import NONE import time import os -import gym +import gymnasium as gym import numpy as np from numpy.lib.function_base import append from myGym.utils.callbacks import SaveOnTopRewardCallback diff --git a/myGym/stable_baselines_mygym/policies.py b/myGym/stable_baselines_mygym/policies.py index f1fa2c6f..31171d33 100644 --- a/myGym/stable_baselines_mygym/policies.py +++ b/myGym/stable_baselines_mygym/policies.py @@ -4,7 +4,7 @@ import numpy as np import tensorflow as tf -from gym.spaces import Discrete +from gymnasium.spaces import Discrete from stable_baselines.common.tf_util import batch_to_seq, seq_to_batch from stable_baselines.common.tf_layers import conv, linear, conv_to_fc, lstm diff --git a/myGym/stable_baselines_mygym/ppo2.py b/myGym/stable_baselines_mygym/ppo2.py index 6e31c0d8..9d3c125d 100644 --- a/myGym/stable_baselines_mygym/ppo2.py +++ b/myGym/stable_baselines_mygym/ppo2.py @@ -1,6 +1,6 @@ import time -import 
gym +import gymnasium as gym import numpy as np import tensorflow as tf diff --git a/myGym/stable_baselines_mygym/reference.py b/myGym/stable_baselines_mygym/reference.py index a04da18c..20b1eb62 100644 --- a/myGym/stable_baselines_mygym/reference.py +++ b/myGym/stable_baselines_mygym/reference.py @@ -1,6 +1,6 @@ import time -import gym +import gymnasium as gym import numpy as np import tensorflow as tf diff --git a/myGym/stable_baselines_mygym/testalgo.py b/myGym/stable_baselines_mygym/testalgo.py index 2fffe28f..03ec4524 100644 --- a/myGym/stable_baselines_mygym/testalgo.py +++ b/myGym/stable_baselines_mygym/testalgo.py @@ -1,6 +1,6 @@ import time -import gym +import gymnasium as gym import numpy as np import tensorflow as tf diff --git a/myGym/stable_baselines_mygym/testpolicies.py b/myGym/stable_baselines_mygym/testpolicies.py index f1fa2c6f..31171d33 100644 --- a/myGym/stable_baselines_mygym/testpolicies.py +++ b/myGym/stable_baselines_mygym/testpolicies.py @@ -4,7 +4,7 @@ import numpy as np import tensorflow as tf -from gym.spaces import Discrete +from gymnasium.spaces import Discrete from stable_baselines.common.tf_util import batch_to_seq, seq_to_batch from stable_baselines.common.tf_layers import conv, linear, conv_to_fc, lstm diff --git a/myGym/test.py b/myGym/test.py deleted file mode 100644 index ad266732..00000000 --- a/myGym/test.py +++ /dev/null @@ -1,462 +0,0 @@ -from ast import arg -import gym -from myGym import envs -import cv2 -from myGym.train import get_parser, get_arguments, configure_implemented_combos, configure_env -import os, imageio -import numpy as np -import time -from numpy import matrix -import pybullet as p -import pybullet_data -import pkg_resources -import random -import getkey - - - -clear = lambda: os.system('clear') - -AVAILABLE_SIMULATION_ENGINES = ["mujoco", "pybullet"] -AVAILABLE_TRAINING_FRAMEWORKS = ["tensorflow", "pytorch"] - -def visualize_sampling_area(arg_dict): - rx = (arg_dict["task_objects"][0]["goal"]["sampling_area"][0] - arg_dict["task_objects"][0]["goal"]["sampling_area"][1])/2 - ry = (arg_dict["task_objects"][0]["goal"]["sampling_area"][2] - arg_dict["task_objects"][0]["goal"]["sampling_area"][3])/2 - rz = (arg_dict["task_objects"][0]["goal"]["sampling_area"][4] - arg_dict["task_objects"][0]["goal"]["sampling_area"][5])/2 - - visual = p.createVisualShape(shapeType=p.GEOM_BOX, halfExtents=[rx,ry,rz], rgbaColor=[1,0,0,.2]) - collision = -1 - - sampling = p.createMultiBody( - baseVisualShapeIndex=visual, - baseCollisionShapeIndex=collision, - baseMass=0, - basePosition=[arg_dict["task_objects"][0]["goal"]["sampling_area"][0]-rx, arg_dict["task_objects"][0]["goal"]["sampling_area"][2]-ry,arg_dict["task_objects"][0]["goal"]["sampling_area"][4]-rz], - ) - -def visualize_trajectories (info,action): - - visualo = p.createVisualShape(shapeType=p.GEOM_SPHERE, radius=0.01, rgbaColor=[0,0,1,.3]) - collision = -1 - p.createMultiBody( - baseVisualShapeIndex=visualo, - baseCollisionShapeIndex=collision, - baseMass=0, - basePosition=info['o']['actual_state'], - ) - - #visualr = p.createVisualShape(shapeType=p.GEOM_SPHERE, radius=0.01, rgbaColor=[0,1,0,.5]) - #p.createMultiBody( - # baseVisualShapeIndex=visualr, - # baseCollisionShapeIndex=collision, - # baseMass=0, - # basePosition=info['o']['additional_obs']['endeff_xyz'], - #) - - visuala = p.createVisualShape(shapeType=p.GEOM_SPHERE, radius=0.01, rgbaColor=[1,0,0,.3]) - p.createMultiBody( - baseVisualShapeIndex=visuala, - baseCollisionShapeIndex=collision, - baseMass=0, - basePosition=action[:3], - 
) - -def visualize_goal(info): - visualg = p.createVisualShape(shapeType=p.GEOM_SPHERE, radius=0.01, rgbaColor=[1,0,0,.5]) - collision = -1 - p.createMultiBody( - baseVisualShapeIndex=visualg, - baseCollisionShapeIndex=collision, - baseMass=0, - basePosition=info['o']['goal_state'], - ) -def change_dynamics(cubex,lfriction,rfriction,ldamping,adamping): - p.changeDynamics(cubex, -1, lateralFriction=p.readUserDebugParameter(lfriction)) - p.changeDynamics(cubex,-1,rollingFriction=p.readUserDebugParameter(rfriction)) - p.changeDynamics(cubex, -1, linearDamping=p.readUserDebugParameter(ldamping)) - p.changeDynamics(cubex, -1, angularDamping=p.readUserDebugParameter(adamping)) - - #visualrobot = p.createVisualShape(shapeType=p.GEOM_SPHERE, radius=1, rgbaColor=[0,1,0,.2]) - #collisionrobot = -1 - #sampling = p.createMultiBody( - # baseVisualShapeIndex=visualrobot, - # baseCollisionShapeIndex=collisionrobot, - # baseMass=0, - # basePosition=[0,0,0.3], - #) - -def visualize_infotext(action, env, info): - p.addUserDebugText(f"Episode:{env.env.episode_number}", - [.65, 1., 0.45], textSize=1.0, lifeTime=0.5, textColorRGB=[0.4, 0.2, .3]) - p.addUserDebugText(f"Step:{env.env.episode_steps}", - [.67, 1, .40], textSize=1.0, lifeTime=0.5, textColorRGB=[0.2, 0.8, 1]) - p.addUserDebugText(f"Subtask:{env.env.task.current_task}", - [.69, 1, 0.35], textSize=1.0, lifeTime=0.5, textColorRGB=[0.4, 0.2, 1]) - p.addUserDebugText(f"Network:{env.env.reward.current_network}", - [.71, 1, 0.3], textSize=1.0, lifeTime=0.5, textColorRGB=[0.0, 0.0, 1]) - p.addUserDebugText(f"Action (Gripper):{matrix(np.around(np.array(action),2))}", - [.73, 1, 0.25], textSize=1.0, lifeTime=0.5, textColorRGB=[1, 0, 0]) - p.addUserDebugText(f"Actual_state:{matrix(np.around(np.array(env.env.observation['task_objects']['actual_state'][:3]),2))}", - [.75, 1, 0.2], textSize=1.0, lifeTime=0.5, textColorRGB=[0.0, 1, 0.0]) - p.addUserDebugText(f"End_effector:{matrix(np.around(np.array(env.env.robot.end_effector_pos),2))}", - [.77, 1, 0.15], textSize=1.0, lifeTime=0.5, textColorRGB=[0.0, 1, 0.0]) - p.addUserDebugText(f" Object:{matrix(np.around(np.array(info['o']['actual_state']),2))}", - [.8, 1, 0.10], textSize=1.0, lifeTime=0.5, textColorRGB=[0.0, 0.0, 1]) - p.addUserDebugText(f"Velocity:{env.env.max_velocity}", - [.79, 1, 0.05], textSize=1.0, lifeTime=0.5, textColorRGB=[0.6, 0.8, .3]) - p.addUserDebugText(f"Force:{env.env.max_force}", - [.81, 1, 0.00], textSize=1.0, lifeTime=0.5, textColorRGB=[0.3, 0.2, .4]) - -def detect_key(keypress,arg_dict,action): - - if 97 in keypress.keys() and keypress[97] == 1: # A - action[2] += .03 - print(action) - if 122 in keypress.keys() and keypress[122] == 1: # Z/Y - action[2] -= .03 - print(action) - if 65297 in keypress.keys() and keypress[65297] == 1: # ARROW UP - action[1] -= .03 - print(action) - if 65298 in keypress.keys() and keypress[65298] == 1: # ARROW DOWN - action[1] += .03 - print(action) - if 65295 in keypress.keys() and keypress[65295] == 1: # ARROW LEFT - action[0] += .03 - print(action) - if 65296 in keypress.keys() and keypress[65296] == 1: # ARROW RIGHT - action[0] -= .03 - print(action) - if 120 in keypress.keys() and keypress[120] == 1: # X - action[3] -= .03 - action[4] -= .03 - print(action) - if 99 in keypress.keys() and keypress[99] == 1: # C - action[3] += .03 - action[4] += .03 - print(action) - # if 100 in keypress.keys() and keypress[100] == 1: - # cube[cubecount] = p.loadURDF(pkg_resources.resource_filename("myGym", os.path.join("envs", "objects/assembly/urdf/cube_holes.urdf")), 
[action[0], action[1],action[2]-0.2 ]) - # change_dynamics(cube[cubecount],lfriction,rfriction,ldamping,adamping) - # cubecount +=1 - if "step" in arg_dict["robot_action"]: - action[:3] = np.multiply(action [:3],10) - elif "joints" in arg_dict["robot_action"]: - print("Robot action: Joints - KEYBOARD CONTROL UNDER DEVELOPMENT") - quit() - #for i in range (env.action_space.shape[0]): - # env.env.robot.joints_max_velo[i] = p.readUserDebugParameter(maxvelo) - # env.env.robot.joints_max_force[i] = p.readUserDebugParameter(maxforce) - return action - -def test_env(env, arg_dict): - arg_dict["vsampling"] = 0 - arg_dict["vinfo"] = 0 - spawn_objects = False - #env.render("human") - #env.reset() - #Prepare names for sliders - joints = ['Joint1','Joint2','Joint3','Joint4','Joint5','Joint6','Joint7','Joint 8','Joint 9', 'Joint10', 'Joint11','Joint12','Joint13','Joint14','Joint15','Joint16','Joint17','Joint 18','Joint 19'] - jointparams = ['Jnt1','Jnt2','Jnt3','Jnt4','Jnt5','Jnt6','Jnt7','Jnt 8','Jnt 9', 'Jnt10', 'Jnt11','Jnt12','Jnt13','Jnt14','Jnt15','Jnt16','Jnt17','Jnt 18','Jnt 19'] - cube = ['Cube1','Cube2','Cube3','Cube4','Cube5','Cube6','Cube7','Cube8','Cube9','Cube10','Cube11','Cube12','Cube13','Cube14','Cube15','Cube16','Cube17','Cube18','Cube19'] - cubecount = 0 - - if arg_dict["gui"] == 0: - print ("Add --gui 1 parameter to visualize environment") - quit() - - p.configureDebugVisualizer(p.COV_ENABLE_GUI, 0) - - p.resetDebugVisualizerCamera(1.2, 180, -30, [0.0, 0.5, 0.05]) - p.setAdditionalSearchPath(pybullet_data.getDataPath()) - #newobject = p.loadURDF("cube.urdf", [3.1,3.7,0.1]) - #p.changeDynamics(newobject, -1, lateralFriction=1.00) - #p.setRealTimeSimulation(1) - if arg_dict["control"] == "slider": - p.configureDebugVisualizer(p.COV_ENABLE_GUI, 1) - if "joints" in arg_dict["robot_action"]: - if 'gripper' in arg_dict["robot_action"]: - print ("gripper is present") - for i in range (env.action_space.shape[0]): - if i < (env.action_space.shape[0] - len(env.env.robot.gjoints_rest_poses)): - joints[i] = p.addUserDebugParameter(joints[i], env.action_space.low[i], env.action_space.high[i], env.env.robot.init_joint_poses[i]) - else: - joints[i] = p.addUserDebugParameter(joints[i], env.action_space.low[i], env.action_space.high[i], .02) - else: - for i in range (env.action_space.shape[0]): - joints[i] = p.addUserDebugParameter(joints[i],env.action_space.low[i], env.action_space.high[i], env.env.robot.init_joint_poses[i]) - elif "absolute" in arg_dict["robot_action"]: - if 'gripper' in arg_dict["robot_action"]: - print ("gripper is present") - for i in range (env.action_space.shape[0]): - if i < (env.action_space.shape[0] - len(env.env.robot.gjoints_rest_poses)): - joints[i] = p.addUserDebugParameter(joints[i], -1, 1, arg_dict["robot_init"][i]) - else: - joints[i] = p.addUserDebugParameter(joints[i], -1, 1, .02) - else: - for i in range (env.action_space.shape[0]): - joints[i] = p.addUserDebugParameter(joints[i], -1, 1, arg_dict["robot_init"][i]) - elif "step" in arg_dict["robot_action"]: - if 'gripper' in arg_dict["robot_action"]: - print ("gripper is present") - for i in range (env.action_space.shape[0]): - if i < (env.action_space.shape[0] - len(env.env.robot.gjoints_rest_poses)): - joints[i] = p.addUserDebugParameter(joints[i], -1, 1, 0) - else: - joints[i] = p.addUserDebugParameter(joints[i], -1, 1, .02) - else: - for i in range (env.action_space.shape[0]): - joints[i] = p.addUserDebugParameter(joints[i], -1, 1, 0) - - - #maxvelo = p.addUserDebugParameter("Max Velocity", 0.1, 50, 
env.env.robot.joints_max_velo[0]) - #maxforce = p.addUserDebugParameter("Max Force", 0.1, 300, env.env.robot.joints_max_force[0]) - lfriction = p.addUserDebugParameter("Lateral Friction", 0, 100, 0) - rfriction = p.addUserDebugParameter("Spinning Friction", 0, 100, 0) - ldamping = p.addUserDebugParameter("Linear Damping", 0, 100, 0) - adamping = p.addUserDebugParameter("Angular Damping", 0, 100, 0) - #action.append(jointparams[i]) - if arg_dict["vsampling"] == True: - visualize_sampling_area(arg_dict) - - #visualgr = p.createVisualShape(shapeType=p.GEOM_SPHERE, radius=.005, rgbaColor=[0,0,1,.1]) - - if arg_dict["control"] == "random": - action = env.action_space.sample() - if arg_dict["control"] == "keyboard": - action = arg_dict["robot_init"] - if "gripper" in arg_dict["robot_action"]: - action.append(.1) - action.append(.1) - if arg_dict["control"] == "slider": - action = [] - for i in range (env.action_space.shape[0]): - jointparams[i] = p.readUserDebugParameter(joints[i]) - action.append(jointparams[i]) - - for e in range(50): - env.reset() - #if spawn_objects: - # cube[e] = p.loadURDF(pkg_resources.resource_filename("myGym", os.path.join("envs", "objects/assembly/urdf/cube_holes.urdf")), [0, 0.5, .1]) - - #if visualize_traj: - # visualize_goal(info) - - for t in range(arg_dict["max_episode_steps"]): - - - if arg_dict["control"] == "slider": - action = [] - for i in range (env.action_space.shape[0]): - jointparams[i] = p.readUserDebugParameter(joints[i]) - action.append(jointparams[i]) - #env.env.robot.joints_max_velo[i] = p.readUserDebugParameter(maxvelo) - #env.env.robot.joints_max_force[i] = p.readUserDebugParameter(maxforce) - - - if arg_dict["control"] == "observation": - if t == 0: - action = env.action_space.sample() - else: - - if "joints" in arg_dict["robot_action"]: - action = info['o']["additional_obs"]["joints_angles"] #n - elif "absolute" in arg_dict["robot_action"]: - action = info['o']["actual_state"] - else: - action = [0,0,0] - - if arg_dict["control"] == "oraculum": - if t == 0: - action = env.action_space.sample() - else: - - if "absolute" in arg_dict["robot_action"]: - action = info['o']["goal_state"] - else: - print("ERROR - Oraculum mode only works for absolute actions") - quit() - - - elif arg_dict["control"] == "keyboard": - keypress = p.getKeyboardEvents() - #print(action) - action = detect_key(keypress,arg_dict,action) - elif arg_dict["control"] == "random": - action = env.action_space.sample() - - - - print (f"Action:{action}") - observation, reward, done, info = env.step(action) - - if arg_dict["vtrajectory"] == True: - visualize_trajectories(info,action) - if arg_dict["vinfo"] == True: - visualize_infotext(action, env, info) - - - #visualize_goal(info) - #if debug_mode: - #print("Reward: {} \n Observation: {} \n EnvObservation: {}".format(reward, observation, env.env.observation)) - #if t>=1: - #action = matrix(np.around(np.array(action),5)) - #oaction = env.env.robot.get_joints_states() - #oaction = matrix(np.around(np.array(oaction[0:action.shape[0]]),5)) - #diff = matrix(np.around(np.array(action-oaction),5)) - #print(env.env.robot.get_joints_states()) - #print(f"Step:{t}") - #print (f"RAction:{action}") - #print(f"OAction:{oaction}") - #print(f"DAction:{diff}") - #p.addUserDebugText(f"DAction:{diff}", - # [1, 1, 0.1], textSize=1.0, lifeTime=0.05, textColorRGB=[0.6, 0.0, 0.6]) - #time.sleep(.4) - #clear() - - #if action_control == "slider": - # action=[] - if "step" in arg_dict["robot_action"]: - action[:3] = [0,0,0] - - if arg_dict["visualize"]: - 
visualizations = [[],[]] - env.render("human") - for camera_id in range(len(env.cameras)): - camera_render = env.render(mode="rgb_array", camera_id=camera_id) - image = cv2.cvtColor(camera_render[camera_id]["image"], cv2.COLOR_RGB2BGR) - depth = camera_render[camera_id]["depth"] - image = cv2.copyMakeBorder(image, 30, 10, 10, 20, cv2.BORDER_CONSTANT, value=[255, 255, 255]) - cv2.putText(image, 'Camera {}'.format(camera_id), (10, 20), cv2.FONT_HERSHEY_SIMPLEX, .5, - (0, 0, 0), 1, 0) - visualizations[0].append(image) - depth = cv2.copyMakeBorder(depth, 30, 10, 10, 20, cv2.BORDER_CONSTANT, value=[255, 255, 255]) - cv2.putText(depth, 'Camera {}'.format(camera_id), (10, 20), cv2.FONT_HERSHEY_SIMPLEX, .5, - (0, 0, 0), 1, 0) - visualizations[1].append(depth) - - if len(visualizations[0])%2 !=0: - visualizations[0].append(255*np.ones(visualizations[0][0].shape, dtype=np.uint8)) - visualizations[1].append(255*np.ones(visualizations[1][0].shape, dtype=np.float32)) - fig_rgb = np.vstack((np.hstack((visualizations[0][0::2])),np.hstack((visualizations[0][1::2])))) - fig_depth = np.vstack((np.hstack((visualizations[1][0::2])),np.hstack((visualizations[1][1::2])))) - cv2.imshow('Camera RGB renders', fig_rgb) - cv2.imshow('Camera depthrenders', fig_depth) - cv2.waitKey(1) - - if done: - print("Episode finished after {} timesteps".format(t + 1)) - break - -def test_model(env, model=None, implemented_combos=None, arg_dict=None, model_logdir=None, deterministic=False): - - try: - if "multi" in arg_dict["algo"]: - model_args = implemented_combos[arg_dict["algo"]][arg_dict["train_framework"]][1] - model = implemented_combos[arg_dict["algo"]][arg_dict["train_framework"]][0].load(arg_dict["model_path"], env=model_args[1].env) - else: - model = implemented_combos[arg_dict["algo"]][arg_dict["train_framework"]][0].load(arg_dict["model_path"]) - except: - if (arg_dict["algo"] in implemented_combos.keys()) and (arg_dict["train_framework"] not in list(implemented_combos[arg_dict["algo"]].keys())): - err = "{} is only implemented with {}".format(arg_dict["algo"],list(implemented_combos[arg_dict["algo"]].keys())[0]) - elif arg_dict["algo"] not in implemented_combos.keys(): - err = "{} algorithm is not implemented.".format(arg_dict["algo"]) - else: - err = "invalid model_path argument" - raise Exception(err) - - images = [] # Empty list for gif images - success_episodes_num = 0 - distance_error_sum = 0 - vel= arg_dict["max_velocity"] - force = arg_dict["max_force"] - steps_sum = 0 - p.resetDebugVisualizerCamera(1.2, 180, -30, [0.0, 0.5, 0.05]) - #p.setRealTimeSimulation(1) - #p.setTimeStep(0.01) - - for e in range(arg_dict["eval_episodes"]): - done = False - obs = env.reset() - is_successful = 0 - distance_error = 0 - step_sum = 0 - while not done: - steps_sum += 1 - action, _state = model.predict(obs, deterministic=deterministic) - obs, reward, done, info = env.step(action) - is_successful = not info['f'] - distance_error = info['d'] - if arg_dict["vinfo"] == True: - visualize_infotext(action, env, info) - - - if (arg_dict["record"] > 0) and (len(images) < 8000): - render_info = env.render(mode="rgb_array", camera_id = arg_dict["camera"]) - image = render_info[arg_dict["camera"]]["image"] - images.append(image) - print(f"appending image: total size: {len(images)}]") - - success_episodes_num += is_successful - distance_error_sum += distance_error - - mean_distance_error = distance_error_sum / arg_dict["eval_episodes"] - mean_steps_num = steps_sum // arg_dict["eval_episodes"] - - 
print("#---------Evaluation-Summary---------#") - print("{} of {} episodes ({} %) were successful".format(success_episodes_num, arg_dict["eval_episodes"], success_episodes_num / arg_dict["eval_episodes"]*100)) - print("Mean distance error is {:.2f}%".format(mean_distance_error * 100)) - print("Mean number of steps {}".format(mean_steps_num)) - print("#------------------------------------#") - model_name = arg_dict["algo"] + '_' + str(arg_dict["steps"]) - file = open(os.path.join(model_logdir, "train_" + model_name + ".txt"), 'a') - file.write("\n") - file.write("#Evaluation results: \n") - file.write("#{} of {} episodes were successful \n".format(success_episodes_num, arg_dict["eval_episodes"])) - file.write("#Mean distance error is {:.2f}% \n".format(mean_distance_error * 100)) - file.write("#Mean number of steps {}\n".format(mean_steps_num)) - file.close() - - if arg_dict["record"] == 1: - gif_path = os.path.join(model_logdir, "train_" + model_name + ".gif") - imageio.mimsave(gif_path, [np.array(img) for i, img in enumerate(images) if i%2 == 0], fps=15) - os.system('./utils/gifopt -O3 --lossy=5 -o {dest} {source}'.format(source=gif_path, dest=gif_path)) - print("Record saved to " + gif_path) - elif arg_dict["record"] == 2: - video_path = os.path.join(model_logdir, "train_" + model_name + ".avi") - height, width, layers = image.shape - out = cv2.VideoWriter(video_path,cv2.VideoWriter_fourcc(*'XVID'), 30, (width,height)) - for img in images: - out.write(cv2.cvtColor(img, cv2.COLOR_RGB2BGR)) - out.release() - print("Record saved to " + video_path) - - -def main(): - parser = get_parser() - parser.add_argument("-ct", "--control", default="slider", help="How to control robot during testing. Valid arguments: keyboard, observation, random, oraculum, slider") - parser.add_argument("-vs", "--vsampling", action="store_true", help="Visualize sampling area.") - parser.add_argument("-vt", "--vtrajectory", action="store_true", help="Visualize gripper trajectgory.") - parser.add_argument("-vn", "--vinfo", action="store_true", help="Visualize info. Valid arguments: True, False") - parser.add_argument("-nl", "--natural_language", default=False, help="NL Valid arguments: True, False") - arg_dict = get_arguments(parser) - model_logdir = os.path.dirname(arg_dict.get("model_path","")) - # Check if we chose one of the existing engines - if arg_dict["engine"] not in AVAILABLE_SIMULATION_ENGINES: - print(f"Invalid simulation engine. Valid arguments: --engine {AVAILABLE_SIMULATION_ENGINES}.") - return - if arg_dict.get("model_path") is None: - print("Path to the model using --model_path argument not specified. 
Testing random actions in selected environment.") - arg_dict["gui"] = 1 - env = configure_env(arg_dict, model_logdir, for_train=0) - test_env(env, arg_dict) - - else: - env = configure_env(arg_dict, model_logdir, for_train=0) - implemented_combos = configure_implemented_combos(env, model_logdir, arg_dict) - test_model(env, None, implemented_combos, arg_dict, model_logdir) - - -if __name__ == "__main__": - main() diff --git a/myGym/test.urdf b/myGym/test.urdf deleted file mode 100644 index e69de29b..00000000 diff --git a/myGym/train.py b/myGym/train.py deleted file mode 100644 index 148ad428..00000000 --- a/myGym/train.py +++ /dev/null @@ -1,352 +0,0 @@ -import random -import copy - -import pkg_resources -import os, sys, time, yaml -import argparse -import numpy as np -import matplotlib.pyplot as plt -import json, commentjson -import gym -from myGym import envs -import myGym.utils.cfg_comparator as cfg -#os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' -#from stable_baselines.common.policies import MlpPolicy -#from stable_baselines.common import make_vec_env -#from stable_baselines.common.vec_env import DummyVecEnv -#from stable_baselines.bench import Monitor -#from stable_baselines import results_plotter -#from stable_baselines.her import GoalSelectionStrategy, HERGoalEnvWrapper -# For now I am importing both with slightly modified names P-PyTorch T-TensorFlow -#from stable_baselines import PPO1 as PPO1_T, PPO2 as PPO2_T, HER as HER_T, SAC as SAC_T, DDPG as DDPG_T -#from stable_baselines import TD3 as TD3_T, A2C as A2C_T, ACKTR as ACKTR_T, TRPO as TRPO_T, GAIL as GAIL_T -#try: -# from stable_baselines3 import PPO as PPO_P, A2C as A2C_P, SAC as SAC_P, TD3 as TD3_P -#except: -# print("Torch isn't probably installed correctly") - -#from myGym.stable_baselines_mygym.algo import MyAlgo -#from myGym.stable_baselines_mygym.reference import REFER -#from myGym.stable_baselines_mygym.multi_ppo2 import MultiPPO2 -#from myGym.stable_baselines_mygym.multi_acktr import MultiACKTR -#from myGym.stable_baselines_mygym.policies import MyMlpPolicy -#from myGym.stable_baselines_mygym.TorchPPO import TorchPPO -#from myGym.stable_baselines_mygym.TorchPPOpolicies import TorchMlpPolicy - - -#from stable_baselines.gail import ExpertDataset, generate_expert_traj -#from stable_baselines.sac.policies import MlpPolicy as MlpPolicySAC -#from stable_baselines.ddpg.policies import MlpPolicy as MlpPolicyDDPG -#from stable_baselines.td3.policies import MlpPolicy as MlpPolicyTD3 - -# Import helper classes and functions for monitoring -#from myGym.utils.callbacks import ProgressBarManager, SaveOnBestTrainingRewardCallback, PlottingCallback, CustomEvalCallback -from myGym.envs.natural_language import NaturalLanguage - -# This is global variable for the type of engine we are working with -AVAILABLE_SIMULATION_ENGINES = ["mujoco", "pybullet"] -AVAILABLE_TRAINING_FRAMEWORKS = ["tensorflow", "pytorch"] - - -def save_results(arg_dict, model_name, env, model_logdir=None, show=False): - if model_logdir is None: - model_logdir = arg_dict["logdir"] - print(f"model_logdir: {model_logdir}") - - #results_plotter.EPISODES_WINDOW = 100 - #results_plotter.plot_results([model_logdir], arg_dict["steps"], results_plotter.X_TIMESTEPS, arg_dict["algo"] + " " + arg_dict["env_name"] + " reward") - #plt.gcf().set_size_inches(8, 6) - #plt.savefig(os.path.join(model_logdir, model_name) + '_reward_results.png') - #plot_extended_results(model_logdir, 'd', results_plotter.X_TIMESTEPS, arg_dict["algo"] + " " + arg_dict["env_name"] + " distance", "Episode 
Distances") - #plt.gcf().set_size_inches(8, 6) - #plt.savefig(os.path.join(model_logdir, model_name) + '_distance_results.png') - #plt.close() - #plt.close() - #if isinstance(env, HERGoalEnvWrapper): - # results_plotter.plot_curves([(np.arange(len(env.env.episode_final_distance)),np.asarray(env.env.episode_final_distance))],'episodes',arg_dict["algo"] + " " + arg_dict["env_name"] + ' final step distance') - #else: - # results_plotter.plot_curves([(np.arange(len(env.unwrapped.episode_final_distance)),np.asarray(env.unwrapped.episode_final_distance))],'episodes',arg_dict["algo"] + " " + arg_dict["env_name"] + ' final step distance') - #plt.gcf().set_size_inches(8, 6) - #plt.ylabel("Step Distances") - #plt.savefig(os.path.join(model_logdir, model_name) + "_final_distance_results.png") - #plt.close() - print("Congratulations! Training with {} timesteps succeed!".format(arg_dict["steps"])) - #if show: - # plt.show() - -def configure_env(arg_dict, model_logdir=None, for_train=True): - env_arguments = {"render_on": True, "visualize": arg_dict["visualize"], "workspace": arg_dict["workspace"], "framework":"stable_baselines", - "robot": arg_dict["robot"], "robot_init_joint_poses": arg_dict["robot_init"], - "robot_action": arg_dict["robot_action"],"max_velocity": arg_dict["max_velocity"], - "max_force": arg_dict["max_force"],"task_type": arg_dict["task_type"], - "action_repeat": arg_dict["action_repeat"], - "task_objects":arg_dict["task_objects"], "observation":arg_dict["observation"], "distractors":arg_dict["distractors"], - "num_networks":arg_dict.get("num_networks", 1), "network_switcher":arg_dict.get("network_switcher", "gt"), - "distance_type": arg_dict["distance_type"], "used_objects": arg_dict["used_objects"], - "active_cameras": arg_dict["camera"], "color_dict":arg_dict.get("color_dict", {}), - "max_steps": arg_dict["max_episode_steps"], "visgym":arg_dict["visgym"], - "reward": arg_dict["reward"], "logdir": arg_dict["logdir"], "vae_path": arg_dict["vae_path"], - "yolact_path": arg_dict["yolact_path"], "yolact_config": arg_dict["yolact_config"], - "natural_language": bool(arg_dict["natural_language"]), - "training": bool(for_train) - } - if for_train: - env_arguments["gui_on"] = arg_dict["gui"] - else: - env_arguments["gui_on"] = arg_dict["gui"] - - if arg_dict["algo"] == "her": - env = gym.make(arg_dict["env_name"], **env_arguments, obs_space="dict") # her needs obs as a dict - else: - env = gym.make(arg_dict["env_name"], **env_arguments) - if for_train: - if arg_dict["engine"] == "mujoco": - env = VecMonitor(env, model_logdir) if arg_dict["multiprocessing"] else Monitor(env, model_logdir) - elif arg_dict["engine"] == "pybullet": - try: - env = Monitor(env, model_logdir, info_keywords=tuple('d')) - except: - pass - - if arg_dict["algo"] == "her": - env = HERGoalEnvWrapper(env) - return env - - -def configure_implemented_combos(env, model_logdir, arg_dict): - implemented_combos = {"ppo2": {"tensorflow": [PPO2_T, (MlpPolicy, env), {"n_steps": arg_dict["algo_steps"], "verbose": 1, "tensorboard_log": model_logdir}]}, - "ppo": {"tensorflow": [PPO1_T, (MlpPolicy, env), {"verbose": 1, "tensorboard_log": model_logdir}],}, - "her": {"tensorflow": [HER_T, (MlpPolicyDDPG, env, DDPG_T), {"goal_selection_strategy": 'future', "verbose": 1,"tensorboard_log": model_logdir}]}, - "sac": {"tensorflow": [SAC_T, (MlpPolicySAC, env), {"verbose": 1, "tensorboard_log": model_logdir}],}, - "ddpg": {"tensorflow": [DDPG_T, (MlpPolicyDDPG, env),{"verbose": 1, "tensorboard_log": model_logdir}]}, - "td3": {"tensorflow": 
[TD3_T, (MlpPolicyTD3, env), {"verbose": 1, "tensorboard_log": model_logdir}],}, - "acktr": {"tensorflow": [ACKTR_T, (MlpPolicy, env), {"n_steps": arg_dict["algo_steps"], "verbose": 1, "tensorboard_log": model_logdir}]}, - "trpo": {"tensorflow": [TRPO_T, (MlpPolicy, env), {"verbose": 1, "tensorboard_log": model_logdir}]}, - "gail": {"tensorflow": [GAIL_T, (MlpPolicy, env), {"verbose": 1, "tensorboard_log": model_logdir}]}, - "a2c": {"tensorflow": [A2C_T, (MlpPolicy, env), {"n_steps": arg_dict["algo_steps"], "verbose": 1, "tensorboard_log": model_logdir}],}, - "torchppo": {"tensorflow": [TorchPPO, (TorchMlpPolicy, env), {"n_steps": arg_dict["algo_steps"], "verbose": 1, "tensorboard_log": model_logdir}]}, - "myalgo": {"tensorflow": [MyAlgo, (MyMlpPolicy, env), {"n_steps": arg_dict["algo_steps"], "verbose": 1, "tensorboard_log": model_logdir}]}, - "ref": {"tensorflow": [REFER, (MlpPolicy, env), {"n_steps": arg_dict["algo_steps"], "verbose": 1, "tensorboard_log": model_logdir}]}, - "multippo2": {"tensorflow": [MultiPPO2, (MlpPolicy, env), {"n_steps": arg_dict["algo_steps"],"n_models": arg_dict["num_networks"], "verbose": 1, "tensorboard_log": model_logdir}]}, - "multiacktr": {"tensorflow": [MultiACKTR, (MlpPolicy, env), {"n_steps": arg_dict["algo_steps"],"n_models": arg_dict["num_networks"], "verbose": 1, "tensorboard_log": model_logdir}]}} - - if "PPO_P" in sys.modules: - implemented_combos["ppo"]["pytorch"] = [PPO_P, ('MlpPolicy', env), {"n_steps": 1024, "verbose": 1, "tensorboard_log": model_logdir}] - implemented_combos["sac"]["pytorch"] = [SAC_P, ('MlpPolicy', env), {"verbose": 1, "tensorboard_log": model_logdir}] - implemented_combos["td3"]["pytorch"] = [TD3_P, ('MlpPolicy', env), {"verbose": 1, "tensorboard_log": model_logdir}] - implemented_combos["a2c"]["pytorch"] = [A2C_P, ('MlpPolicy', env), {"n_steps": arg_dict["algo_steps"], "verbose": 1, "tensorboard_log": model_logdir}] - - return implemented_combos - - -def train(env, implemented_combos, model_logdir, arg_dict, pretrained_model=None): - model_name = arg_dict["algo"] + '_' + str(arg_dict["steps"]) - conf_pth = os.path.join(model_logdir, "train.json") - model_path = os.path.join(model_logdir, "best_model.zip") - arg_dict["model_path"] = model_path - seed = arg_dict.get("seed", None) - with open(conf_pth, "w") as f: - json.dump(arg_dict, f, indent=4) - - model_args = implemented_combos[arg_dict["algo"]][arg_dict["train_framework"]][1] - model_kwargs = implemented_combos[arg_dict["algo"]][arg_dict["train_framework"]][2] - if seed is not None: - np.random.seed(seed) - random.seed(seed) - model_kwargs["seed"] = seed - if pretrained_model: - if not os.path.isabs(pretrained_model): - pretrained_model = pkg_resources.resource_filename("myGym", pretrained_model) - env = model_args[1] - vec_env = DummyVecEnv([lambda: env]) - model = implemented_combos[arg_dict["algo"]][arg_dict["train_framework"]][0].load(pretrained_model, vec_env) - else: - model = implemented_combos[arg_dict["algo"]][arg_dict["train_framework"]][0](*model_args, **model_kwargs) - - if arg_dict["algo"] == "gail": - # Multi processing: (using MPI) - if arg_dict["train_framework"] == 'tensorflow': - # Generate expert trajectories (train expert) - generate_expert_traj(model, model_name, n_timesteps=3000, n_episodes=100) - # Load the expert dataset - dataset = ExpertDataset(expert_path=model_name+'.npz', traj_limitation=10, verbose=1) - kwargs = {"verbose":1} - if seed is not None: - kwargs["seed"] = seed - model = GAIL_T('MlpPolicy', model_name, dataset, **kwargs) - # Note: 
in practice, you need to train for 1M steps to have a working policy - - start_time = time.time() - callbacks_list = [] - if pretrained_model: - model_logdir = pretrained_model.split('/') - model_logdir = model_logdir[:-1] - model_logdir = "/".join(model_logdir) - auto_save_callback = SaveOnBestTrainingRewardCallback(check_freq=1024, logdir=model_logdir, env=env, engine=arg_dict["engine"], multiprocessing=arg_dict["multiprocessing"]) - else: - auto_save_callback = SaveOnBestTrainingRewardCallback(check_freq=1024, logdir=model_logdir, env=env, engine=arg_dict["engine"], multiprocessing=arg_dict["multiprocessing"]) - callbacks_list.append(auto_save_callback) - if arg_dict["eval_freq"]: - #eval_env = configure_env(arg_dict, model_logdir, for_train=False) - eval_env = env - eval_callback = CustomEvalCallback(eval_env, log_path=model_logdir, - eval_freq=arg_dict["eval_freq"], - algo_steps=arg_dict["algo_steps"], - n_eval_episodes=arg_dict["eval_episodes"], - record=arg_dict["record"], - camera_id=arg_dict["camera"]) - callbacks_list.append(eval_callback) - #callbacks_list.append(PlottingCallback(model_logdir)) - #with ProgressBarManager(total_timesteps=arg_dict["steps"]) as progress_callback: - # callbacks_list.append(progress_callback) - model.learn(total_timesteps=arg_dict["steps"], callback=callbacks_list) - model.save(os.path.join(model_logdir, model_name)) - print("Training time: {:.2f} s".format(time.time() - start_time)) - print("Training steps: {:} s".format(model.num_timesteps)) - - # info_keywords in monitor class above is neccessary for pybullet to save_results - # when using the info_keywords for mujoco we get an error - if arg_dict["engine"] == "pybullet": - save_results(arg_dict, model_name, env, model_logdir) - return model - -def get_parser(): - parser = argparse.ArgumentParser() - #Envinronment - parser.add_argument("-cfg", "--config", default="./configs/train_A_RDDL.json", help="Can be passed instead of all arguments") - parser.add_argument("-n", "--env_name", type=str, help="The name of environment") - parser.add_argument("-ws", "--workspace", type=str, help="The name of workspace") - parser.add_argument("-p", "--engine", type=str, help="Name of the simulation engine you want to use") - parser.add_argument("-sd", "--seed", type=int, help="Seed number") - parser.add_argument("-d", "--render", type=str, help="Type of rendering: opengl, opencv") - parser.add_argument("-c", "--camera", type=int, help="The number of camera used to render and record") - parser.add_argument("-vi", "--visualize", type=int, help="Whether visualize camera render and vision in/out or not: 1 or 0") - parser.add_argument("-vg", "--visgym", type=int, help="Whether visualize gym background: 1 or 0") - parser.add_argument("-g", "--gui", type=int, help="Wether the GUI of the simulation should be used or not: 1 or 0") - #Robot - parser.add_argument("-b", "--robot", type=str, help="Robot to train: kuka, panda, jaco ...") - parser.add_argument("-bi", "--robot_init", nargs="*", type=float, help="Initial robot's end-effector position") - parser.add_argument("-ba", "--robot_action", type=str, help="Robot's action control: step - end-effector relative position, absolute - end-effector absolute position, joints - joints' coordinates") - parser.add_argument("-mv", "--max_velocity", type=float, help="Maximum velocity of robotic arm") - parser.add_argument("-mf", "--max_force", type=float, help="Maximum force of robotic arm") - parser.add_argument("-ar", "--action_repeat", type=int, help="Substeps of simulation without 
action from env") - #Task - parser.add_argument("-tt", "--task_type", type=str, help="Type of task to learn: reach, push, throw, pick_and_place") - parser.add_argument("-to", "--task_objects", nargs="*", type=str, help="Object (for reach) or a pair of objects (for other tasks) to manipulate with") - parser.add_argument("-u", "--used_objects", nargs="*", type=str, help="List of extra objects to randomly appear in the scene") - #Distractors - parser.add_argument("-di", "--distractors", type=str, help="Object (for reach) to evade") - parser.add_argument("-dm", "--distractor_moveable", type=int, help="can distractor move (0/1)") - parser.add_argument("-ds", "--distractor_constant_speed", type=int, help="is speed of distractor constant (0/1)") - parser.add_argument("-dd", "--distractor_movement_dimensions", type=int, help="in how many directions can the distractor move (1/2/3)") - parser.add_argument("-de", "--distractor_movement_endpoints", nargs="*", type=float, help="2 coordinates (starting point and ending point)") - parser.add_argument("-no", "--observed_links_num", type=int, help="number of robot links in observation space") - #Reward - parser.add_argument("-re", "--reward", type=str, help="Defines how to compute the reward") - parser.add_argument("-dt", "--distance_type", type=str, help="Type of distance metrics: euclidean, manhattan") - #Train - parser.add_argument("-w", "--train_framework", type=str, help="Name of the training framework you want to use: {tensorflow, pytorch}") - parser.add_argument("-a", "--algo", type=str, help="The learning algorithm to be used (ppo2 or her)") - parser.add_argument("-s", "--steps", type=int, help="The number of steps to train") - parser.add_argument("-ms", "--max_episode_steps", type=int, help="The maximum number of steps per episode") - parser.add_argument("-ma", "--algo_steps", type=int, help="The number of steps per for algo training (PPO2,A2C)") - #Evaluation - parser.add_argument("-ef", "--eval_freq", type=int, help="Evaluate the agent every eval_freq steps") - parser.add_argument("-e", "--eval_episodes", type=int, help="Number of episodes to evaluate performance of the robot") - #Saving and Logging - parser.add_argument("-l", "--logdir", type=str, help="Where to save results of training and trained models") - parser.add_argument("-r", "--record", type=int, help="1: make a gif of model perfomance, 2: make a video of model performance, 0: don't record") - #Mujoco - parser.add_argument("-i", "--multiprocessing", type=int, help="True: multiprocessing on (specify also the number of vectorized environemnts), False: multiprocessing off") - parser.add_argument("-v", "--vectorized_envs", type=int, help="The number of vectorized environments to run at once (mujoco multiprocessing only)") - #Paths - parser.add_argument("-m", "--model_path", type=str, help="Path to the the trained model to test") - parser.add_argument("-vp", "--vae_path", type=str, help="Path to a trained VAE in 2dvu reward type") - parser.add_argument("-yp", "--yolact_path", type=str, help="Path to a trained Yolact in 3dvu reward type") - parser.add_argument("-yc", "--yolact_config", type=str, help="Path to saved config obj or name of an existing one in the data/Config script (e.g. 
'yolact_base_config') or None for autodetection") - parser.add_argument('-ptm', "--pretrained_model", type=str, help="Path to a model that you want to continue training") - #Language - # parser.add_argument("-nl", "--natural_language", type=str, default="", - # help="If passed, instead of training the script will produce a natural language output " - # "of the given type, save it to the predefined file (for communication with other scripts) " - # "and exit the program (without the actual training taking place). Expected values are \"description\" " - # "(generate a task description) or \"new_tasks\" (generate new tasks)") - return parser - -def get_arguments(parser): - args = parser.parse_args() - with open(args.config, "r") as f: - arg_dict = commentjson.load(f) - for key, value in vars(args).items(): - if value is not None and key is not "config": - if key in ["robot_init"]: - arg_dict[key] = [float(arg_dict[key][i]) for i in range(len(arg_dict[key]))] - elif key in ["task_objects"]: - arg_dict[key] = task_objects_replacement(value, arg_dict[key], arg_dict["task_type"]) - else: - arg_dict[key] = value - return arg_dict - - -def task_objects_replacement(task_objects_new, task_objects_old, task_type): - """ - If task_objects is given as a parameter, this method converts string into a proper format depending on task_type (null init for task_type reach) - - [{"init":{"obj_name":"null"}, "goal":{"obj_name":"cube_holes","fixed":1,"rand_rot":0, "sampling_area":[-0.5, 0.2, 0.3, 0.6, 0.1, 0.4]}}] - """ - ret = copy.deepcopy(task_objects_old) - if len(task_objects_new) > len(task_objects_old): - msg = "More objects given than there are subtasks." - raise Exception(msg) - dest = "" #init or goal - if task_type == "reach": - dest = "goal" - else: - dest = "init" - for i in range(len(task_objects_new)): - ret[i][dest]["obj_name"] = task_objects_new[i] - return ret - - - - -def process_natural_language_command(cmd, env, output_relative_path=os.path.join("envs", "examples", "natural_language.txt")): - env.reset() - nl = NaturalLanguage(env) - - if cmd in ["description", "new_tasks"]: - with open(os.path.join(os.path.dirname(os.path.realpath(__file__)), output_relative_path), "w") as file: - file.write(nl.generate_task_description() if cmd == "description" else "\n".join(nl.generate_new_tasks())) - else: - msg = f"Unknown natural language command: {cmd}" - raise Exception(msg) - -def main(): - parser = get_parser() - arg_dict = get_arguments(parser) - - # Check if we chose one of the existing engines - if arg_dict["engine"] not in AVAILABLE_SIMULATION_ENGINES: - print(f"Invalid simulation engine. 
Valid arguments: --engine {AVAILABLE_SIMULATION_ENGINES}.") - return - if not os.path.isabs(arg_dict["logdir"]): - arg_dict["logdir"] = os.path.join("./", arg_dict["logdir"]) - os.makedirs(arg_dict["logdir"], exist_ok=True) - model_logdir_ori = os.path.join(arg_dict["logdir"], "_".join((arg_dict["task_type"],arg_dict["workspace"],arg_dict["robot"],arg_dict["robot_action"],arg_dict["algo"]))) - model_logdir = model_logdir_ori - add = 2 - while True: - try: - os.makedirs(model_logdir, exist_ok=False) - break - except: - model_logdir = "_".join((model_logdir_ori, str(add))) - add += 1 - - env = configure_env(arg_dict, model_logdir, for_train=1) - implemented_combos = configure_implemented_combos(env, model_logdir, arg_dict) - train(env, implemented_combos, model_logdir, arg_dict, arg_dict["pretrained_model"]) - print(model_logdir) - -if __name__ == "__main__": - main() diff --git a/myGym/train_checker.sh b/myGym/train_checker.sh deleted file mode 100644 index 3be75cb4..00000000 --- a/myGym/train_checker.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/sh -#This is minimal unit test to check, whether the current version of myGym is able to train. -# It will do simle train and evaluate in 2 minutes. If success rate is above 30 percdents, it works. -rm -r ./trained_models/check -python train.py --config ./configs/train_reach.json --gui 1 --algo ppo2 -b kuka -ba joints --max_episode_steps 256 --algo_steps 256 --steps 20000 --eval_episodes 10 --logdir ./trained_models/check -#python test.py -cfg ./trained_models/check/reach_table_kuka_joints_ppo2/train.json -#rm -r ./trained_models/check \ No newline at end of file diff --git a/myGym/train_multithread.py b/myGym/train_multithread.py deleted file mode 100644 index 536dc47b..00000000 --- a/myGym/train_multithread.py +++ /dev/null @@ -1,217 +0,0 @@ -import random -import pkg_resources -import os, sys, time, yaml -import argparse -import numpy as np -import json, commentjson -from myGym import envs -import ray -from ray.rllib.algorithms.ppo import PPOConfig -from ray.rllib.algorithms.sac.sac import SACConfig -from ray.rllib.algorithms.appo import APPOConfig -from ray.rllib.algorithms.ddpg import DDPGConfig -from ray.rllib.algorithms.marwil import MARWILConfig -from ray.rllib.evaluation import Episode, RolloutWorker -from ray.rllib.policy import Policy -from ray.rllib.policy.sample_batch import SampleBatch -from ray.rllib.env.base_env import BaseEnv -from ray.tune.logger import pretty_print -from train import get_parser, get_arguments, AVAILABLE_SIMULATION_ENGINES -from gymnasium.wrappers import EnvCompatibility -from ray.tune.registry import register_env -from myGym.envs.gym_env import GymEnv - -def save_results(arg_dict, model_name, env, model_logdir=None, show=False): - if model_logdir is None: - model_logdir = arg_dict["logdir"] - print(f"model_logdir: {model_logdir}") - - #results_plotter.EPISODES_WINDOW = 100 - #results_plotter.plot_results([model_logdir], arg_dict["steps"], results_plotter.X_TIMESTEPS, arg_dict["algo"] + " " + arg_dict["env_name"] + " reward") - #plt.gcf().set_size_inches(8, 6) - #plt.savefig(os.path.join(model_logdir, model_name) + '_reward_results.png') - #plot_extended_results(model_logdir, 'd', results_plotter.X_TIMESTEPS, arg_dict["algo"] + " " + arg_dict["env_name"] + " distance", "Episode Distances") - #plt.gcf().set_size_inches(8, 6) - #plt.savefig(os.path.join(model_logdir, model_name) + '_distance_results.png') - #plt.close() - #plt.close() - #if isinstance(env, HERGoalEnvWrapper): - # 
results_plotter.plot_curves([(np.arange(len(env.env.episode_final_distance)),np.asarray(env.env.episode_final_distance))],'episodes',arg_dict["algo"] + " " + arg_dict["env_name"] + ' final step distance') - #else: - # results_plotter.plot_curves([(np.arange(len(env.unwrapped.episode_final_distance)),np.asarray(env.unwrapped.episode_final_distance))],'episodes',arg_dict["algo"] + " " + arg_dict["env_name"] + ' final step distance') - #plt.gcf().set_size_inches(8, 6) - #plt.ylabel("Step Distances") - #plt.savefig(os.path.join(model_logdir, model_name) + "_final_distance_results.png") - #plt.close() - print("Congratulations! Training with {} timesteps succeed!".format(arg_dict["steps"])) - #if show: - # plt.show() - -def configure_env(arg_dict, for_train=True): - env_arguments = {"render_on": True, "visualize": arg_dict["visualize"], "workspace": arg_dict["workspace"], - "robot": arg_dict["robot"], "robot_init_joint_poses": arg_dict["robot_init"], - "robot_action": arg_dict["robot_action"],"max_velocity": arg_dict["max_velocity"], - "max_force": arg_dict["max_force"],"task_type": arg_dict["task_type"], - "action_repeat": arg_dict["action_repeat"], - "task_objects":arg_dict["task_objects"], "observation":arg_dict["observation"], "distractors":arg_dict["distractors"], - "num_networks":arg_dict.get("num_networks", 1), "network_switcher":arg_dict.get("network_switcher", "gt"), - "distance_type": arg_dict["distance_type"], "used_objects": arg_dict["used_objects"], - "active_cameras": arg_dict["camera"], "color_dict":arg_dict.get("color_dict", {}), - "max_steps": arg_dict["max_episode_steps"], "visgym":arg_dict["visgym"], - "reward": arg_dict["reward"], "logdir": arg_dict["logdir"], "vae_path": arg_dict["vae_path"], - "yolact_path": arg_dict["yolact_path"], "yolact_config": arg_dict["yolact_config"], "algo":arg_dict["algo"]} - if for_train: - env_arguments["gui_on"] = arg_dict["gui"] - else: - env_arguments["gui_on"] = arg_dict["gui"] - return env_arguments - -def env_creator(env_config): - return EnvCompatibility(GymEnv(**env_config)) - -def configure_implemented_combos(arg_dict): - implemented_combos = {"ppo": PPOConfig, "sac": SACConfig, "marwil": MARWILConfig, "appo":APPOConfig, "ddpg":DDPGConfig} - return implemented_combos[arg_dict["algo"]] - -def get_parser(): - parser = argparse.ArgumentParser() - #Envinronment - parser.add_argument("-cfg", "--config", default="./configs/rllib_debug.json", help="Can be passed instead of all arguments") - parser.add_argument("-rt", "--ray_tune", action='store_true', help="Whether to train with ray grid search") - parser.add_argument("-n", "--env_name", type=str, help="The name of environment") - parser.add_argument("-ws", "--workspace", type=str, help="The name of workspace") - parser.add_argument("-p", "--engine", type=str, help="Name of the simulation engine you want to use") - parser.add_argument("-sd", "--seed", type=int, help="Seed number") - parser.add_argument("-d", "--render", type=str, help="Type of rendering: opengl, opencv") - parser.add_argument("-c", "--camera", type=int, help="The number of camera used to render and record") - parser.add_argument("-vi", "--visualize", type=int, help="Whether visualize camera render and vision in/out or not: 1 or 0") - parser.add_argument("-vg", "--visgym", type=int, help="Whether visualize gym background: 1 or 0") - parser.add_argument("-g", "--gui", type=int, help="Wether the GUI of the simulation should be used or not: 1 or 0") - #Robot - parser.add_argument("-b", "--robot", type=str, help="Robot to train: 
kuka, panda, jaco ...") - parser.add_argument("-bi", "--robot_init", nargs="*", type=float, help="Initial robot's end-effector position") - parser.add_argument("-ba", "--robot_action", type=str, help="Robot's action control: step - end-effector relative position, absolute - end-effector absolute position, joints - joints' coordinates") - parser.add_argument("-mv", "--max_velocity", type=float, help="Maximum velocity of robotic arm") - parser.add_argument("-mf", "--max_force", type=float, help="Maximum force of robotic arm") - parser.add_argument("-ar", "--action_repeat", type=int, help="Substeps of simulation without action from env") - #Task - parser.add_argument("-tt", "--task_type", type=str, help="Type of task to learn: reach, push, throw, pick_and_place") - parser.add_argument("-to", "--task_objects", nargs="*", type=str, help="Object (for reach) or a pair of objects (for other tasks) to manipulate with") - parser.add_argument("-u", "--used_objects", nargs="*", type=str, help="List of extra objects to randomly appear in the scene") - #Distractors - parser.add_argument("-di", "--distractors", type=str, help="Object (for reach) to evade") - parser.add_argument("-dm", "--distractor_moveable", type=int, help="can distractor move (0/1)") - parser.add_argument("-ds", "--distractor_constant_speed", type=int, help="is speed of distractor constant (0/1)") - parser.add_argument("-dd", "--distractor_movement_dimensions", type=int, help="in how many directions can the distractor move (1/2/3)") - parser.add_argument("-de", "--distractor_movement_endpoints", nargs="*", type=float, help="2 coordinates (starting point and ending point)") - parser.add_argument("-no", "--observed_links_num", type=int, help="number of robot links in observation space") - #Reward - parser.add_argument("-re", "--reward", type=str, help="Defines how to compute the reward") - parser.add_argument("-dt", "--distance_type", type=str, help="Type of distance metrics: euclidean, manhattan") - #Train - parser.add_argument("-w", "--train_framework", type=str, help="Name of the training framework you want to use: {tensorflow, pytorch}") - parser.add_argument("-a", "--algo", type=str, help="The learning algorithm to be used (ppo2 or her)") - parser.add_argument("-s", "--steps", type=int, help="The number of steps to train") - parser.add_argument("-ms", "--max_episode_steps", type=int, help="The maximum number of steps per episode") - parser.add_argument("-ma", "--algo_steps", type=int, help="The number of steps per for algo training (PPO2,A2C)") - #Evaluation - parser.add_argument("-ef", "--eval_freq", type=int, help="Evaluate the agent every eval_freq steps") - parser.add_argument("-e", "--eval_episodes", type=int, help="Number of episodes to evaluate performance of the robot") - #Saving and Logging - parser.add_argument("-l", "--logdir", type=str, help="Where to save results of training and trained models") - parser.add_argument("-r", "--record", type=int, help="1: make a gif of model perfomance, 2: make a video of model performance, 0: don't record") - #Mujoco - parser.add_argument("-i", "--multiprocessing", type=int, help="True: multiprocessing on (specify also the number of vectorized environemnts), False: multiprocessing off") - parser.add_argument("-v", "--vectorized_envs", type=int, help="The number of vectorized environments to run at once (mujoco multiprocessing only)") - #Paths - parser.add_argument("-m", "--model_path", type=str, help="Path to the the trained model to test") - parser.add_argument("-vp", "--vae_path", 
type=str, help="Path to a trained VAE in 2dvu reward type") - parser.add_argument("-yp", "--yolact_path", type=str, help="Path to a trained Yolact in 3dvu reward type") - parser.add_argument("-yc", "--yolact_config", type=str, help="Path to saved config obj or name of an existing one in the data/Config script (e.g. 'yolact_base_config') or None for autodetection") - parser.add_argument('-ptm', "--pretrained_model", type=str, help="Path to a model that you want to continue training") - return parser - -def get_arguments(parser): - args = parser.parse_args() - with open(args.config, "r") as f: - arg_dict = commentjson.load(f) - for key, value in vars(args).items(): - if value is not None and key != "config": - if key in ["robot_init"]: - arg_dict[key] = [float(arg_dict[key][i]) for i in range(len(arg_dict[key]))] - else: - arg_dict[key] = value - return arg_dict, args - -def train(args, arg_dict, algorithm, num_steps, algo_steps): - if not args.ray_tune: - # manual training with train loop using PPO and fixed learning rate - print("Running manual train loop without Ray Tune.") - # use fixed learning rate instead of grid search (needs tune) - algo = ( - algorithm() - .rollouts(num_rollout_workers=10, batch_mode="complete_episodes") # You can try to increase or decrease based on your systems specs - .resources(num_gpus=1, num_gpus_per_worker=0.1) # You can try to increase or decrease based on your systems specs - .environment(env='GymEnv-v0', env_config=arg_dict) - .training(train_batch_size=256) - .build() - ) - # run manual training loop and print results after each iteration - for _ in range(int(num_steps/algo_steps)): - result = algo.train() # Runs one logical iteration of training. - print(pretty_print(result)) - # stop training of the target train steps or reward are reached - if result["timesteps_total"] >= num_steps: - break - algo.stop() - else: - print("Not Implemented, TODO") - # automated run with Tune and grid search and TensorBoard - # tuner = tune.Tuner( - # args.run, - # param_space=config.to_dict(), - # run_config=air.RunConfig(stop=stop), - # ) - # results = tuner.fit() - - # if args.as_test: - # print("Checking if learning goals were achieved") - # check_learning_achieved(results, args.stop_reward) - - -def main(): - parser = get_parser() - arg_dict, args = get_arguments(parser) - ray.init(num_gpus=1, num_cpus=10) - - # Check if we chose one of the existing engines - if arg_dict["engine"] not in AVAILABLE_SIMULATION_ENGINES: - print(f"Invalid simulation engine. 
Valid arguments: --engine {AVAILABLE_SIMULATION_ENGINES}.") - return - if not os.path.isabs(arg_dict["logdir"]): - arg_dict["logdir"] = os.path.join("./", arg_dict["logdir"]) - os.makedirs(arg_dict["logdir"], exist_ok=True) - model_logdir_ori = os.path.join(arg_dict["logdir"], "_".join((arg_dict["task_type"],arg_dict["workspace"],arg_dict["robot"],arg_dict["robot_action"],arg_dict["algo"]))) - model_logdir = model_logdir_ori - add = 2 - while True: - try: - os.makedirs(model_logdir, exist_ok=False) - break - except: - model_logdir = "_".join((model_logdir_ori, str(add))) - add += 1 - - num_steps = arg_dict["steps"] - algo_steps = arg_dict["algo_steps"] - arg_dict = configure_env(arg_dict, for_train=1) - register_env('GymEnv-v0', env_creator) - if not args.ray_tune: - assert arg_dict["algo"] == "ppo", "Training without ray tune only works with PPO (rllib limitation)" - algorithm = configure_implemented_combos(arg_dict) - arg_dict.pop("algo") - train(args, arg_dict, algorithm, num_steps, algo_steps) - - -if __name__ == "__main__": - main() diff --git a/myGym/train_parallel.py b/myGym/train_parallel.py deleted file mode 100644 index fcb05aca..00000000 --- a/myGym/train_parallel.py +++ /dev/null @@ -1,28 +0,0 @@ -import os -import threading - -algos = [' ppo ',' ppo2 ','sac','trpo'] -api = '~/myGym/myGym' -configfile = 'configs/train.json' -script_path = api + '/train.py' -def train(algo): - os.system('cd {api};python {script_path} --config {configfile} --algo {algos}'.format(script_path=script_path, api=api, configfile=configfile, algos=algo)) - - - -if __name__ == '__main__': - threads = [] - for i, algo in enumerate(algos): - thread = threading.Thread(target=train, args=(algo,)) - thread.start() - threads.append(thread) - - for thread in threads: - thread.join() - - - - - -# os.system("./train.py --task_type reach --algo ppo2 --reward_type gt --robot kuka -ba step --env_name CrowWorkspaceEnv-v0 --steps 300000 -to hammer -w tensorflow -g 0 -p pybullet -r 0 -c 6 -dt euclidian -i 0 -t 1 -f 0 -d opengl") \ No newline at end of file diff --git a/myGym/traintf2.py b/myGym/trainprag.py similarity index 52% rename from myGym/traintf2.py rename to myGym/trainprag.py index 5a64ba92..2aaf89b5 100644 --- a/myGym/traintf2.py +++ b/myGym/trainprag.py @@ -1,42 +1,26 @@ -import pkg_resources +import importlib.resources as resources import os, sys, time, yaml import argparse import numpy as np import matplotlib.pyplot as plt import json, commentjson -import gym +import gymnasium as gym from myGym import envs import myGym.utils.cfg_comparator as cfg -from stable_baselines.common.policies import MlpPolicy -from stable_baselines.common import make_vec_env -from stable_baselines.common.vec_env import DummyVecEnv -from stable_baselines.bench import Monitor -from stable_baselines import results_plotter -from stable_baselines.her import GoalSelectionStrategy, HERGoalEnvWrapper -# For now I am importing both with slightly modified names P-PyTorch T-TensorFlow -from stable_baselines import PPO1 as PPO1_T, PPO2 as PPO2_T, HER as HER_T, SAC as SAC_T -from stable_baselines import TD3 as TD3_T, A2C as A2C_T, ACKTR as ACKTR_T, TRPO as TRPO_T +from stable_baselines3.common.vec_env import DummyVecEnv +from stable_baselines3.common.monitor import Monitor # Monitor is still called in configure_env below +from stable_baselines3.her import GoalSelectionStrategy #, HERGoalEnvWrapper try: from stable_baselines3 import PPO as PPO_P, A2C as A2C_P, SAC as SAC_P, TD3 as TD3_P except: print("Torch isn't probably installed correctly") -from myGym.stable_baselines_mygym.algo import MyAlgo -from myGym.stable_baselines_mygym.policies 
import MyMlpPolicy -from myGym.stable_baselines_mygym.TorchPPO import TorchPPO -from myGym.stable_baselines_mygym.TorchPPOpolicies import TorchMlpPolicy - - -#from stable_baselines.gail import ExpertDataset, generate_expert_traj -from stable_baselines.sac.policies import MlpPolicy as MlpPolicySAC -#from stable_baselines.ddpg.policies import MlpPolicy as MlpPolicyDDPG -from stable_baselines.td3.policies import MlpPolicy as MlpPolicyTD3 - # Import helper classes and functions for monitoring -from myGym.utils.callbackstf2 import ProgressBarManager, SaveOnBestTrainingRewardCallback, PlottingCallback, CustomEvalCallback +from myGym.utils.callbackstf2 import ProgressBarManager, SaveOnBestTrainingRewardCallback, CustomEvalCallback +import myGym + # This is global variable for the type of engine we are working with -AVAILABLE_SIMULATION_ENGINES = ["mujoco", "pybullet"] AVAILABLE_TRAINING_FRAMEWORKS = ["tensorflow", "pytorch"] @@ -45,8 +28,8 @@ def save_results(arg_dict, model_name, env, model_logdir=None, show=False): model_logdir = arg_dict["logdir"] print(f"model_logdir: {model_logdir}") - results_plotter.EPISODES_WINDOW = 100 - results_plotter.plot_results([model_logdir], arg_dict["steps"], results_plotter.X_TIMESTEPS, arg_dict["algo"] + " " + arg_dict["env_name"] + " reward") + #results_plotter.EPISODES_WINDOW = 100 + #results_plotter.plot_results([model_logdir], arg_dict["steps"], results_plotter.X_TIMESTEPS, arg_dict["algo"] + " " + arg_dict["env_name"] + " reward") plt.gcf().set_size_inches(8, 6) plt.savefig(os.path.join(model_logdir, model_name) + '_reward_results.png') #plot_extended_results(model_logdir, 'd', results_plotter.X_TIMESTEPS, arg_dict["algo"] + " " + arg_dict["env_name"] + " distance", "Episode Distances") @@ -54,10 +37,7 @@ def save_results(arg_dict, model_name, env, model_logdir=None, show=False): plt.savefig(os.path.join(model_logdir, model_name) + '_distance_results.png') plt.close() plt.close() - if isinstance(env, HERGoalEnvWrapper): - results_plotter.plot_curves([(np.arange(len(env.env.episode_final_distance)),np.asarray(env.env.episode_final_distance))],'episodes',arg_dict["algo"] + " " + arg_dict["env_name"] + ' final step distance') - else: - results_plotter.plot_curves([(np.arange(len(env.unwrapped.episode_final_distance)),np.asarray(env.unwrapped.episode_final_distance))],'episodes',arg_dict["algo"] + " " + arg_dict["env_name"] + ' final step distance') + #results_plotter.plot_curves([(np.arange(len(env.unwrapped.episode_final_distance)),np.asarray(env.unwrapped.episode_final_distance))],'episodes',arg_dict["algo"] + " " + arg_dict["env_name"] + ' final step distance') plt.gcf().set_size_inches(8, 6) plt.ylabel("Step Distances") plt.savefig(os.path.join(model_logdir, model_name) + "_final_distance_results.png") @@ -67,45 +47,31 @@ def save_results(arg_dict, model_name, env, model_logdir=None, show=False): plt.show() def configure_env(arg_dict, model_logdir=None, for_train=True): - if arg_dict["engine"] == "pybullet": - env_arguments = {"render_on": True, "visualize": arg_dict["visualize"], "workspace": arg_dict["workspace"], - "robot": arg_dict["robot"], "robot_init_joint_poses": arg_dict["robot_init"], - "robot_action": arg_dict["robot_action"], "task_type": arg_dict["task_type"], "num_subgoals": arg_dict["num_subgoals"], - "task_objects":arg_dict["task_objects"], "distractors":arg_dict["distractors"], - "distractor_moveable":arg_dict["distractor_moveable"], - "distractor_constant_speed":arg_dict["distractor_constant_speed"], - 
"distractor_movement_dimensions":arg_dict["distractor_movement_dimensions"], - "distractor_movement_endpoints":arg_dict["distractor_movement_endpoints"], - "coefficient_kd":arg_dict["coefficient_kd"], - "coefficient_kw":arg_dict["coefficient_kw"], - "coefficient_ka":arg_dict["coefficient_ka"], - "observed_links_num":arg_dict["observed_links_num"], - "reward_type": arg_dict["reward_type"], - "distance_type": arg_dict["distance_type"], "used_objects": arg_dict["used_objects"], - "object_sampling_area": arg_dict["object_sampling_area"], "active_cameras": arg_dict["camera"], - "max_steps": arg_dict["max_episode_steps"], "visgym":arg_dict["visgym"], - "reward": arg_dict["reward"], "logdir": arg_dict["logdir"], "vae_path": arg_dict["vae_path"], - "yolact_path": arg_dict["yolact_path"], "yolact_config": arg_dict["yolact_config"]} - if for_train: - env_arguments["gui_on"] = False - else: - env_arguments["gui_on"] = arg_dict["gui"] + env_arguments = {"render_on": True, "visualize": arg_dict["visualize"], "workspace": arg_dict["workspace"], "framework":"stable_baselines", + "robot": arg_dict["robot"], "robot_init_joint_poses": arg_dict["robot_init"], + "robot_action": arg_dict["robot_action"],"max_velocity": arg_dict["max_velocity"], + "max_force": arg_dict["max_force"], + "action_repeat": arg_dict["action_repeat"], "rddl": arg_dict["rddl"], + "observation":arg_dict["observation"], "distractors":arg_dict["distractors"], + "num_networks":arg_dict.get("num_networks", 1), "network_switcher":arg_dict.get("network_switcher", "gt"), + "active_cameras": arg_dict["camera"], "color_dict":arg_dict.get("color_dict", {}), + "max_steps": arg_dict["max_episode_steps"], "visgym":arg_dict["visgym"], + "logdir": arg_dict["logdir"], "vae_path": arg_dict["vae_path"], + "yolact_path": arg_dict["yolact_path"], "yolact_config": arg_dict["yolact_config"], + "natural_language": bool(arg_dict["natural_language"]), + "training": bool(for_train) + } + env_arguments["gui_on"] = arg_dict["gui"] - if arg_dict["algo"] == "her": - env = gym.make(arg_dict["env_name"], **env_arguments, obs_space="dict") # her needs obs as a dict - else: - env = gym.make(arg_dict["env_name"], **env_arguments) - elif arg_dict["engine"] == "mujoco": - if arg_dict["multiprocessing"]: - # ACKTR, PPO2, A2C, DDPG can use vectorized environments, but the only way to display the results (for me) is using CV2 imshow. 
-(TensorFlow comment) - env = make_vec_env(arg_dict["env_name"], n_envs=arg_dict["vectorized_envs"]) - else: - env = gym.make(arg_dict["env_name"]) + if arg_dict["algo"] == "her": + env = gym.make(arg_dict["env_name"], **env_arguments, obs_space="dict") # her needs obs as a dict + else: + env = gym.make(arg_dict["env_name"], **env_arguments) if for_train: - if arg_dict["engine"] == "mujoco": - env = VecMonitor(env, model_logdir) if arg_dict["multiprocessing"] else Monitor(env, model_logdir) - elif arg_dict["engine"] == "pybullet": + try: env = Monitor(env, model_logdir, info_keywords=tuple('d')) + except: + pass if arg_dict["algo"] == "her": env = HERGoalEnvWrapper(env) @@ -113,22 +79,11 @@ def configure_env(arg_dict, model_logdir=None, for_train=True): def configure_implemented_combos(env, model_logdir, arg_dict): - implemented_combos = {"ppo2": {"tensorflow": [PPO2_T, (MlpPolicy, env), {"n_steps": arg_dict["algo_steps"], "verbose": 1}]}, - "ppo": {"tensorflow": [PPO1_T, (MlpPolicy, env), {"verbose": 1}],}, - "sac": {"tensorflow": [SAC_T, (MlpPolicySAC, env), {"verbose": 1}],}, - "td3": {"tensorflow": [TD3_T, (MlpPolicyTD3, env), {"verbose": 1}],}, - "acktr": {"tensorflow": [ACKTR_T, (MlpPolicy, env), {"n_steps": arg_dict["algo_steps"], "verbose": 1}]}, - "trpo": {"tensorflow": [TRPO_T, (MlpPolicy, env), {"verbose": 1}]}, - "a2c": {"tensorflow": [A2C_T, (MlpPolicy, env), {"n_steps": arg_dict["algo_steps"], "verbose": 1}],}, - "torchppo": {"tensorflow": [TorchPPO, (TorchMlpPolicy, env), {"n_steps": arg_dict["algo_steps"], "verbose": 1}]}, - "myalgo": {"tensorflow": [MyAlgo, (MyMlpPolicy, env), {"n_steps": arg_dict["algo_steps"], "verbose": 1}]}, - "dual": {"tensorflow": [PPO2_T, (MlpPolicy, env), {"n_steps": arg_dict["algo_steps"], "verbose": 1}]}} - - if "PPO_P" in sys.modules: - implemented_combos["ppo"]["pytorch"] = [PPO_P, ('MlpPolicy', env), {"n_steps": 1024, "verbose": 1, "tensorboard_log": model_logdir}] - implemented_combos["sac"]["pytorch"] = [SAC_P, ('MlpPolicy', env), {"verbose": 1, "tensorboard_log": model_logdir}] - implemented_combos["td3"]["pytorch"] = [TD3_P, ('MlpPolicy', env), {"verbose": 1, "tensorboard_log": model_logdir}] - implemented_combos["a2c"]["pytorch"] = [A2C_P, ('MlpPolicy', env), {"n_steps": arg_dict["algo_steps"], "verbose": 1, "tensorboard_log": model_logdir}] + implemented_combos = {"ppo":{}, "sac":{}, "td3":{}, "a2c":{}} + implemented_combos["ppo"]["pytorch"] = [PPO_P, ('MlpPolicy', env), {"n_steps": 1024, "verbose": 1, "tensorboard_log": model_logdir}] + implemented_combos["sac"]["pytorch"] = [SAC_P, ('MlpPolicy', env), {"verbose": 1, "tensorboard_log": model_logdir}] + implemented_combos["td3"]["pytorch"] = [TD3_P, ('MlpPolicy', env), {"verbose": 1, "tensorboard_log": model_logdir}] + implemented_combos["a2c"]["pytorch"] = [A2C_P, ('MlpPolicy', env), {"n_steps": arg_dict["algo_steps"], "verbose": 1, "tensorboard_log": model_logdir}] return implemented_combos @@ -141,23 +96,16 @@ def train(env, implemented_combos, model_logdir, arg_dict, pretrained_model=None with open(conf_pth, "w") as f: json.dump(arg_dict, f, indent=4) - try: - model_args = implemented_combos[arg_dict["algo"]][arg_dict["train_framework"]][1] - model_kwargs = implemented_combos[arg_dict["algo"]][arg_dict["train_framework"]][2] - if pretrained_model: - if not os.path.isabs(pretrained_model): - pretrained_model = pkg_resources.resource_filename("myGym", pretrained_model) - env = model_args[1] - vec_env = DummyVecEnv([lambda: env]) - model = 
implemented_combos[arg_dict["algo"]][arg_dict["train_framework"]][0].load(pretrained_model, vec_env) - else: - model = implemented_combos[arg_dict["algo"]][arg_dict["train_framework"]][0](*model_args, **model_kwargs) - except: - if arg_dict["algo"] in implemented_combos.keys(): - err = "{} is only implemented with {}".format(arg_dict["algo"], list(implemented_combos[arg_dict["algo"]].keys())[0]) - else: - err = "{} algorithm is not implemented.".format(arg_dict["algo"]) - raise Exception(err) + model_args = implemented_combos[arg_dict["algo"]][arg_dict["train_framework"]][1] + model_kwargs = implemented_combos[arg_dict["algo"]][arg_dict["train_framework"]][2] + if pretrained_model: + if not os.path.isabs(pretrained_model): + pretrained_model = resources.files("myGym").joinpath(pretrained_model) + env = model_args[1] + vec_env = DummyVecEnv([lambda: env]) + model = implemented_combos[arg_dict["algo"]][arg_dict["train_framework"]][0].load(pretrained_model, vec_env) + else: + model = implemented_combos[arg_dict["algo"]][arg_dict["train_framework"]][0](*model_args, **model_kwargs) if arg_dict["algo"] == "gail": # Multi processing: (using MPI) @@ -171,8 +119,7 @@ def train(env, implemented_combos, model_logdir, arg_dict, pretrained_model=None start_time = time.time() callbacks_list = [] - auto_save_callback = SaveOnBestTrainingRewardCallback(check_freq=1024, logdir=model_logdir, env=env, engine=arg_dict["engine"], multiprocessing=arg_dict["multiprocessing"]) - callbacks_list.append(auto_save_callback) + if arg_dict["eval_freq"]: eval_env = configure_env(arg_dict, model_logdir, for_train=False) eval_callback = CustomEvalCallback(eval_env, log_path=model_logdir, @@ -181,23 +128,17 @@ def train(env, implemented_combos, model_logdir, arg_dict, pretrained_model=None record=arg_dict["record"], camera_id=arg_dict["camera"]) callbacks_list.append(eval_callback) - # plotting_callback = PlottingCallback(model_logdir) - with ProgressBarManager(total_timesteps=arg_dict["steps"]) as progress_callback: - callbacks_list.append(progress_callback) - model.learn(total_timesteps=arg_dict["steps"], callback=callbacks_list) + + model.learn(total_timesteps=arg_dict["steps"], callback=callbacks_list) model.save(os.path.join(model_logdir, model_name)) print("Training time: {:.2f} s".format(time.time() - start_time)) - # info_keywords in monitor class above is neccessary for pybullet to save_results - # when using the info_keywords for mujoco we get an error - if arg_dict["engine"] == "pybullet": - save_results(arg_dict, model_name, env, model_logdir) return model def get_parser(): parser = argparse.ArgumentParser() #Envinronment - parser.add_argument("-cfg", "--config", default="configs/train.json", help="Can be passed instead of all arguments") + parser.add_argument("-cfg", "--config", default="configs/train_rddl.json", help="Can be passed instead of all arguments") parser.add_argument("-n", "--env_name", type=str, help="The name of environment") parser.add_argument("-ws", "--workspace", type=str, help="The name of workspace") parser.add_argument("-p", "--engine", type=str, help="Name of the simulation engine you want to use") @@ -241,7 +182,6 @@ def get_parser(): parser.add_argument("-r", "--record", type=int, help="1: make a gif of model perfomance, 2: make a video of model performance, 0: don't record") #Mujoco parser.add_argument("-i", "--multiprocessing", type=int, help="True: multiprocessing on (specify also the number of vectorized environemnts), False: multiprocessing off") - parser.add_argument("-v", 
"--vectorized_envs", type=int, help="The number of vectorized environments to run at once (mujoco multiprocessing only)") #Paths parser.add_argument("-m", "--model_path", type=str, help="Path to the the trained model to test") parser.add_argument("-vp", "--vae_path", type=str, help="Path to a trained VAE in 2dvu reward type") @@ -252,10 +192,15 @@ def get_parser(): def get_arguments(parser): args = parser.parse_args() + if not os.path.exists(args.config): + # try looking in the package main dir + args.config = os.path.join(myGym.__path__[0], args.config) + if not os.path.exists(args.config): + raise ValueError("Could not find config file: {}".format(args.config)) with open(args.config, "r") as f: arg_dict = commentjson.load(f) for key, value in vars(args).items(): - if value is not None and key is not "config": + if value != None and key != "config": if key in ["robot_init", "object_sampling_area"]: arg_dict[key] = [float(arg_dict[key][i]) for i in range(len(arg_dict[key]))] else: @@ -268,13 +213,10 @@ def main(): arg_dict = get_arguments(parser) # Check if we chose one of the existing engines - if arg_dict["engine"] not in AVAILABLE_SIMULATION_ENGINES: - print(f"Invalid simulation engine. Valid arguments: --engine {AVAILABLE_SIMULATION_ENGINES}.") - return if not os.path.isabs(arg_dict["logdir"]): - arg_dict["logdir"] = pkg_resources.resource_filename("myGym", arg_dict["logdir"]) + arg_dict["logdir"] = os.path.join("./", arg_dict["logdir"]) os.makedirs(arg_dict["logdir"], exist_ok=True) - model_logdir_ori = os.path.join(arg_dict["logdir"], "_".join((arg_dict["task_type"],arg_dict["workspace"],arg_dict["robot"],arg_dict["robot_action"],arg_dict["reward_type"],arg_dict["algo"]))) + model_logdir_ori = os.path.join(arg_dict["logdir"], "_".join((arg_dict["workspace"],arg_dict["robot"],arg_dict["robot_action"],arg_dict["algo"]))) model_logdir = model_logdir_ori add = 2 while True: @@ -290,5 +232,6 @@ def main(): train(env, implemented_combos, model_logdir, arg_dict, arg_dict["pretrained_model"]) print(model_logdir) + if __name__ == "__main__": main() diff --git a/myGym/trainray.py b/myGym/trainray.py deleted file mode 100644 index 7b0b57c4..00000000 --- a/myGym/trainray.py +++ /dev/null @@ -1,9 +0,0 @@ -from ray.rllib.algorithms.ppo import PPOConfig -config = PPOConfig() -config = config.training(gamma=0.9, lr=0.01, kl_coeff=0.3) -config = config.resources(num_gpus=1) -config = config.rollouts(num_rollout_workers=4) -print(config.to_dict()) -# Build a Algorithm object from the config and run 1 training iteration. 
-algo = config.build(env="CartPole-v1") -algo.train() \ No newline at end of file diff --git a/myGym/trainray_ppo_reach.py b/myGym/trainray_ppo_reach.py deleted file mode 100644 index 648776f4..00000000 --- a/myGym/trainray_ppo_reach.py +++ /dev/null @@ -1,208 +0,0 @@ -from ray.rllib.algorithms.ppo import PPOConfig -from ray.rllib.algorithms.callbacks import DefaultCallbacks -from ray.rllib.evaluation import Episode, RolloutWorker -from ray.rllib.policy import Policy -from ray.rllib.policy.sample_batch import SampleBatch -from ray.rllib.env.base_env import BaseEnv -from ray.tune.logger import pretty_print -from ray import tune -from train import get_parser, get_arguments, AVAILABLE_SIMULATION_ENGINES -import os -import gymnasium as gym -from gymnasium.wrappers import EnvCompatibility -from ray.tune.registry import register_env -from myGym.envs.gym_env import GymEnv -import numpy as np -import time - -from typing import Dict, Tuple -import argparse -import gymnasium as gym - - -class MyCallbacks(DefaultCallbacks): - def on_episode_start( - self, - *, - worker: RolloutWorker, - base_env: BaseEnv, - policies: Dict[str, Policy], - episode: Episode, - env_index: int, - **kwargs, - ): - # Make sure this episode has just been started (only initial obs - # logged so far). - assert episode.length == 0, ( - "ERROR: `on_episode_start()` callback should be called right " - "after env reset!" - ) - # Create lists to store angles in - episode.user_data["pole_angles"] = [] - episode.hist_data["pole_angles"] = [] - - def on_episode_step( - self, - *, - worker: RolloutWorker, - base_env: BaseEnv, - policies: Dict[str, Policy], - episode: Episode, - env_index: int, - **kwargs, - ): - # Make sure this episode is ongoing. - assert episode.length > 0, ( - "ERROR: `on_episode_step()` callback should not be called right " - "after env reset!" - ) - pole_angle = abs(episode.last_observation_for()[2]) - raw_angle = abs(episode.last_raw_obs_for()[2]) - assert pole_angle == raw_angle - episode.user_data["pole_angles"].append(pole_angle) - - # Sometimes our pole is moving fast. We can look at the latest velocity - # estimate from our environment and log high velocities. - if np.abs(episode.last_info_for()["pole_angle_vel"]) > 0.25: - print("This is a fast pole!") - - def on_episode_end( - self, - *, - worker: RolloutWorker, - base_env: BaseEnv, - policies: Dict[str, Policy], - episode: Episode, - env_index: int, - **kwargs, - ): - # Check if there are multiple episodes in a batch, i.e. - # "batch_mode": "truncate_episodes". - if worker.config.batch_mode == "truncate_episodes": - # Make sure this episode is really done. - assert episode.batch_builder.policy_collectors["default_policy"].batches[ - -1 - ]["dones"][-1], ( - "ERROR: `on_episode_end()` should only be called " - "after episode is done!" - ) - pole_angle = np.mean(episode.user_data["pole_angles"]) - episode.custom_metrics["pole_angle"] = pole_angle - episode.hist_data["pole_angles"] = episode.user_data["pole_angles"] - - def on_sample_end(self, *, worker: RolloutWorker, samples: SampleBatch, **kwargs): - # We can also do our own sanity checks here. - assert ( - samples.count == 2000 - ), f"I was expecting 2000 here, but got {samples.count}!" - - def on_train_result(self, *, algorithm, result: dict, **kwargs): - # you can mutate the result dict to add new fields to return - result["callback_ok"] = True - - # Normally, RLlib would aggregate any custom metric into a mean, max and min - # of the given metric. 
- # For the sake of this example, we will instead compute the variance and mean - # of the pole angle over the evaluation episodes. - pole_angle = result["custom_metrics"]["pole_angle"] - var = np.var(pole_angle) - mean = np.mean(pole_angle) - result["custom_metrics"]["pole_angle_var"] = var - result["custom_metrics"]["pole_angle_mean"] = mean - # We are not interested in these original values - del result["custom_metrics"]["pole_angle"] - del result["custom_metrics"]["num_batches"] - - def on_learn_on_batch( - self, *, policy: Policy, train_batch: SampleBatch, result: dict, **kwargs - ) -> None: - result["sum_actions_in_train_batch"] = train_batch["actions"].sum() - # Log the sum of actions in the train batch. - print( - "policy.learn_on_batch() result: {} -> sum actions: {}".format( - policy, result["sum_actions_in_train_batch"] - ) - ) - - def on_postprocess_trajectory( - self, - *, - worker: RolloutWorker, - episode: Episode, - agent_id: str, - policy_id: str, - policies: Dict[str, Policy], - postprocessed_batch: SampleBatch, - original_batches: Dict[str, Tuple[Policy, SampleBatch]], - **kwargs, - ): - if "num_batches" not in episode.custom_metrics: - episode.custom_metrics["num_batches"] = 0 - episode.custom_metrics["num_batches"] += 1 - - -def main(): - #Start time counter and print it - start_time = time.time() - print(start_time) - parser = get_parser() - arg_dict = get_arguments(parser) - - if not os.path.isabs(arg_dict["logdir"]): - arg_dict["logdir"] = os.path.join("./", arg_dict["logdir"]) - os.makedirs(arg_dict["logdir"], exist_ok=True) - model_logdir_ori = os.path.join(arg_dict["logdir"], "_".join((arg_dict["task_type"],arg_dict["workspace"],arg_dict["robot"],arg_dict["robot_action"],arg_dict["algo"]))) - model_logdir = model_logdir_ori - add = 2 - while True: - try: - os.makedirs(model_logdir, exist_ok=False) - break - except: - model_logdir = "_".join((model_logdir_ori, str(add))) - add += 1 - - # Set env arguments - env_arguments = {"render_on": False, "visualize": arg_dict["visualize"], "workspace": arg_dict["workspace"], - "robot": arg_dict["robot"], "robot_init_joint_poses": arg_dict["robot_init"], - "robot_action": arg_dict["robot_action"],"max_velocity": arg_dict["max_velocity"], - "max_force": arg_dict["max_force"],"task_type": arg_dict["task_type"], - "action_repeat": arg_dict["action_repeat"], - "task_objects":arg_dict["task_objects"], "observation":arg_dict["observation"], "distractors":arg_dict["distractors"], - "num_networks":arg_dict.get("num_networks", 1), "network_switcher":arg_dict.get("network_switcher", "gt"), - "distance_type": arg_dict["distance_type"], "used_objects": arg_dict["used_objects"], - "active_cameras": arg_dict["camera"], "color_dict":arg_dict.get("color_dict", {}), - "max_steps": arg_dict["max_episode_steps"], "visgym":arg_dict["visgym"], - "reward": arg_dict["reward"], "logdir": arg_dict["logdir"], "vae_path": arg_dict["vae_path"], - "yolact_path": arg_dict["yolact_path"], "yolact_config": arg_dict["yolact_config"]} - - env_arguments["gui_on"] = False - - # Register OpenAI gym env in gymnasium - def env_creator(env_config): - return EnvCompatibility(GymEnv(**env_config)) - register_env('GymEnv-v0', env_creator) - - # Set up algo - algo = ( - PPOConfig() - .rollouts(num_rollout_workers=12) # You can try to increase or decrease based on your systems specs - .resources(num_gpus=1) # You can try to increase or decrease based on your systems specs - .environment(env='GymEnv-v0', env_config=env_arguments) - .build() - ) - - #Train - for i in 
range(200): - result = algo.train() - print(pretty_print(result)) - - if i % 100 == 0: - #action = algo.compute_single_action(obs) - #obs, reward, terminated, truncated, info = env.step(action) - checkpoint_dir = algo.save() - print(f"Checkpoint saved in directory {checkpoint_dir}") - #Print start time minus end time - print("--- %s seconds ---" % (time.time() - start_time)) -if __name__ == "__main__": - main() diff --git a/myGym/trainrayppo.py b/myGym/trainrayppo.py deleted file mode 100644 index 47b79f55..00000000 --- a/myGym/trainrayppo.py +++ /dev/null @@ -1,19 +0,0 @@ -from ray.rllib.algorithms.ppo import PPOConfig -from ray.tune.logger import pretty_print - - -algo = ( - PPOConfig() - .rollouts(num_rollout_workers=10) - .resources(num_gpus=1) - .environment(env="CartPole-v1") - .build() -) - -for i in range(10): - result = algo.train() - print(pretty_print(result)) - - if i % 5 == 0: - checkpoint_dir = algo.save() - print(f"Checkpoint saved in directory {checkpoint_dir}") \ No newline at end of file diff --git a/myGym/trajectory.txt b/myGym/trajectory.txt deleted file mode 100644 index c186c647..00000000 --- a/myGym/trajectory.txt +++ /dev/null @@ -1,340 +0,0 @@ -[ 13.63947071 46.49404337 34.98897878 154.40660495 -8.43316019 - 8.75505647 0. ] -[ 23.03022382 52.9324022 10.26970663 112.64983594 3.3500255 - -4.11878627 -3.19748864] -[ 23.83120954 53.17292871 12.69943217 112.71191671 6.4846686 - -5.70568116 -5.00478868] -[ 24.72088475 53.40316953 15.04529638 112.70807287 9.36316623 - -7.28295541 -6.02350917] -[ 25.76633313 53.51639673 17.27593977 112.61602464 11.85630538 - -9.09920692 -4.77974021] -[ 26.882498 53.58348956 19.50573275 112.48776724 14.225859 - -10.96198333 -2.96720977] -[ 27.99026135 53.85817431 21.69865119 112.5866467 16.63596035 - -13.00688196 -1.20832591] -[ 29.09910952 54.34645002 23.82666771 112.91138525 19.08409007 - -14.89854166 -0.21009449] -[ 30.28514499 54.77586352 25.93350029 113.20282977 21.43471072 - -16.22740888 0. ] -[ 31.52547203 55.16245197 28.02814389 113.4717309 23.72266285 - -17.31672281 0. ] - -[ 35.17233607 56.882108 50.03606026 171.40176078 -8.35708235 - 6.60459623 0. ] -[ 23.03022382 52.9324022 10.26970663 112.64983594 3.3500255 - -4.11878627 -3.19748864] -[ 23.83120954 53.17292871 12.69943217 112.71191671 6.4846686 - -5.70568116 -5.00478868] -[ 24.72088475 53.40316953 15.04529638 112.70807287 9.36316623 - -7.28295541 -6.02350917] -[ 25.76633313 53.51639673 17.27593977 112.61602464 11.85630538 - -9.09920692 -4.77974021] -[ 26.882498 53.58348956 19.50573275 112.48776724 14.225859 - -10.96198333 -2.96720977] -[ 27.99026135 53.85817431 21.69865119 112.5866467 16.63596035 - -13.00688196 -1.20832591] -[ 29.09910952 54.34645002 23.82666771 112.91138525 19.08409007 - -14.89854166 -0.21009449] -[ 30.28514499 54.77586352 25.93350029 113.20282977 21.43471072 - -16.22740888 0. ] -[ 31.52547203 55.16245197 28.02814389 113.4717309 23.72266285 - -17.31672281 0. 
] - -[ 55.06680045 36.23442066 12.52413422 130.23721056 17.25970869 - -14.96050798 -6.3933651 ] -[ 21.93234962 53.64374994 11.19624587 112.73470817 10.28428424 - -8.53039365 -5.74159576] -[ 22.8296819 53.95476189 13.45793531 112.672769 13.11268535 - -9.99127093 -6.95387006] -[ 23.8857069 54.12360863 15.59245058 112.53540778 15.52109033 - -11.72898384 -5.69393502] -[ 25.01696652 54.23397831 17.7206604 112.36615181 17.79311531 - -13.54786236 -3.70743207] -[ 26.19981059 54.31517212 19.88144033 112.17725194 20.01566549 - -15.37259418 -1.59154829] -[ 27.3862054 54.69648575 21.96902841 112.35516157 22.29075241 - -17.04744247 -0.17509974] -[ 28.5977246 55.15229457 24.03942953 112.61249171 24.57905903 - -18.28756357 0. ] -[ 29.86297885 55.56051318 26.09872822 112.84994598 26.8058054 - -19.29243524 0. ] -[ 31.16612578 55.94029701 28.15384476 113.07425872 28.9984629 - -20.27102218 0. ] - -[ 30.80651705 34.81330784 15.15328846 131.81201321 9.9057741 - -6.91257465 -3.66080344] -[ 21.93234962 53.64374994 11.19624587 112.73470817 10.28428424 - -8.53039365 -5.74159576] -[ 22.8296819 53.95476189 13.45793531 112.672769 13.11268535 - -9.99127093 -6.95387006] -[ 23.8857069 54.12360863 15.59245058 112.53540778 15.52109033 - -11.72898384 -5.69393502] -[ 25.01696652 54.23397831 17.7206604 112.36615181 17.79311531 - -13.54786236 -3.70743207] -[ 26.19981059 54.31517212 19.88144033 112.17725194 20.01566549 - -15.37259418 -1.59154829] -[ 27.3862054 54.69648575 21.96902841 112.35516157 22.29075241 - -17.04744247 -0.17509974] -[ 28.5977246 55.15229457 24.03942953 112.61249171 24.57905903 - -18.28756357 0. ] -[ 29.86297885 55.56051318 26.09872822 112.84994598 26.8058054 - -19.29243524 0. ] -[ 31.16612578 55.94029701 28.15384476 113.07425872 28.9984629 - -20.27102218 0. ] - -[ 43.66754056 59.38762511 54.53285684 175.22040931 -7.67350031 - 5.74375543 0. ] -[ 23.03022382 52.9324022 10.26970663 112.64983594 3.3500255 - -4.11878627 -3.19748864] -[ 23.83120954 53.17292871 12.69943217 112.71191671 6.4846686 - -5.70568116 -5.00478868] -[ 24.72088475 53.40316953 15.04529638 112.70807287 9.36316623 - -7.28295541 -6.02350917] -[ 25.76633313 53.51639673 17.27593977 112.61602464 11.85630538 - -9.09920692 -4.77974021] -[ 26.882498 53.58348956 19.50573275 112.48776724 14.225859 - -10.96198333 -2.96720977] -[ 27.99026135 53.85817431 21.69865119 112.5866467 16.63596035 - -13.00688196 -1.20832591] -[ 29.09910952 54.34645002 23.82666771 112.91138525 19.08409007 - -14.89854166 -0.21009449] -[ 30.28514499 54.77586352 25.93350029 113.20282977 21.43471072 - -16.22740888 0. ] -[ 31.52547203 55.16245197 28.02814389 113.4717309 23.72266285 - -17.31672281 0. ] - -[ 60.48958311 61.91817165 55.04372464 176.90363438 -1.15329069 - 4.20244993 0. ] -[ 22.85398006 53.01226526 10.36746474 112.62187142 5.61425724 - -4.58182503 -3.392914 ] -[ 23.66714208 53.26776935 12.77573327 112.64838505 8.71351727 - -6.13742572 -5.18691076] -[ 24.54708409 53.53480814 15.1271719 112.62440686 11.62396454 - -7.62146732 -6.68892836] -[ 25.59124677 53.67301693 17.35662769 112.51511504 14.12939298 - -9.3646357 -5.77452605] -[ 26.72118046 53.74445835 19.57226133 112.36516203 16.47474061 - -11.20056414 -3.96906869] -[ 27.83797492 54.05309244 21.74422092 112.47407417 18.873036 - -13.25286415 -2.22030575] -[ 28.93400846 54.59271365 23.88271345 112.78256969 21.36056905 - -15.41104757 -1.12555719] -[ 30.10504616 55.08884569 26.00144843 113.0618512 23.75349985 - -17.2898791 -0.27351956] -[ 31.35930642 55.49755398 28.0884849 113.31275202 26.02649678 - -18.59180028 0. 
] - -[ 14.07201469 52.27759173 47.05772404 166.26703175 -12.33084162 - 9.25262087 0. ] -[ 23.03022382 52.9324022 10.26970663 112.64983594 3.3500255 - -4.11878627 -3.19748864] -[ 23.83120954 53.17292871 12.69943217 112.71191671 6.4846686 - -5.70568116 -5.00478868] -[ 24.72088475 53.40316953 15.04529638 112.70807287 9.36316623 - -7.28295541 -6.02350917] -[ 25.76633313 53.51639673 17.27593977 112.61602464 11.85630538 - -9.09920692 -4.77974021] -[ 26.882498 53.58348956 19.50573275 112.48776724 14.225859 - -10.96198333 -2.96720977] -[ 27.99026135 53.85817431 21.69865119 112.5866467 16.63596035 - -13.00688196 -1.20832591] -[ 29.09910952 54.34645002 23.82666771 112.91138525 19.08409007 - -14.89854166 -0.21009449] -[ 30.28514499 54.77586352 25.93350029 113.20282977 21.43471072 - -16.22740888 0. ] -[ 31.52547203 55.16245197 28.02814389 113.4717309 23.72266285 - -17.31672281 0. ] - -[ 64.95232478 58.50308888 40.17768839 167.40993306 7.65502604 - 1.54838064 0. ] -[ 22.63183159 53.12238553 10.48973951 112.56379054 8.53353439 - -5.1811641 -3.64157638] -[ 23.46252657 53.39529534 12.86966459 112.54610013 11.58556877 - -6.69601396 -5.41829635] -[ 24.35119568 53.68766488 15.20724839 112.48656569 14.48340533 - -8.11222831 -7.14249378] -[ 25.38191585 53.87535358 17.44835775 112.35967864 17.04049134 - -9.71973689 -6.95917921] -[ 26.52814057 53.95393275 19.64834253 112.18412466 19.36116594 - -11.51451033 -5.21415609] -[ 27.65680919 54.3068743 21.79438987 112.30746737 21.74630508 - -13.57475694 -3.50072405] -[ 28.77810364 54.85064663 23.91642857 112.58582176 24.20116093 - -15.71157142 -2.4098174 ] -[ 29.94843472 55.39490109 26.03750772 112.84075053 26.61535923 - -17.84231094 -1.44620457] -[ 31.18618173 55.89541257 28.1379666 113.07509103 28.9421511 - -19.79973245 -0.5402476 ] - -[ 80.00209693 64.75992481 57.8368658 179.32432717 6.36778728 - 2.82108446 0. ] -[ 22.63183159 53.12238553 10.48973951 112.56379054 8.53353439 - -5.1811641 -3.64157638] -[ 23.46252657 53.39529534 12.86966459 112.54610013 11.58556877 - -6.69601396 -5.41829635] -[ 24.35119568 53.68766488 15.20724839 112.48656569 14.48340533 - -8.11222831 -7.14249378] -[ 25.38191585 53.87535358 17.44835775 112.35967864 17.04049134 - -9.71973689 -6.95917921] -[ 26.52814057 53.95393275 19.64834253 112.18412466 19.36116594 - -11.51451033 -5.21415609] -[ 27.65680919 54.3068743 21.79438987 112.30746737 21.74630508 - -13.57475694 -3.50072405] -[ 28.77810364 54.85064663 23.91642857 112.58582176 24.20116093 - -15.71157142 -2.4098174 ] -[ 29.94843472 55.39490109 26.03750772 112.84075053 26.61535923 - -17.84231094 -1.44620457] -[ 31.18618173 55.89541257 28.1379666 113.07509103 28.9421511 - -19.79973245 -0.5402476 ] - -[ 51.33380635 55.43864475 37.69848379 163.83643658 4.72213294 - 2.51080027 0. 
] -[ 22.63183159 53.12238553 10.48973951 112.56379054 8.53353439 - -5.1811641 -3.64157638] -[ 23.46252657 53.39529534 12.86966459 112.54610013 11.58556877 - -6.69601396 -5.41829635] -[ 24.35119568 53.68766488 15.20724839 112.48656569 14.48340533 - -8.11222831 -7.14249378] -[ 25.38191585 53.87535358 17.44835775 112.35967864 17.04049134 - -9.71973689 -6.95917921] -[ 26.52814057 53.95393275 19.64834253 112.18412466 19.36116594 - -11.51451033 -5.21415609] -[ 27.65680919 54.3068743 21.79438987 112.30746737 21.74630508 - -13.57475694 -3.50072405] -[ 28.77810364 54.85064663 23.91642857 112.58582176 24.20116093 - -15.71157142 -2.4098174 ] -[ 29.94843472 55.39490109 26.03750772 112.84075053 26.61535923 - -17.84231094 -1.44620457] -[ 31.18618173 55.89541257 28.1379666 113.07509103 28.9421511 - -19.79973245 -0.5402476 ] - -[ 57.65079851 54.15196691 32.6200129 159.77221184 9.17932293 - -0.55001958 0. ] -[ 22.39841407 53.27429778 10.71663228 112.61549421 9.08139949 - -6.66257719 -3.44821121] -[ 23.24156095 53.56892133 13.06642928 112.58436825 12.08446189 - -8.13865027 -5.20710911] -[ 24.15464957 53.86565248 15.36799544 112.50927443 14.90939737 - -9.55792832 -6.67340488] -[ 25.23086849 54.01743215 17.55823011 112.3623276 17.33952575 - -11.26218864 -5.66515416] -[ 26.39218651 54.09665937 19.74170338 112.18077151 19.6194065 - -13.07004462 -3.76024201] -[ 27.5255863 54.48898469 21.87644306 112.32918266 21.9983143 - -15.15606812 -2.06921091] -[ 28.65951447 55.04484052 23.99597791 112.60277837 24.43817877 - -17.29956498 -0.98911784] -[ 29.87685154 55.53240146 26.08738476 112.85024833 26.76139137 - -19.06121218 -0.16577415] -[ 31.16797522 55.935525 28.15108839 113.07672318 28.9791304 - -20.24601014 0. ] - -[ 64.83735606 54.03117125 30.52631813 157.96374997 13.79527764 - -3.84333133 0. ] -[ 22.06619114 53.51343571 11.04189533 112.68751643 9.89178841 - -8.81294143 -3.159954 ] -[ 22.92807565 53.83745407 13.34880052 112.63780549 12.82411109 - -10.23426617 -4.89374218] -[ 23.90610646 54.09730974 15.56914513 112.53093419 15.45929133 - -11.75652488 -5.29628663] -[ 25.02119179 54.22874096 17.7140026 112.36740542 17.77703676 - -13.52553488 -3.67463218] -[ 26.2013959 54.31318827 19.87726745 112.17920161 20.00661003 - -15.34228061 -1.61668244] -[ 27.38602446 54.69602957 21.9664359 112.35649251 22.28542078 - -17.03383551 -0.19028211] -[ 28.59708715 55.15246865 24.03715483 112.61395397 24.57458789 - -18.28397907 0. ] -[ 29.86222196 55.560885 26.09655007 112.85148606 26.80160371 - -19.28971326 0. ] -[ 31.16533887 55.94070893 28.15170271 113.07584573 28.99433663 - -20.26839318 0. ] - -[ 80.00209693 64.46981706 50.30532624 175.57235224 15.12032428 - 0.46626288 0. 
] -[ 22.5213138 53.19267812 10.59691099 112.5883235 8.79144207 - -5.8793521 -3.55107159] -[ 23.35783276 53.47597464 12.9625778 112.56422659 11.82036775 - -7.37581582 -5.31933762] -[ 24.25518997 53.77417557 15.28593395 112.4983134 14.6914882 - -8.78475031 -6.98484637] -[ 25.30988742 53.94215396 17.5004734 112.36083648 17.18225459 - -10.44457384 -6.36019345] -[ 26.46350688 54.02065397 19.69233962 112.18226399 19.48290621 - -12.24640291 -4.53283607] -[ 27.59450998 54.39179703 21.83282899 112.31725941 21.86458759 - -14.31904987 -2.82586094] -[ 28.72168662 54.94135891 23.95371648 112.59337269 24.3124838 - -16.45888361 -1.7409444 ] -[ 29.898588 55.48996451 26.07293673 112.84626219 26.71828057 - -18.59018671 -0.77690756] -[ 3.11754031e+01 5.59187049e+01 2.81455713e+01 1.13075901e+02 - 2.89632891e+01 -2.01071007e+01 -9.78944316e-02] - -[ 33.12632999 59.70995423 58.0478631 177.02865875 -6.57008903 - 6.26912983 -0.54837016] -[ 22.19420359 52.77960534 7.96044938 112.57129678 0.52827677 - -2.46426973 -2.61610008] -[ 23.00523578 52.95701493 10.38752009 112.65369427 3.68299262 - -4.25322616 -3.61848687] -[ 23.87545568 53.14571633 12.73963736 112.67812295 6.62653568 - -5.97282868 -4.19010357] -[ 24.80522397 53.3312602 15.04772622 112.6534682 9.40147029 - -7.65061118 -4.41112826] -[ 25.84571276 53.45645008 17.29573011 112.57378389 11.9338627 - -8.98310054 -4.35603669] -[ 27.0003678 53.49405757 19.50908587 112.44935806 14.25591318 - -9.91346599 -4.11617556] -[ 28.19054647 53.65902944 21.63579505 112.53257594 16.51191862 - -10.65189207 -3.90251969] -[ 29.26347297 54.0173019 23.49655508 112.84702306 18.78699998 - -11.4272626 -3.73170965] -[ 30.22671677 54.44829364 25.11822618 113.23448629 21.0584698 - -12.18900707 -3.61629061] -[ 31.17256498 54.90087093 26.65680967 113.64296208 23.3200012 - -12.94080297 -3.53080778] -[ 32.12277812 55.35994769 28.1481875 114.05923413 25.56834466 - -13.67850173 -3.46417003] -[ 33.08429277 55.82110149 29.60268683 114.48206141 27.80207914 - -14.39569971 -3.41254614] -[ 34.05867192 56.2833412 31.02196629 114.91383871 30.0208477 - -15.0855936 -3.37458172] -[ 35.05279538 56.74402492 32.41744628 115.35581247 32.22505736 - -15.745129 -3.34758066] -[ 36.07105567 57.20140494 33.79678175 115.80999234 34.41555769 - -16.37171029 -3.32909486] -[ 37.11508775 57.65484489 35.16272931 116.27978809 36.5933962 - -16.96035012 -3.3174486 ] -[ 38.18669821 58.10430994 36.51872311 116.7689966 38.76025388 - -17.50812875 -3.31058887] -[ 39.28694332 58.55044768 37.86701786 117.28192439 40.91819013 - -18.01236684 -3.30672445] -[ 40.41745203 58.99400477 39.2103872 117.82349837 43.06950545 - -18.46879507 -3.30469864] -[ 41.57985882 59.43471441 40.55333694 118.40165967 45.1853799 - -18.87253778 -3.30232815] -[ 42.77213148 59.86495343 41.90789295 119.04612557 47.01236062 - -19.20816803 -3.29295444] -[ 43.99797528 60.28025177 43.28702415 119.76008756 48.53146061 - -19.46744447 -3.27336939] -[ 45.25697445 60.68380607 44.68799603 120.52987225 49.88211509 - -19.65477301 -3.2440274 ] -[ 46.54420629 61.07978893 46.10796839 121.36093796 51.04062542 - -19.77454915 -3.20551038] -[ 47.87169328 61.46843222 47.56880895 122.26356789 51.95960922 - -19.83101459 -3.15872246] -[ 49.17927412 61.86214404 48.98903937 123.21769459 52.67390849 - -19.83260771 -3.10482615] - - - - - - - - - - - - - diff --git a/myGym/utils/callbackstf2.py b/myGym/utils/callbackstf2.py index 738600d5..817793c3 100644 --- a/myGym/utils/callbackstf2.py +++ b/myGym/utils/callbackstf2.py @@ -1,6 +1,5 @@ -from stable_baselines.results_plotter import 
load_results, ts2xy -from stable_baselines.common.callbacks import BaseCallback, EvalCallback -from stable_baselines import results_plotter +#from stable_baselines3.results_plotter import load_results, ts2xy +from stable_baselines3.common.callbacks import BaseCallback, EvalCallback import os import matplotlib.pyplot as plt @@ -11,13 +10,13 @@ from typing import Union, List, Dict, Any, Optional from tqdm.auto import tqdm import numpy as np -import tensorflow as tf -import gym +#import tensorflow as tf +import gymnasium as gym import warnings -from stable_baselines.common.vec_env import VecEnv, sync_envs_normalization, DummyVecEnv -from stable_baselines.common.evaluation import evaluate_policy +from stable_baselines3.common.vec_env import VecEnv, sync_envs_normalization, DummyVecEnv +from stable_baselines3.common.evaluation import evaluate_policy class CustomEvalCallback(EvalCallback): @@ -50,7 +49,7 @@ def __init__(self, eval_env: Union[gym.Env, VecEnv], gui_on = True, record=False, camera_id=0, - record_steps_limit=256): # pybullet or mujoco + record_steps_limit=256): super(EvalCallback, self).__init__(callback_on_new_best, verbose=verbose) self.n_eval_episodes = n_eval_episodes self.eval_freq = eval_freq @@ -123,9 +122,6 @@ def evaluate_policy( images.append(image) print(f"appending image: total size: {len(images)}]") - if self.physics_engine == "mujoco" and self.gui_on: # Rendering for mujoco engine - self.eval_env.render() - episode_rewards.append(episode_reward) success_episodes_num += is_successful distance_error_sum += distance_error @@ -259,31 +255,6 @@ def _on_step(self) -> bool: print("Saving new best model to {}".format(self.save_path)) self.model.save(self.save_path) - if self.engine == "mujoco": # Mujoco has additional prints - # Temporal workaround multiprocessing - if not self.multiprocessing and self.verbose > 0: - # Current success rate over the last 'self.STATS_EVERY' episodes - current_success_rate = np.mean(self.env.successfull_failed_episodes[-self.STATS_EVERY:]) - print(f"Best average reward: {self.best_average_reward:.2f} \ - - Last average reward: {average_reward:.2f} \ - and current success rate: {current_success_rate:.2f} \ - over the last {self.STATS_EVERY} episodes \ - - Current touch threshold: {self.env.sensor_goal_threshold:.2f} - \ - Current episode: {episode}") - - if self.engine == "mujoco": - # Save graph of success rate every 'self.save_success_graph_every_steps'. 
- if self.n_calls % self.save_success_graph_every_steps == 0 and not self.multiprocessing: - # Save graph - generate_and_save_mean_graph_from_1_or_2arrays( - data_array_1=self.env.successfull_failed_episodes, - data_array_2=None, - save_dir=self.logdir, - average_x_axis_span=self.success_graph_mean_past_episodes , - axis_x_name="episodes", - axis_y_names=["success_rate"]) - return True - class ProgressBarCallback(BaseCallback): """ diff --git a/myGym/utils/helpers.py b/myGym/utils/helpers.py index 80f9951d..0f13ee2f 100644 --- a/myGym/utils/helpers.py +++ b/myGym/utils/helpers.py @@ -6,49 +6,49 @@ def get_workspace_dict(): 'robot': {'position': [0.0, 0.0, 0.0], 'orientation': [0.0, 0.0, 0.5*np.pi]}, 'camera': {'position': [[0.56, -1.71, 0.6], [-1.3, 3.99, 0.6], [-3.43, 0.67, 1.0], [2.76, 2.68, 1.0], [-0.54, 1.19, 3.4]], 'target': [[0.53, -1.62, 0.59], [-1.24, 3.8, 0.55], [-2.95, 0.83, 0.8], [2.28, 2.53, 0.8], [-0.53, 1.2, 3.2]]}, - 'borders':[-0.7, 0.7, 0.3, 1.3, -0.9, -0.9]}, + 'borders':[-0.7, 0.7, 0.3, 1.3, -0.9, -0.9], 'reachable_borders':[-0.7, 0.7, 0.3, 1.3, -0.9, -0.9]}, 'collabtable': {'urdf': 'collabtable.urdf', 'texture': 'collabtable.jpg', 'transform': {'position':[0.45, -5.1, -1.05], 'orientation':[0.0, 0.0, -0.35*np.pi]}, 'robot': {'position': [0.0, 0.0, 0.0], 'orientation': [0.0, 0.0, 0.5*np.pi]}, 'camera': {'position': [[-0.25, 3.24, 1.2], [-0.44, -1.34, 1.0], [-1.5, 2.6, 1.0], [1.35, -1.0, 1.0], [-0.1, 1.32, 1.4]], 'target': [[-0.0, 0.56, 0.6], [-0.27, 0.42, 0.7], [-1, 2.21, 0.8], [-0.42, 2.03, 0.2], [-0.1, 1.2, 0.7]]}, - 'borders':[-0.7, 0.7, 0.5, 1.2, 0.2, 0.2]}, + 'borders':[-0.7, 0.7, 0.5, 1.2, 0.2, 0.2], 'reachable_borders':[-0.7, 0.7, 0.5, 1.2, 0.2, 0.2]}, 'darts': {'urdf': 'darts.urdf', 'texture': 'darts.jpg', 'transform': {'position':[-1.4, -6.7, -1.05], 'orientation':[0.0, 0.0, -1.0*np.pi]}, 'robot': {'position': [0.0, 0.0, 0.0], 'orientation': [0.0, 0.0, 0.5*np.pi]}, 'camera': {'position': [[-0.0, 2.1, 1.0], [0.0, -1.5, 1.2], [2.3, 0.5, 1.0], [-2.6, 0.5, 1.0], [-0.0, 1.1, 4.9]], 'target': [[0.0, 0.0, 0.7], [-0.0, 1.3, 0.6], [1.0, 0.9, 0.9], [-1.6, 0.9, 0.9], [-0.0, 1.2, 3.1]]}, - 'borders':[-0.7, 0.7, 0.3, 1.3, -0.9, -0.9]}, + 'borders':[-0.7, 0.7, 0.3, 1.3, -0.9, -0.9], 'reachable_borders':[-0.7, 0.7, 0.3, 1.3, -0.9, -0.9]}, 'drawer': {'urdf': 'drawer.urdf', 'texture': 'drawer.jpg', 'transform': {'position':[-4.81, 1.75, -1.05], 'orientation':[0.0, 0.0, 0.0*np.pi]}, 'robot': {'position': [0.0, 0.2, 0.0], 'orientation': [0, 0, 0.5*np.pi]}, 'camera': {'position': [[-0.14, -1.63, 1.0], [-0.14, 3.04, 1.0], [-1.56, -0.92, 1.0], [1.2, -1.41, 1.0], [-0.18, 0.88, 2.5]], 'target': [[-0.14, -0.92, 0.8], [-0.14, 2.33, 0.8], [-0.71, -0.35, 0.7], [0.28, -0.07, 0.6], [-0.18, 0.84, 2.1]]}, - 'borders':[-0.7, 0.7, 0.4, 1.3, 0.8, 0.1]}, + 'borders':[-0.7, 0.7, 0.4, 1.3, 0.8, 0.1], 'reachable_borders':[-0.7, 0.7, 0.4, 1.3, 0.8, 0.1]}, 'football': {'urdf': 'football.urdf', 'texture': 'football.jpg', 'transform': {'position':[4.2, -5.4, -1.05], 'orientation':[0.0, 0.0, -1.0*np.pi]}, 'robot': {'position': [0.0, 0.0, 0.0], 'orientation': [0.0, 0.0, 0.5*np.pi]}, 'camera': {'position': [[-0.0, 2.1, 1.0], [0.0, -1.7, 1.2], [3.5, -0.6, 1.0], [-3.5, -0.7, 1.0], [-0.0, 2.0, 4.9]], 'target': [[0.0, 0.0, 0.7], [-0.0, 1.3, 0.2], [3.05, -0.2, 0.9], [-2.9, -0.2, 0.9], [-0.0, 2.1, 3.6]]}, - 'borders':[-0.7, 0.7, 0.3, 1.3, -0.9, -0.9]}, + 'borders':[-0.7, 0.7, 0.3, 1.3, -0.9, -0.9], 'reachable_borders':[-0.7, 0.7, 0.3, 1.3, -0.9, -0.9]}, 'fridge': {'urdf': 'fridge.urdf', 'texture': 'fridge.jpg', 
'transform': {'position':[1.6, -5.95, -1.05], 'orientation':[0.0, 0.0, 0*np.pi]}, 'robot': {'position': [0.0, 0.0, 0.0], 'orientation': [0.0, 0.0, 0.5*np.pi]}, 'camera': {'position': [[0.0, -1.3, 1.0], [0.0, 2.35, 1.2], [-1.5, 0.85, 1.0], [1.4, 0.85, 1.0], [0.0, 0.55, 2.5]], 'target': [[0.0, 0.9, 0.7], [0.0, 0.9, 0.6], [0.0, 0.55, 0.5], [0.4, 0.55, 0.7], [0.0, 0.45, 1.8]]}, - 'borders':[-0.7, 0.7, 0.3, 0.5, -0.9, -0.9]}, + 'borders':[-0.7, 0.7, 0.3, 0.5, -0.9, -0.9], 'reachable_borders':[-0.7, 0.7, 0.3, 0.5, -0.9, -0.9]}, 'maze': {'urdf': 'maze.urdf', 'texture': 'maze.jpg', 'transform': {'position':[6.7, -3.1, 0.0], 'orientation':[0.0, 0.0, -0.5*np.pi]}, 'robot': {'position': [0.0, 0.0, 0.1], 'orientation': [0.0, 0.0, 0.5*np.pi]}, 'camera': {'position': [[0.0, -1.4, 2.3], [-0.0, 5.9, 1.9], [4.7, 2.7, 2.0], [-3.2, 2.7, 2.0], [-0.0, 3.7, 5.0]], 'target': [[0.0, -1.0, 1.9], [-0.0, 5.6, 1.7], [3.0, 2.7, 1.5], [-2.9, 2.7, 1.7], [-0.0, 3.65, 4.8]]}, - 'borders':[-2.5, 2.2, 0.7, 4.7, 0.05, 0.05]}, + 'borders':[-2.5, 2.2, 0.7, 4.7, 0.05, 0.05], 'reachable_borders':[-2.5, 2.2, 0.7, 4.7, 0.05, 0.05]}, 'stairs': {'urdf': 'stairs.urdf', 'texture': 'stairs.jpg', 'transform': {'position':[-5.5, -0.08, -1.05], 'orientation':[0.0, 0.0, -0.20*np.pi]}, 'robot': {'position': [0.0, 0.0, 0.0], 'orientation': [0.0, 0.0, 0.5*np.pi]}, 'camera': {'position': [[0.04, -1.64, 1.0], [0.81, 3.49, 1.0], [-2.93, 1.76, 1.0], [4.14, 0.33, 1.0], [2.2, 1.24, 3.2]], 'target': [[0.18, -1.12, 0.85], [0.81, 2.99, 0.8], [-1.82, 1.57, 0.7], [3.15, 0.43, 0.55], [2.17, 1.25, 3.1]]}, - 'borders':[-0.5, 2.5, 0.8, 1.6, 0.1, 0.1]}, + 'borders':[-0.5, 2.5, 0.8, 1.6, 0.1, 0.1], 'reachable_borders':[-0.5, 2.5, 0.8, 1.6, 0.1, 0.1]}, 'table': {'urdf': 'table.urdf', 'texture': 'table.jpg', 'transform': {'position':[-0.0, -0.0, -1.05], 'orientation':[0.0, 0.0, 0*np.pi]}, 'robot': {'position': [0.0, 0.0, 0.0], 'orientation': [0.0, 0.0, 0.5*np.pi]}, @@ -56,7 +56,7 @@ def get_workspace_dict(): [0.0, 1.6, 0.8], [-0.0, -0.5, 0.8], [0.8, 0.9, 0.6], [-0.8, 0.9, 0.8], [0.0, 0.9, 1.]], 'target': [[0.0, 2.1, 0.9], [-0.0, -0.8, 0.9], [1.4, 0.9, 0.88], [-1.4, 0.9, 0.88], [0.0, 0.80, 1.], [0.0, 1.3, 0.5], [-0.0, -0.0, 0.6], [0.6, 0.9, 0.4], [-0.6, 0.9, 0.5], [0.0, 0.898, 0.8]]}, - 'borders':[-0.7, 0.7, 0.5, 1.3, 0.1, 0.1]}, + 'borders':[-0.7, 0.7, 0.5, 1.3, 0.1, 0.1], 'reachable_borders':[-0.5, 0.5, 0.5, 0.9, 0.05, 0.4]}, 'table_tiago': {'urdf': 'table_tiago.urdf', 'texture': 'table.jpg', 'transform': {'position':[-0.0, -0.0, -1.05], 'orientation':[0.0, 0.0, 0*np.pi]}, 'robot': {'position': [0.0, 0.3, -0.85], 'orientation': [0.0, 0.0, 0.5*np.pi]}, @@ -64,7 +64,7 @@ def get_workspace_dict(): [0.0, 1.6, 0.8], [-0.0, -0.5, 0.8], [0.8, 0.9, 0.6], [-0.8, 0.9, 0.8], [0.0, 0.9, 1.]], 'target': [[0.0, 2.1, 0.9], [-0.0, -0.8, 0.9], [1.4, 0.9, 0.88], [-1.4, 0.9, 0.88], [0.0, 0.80, 1.], [0.0, 1.3, 0.5], [-0.0, -0.0, 0.6], [0.6, 0.9, 0.4], [-0.6, 0.9, 0.5], [0.0, 0.898, 0.8]]}, - 'borders':[-0.7, 0.7, 0.5, 1.3, 0.1, 0.1]}, + 'borders':[-0.7, 0.7, 0.5, 1.3, 0.1, 0.1], 'reachable_borders':[-0.5, 0.5, 0.5, 0.9, 0.05, 0.4]}, 'table_nico': {'urdf': 'table_nico.urdf', 'texture': 'table.jpg', 'transform': {'position':[-0.0, -0.0, -1.05], 'orientation':[0.0, 0.0, 0*np.pi]}, 'robot': {'position': [0.0, 0.0, 0.0], 'orientation': [0.0, 0.0, 0.5*np.pi]}, @@ -72,19 +72,19 @@ def get_workspace_dict(): [0.0, 1.6, 0.8], [-0.0, -0.5, 0.8], [0.8, 0.9, 0.6], [-0.8, 0.9, 0.8], [0.0, 0.9, 1.]], 'target': [[0.0, 2.1, 0.9], [-0.0, -0.8, 0.9], [1.4, 0.9, 0.88], [-1.4, 0.9, 0.88], [0.0, 0.80, 
1.], [0.0, 1.3, 0.5], [-0.0, -0.0, 0.6], [0.6, 0.9, 0.4], [-0.6, 0.9, 0.5], [0.0, 0.898, 0.8]]}, - 'borders':[-0.7, 0.7, 0.5, 1.3, 0.1, 0.1]}, + 'borders':[-0.7, 0.7, 0.5, 1.3, 0.1, 0.1], 'reachable_borders':[-0.5, 0.5, 0.5, 0.9, 0.05, 0.4]}, 'verticalmaze': {'urdf': 'verticalmaze.urdf', 'texture': 'verticalmaze.jpg', 'transform': {'position':[-5.7, -7.55, -1.05], 'orientation':[0.0, 0.0, 0.5*np.pi]}, 'robot': {'position': [0.0, 0.0, 0.0], 'orientation': [0.0, 0.0, 0.5*np.pi]}, 'camera': {'position': [[-0.0, -1.25, 1.0], [0.0, 1.35, 1.3], [1.7, -1.25, 1.0], [-1.6, -1.25, 1.0], [0.0, 0.05, 2.5]], 'target': [[-0.0, -1.05, 1.0], [0.0, 0.55, 1.3], [1.4, -0.75, 0.9], [-1.3, -0.75, 0.9], [0.0, 0.15, 2.1]]}, - 'borders':[-0.7, 0.8, 0.65, 0.65, 0.7, 1.4]}, + 'borders':[-0.7, 0.8, 0.65, 0.65, 0.7, 1.4], 'reachable_borders':[-0.7, 0.8, 0.65, 0.65, 0.7, 1.4]}, 'modularmaze': {'urdf': 'modularmaze.urdf', 'texture': 'verticalmaze.jpg', 'transform': {'position':[-7, 8, 0.0], 'orientation':[0.0, 0.0, 0.0]}, 'robot': {'position': [0.0, -0.5, 0.05], 'orientation': [0.0, 0.0, 0.5*np.pi]}, 'camera': {'position': [[-0.0, -1.25, 1.0], [0.0, 1.35, 1.3], [1.7, -1.25, 1.0], [-1.6, -1.25, 1.0], [0.0, 0.7, 2.1], [-0.0, -0.3, 0.2]], 'target': [[-0.0, -1.05, 0.9], [0.0, 0.55, 1.3], [1.4, -0.75, 0.9], [-1.3, -0.75, 0.9], [0.0, 0.71, 1.8], [-0.0, -0.25, 0.199]]}, - 'borders':[-0.7, 0.8, 0.65, 0.65, 0.7, 1.4]}} + 'borders':[-0.7, 0.8, 0.65, 0.65, 0.7, 1.4], 'reachable_borders':[-0.7, 0.8, 0.65, 0.65, 0.7, 1.4]}} return ws_dict @@ -127,3 +127,17 @@ def get_robot_dict(): 'hsr': {'path': '/envs/robots/hsr/hsrb4s.urdf', 'position': np.array([0.0, -0.15, -0.4]), 'orientation': [0.0, 0.0, 0 * np.pi]}, } return r_dict + + +def get_module_type(observation): + """ + Get source of the information from environment (ground_truth, yolact, vae) + + Returns: + :return source: (string) Source of information + """ + if observation["actual_state"] not in ["vae", "yolact", "voxel", "dope"]: + src = "ground_truth_6D" if "6D" in observation["actual_state"] else "ground_truth" + else: + src = observation["actual_state"] + return src diff --git a/myGym/utils/modder.py b/myGym/utils/modder.py deleted file mode 100644 index 3f833b7e..00000000 --- a/myGym/utils/modder.py +++ /dev/null @@ -1,457 +0,0 @@ -""" -Utilites for changing textures and materials after creating a -MuJoCo simulation. This allows for super fast scene generation. 
-""" -from collections import defaultdict -import numpy as np -from mujoco_py import cymj - - -class BaseModder(): - - def __init__(self, sim, random_state=None): - self.sim = sim - if random_state is None: - self.random_state = np.random.RandomState() - elif isinstance(random_state, int): - # random_state assumed to be an int - self.random_state = np.random.RandomState(random_state) - else: - self.random_state = random_state - - @property - def model(self): - # Available for quick convenience access - return self.sim.model - - -class LightModder(BaseModder): - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - def set_pos(self, name, value): - lightid = self.get_lightid(name) - assert lightid > -1, "Unkwnown light %s" % name - - value = list(value) - assert len(value) == 3, "Expected 3-dim value, got %s" % value - - self.model.light_pos[lightid] = value - - def set_dir(self, name, value): - lightid = self.get_lightid(name) - assert lightid > -1, "Unkwnown light %s" % name - - value = list(value) - assert len(value) == 3, "Expected 3-dim value, got %s" % value - - self.model.light_dir[lightid] = value - - def set_active(self, name, value): - lightid = self.get_lightid(name) - assert lightid > -1, "Unkwnown light %s" % name - - self.model.light_active[lightid] = value - - def set_specular(self, name, value): - lightid = self.get_lightid(name) - assert lightid > -1, "Unkwnown light %s" % name - - value = list(value) - assert len(value) == 3, "Expected 3-dim value, got %s" % value - - self.model.light_specular[lightid] = value - - def set_ambient(self, name, value): - lightid = self.get_lightid(name) - assert lightid > -1, "Unkwnown light %s" % name - - value = list(value) - assert len(value) == 3, "Expected 3-dim value, got %s" % value - - self.model.light_ambient[lightid] = value - - def set_diffuse(self, name, value): - lightid = self.get_lightid(name) - assert lightid > -1, "Unkwnown light %s" % name - - value = list(value) - assert len(value) == 3, "Expected 3-dim value, got %s" % value - - self.model.light_diffuse[lightid] = value - - def set_castshadow(self, name, value): - lightid = self.get_lightid(name) - assert lightid > -1, "Unkwnown light %s" % name - self.model.light_castshadow[lightid] = value - - def get_lightid(self, name): - return self.model.light_name2id(name) - - -class CameraModder(BaseModder): - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - def set_fovy(self, name, value): - camid = self.get_camid(name) - assert 0 < value < 180 - assert camid > -1, "Unknown camera %s" % name - self.model.cam_fovy[camid] = value - - def get_quat(self, name): - camid = self.get_camid(name) - assert camid > -1, "Unknown camera %s" % name - return self.model.cam_quat[camid] - - def set_quat(self, name, value): - value = list(value) - assert len(value) == 4, ( - "Expectd value of length 3, instead got %s" % value) - camid = self.get_camid(name) - assert camid > -1, "Unknown camera %s" % name - self.model.cam_quat[camid] = value - - def get_pos(self, name): - camid = self.get_camid(name) - assert camid > -1, "Unknown camera %s" % name - return self.model.cam_pos[camid] - - def set_pos(self, name, value): - value = list(value) - assert len(value) == 3, ( - "Expected value of length 3, instead got %s" % value) - camid = self.get_camid(name) - assert camid > -1 - self.model.cam_pos[camid] = value - - def get_camid(self, name): - return self.model.camera_name2id(name) - - -class MaterialModder(BaseModder): - """ - Modify material 
properties of a model. Example use: - - sim = MjSim(...) - modder = MaterialModder(sim) - modder.set_specularity('some_geom', 0.5) - .rand_all('another_geom') - - """ - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - def set_specularity(self, name, value): - assert 0 <= value <= 1.0 - mat_id = self.get_mat_id(name) - self.model.mat_specular[mat_id] = value - - def set_shininess(self, name, value): - assert 0 <= value <= 1.0 - mat_id = self.get_mat_id(name) - self.model.mat_shininess[mat_id] = value - - def set_reflectance(self, name, value): - assert 0 <= value <= 1.0 - mat_id = self.get_mat_id(name) - self.model.mat_reflectance[mat_id] = value - - def set_texrepeat(self, name, repeat_x, repeat_y): - mat_id = self.get_mat_id(name) - # ensure the following is set to false, so that repeats are - # relative to the extent of the body. - self.model.mat_texuniform[mat_id] = 0 - self.model.mat_texrepeat[mat_id, :] = [repeat_x, repeat_y] - - def rand_all(self, name): - self.rand_specularity(name) - self.rand_shininess(name) - self.rand_reflectance(name) - - def rand_specularity(self, name): - value = 0.1 + 0.2 * self.random_state.uniform() - self.set_specularity(name, value) - - def rand_shininess(self, name): - value = 0.1 + 0.5 * self.random_state.uniform() - self.set_shininess(name, value) - - def rand_reflectance(self, name): - value = 0.1 + 0.5 * self.random_state.uniform() - self.set_reflectance(name, value) - - def rand_texrepeat(self, name, max_repeat=5): - repeat_x = self.random_state.randint(0, max_repeat) + 1 - repeat_y = self.random_state.randint(0, max_repeat) + 1 - self.set_texrepeat(name, repeat_x, repeat_y) - - def get_mat_id(self, name): - """ Returns the material id based on the geom name. """ - geom_id = self.model.geom_name2id(name) - return self.model.geom_matid[geom_id] - - -class TextureModder(BaseModder): - """ - Modify textures in model. Example use: - - sim = MjSim(...) - modder = TextureModder(sim) - modder.whiten_materials() # ensures materials won't impact colors - modder.set_checker('some_geom', (255, 0, 0), (0, 0, 0)) - modder.rand_all('another_geom') - - Note: in order for the textures to take full effect, you'll need to set - the rgba values for all materials to [1, 1, 1, 1], otherwise the texture - colors will be modulated by the material colors. Call the - `whiten_materials` helper method to set all material colors to white. 
- """ - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - self.textures = [Texture(self.model, i) - for i in range(self.model.ntex)] - self._build_tex_geom_map() - - # These matrices will be used to rapidly synthesize - # checker pattern bitmaps - self._cache_checker_matrices() - - def get_texture(self, name): - if name == 'skybox': - tex_id = -1 - for i in range(self.model.ntex): - # TODO: Don't hardcode this - skybox_textype = 2 - if self.model.tex_type[i] == skybox_textype: - tex_id = i - assert tex_id >= 0, "Model has no skybox" - else: - geom_id = self.model.geom_name2id(name) - mat_id = self.model.geom_matid[geom_id] - assert mat_id >= 0, "Geom has no assigned material" - tex_id = self.model.mat_texid[mat_id] - assert tex_id >= 0, "Material has no assigned texture" - - texture = self.textures[tex_id] - - return texture - - def get_checker_matrices(self, name): - if name == 'skybox': - return self._skybox_checker_mat - else: - geom_id = self.model.geom_name2id(name) - return self._geom_checker_mats[geom_id] - - def set_checker(self, name, rgb1, rgb2): - bitmap = self.get_texture(name).bitmap - cbd1, cbd2 = self.get_checker_matrices(name) - - rgb1 = np.asarray(rgb1).reshape([1, 1, -1]) - rgb2 = np.asarray(rgb2).reshape([1, 1, -1]) - bitmap[:] = rgb1 * cbd1 + rgb2 * cbd2 - - self.upload_texture(name) - return bitmap - - def set_gradient(self, name, rgb1, rgb2, vertical=True): - """ - Creates a linear gradient from rgb1 to rgb2. - - Args: - - rgb1 (array): start color - - rgb2 (array): end color - - vertical (bool): if True, the gradient in the positive - y-direction, if False it's in the positive x-direction. - """ - # NOTE: MuJoCo's gradient uses a sigmoid. Here we simplify - # and just use a linear gradient... We could change this - # to just use a tanh-sigmoid if needed. 
- bitmap = self.get_texture(name).bitmap - h, w = bitmap.shape[:2] - if vertical: - p = np.tile(np.linspace(0, 1, h)[:, None], (1, w)) - else: - p = np.tile(np.linspace(0, 1, w), (h, 1)) - - for i in range(3): - bitmap[..., i] = rgb2[i] * p + rgb1[i] * (1.0 - p) - - self.upload_texture(name) - return bitmap - - def set_rgb(self, name, rgb): - bitmap = self.get_texture(name).bitmap - bitmap[..., :] = np.asarray(rgb) - - self.upload_texture(name) - return bitmap - - def set_noise(self, name, rgb1, rgb2, fraction=0.9): - """ - Args: - - name (str): name of geom - - rgb1 (array): background color - - rgb2 (array): color of random noise foreground color - - fraction (float): fraction of pixels with foreground color - """ - bitmap = self.get_texture(name).bitmap - h, w = bitmap.shape[:2] - mask = self.random_state.uniform(size=(h, w)) < fraction - - bitmap[..., :] = np.asarray(rgb1) - bitmap[mask, :] = np.asarray(rgb2) - - self.upload_texture(name) - return bitmap - - def randomize(self): - for name in self.sim.model.geom_names: - self.rand_all(name) - - def rand_all(self, name): - choices = [ - self.rand_checker, - self.rand_gradient, - self.rand_rgb, - self.rand_noise, - ] - choice = self.random_state.randint(len(choices)) - return choices[choice](name) - - def rand_checker(self, name): - rgb1, rgb2 = self.get_rand_rgb(2) - return self.set_checker(name, rgb1, rgb2) - - def rand_gradient(self, name): - rgb1, rgb2 = self.get_rand_rgb(2) - vertical = bool(self.random_state.uniform() > 0.5) - return self.set_gradient(name, rgb1, rgb2, vertical=vertical) - - def rand_rgb(self, name): - rgb = self.get_rand_rgb() - return self.set_rgb(name, rgb) - - def rand_noise(self, name): - fraction = 0.1 + self.random_state.uniform() * 0.8 - rgb1, rgb2 = self.get_rand_rgb(2) - return self.set_noise(name, rgb1, rgb2, fraction) - - def upload_texture(self, name): - """ - Uploads the texture to the GPU so it's available in the rendering. - """ - texture = self.get_texture(name) - if not self.sim.render_contexts: - cymj.MjRenderContextOffscreen(self.sim) - for render_context in self.sim.render_contexts: - render_context.upload_texture(texture.id) - - def whiten_materials(self, geom_names=None): - """ - Helper method for setting all material colors to white, otherwise - the texture modifications won't take full effect. - - Args: - - geom_names (list): list of geom names whose materials should be - set to white. If omitted, all materials will be changed. - """ - geom_names = geom_names or [] - if geom_names: - for name in geom_names: - geom_id = self.model.geom_name2id(name) - mat_id = self.model.geom_matid[geom_id] - self.model.mat_rgba[mat_id, :] = 1.0 - else: - self.model.mat_rgba[:] = 1.0 - - def get_rand_rgb(self, n=1): - def _rand_rgb(): - return np.array(self.random_state.uniform(size=3) * 255, - dtype=np.uint8) - - if n == 1: - return _rand_rgb() - else: - return tuple(_rand_rgb() for _ in range(n)) - - def _build_tex_geom_map(self): - # Build a map from tex_id to geom_ids, so we can check - # for collisions. - self._geom_ids_by_tex_id = defaultdict(list) - for geom_id in range(self.model.ngeom): - mat_id = self.model.geom_matid[geom_id] - if mat_id >= 0: - tex_id = self.model.mat_texid[mat_id] - if tex_id >= 0: - self._geom_ids_by_tex_id[tex_id].append(geom_id) - - def _cache_checker_matrices(self): - """ - Cache two matrices of the form [[1, 0, 1, ...], - [0, 1, 0, ...], - ...] - and [[0, 1, 0, ...], - [1, 0, 1, ...], - ...] - for each texture. 
To use for fast creation of checkerboard patterns - """ - self._geom_checker_mats = [] - for geom_id in range(self.model.ngeom): - mat_id = self.model.geom_matid[geom_id] - tex_id = self.model.mat_texid[mat_id] - texture = self.textures[tex_id] - h, w = texture.bitmap.shape[:2] - self._geom_checker_mats.append(self._make_checker_matrices(h, w)) - - # add skybox - skybox_tex_id = -1 - for tex_id in range(self.model.ntex): - skybox_textype = 2 - if self.model.tex_type[tex_id] == skybox_textype: - skybox_tex_id = tex_id - if skybox_tex_id >= 0: - texture = self.textures[skybox_tex_id] - h, w = texture.bitmap.shape[:2] - self._skybox_checker_mat = self._make_checker_matrices(h, w) - else: - self._skybox_checker_mat = None - - def _make_checker_matrices(self, h, w): - re = np.r_[((w + 1) // 2) * [0, 1]] - ro = np.r_[((w + 1) // 2) * [1, 0]] - cbd1 = np.expand_dims(np.row_stack(((h + 1) // 2) * [re, ro]), -1)[:h, :w] - cbd2 = np.expand_dims(np.row_stack(((h + 1) // 2) * [ro, re]), -1)[:h, :w] - return cbd1, cbd2 - - -# From mjtTexture -MJT_TEXTURE_ENUM = ['2d', 'cube', 'skybox'] - - -class Texture(): - """ - Helper class for operating on the MuJoCo textures. - """ - - __slots__ = ['id', 'type', 'height', 'width', 'tex_adr', 'tex_rgb'] - - def __init__(self, model, tex_id): - self.id = tex_id - self.type = MJT_TEXTURE_ENUM[model.tex_type[tex_id]] - self.height = model.tex_height[tex_id] - self.width = model.tex_width[tex_id] - self.tex_adr = model.tex_adr[tex_id] - self.tex_rgb = model.tex_rgb - - @property - def bitmap(self): - size = self.height * self.width * 3 - data = self.tex_rgb[self.tex_adr:self.tex_adr + size] - return data.reshape((self.height, self.width, 3)) diff --git a/myGym/utils/results_plotter.py b/myGym/utils/results_plotter.py new file mode 100644 index 00000000..9df80687 --- /dev/null +++ b/myGym/utils/results_plotter.py @@ -0,0 +1,340 @@ +import numpy as np +import matplotlib +import matplotlib.pyplot as plt +import pandas, json +import os +from glob import glob +import time, gym, csv +from typing import Tuple, Dict, Any, List, Optional + +# matplotlib.use('TkAgg') # Can change to 'Agg' for non-interactive mode +plt.rcParams['svg.fonttype'] = 'none' + +X_TIMESTEPS = 'timesteps' +X_EPISODES = 'episodes' +X_WALLTIME = 'walltime_hrs' +POSSIBLE_X_AXES = [X_TIMESTEPS, X_EPISODES, X_WALLTIME] +EPISODES_WINDOW = 100 +COLORS = ['blue', 'green', 'red', 'cyan', 'magenta', 'yellow', 'black', 'purple', 'pink', + 'brown', 'orange', 'teal', 'coral', 'lightblue', 'lime', 'lavender', 'turquoise', + 'darkgreen', 'tan', 'salmon', 'gold', 'lightpurple', 'darkred', 'darkblue'] + +class LoadMonitorResultsError(Exception): + """ + Raised when loading the monitor log fails. + """ + pass + +class Monitor(gym.Wrapper): + """ + A monitor wrapper for Gym environments, it is used to know the episode reward, length, time and other data. 
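+    Per-episode statistics are appended to a ``*.monitor.csv`` file when ``filename`` is given, and are also kept in memory in ``episode_rewards``, ``episode_lengths`` and ``episode_times``.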
+ + :param env: (gym.Env) The environment + :param filename: (Optional[str]) the location to save a log file, can be None for no log + :param allow_early_resets: (bool) allows the reset of the environment before it is done + :param reset_keywords: (tuple) extra keywords for the reset call, if extra parameters are needed at reset + :param info_keywords: (tuple) extra information to log, from the information return of environment.step + """ + EXT = "monitor.csv" + file_handler = None + + def __init__(self, + env: gym.Env, + filename: Optional[str], + allow_early_resets: bool = True, + reset_keywords=(), + info_keywords=()): + super(Monitor, self).__init__(env=env) + self.t_start = time.time() + if filename is None: + self.file_handler = None + self.logger = None + else: + if not filename.endswith(Monitor.EXT): + if os.path.isdir(filename): + filename = os.path.join(filename, Monitor.EXT) + else: + filename = filename + "." + Monitor.EXT + self.file_handler = open(filename, "wt") + self.file_handler.write('#%s\n' % json.dumps({"t_start": self.t_start, 'env_id': env.spec and env.spec.id})) + self.logger = csv.DictWriter(self.file_handler, + fieldnames=('r', 'l', 't') + reset_keywords + info_keywords) + self.logger.writeheader() + self.file_handler.flush() + + self.reset_keywords = reset_keywords + self.info_keywords = info_keywords + self.allow_early_resets = allow_early_resets + self.rewards = None + self.needs_reset = True + self.episode_rewards = [] + self.episode_lengths = [] + self.episode_times = [] + self.total_steps = 0 + self.current_reset_info = {} # extra info about the current episode, that was passed in during reset() + + def reset(self, **kwargs) -> np.ndarray: + """ + Calls the Gym environment reset. Can only be called if the environment is over, or if allow_early_resets is True + + :param kwargs: Extra keywords saved for the next episode. only if defined by reset_keywords + :return: (np.ndarray) the first observation of the environment + """ + if not self.allow_early_resets and not self.needs_reset: + raise RuntimeError("Tried to reset an environment before done. 
If you want to allow early resets, " + "wrap your env with Monitor(env, path, allow_early_resets=True)") + self.rewards = [] + self.needs_reset = False + for key in self.reset_keywords: + value = kwargs.get(key) + if value is None: + raise ValueError('Expected you to pass kwarg {} into reset'.format(key)) + self.current_reset_info[key] = value + return self.env.reset(**kwargs) + + def step(self, action: np.ndarray) -> Tuple[np.ndarray, float, bool, Dict[Any, Any]]: + """ + Step the environment with the given action + + :param action: (np.ndarray) the action + :return: (Tuple[np.ndarray, float, bool, Dict[Any, Any]]) observation, reward, done, information + """ + if self.needs_reset: + raise RuntimeError("Tried to step environment that needs reset") + observation, reward, done, info = self.env.step(action) + self.rewards.append(reward) + if done: + self.needs_reset = True + ep_rew = sum(self.rewards) + eplen = len(self.rewards) + ep_info = {"r": round(ep_rew, 6), "l": eplen, "t": round(time.time() - self.t_start, 6)} + for key in self.info_keywords: + ep_info[key] = info[key] + self.episode_rewards.append(ep_rew) + self.episode_lengths.append(eplen) + self.episode_times.append(time.time() - self.t_start) + ep_info.update(self.current_reset_info) + if self.logger: + self.logger.writerow(ep_info) + self.file_handler.flush() + info['episode'] = ep_info + self.total_steps += 1 + return observation, reward, done, info + + def close(self): + """ + Closes the environment + """ + super(Monitor, self).close() + if self.file_handler is not None: + self.file_handler.close() + + def get_total_steps(self) -> int: + """ + Returns the total number of timesteps + + :return: (int) + """ + return self.total_steps + + def get_episode_rewards(self) -> List[float]: + """ + Returns the rewards of all the episodes + + :return: ([float]) + """ + return self.episode_rewards + + def get_episode_lengths(self) -> List[int]: + """ + Returns the number of timesteps of all the episodes + + :return: ([int]) + """ + return self.episode_lengths + + def get_episode_times(self) -> List[float]: + """ + Returns the runtime in seconds of all the episodes + + :return: ([float]) + """ + return self.episode_times + +def get_monitor_files(path: str) -> List[str]: + """ + get all the monitor files in the given path + + :param path: (str) the logging folder + :return: ([str]) the log files + """ + return glob(os.path.join(path, "*" + Monitor.EXT)) + +def load_results(path: str) -> pandas.DataFrame: + """ + Load all Monitor logs from a given directory path matching ``*monitor.csv`` and ``*monitor.json`` + + :param path: (str) the directory path containing the log file(s) + :return: (pandas.DataFrame) the logged data + """ + # get both csv and (old) json files + monitor_files = (glob(os.path.join(path, "*monitor.json")) + get_monitor_files(path)) + if not monitor_files: + raise LoadMonitorResultsError("no monitor files of the form *%s found in %s" % (Monitor.EXT, path)) + data_frames = [] + headers = [] + for file_name in monitor_files: + with open(file_name, 'rt') as file_handler: + if file_name.endswith('csv'): + first_line = file_handler.readline() + assert first_line[0] == '#' + header = json.loads(first_line[1:]) + data_frame = pandas.read_csv(file_handler, index_col=None) + headers.append(header) + elif file_name.endswith('json'): # Deprecated json format + episodes = [] + lines = file_handler.readlines() + header = json.loads(lines[0]) + headers.append(header) + for line in lines[1:]: + episode = json.loads(line) + 
episodes.append(episode) + data_frame = pandas.DataFrame(episodes) + else: + assert 0, 'unreachable' + data_frame['t'] += header['t_start'] + data_frames.append(data_frame) + data_frame = pandas.concat(data_frames) + data_frame.sort_values('t', inplace=True) + data_frame.reset_index(inplace=True) + data_frame['t'] -= min(header['t_start'] for header in headers) + # data_frame.headers = headers # HACK to preserve backwards compatibility + return data_frame + +def rolling_window(array, window): + """ + apply a rolling window to a np.ndarray + + :param array: (np.ndarray) the input Array + :param window: (int) length of the rolling window + :return: (np.ndarray) rolling window on the input array + """ + shape = array.shape[:-1] + (array.shape[-1] - window + 1, window) + strides = array.strides + (array.strides[-1],) + return np.lib.stride_tricks.as_strided(array, shape=shape, strides=strides) + + +def window_func(var_1, var_2, window, func): + """ + apply a function to the rolling window of 2 arrays + + :param var_1: (np.ndarray) variable 1 + :param var_2: (np.ndarray) variable 2 + :param window: (int) length of the rolling window + :param func: (numpy function) function to apply on the rolling window on variable 2 (such as np.mean) + :return: (np.ndarray, np.ndarray) the rolling output with applied function + """ + var_2_window = rolling_window(var_2, window) + function_on_var2 = func(var_2_window, axis=-1) + return var_1[window - 1:], function_on_var2 + + +def ts2xy(timesteps, xaxis): + """ + Decompose a timesteps variable to x and y values + + :param timesteps: (Pandas DataFrame) the input data + :param xaxis: (str) the axis for the x and y output + (can be X_TIMESTEPS='timesteps', X_EPISODES='episodes' or X_WALLTIME='walltime_hrs') + :return: (np.ndarray, np.ndarray) the x and y output + """ + if xaxis == X_TIMESTEPS: + x_var = np.cumsum(timesteps.l.values) + y_var = timesteps.r.values + elif xaxis == X_EPISODES: + x_var = np.arange(len(timesteps)) + y_var = timesteps.r.values + elif xaxis == X_WALLTIME: + x_var = timesteps.t.values / 3600. + y_var = timesteps.r.values + else: + raise NotImplementedError + return x_var, y_var + + +def plot_curves(xy_list, xaxis, title): + """ + plot the curves + + :param xy_list: ([(np.ndarray, np.ndarray)]) the x and y coordinates to plot + :param xaxis: (str) the axis for the x and y output + (can be X_TIMESTEPS='timesteps', X_EPISODES='episodes' or X_WALLTIME='walltime_hrs') + :param title: (str) the title of the plot + """ + + plt.figure(figsize=(8, 2)) + maxx = max(xy[0][-1] for xy in xy_list) + minx = 0 + for (i, (x, y)) in enumerate(xy_list): + color = COLORS[i] + plt.scatter(x, y, s=2) + # Do not plot the smoothed curve at all if the timeseries is shorter than window size.
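+ # The raw per-episode rewards are already drawn as a scatter above; only the rolling mean over EPISODES_WINDOW (100 episodes) is skipped for short runs.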
+ if x.shape[0] >= EPISODES_WINDOW: + # Compute and plot rolling mean with window of size EPISODE_WINDOW + x, y_mean = window_func(x, y, EPISODES_WINDOW, np.mean) + plt.plot(x, y_mean, color=color) + plt.xlim(minx, maxx) + plt.title(title) + plt.xlabel(xaxis) + plt.ylabel("Episode Rewards") + plt.tight_layout() + + +def plot_results(dirs, num_timesteps, xaxis, task_name): + """ + plot the results + + :param dirs: ([str]) the save location of the results to plot + :param num_timesteps: (int or None) only plot the points below this value + :param xaxis: (str) the axis for the x and y output + (can be X_TIMESTEPS='timesteps', X_EPISODES='episodes' or X_WALLTIME='walltime_hrs') + :param task_name: (str) the title of the task to plot + """ + + tslist = [] + for folder in dirs: + timesteps = load_results(folder) + if num_timesteps is not None: + timesteps = timesteps[timesteps.l.cumsum() <= num_timesteps] + tslist.append(timesteps) + xy_list = [ts2xy(timesteps_item, xaxis) for timesteps_item in tslist] + plot_curves(xy_list, xaxis, task_name) + + +def main(): + """ + Example usage in jupyter-notebook + + .. code-block:: python + + from stable_baselines import results_plotter + %matplotlib inline + results_plotter.plot_results(["./log"], 10e6, results_plotter.X_TIMESTEPS, "Breakout") + + Here ./log is a directory containing the monitor.csv files + """ + import argparse + import os + parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument('--dirs', help='List of log directories', nargs='*', default=['./log']) + parser.add_argument('--num_timesteps', type=int, default=int(10e6)) + parser.add_argument('--xaxis', help='Varible on X-axis', default=X_TIMESTEPS) + parser.add_argument('--task_name', help='Title of plot', default='Breakout') + args = parser.parse_args() + args.dirs = [os.path.abspath(folder) for folder in args.dirs] + plot_results(args.dirs, args.num_timesteps, args.xaxis, args.task_name) + plt.show() + + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/myGym/vae/train_vae.py b/myGym/vae/train_vae.py index 5b326586..8ad6014f 100644 --- a/myGym/vae/train_vae.py +++ b/myGym/vae/train_vae.py @@ -12,8 +12,8 @@ from datetime import datetime from myGym.vae import sample from configparser import ConfigParser -import pkg_resources -from myGym.vae.vis_helpers import load_checkpoint +import importlib.resources as resources +currentdir = resources.files("myGym").joinpath("envs") SEED = 1111 random.seed(SEED) @@ -219,7 +219,7 @@ def load_images_all(path, imsize): help='vae config') args = parser.parse_args() config = ConfigParser() - config.read(pkg_resources.resource_filename("myGym", '/vae/{}'.format(args.config))) + config.read(resources.files("myGym").joinpath("vae", args.config)) with open(os.path.join(SAVE_DIR,'config.ini'), 'w') as f: config.write(f) diff --git a/myGym/visualize_multieval.py b/myGym/visualize_multieval.py deleted file mode 100644 index 8ee2361a..00000000 --- a/myGym/visualize_multieval.py +++ /dev/null @@ -1,158 +0,0 @@ -import os -import re -import threading -import subprocess -from sklearn.model_selection import ParameterGrid -import json -import matplotlib.pyplot as plt -import matplotlib.colors as colors -import pandas as pd -from mpl_toolkits.mplot3d import Axes3D -import numpy as np -import pkg_resources -import numpy as np -from matplotlib.lines import Line2D -import operator - -currentdir = pkg_resources.resource_filename("myGym", "trained_models") -algo_1_results_path = 
os.path.join(currentdir, "multi_reach.json") -algo_2_results_path = os.path.join(currentdir, "multi_reach.json") -algo_3_results_path = os.path.join(currentdir, "multi_evaluation_results_ppo.json") -algo_4_results_path = os.path.join(currentdir, "multi_evaluation_results_td3.json") -algo_5_results_path = os.path.join(currentdir, "multi_evaluation_results_trpo.json") -algo_6_results_path = os.path.join(currentdir, "multi_evaluation_results_ddpg.json") -algo_7_results_path = os.path.join(currentdir, "multi_evaluation_results_acktr.json") -algo_8_results_path = os.path.join(currentdir, "multi_evaluation_results_a2c.json") -selected_metric = "success_rate" - - -def load_algo_results(results_path): - with open(results_path) as f: - results = json.load(f) - list_results = [json.loads(key.replace('\'', '"')) - + [float(value[selected_metric])] for key, value - in results.items()]# if str(fixed_parameter_value) in key] - #list_results = list(map(lambda x: [v for v in x if v != fixed_parameter_value], list_results)) - df = pd.DataFrame(list_results) - order2=['reach', 'push', 'pnp'] - order1=['step', 'joints', 'joints_gripper'] - order3=[1024, 512, 256] - df[1] = pd.Categorical(df[1], order1) - df[2] = pd.Categorical(df[2], order2) - df[0] = pd.Categorical(df[0], order3) - df = df.sort_values([2,1,0]) - list_results = df.values.tolist() - return list_results - -def plot_algo_results(ax, list_results, label, c): - w_labels = [result[0] for result in list_results] - w_set = sorted(set(w_labels), key=w_labels.index) - w = np.asarray([w_set.index(label) for label in w_labels]) - - x_labels = [result[1] for result in list_results] - #x_labels = [x.replace('joints_gripper', 'gripper') for x in x_labels] - x_set = sorted(set(x_labels), key=x_labels.index) - x = np.asarray([x_set.index(label) for label in x_labels]) - - y_labels = [result[2] for result in list_results] - y_set = sorted(set(y_labels), key=y_labels.index) - y = np.asarray([y_set.index(label) for label in y_labels]) - - z = [result[3] for result in list_results] - z += z[:1] - - # Make data: I have 3 groups and 7 subgroups - group_names = y_set - n_groups = len(group_names) - group_size = [100/n_groups]*n_groups - - subgroup_names = x_set*n_groups - n_subgroups = len(subgroup_names) - subgroup_size = [100/n_subgroups]*n_subgroups - - subsubgroup_names = w_set*n_subgroups - n_subsubgroups = len(subsubgroup_names) - subsubgroup_size = [100/n_subsubgroups]*n_subsubgroups - - # Create colors - r, g, b=[plt.cm.Reds, plt.cm.Greens, plt.cm.Blues] - - # First Ring (outside) - ax.set(aspect="equal") - mypie, txt = ax.pie(group_size, radius=1.6, labels=group_names, startangle=0, pctdistance=-5.125, colors=[r(0.4), r(0.6), r(0.8)], rotatelabels=True) - plt.setp(mypie, width=0.12, edgecolor='white') - - # Second Ring (Inside) - mypie2, txt2 = ax.pie(subgroup_size, radius=1.6-0.12, labels=subgroup_names, startangle=0, colors=[b(0.4), b(0.6), b(0.8)]*n_groups, rotatelabels=True) - plt.setp(mypie2, width=0.12, edgecolor='white') - - # Third Ring - mypie3, txt3 = ax.pie(subsubgroup_size, radius=1.6-0.24, labels=subsubgroup_names, startangle=0, colors=[g(0.4), g(0.6), g(0.8)]*n_subgroups, rotatelabels=True) - plt.setp(mypie3, width=0.12, edgecolor='white') - plt.margins(0,0) - - for t in range(len(txt)): - txt[t]._x = np.cos((0.5+t)*group_size[0]/100*np.pi*2)*(1.49) - txt[t]._y = np.sin((0.5+t)*group_size[0]/100*np.pi*2)*(1.49) - txt[t]._rotation -= 90*np.sign(txt[t]._x) - for t in range(len(txt2)): - txt2[t]._x = 
np.cos((0.5+t)*subgroup_size[0]/100*np.pi*2)*(1.35) - txt2[t]._y = np.sin((0.5+t)*subgroup_size[0]/100*np.pi*2)*(1.35) - txt2[t]._rotation -= 90*np.sign(txt2[t]._x) - for t in range(len(txt3)): - txt3[t]._x = np.cos((0.5+t)*subsubgroup_size[0]/100*np.pi*2)*(1.25) - txt3[t]._y = np.sin((0.5+t)*subsubgroup_size[0]/100*np.pi*2)*(1.25) - txt3[t]._rotation -= 90*np.sign(txt3[t]._x) - - #plt.show() - - categories=w_labels - N = len(categories) - angles = [n / float(N) * 2 * np.pi + 0.5 / float(N) * 2 * np.pi for n in range(N)] - angles += angles[:1] - - # Initialise the spider plot - radar = fig.add_subplot(111, polar=True) - - # Draw one axe per variable + add labels labels yet - plt.xticks(angles[:-1], categories, color='None') - - # Draw ylabels - yticks = np.arange(0,101,10) - yticks_labels = [str(tick)+' %' for tick in yticks] - radar.set_rlabel_position(90) - plt.yticks(yticks, labels=yticks_labels, color="black", size=8) - plt.ylim(0,110) - - # Plot data - radar.plot(angles, z, c, linewidth=1, linestyle='solid') - radar.fill(angles, z, c, alpha=0.3) - - #plt.show() - learnability = np.mean(z) - print("Learnability for {}: {}".format(label, learnability)) - return learnability - -if __name__ == '__main__': - algo_1_list_results = load_algo_results(algo_1_results_path) - algo_2_list_results = load_algo_results(algo_1_results_path) - - fig = plt.figure() - ax = fig.subplots() - - learnability_ppo2 = plot_algo_results(ax, algo_1_list_results, "ppo2", "r") - learnability_ppo = plot_algo_results(ax, algo_2_list_results, "ppo2", "g") - - - txt = plt.title('Learnability', fontsize=18) - txt._x=1.07 - txt._y=1.08 - legend_elements = [Line2D([0], [1], linewidth=2, color='r', label='PPO2, {:.2f} %'.format(learnability_ppo2)), - Line2D([0], [1], linewidth=2, color='m', label='TRPO, {:.2f} %'.format(learnability_ppo))] - #Line2D([0], [1], linewidth=2, color='c', label='TD3, {:.2f} %'.format(learnability_td3)), - #Line2D([0], [1], linewidth=2, color='y', label='DDPG, {:.2f} %'.format(learnability_ddpg)), - #Line2D([0], [1], linewidth=2, color='r', label='A2C, {:.2f} %'.format(learnability_a2c))] - ax.legend(handles=legend_elements, loc=(0.97, 0.85), prop={'size': 16}) - plt.savefig(os.path.join(currentdir, "multieval_visualization.png")) - - plt.show() diff --git a/myGym/walker2d-ppo.yaml b/myGym/walker2d-ppo.yaml deleted file mode 100644 index 8f16acd6..00000000 --- a/myGym/walker2d-ppo.yaml +++ /dev/null @@ -1,15 +0,0 @@ -walker2d-v1-ppo: - env: Walker2d-v4 - run: PPO - config: - # Works for both torch and tf. 
- framework: torch - kl_coeff: 1.0 - num_sgd_iter: 20 - lr: .0001 - sgd_minibatch_size: 32768 - train_batch_size: 320000 - num_workers: 64 - num_gpus: 1 - batch_mode: complete_episodes - observation_filter: MeanStdFilter diff --git a/myGym/yolact_vision/inference_tool.py b/myGym/yolact_vision/inference_tool.py index b5054477..b4650706 100644 --- a/myGym/yolact_vision/inference_tool.py +++ b/myGym/yolact_vision/inference_tool.py @@ -9,7 +9,6 @@ import torch import cv2 import numpy as np -import pkg_resources import dill import numpy as np diff --git a/pyproject.toml b/pyproject.toml index 08eced32..443b4d08 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "myGym" -version = "0.1.0" +version = "3.10.0" description = "myGym" authors = [ {name = "Incognite group CIIRC CVUT", email = "michal.vavrecka@cvut.cz"}, @@ -22,21 +22,22 @@ dependencies = [ "opencv-python==4.9.0.80", "pybullet==3.2.6", "open3d==0.18.0", - "gym==0.26.2", - "tensorflow>=2.11.0", - "torch<2.5,>=2.2", + "stable-baselines3==2.4.0", + "tensorboard>=2.18.0", + "torch==2.5.1", "torchvision>=0.17.1", "torchaudio>=2.2.1", "matplotlib>=3.8.3", "pyquaternion>=0.9.9", "commentjson>=0.9.0", - "ray[rllib,tune]>=2.9.3", - "gymnasium>=0.28.1", + "gymnasium>=1.0.0", "protobuf<4,>3", + "stable-baselines3>=2.4.0", + "shimmy==0.2.1", ] requires-python = ">=3.10,<3.11" readme = "README.md" -license = {text = "MIT License\n\nCopyright (c) 2020 CIIRC, CTU in Prague\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof this software and associated documentation files (the \"Software\"), to deal\nin the Software without restriction, including without limitation the rights\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Software, and to permit persons to whom the Software is\nfurnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all\ncopies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\nSOFTWARE.\n\n"} +license = {text = "MIT License\n\nCopyright (c) 2024 CIIRC, CTU in Prague\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof this software and associated documentation files (the \"Software\"), to deal\nin the Software without restriction, including without limitation the rights\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Software, and to permit persons to whom the Software is\nfurnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all\ncopies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\nSOFTWARE.\n\n"} maintainers = [ {email = "michal.vavrecka@cvut.cz"}, ] diff --git a/rllib.yml b/rllib.yml index 19e965be..1c0d41d1 100644 --- a/rllib.yml +++ b/rllib.yml @@ -14,14 +14,15 @@ dependencies: - tensorflow-base==2.11.1 - pip: - commentjson==0.9.0 - - ray==2.7.2 - - gym==0.24.0 - - gymnasium==0.29.1 + - ray==2.23.0 + - gym==0.26.2 + - gymnasium==0.28.1 - gymnasium-notices==0.0.1 - h5py==3.10.0 - imageio==2.33.1 - google==3.0.0 - google-cloud==0.34.0 + - memory-profiler==0.61.0 - matplotlib==3.8.2 - numpy==1.26.4 - opencv-python==4.9.0.80 @@ -39,10 +40,12 @@ dependencies: - pywavelets==1.5.0 - pyyaml==6.0.1 - scikit-image==0.22.0 + - Shimmy==2.0.0 - tensorflow-addons==0.23.0 - tensorflow-io-gcs-filesystem==0.36.0 - torch==2.2.0 - - torchvision==0.17.0 + - torchvision==0.17.2 - urllib3==2.2.0 + - stable-baselines3==2.3.2 prefix: /home/sejnogab/.conda/envs/mygymrl
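Usage sketch (assumes the patch above is applied; the log directory, plot title and step limit are placeholders): the new get_module_type() helper in myGym/utils/helpers.py and the vendored plotting utilities in myGym/utils/results_plotter.py can be exercised roughly as follows. plot_results() expects *monitor.csv files written by the Monitor wrapper during training, so the call raises LoadMonitorResultsError on an empty directory.

import matplotlib.pyplot as plt

from myGym.utils.helpers import get_module_type
from myGym.utils import results_plotter

# Map an observation config to the label of its information source
print(get_module_type({"actual_state": "obj_xyz", "goal_state": "obj_xyz"}))   # ground_truth
print(get_module_type({"actual_state": "endeff_6D", "goal_state": "obj_6D"}))  # ground_truth_6D
print(get_module_type({"actual_state": "yolact", "goal_state": "obj_xyz"}))    # yolact

# Plot episode rewards against cumulative timesteps from the monitor logs
# of one training run (placeholder directory, title and timestep cutoff)
results_plotter.plot_results(["trained_models/test"], int(2e5), results_plotter.X_TIMESTEPS, "reach")
plt.show()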