Farama-Foundation · younik · Jan 25, 2024 · Aug 21, 2023 · Aug 25, 2023 · Aug 25, 2023
diff --git a/docs/content/basic_usage.md b/docs/content/basic_usage.md
@@ -125,7 +125,7 @@ env = gym.make('CartPole-v1')
 env = DataCollector(env, record_infos=True, max_buffer_steps=100000)
 
 total_episodes = 100
-dataset_name = "CartPole-v1-test-v0"
+dataset_name = "cartpole-test-v0"
 dataset = None
 if dataset_name in minari.list_local_datasets():
     dataset = minari.load_dataset(dataset_name)

diff --git a/docs/content/dataset_standards.md b/docs/content/dataset_standards.md
@@ -554,5 +554,12 @@ The `sampled_episodes` variable will be a list of 10 `EpisodeData` elements, eac
 | `rewards`         | `np.ndarray`                         | Rewards for each timestep.                                    |
 | `terminations`    | `np.ndarray`                         | Terminations for each timestep.                               |
 | `truncations`     | `np.ndarray`                         | Truncations for each timestep.                                |
+| `infos`           | `dict`                               | A dictionary containing additional information.               |
 
 As mentioned in the `Supported Spaces` section, many different observation and action spaces are supported so the data type for these fields are dependent on the environment being used.
+
+## Additional Information Formatting
+
+When a dataset is created with a `DataCollector` that was initialized with `record_infos=True`, every call to the environment's `step` and `reset` functions must return an info dict. The structure of the info dictionary must be the same across timesteps.
+
+Given that it is not guaranteed that all Gymnasium environments provide infos at every timestep, we provide the `StepDataCallback`, which can modify the infos from a non-compliant environment so that they have the same structure at every timestep. An example of this pattern is available in our test `test_data_collector_step_data_callback_info_correction` in `test_step_data_callback.py`.
diff --git a/minari/data_collector/data_collector.py b/minari/data_collector/data_collector.py
@@ -7,7 +7,7 @@
 import shutil
 import tempfile
 import warnings
-from typing import Any, Callable, Dict, List, Optional, SupportsFloat, Type, Union
+from typing import Any, Callable, Dict, List, Optional, SupportsFloat, Type
 
 import gymnasium as gym
 import numpy as np
@@ -128,6 +128,7 @@ def __init__(
         )
 
         self._record_infos = record_infos
+        self._reference_info = None
         self.max_buffer_steps = max_buffer_steps
 
         # Initialzie empty buffer
@@ -136,11 +137,11 @@ def __init__(
         self._step_id = -1
         self._episode_id = -1
 
-    def _add_to_episode_buffer(
+    def _add_step_data(
         self,
         episode_buffer: EpisodeBuffer,
-        step_data: Union[StepData, Dict[str, StepData]],
-    ) -> EpisodeBuffer:
+        step_data: StepData,
+    ):
         """Add step data dictionary to episode buffer.
 
         Args:
@@ -150,31 +151,56 @@ def _add_to_episode_buffer(
-        Returns:
-            Dict: new dictionary episode buffer with added values from step_data
+        Raises:
+            ValueError: if infos are recorded and ``step_data["infos"]`` does not have
+                the same structure as the reference info stored by ``reset``.
         """
+        if self._record_infos:
+            # The reference info is stored by ``reset``; it must exist before any
+            # step data can be validated against it.
+            assert self._reference_info is not None
+            if not _check_infos_same_shape(
+                self._reference_info, step_data["infos"]
+            ):
+                raise ValueError(
+                    "Info structure inconsistent with info structure returned by original reset."
+                )
+            dict_data = dict(step_data)
+        else:
+            # Infos are not recorded: drop them before they reach the buffer.
+            dict_data = {k: v for k, v in step_data.items() if k != "infos"}
+
+        self._add_to_episode_buffer(episode_buffer, dict_data)
+
+    def _add_to_episode_buffer(
+        self,
+        episode_buffer: EpisodeBuffer,
+        step_data: Dict[str, Any],
+    ):
+        """Recursively append the values of ``step_data`` to ``episode_buffer`` in place.
+
+        ``None`` values are skipped. A dict value is merged key by key into a nested
+        dict of the buffer; any other value is appended to the list stored under its key.
+
+        Args:
+            episode_buffer (EpisodeBuffer): buffer to update in place
+            step_data (Dict[str, Any]): possibly nested dictionary of values to add
+        """
         for key, value in step_data.items():
-            if (not self._record_infos and key == "infos") or (value is None):
+            if value is None:
                 continue
 
             if key not in episode_buffer:
-                if isinstance(value, dict):
-                    episode_buffer[key] = self._add_to_episode_buffer({}, value)
-                else:
-                    episode_buffer[key] = [value]
+                episode_buffer[key] = {} if isinstance(value, dict) else []
+
+            if isinstance(value, dict):
+                assert isinstance(
+                    episode_buffer[key], dict
+                ), f"Element to be inserted is type 'dict', but buffer accepts type {type(episode_buffer[key])}"
+
+                self._add_to_episode_buffer(episode_buffer[key], value)
             else:
-                if isinstance(value, dict):
-                    assert isinstance(
-                        episode_buffer[key], dict
-                    ), f"Element to be inserted is type 'dict', but buffer accepts type {type(episode_buffer[key])}"
-
-                    episode_buffer[key] = self._add_to_episode_buffer(
-                        episode_buffer[key], value
-                    )
-                else:
-                    assert isinstance(
-                        episode_buffer[key], list
-                    ), f"Element to be inserted is type 'list', but buffer accepts type {type(episode_buffer[key])}"
-                    episode_buffer[key].append(value)
-
-        return episode_buffer
+                assert isinstance(
+                    episode_buffer[key], list
+                ), f"Element to be inserted is type 'list', but buffer accepts type {type(episode_buffer[key])}"
+                episode_buffer[key].append(value)
 
     def step(
         self, action: ActType
@@ -191,6 +204,9 @@ def step(
             terminated=terminated,
             truncated=truncated,
         )
+
+        # Force step data dictionary to include keys corresponding to Gymnasium step returns:
+        # actions, observations, rewards, terminations, truncations, and infos
         assert STEP_DATA_KEYS.issubset(
             step_data.keys()
         ), "One or more required keys is missing from 'step-data'."
@@ -203,7 +219,7 @@ def step(
         ), "Actions are not in action space."
 
         self._step_id += 1
-        self._buffer[-1] = self._add_to_episode_buffer(self._buffer[-1], step_data)
+        self._add_step_data(self._buffer[-1], step_data)
 
         if (
             self.max_buffer_steps is not None
@@ -215,11 +231,15 @@ def step(
         if step_data["terminations"] or step_data["truncations"]:
             self._episode_id += 1
             eps_buff = {"id": self._episode_id}
-            previous_data = {
+            previous_data: StepData = {
                 "observations": step_data["observations"],
                 "infos": step_data["infos"],
+                "rewards": None,
+                "actions": None,
+                "terminations": None,
+                "truncations": None
             }
-            eps_buff = self._add_to_episode_buffer(eps_buff, previous_data)
+            self._add_step_data(eps_buff, previous_data)
             self._buffer.append(eps_buff)
 
         return obs, rew, terminated, truncated, info
@@ -253,6 +273,11 @@ def reset(
         step_data = self._step_data_callback(env=self.env, obs=obs, info=info)
         self._episode_id += 1
 
         assert STEP_DATA_KEYS.issubset(
             step_data.keys()
         ), "One or more required keys is missing from 'step-data'"
+
+        # Remember the first recorded info as the structural reference; done after the
+        # key check above so a missing "infos" key fails with that check's message.
+        if self._record_infos and self._reference_info is None:
+            self._reference_info = step_data["infos"]
@@ -262,7 +285,7 @@ def reset(
             "seed": str(None) if seed is None else seed,
             "id": self._episode_id
         }
-        episode_buffer = self._add_to_episode_buffer(episode_buffer, step_data)
+        self._add_step_data(episode_buffer, step_data)
         self._buffer.append(episode_buffer)
         return obs, info
 
@@ -418,3 +441,34 @@ def close(self):
 
         self._buffer.clear()
         shutil.rmtree(self._tmp_dir.name)
+
+
+def _check_infos_same_shape(info_1: dict, info_2: dict) -> bool:
+    """Check whether two info dictionaries have the same structure.
+
+    Two infos have the same structure when they have the same keys, the values under
+    each key have the same type, nested dictionaries have (recursively) the same
+    structure, and ``np.ndarray`` values have the same shape and dtype.
+
+    Args:
+        info_1 (dict): reference info dictionary
+        info_2 (dict): info dictionary to compare against the reference
+
+    Returns:
+        bool: True if both dictionaries have the same structure, False otherwise
+    """
+    if info_1.keys() != info_2.keys():
+        return False
+    for key, value_1 in info_1.items():
+        value_2 = info_2[key]
+        if type(value_1) is not type(value_2):
+            return False
+        # Only bail out on a mismatch: returning the result of the first nested
+        # dict or array comparison would leave the remaining keys unchecked.
+        if isinstance(value_1, dict):
+            if not _check_infos_same_shape(value_1, value_2):
+                return False
+        elif isinstance(value_1, np.ndarray):
+            if value_1.shape != value_2.shape or value_1.dtype != value_2.dtype:
+                return False
+    return True
diff --git a/minari/dataset/episode_data.py b/minari/dataset/episode_data.py
@@ -19,6 +19,7 @@ class EpisodeData:
     rewards: np.ndarray
     terminations: np.ndarray
     truncations: np.ndarray
+    infos: dict
 
     def __repr__(self) -> str:
         return (
@@ -30,7 +31,8 @@ def __repr__(self) -> str:
             f"actions={EpisodeData._repr_space_values(self.actions)}, "
             f"rewards=ndarray of {len(self.rewards)} floats, "
             f"terminations=ndarray of {len(self.terminations)} bools, "
-            f"truncations=ndarray of {len(self.truncations)} bools"
+            f"truncations=ndarray of {len(self.truncations)} bools, "
+            f"infos=dict with keys of :{list(self.infos.keys())}"
             ")"
         )
 

diff --git a/minari/dataset/minari_storage.py b/minari/dataset/minari_storage.py
@@ -88,7 +88,16 @@ def new(
         obj._action_space = action_space
 
         if env_spec is not None:
-            metadata["env_spec"] = env_spec.to_json()
+            try:
+                metadata["env_spec"] = env_spec.to_json()
+            except TypeError:
+                # Best effort: the dataset is still created without "env_spec", but
+                # tell the user instead of dropping it silently.
+                import warnings
+
+                warnings.warn(
+                    "env_spec could not be serialized to JSON and was not stored in the dataset metadata."
+                )
         with h5py.File(obj._file_path, "a") as file:
             file.attrs.update(metadata)
         return obj
@@ -161,6 +164,31 @@ def apply(
         ep_dicts = self.get_episodes(episode_indices)
         return map(function, ep_dicts)
 
+    def _decode_infos(self, infos: h5py.Group):
+        """Read an HDF5 infos group into a nested dictionary.
+
+        Args:
+            infos (h5py.Group): group whose members are sub-groups or datasets
+
+        Returns:
+            dict: sub-groups decoded recursively into dicts, datasets read in full
+
+        Raises:
+            ValueError: if a member is neither an ``h5py.Group`` nor an ``h5py.Dataset``
+        """
+        decoded = {}
+        for name in infos.keys():
+            member = infos[name]
+            if isinstance(member, h5py.Dataset):
+                decoded[name] = member[()]
+                continue
+            if not isinstance(member, h5py.Group):
+                raise ValueError(
+                    "Infos are in an unsupported format; see Minari documentation for supported formats."
+                )
+            decoded[name] = self._decode_infos(member)
+        return decoded
+
     def _decode_space(
         self,
         hdf_ref: Union[h5py.Group, h5py.Dataset, h5py.Datatype],
@@ -219,6 +235,9 @@ def get_episodes(self, episode_indices: Iterable[int]) -> List[dict]:
                     "actions": self._decode_space(
                         ep_group["actions"], self.action_space
                     ),
+                    "infos": self._decode_infos(ep_group["infos"])
+                    if "infos" in ep_group
+                    else {},
                 }
                 for key in {"rewards", "terminations", "truncations"}:
                     group_value = ep_group[key]