diff --git a/src/agentlab/benchmarks/abstract_env.py b/src/agentlab/benchmarks/abstract_env.py
new file mode 100644
index 00000000..0529a128
--- /dev/null
+++ b/src/agentlab/benchmarks/abstract_env.py
@@ -0,0 +1,59 @@
+import gym
+from abc import ABC, abstractmethod
+from typing import Any
+
+
+class AbstractEnvArgs(ABC):
+    """Easily serializable class to store the arguments of an environment."""
+
+    @abstractmethod
+    def make_env(self, action_mapping, exp_dir, exp_task_kwargs) -> "AbstractEnv":
+        """Create an instance of the environment with the arguments stored in this object.
+
+        Args:
+            action_mapping (dict[str, str]): mapping from the agent's action space to the
+                environment's action space; see AbstractActionSet.to_python_code from
+                BrowserGym for an example.
+            exp_dir (str): directory where the experiment is stored.
+            exp_task_kwargs (dict[str, Any]): additional arguments for the environment.
+
+        Returns:
+            env (AbstractEnv): instance of the environment.
+        """
+
+
+class AbstractEnv(gym.Env, ABC):
+
+    @abstractmethod
+    def reset(self, seed: int = None) -> tuple[dict[str, Any], dict[str, Any]]:
+        """Reset the environment to its initial state, ready for an agent to start a new episode.
+
+        Args:
+            seed (int): seed for the environment's random number generator. Some tasks may be
+                deterministic and not require a seed.
+
+        Returns:
+            obs (dict[str, Any]): dictionary containing the observations.
+            env_info (dict[str, Any]): additional information about the environment (see step's docstring).
+        """
+
+    @abstractmethod
+    def step(self, action: str):
+        """Execute the action in the environment and return the next observations.
+
+        Args:
+            action (str): action to be executed in the environment, as a string.
+
+        Returns:
+            obs (dict[str, Any]): dictionary containing the observations.
+            reward (float): reward obtained after executing the action.
+            terminated (bool): whether the episode is terminated, i.e. the MDP reached a terminal state.
+            truncated (bool): whether the episode was truncated due to external reasons.
+            env_info (dict[str, Any]): additional information about the environment.
+            task_info (str): potential debugging information about the task, not intended for the agent.
+            action_exec_start (float): time when the action execution started.
+            action_exec_stop (float): time when the action execution ended.
+            action_exec_timeout (float): TODO I don't remember exactly what this is
+        """
+
+    @abstractmethod
+    def close(self):
+        """Close any resources used by the environment."""
diff --git a/src/agentlab/benchmarks/tau_bench.py b/src/agentlab/benchmarks/tau_bench.py
new file mode 100644
index 00000000..41ad55f1
--- /dev/null
+++ b/src/agentlab/benchmarks/tau_bench.py
@@ -0,0 +1,82 @@
+from dataclasses import dataclass
+
+import bgym
+
+from agentlab.benchmarks.abstract_env import AbstractEnv, AbstractEnvArgs
+
+
+@dataclass
+class TauBenchEnvArgs(AbstractEnvArgs):
+    """All arguments parameterizing a task in tau-bench."""
+
+    task_name: str
+    task_seed: int  # are there any seeds, or are tau-bench tasks deterministic?
+
+    def make_env(self, action_mapping, exp_dir, exp_task_kwargs) -> "AbstractEnv":
+        # TODO: look at how bgym does it; you need to register tasks and call gym.make(task_name).
+        pass
+
+
+class TauBenchEnv(AbstractEnv):
+    def reset(self, seed=None):
+        pass
+
+    def step(self, action: str):
+        pass
+
+    def close(self):
+        pass
+
+
+@dataclass
+class TauBenchActionSetArgs:
+    """Holds hyperparameters for the TauBenchActionSet."""
+
+    def make_action_set(self):
+        return TauBenchActionSet()
+
+
+class TauBenchActionSet(bgym.AbstractActionSet):
+    # TODO: get inspiration from bgym's HighLevelActionSet, perhaps reusing code there, TBD
+
+    def describe(self, with_long_description: bool = True, with_examples: bool = True) -> str:
+        # TODO: implement this method
+        pass
+
+    def example_action(self, abstract: bool) -> str:
+        # TODO: implement this method
+        pass
+
+    def to_python_code(self, action) -> str:
+        # TODO: implement this method
+        pass
+
+
+def _make_env_args_list():
+    # TODO: generate all env_args for the benchmark; get inspiration from bgym's
+    # task_list_from_metadata and make_env_args_list_from_repeat_tasks.
+    return [TauBenchEnvArgs(task_name="placeholder-task", task_seed=0)]  # placeholder values
+
+
+def _task_metadata():
+    # Load a dataframe containing the configuration of all tasks.
+    pass
+
+
+def make_tau_benchmark():
+    return bgym.Benchmark(
+        name="tau-bench",
+        high_level_action_set_args=TauBenchActionSetArgs(),
+        is_multi_tab=False,
+        supports_parallel_seeds=True,
+        backends=[
+            "taubench"
+        ],  # TODO: this backend is not implemented yet, and bgym's backend setup (match/case) needs to be revised.
+        env_args_list=_make_env_args_list(),  # TODO adapt
+        task_metadata=_task_metadata(),  # TODO adapt
+    )
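
Note (not part of the diff): a minimal sketch of how a concrete environment could satisfy the AbstractEnv contract, useful for sanity-checking the reset/step signatures. EchoEnv, its observation keys, and the choice to pack task_info and the timing fields into env_info are hypothetical illustrations, not part of tau-bench or bgym.

import time

from agentlab.benchmarks.abstract_env import AbstractEnv


class EchoEnv(AbstractEnv):
    """Hypothetical toy environment that echoes the agent's action back as the observation."""

    def reset(self, seed=None):
        obs = {"message": "hello"}  # observation dict handed to the agent
        env_info = {"seed": seed}   # additional information about the environment
        return obs, env_info

    def step(self, action: str):
        action_exec_start = time.time()
        obs = {"message": action}      # echo the action back as the next observation
        terminated = action == "stop"  # the MDP reached a terminal state
        truncated = False              # no external truncation (e.g. step limit) in this toy task
        reward = 1.0 if terminated else 0.0
        action_exec_stop = time.time()
        env_info = {
            "task_info": "toy echo task",            # debugging info, not intended for the agent
            "action_exec_start": action_exec_start,  # time when action execution started
            "action_exec_stop": action_exec_stop,    # time when action execution ended
        }
        return obs, reward, terminated, truncated, env_info

    def close(self):
        pass  # no resources to release in this toy example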
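
Similarly, a sketch (under stated assumptions) of how _make_env_args_list could expand task metadata into one TauBenchEnvArgs per (task, seed) pair, in the spirit of bgym's make_env_args_list_from_repeat_tasks; the "task_name" column and the n_seeds default are assumptions, and _task_metadata() would be the function that returns this DataFrame.

import pandas as pd

from agentlab.benchmarks.tau_bench import TauBenchEnvArgs


def make_env_args_list_sketch(metadata: pd.DataFrame, n_seeds: int = 3) -> list[TauBenchEnvArgs]:
    """Expand task metadata into one TauBenchEnvArgs per (task, seed) pair."""
    env_args_list = []
    for task_name in metadata["task_name"]:  # column name is an assumption
        for seed in range(n_seeds):
            env_args_list.append(TauBenchEnvArgs(task_name=task_name, task_seed=seed))
    return env_args_list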