BatsResearch · maxzuo · Jun 25, 2024 · Jun 24, 2024 · Jun 24, 2024 · Jun 25, 2024
diff --git a/README.md b/README.md
@@ -1 +1,77 @@
-# planetarium
+# planetarium🪐
+
+Planetarium🪐 is a [dataset](https://huggingface.co/datasets/BatsResearch/planetarium) and benchmark for assessing LLMs in translating natural language descriptions of planning problems into PDDL. We developed a robust method for comparing PDDL problem descriptions using graph isomorphism.
+
+## Installation
+To install the `planetarium` package, you can use the following command:
+```bash
+pip install git+https://github.com/BatsResearch/planetarium.git
+```
+
+For development, or to use our evaluate & finetune scripts, clone the repository and install all dependencies with the following commands:
+```bash
+git clone https://github.com/BatsResearch/planetarium.git
+cd planetarium
+poetry install --with all
+```
+
+To use `planetarium.downward`, you will need both the [Fast-Downward](https://www.fast-downward.org/) planner and the [VAL](https://github.com/KCL-Planning/VAL) plan validator installed. The following commands are one way to install them with minimal overhead:
+```bash
+# Fast-Downward via Apptainer
+apptainer pull fast-downward.sif docker://aibasel/downward:latest
+# VAL download link might not work, follow instructions to download binary at: https://github.com/KCL-Planning/VAL
+mkdir tmp
+curl -o tmp/VAL.zip https://dev.azure.com/schlumberger/4e6bcb11-cd68-40fe-98a2-e3777bfec0a6/_apis/build/builds/77/artifacts?artifactName=linux64\&api-version=7.1\&%24format=zip
+unzip tmp/VAL.zip -d tmp/
+tar -xzvf tmp/linux64/*.tar.gz -C tmp/ --strip-components=1
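+# clean up (this deletes everything under tmp/, including the extracted VAL files; copy the binaries you need elsewhere first)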
+rm -rf tmp
+# Make sure to add fast-downward.sif and VAL to your PATH or make aliases.
+```
+
+## Basic Usage
+To evaluate a PDDL problem description, we can use the `planetarium.evaluate` module:
+```python
+from planetarium import evaluate
+...
+evaluate.evaluate(gt_pddl_str, pred_pddl_str)
+```
+The supported domains are `blocksworld` and `gripper`.
+
+## Dataset
+The main page for the dataset can be found [here](https://huggingface.co/datasets/BatsResearch/planetarium).
+
+Here is an example of how to load the dataset:
+```python
+from datasets import load_dataset
+
+dataset = load_dataset("BatsResearch/planetarium")
+```
+
+You can reproduce the dataset, the splits, and a report by running the following command:
+```bash
+python dataset_generator.py -c dataset_config.yaml
+```
+
+By modifying the `dataset_config.yaml` file, you can change the dataset splits, the number of samples, and produce even more examples!
+
+### Dataset Report
+Here is a summary of the types of PDDL problems in the dataset:
+
+Total number of problems: $132,037$.
+
+#### Abstractness Split
+| Init | Goal | blocksworld | gripper |
+|:---:|:---:|---:|---:|
+| abstract | abstract | $23,144$ | $10,632$ |
+| abstract | explicit | $23,086$ | $9,518$ |
+| explicit | abstract | $23,087$ | $10,313$ |
+| explicit | explicit | $23,033$ | $9,224$ |
+#### Size Splits (Number of Propositions in Ground Truth)
+| Num. of Propositions | blocksworld | gripper |
+|:---:|---:|---:|
+| $0$ - $20$ | $1,012$ | $379$ |
+| $20$ - $40$ | $10,765$ | $2,112$ |
+| $40$ - $60$ | $50,793$ | $9,412$ |
+| $60$ - $80$ | $26,316$ | $25,346$ |
+| $80$ - inf | $3,464$ | $2,438$ |
diff --git a/evaluate.py b/evaluate.py
@@ -17,8 +17,6 @@
 from planetarium import builder, graph, metric, oracle
 import llm_planner as llmp
 
-from utils import apply_template
-
 HF_USER_TOKEN = os.getenv("HF_USER_TOKEN")
 
 
@@ -82,26 +80,23 @@ def plan(
     context = []
     for example_problem in example_problems:
         context.extend(
-            apply_template(
-                example_problem,
+            example_problem.apply_template(
                 domain_prompt,
                 problem_prompt,
             )
         )
 
     if isinstance(problem, llmp.PlanningProblem):
         messages = [
-            apply_template(
-                problem,
+            problem.apply_template(
                 domain_prompt,
                 problem_prompt,
                 include_answer=False,
             )
         ]
     else:
         messages = [
-            apply_template(
-                p,
+            p.apply_template(
                 domain_prompt,
                 problem_prompt,
                 include_answer=False,

diff --git a/finetune.py b/finetune.py
@@ -26,7 +26,6 @@
 import tqdm as tqdm
 
 import llm_planner as llmp
-from utils import apply_template
 
 from accelerate import Accelerator
 
@@ -137,8 +136,7 @@ def preprocess(
     inputs = [
         strip(
             tokenizer.apply_chat_template(
-                apply_template(
-                    llmp.PlanningProblem(nl, d, p),
+                    llmp.PlanningProblem(nl, d, p).apply_template(
                     domain_prompt,
                     problem_prompt,
                 ),

diff --git a/llm_planner.py b/llm_planner.py
@@ -31,6 +31,39 @@ def __init__(
         self.domain = domain
         self.problem = problem
 
+    def apply_template(
+        self,
+        domain_prompt: str = "",
+        problem_prompt: str = "",
+        include_answer: bool = True,
+    ) -> list[dict[str, str]]:
+        """Apply problem template to the problem.
+
+        Args:
+            domain_prompt (str, optional): How to prompt the domain. Defaults to "".
+            problem_prompt (str, optional): How to prompt the problem. Defaults to "".
+            include_answer (bool, optional): Whether to include the answer. Defaults to True.
+
+        Returns:
+            list[dict[str, str]]: Chat messages: the user prompt, followed by the assistant answer if `include_answer` is True.
+        """
+        return [
+            {
+                "role": "user",
+                "content": f"{problem_prompt} {self.natural_language} "
+                + f"{domain_prompt}\n{self.domain}\n",
+            },
+        ] + (
+            [
+                {
+                    "role": "assistant",
+                    "content": " " + self.problem,
+                },
+            ]
+            if include_answer
+            else []
+        )
+
 
 class Planner(abc.ABC):
     @abc.abstractmethod

diff --git a/planetarium/graph.py b/planetarium/graph.py
@@ -4,6 +4,8 @@
 import enum
 from functools import cached_property
 
+import matplotlib.pyplot as plt
+import networkx as nx
 import rustworkx as rx
 
 
@@ -360,6 +362,40 @@ def __eq__(self, other: "PlanGraph") -> bool:
             and self.domain == other.domain
         )
 
+    def plot(self, fig: plt.Figure | None = None) -> plt.Figure:
+        """Generate a plot of the graph, sorted by topological generation.
+
+        Args:
+            fig (plt.Figure | None, optional): The figure to plot on. Defaults
+                to None.
+
+        Returns:
+            plt.Figure: The figure containing the plot.
+        """
+        # rustworkx (rx) has no plotting functionality, so rebuild the graph in networkx
+        nx_graph = nx.MultiDiGraph()
+        nx_graph.add_edges_from(
+            [(u.node, v.node, {"data": edge}) for u, v, edge in self.edges]
+        )
+
+        for layer, nodes in enumerate(nx.topological_generations(nx_graph)):
+            for node in nodes:
+                nx_graph.nodes[node]["layer"] = layer
+
+        pos = nx.multipartite_layout(
+            nx_graph,
+            align="horizontal",
+            subset_key="layer",
+            scale=-1,
+        )
+
+        if fig is None:
+            fig = plt.figure()
+
+        nx.draw(nx_graph, pos=pos, ax=fig.gca(), with_labels=True)
+
+        return fig
+
 
 class SceneGraph(PlanGraph):
     """
@@ -523,8 +559,6 @@ def goal_predicates(self) -> list[dict[str, Any]]:
 
         return predicates
 
-
-
     @cached_property
     def _decompose(self) -> tuple[SceneGraph, SceneGraph]:
         """