From 2dea7b84818caf8a07a12e13931f0a6cef56fd15 Mon Sep 17 00:00:00 2001
From: Maximilian Naumann
Date: Thu, 18 Jan 2024 16:16:37 +0100
Subject: [PATCH] Housekeeping and static code analysis (#15)

* Move to pyproject.toml and update CODEOWNERS

* Increase build requirements to ensure editable build

* Add basic pylint checks

* Add mypy and make it happy

* Add and run isort
---
 .github/workflows/ci.yml          | 13 ++++-
 CODEOWNERS                        |  6 +--
 pyproject.toml                    | 51 +++++++++++++++++++
 requirements.txt                  | 14 -----
 scripts/run_a_star.py             |  2 +-
 setup.py                          | 16 ------
 .../graph_search/a_star.py        |  4 +-
 .../mdp/mdp.py                    | 51 +++++++++++--------
 .../utils/grid_plotting.py        |  1 +
 tests/test_a_star.py              |  2 +-
 tests/test_mdp.py                 |  6 +--
 tests/utils/test_grid_plotting.py |  8 +--
 12 files changed, 107 insertions(+), 67 deletions(-)
 create mode 100644 pyproject.toml
 delete mode 100644 requirements.txt
 delete mode 100644 setup.py

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 9ec9e07..4f378e9 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -18,11 +18,20 @@ jobs:
         python-version: 3.7
 
     - name: set up env
-      run: python -m pip install -e .
+      run: python -m pip install -e .[docs,dev]
 
     - name: run black
       run: black --check .
 
+    - name: run isort
+      run: isort --check .
+
+    - name: run pylint for mdp folder
+      run: pylint src/behavior_generation_lecture_python/mdp --errors-only
+
+    - name: run mypy for mdp folder
+      run: mypy src/behavior_generation_lecture_python/mdp
+
    - name: test
      run: pytest
 
@@ -49,7 +58,7 @@ jobs:
         python-version: 3.7
 
     - name: set up env
-      run: python -m pip install -e .
+      run: python -m pip install -e .[docs]
 
     - name: copy notebooks to docs folder
       run: cp -r notebooks/* docs/notebooks
diff --git a/CODEOWNERS b/CODEOWNERS
index a1539aa..232bd05 100644
--- a/CODEOWNERS
+++ b/CODEOWNERS
@@ -1,3 +1,3 @@
-* m-naumann@naumail.de
-* julian.truetsch@kit.edu
-* kevin.roesch@kit.edu
+* @m-naumann
+* @jtruetsch
+* @keroe
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..99f88cb
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,51 @@
+[project]
+name = "behavior_generation_lecture_python"
+version = "0.0.2"
+description = "Python code for the lecture 'Verhaltensgenerierung für Fahrzeuge' at KIT"
+readme = "README.md"
+requires-python = ">=3.7"
+license = {file = "LICENSE"}
+authors = [
+  {name = "Organizers of the lecture 'Verhaltensgenerierung für Fahrzeuge' at KIT" }
+]
+maintainers = [
+  {name = "Maximilian Naumann", email = "maximilian.naumann@de.bosch.com" }
+]
+
+dependencies = [
+  "numpy",
+  "matplotlib>=2.2.4",
+  "scipy",
+  "jupyter",
+  "python-statemachine"
+]
+
+[project.optional-dependencies]
+dev = [
+  "black[jupyter]==22.3.0",
+  "pytest",
+  "pytest-cov>=3.0.0",
+  "pylint",
+  "mypy",
+  "isort"
+]
+docs = [
+  "mkdocs-material",
+  "mkdocs-jupyter",
+  "mkdocstrings[python]>=0.18",
+  "mkdocs-gen-files",
+  "mkdocs-literate-nav",
+  "mkdocs-section-index"
+]
+
+[project.urls]
+"Homepage" = "https://kit-mrt.github.io/behavior_generation_lecture_python/"
+"Bug Reports" = "https://github.com/KIT-MRT/behavior_generation_lecture_python/issues"
+"Source" = "https://github.com/KIT-MRT/behavior_generation_lecture_python"
+
+[build-system]
+requires = ["setuptools>=64.0.0", "wheel", "pip>=21.3.0"]
+build-backend = "setuptools.build_meta"
+
+[tool.isort]
+profile = "black"
diff --git a/requirements.txt b/requirements.txt
deleted file mode 100644
index 47fe805..0000000
--- a/requirements.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-numpy
-matplotlib>=2.2.4
-scipy
-jupyter
-black[jupyter]==22.3.0
-pytest
-pytest-cov>=3.0.0
-python-statemachine
-mkdocs-material
-mkdocs-jupyter
-mkdocstrings[python]>=0.18
-mkdocs-gen-files
-mkdocs-literate-nav
-mkdocs-section-index
diff --git a/scripts/run_a_star.py b/scripts/run_a_star.py
index 29d9031..82349da 100644
--- a/scripts/run_a_star.py
+++ b/scripts/run_a_star.py
@@ -1,6 +1,6 @@
 import numpy as np
 
-from behavior_generation_lecture_python.graph_search.a_star import Node, Graph
+from behavior_generation_lecture_python.graph_search.a_star import Graph, Node
 
 
 def main():
diff --git a/setup.py b/setup.py
deleted file mode 100644
index e7d274a..0000000
--- a/setup.py
+++ /dev/null
@@ -1,16 +0,0 @@
-from setuptools import find_packages, setup
-
-# we don't distinguish between the project and environment requirements, for details
-# see https://packaging.python.org/discussions/install-requires-vs-requirements/
-with open("requirements.txt") as f:
-    required_packages = f.read().splitlines()
-
-setup(
-    name="behavior_generation_lecture_python",
-    version="0.0.1",
-    author="Organizers of the lecture 'Verhaltensgenerierung für Fahrzeuge' at KIT",
-    package_dir={"": "src"},
-    packages=find_packages(where="src"),
-    python_requires=">=3.6, <4",
-    install_requires=required_packages,
-)
diff --git a/src/behavior_generation_lecture_python/graph_search/a_star.py b/src/behavior_generation_lecture_python/graph_search/a_star.py
index 16b9920..598ac37 100644
--- a/src/behavior_generation_lecture_python/graph_search/a_star.py
+++ b/src/behavior_generation_lecture_python/graph_search/a_star.py
@@ -1,10 +1,10 @@
 from __future__ import annotations
 
+from typing import Dict, List, Set
+
 import matplotlib.pyplot as plt
 import numpy as np
 
-from typing import Dict, List, Set
-
 
 class Node:
     def __init__(
diff --git a/src/behavior_generation_lecture_python/mdp/mdp.py b/src/behavior_generation_lecture_python/mdp/mdp.py
index cfa13b3..5879a74 100644
--- a/src/behavior_generation_lecture_python/mdp/mdp.py
+++ b/src/behavior_generation_lecture_python/mdp/mdp.py
@@ -93,7 +93,7 @@ def __init__(
             for action in self.actions:
                 if (state, action) not in transition_probabilities:
                     continue
-                total_prob = 0
+                total_prob = 0.0
                 for prob, next_state in transition_probabilities[(state, action)]:
                     assert (
                         next_state in self.states
@@ -151,14 +151,17 @@ def sample_next_state(self, state, action) -> Any:
         return prob_per_transition[choice][1]
 
 
+GridState = Tuple[int, int]
+
+
 class GridMDP(MDP):
     def __init__(
         self,
         grid: List[List[Union[float, None]]],
-        initial_state: Tuple[int, int],
-        terminal_states: Set[Tuple[int, int]],
+        initial_state: GridState,
+        terminal_states: Set[GridState],
         transition_probabilities_per_action: Dict[
-            Tuple[int, int], List[Tuple[float, Tuple[int, int]]]
+            GridState, List[Tuple[float, GridState]]
         ],
         restrict_actions_to_available_states: Optional[bool] = False,
     ) -> None:
@@ -186,7 +189,9 @@ def __init__(
             for y in range(rows):
                 if grid[y][x] is not None:
                     states.add((x, y))
-                    reward[(x, y)] = grid[y][x]
+                    reward_xy = grid[y][x]
+                    assert reward_xy is not None
+                    reward[(x, y)] = reward_xy
 
         transition_probabilities = {}
         for state in states:
@@ -260,8 +265,11 @@ def _next_state_deterministic(
     return state
 
 
+StateValueTable = Dict[Any, float]
+
+
 def expected_utility_of_action(
-    mdp: MDP, state: Any, action: Any, utility_of_states: Dict[Any, float]
+    mdp: MDP, state: Any, action: Any, utility_of_states: StateValueTable
 ) -> float:
     """Compute the expected utility of taking an action in a state.
 
@@ -283,7 +291,7 @@ def expected_utility_of_action(
     )
 
 
-def derive_policy(mdp: MDP, utility_of_states: Dict[Any, float]) -> Dict[Any, Any]:
+def derive_policy(mdp: MDP, utility_of_states: StateValueTable) -> Dict[Any, Any]:
     """Compute the best policy for an MDP given the utility of the states.
 
     Args:
@@ -310,7 +318,7 @@ def value_iteration(
     epsilon: float,
     max_iterations: int,
     return_history: Optional[bool] = False,
-) -> Union[Dict[Any, float], List[Dict[Any, float]]]:
+) -> Union[StateValueTable, List[StateValueTable]]:
     """Derive a utility estimate by means of value iteration.
 
     Args:
@@ -326,11 +334,11 @@ def value_iteration(
         The final utility estimate, if return_history is false.
         The history of utility estimates as list, if return_history is true.
     """
-    utility = {state: 0 for state in mdp.get_states()}
+    utility = {state: 0.0 for state in mdp.get_states()}
     utility_history = [utility.copy()]
     for _ in range(max_iterations):
         utility_old = utility.copy()
-        max_delta = 0
+        max_delta = 0.0
         for state in mdp.get_states():
             utility[state] = max(
                 expected_utility_of_action(
@@ -348,8 +356,11 @@ def value_iteration(
     raise RuntimeError(f"Did not converge in {max_iterations} iterations")
 
 
+QTable = Dict[Tuple[Any, Any], float]
+
+
 def best_action_from_q_table(
-    *, state: Any, available_actions: Set[Any], q_table: Dict[Tuple[Any, Any], float]
+    *, state: Any, available_actions: Set[Any], q_table: QTable
 ) -> Any:
     """Derive the best action from a Q table.
 
@@ -361,9 +372,9 @@ def best_action_from_q_table(
     Returns:
         The best action according to the Q table.
     """
-    available_actions = list(available_actions)
-    values = np.array([q_table[(state, action)] for action in available_actions])
-    action = available_actions[np.argmax(values)]
+    available_actions_list = list(available_actions)
+    values = np.array([q_table[(state, action)] for action in available_actions_list])
+    action = available_actions_list[np.argmax(values)]
     return action
 
 
@@ -376,15 +387,13 @@ def random_action(available_actions: Set[Any]) -> Any:
     Returns:
         A random action.
     """
-    available_actions = list(available_actions)
-    num_actions = len(available_actions)
+    available_actions_list = list(available_actions)
+    num_actions = len(available_actions_list)
     choice = np.random.choice(num_actions)
-    return available_actions[choice]
+    return available_actions_list[choice]
 
 
-def greedy_value_estimate_for_state(
-    *, q_table: Dict[Tuple[Any, Any], float], state: Any
-) -> float:
+def greedy_value_estimate_for_state(*, q_table: QTable, state: Any) -> float:
     """Compute the greedy (best possible) value estimate for a state from the Q table.
 
     Args:
@@ -407,7 +416,7 @@ def q_learning(
     epsilon: float,
    iterations: int,
     return_history: Optional[bool] = False,
-) -> Dict[Tuple[Any, Any], float]:
+) -> Union[QTable, List[QTable]]:
     """Derive a value estimate for state-action pairs by means of Q learning.
     Args:
diff --git a/src/behavior_generation_lecture_python/utils/grid_plotting.py b/src/behavior_generation_lecture_python/utils/grid_plotting.py
index 023c378..37262bb 100644
--- a/src/behavior_generation_lecture_python/utils/grid_plotting.py
+++ b/src/behavior_generation_lecture_python/utils/grid_plotting.py
@@ -1,4 +1,5 @@
 from collections import defaultdict
+
 import matplotlib.pyplot as plt
 import numpy as np
 
diff --git a/tests/test_a_star.py b/tests/test_a_star.py
index d5fa657..80debb5 100644
--- a/tests/test_a_star.py
+++ b/tests/test_a_star.py
@@ -1,7 +1,7 @@
 import matplotlib
 import numpy as np
 
-from behavior_generation_lecture_python.graph_search.a_star import Node, Graph
+from behavior_generation_lecture_python.graph_search.a_star import Graph, Node
 
 
 def test_example_graph():
diff --git a/tests/test_mdp.py b/tests/test_mdp.py
index f2da1fa..582026a 100644
--- a/tests/test_mdp.py
+++ b/tests/test_mdp.py
@@ -5,13 +5,13 @@
     MDP,
     SIMPLE_MDP_DICT,
     GridMDP,
+    best_action_from_q_table,
     derive_policy,
     expected_utility_of_action,
-    value_iteration,
-    best_action_from_q_table,
-    random_action,
     greedy_value_estimate_for_state,
     q_learning,
+    random_action,
+    value_iteration,
 )
 
 
diff --git a/tests/utils/test_grid_plotting.py b/tests/utils/test_grid_plotting.py
index 78a3fdc..a6f0bff 100644
--- a/tests/utils/test_grid_plotting.py
+++ b/tests/utils/test_grid_plotting.py
@@ -1,14 +1,14 @@
 import matplotlib
 
-from behavior_generation_lecture_python.utils.grid_plotting import (
-    make_plot_grid_step_function,
-    make_plot_policy_step_function,
-)
 from behavior_generation_lecture_python.mdp.mdp import (
     GRID_MDP_DICT,
     GridMDP,
     derive_policy,
 )
+from behavior_generation_lecture_python.utils.grid_plotting import (
+    make_plot_grid_step_function,
+    make_plot_policy_step_function,
+)
 
 TRUE_UTILITY_GRID_MDP = {
     (0, 0): 0.705,
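
Review note (not part of the patch): with the dependencies now split into a core set plus the dev and docs extras defined in pyproject.toml, the new checks can be reproduced locally with the same commands the workflow above runs:

    python -m pip install -e .[docs,dev]
    black --check .
    isort --check .
    pylint src/behavior_generation_lecture_python/mdp --errors-only
    mypy src/behavior_generation_lecture_python/mdp
    pytest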
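
A minimal usage sketch of the mdp module after this change, illustrating the new type aliases (StateValueTable, QTable, GridState) and the functions whose signatures the patch touches. This is a reviewer illustration, not code from the patch: the GridMDP(**GRID_MDP_DICT) construction is inferred from the imports in tests/utils/test_grid_plotting.py, and the keyword-argument names for value_iteration are assumed from the signature fragments visible above.

    from behavior_generation_lecture_python.mdp.mdp import (
        GRID_MDP_DICT,
        GridMDP,
        derive_policy,
        value_iteration,
    )

    # Assumption: GRID_MDP_DICT holds the constructor arguments for the
    # example grid world used in the tests.
    grid_mdp = GridMDP(**GRID_MDP_DICT)

    # Returns a StateValueTable (Dict[Any, float]); with return_history=True
    # it would return List[StateValueTable] instead, hence the Union return type.
    utility = value_iteration(mdp=grid_mdp, epsilon=0.001, max_iterations=1000)

    # Maps each state to the action maximizing expected utility.
    policy = derive_policy(grid_mdp, utility)
    print(policy)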