Merge branch 'main' of github.com:loreloc/sos-npcs
loreloc committed Jul 21, 2024
2 parents 633d4d8 + 3b97b5f · commit 26c246b
Showing 8 changed files with 140 additions and 23 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -8,6 +8,7 @@ __pycache__
/tboard*
/checkpoints*
/wandb*
/slurm/logs/*
figures
mnt

3 changes: 3 additions & 0 deletions pytest.ini
@@ -1,3 +1,6 @@
[pytest]
pythonpath = src
testpaths = src/tests
markers =
slow: marks tests as slow (deselect with '-m "not slow"')
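
The new marker lets slow tests be deselected from the default run. A hypothetical example of its use (the real slow tests are tagged in src/tests/test_likelihood.py below); running pytest -m "not slow" skips them:

import pytest

@pytest.mark.slow  # deselected by: pytest -m "not slow"
def test_expensive_end_to_end():
    ...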

28 changes: 17 additions & 11 deletions slurm/launch.sh
@@ -4,36 +4,42 @@ export PROJECT_NAME="sos-npcs"
export PYTHONPATH=${PYTHONPATH:-src}

# These flags need to be updated accordingly:
# SCRATCH_DIR: a directory within the local filesystem of a node
# EXPS_ID: some identifier for the experiments
# VENV_PATH: the path containing the pip virtual environment
export SCRATCH_DIR=${SCRATCH_DIR:-/disk/scratch_big/$USER}
export EXPS_ID=${EXPS_ID:-exps}
export VENV_PATH=${VENV_PATH:-venv}

# The Slurm partition to use, e.g.,
#PARTITION=PGR-Standard
PARTITION=${PARTITION:-}
PARTITION=${PARTITION:-PGR-Standard}
# An optional list of Slurm nodes to exclude, e.g.,
#EXCL_NODES=${EXCL_NODES:-busynode[01-07]}
EXCL_NODES=${EXCL_NODES:-}
# An optional list of Slurm nodes to allow
LIST_NODES=${LIST_NODES:-}
EXCL_NODES=${EXCL_NODES:-crannog[01-07],damnii[05-08]}
# The maximum number of parallel jobs to dispatch
MAX_PARALLEL_JOBS=12

# Resources and maximum execution time
NUM_CPUS=2
NUM_GPUS=1
TIME=120:00:00
TIME=167:00:00

JOB_NAME="$PROJECT_NAME-$EXPS_ID"
OUTPUT="slurm/logs/$JOB_NAME-%j.out"
LOG_DIRECTORY="slurm/logs/$PROJECT_NAME/$EXPS_ID"
LOG_OUTPUT="$LOG_DIRECTORY/%j.out"
EXPS_FILE="$1"
NUM_EXPS=$(wc -l < "${EXPS_FILE}")

sbatch --job-name $JOB_NAME --output "$OUTPUT" --partition "$PARTITION" \
--nodes 1 --ntasks 1 --cpus-per-task $NUM_CPUS --gres=gpu:$NUM_GPUS \
--time $TIME --exclude "$EXCL_NODES" \
echo "Creating slurm logging directory $LOG_DIRECTORY"
mkdir -p "$LOG_DIRECTORY"

echo "Slurm job settings"
echo "Partition: $PARTITION"
echo "Excl nodes: $EXCL_NODES"

sbatch --job-name $JOB_NAME --output "$LOG_OUTPUT" --error "$LOG_OUTPUT" \
--partition "$PARTITION" --nodes 1 --ntasks 1 \
--cpus-per-task $NUM_CPUS --gres=gpu:$NUM_GPUS \
--time $TIME --exclude="$EXCL_NODES" \
--array=1-${NUM_EXPS}%${MAX_PARALLEL_JOBS} \
slurm/run.sh "$EXPS_FILE"
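
The sbatch --array=1-${NUM_EXPS}%${MAX_PARALLEL_JOBS} flag dispatches one task per line of the experiments file, with at most MAX_PARALLEL_JOBS running concurrently. A hypothetical sketch of how a task can map its array index back to its experiment line (the actual logic lives in slurm/run.sh, whose body is only partially shown below):

import os

task_id = int(os.environ["SLURM_ARRAY_TASK_ID"])  # 1-based, set by sbatch --array
with open("experiments.txt") as f:                # stands in for the "$EXPS_FILE" argument
    command = f.readlines()[task_id - 1].strip()
print(command)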

1 change: 0 additions & 1 deletion slurm/logs/.gitignore

This file was deleted.

11 changes: 11 additions & 0 deletions slurm/run.sh
@@ -1,5 +1,16 @@
#!/bin/bash

# Find a suitable scratch directory
SCRATCH_DIR="/disk/scratch_big"
if [ ! -w "$SCRATCH_DIR" ]
then
SCRATCH_DIR="/disk/scratch"
fi

echo "Running job on the partition $SLURM_JOB_PARTITION"
echo " and on the node $SLURMD_NODENAME"
echo "Using scratch directory $SCRATCH_DIR"

RESULTS_PATH="$SCRATCH_DIR/$SLURM_JOB_ID"
DESTINATION_PATH="$HOME/$PROJECT_NAME"
TBOARD_DIR="$RESULTS_PATH/tboard-runs/$EXPS_ID"
91 changes: 91 additions & 0 deletions src/layers.py
@@ -0,0 +1,91 @@
from typing import Optional, Dict, Any, Tuple, cast

import torch
from cirkit.backend.torch.compiler import TorchCompiler
from cirkit.backend.torch.layers import TorchSumLayer, TorchDenseLayer
from cirkit.backend.torch.optimization.registry import LayerOptMatch
from cirkit.backend.torch.parameters.parameter import TorchParameter
from cirkit.backend.torch.semiring import Semiring


class TorchDenseProductLayer(TorchSumLayer):
def __init__(
self,
num_input_units: int,
num_output_units: int,
*,
num_folds: int = 1,
weight1: TorchParameter,
weight2: TorchParameter,
semiring: Optional[Semiring] = None,
) -> None:
assert num_input_units == weight1.shape[1] * weight2.shape[1]
assert num_output_units == weight1.shape[0] * weight2.shape[0]
assert weight1.num_folds == num_folds
assert weight2.num_folds == num_folds
super().__init__(
num_input_units,
num_output_units,
arity=1,
num_folds=num_folds,
semiring=semiring,
)
self._in_shape = (weight1.shape[1], weight2.shape[1])
self.weight1 = weight1
self.weight2 = weight2

@property
def config(self) -> Dict[str, Any]:
return {
"num_input_units": self.num_input_units,
"num_output_units": self.num_output_units,
"num_folds": self.num_folds,
}

@property
def params(self) -> Dict[str, TorchParameter]:
return dict(weight1=self.weight1, weight2=self.weight2)

def forward(self, x: torch.Tensor) -> torch.Tensor:
# x: (F, H=1, B, Ki) -> (F, B, Ki)
x = x.squeeze(dim=1)
# x: (F, B, Ki) -> (F, B, Kj, Kk)
x = x.view(x.shape[0], x.shape[1], *self._in_shape)
# weight1: (F, Kp, Kj)
weight1 = self.weight1()
# weight2: (F, Kq, Kk)
weight2 = self.weight2()
# y: (F, B, Kp, Kk)
y = self.semiring.einsum(
"fbjk,fpj->fbpk", inputs=(x,), operands=(weight1,), dim=-2, keepdim=True
)
# y: (F, B, Kp, Kq)
y = self.semiring.einsum(
"fbpk,fqk->fbpq", inputs=(y,), operands=(weight2,), dim=-1, keepdim=True
)
# return y: (F, B, Kp * Kq) = (F, B, Ko)
return y.view(y.shape[0], y.shape[1], -1)


def apply_dense_product(
compiler: "TorchCompiler", match: LayerOptMatch
) -> Tuple[TorchDenseProductLayer]:
# Retrieve the matched dense layer and the inputs to the kronecker parameter node
dense = cast(TorchDenseLayer, match.entries[0])
weight_patterns = match.pentries[0]["weight"]
kronecker = weight_patterns[0].entries[0]
weight1_output, weight2_output = dense.weight.node_inputs(kronecker)

# Build new torch parameter computational graphs by taking
# the sub-computational graph rooted at the inputs of the kronecker parameter node
weight1, weight2 = dense.weight.extract_subgraphs(weight1_output, weight2_output)

# Instantiate a single torch dense product layer
dprod = TorchDenseProductLayer(
dense.num_input_units,
dense.num_output_units,
weight1=weight1,
weight2=weight2,
semiring=compiler.semiring,
)
return (dprod,)
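
apply_dense_product rewrites a dense layer whose weight is a Kronecker product, W = W1 ⊗ W2, into the two-step contraction above, cutting the matrix-vector cost per sample from O(Kp·Kq·Kj·Kk) to O(Kp·Kj·Kk + Kp·Kq·Kk); models.py below registers this rule for the dense-Kronecker pattern. A minimal numeric check of the identity under the plain real semiring, with made-up sizes and torch.einsum standing in for cirkit's semiring einsum:

import torch

Kj, Kk, Kp, Kq, B = 3, 4, 2, 5, 7
w1 = torch.randn(Kp, Kj)
w2 = torch.randn(Kq, Kk)
x = torch.randn(B, Kj * Kk)

# Materializing the Kronecker-product weight: (B, Kj*Kk) @ (Kj*Kk, Kp*Kq)
y_dense = x @ torch.kron(w1, w2).T

# Two-step contraction, as in TorchDenseProductLayer.forward
z = x.view(B, Kj, Kk)
z = torch.einsum("bjk,pj->bpk", z, w1)
z = torch.einsum("bpk,qk->bpq", z, w2)
y_prod = z.reshape(B, Kp * Kq)

assert torch.allclose(y_dense, y_prod, atol=1e-4)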
26 changes: 15 additions & 11 deletions src/models.py
@@ -5,6 +5,7 @@
import torch
from cirkit.backend.torch.circuits import TorchCircuit, TorchConstantCircuit
from cirkit.backend.torch.layers import TorchSumLayer, TorchLayer
from cirkit.backend.torch.optimization.layers import DenseKroneckerPattern
from cirkit.pipeline import PipelineContext, compile
from cirkit.symbolic.circuit import Circuit
from cirkit.symbolic.dtypes import DataType
@@ -18,8 +19,7 @@
from cirkit.symbolic.parameters import (
ExpParameter,
Parameter,
ClampParameter,
TensorParameter,
TensorParameter, ScaledSigmoidParameter,
)
from cirkit.templates.region_graph import (
RegionGraph,
@@ -31,6 +31,8 @@

import cirkit.symbolic.functional as SF

from layers import apply_dense_product


class PC(nn.Module, ABC):
def __init__(self, num_variables: int) -> None:
@@ -180,6 +182,10 @@ def __init__(
self._pipeline = PipelineContext(
backend="torch", semiring="complex-lse-sum", fold=True, optimize=True
)
# Use a different optimization rule for the dense-kronecker pattern
self._pipeline._compiler._optimization_registry["layer_shatter"].add_rule(
apply_dense_product, signature=DenseKroneckerPattern
)
self._circuit, self._int_sq_circuit = self._build_circuits(
num_input_units,
num_sum_units,
@@ -325,7 +331,7 @@ def categorical_layer_factory(
num_channels,
num_categories=input_layer_kwargs["num_categories"],
logits_factory=lambda shape: Parameter.from_leaf(
TensorParameter(*shape, initializer=NormalInitializer(0.0, 3e-1))
TensorParameter(*shape, initializer=NormalInitializer(0.0, 1e-1))
),
)

@@ -340,9 +346,8 @@ def gaussian_layer_factory(
TensorParameter(*shape, initializer=NormalInitializer(0.0, 1.0))
),
stddev_factory=lambda shape: Parameter.from_sequence(
TensorParameter(*shape, initializer=NormalInitializer(0.0, 3e-1)),
ExpParameter(shape),
ClampParameter(shape, vmin=1e-5),
TensorParameter(*shape, initializer=NormalInitializer(0.0, 1e-1)),
ScaledSigmoidParameter(shape, vmin=1e-5, vmax=1.0),
),
)

@@ -360,7 +365,7 @@ def dense_layer_factory(
num_output_units,
weight_factory=lambda shape: Parameter.from_unary(
ExpParameter(shape),
TensorParameter(*shape, initializer=NormalInitializer(0.0, 3e-1)),
TensorParameter(*shape, initializer=NormalInitializer(0.0, 1e-1)),
),
)

@@ -406,7 +411,7 @@ def categorical_layer_factory(
num_channels,
num_categories=input_layer_kwargs["num_categories"],
logits_factory=lambda shape: Parameter.from_leaf(
TensorParameter(*shape, initializer=NormalInitializer(0.0, 3e-1))
TensorParameter(*shape, initializer=NormalInitializer(0.0, 1e-1))
),
)

@@ -421,9 +426,8 @@ def gaussian_layer_factory(
TensorParameter(*shape, initializer=NormalInitializer(0.0, 1.0))
),
stddev_factory=lambda shape: Parameter.from_sequence(
TensorParameter(*shape, initializer=NormalInitializer(0.0, 3e-1)),
ExpParameter(shape),
ClampParameter(shape, vmin=1e-5),
TensorParameter(*shape, initializer=NormalInitializer(0.0, 1e-1)),
ScaledSigmoidParameter(shape, vmin=1e-5, vmax=1.0)
),
)
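
The switch from exp-then-clamp to ScaledSigmoidParameter for the Gaussian stddevs keeps the parameterization smooth and bounded on both sides. A minimal sketch of the presumed semantics (assuming the usual affine-sigmoid reparameterization; the exact cirkit implementation may differ):

import torch

def scaled_sigmoid(x, vmin=1e-5, vmax=1.0):
    # Map unconstrained parameters into (vmin, vmax); unlike
    # clamp(exp(x), min=vmin), this is bounded above and has a
    # nonzero gradient everywhere.
    return vmin + (vmax - vmin) * torch.sigmoid(x)

std = scaled_sigmoid(torch.randn(8))
assert ((std > 1e-5) & (std < 1.0)).all()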

2 changes: 2 additions & 0 deletions src/tests/test_likelihood.py
@@ -109,6 +109,7 @@ def test_discrete_complex_sos_pc(
check_evi_ll(model, data)


@pytest.mark.slow
@pytest.mark.parametrize(
"num_components,num_units,region_graph",
list(itertools.product([1], [2], ["rnd-bt"])),
@@ -126,6 +127,7 @@ def test_continuous_monotonic_pc(num_components, num_units, region_graph):
check_pdf(model)


@pytest.mark.slow
@pytest.mark.parametrize(
"num_squares,num_units,region_graph",
list(itertools.product([1], [2], ["rnd-bt"])),
