CS224W - Adds TransD KGE, and Bernoulli corruption strategy for all KGE #9864

Closed · wants to merge 39 commits

Commits
eef03e5
Initial non-interactive implementation for GATConv
mattjhayes3 Nov 6, 2024
4d1ac24
initial copy for diffing
mattjhayes3 Dec 13, 2024
c55de3e
Check in the TransD and bern impls
mattjhayes3 Dec 14, 2024
1082557
full training set bernoulli
mattjhayes3 Dec 14, 2024
c450f41
back to batchwise bern
mattjhayes3 Dec 14, 2024
6755ec8
addl tests, add bern to other variants
mattjhayes3 Dec 14, 2024
2354e7d
fix last commit
mattjhayes3 Dec 14, 2024
9a3e3a8
Ensure *_idx are fully distributed across nodes/devices (#9753)
Kh4L Nov 13, 2024
c98e204
Fix `utils.group_cat` concatenating dimension (#9766)
tzuhanchang Nov 13, 2024
e62a075
Add an error message for batching when `num_nodes` is unknown (#9743)
wzm2256 Nov 13, 2024
150ec97
Improve `__inc__` error message and add tests (#9778)
rusty1s Nov 13, 2024
4884c35
Fix arrow direction in `_visualize_graph_via_networkx` (#9773)
darabos Nov 13, 2024
c29c2b8
Added functionality of `FaceToEdge` transform to work for 3D tetrahed…
Aiik Nov 13, 2024
65d3e2c
Upgrade CI to PyTorch 2.5 (#9779)
rusty1s Nov 13, 2024
6b17856
Cancel intermediate CI builds (#9781)
rusty1s Nov 13, 2024
dd67b98
Add support for `torch_delaunay` package in `Delaunay` transformation…
ybubnov Nov 13, 2024
5b7325f
Fixing `edge_mask` handling for directed graphs in `k_hop_subgraph` (…
ryoji-kubo Nov 13, 2024
7119785
Added PyTorch 2.5 support (#9780)
rusty1s Nov 13, 2024
197eac5
Drop `TensorAttr.fully_specify` (#9782)
akihironitta Nov 13, 2024
23c4844
solve issue 9755 (fix typo) (#9790)
goelzva Nov 18, 2024
b8c4278
add GLEM model, TAGDataset and example of GLEM (#9662)
ECMGit Nov 19, 2024
8a501a8
Add MoleculeGPT (#9710)
xnuohz Nov 20, 2024
aa61d21
Add comment in `g_retriever.py` pointing to `Neo4j` Graph DB integrat…
puririshi98 Nov 20, 2024
ec09b86
Add GIT-Mol (#9730)
xnuohz Nov 25, 2024
073aa52
Run `GitMolDataset` tests only in full test mode (#9804)
rusty1s Nov 25, 2024
625561b
fix for cugraph (#9803)
puririshi98 Nov 25, 2024
72cea9a
G-retriever API updates (NVTX, Remote Backend, Large Graph Indexer, E…
zaristei Nov 26, 2024
d163641
Check that custom edge types actually exist in `NumNeighbors` definit…
rusty1s Nov 26, 2024
25d755f
Fix typo in Dataset docstring (#9813)
abertics Nov 28, 2024
1e1142a
updated Dockerfile based on NGC PyG 24.09 image (#9794)
sbhavani Dec 6, 2024
45bcbf2
Revert "Initial non-interactive implementation for GATConv"
mattjhayes3 Dec 14, 2024
8cf1629
Fix Docstring Typos for LargeGraphIndexer (#9837)
zaristei Dec 10, 2024
575825f
feat: store reverse mapping within `EdgeTypeStr` (#9844)
mananshah99 Dec 11, 2024
78426fa
minor cleanups
mattjhayes3 Dec 15, 2024
e20f018
checkin gat opt
mattjhayes3 Dec 14, 2024
1c4e33d
Merge branch 'master' into kge
mattjhayes3 Dec 15, 2024
f38aa94
Revert "checkin gat opt"
mattjhayes3 Dec 15, 2024
d33dd69
changelog
mattjhayes3 Dec 15, 2024
a65235c
documentation fixes
mattjhayes3 Dec 15, 2024
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -7,6 +7,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

### Added

- Add TransD KGE and Bernoulli corruption ([#9864](https://github.com/pyg-team/pytorch_geometric/pull/9864))
- Update Dockerfile to use latest from NVIDIA ([#9794](https://github.com/pyg-team/pytorch_geometric/pull/9794))
- Added various GRetriever Architecture Benchmarking examples ([#9666](https://github.com/pyg-team/pytorch_geometric/pull/9666))
- Added `profiler.nvtxit` with some examples ([#9666](https://github.com/pyg-team/pytorch_geometric/pull/9666))
18 changes: 15 additions & 3 deletions examples/kge_fb15k_237.py
@@ -1,22 +1,25 @@
import argparse
import os.path as osp
import time

import torch
import torch.optim as optim

from torch_geometric.datasets import FB15k_237
from torch_geometric.nn import ComplEx, DistMult, RotatE, TransE
from torch_geometric.nn import ComplEx, DistMult, RotatE, TransD, TransE

model_map = {
'transe': TransE,
'complex': ComplEx,
'distmult': DistMult,
'rotate': RotatE,
'transd': TransD,
}

parser = argparse.ArgumentParser()
parser.add_argument('--model', choices=model_map.keys(), type=str.lower,
required=True)
parser.add_argument('--bern', action='store_true')
args = parser.parse_args()

device = 'cuda' if torch.cuda.is_available() else 'cpu'
@@ -26,11 +29,17 @@
val_data = FB15k_237(path, split='val')[0].to(device)
test_data = FB15k_237(path, split='test')[0].to(device)

model_arg_map = {'rotate': {'margin': 9.0}}
model_arg_map = {model: {'hidden_channels': 50} for model in model_map.keys()}
model_arg_map['rotate']['margin'] = 9.0
model_arg_map['transd'] = {
'hidden_channels_node': 50,
'hidden_channels_rel': 50,
'bern': args.bern,
}

model = model_map[args.model](
num_nodes=train_data.num_nodes,
num_relations=train_data.num_edge_types,
hidden_channels=50,
**model_arg_map.get(args.model, {}),
).to(device)

@@ -44,6 +53,7 @@

optimizer_map = {
'transe': optim.Adam(model.parameters(), lr=0.01),
'transd': optim.Adam(model.parameters(), lr=0.01),
'complex': optim.Adagrad(model.parameters(), lr=0.001, weight_decay=1e-6),
'distmult': optim.Adam(model.parameters(), lr=0.0001, weight_decay=1e-6),
'rotate': optim.Adam(model.parameters(), lr=1e-3),
@@ -76,11 +86,13 @@ def test(data):
)


start = time.time()
for epoch in range(1, 501):
loss = train()
print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}')
if epoch % 25 == 0:
rank, mrr, hits = test(val_data)
print(f"Time: {(time.time() - start) / epoch}")
print(f'Epoch: {epoch:03d}, Val Mean Rank: {rank:.2f}, '
f'Val MRR: {mrr:.4f}, Val Hits@10: {hits:.4f}')

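With these changes the example can be run as `python examples/kge_fb15k_237.py --model transd --bern`. For orientation, the snippet below is a minimal sketch of the equivalent direct usage; the dataset root `'data/FB15k'` and the batch size are placeholders, and the remaining values simply mirror the hyperparameters hard-coded in the example script above:

import torch

from torch_geometric.datasets import FB15k_237
from torch_geometric.nn import TransD

device = 'cuda' if torch.cuda.is_available() else 'cpu'
train_data = FB15k_237('data/FB15k', split='train')[0].to(device)

model = TransD(
    num_nodes=train_data.num_nodes,
    num_relations=train_data.num_edge_types,
    hidden_channels_node=50,  # entity embedding dimensionality
    hidden_channels_rel=50,   # relation embedding dimensionality
    bern=True,                # Bernoulli (rather than uniform) corruption
).to(device)

loader = model.loader(
    head_index=train_data.edge_index[0],
    rel_type=train_data.edge_type,
    tail_index=train_data.edge_index[1],
    batch_size=1000,
    shuffle=True,
)

optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
for head_index, rel_type, tail_index in loader:
    optimizer.zero_grad()
    loss = model.loss(head_index, rel_type, tail_index)
    loss.backward()
    optimizer.step()
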
7 changes: 5 additions & 2 deletions test/nn/kge/test_complex.py
@@ -1,3 +1,4 @@
import pytest
import torch

from torch_geometric.nn import ComplEx
@@ -37,8 +38,10 @@ def test_complex_scoring():
assert score.tolist() == [58., 8.]


def test_complex():
model = ComplEx(num_nodes=10, num_relations=5, hidden_channels=32)
@pytest.mark.parametrize('bern', [False, True])
def test_complex(bern):
model = ComplEx(num_nodes=10, num_relations=5, hidden_channels=32,
bern=bern)
assert str(model) == 'ComplEx(10, num_relations=5, hidden_channels=32)'

head_index = torch.tensor([0, 2, 4, 6, 8])
7 changes: 5 additions & 2 deletions test/nn/kge/test_distmult.py
@@ -1,10 +1,13 @@
import pytest
import torch

from torch_geometric.nn import DistMult


def test_distmult():
model = DistMult(num_nodes=10, num_relations=5, hidden_channels=32)
@pytest.mark.parametrize('bern', [False, True])
def test_distmult(bern):
model = DistMult(num_nodes=10, num_relations=5, hidden_channels=32,
bern=bern)
assert str(model) == 'DistMult(10, num_relations=5, hidden_channels=32)'

head_index = torch.tensor([0, 2, 4, 6, 8])
7 changes: 5 additions & 2 deletions test/nn/kge/test_rotate.py
@@ -1,10 +1,13 @@
import pytest
import torch

from torch_geometric.nn import RotatE


def test_rotate():
model = RotatE(num_nodes=10, num_relations=5, hidden_channels=32)
@pytest.mark.parametrize('bern', [False, True])
def test_rotate(bern):
model = RotatE(num_nodes=10, num_relations=5, hidden_channels=32,
bern=bern)
assert str(model) == 'RotatE(10, num_relations=5, hidden_channels=32)'

head_index = torch.tensor([0, 2, 4, 6, 8])
33 changes: 33 additions & 0 deletions test/nn/kge/test_transd.py
@@ -0,0 +1,33 @@
import pytest
import torch

from torch_geometric.nn import TransD


@pytest.mark.parametrize('channels_node_rel', [(16, 32), (32, 16)])
@pytest.mark.parametrize('bern', [False, True])
def test_transd(channels_node_rel, bern):
channels_node, channels_rel = channels_node_rel
model = TransD(num_nodes=10, num_relations=5,
hidden_channels_node=channels_node,
hidden_channels_rel=channels_rel, bern=bern)
assert str(model) == ('TransD(10, num_relations=5,'
f' hidden_channels_node={channels_node},'
f' hidden_channels_rel={channels_rel})')

head_index = torch.tensor([0, 2, 4, 6, 8])
rel_type = torch.tensor([0, 1, 2, 3, 4])
tail_index = torch.tensor([1, 3, 5, 7, 9])

loader = model.loader(head_index, rel_type, tail_index, batch_size=5)
for h, r, t in loader:
out = model(h, r, t)
assert out.size() == (5, )

loss = model.loss(h, r, t)
assert loss >= 0.

mean_rank, mrr, hits = model.test(h, r, t, batch_size=5, log=False)
assert 0 <= mean_rank <= 10
assert 0 < mrr <= 1
assert hits == 1.0
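
The new `torch_geometric/nn/kge/transd.py` module itself is not included in the portion of the diff shown here. For orientation only, the sketch below illustrates the TransD score from the "Knowledge Graph Embedding via Dynamic Mapping Matrix" paper that the model is expected to implement; it uses the vector form of the projection, assumes equal node and relation dimensionality, and its function and tensor names are illustrative rather than the PR's actual code:

import torch


def transd_score(h, h_p, r, r_p, t, t_p, p_norm: float = 1.0):
    # Vector form of the TransD projection M_r(e) = (r_p e_p^T + I) e,
    # which reduces to (e_p . e) * r_p + e when node and relation
    # embeddings share the same dimensionality.
    h_proj = (h_p * h).sum(dim=-1, keepdim=True) * r_p + h
    t_proj = (t_p * t).sum(dim=-1, keepdim=True) * r_p + t
    # Higher (less negative) scores indicate more plausible triplets.
    return -(h_proj + r - t_proj).norm(p=p_norm, dim=-1)


# Toy usage with a batch of two triplets and 32-dimensional embeddings:
h, h_p, r, r_p, t, t_p = (torch.randn(2, 32) for _ in range(6))
assert transd_score(h, h_p, r, r_p, t, t_p).size() == (2, )
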
7 changes: 5 additions & 2 deletions test/nn/kge/test_transe.py
@@ -1,10 +1,13 @@
import pytest
import torch

from torch_geometric.nn import TransE


def test_transe():
model = TransE(num_nodes=10, num_relations=5, hidden_channels=32)
@pytest.mark.parametrize('bern', [False, True])
def test_transe(bern):
model = TransE(num_nodes=10, num_relations=5, hidden_channels=32,
bern=bern)
assert str(model) == 'TransE(10, num_relations=5, hidden_channels=32)'

head_index = torch.tensor([0, 2, 4, 6, 8])
2 changes: 2 additions & 0 deletions torch_geometric/nn/kge/__init__.py
@@ -1,6 +1,7 @@
r"""Knowledge Graph Embedding (KGE) package."""

from .base import KGEModel
from .transd import TransD
from .transe import TransE
from .complex import ComplEx
from .distmult import DistMult
@@ -9,6 +10,7 @@
__all__ = classes = [
'KGEModel',
'TransE',
'TransD',
'ComplEx',
'DistMult',
'RotatE',
42 changes: 37 additions & 5 deletions torch_geometric/nn/kge/base.py
@@ -6,6 +6,21 @@
from tqdm import tqdm

from torch_geometric.nn.kge.loader import KGTripletLoader
from torch_geometric.utils import scatter


def _avg_count_per_r(x_idx, r_idx):
# Assume no duplicate triples, so e.g. each occurrence of a tail index
# represents a different head to count for that tail.

# Map the tuple (x_idx, r_idx) to unique indices in a new combined index.
num_x = x_idx.max() + 1
rx_idx = r_idx * num_x + x_idx
# Get counts of each unique (x_idx, r_idx) pair.
unique_rx, rx_counts = torch.unique(rx_idx, return_counts=True)
# Average those counts grouped by r_idx.
r_idx = unique_rx // num_x
return scatter(rx_counts, r_idx, reduce='mean')


class KGEModel(torch.nn.Module):
@@ -24,12 +39,14 @@ def __init__(
num_relations: int,
hidden_channels: int,
sparse: bool = False,
bern: bool = False,
):
super().__init__()

self.num_nodes = num_nodes
self.num_relations = num_relations
self.hidden_channels = hidden_channels
self.bern = bern

self.node_emb = Embedding(num_nodes, hidden_channels, sparse=sparse)
self.rel_emb = Embedding(num_relations, hidden_channels, sparse=sparse)
@@ -150,16 +167,31 @@ def random_sample(
rel_type (torch.Tensor): The relation type.
tail_index (torch.Tensor): The tail indices.
"""
# Random sample either `head_index` or `tail_index` (but not both):
num_negatives = head_index.numel() // 2
rnd_index = torch.randint(self.num_nodes, head_index.size(),
device=head_index.device)

head_index = head_index.clone()
head_index[:num_negatives] = rnd_index[:num_negatives]
tail_index = tail_index.clone()
tail_index[num_negatives:] = rnd_index[num_negatives:]

if not self.bern:
# Random sample either `head_index` or `tail_index` (but not both):
num_negatives = head_index.numel() // 2

head_index[:num_negatives] = rnd_index[:num_negatives]
tail_index[num_negatives:] = rnd_index[num_negatives:]

return head_index, rel_type, tail_index

# Bernoulli: decide whether to corrupt the head or the tail based on the
# average number of heads per tail and tails per head for each relation.
# That is, if a relation has more tails per head than heads per tail, the
# head should be corrupted more often to produce fewer false negatives.
hpt = _avg_count_per_r(tail_index, rel_type)
tph = _avg_count_per_r(head_index, rel_type)
berns = tph / (tph + hpt)
head_mask = berns[rel_type].bernoulli().type(torch.bool)
tail_mask = ~head_mask
head_index[head_mask] = rnd_index[head_mask]
tail_index[tail_mask] = rnd_index[tail_mask]
return head_index, rel_type, tail_index

def __repr__(self) -> str:
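
To make the Bernoulli strategy concrete, the following self-contained sketch mirrors `_avg_count_per_r` and the corruption-probability computation above on a toy set of triplets; the triplets and the resulting numbers are invented purely for illustration:

import torch

from torch_geometric.utils import scatter


def avg_count_per_r(x_idx, r_idx):
    # Mirrors `_avg_count_per_r` above: average, per relation, how many
    # triplets share each (entity, relation) pair.
    num_x = x_idx.max() + 1
    rx_idx = r_idx * num_x + x_idx
    unique_rx, rx_counts = torch.unique(rx_idx, return_counts=True)
    return scatter(rx_counts, unique_rx // num_x, reduce='mean')


# A single relation 0 in which head 0 links to tails 1, 2 and 3, so on
# average there are 3 tails per head but only 1 head per tail:
head_index = torch.tensor([0, 0, 0])
rel_type = torch.tensor([0, 0, 0])
tail_index = torch.tensor([1, 2, 3])

tph = avg_count_per_r(head_index, rel_type)  # tails per head: 3
hpt = avg_count_per_r(tail_index, rel_type)  # heads per tail: 1
berns = tph / (tph + hpt)                    # P(corrupt head) = 0.75

# Corrupting the head 75% of the time makes it less likely that a sampled
# negative triplet is in fact a true triplet (a false negative).
head_mask = berns[rel_type].bernoulli().bool()
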
21 changes: 13 additions & 8 deletions torch_geometric/nn/kge/complex.py
@@ -18,6 +18,14 @@ class ComplEx(KGEModel):
.. math::
d(h, r, t) = Re(< \mathbf{e}_h, \mathbf{e}_r, \mathbf{e}_t>)

During training, the score is optimized by contrasting positive triplets
with corrupted (negative) triplets. By default, either the head or the
tail is corrupted uniformly at random. When :obj:`bern=True`, the head or
the tail is chosen for corruption with probability proportional to the
relation's average number of tails per head or heads per tail,
respectively, as described in the `"Knowledge Graph Embedding by
Translating on Hyperplanes"
<https://cdn.aaai.org/ojs/8870/8870-13-12398-1-2-20201228.pdf>`_ paper.

.. note::

For an example of using the :class:`ComplEx` model, see
@@ -32,14 +40,11 @@
sparse (bool, optional): If set to :obj:`True`, gradients w.r.t. to
the embedding matrices will be sparse. (default: :obj:`False`)
"""
def __init__(
self,
num_nodes: int,
num_relations: int,
hidden_channels: int,
sparse: bool = False,
):
super().__init__(num_nodes, num_relations, hidden_channels, sparse)
def __init__(self, num_nodes: int, num_relations: int,
hidden_channels: int, sparse: bool = False,
bern: bool = False):
super().__init__(num_nodes, num_relations, hidden_channels, sparse,
bern)

self.node_emb_im = Embedding(num_nodes, hidden_channels, sparse=sparse)
self.rel_emb_im = Embedding(num_relations, hidden_channels,
12 changes: 11 additions & 1 deletion torch_geometric/nn/kge/distmult.py
@@ -17,6 +17,14 @@ class DistMult(KGEModel):
.. math::
d(h, r, t) = < \mathbf{e}_h, \mathbf{e}_r, \mathbf{e}_t >

During training, the score is optimized by contrasting positive triplets
with corrupted (negative) triplets. By default, either the head or the
tail is corrupted uniformly at random. When :obj:`bern=True`, the head or
the tail is chosen for corruption with probability proportional to the
relation's average number of tails per head or heads per tail,
respectively, as described in the `"Knowledge Graph Embedding by
Translating on Hyperplanes"
<https://cdn.aaai.org/ojs/8870/8870-13-12398-1-2-20201228.pdf>`_ paper.

.. note::

For an example of using the :class:`DistMult` model, see
@@ -40,8 +48,10 @@ def __init__(
hidden_channels: int,
margin: float = 1.0,
sparse: bool = False,
bern: bool = False,
):
super().__init__(num_nodes, num_relations, hidden_channels, sparse)
super().__init__(num_nodes, num_relations, hidden_channels, sparse,
bern)

self.margin = margin

22 changes: 13 additions & 9 deletions torch_geometric/nn/kge/rotate.py
@@ -24,6 +24,14 @@ class RotatE(KGEModel):
.. math::
d(h, r, t) = - {\| \mathbf{e}_h \circ \mathbf{e}_r - \mathbf{e}_t \|}_p

During training, the score is optimized by contrasting positive triplets
with corrupted (negative) triplets. By default, either the head or the
tail is corrupted uniformly at random. When :obj:`bern=True`, the head or
the tail is chosen for corruption with probability proportional to the
relation's average number of tails per head or heads per tail,
respectively, as described in the `"Knowledge Graph Embedding by
Translating on Hyperplanes"
<https://cdn.aaai.org/ojs/8870/8870-13-12398-1-2-20201228.pdf>`_ paper.

.. note::

For an example of using the :class:`RotatE` model, see
@@ -39,15 +47,11 @@ class RotatE(KGEModel):
sparse (bool, optional): If set to :obj:`True`, gradients w.r.t. to
the embedding matrices will be sparse. (default: :obj:`False`)
"""
def __init__(
self,
num_nodes: int,
num_relations: int,
hidden_channels: int,
margin: float = 1.0,
sparse: bool = False,
):
super().__init__(num_nodes, num_relations, hidden_channels, sparse)
def __init__(self, num_nodes: int, num_relations: int,
hidden_channels: int, margin: float = 1.0,
sparse: bool = False, bern: bool = False):
super().__init__(num_nodes, num_relations, hidden_channels, sparse,
bern)

self.margin = margin
self.node_emb_im = Embedding(num_nodes, hidden_channels, sparse=sparse)