Commit #24 : Final commit for GCN, GAT
bmsookim committed Aug 3, 2018
1 parent 91f77b9 commit 536ba32
Showing 8 changed files with 29 additions and 57 deletions.
4 changes: 2 additions & 2 deletions README.md
@@ -1,12 +1,12 @@
<p align="center"><img width="40%" src="./imgs/pytorch.png"></p>

PyTorch implementation of Graph Convolutional Networks.
PyTorch implementation of Graph Convolutional Networks & Graph Attention Convolutional Networks.

This project was made by Bumsoo Kim, a Ph.D. candidate at Korea University.
This repo has been forked from [https://github.com/tkipf/pygcn](https://github.com/tkipf/pygcn).

## Graph Convolutional Networks
Many important real-world datasets come in the form of graphs or networks: social networks, knowledge graphs, protein-interaction networks, the World Wide Web, etc. In this repository, we introduce a basic tutorial for generalizing neural networks to work on arbitrarily structured graphs, along with a proposal of a new structure that outperforms the current state of the art in Graph Convolutional Networks ([Attention GCN](https://arxiv.org/abs/1710.10903)).
Many important real-world datasets come in the form of graphs or networks: social networks, knowledge graphs, protein-interaction networks, the World Wide Web, etc. In this repository, we introduce a basic tutorial for generalizing neural networks to work on arbitrarily structured graphs, along with Graph Attention Convolutional Networks ([Attention GCN](https://arxiv.org/abs/1710.10903)).

Currently, most graph neural network models share a somewhat universal architecture. They are referred to as Graph Convolutional Networks (GCNs) since filter parameters are typically shared over all locations in the graph.
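For readers new to GCNs, the shared layer rule can be summarized as H' = act(A_hat · H · W), where A_hat is the normalized adjacency matrix with self-loops, H holds the node features, and W is the layer's weight matrix. Below is a minimal, illustrative PyTorch sketch of one such layer (illustration only, not code from this repository):

import torch
import torch.nn as nn

class SimpleGraphConvolution(nn.Module):
    # One GCN layer: each node mixes its projected features with its neighbours'.
    def __init__(self, in_features, out_features):
        super(SimpleGraphConvolution, self).__init__()
        self.weight = nn.Parameter(torch.empty(in_features, out_features))
        nn.init.xavier_uniform_(self.weight)

    def forward(self, x, adj_hat):
        support = torch.mm(x, self.weight)   # project node features: H W
        return torch.mm(adj_hat, support)    # aggregate over the graph: A_hat (H W)

# Stacking two such layers with ReLU and log-softmax gives the usual 2-layer GCN classifier.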

4 changes: 0 additions & 4 deletions layers.py
@@ -76,8 +76,6 @@ def __init__(self, in_features, out_features, dropout, alpha, concat=True):
self.concat = concat

self.W = nn.Parameter(nn.init.xavier_normal_(torch.Tensor(in_features, out_features).type(torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor), gain=np.sqrt(2.0)), requires_grad=True)
#self.a = nn.Parameter(nn.init.xavier_normal_(torch.Tensor(2*out_features, 1).type(torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor), gain=np.sqrt(2.0)), requires_grad=True)

self.a1 = nn.Parameter(nn.init.xavier_normal_(torch.Tensor(out_features, 1).type(torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor), gain=np.sqrt(2.0)), requires_grad=True)
self.a2 = nn.Parameter(nn.init.xavier_normal_(torch.Tensor(out_features, 1).type(torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor), gain=np.sqrt(2.0)), requires_grad=True)

@@ -87,8 +85,6 @@ def forward(self, input, adj):
h = torch.mm(input, self.W)
N = h.size()[0]

#a_input = torch.cat([h.repeat(1, N).view(N * N, -1), h.repeat(N, 1)], dim=1).view(N, -1, 2 * self.out_features)
#e = self.leakyrelu(torch.matmul(a_input, self.a).squeeze(2))
f_1 = torch.matmul(h, self.a1)
f_2 = torch.matmul(h, self.a2)
e = self.leakyrelu(f_1 + f_2.transpose(0,1))
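The commented-out lines above materialized every concatenated pair [Wh_i || Wh_j] as an N x N x 2F tensor before applying the attention vector a; the new code exploits the identity a^T [Wh_i || Wh_j] = a1^T Wh_i + a2^T Wh_j (with a split into halves a1 and a2), so the N x N attention logits can be built from two N x 1 vectors by broadcasting. A small self-contained check of that equivalence (illustrative shapes only, not repository code):

import torch

N, F_out = 4, 8
h = torch.randn(N, F_out)        # plays the role of X W: one row per node
a1 = torch.randn(F_out, 1)
a2 = torch.randn(F_out, 1)

f_1 = torch.matmul(h, a1)        # (N, 1): a1^T W h_i
f_2 = torch.matmul(h, a2)        # (N, 1): a2^T W h_j
e = f_1 + f_2.transpose(0, 1)    # (N, N): e[i, j] = a1^T W h_i + a2^T W h_j

# Reference: the original pairwise-concatenation formulation with a = [a1; a2]
a = torch.cat([a1, a2], dim=0)                                            # (2 * F_out, 1)
a_input = torch.cat([h.repeat(1, N).view(N * N, -1), h.repeat(N, 1)], dim=1)
e_ref = torch.matmul(a_input, a).view(N, N)
assert torch.allclose(e, e_ref, atol=1e-5)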
10 changes: 5 additions & 5 deletions opts.py
@@ -12,9 +12,9 @@ def initialize(self):
self.parser.add_argument('--dataset', type=str, default='pubmed', help='[cora | citeseer | pubmed]')
self.parser.add_argument('--num_hidden', type=int, default=8, help='number of features')
self.parser.add_argument('--dropout', type=float, default=0.6, help='dropout')
self.parser.add_argument('--weight_decay', type=float, default=0, help='weight decay')
self.parser.add_argument('--weight_decay', type=float, default=5e-4, help='weight decay')
self.parser.add_argument('--init_type', type=str, default='uniform', help='[uniform | xavier]')
self.parser.add_argument('--model', type=str, default='basic', help='[basic | drop_in]')
self.parser.add_argument('--model', type=str, default='basic', help='[basic | drop_in | attention | res_attention]')

def parse(self):
if not self.initialized:
@@ -30,9 +30,9 @@ class TrainOptions(BaseOptions):
# Override
def initialize(self):
BaseOptions.initialize(self)
self.parser.add_argument('--lr', type=float, default=1e-2, help='initial learning rate')
self.parser.add_argument('--optimizer', type=str, default='SGD', help='[sgd | adam]')
self.parser.add_argument('--epoch', type=int, default=30000, help='number of training epochs')
self.parser.add_argument('--lr', type=float, default=5e-3, help='initial learning rate')
self.parser.add_argument('--optimizer', type=str, default='adam', help='[sgd | adam]')
self.parser.add_argument('--epoch', type=int, default=800, help='number of training epochs')
self.parser.add_argument('--lr_decay_epoch', type=int, default=5000, help='multiply by a gamma every set iter')
self.parser.add_argument('--nb_heads', type=int, default=8, help='number of head attentions')
self.parser.add_argument('--alpha', type=float, default=0.2, help='Alpha value for the leaky_relu')
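The --lr_decay_epoch option is described as multiplying the learning rate by a gamma at fixed intervals; a step-decay helper in that spirit might look like the following (a sketch only — the function name and the gamma value are assumptions, not taken from this repository):

def adjust_learning_rate(optimizer, epoch, base_lr, lr_decay_epoch, gamma=0.1):
    # Step decay: scale the initial learning rate by gamma once per lr_decay_epoch epochs.
    lr = base_lr * (gamma ** (epoch // lr_decay_epoch))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr

# e.g. adjust_learning_rate(optimizer, epoch, opt.lr, opt.lr_decay_epoch) inside the training loop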
5 changes: 2 additions & 3 deletions scripts/att_train.sh
@@ -4,8 +4,7 @@ python train.py \
--nb_heads 8 \
--dropout 0.6 \
--weight_decay 5e-4 \
--model attention \
--model res_attention \
--lr 5e-3 \
--optimizer adam \
--epoch 800 \
--lr_decay_epoch 800
--epoch 800
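The script now trains the res_attention variant; judging by the name, this is presumably the attention model with a residual (skip) connection around each attention block. A hypothetical sketch of that idea (an assumption about the design, not code from this repository):

import torch.nn.functional as F

def res_attention_block(x, adj, attention_layer, dropout=0.6, training=True):
    # Hypothetical residual attention block: output = attention(x, adj) + x.
    # Assumes the attention layer keeps the feature dimension unchanged.
    h = F.dropout(x, dropout, training=training)
    h = attention_layer(h, adj)
    return h + x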
3 changes: 1 addition & 2 deletions scripts/train.sh
@@ -1,11 +1,10 @@
python train.py \
--dataset pubmed \
--num_hidden 32 \
--nb_heads 0 \
--dropout 0.5 \
--weight_decay 0 \
--model basic \
--lr 1e-2 \
--optimizer SGD \
--optimizer sgd \
--epoch 10000 \
--lr_decay_epoch 2500
7 changes: 6 additions & 1 deletion test.py
@@ -46,7 +46,7 @@
print("| Label matrix : {}".format(labels.shape))

load_model = torch.load(os.path.join('checkpoint', opt.dataset, '%s.t7' %(opt.model)))
model = load_model['model']
model = load_model['model'].cpu()
acc_val = load_model['acc']

if use_gpu:
@@ -55,8 +55,13 @@

def test():
print("\n[STEP 4] : Testing")

model.eval()
output = model(features, adj)

print(output[idx_test].shape)
print(labels[idx_test].shape)

acc_test = accuracy(output[idx_test], labels[idx_test])
print("| Validation acc : {}%".format(acc_val.data.cpu().numpy() * 100))
print("| Test acc : {}%\n".format(acc_test.data.cpu().numpy() * 100))
39 changes: 12 additions & 27 deletions train.py
@@ -20,7 +20,7 @@

from torch.autograd import Variable
from utils import *
from models import GCN, GCN_drop_in, GAT
from models import GCN, GAT
from opts import TrainOptions

"""
@@ -63,15 +63,6 @@
dropout = opt.dropout,
init = opt.init_type
)
elif (opt.model == 'drop_in'):
print("| Constructing input dropout GCN model...")
model = GCN_drop_in(
nfeat = features.shape[1],
nhid = opt.num_hidden,
nclass = labels.max().item() + 1,
dropout = opt.dropout,
init = opt.init_type
)
elif (opt.model == 'attention'):
print("| Constructing Attention GCN model...")
model = GAT(
@@ -159,20 +150,14 @@ def train(epoch):


# Main code for training
#if __name__ == "__main__":
print("\n[STEP 2] : Obtain (adjacency, feature, label) matrix")
print("| Adjacency matrix : {}".format(adj.shape))
print("| Feature matrix : {}".format(features.shape))
print("| Label matrix : {}".format(labels.shape))


# Test forward
#print("\n[STEP 3'] : Dummy Forward")
#output = model(features, adj)
#print("| Shape of result : {}".format(output.shape))

# Training
print("\n[STEP 3] : Training")
for epoch in range(1, opt.epoch+1):
train(epoch)
print("\n=> Training finished!")
if __name__ == "__main__":
print("\n[STEP 2] : Obtain (adjacency, feature, label) matrix")
print("| Adjacency matrix : {}".format(adj.shape))
print("| Feature matrix : {}".format(features.shape))
print("| Label matrix : {}".format(labels.shape))

# Training
print("\n[STEP 3] : Training")
for epoch in range(1, opt.epoch+1):
train(epoch)
print("\n=> Training finished!")
14 changes: 1 addition & 13 deletions utils.py
@@ -13,15 +13,6 @@ def parse_index_file(filename):

return index

def sparse_mx_to_torch_sparse_tensor(sparse_mx):
"""Convert a scipy sparse matrix to a torch sparse tensor."""
sparse_mx = sparse_mx.tocoo().astype(np.float32)
indices = torch.from_numpy(np.vstack((sparse_mx.row,
sparse_mx.col))).long()
values = torch.from_numpy(sparse_mx.data)
shape = torch.Size(sparse_mx.shape)
return torch.sparse.FloatTensor(indices, values, shape)

def normalize(mx):
"""Row-normalize sparse matrix"""
rowsum = np.array(mx.sum(1))
@@ -99,13 +90,12 @@ def load_data(path="/home/bumsoo/Data/Planetoid", dataset="cora"):
print("| # of edges : {}".format(adj.sum().sum()/2))

features = normalize(features)
adj = normalize(adj + sp.eye(adj.shape[0]))
adj = normalize_adj(adj + sp.eye(adj.shape[0]))
print("| # of features : {}".format(features.shape[1]))
print("| # of clases : {}".format(ally.shape[1]))

features = torch.FloatTensor(np.array(features.todense()))
sparse_mx = adj.tocoo().astype(np.float32)
#adj = sparse_mx_to_torch_sparse_tensor(adj)
adj = torch.FloatTensor(np.array(adj.todense()))

labels = np.vstack((ally, ty))
@@ -136,8 +126,6 @@ def missing_elements(L):
for element in missing:
save_label = np.insert(save_label, element, 0)

print(save_label.shape)

labels = torch.LongTensor(save_label)

return adj, features, labels, idx_train, idx_val, idx_test
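The switch from normalize to normalize_adj changes the preprocessing of A + I from row normalization, D^-1 (A + I), to the symmetric normalization, D^-1/2 (A + I) D^-1/2, used in the GCN paper. A typical implementation of such a helper looks like this (a sketch in the usual pygcn/gcn style; the repository's own normalize_adj is not shown in this diff):

import numpy as np
import scipy.sparse as sp

def normalize_adj(mx):
    # Symmetric normalization of a scipy sparse matrix: D^-1/2 * mx * D^-1/2.
    rowsum = np.array(mx.sum(1)).flatten()
    d_inv_sqrt = np.power(rowsum, -0.5)
    d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0.
    d_mat_inv_sqrt = sp.diags(d_inv_sqrt)
    return mx.dot(d_mat_inv_sqrt).transpose().dot(d_mat_inv_sqrt)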
