[TESTED]Question5

wong-1994 · Dec 18, 2024 · 0a02216 · 0a02216
1 parent bcb654a
commit 0a02216
Show file tree

Hide file tree

Showing 4 changed files with 61 additions and 8 deletions.
diff --git a/apps/mlp_resnet.py b/apps/mlp_resnet.py
@@ -13,7 +13,15 @@
 
 def ResidualBlock(dim, hidden_dim, norm=nn.BatchNorm1d, drop_prob=0.1):
+    """Build one residual block of the MLP-ResNet.
+
+    The wrapped path is Linear(dim, hidden_dim) -> norm -> ReLU -> Dropout ->
+    Linear(hidden_dim, dim) -> norm. It is passed to ``nn.Residual`` and the
+    result is followed by a final ReLU.
+    """
     ### BEGIN YOUR SOLUTION
-    raise NotImplementedError()
+    # Layers are created in the same order they are applied.
+    inner_layers = [
+        nn.Linear(dim, hidden_dim),
+        norm(hidden_dim),
+        nn.ReLU(),
+        nn.Dropout(p=drop_prob),
+        nn.Linear(hidden_dim, dim),
+        norm(dim),
+    ]
+    skip_branch = nn.Residual(nn.Sequential(*inner_layers))
+    # The activation sits outside the residual wrapper.
+    return nn.Sequential(skip_branch, nn.ReLU())
     ### END YOUR SOLUTION
 
 
@@ -26,14 +34,44 @@ def MLPResNet(
     drop_prob=0.1,
 ):
     ### BEGIN YOUR SOLUTION
-    raise NotImplementedError()
+    mlp_resnet = [
+        nn.Linear(dim, hidden_dim),
+        nn.ReLU()
+    ]
+    for i in range(num_blocks):
+        mlp_resnet.append(ResidualBlock(
+            dim=hidden_dim, 
+            hidden_dim=hidden_dim//2, 
+            norm=norm, 
+            drop_prob=drop_prob
+        ))
+    mlp_resnet.append(nn.Linear(hidden_dim, num_classes))
+    return nn.Sequential(*mlp_resnet)
     ### END YOUR SOLUTION
 
 
 def epoch(dataloader, model, opt=None):
+    """Run one pass over ``dataloader``; return ``(error_rate, mean_loss)``.
+
+    If ``opt`` is not None the model is switched to training mode and ``opt``
+    takes one step per batch. Otherwise the model is switched to eval mode
+    and no backward pass or optimizer step is performed.
+    Both returned values are divided by the number of samples seen.
+    """
     np.random.seed(4)
     ### BEGIN YOUR SOLUTION
-    raise NotImplementedError()
+    # Compare against None explicitly so the mode never depends on the
+    # optimizer object's truthiness.
+    training = opt is not None
+    if training:
+        model.train()
+    else:
+        model.eval()
+    # One loss module serves every batch; no need to rebuild it per batch.
+    loss_fn = nn.SoftmaxLoss()
+    error_tot = 0.
+    loss_tot = 0.
+    data_num = 0
+    for X, y in dataloader:
+        y_hat = model.forward(X)
+        loss = loss_fn.forward(y_hat, y)
+        if training:
+            opt.reset_grad()
+            loss.backward()
+            opt.step()
+
+        batch_size = X.shape[0]
+        data_num += batch_size
+        # Weight the batch loss by the batch size so the final division gives
+        # a per-sample figure (assumes SoftmaxLoss returns a batch mean --
+        # TODO confirm).
+        loss_tot += loss.numpy() * batch_size
+        error_tot += (np.argmax(y_hat.numpy(), axis=1) != y.numpy()).sum()
+    return float(error_tot / data_num), float(loss_tot / data_num)
     ### END YOUR SOLUTION
 
 
@@ -48,7 +86,24 @@ def train_mnist(
 ):
     np.random.seed(4)
     ### BEGIN YOUR SOLUTION
-    raise NotImplementedError()
+    train_image_filename = os.path.join(data_dir, "train-images-idx3-ubyte.gz")
+    train_label_filename = os.path.join(data_dir, "train-labels-idx1-ubyte.gz")
+    mnist_train_dataset = ndl.data.MNISTDataset(train_image_filename, train_label_filename)
+    mnist_train_dataloader = ndl.data.DataLoader(mnist_train_dataset, batch_size, shuffle=True)
+
+    model = MLPResNet(dim=mnist_train_dataset[0][0].shape[1], hidden_dim=hidden_dim)
+    opt = optimizer(model.parameters(), lr=lr, weight_decay=weight_decay)
+
+    for e in range(epochs):
+        training_loss, training_error = epoch(mnist_train_dataloader, model, opt)
+
+    test_image_filename = os.path.join(data_dir, "t10k-images-idx3-ubyte.gz")
+    test_label_filename = os.path.join(data_dir, "t10k-labels-idx1-ubyte.gz")
+    mnist_test_dataset = ndl.data.MNISTDataset(test_image_filename, test_label_filename)
+    mnist_test_dataloader = ndl.data.DataLoader(mnist_test_dataset, batch_size)
+    test_loss, test_error = epoch(mnist_test_dataloader, model)
+
+    return training_loss, training_error, test_loss, test_error
     ### END YOUR SOLUTION
 
 

diff --git a/python/needle/data/data_basic.py b/python/needle/data/data_basic.py
@@ -54,8 +54,6 @@ def __init__(
         self.dataset = dataset
         self.shuffle = shuffle
         self.batch_size = batch_size
-        print("init")
-        print(len(self.dataset))
         if not self.shuffle:
             self.ordering = np.array_split(np.arange(len(dataset)), 
                                            range(batch_size, len(dataset), batch_size))

diff --git a/python/needle/data/datasets/mnist_dataset.py b/python/needle/data/datasets/mnist_dataset.py
@@ -83,7 +83,7 @@ def __getitem__(self, index) -> object:
         ### BEGIN YOUR SOLUTION
         if self.transforms:
             return self.apply_transforms(self.images[index]), self.labels[index]
-        return self.images[index], self.labels[index]
+        return self.images[index].reshape((-1, 28 * 28)), self.labels[index]
         ### END YOUR SOLUTION
 
     def __len__(self) -> int:

diff --git a/python/needle/nn/nn_basic.py b/python/needle/nn/nn_basic.py
@@ -214,7 +214,7 @@ def __init__(self, p=0.5):
     def forward(self, x: Tensor) -> Tensor:
         ### BEGIN YOUR SOLUTION
         if self.training:
-            return x / (1 - self.p) * init.randb(*x.shape, p=self.p, device=x.device)
+            # self.p is the drop probability, so the mask is drawn with the
+            # complementary (keep) probability and x is rescaled by it.
+            keep_prob = 1 - self.p
+            mask = init.randb(*x.shape, p=keep_prob, device=x.device)
+            return x / keep_prob * mask
         else:
             return x
         ### END YOUR SOLUTION