Merge pull request #11 from dirac-institute/stamp_dataloader
Getting Stamp dataset, CNN, and resnet50 working
drewoldag authored Jan 24, 2025
2 parents 6a7bf63 + 3f31d5d commit a2643e4
Showing 10 changed files with 330 additions and 152 deletions.
93 changes: 0 additions & 93 deletions example_config.toml

This file was deleted.

93 changes: 93 additions & 0 deletions src/kbmod_ml/data_sets/kbmod_stamps.py
@@ -0,0 +1,93 @@
import os

import numpy as np
import torch
from fibad.data_sets.data_set_registry import fibad_data_set
from torch.utils.data.sampler import SubsetRandomSampler


@fibad_data_set
class KbmodStamps:
"""TODO: what is the actual shape of the data that we're going to want to import?
my initial thoughts is that we'll have a single numpy array that we stitch together
from the two datasets (adding a column with a classification based on which set
they come from). We should also have the option to select which stamp type we are using
(mean, median, sum, and var weighted). So we can just have all those stored as individual rows.
We could have an "active columns" variable, with the indices of the columns we want to grab
(corresponding to which coadd type we want to use), which could reflect in the `shape` function.
"""

def __init__(self, config, split: str):
coadd_type_to_column = {
"median": 0,
"mean": 1,
"sum": 2,
"var_weighted": 3,
}

cols = []

for c in ["mean"]:
cols.append(coadd_type_to_column[c])

self.active_columns = np.array(cols)

data_dir = config["general"]["data_dir"]
true_positive_file_name = config["kbmod_ml"]["true_positive_file_name"]
false_positive_file_name = config["kbmod_ml"]["false_positive_file_name"]

true_data_path = os.path.join(data_dir, true_positive_file_name)
false_data_path = os.path.join(data_dir, false_positive_file_name)

if not os.path.isfile(true_data_path):
raise ValueError(f"Could not find {true_positive_file_name} in provided {data_dir}")
if not os.path.isfile(false_data_path):
raise ValueError(f"could not find {false_positive_file_name} in provided {data_dir}")

true_positive_samples = np.load(true_data_path)
false_positive_samples = np.load(false_data_path)

self._labels = np.concatenate(
[
np.ones(len(true_positive_samples), dtype=np.int8),
np.zeros(len(false_positive_samples), dtype=np.int8),
]
)
self._data = np.concatenate([true_positive_samples[:, :3, :, :], false_positive_samples])

if split != "test":
num_train = len(self)
indices = list(range(num_train))
split_idx = 0
if config["data_set"]["validate_size"]:
split_idx = int(np.floor(config["data_set"]["validate_size"] * num_train))

random_seed = None
if config["data_set"]["seed"]:
random_seed = config["data_set"]["seed"]
np.random.seed(random_seed)
np.random.shuffle(indices)

train_idx, valid_idx = indices[split_idx:], indices[:split_idx]

# These samplers are used by PyTorch's DataLoader to split the dataset
# into training and validation sets.
self.train_sampler = SubsetRandomSampler(train_idx)
self.validation_sampler = SubsetRandomSampler(valid_idx)

def shape(self):
"""data shape, including currently enabled columns"""
cols = len(self.active_columns)
width, height = self._data[0][0].shape

return (cols, width, height)

def __getitem__(self, idx):
row = self._data[idx][self.active_columns]
label = self._labels[idx]

return torch.tensor(row), torch.tensor(label, dtype=torch.int8)

def __len__(self):
return len(self._data)
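
A quick usage sketch (illustrative, not part of the diff): assuming the `fibad_data_set` decorator leaves direct construction intact, the class takes a dict-like config with the keys read above, and its two samplers plug straight into PyTorch's DataLoader. All config values below are placeholders.

import torch
from torch.utils.data import DataLoader

from kbmod_ml.data_sets.kbmod_stamps import KbmodStamps

# Illustrative config mirroring the keys KbmodStamps reads; "data_dir"
# and "seed" are assumptions -- the default config in this commit does
# not define them.
config = {
    "general": {"data_dir": "/path/to/stamps"},
    "data_set": {"validate_size": 0.2, "seed": 42},
    "kbmod_ml": {
        "true_positive_file_name": "true_positive_stamps_full.npy",
        "false_positive_file_name": "false_positive_stamps_trimmed.npy",
    },
}

dataset = KbmodStamps(config, split="train")

# The samplers partition the indices themselves, so the loaders must not
# also shuffle (passing sampler= implies shuffle=False).
train_loader = DataLoader(dataset, batch_size=32, sampler=dataset.train_sampler)
valid_loader = DataLoader(dataset, batch_size=32, sampler=dataset.validation_sampler)

stamps, labels = next(iter(train_loader))
print(stamps.shape)  # (32, 1, H, W): one channel for the single active "mean" column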
13 changes: 13 additions & 0 deletions src/kbmod_ml/default_config.toml
@@ -0,0 +1,13 @@
[data_set]
name = "kbmod_ml.data_sets.kbmod_stamps.KbmodStamps"

train_size = 0.8

validate_size = 0.2

[kbmod_ml]
# The file name of the true positive samples
true_positive_file_name = 'true_positive_stamps_full.npy'

# The file name of the false positive samples
false_positive_file_name = 'false_positive_stamps_trimmed.npy'
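
Note that KbmodStamps also reads `config["general"]["data_dir"]` and `config["data_set"]["seed"]`, neither of which is set here, so a user-supplied config presumably needs to provide them; something like the following (path illustrative):

[general]
data_dir = "/path/to/stamp/files"

[data_set]
seed = 42

Also worth noting: as far as this diff shows, `train_size` is never read by KbmodStamps; the train/validation split is controlled by `validate_size` alone.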
14 changes: 0 additions & 14 deletions src/kbmod_ml/example_benchmarks.py

This file was deleted.

Empty file added src/kbmod_ml/models/__init__.py
Empty file.
38 changes: 12 additions & 26 deletions src/kbmod_ml/models/cnn.py
@@ -7,7 +7,6 @@
import torch
import torch.nn as nn
import torch.nn.functional as F # noqa N812
import torch.optim as optim
from fibad.models.model_registry import fibad_model

logger = logging.getLogger(__name__)
@@ -16,29 +15,22 @@
@fibad_model
class CNN(nn.Module):
    def __init__(self, config, shape):
        logger.info("This is an external model, not in FIBAD!!!")
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
        self.conv1 = nn.Conv2d(shape[0], 16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(64 * shape[1] * shape[2], 1)

        self.config = config

        # Optimizer and criterion could be set directly, i.e. `self.optimizer = optim.SGD(...)`
        # but we define them as methods as a way to allow for more flexibility in the future.
        self.optimizer = self._optimizer()
        self.criterion = self._criterion()

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        if isinstance(x, tuple):
            x, _ = x
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        x = torch.sigmoid(self.fc1(x))
        return x

    def train_step(self, batch):
@@ -59,16 +51,10 @@ def train_step(self, batch):

        self.optimizer.zero_grad()
        outputs = self(inputs)
        loss = self.criterion(outputs, labels)
        loss = self.criterion(outputs, labels.unsqueeze(1).float())
        loss.backward()
        self.optimizer.step()
        return {"loss": loss.item()}

    def _criterion(self):
        return nn.CrossEntropyLoss()

    def _optimizer(self):
        return optim.SGD(self.parameters(), lr=0.001, momentum=0.9)

    def save(self):
        torch.save(self.state_dict(), self.config.get("weights_filepath", "example_cnn.pth"))
        return nn.BCELoss()
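
A shape note on the new loss pairing (a sketch, not from the commit): the network now ends in a single sigmoid unit, so outputs have shape (N, 1), while the dataset yields integer labels of shape (N,); that is what the `labels.unsqueeze(1).float()` call in `train_step` reconciles.

import torch
import torch.nn as nn

criterion = nn.BCELoss()

outputs = torch.sigmoid(torch.randn(4, 1))  # (N, 1), as the model produces
labels = torch.tensor([1, 0, 0, 1], dtype=torch.int8)  # (N,), as KbmodStamps returns

# BCELoss requires float targets with the same shape as the outputs.
loss = criterion(outputs, labels.unsqueeze(1).float())
print(loss.item())

A common alternative is `nn.BCEWithLogitsLoss` on raw logits (dropping the sigmoid from `forward`), which is more numerically stable; this commit keeps the explicit sigmoid + `BCELoss` pairing.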
42 changes: 23 additions & 19 deletions src/kbmod_ml/models/resnet50.py
@@ -1,13 +1,8 @@
# ruff: noqa: D101, D102

# This example model is taken from the PyTorch CIFAR10 tutorial:
# https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html#define-a-convolutional-neural-network
import logging

import torch
import torch.nn as nn
import torch.nn.functional as F # noqa N812
import torch.optim as optim
from fibad.models.model_registry import fibad_model
from torchvision.models import resnet50

@@ -16,20 +11,19 @@

@fibad_model
class RESNET50(nn.Module):
    def __init__(self, model_config, shape):
        logger.info("This is an external model, not in FIBAD!!!")
    def __init__(self, config, shape):
        super().__init__()

        self.config = model_config
        self.config = config

        self.model = resnet50(pretrained=False, num_classes=self.config["model"]["num_classes"])
        self.model = resnet50(num_classes=2)

        # Optimizer and criterion could be set directly, i.e. `self.optimizer = optim.SGD(...)`
        # but we define them as methods as a way to allow for more flexibility in the future.
        self.optimizer = self._optimizer()
        self.criterion = self._criterion()
        # Modify the input channels to 1 (e.g., for grayscale images)
        self.model = self.modify_resnet_input_channels(self.model, num_channels=shape[0])

    def forward(self, x):
        if isinstance(x, tuple):
            x, _ = x
        return self.model(x)

    def train_step(self, batch):
@@ -55,11 +49,21 @@ def train_step(self, batch):
        self.optimizer.step()
        return {"loss": loss.item()}

    def _criterion(self):
        return nn.CrossEntropyLoss()
    def modify_resnet_input_channels(self, model, num_channels):
        # Get the first convolutional layer
        first_conv_layer = model.conv1

    def _optimizer(self):
        return optim.SGD(self.parameters(), lr=0.001, momentum=0.9)
        # Create a new convolutional layer with the desired number of input channels
        new_conv_layer = nn.Conv2d(
            in_channels=num_channels,
            out_channels=first_conv_layer.out_channels,
            kernel_size=first_conv_layer.kernel_size,
            stride=first_conv_layer.stride,
            padding=first_conv_layer.padding,
            bias=first_conv_layer.bias,
        )

    def save(self):
        torch.save(self.state_dict(), self.config.get("weights_filepath"))
        # Replace the first convolutional layer in the model
        model.conv1 = new_conv_layer

        return model
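
One subtlety in `modify_resnet_input_channels` (an observation, with an illustrative variant below): `nn.Conv2d` expects a boolean `bias` argument, but `first_conv_layer.bias` is a Parameter or None. The code works here only because torchvision's `resnet50` builds `conv1` with `bias=False`, so the attribute is None, which is falsy. A defensive version of the same replacement:

import torch
import torch.nn as nn
from torchvision.models import resnet50

model = resnet50(num_classes=2)
old = model.conv1

# Same replacement as in the diff, but passing a bool so the code also
# works for conv layers that do carry a bias term.
model.conv1 = nn.Conv2d(
    in_channels=1,  # e.g. a single "mean" coadd channel
    out_channels=old.out_channels,
    kernel_size=old.kernel_size,
    stride=old.stride,
    padding=old.padding,
    bias=old.bias is not None,
)

x = torch.randn(2, 1, 64, 64)  # the stamp size is illustrative
print(model(x).shape)  # torch.Size([2, 2])

The new layer is randomly initialized; since the model is trained from scratch here (no pretrained weights), nothing is lost by replacing `conv1` outright.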