From 47a4ae7e05782de7f79e9db3673849a897ee5f40 Mon Sep 17 00:00:00 2001 From: Hassan Sarwat Date: Sat, 27 Jan 2024 20:25:11 +0100 Subject: [PATCH 1/9] updated code to later versions of torch and tf --- .gitignore | 5 +++++ bin_mean_shift.py | 2 +- data_tools/RecordReaderAll.py | 3 ++- data_tools/convert_tfrecords.py | 16 +++++++++------- utils/loss.py | 11 ++++++----- utils/metric.py | 4 ++-- 6 files changed, 25 insertions(+), 16 deletions(-) diff --git a/.gitignore b/.gitignore index 076b72e..bc480cd 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,11 @@ #ide .idea/ +# Data +output_dir/ +datasets/ + + # checkpoints experiments/ diff --git a/bin_mean_shift.py b/bin_mean_shift.py index 913b7f5..b7fb278 100644 --- a/bin_mean_shift.py +++ b/bin_mean_shift.py @@ -117,7 +117,7 @@ def merge_center(self, seed_point, bandwidth=0.25): # merge center if distance between two points less than bandwidth sorted_intensity, indices = torch.sort(intensity, descending=True) - is_center = np.ones(n, dtype=np.bool) + is_center = np.ones(n, dtype=bool) indices = indices.cpu().numpy() center = np.zeros(n, dtype=np.uint8) diff --git a/data_tools/RecordReaderAll.py b/data_tools/RecordReaderAll.py index 6125919..3985f6a 100644 --- a/data_tools/RecordReaderAll.py +++ b/data_tools/RecordReaderAll.py @@ -1,5 +1,6 @@ # modified from https://github.com/art-programmer/PlaneNet -import tensorflow as tf +import tensorflow.compat.v1 as tf +tf.disable_v2_behavior() HEIGHT=192 WIDTH=256 diff --git a/data_tools/convert_tfrecords.py b/data_tools/convert_tfrecords.py index 7418561..9b40ac7 100644 --- a/data_tools/convert_tfrecords.py +++ b/data_tools/convert_tfrecords.py @@ -1,11 +1,13 @@ -import tensorflow as tf import numpy as np import os import argparse from RecordReaderAll import * -os.environ['CUDA_VISIBLE_DEVICES']='' +import tensorflow.compat.v1 as tf +tf.disable_v2_behavior() + +#os.environ['CUDA_VISIBLE_DEVICES']='0' parser = argparse.ArgumentParser() parser.add_argument('--input_tfrecords_file', type=str, @@ -27,14 +29,14 @@ os.makedirs(output_dir) if data_type == 'train': - file_list = open(output_dir + '/train.txt', 'w') output_dir = os.path.join(output_dir, 'train') - os.makedirs(output_dir) + os.makedirs(output_dir,exist_ok=True) + file_list = open(output_dir + '/train.txt', 'w') max_num = 50000 elif data_type == 'val': - file_list = open(output_dir + '/val.txt', 'w') output_dir = os.path.join(output_dir, 'val') - os.makedirs(output_dir) + os.makedirs(output_dir, exist_ok=True) + file_list = open(output_dir + '/val.txt', 'w') max_num = 760 else: print("unsupported data type") @@ -74,7 +76,7 @@ file_list.write('%d.npz\n' % (i, )) - if i % 100 == 99: + if i % 1000 == 99: print(i) file_list.close() diff --git a/utils/loss.py b/utils/loss.py index 2b8aba1..788a9ea 100644 --- a/utils/loss.py +++ b/utils/loss.py @@ -45,9 +45,10 @@ def hinge_embedding_loss(embedding, num_planes, segmentation, device, t_pull=0.5 embedding = embedding[0] segmentation = segmentation[0] embeddings = [] + # print(segmentation[0, :, :].view(1, h, w)) # select embedding with segmentation for i in range(num_planes): - feature = torch.transpose(torch.masked_select(embedding, segmentation[i, :, :].view(1, h, w)).view(c, -1), 0, 1) + feature = torch.transpose(torch.masked_select(embedding, segmentation[i, :, :].view(1, h, w).bool()).view(c, -1), 0, 1) embeddings.append(feature) centers = [] @@ -90,8 +91,8 @@ def surface_normal_loss(prediction, surface_normal, valid_region): valid_predition = torch.transpose(prediction.view(c, -1), 0, 1) 
valid_surface_normal = torch.transpose(surface_normal.view(c, -1), 0, 1) else: - valid_predition = torch.transpose(torch.masked_select(prediction, valid_region).view(c, -1), 0, 1) - valid_surface_normal = torch.transpose(torch.masked_select(surface_normal, valid_region).view(c, -1), 0, 1) + valid_predition = torch.transpose(torch.masked_select(prediction, valid_region.bool()).view(c, -1), 0, 1) + valid_surface_normal = torch.transpose(torch.masked_select(surface_normal, valid_region.bool()).view(c, -1), 0, 1) similarity = torch.nn.functional.cosine_similarity(valid_predition, valid_surface_normal, dim=1) @@ -107,8 +108,8 @@ def parameter_loss(prediction, param, valid_region): valid_predition = torch.transpose(prediction.view(c, -1), 0, 1) valid_param = torch.transpose(param.view(c, -1), 0, 1) else: - valid_predition = torch.transpose(torch.masked_select(prediction, valid_region).view(c, -1), 0, 1) - valid_param = torch.transpose(torch.masked_select(param, valid_region).view(c, -1), 0, 1) + valid_predition = torch.transpose(torch.masked_select(prediction, valid_region.bool()).view(c, -1), 0, 1) + valid_param = torch.transpose(torch.masked_select(param, valid_region.bool()).view(c, -1), 0, 1) return torch.mean(torch.sum(torch.abs(valid_predition - valid_param), dim=1)) diff --git a/utils/metric.py b/utils/metric.py index 9bde5dc..a1661ac 100644 --- a/utils/metric.py +++ b/utils/metric.py @@ -14,8 +14,8 @@ def eval_iou(annotation,segmentation): """ - annotation = annotation.astype(np.bool) - segmentation = segmentation.astype(np.bool) + annotation = annotation.astype(bool) + segmentation = segmentation.astype(bool) if np.isclose(np.sum(annotation),0) and np.isclose(np.sum(segmentation),0): return 1 From 926e2b37aa21b23b46031a4927939e9384223d13 Mon Sep 17 00:00:00 2001 From: Hassan Sarwat Date: Thu, 1 Feb 2024 19:36:36 +0100 Subject: [PATCH 2/9] Added dpt --- main.py | 3 ++- poetry.lock | 7 +++++++ pyproject.toml | 15 +++++++++++++++ requirements.txt | 6 +++--- 4 files changed, 27 insertions(+), 4 deletions(-) create mode 100644 poetry.lock create mode 100644 pyproject.toml diff --git a/main.py b/main.py index 893c7d4..30810e6 100644 --- a/main.py +++ b/main.py @@ -187,7 +187,8 @@ def train(_run, _log): random.seed(cfg.seed) device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - + print('Device:',device) + print('*'*100) if not (_run._id is None): checkpoint_dir = os.path.join(_run.observers[0].basedir, str(_run._id), 'checkpoints') if not os.path.exists(checkpoint_dir): diff --git a/poetry.lock b/poetry.lock new file mode 100644 index 0000000..b6be44c --- /dev/null +++ b/poetry.lock @@ -0,0 +1,7 @@ +# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. 
+package = [] + +[metadata] +lock-version = "2.0" +python-versions = "3.8.10" +content-hash = "ff0abf5ff0eeff0cf2570180eaa9c41878150c7126d7d1437ee02139947f66e9" diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..95d776d --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,15 @@ +[tool.poetry] +name = "ml3d" +version = "0.1.0" +description = "ml3d project" +authors = ["Your Name "] +license = "n" +readme = "README.md" + +[tool.poetry.dependencies] +python = "3.8.10" + + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" diff --git a/requirements.txt b/requirements.txt index 247c99b..f85d1c2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ -torch==0.4.1 -torchvision==0.2.0 -tensorflow==1.14.0 +# torch==0.4.1 +# torchvision==0.2.0 +# tensorflow==1.14.0 imageio scipy argparse From 5f4ef949e36a72536161d83bd85192edb6c8fbf2 Mon Sep 17 00:00:00 2001 From: Hassan Sarwat Date: Thu, 1 Feb 2024 19:38:11 +0100 Subject: [PATCH 3/9] added dpt --- configs/config.yaml | 12 +++++++---- main.py | 43 ++++++++++++++++++++-------------------- models/baseline_same.py | 44 +++++++++++++++++++++++++++++++++-------- 3 files changed, 65 insertions(+), 34 deletions(-) diff --git a/configs/config.yaml b/configs/config.yaml index 470671c..3f19508 100644 --- a/configs/config.yaml +++ b/configs/config.yaml @@ -1,8 +1,9 @@ seed: 123 num_gpus: 1 -num_epochs: 100 -resume_dir: None -print_interval: 10 +num_epochs: 5 +resume_dir: /cluster/52/sarwath/snet/output/models/ +print_interval: 100 + solver: method: adam @@ -10,7 +11,7 @@ solver: weight_decay: 0.00001 dataset: - root_dir: /new_disk2/yuzh/PlaneNetData/ + root_dir: /cluster/52/sarwath/snet/output/processed/ batch_size: 16 num_workers: 8 @@ -19,3 +20,6 @@ model: pretrained: True embed_dims: 2 fix_bn: False + name: baseline_4 + dpt: False + semantic: False diff --git a/main.py b/main.py index 30810e6..cc86a2d 100644 --- a/main.py +++ b/main.py @@ -181,25 +181,27 @@ def load_dataset(subset, cfg): @ex.command def train(_run, _log): cfg = edict(_run.config) - + checkpoint_dir = cfg.resume_dir + model_name = cfg.model.name torch.manual_seed(cfg.seed) np.random.seed(cfg.seed) random.seed(cfg.seed) - device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - print('Device:',device) - print('*'*100) - if not (_run._id is None): - checkpoint_dir = os.path.join(_run.observers[0].basedir, str(_run._id), 'checkpoints') - if not os.path.exists(checkpoint_dir): - os.makedirs(checkpoint_dir) + # print('Device:',device) + # print('*'*100) + # if not (_run._id is None): + # checkpoint_dir = os.path.join(_run.observers[0].basedir, str(_run._id), 'checkpoints') + # if not os.path.exists(checkpoint_dir): + # os.makedirs(checkpoint_dir) + # print(checkpoint_dir) + # print('_-_'*80) # build network network = UNet(cfg.model) - - if not (cfg.resume_dir == 'None'): - model_dict = torch.load(cfg.resume_dir, map_location=lambda storage, loc: storage) - network.load_state_dict(model_dict) + + # if not (cfg.resume_dir == 'None'): + # model_dict = torch.load(cfg.resume_dir, map_location=lambda storage, loc: storage) + # network.load_state_dict(model_dict) # load nets into gpu if cfg.num_gpus > 1 and torch.cuda.is_available(): @@ -356,9 +358,9 @@ def train(_run, _log): history['rmses'].append(rmses.avg) # save checkpoint - if not (_run._id is None): - torch.save(network.state_dict(), os.path.join(checkpoint_dir, f"network_epoch_{epoch}.pt")) - pickle.dump(history, open(os.path.join(checkpoint_dir, 
'history.pkl'), 'wb')) + # if not (_run._id is None): + torch.save(network.state_dict(), os.path.join(checkpoint_dir, f"{model_name}.pt")) + pickle.dump(history, open(os.path.join(checkpoint_dir, 'history.pkl'), 'wb')) @ex.command @@ -371,17 +373,14 @@ def eval(_run, _log): device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - if not (_run._id is None): - checkpoint_dir = os.path.join('experiments', str(_run._id), 'checkpoints') - if not os.path.exists(checkpoint_dir): - os.makedirs(checkpoint_dir) + checkpoint_dir = cfg.resume_dir + model_name = cfg.model.name # build network network = UNet(cfg.model) - if not (cfg.resume_dir == 'None'): - model_dict = torch.load(cfg.resume_dir, map_location=lambda storage, loc: storage) - network.load_state_dict(model_dict) + model_dict = torch.load('/cluster/52/sarwath/snet/output/models/baseline_4.pt', map_location=lambda storage, loc: storage) + network.load_state_dict(model_dict) # load nets into gpu if cfg.num_gpus > 1 and torch.cuda.is_available(): diff --git a/models/baseline_same.py b/models/baseline_same.py index 22be26b..2f99508 100644 --- a/models/baseline_same.py +++ b/models/baseline_same.py @@ -2,6 +2,8 @@ import torch.nn as nn from models import resnet_scene as resnet +from transformers import DPTFeatureExtractor as dpt + class ResNet(nn.Module): @@ -38,19 +40,20 @@ def forward(self, x): x4 = self.layer3(x3) x5 = self.layer4(x4) - return x1, x2, x3, x4, x5 - + return x1, x2, x3, x4, x5 class Baseline(nn.Module): def __init__(self, cfg): super(Baseline, self).__init__() - + self.feature_extractor = dpt.from_pretrained("Intel/dpt-large") + self.dpt = cfg.dpt orig_resnet = resnet.__dict__[cfg.arch](pretrained=cfg.pretrained) self.backbone = ResNet(orig_resnet) + self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") self.relu = nn.ReLU(inplace=True) - channel = 64 + channel = 3 if cfg.dpt else 64 # top down self.upsample = nn.Upsample(scale_factor=2, mode='bilinear') self.up_conv5 = nn.Conv2d(channel, channel, (1, 1)) @@ -96,11 +99,34 @@ def top_down(self, x): return p0, p1, p2, p3, p4, p5 def forward(self, x): - # bottom up - c1, c2, c3, c4, c5 = self.backbone(x) - # top down - p0, p1, p2, p3, p4, p5 = self.top_down((c1, c2, c3, c4, c5)) + # Garbage, can delete + # print(x.size()) + # print('11'*111) + # print(x[0]) + # feature_extractor = dpt.from_pretrained("Intel/dpt-large") + # test = self.feature_extractor(x,do_resize=False,return_tensors='pt') + # print(test.data.keys()) + # print('x'*100) + # print(test.data['pixel_values'][0]) + # print('00'*111) + # print(test.data['pixel_values'].size()) + # print('00'*111) + + if self.dpt: + p0 = self.feature_extractor(x, do_resize=False, return_tensors='pt').data['pixel_values'].to(self.device) + else: + # bottom up + c1, c2, c3, c4, c5 = self.backbone(x) + # print('_'*100) + # print(c1.size(),'_',c2.size(),'_',c3.size(),'_',c4.size(),'_',c5.size()) + # print('..'*100) + + # top down + p0, p1, p2, p3, p4, p5 = self.top_down((c1, c2, c3, c4, c5)) + # print(p0.size(),'_',p1.size(),'_',p2.size(),'_',p3.size(),'_',p4.size(),'_',p5.size()) + # print('='*100) + # output prob = self.pred_prob(p0) @@ -110,3 +136,5 @@ def forward(self, x): param = self.pred_param(p0) return prob, embedding, depth, surface_normal, param + + From 74b65eb30f72109e8e12e343e46fd9d4e8ab71d9 Mon Sep 17 00:00:00 2001 From: Hassan Sarwat Date: Tue, 6 Feb 2024 22:09:58 +0100 Subject: [PATCH 4/9] Modularized, updated dpt, added semantic loss --- configs/config.yaml | 8 +- main.py | 116 
+++++++++++++++------ models/baseline_same.py | 216 +++++++++++++++++++++++++++++++++++++--- predict.py | 2 +- utils/loss.py | 10 ++ 5 files changed, 304 insertions(+), 48 deletions(-) diff --git a/configs/config.yaml b/configs/config.yaml index 3f19508..c12c8ee 100644 --- a/configs/config.yaml +++ b/configs/config.yaml @@ -2,7 +2,7 @@ seed: 123 num_gpus: 1 num_epochs: 5 resume_dir: /cluster/52/sarwath/snet/output/models/ -print_interval: 100 +print_interval: 10 solver: @@ -16,10 +16,8 @@ dataset: num_workers: 8 model: - arch: resnet101 + arch: resnet101 # dpt # pretrained: True embed_dims: 2 fix_bn: False - name: baseline_4 - dpt: False - semantic: False + semantic: True diff --git a/main.py b/main.py index cc86a2d..6be75d6 100644 --- a/main.py +++ b/main.py @@ -24,9 +24,12 @@ from utils.disp import colors_256 as colors from bin_mean_shift import Bin_Mean_Shift from modules import get_coordinate_map -from utils.loss import Q_loss +from utils.loss import Q_loss, semantic_loss from instance_parameter_loss import InstanceParameterLoss from match_segmentation import MatchSegmentation +from transformers import DPTImageProcessor + +image_processor = DPTImageProcessor().from_pretrained('Intel/dpt-large-ade') ex = Experiment() @@ -114,10 +117,13 @@ def __getitem__(self, index): image = data['image'] image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + + resized_image = cv2.resize(src=image, dsize=(256,256)) image = Image.fromarray(image) - + resized_image = Image.fromarray(resized_image) if self.transform is not None: image = self.transform(image) + resized_image = self.transform(resized_image) plane = data['plane'] num_planes = data['num_planes'][0] @@ -143,18 +149,23 @@ def __getitem__(self, index): # since some depth is missing, we use plane to recover those depth following PlaneNet gt_depth = data['depth'].reshape(192, 256) depth = self.plane2depth(plane_parameters, num_planes, gt_segmentation, gt_depth).reshape(1, 192, 256) + gt_semantics = data['semantics'] + gt_semantics = gt_semantics.astype(float) + sample = { + 'resized_image':resized_image, 'image': image, 'num_planes': num_planes, 'instance': torch.ByteTensor(segmentation), # one for planar and zero for non-planar - 'semantic': 1 - torch.FloatTensor(segmentation[num_planes, :, :]).unsqueeze(0), + 'planar': 1 - torch.FloatTensor(segmentation[num_planes, :, :]).unsqueeze(0), 'gt_seg': torch.LongTensor(gt_segmentation), 'depth': torch.FloatTensor(depth), 'plane_parameters': torch.FloatTensor(plane_parameters), 'valid_region': torch.ByteTensor(valid_region.astype(np.uint8)).unsqueeze(0), - 'plane_instance_parameter': torch.FloatTensor(plane_instance_parameter) + 'plane_instance_parameter': torch.FloatTensor(plane_instance_parameter), + 'gt_class': torch.FloatTensor(gt_semantics) } return sample @@ -177,15 +188,15 @@ def load_dataset(subset, cfg): return loaders - @ex.command def train(_run, _log): cfg = edict(_run.config) checkpoint_dir = cfg.resume_dir - model_name = cfg.model.name + # model_name = cfg.model.name torch.manual_seed(cfg.seed) np.random.seed(cfg.seed) random.seed(cfg.seed) + model_path = f"{cfg.resume_dir}/baseline_{cfg.model.arch}_semantic.pt" device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # print('Device:',device) # print('*'*100) @@ -210,7 +221,8 @@ def train(_run, _log): # set up optimizers optimizer = get_optimizer(network.parameters(), cfg.solver) - + if device =='cpu': + cfg.dataset.num_workers=4 # data loader data_loader = load_dataset('train', cfg.dataset) @@ -238,39 +250,61 @@ def train(_run, 
_log): rmses = AverageMeter() instance_rmses = AverageMeter() mean_angles = AverageMeter() + losses_semantic = AverageMeter() + tic = time.time() for iter, sample in enumerate(data_loader): + resized_image = sample['resized_image'].to(device) image = sample['image'].to(device) instance = sample['instance'].to(device) - semantic = sample['semantic'].to(device) + planar = sample['planar'].to(device) gt_depth = sample['depth'].to(device) gt_seg = sample['gt_seg'].to(device) gt_plane_parameters = sample['plane_parameters'].to(device) valid_region = sample['valid_region'].to(device) gt_plane_instance_parameter = sample['plane_instance_parameter'].to(device) + gt_class = sample['gt_class'].to(device) + x = image # forward pass - logit, embedding, _, _, param = network(image) - + if cfg.model.arch=='dpt': + x = image_processor(resized_image, do_resize=False, return_tensors='pt')['pixel_values'].to(device) + + if cfg.model.semantic: + logit, embedding, _, _, param, semantic, combi = network(image) + else: + logit, embedding, _, _, param = network(image) + + # print(semantic) + # print('00'*100) + # print(combi) + # print('1'*100) + tempc = embedding + if cfg.model.semantic: + tempc = combi segmentations, sample_segmentations, sample_params, centers, sample_probs, sample_gt_segs = \ - bin_mean_shift(logit, embedding, param, gt_seg) + bin_mean_shift(logit, tempc, param, gt_seg) # calculate loss - loss, loss_pull, loss_push, loss_binary, loss_depth, loss_normal, loss_parameters, loss_pw, loss_instance \ - = 0., 0., 0., 0., 0., 0., 0., 0., 0. + loss, loss_pull, loss_push, loss_binary, loss_depth, loss_normal, loss_parameters, loss_pw, loss_instance, loss_semantic \ + = 0., 0., 0., 0., 0., 0., 0., 0., 0., 0. batch_size = image.size(0) for i in range(batch_size): + _loss_semantic = 0 _loss, _loss_pull, _loss_push = hinge_embedding_loss(embedding[i:i+1], sample['num_planes'][i:i+1], instance[i:i+1], device) - _loss_binary = class_balanced_cross_entropy_loss(logit[i], semantic[i]) + _loss_binary = class_balanced_cross_entropy_loss(logit[i], planar[i]) _loss_normal, mean_angle = surface_normal_loss(param[i:i+1], gt_plane_parameters[i:i+1], valid_region[i:i+1]) _loss_L1 = parameter_loss(param[i:i + 1], gt_plane_parameters[i:i + 1], valid_region[i:i + 1]) _loss_depth, rmse, infered_depth = Q_loss(param[i:i+1], k_inv_dot_xy1, gt_depth[i:i+1]) + + if cfg.model.semantic: + _loss_semantic = semantic_loss(semantic, gt_class, device) if segmentations[i] is None: continue @@ -279,12 +313,13 @@ def train(_run, _log): instance_parameter_loss(segmentations[i], sample_segmentations[i], sample_params[i], valid_region[i:i+1], gt_depth[i:i+1]) - _loss += _loss_binary + _loss_depth + _loss_normal + _instance_loss + _loss_L1 + _loss += _loss_binary + _loss_depth + _loss_normal + _instance_loss + _loss_L1 + _loss_semantic + # planar segmentation iou prob = torch.sigmoid(logit[i]) mask = (prob > 0.5).float().cpu().numpy() - iou = eval_iou(mask, semantic[i].cpu().numpy()) + iou = eval_iou(mask, planar[i].cpu().numpy()) ioues.update(iou * 100) instance_rmses.update(instance_abs_disntace.item()) rmses.update(rmse.item()) @@ -297,6 +332,7 @@ def train(_run, _log): loss_depth += _loss_depth loss_normal += _loss_normal loss_instance += _instance_loss + loss_semantic += _loss_semantic loss /= batch_size loss_pull /= batch_size @@ -305,6 +341,7 @@ def train(_run, _log): loss_depth /= batch_size loss_normal /= batch_size loss_instance /= batch_size + loss_semantic /= batch_size # Backward optimizer.zero_grad() @@ -319,7 +356,7 @@ def 
train(_run, _log): losses_depth.update(loss_depth.item()) losses_normal.update(loss_normal.item()) losses_instance.update(loss_instance.item()) - + losses_semantic.update(loss_semantic.item()) # update time batch_time.update(time.time() - tic) tic = time.time() @@ -337,7 +374,9 @@ def train(_run, _log): f"AN: {mean_angles.val:.4f} ({mean_angles.avg:.4f}) " f"Depth: {losses_depth.val:.4f} ({losses_depth.avg:.4f}) " f"INSDEPTH: {instance_rmses.val:.4f} ({instance_rmses.avg:.4f}) " - f"RMSE: {rmses.val:.4f} ({rmses.avg:.4f}) ") + f"RMSE: {rmses.val:.4f} ({rmses.avg:.4f}) " + f"Semantic: {losses_semantic.val:.4f}({losses_semantic.avg:.4f}) ") + _log.info(f"* epoch: {epoch:2d}\t" f"Loss: {losses.avg:.6f}\t" @@ -346,7 +385,8 @@ def train(_run, _log): f"Binary: {losses_binary.avg:.6f}\t" f"Depth: {losses_depth.avg:.6f}\t" f"IoU: {ioues.avg:.2f}\t" - f"RMSE: {rmses.avg:.4f}\t") + f"RMSE: {rmses.avg:.4f}\t" + f"Semantic: {losses_semantic.avg:.4f}\t") # save history history['losses'].append(losses.avg) @@ -356,11 +396,13 @@ def train(_run, _log): history['losses_depth'].append(losses_depth.avg) history['ioues'].append(ioues.avg) history['rmses'].append(rmses.avg) + history['losses_semantic'].append(losses_semantic.avg) + # save checkpoint # if not (_run._id is None): - torch.save(network.state_dict(), os.path.join(checkpoint_dir, f"{model_name}.pt")) - pickle.dump(history, open(os.path.join(checkpoint_dir, 'history.pkl'), 'wb')) + torch.save(network.state_dict(), model_path) + pickle.dump(history, open(os.path.join(checkpoint_dir, 'history_semantic.pkl'), 'wb')) @ex.command @@ -374,12 +416,12 @@ def eval(_run, _log): device = torch.device("cuda" if torch.cuda.is_available() else "cpu") checkpoint_dir = cfg.resume_dir - model_name = cfg.model.name + # model_name = cfg.model.name # build network network = UNet(cfg.model) - model_dict = torch.load('/cluster/52/sarwath/snet/output/models/baseline_4.pt', map_location=lambda storage, loc: storage) + model_dict = torch.load('/cluster/52/sarwath/snet/output/models/baseline_dpt.pt', map_location=lambda storage, loc: storage) network.load_state_dict(model_dict) # load nets into gpu @@ -401,27 +443,41 @@ def eval(_run, _log): with torch.no_grad(): for iter, sample in enumerate(data_loader): + resized_image = sample['resized_image'].to(device) image = sample['image'].to(device) instance = sample['instance'].to(device) gt_seg = sample['gt_seg'].numpy() - semantic = sample['semantic'].to(device) + planar = sample['planar'].to(device) gt_depth = sample['depth'].to(device) # gt_plane_parameters = sample['plane_parameters'].to(device) valid_region = sample['valid_region'].to(device) gt_plane_num = sample['num_planes'].int() # gt_plane_instance_parameter = sample['plane_instance_parameter'].numpy() + gt_class = sample['gt_class'].to(device) + + x = image # forward pass - logit, embedding, _, _, param = network(image) + if cfg.model.arch=='dpt': + x = image_processor(resized_image, do_resize=False, return_tensors='pt')['pixel_values'].to(device) + + if cfg.model.semantic: + logit, embedding, _, _, param, semantic, combi = network(image) + else: + logit, embedding, _, _, param = network(image) + tempc = embedding + if cfg.model.semantic: + tempc = combi prob = torch.sigmoid(logit[0]) + # image = cv2.resize(src=image, dsize=(192,256)) # infer per pixel depth using per pixel plane parameter _, _, per_pixel_depth = Q_loss(param, k_inv_dot_xy1, gt_depth) # fast mean shift segmentation, sampled_segmentation, sample_param = bin_mean_shift.test_forward( - prob, embedding[0], 
param, mask_threshold=0.1) + prob, tempc[0], param, mask_threshold=0.1) # since GT plane segmentation is somewhat noise, the boundary of plane in GT is not well aligned, # we thus use avg_pool_2d to smooth the segmentation results @@ -461,7 +517,7 @@ def eval(_run, _log): # visualization and evaluation h, w = 192, 256 image = tensor_to_image(image.cpu()[0]) - semantic = semantic.cpu().numpy().reshape(h, w) + planar = planar.cpu().numpy().reshape(h, w) mask = (prob > 0.1).float().cpu().numpy().reshape(h, w) gt_seg = gt_seg.reshape(h, w) depth = instance_depth.cpu().numpy()[0, 0].reshape(h, w) @@ -501,8 +557,8 @@ def eval(_run, _log): blend_pred = (pred_seg * 0.7 + image * 0.3).astype(np.uint8) blend_gt = (gt_seg_image * 0.7 + image * 0.3).astype(np.uint8) - semantic = cv2.resize((semantic * 255).astype(np.uint8), (w, h)) - semantic = cv2.cvtColor(semantic, cv2.COLOR_GRAY2BGR) + planar = cv2.resize((planar * 255).astype(np.uint8), (w, h)) + planar = cv2.cvtColor(planar, cv2.COLOR_GRAY2BGR) mask = cv2.resize((mask * 255).astype(np.uint8), (w, h)) mask = cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR) @@ -522,7 +578,7 @@ def eval(_run, _log): image_1 = np.concatenate((image, pred_seg, gt_seg_image), axis=1) image_2 = np.concatenate((image, blend_pred, blend_gt), axis=1) - image_3 = np.concatenate((image, mask, semantic), axis=1) + image_3 = np.concatenate((image, mask, planar), axis=1) image_4 = np.concatenate((depth_diff, depth, gt_depth), axis=1) image = np.concatenate((image_1, image_2, image_3, image_4), axis=0) diff --git a/models/baseline_same.py b/models/baseline_same.py index 2f99508..5c9745c 100644 --- a/models/baseline_same.py +++ b/models/baseline_same.py @@ -2,8 +2,11 @@ import torch.nn as nn from models import resnet_scene as resnet -from transformers import DPTFeatureExtractor as dpt - +from transformers import DPTModel, DPTConfig, DPTImageProcessor, DPTForSemanticSegmentation +import collections.abc +import math +from dataclasses import dataclass +from typing import List, Optional, Set, Tuple, Union class ResNet(nn.Module): @@ -41,19 +44,174 @@ def forward(self, x): x5 = self.layer4(x4) return x1, x2, x3, x4, x5 + + + +# def _get_backbone_hidden_size(config): +# if config.backbone_config is not None and config.is_hybrid is False: +# return config.backbone_config.hidden_size +# else: +# return config.hidden_size + + +# class DPTReassembleLayer(nn.Module): +# def __init__(self, config, channels, factor): +# super().__init__() +# # projection +# hidden_size = _get_backbone_hidden_size(config) +# self.projection = nn.Conv2d(in_channels=hidden_size, out_channels=channels, kernel_size=1) + +# # up/down sampling depending on factor +# if factor > 1: +# self.resize = nn.ConvTranspose2d(channels, channels, kernel_size=factor, stride=factor, padding=0) +# elif factor == 1: +# self.resize = nn.Identity() +# elif factor < 1: +# # so should downsample +# self.resize = nn.Conv2d(channels, channels, kernel_size=3, stride=int(1 / factor), padding=1) + +# def forward(self, hidden_state): +# hidden_state = self.projection(hidden_state) +# hidden_state = self.resize(hidden_state) +# return hidden_state + +# class DPTReassembleStage(nn.Module): +# """ +# This class reassembles the hidden states of the backbone into image-like feature representations at various +# resolutions. + +# This happens in 3 stages: +# 1. Map the N + 1 tokens to a set of N tokens, by taking into account the readout ([CLS]) token according to +# `config.readout_type`. +# 2. 
Project the channel dimension of the hidden states according to `config.neck_hidden_sizes`. +# 3. Resizing the spatial dimensions (height, width). + +# Args: +# config (`[DPTConfig]`): +# Model configuration class defining the model architecture. +# """ + +# def __init__(self, config): +# super().__init__() + +# self.config = config +# self.layers = nn.ModuleList() +# if config.is_hybrid: +# self._init_reassemble_dpt_hybrid(config) +# else: +# self._init_reassemble_dpt(config) + +# self.neck_ignore_stages = config.neck_ignore_stages + +# def _init_reassemble_dpt_hybrid(self, config): +# r""" " +# For DPT-Hybrid the first 2 reassemble layers are set to `nn.Identity()`, please check the official +# implementation: https://github.com/isl-org/DPT/blob/f43ef9e08d70a752195028a51be5e1aff227b913/dpt/vit.py#L438 +# for more details. +# """ +# for i, factor in zip(range(len(config.neck_hidden_sizes)), config.reassemble_factors): +# if i <= 1: +# self.layers.append(nn.Identity()) +# elif i > 1: +# self.layers.append(DPTReassembleLayer(config, channels=config.neck_hidden_sizes[i], factor=factor)) + +# if config.readout_type != "project": +# raise ValueError(f"Readout type {config.readout_type} is not supported for DPT-Hybrid.") + +# # When using DPT-Hybrid the readout type is set to "project". The sanity check is done on the config file +# self.readout_projects = nn.ModuleList() +# hidden_size = _get_backbone_hidden_size(config) +# for i in range(len(config.neck_hidden_sizes)): +# if i <= 1: +# self.readout_projects.append(nn.Sequential(nn.Identity())) +# elif i > 1: +# self.readout_projects.append( +# nn.Sequential(nn.Linear(2 * hidden_size, hidden_size), ACT2FN[config.hidden_act]) +# ) + +# def _init_reassemble_dpt(self, config): +# for i, factor in zip(range(len(config.neck_hidden_sizes)), config.reassemble_factors): +# self.layers.append(DPTReassembleLayer(config, channels=config.neck_hidden_sizes[i], factor=factor)) + +# if config.readout_type == "project": +# self.readout_projects = nn.ModuleList() +# hidden_size = _get_backbone_hidden_size(config) +# for _ in range(len(config.neck_hidden_sizes)): +# self.readout_projects.append( +# nn.Sequential(nn.Linear(2 * hidden_size, hidden_size), ACT2FN[config.hidden_act]) +# ) + +# def forward(self, hidden_states: List[torch.Tensor], patch_height=None, patch_width=None) -> List[torch.Tensor]: +# """ +# Args: +# hidden_states (`List[torch.FloatTensor]`, each of shape `(batch_size, sequence_length + 1, hidden_size)`): +# List of hidden states from the backbone. 
+# """ +# out = [] + +# for i, hidden_state in enumerate(hidden_states): +# if i not in self.neck_ignore_stages: +# # reshape to (batch_size, num_channels, height, width) +# cls_token, hidden_state = hidden_state[:, 0], hidden_state[:, 1:] +# batch_size, sequence_length, num_channels = hidden_state.shape +# if patch_height is not None and patch_width is not None: +# hidden_state = hidden_state.reshape(batch_size, patch_height, patch_width, num_channels) +# else: +# size = int(math.sqrt(sequence_length)) +# hidden_state = hidden_state.reshape(batch_size, size, size, num_channels) +# hidden_state = hidden_state.permute(0, 3, 1, 2).contiguous() + +# feature_shape = hidden_state.shape +# if self.config.readout_type == "project": +# # reshape to (batch_size, height*width, num_channels) +# hidden_state = hidden_state.flatten(2).permute((0, 2, 1)) +# readout = cls_token.unsqueeze(1).expand_as(hidden_state) +# # concatenate the readout token to the hidden states and project +# hidden_state = self.readout_projects[i](torch.cat((hidden_state, readout), -1)) +# # reshape back to (batch_size, num_channels, height, width) +# hidden_state = hidden_state.permute(0, 2, 1).reshape(feature_shape) +# elif self.config.readout_type == "add": +# hidden_state = hidden_state.flatten(2) + cls_token.unsqueeze(-1) +# hidden_state = hidden_state.reshape(feature_shape) +# hidden_state = self.layers[i](hidden_state) +# out.append(hidden_state) + +# return out + + class Baseline(nn.Module): def __init__(self, cfg): super(Baseline, self).__init__() - self.feature_extractor = dpt.from_pretrained("Intel/dpt-large") - self.dpt = cfg.dpt - orig_resnet = resnet.__dict__[cfg.arch](pretrained=cfg.pretrained) - self.backbone = ResNet(orig_resnet) - self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + self.arch = cfg.arch + self.semantic = cfg.semantic + if cfg.arch == 'dpt': + self.arch = 'dpt' + self.dpt_config = DPTConfig(image_size=256) + self.dpt = DPTForSemanticSegmentation(config = self.dpt_config).from_pretrained("Intel/dpt-large-ade") + self.dpt_config = self.dpt.config + self.dpt_proj = nn.Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + self.dpt_conv1 = nn.Conv2d(256, 128, kernel_size=3, stride=1, padding=1) + self.dpt_up1 = nn.Upsample(scale_factor=2, mode="bilinear", align_corners=True) + self.dpt_conv2 = nn.Conv2d(128, 64, kernel_size=3, stride=1, padding=1) + self.dpt_maxp = nn.MaxPool2d(kernel_size=(9,1), dilation=(8,1), stride = (1,1)) + self.dpt_relu = nn.ReLU() + self.dpt_head = nn.Sequential( + nn.Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), + nn.Conv2d(256, 128, kernel_size=3, stride=1, padding=1), + nn.Upsample(scale_factor=2, mode="bilinear", align_corners=True), + nn.Conv2d(128, 64, kernel_size=3, stride=1, padding=1), + nn.MaxPool2d(kernel_size=(9,1), dilation=(8,1), stride = (1,1)) , + nn.ReLU() + ) + else: + orig_resnet = resnet.__dict__[cfg.arch](pretrained=cfg.pretrained) + self.backbone = ResNet(orig_resnet) + self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") self.relu = nn.ReLU(inplace=True) - channel = 3 if cfg.dpt else 64 + channel = 64 # top down self.upsample = nn.Upsample(scale_factor=2, mode='bilinear') self.up_conv5 = nn.Conv2d(channel, channel, (1, 1)) @@ -72,6 +230,9 @@ def __init__(self, cfg): self.p0_conv = nn.Conv2d(channel, channel, (3, 3), padding=1) + + + # plane or non-plane classifier self.pred_prob = nn.Conv2d(channel, 1, (1, 1), padding=0) # embedding @@ -82,6 +243,12 @@ def 
__init__(self, cfg): self.pred_surface_normal = nn.Conv2d(channel, 3, (1, 1), padding=0) # surface plane parameters self.pred_param = nn.Conv2d(channel, 3, (1, 1), padding=0) + + if cfg.semantic: + # semantic segmentation + self.pred_semantic = nn.Conv2d(channel, 41, (1, 1), padding=0) + # combination for semantic pool + self.combination = nn.Conv2d(43, 2, (1, 1), padding=0) def top_down(self, x): c1, c2, c3, c4, c5 = x @@ -97,6 +264,25 @@ def top_down(self, x): p0 = self.relu(self.p0_conv(p0)) return p0, p1, p2, p3, p4, p5 + + def dpt_backbone(self, x): + + l1 = self.dpt.dpt(x, output_hidden_states=True, output_attentions=True, return_dict=True) + hidden_states = l1.hidden_states + # print(l1.shape) + # print('dpt_enc'*10) + # output_attentions = l1.output_attentions + # return_dict = l1.return_dict + hidden_states = [ feature for idx, feature in enumerate(hidden_states[1:]) if idx in self.dpt.config.backbone_out_indices] + + l2 = self.dpt.neck(hidden_states=hidden_states) + l3 = l2[-1] + # print(l3.shape) + # print('dpt_dec_'*20) + l4 = self.dpt_head(l3) + # print(l4.shape) + # print('dpt_head_'*20) + return l1,l2,l3,l4 def forward(self, x): @@ -112,9 +298,9 @@ def forward(self, x): # print('00'*111) # print(test.data['pixel_values'].size()) # print('00'*111) - - if self.dpt: - p0 = self.feature_extractor(x, do_resize=False, return_tensors='pt').data['pixel_values'].to(self.device) + + if self.arch == 'dpt': + p3,p2,p1,p0 = self.dpt_backbone(x) else: # bottom up c1, c2, c3, c4, c5 = self.backbone(x) @@ -129,12 +315,18 @@ def forward(self, x): # output + prob = self.pred_prob(p0) embedding = self.embedding_conv(p0) depth = self.pred_depth(p0) surface_normal = self.pred_surface_normal(p0) param = self.pred_param(p0) - + + if self.semantic: + semantic = self.pred_semantic(p0) + combination = self.combination(torch.cat((embedding, semantic), dim=1)) + return prob, embedding, depth, surface_normal, param, semantic, combination + return prob, embedding, depth, surface_normal, param diff --git a/predict.py b/predict.py index 2cd8baa..9053d92 100644 --- a/predict.py +++ b/predict.py @@ -66,7 +66,7 @@ def predict(_run, _log): image = transforms(image) image = image.to(device).unsqueeze(0) # forward pass - logit, embedding, _, _, param = network(image) + logit, embedding, _, _, param, semantic, combi = network(image) prob = torch.sigmoid(logit[0]) diff --git a/utils/loss.py b/utils/loss.py index 788a9ea..4d7f91e 100644 --- a/utils/loss.py +++ b/utils/loss.py @@ -151,3 +151,13 @@ def Q_loss(param, k_inv_dot_xy1, gt_depth): q_diff = torch.abs(torch.sum(valid_param * Q, dim=0, keepdim=True) - 1.) 
loss = torch.mean(q_diff) return loss, abs_distance, infered_depth.view(1, 1, h, w) + +def semantic_loss(semantic, gt_class,device): + b, c, h, w = semantic.size() + + semantic = torch.transpose(semantic.view(c, -1).to(device), 0, 1) + gt_class = gt_class.long().view(-1).to(device) + loss_func = torch.nn.CrossEntropyLoss().to(device) + loss = loss_func(semantic, gt_class) + return loss + From c1c884f1442eda382884383e92f5381d885cb123 Mon Sep 17 00:00:00 2001 From: Dmitry Fadeev Date: Wed, 7 Feb 2024 12:40:55 +0100 Subject: [PATCH 5/9] changes to contrastive loss --- configs/config.yaml | 4 +-- main.py | 12 ++++--- utils/loss.py | 85 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 95 insertions(+), 6 deletions(-) diff --git a/configs/config.yaml b/configs/config.yaml index c12c8ee..f49d3b0 100644 --- a/configs/config.yaml +++ b/configs/config.yaml @@ -1,7 +1,7 @@ seed: 123 num_gpus: 1 num_epochs: 5 -resume_dir: /cluster/52/sarwath/snet/output/models/ +resume_dir: /Users/dimafadeev/Desktop/Catalog/TUM/WS23/ML3D/repo/output/models/ print_interval: 10 @@ -11,7 +11,7 @@ solver: weight_decay: 0.00001 dataset: - root_dir: /cluster/52/sarwath/snet/output/processed/ + root_dir: /Users/dimafadeev/Desktop/Catalog/TUM/WS23/ML3D/repo/output/processed/ batch_size: 16 num_workers: 8 diff --git a/main.py b/main.py index 6be75d6..df71a9b 100644 --- a/main.py +++ b/main.py @@ -17,7 +17,7 @@ from models.baseline_same import Baseline as UNet from utils.loss import hinge_embedding_loss, surface_normal_loss, parameter_loss, \ - class_balanced_cross_entropy_loss + class_balanced_cross_entropy_loss, contrastive_loss from utils.misc import AverageMeter, get_optimizer from utils.metric import eval_iou, eval_plane_prediction from utils.disp import tensor_to_image @@ -291,9 +291,13 @@ def train(_run, _log): = 0., 0., 0., 0., 0., 0., 0., 0., 0., 0. batch_size = image.size(0) for i in range(batch_size): - _loss_semantic = 0 - _loss, _loss_pull, _loss_push = hinge_embedding_loss(embedding[i:i+1], sample['num_planes'][i:i+1], - instance[i:i+1], device) + _loss_semantic = 0 + # Update with contrastive losee + # _loss, _loss_pull, _loss_push = hinge_embedding_loss(embedding[i:i+1], sample['num_planes'][i:i+1], + # instance[i:i+1], device) + + _loss, _loss_pull, _loss_push = contrastive_loss(embedding[i:i + 1], sample['num_planes'][i:i + 1], + instance[i:i + 1], device) _loss_binary = class_balanced_cross_entropy_loss(logit[i], planar[i]) diff --git a/utils/loss.py b/utils/loss.py index 4d7f91e..ae5d2c8 100644 --- a/utils/loss.py +++ b/utils/loss.py @@ -161,3 +161,88 @@ def semantic_loss(semantic, gt_class,device): loss = loss_func(semantic, gt_class) return loss + +def contrastive_loss(embedding, num_planes, segmentation, device, temperature=0.07, base_temperature=0.07): + """Args: + features: hidden vector of shape [batch_size, num_features]. + labels: ground truth of shape [batch_size]. + Returns: + A loss scalar. + """ + # logits --> for each pixel, the dot product of its embedding and the mean embedding of each plane + # segmentation --> for each pixel, take its num_planes masks (should be one-hot) + + # PROBLEM!! NOT EVERY PIXEL IS FROM A PLANE, THOSE HAVE NO SEGMENTATION AND SHOULD NOT BE ACCOUNTED! 
+ + # print(torch.min(torch.sum(segmentation, dim=0)), torch.max(torch.sum(segmentation, dim=0))) # 1, 1 CHECKED, IT IS 0 + # print(torch.unique(segmentation.sum(dim=0))) # [0,1] + + # print(logits.shape, segmentation.shape) # num_planes x h*w CHECK + + # print(positive.shape) # num_planes x h*w CHECK + + # GOAL: If positive is 0, that pixel is not from a plane, it has to be discarded from everywhere + + # print(indices, indices.shape) + # print(nonzero, len(indices)) # IT'S THE SAME + + b, c, h, w = embedding.size() # b = 1 + + # print(embedding.size()) # 1 x 2 x 192 x 256 CHECK + + # Since it is a single image get rid of first dimension (batch) + num_planes = num_planes.numpy()[0] + # print(num_planes) # 7 for the first + embedding = embedding[0] + segmentation = segmentation[0] + embeddings = [] + + # print(embedding.size()) # 2 x 192 x 256 CHECK + nonzero = 0 + # select embedding with segmentation + for i in range(num_planes): # do not take non-planar region + feature = torch.transpose(torch.masked_select(embedding, segmentation[i, :, :].view(1, h, w)).view(c, -1), 0, 1) + nonzero += feature.shape[0] + # print(feature.shape) # num pixels of plane i x 2 CHECK + embeddings.append(feature) + + centers = [] + for feature in embeddings: + center = torch.mean(feature, dim=0).view(1, c) + centers.append(center) + centers = torch.cat(centers) + + centers = centers.unsqueeze(1) + embedding = embedding.view(-1, c).unsqueeze(0) + logits = embedding * centers + logits = logits.sum(2) # num_planes x h*w + + segmentation = segmentation[:num_planes, :, :].view(-1, h * w) # mask each pixel w.r.t. segmentation + + indices = segmentation.sum(dim=0).nonzero() + + # Only take the dot product of the corresponding center + positive = logits * segmentation.to(torch.float) + + positive = torch.index_select(positive, 1, indices.squeeze()) + logits = torch.index_select(logits, 1, indices.squeeze()) + + # print(positive.shape) # num_planes x planar pixels + # print(logits.shape) + + for i in range(positive.shape[1]): + if len(torch.unique(torch.abs(positive[:, i]))) != 2 & num_planes > 1: + print('FUCK') + print(torch.unique(torch.abs(positive[:, i]))) + if torch.max(torch.abs(positive[:, i])).item() != torch.sum(torch.abs(positive[:, i])): + print('FUCK 2') + print(torch.max(torch.abs(positive[:, i])).item(), torch.sum(positive[:, i]), torch.abs(positive[:, i])) + + exp_logits = torch.exp(logits) + + # positive.sum(0) should only be adding 1 number + log_prob = positive.sum(0) - torch.log(exp_logits.sum(0, keepdim=True)) + + loss = - (temperature / base_temperature) * log_prob + + return torch.mean(loss), torch.mean(loss), torch.tensor(0) From d4fd451a9338aba6249ce1f70318c6f9d18e14e1 Mon Sep 17 00:00:00 2001 From: Dmitry Fadeev Date: Wed, 7 Feb 2024 13:14:29 +0100 Subject: [PATCH 6/9] results contrastive --- utils/loss.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/loss.py b/utils/loss.py index ae5d2c8..afa3f57 100644 --- a/utils/loss.py +++ b/utils/loss.py @@ -201,7 +201,7 @@ def contrastive_loss(embedding, num_planes, segmentation, device, temperature=0. 
nonzero = 0 # select embedding with segmentation for i in range(num_planes): # do not take non-planar region - feature = torch.transpose(torch.masked_select(embedding, segmentation[i, :, :].view(1, h, w)).view(c, -1), 0, 1) + feature = torch.transpose(torch.masked_select(embedding, segmentation[i, :, :].view(1, h, w).bool()).view(c, -1), 0, 1) nonzero += feature.shape[0] # print(feature.shape) # num pixels of plane i x 2 CHECK embeddings.append(feature) From d50dd7f7a25d7e43baa9b046ab3ab82a8d4642f1 Mon Sep 17 00:00:00 2001 From: Dmitry Fadeev Date: Wed, 7 Feb 2024 15:35:22 +0100 Subject: [PATCH 7/9] training going down --- main.py | 3 ++- utils/loss.py | 12 ++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/main.py b/main.py index df71a9b..655e4e7 100644 --- a/main.py +++ b/main.py @@ -228,7 +228,8 @@ def train(_run, _log): # save losses per epoch history = {'losses': [], 'losses_pull': [], 'losses_push': [], - 'losses_binary': [], 'losses_depth': [], 'ioues': [], 'rmses': []} + 'losses_binary': [], 'losses_depth': [], 'ioues': [], 'rmses': [], + 'losses_semantic': []} network.train(not cfg.model.fix_bn) diff --git a/utils/loss.py b/utils/loss.py index afa3f57..d6b5437 100644 --- a/utils/loss.py +++ b/utils/loss.py @@ -197,6 +197,10 @@ def contrastive_loss(embedding, num_planes, segmentation, device, temperature=0. segmentation = segmentation[0] embeddings = [] + # Debug print + print(f"Batch size: {b}, Channels: {c}, Height: {h}, Width: {w}") + print(f"Number of planes: {num_planes}") + # print(embedding.size()) # 2 x 192 x 256 CHECK nonzero = 0 # select embedding with segmentation @@ -206,6 +210,9 @@ def contrastive_loss(embedding, num_planes, segmentation, device, temperature=0. # print(feature.shape) # num pixels of plane i x 2 CHECK embeddings.append(feature) + # Debug print + print(f"Non-zero features count: {nonzero}") + centers = [] for feature in embeddings: center = torch.mean(feature, dim=0).view(1, c) @@ -245,4 +252,9 @@ def contrastive_loss(embedding, num_planes, segmentation, device, temperature=0. 
loss = - (temperature / base_temperature) * log_prob + # Debug print + print(f"Logits shape: {logits.shape}, Positive shape: {positive.shape}") + print(f"Sample logits: {logits[:5]}, Sample positive: {positive[:5]}") + print(f"Loss tensor: {loss}") + return torch.mean(loss), torch.mean(loss), torch.tensor(0) From f0ea7cc5a40c9708c726a27ccff35e5bcefdd0a9 Mon Sep 17 00:00:00 2001 From: Dmitry Fadeev Date: Wed, 7 Feb 2024 23:56:03 +0100 Subject: [PATCH 8/9] added new file --- embedding.pt | 3 +++ main.py | 1 + 2 files changed, 4 insertions(+) create mode 100644 embedding.pt diff --git a/embedding.pt b/embedding.pt new file mode 100644 index 0000000..ac35f5c --- /dev/null +++ b/embedding.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4137e5d2d5aacbec248e9c090cf41fa828ec1395dd0b4d5c74d5d849127607e0 +size 1574054 diff --git a/main.py b/main.py index 655e4e7..0764413 100644 --- a/main.py +++ b/main.py @@ -407,6 +407,7 @@ def train(_run, _log): # save checkpoint # if not (_run._id is None): torch.save(network.state_dict(), model_path) + torch.save(embedding, 'embedding.pt') pickle.dump(history, open(os.path.join(checkpoint_dir, 'history_semantic.pkl'), 'wb')) From 8eeca2f015b32cfa639b31824958ee655100bac2 Mon Sep 17 00:00:00 2001 From: Dmitry Fadeev Date: Sat, 10 Feb 2024 10:50:48 +0100 Subject: [PATCH 9/9] trained after 2 days --- embedding.pt | 4 ++-- instance.pt | 3 +++ main.py | 1 + utils/loss.py | 12 ++++++------ utils/subset_npz.py | 31 +++++++++++++++++++++++++++++++ 5 files changed, 43 insertions(+), 8 deletions(-) create mode 100644 instance.pt create mode 100644 utils/subset_npz.py diff --git a/embedding.pt b/embedding.pt index ac35f5c..516c774 100644 --- a/embedding.pt +++ b/embedding.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4137e5d2d5aacbec248e9c090cf41fa828ec1395dd0b4d5c74d5d849127607e0 -size 1574054 +oid sha256:a8f7ce8431ff6498653ff87fb86ecb7d14bfa424f29b32e04132ea66ffd9b7b9 +size 6292646 diff --git a/instance.pt b/instance.pt new file mode 100644 index 0000000..acc5d85 --- /dev/null +++ b/instance.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29cedc5d821c9d5793bba67f1332436ff9e72e03a1e3ab08a785b6ef78e9e51c +size 16516257 diff --git a/main.py b/main.py index 0764413..51907c3 100644 --- a/main.py +++ b/main.py @@ -408,6 +408,7 @@ def train(_run, _log): # if not (_run._id is None): torch.save(network.state_dict(), model_path) torch.save(embedding, 'embedding.pt') + torch.save(instance, 'instance.pt') pickle.dump(history, open(os.path.join(checkpoint_dir, 'history_semantic.pkl'), 'wb')) diff --git a/utils/loss.py b/utils/loss.py index d6b5437..0689ca1 100644 --- a/utils/loss.py +++ b/utils/loss.py @@ -198,8 +198,8 @@ def contrastive_loss(embedding, num_planes, segmentation, device, temperature=0. embeddings = [] # Debug print - print(f"Batch size: {b}, Channels: {c}, Height: {h}, Width: {w}") - print(f"Number of planes: {num_planes}") + # print(f"Batch size: {b}, Channels: {c}, Height: {h}, Width: {w}") + # print(f"Number of planes: {num_planes}") # print(embedding.size()) # 2 x 192 x 256 CHECK nonzero = 0 @@ -210,8 +210,8 @@ def contrastive_loss(embedding, num_planes, segmentation, device, temperature=0. 
# print(feature.shape) # num pixels of plane i x 2 CHECK embeddings.append(feature) - # Debug print - print(f"Non-zero features count: {nonzero}") + # # Debug print + # print(f"Non-zero features count: {nonzero}") centers = [] for feature in embeddings: @@ -253,8 +253,8 @@ def contrastive_loss(embedding, num_planes, segmentation, device, temperature=0. loss = - (temperature / base_temperature) * log_prob # Debug print - print(f"Logits shape: {logits.shape}, Positive shape: {positive.shape}") - print(f"Sample logits: {logits[:5]}, Sample positive: {positive[:5]}") + # print(f"Logits shape: {logits.shape}, Positive shape: {positive.shape}") + # print(f"Sample logits: {logits[:5]}, Sample positive: {positive[:5]}") print(f"Loss tensor: {loss}") return torch.mean(loss), torch.mean(loss), torch.tensor(0) diff --git a/utils/subset_npz.py b/utils/subset_npz.py new file mode 100644 index 0000000..7be0ddd --- /dev/null +++ b/utils/subset_npz.py @@ -0,0 +1,31 @@ +import shutil +import os + +def main(): + # Path to the directory containing all .npz files + npz_directory = '/Users/dimafadeev/Desktop/Catalog/TUM/WS23/ML3D/repo/processed_data/train' + + # Path to the .txt file containing the list of files to subset + txt_file_path = '/Users/dimafadeev/Desktop/Catalog/TUM/WS23/ML3D/repo/processed_data/train.txt' + + # Path to the directory where you want to save the subset + subset_directory = '/Users/dimafadeev/Desktop/Catalog/TUM/WS23/ML3D/repo/processed_data/train_subset' + + # Make sure the subset directory exists + os.makedirs(subset_directory, exist_ok=True) + + # Read the list of .npz file names from the .txt file + with open(txt_file_path, 'r') as file: + subset_files = [line.strip() for line in file] + + # Copy the subset .npz files + for file_name in subset_files: + full_file_path = os.path.join(npz_directory, file_name) + if os.path.isfile(full_file_path): + # Copy the file to the subset directory + shutil.copy(full_file_path, subset_directory) + else: + print(f"File {file_name} not found in the npz directory.") + +if __name__ == "__main__": + main()
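For reference, a minimal standalone sketch (not part of the patch series above) of the per-pixel contrastive loss that patches 5-9 introduce in utils/loss.py. Assumptions: one image at a time (batching handled by the caller), non-overlapping binary plane masks, and temperature == base_temperature so the leading scale factor is 1; the function name and the toy shapes in the demo are illustrative, not taken from the repo.

# Simplified restatement of contrastive_loss() from utils/loss.py:
# each planar pixel is pulled toward its own plane's mean embedding and
# pushed away from the other planes' means via a softmax over plane centers.
import torch


def contrastive_loss_sketch(embedding, segmentation):
    """embedding: (c, h, w) per-pixel embeddings for one image.
    segmentation: (num_planes, h, w) binary plane masks; non-planar pixels are zero in every mask."""
    c, h, w = embedding.shape
    num_planes = segmentation.shape[0]
    flat = embedding.view(c, -1)                        # (c, h*w)
    masks = segmentation.view(num_planes, -1).float()   # (num_planes, h*w)

    # per-plane mean embeddings ("centers")
    centers = (masks @ flat.t()) / masks.sum(dim=1, keepdim=True).clamp(min=1)  # (num_planes, c)

    # dot product of every pixel embedding with every plane center
    logits = centers @ flat                              # (num_planes, h*w)

    # drop non-planar pixels; the positive logit is the pixel's own plane center
    planar = masks.sum(dim=0) > 0                        # (h*w,) bool
    positive = (logits * masks).sum(dim=0)[planar]
    log_denominator = torch.logsumexp(logits[:, planar], dim=0)

    # loss = -mean_p [ z_p . c_{plane(p)} - log sum_k exp(z_p . c_k) ]
    return -(positive - log_denominator).mean()


if __name__ == "__main__":
    emb = torch.randn(2, 192, 256)                       # embed_dims=2, 192x256 crops as in configs/config.yaml
    seg = torch.zeros(3, 192, 256)
    seg[0, :64], seg[1, 64:128], seg[2, 128:] = 1, 1, 1  # three toy planes tiling the image
    print(contrastive_loss_sketch(emb, seg))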