From 333c6e2421ebd2d763270e1400d913f0d7b62d12 Mon Sep 17 00:00:00 2001 From: Seungbaek Hong Date: Fri, 17 May 2024 17:38:52 +0900 Subject: [PATCH 1/2] [Application] update yolo v2 python for building pre-training model To train on a large dataset, images are now loaded on demand during training instead of loading the whole dataset into memory in advance. Visualization code was also added to check whether the training proceeded well. Signed-off-by: Seungbaek Hong --- Applications/YOLOv2/PyTorch/dataset.py | 22 ++++++++++----- Applications/YOLOv2/PyTorch/main.py | 39 ++++++++++++++++++++++---- 2 files changed, 48 insertions(+), 13 deletions(-) diff --git a/Applications/YOLOv2/PyTorch/dataset.py b/Applications/YOLOv2/PyTorch/dataset.py index 8f804eff49..d939e0f8a9 100644 --- a/Applications/YOLOv2/PyTorch/dataset.py +++ b/Applications/YOLOv2/PyTorch/dataset.py @@ -22,6 +22,7 @@ class YOLODataset(Dataset): def __init__(self, img_dir, ann_dir): super().__init__() + self.img_dir = img_dir pattern = re.compile("\/(\d+)\.") img_list = glob.glob(img_dir + "*") ann_list = glob.glob(ann_dir + "*") @@ -30,12 +31,11 @@ def __init__(self, img_dir, ann_dir): ann_ids = list(map(lambda x: pattern.search(x).group(1), ann_list)) ids_list = list(set(img_ids) & set(ann_ids)) - self.input_images = [] + self.ids_list = [] self.bbox_gt = [] self.cls_gt = [] for ids in ids_list: - img = np.array(Image.open(img_dir + ids + ".jpg").resize((416, 416))) / 255 label_bbox = [] label_cls = [] with open(ann_dir + ids + ".txt", "rt", encoding="utf-8") as f: @@ -47,19 +47,27 @@ def __init__(self, img_dir, ann_dir): if len(label_cls) == 0: continue - self.input_images.append(img) + self.ids_list.append(ids) self.bbox_gt.append(label_bbox) self.cls_gt.append(label_cls) - self.length = len(self.input_images) - self.input_images = np.array(self.input_images) - self.input_images = torch.FloatTensor(self.input_images).permute((0, 3, 1, 2)) + self.length = len(self.ids_list) def __len__(self): 
return self.length def __getitem__(self, idx): - return self.input_images[idx], self.bbox_gt[idx], self.cls_gt[idx] + img = ( + torch.FloatTensor( + np.array( + Image.open(self.img_dir + self.ids_list[idx] + ".jpg").resize( + (416, 416) + ) + ) + ).permute((2, 0, 1)) + / 255 + ) + return img, self.bbox_gt[idx], self.cls_gt[idx] ## diff --git a/Applications/YOLOv2/PyTorch/main.py b/Applications/YOLOv2/PyTorch/main.py index cd8d277945..6e42fa1c6b 100644 --- a/Applications/YOLOv2/PyTorch/main.py +++ b/Applications/YOLOv2/PyTorch/main.py @@ -10,14 +10,17 @@ import sys import os -from torchconverter import save_bin -import torch +from PIL import Image, ImageDraw +from matplotlib import pyplot as plt from torch import optim from torch.utils.data import DataLoader +import torch +import numpy as np from yolo import YoloV2 from yolo_loss import YoloV2_LOSS from dataset import YOLODataset, collate_db +from torchconverter import save_bin device = "cuda" if torch.cuda.is_available() else "cpu" @@ -137,10 +140,9 @@ def get_util_path(): valid loss: {epoch_valid_loss / len(valid_loader):.4f}" ) + ## # @brief bbox post process function for inference - - def post_process_for_bbox(bbox_p): """ @param bbox_p shape(batch_size, cell_h x cell_w, num_anchors, 4) @@ -175,8 +177,32 @@ def post_process_for_bbox(bbox_p): return bbox_p +def visualize_bbox(img_pred, bbox_preds): + img_array = (img_pred.to("cpu") * 255).permute((1, 2, 0)).numpy().astype(np.uint8) + img = Image.fromarray(img_array) + + for bbox_pred in bbox_preds: + bbox_pred = [int(x * 416) for x in bbox_pred] + + if sum(bbox_pred) == 0: + continue + + x_lefttop = bbox_pred[0] + y_lefttop = bbox_pred[1] + width = bbox_pred[2] + height = bbox_pred[3] + + draw = ImageDraw.Draw(img) + draw.rectangle( + [(x_lefttop, y_lefttop), (x_lefttop + width, y_lefttop + height)] + ) + + plt.imshow(img) + plt.show() + + # inference example using trained model -hypothesis = model(img).permute((0, 2, 3, 1)) +hypothesis = 
model(img.to(device)).permute((0, 2, 3, 1)) hypothesis = hypothesis[0].reshape((1, out_size**2, num_anchors, 5 + num_classes)) # transform output @@ -192,4 +218,5 @@ def post_process_for_bbox(bbox_p): # result of inference (data range 0~1) iou_mask = iou_pred > 0.5 -print(bbox_pred * iou_mask, iou_pred * iou_mask, prob_pred * iou_mask) +bbox_pred = bbox_pred * iou_mask +visualize_bbox(img, bbox_pred.reshape(-1, 4)) From d71ef52790b0c8df0971033d75d937dceee25736 Mon Sep 17 00:00:00 2001 From: Seungbaek Hong Date: Wed, 22 May 2024 14:39:01 +0900 Subject: [PATCH 2/2] [Application] update yolo v2 modeling Update the modeling part of yolo v2 (update some hyperparameter values). - update yolo v2 pytorch (python) script - update yolo v2 nntrainer (c++) script * issue - the activation function of nntrainer (in this case, leaky relu) needs to support setting the negative slope via a parameter. **Self evaluation:** 1. Build test: [X]Passed [ ]Failed [ ]Skipped 2. Run test: [X]Passed [ ]Failed [ ]Skipped Signed-off-by: Seungbaek Hong --- Applications/YOLOv2/PyTorch/yolo.py | 108 +++++++++++++++++----------- Applications/YOLOv2/jni/main.cpp | 10 +-- 2 files changed, 72 insertions(+), 46 deletions(-) diff --git a/Applications/YOLOv2/PyTorch/yolo.py b/Applications/YOLOv2/PyTorch/yolo.py index b2a535c501..390cbd5ada 100644 --- a/Applications/YOLOv2/PyTorch/yolo.py +++ b/Applications/YOLOv2/PyTorch/yolo.py @@ -20,98 +20,122 @@ def __init__(self, num_classes, num_anchors=5): self.num_classes = num_classes self.num_anchors = num_anchors self.conv1 = nn.Sequential( - nn.Conv2d(3, 32, 3, 1, 1), - nn.BatchNorm2d(32, eps=1e-3), - nn.LeakyReLU(), + nn.Conv2d(3, 32, 3, 1, 1, bias=False), + nn.BatchNorm2d(32), + nn.LeakyReLU(0.1), nn.MaxPool2d(2, 2), ) self.conv2 = nn.Sequential( - nn.Conv2d(32, 64, 3, 1, 1), - nn.BatchNorm2d(64, eps=1e-3), - nn.LeakyReLU(), + nn.Conv2d(32, 64, 3, 1, 1, bias=False), + nn.BatchNorm2d(64), + nn.LeakyReLU(0.1), nn.MaxPool2d(2, 2), ) self.conv3 = nn.Sequential( - 
nn.Conv2d(64, 128, 3, 1, 1), nn.BatchNorm2d(128, eps=1e-3), nn.LeakyReLU() + nn.Conv2d(64, 128, 3, 1, 1, bias=False), + nn.BatchNorm2d(128), + nn.LeakyReLU(0.1), ) self.conv4 = nn.Sequential( - nn.Conv2d(128, 64, 1, 1, 0), nn.BatchNorm2d(64, eps=1e-3), nn.LeakyReLU() + nn.Conv2d(128, 64, 1, 1, 0, bias=False), + nn.BatchNorm2d(64), + nn.LeakyReLU(0.1), ) self.conv5 = nn.Sequential( - nn.Conv2d(64, 128, 3, 1, 1), - nn.BatchNorm2d(128, eps=1e-3), - nn.LeakyReLU(), + nn.Conv2d(64, 128, 3, 1, 1, bias=False), + nn.BatchNorm2d(128), + nn.LeakyReLU(0.1), nn.MaxPool2d(2, 2), ) self.conv6 = nn.Sequential( - nn.Conv2d(128, 256, 3, 1, 1), nn.BatchNorm2d(256, eps=1e-3), nn.LeakyReLU() + nn.Conv2d(128, 256, 3, 1, 1, bias=False), + nn.BatchNorm2d(256), + nn.LeakyReLU(0.1), ) self.conv7 = nn.Sequential( - nn.Conv2d(256, 128, 1, 1, 0), nn.BatchNorm2d(128, eps=1e-3), nn.LeakyReLU() + nn.Conv2d(256, 128, 1, 1, 0, bias=False), + nn.BatchNorm2d(128), + nn.LeakyReLU(0.1), ) self.conv8 = nn.Sequential( - nn.Conv2d(128, 256, 3, 1, 1), - nn.BatchNorm2d(256, eps=1e-3), - nn.LeakyReLU(), + nn.Conv2d(128, 256, 3, 1, 1, bias=False), + nn.BatchNorm2d(256), + nn.LeakyReLU(0.1), nn.MaxPool2d(2, 2), ) self.conv9 = nn.Sequential( - nn.Conv2d(256, 512, 3, 1, 1), nn.BatchNorm2d(512, eps=1e-3), nn.LeakyReLU() + nn.Conv2d(256, 512, 3, 1, 1, bias=False), + nn.BatchNorm2d(512), + nn.LeakyReLU(0.1), ) self.conv10 = nn.Sequential( - nn.Conv2d(512, 256, 1, 1, 0), nn.BatchNorm2d(256, eps=1e-3), nn.LeakyReLU() + nn.Conv2d(512, 256, 1, 1, 0, bias=False), + nn.BatchNorm2d(256), + nn.LeakyReLU(0.1), ) self.conv11 = nn.Sequential( - nn.Conv2d(256, 512, 3, 1, 1), nn.BatchNorm2d(512, eps=1e-3), nn.LeakyReLU() + nn.Conv2d(256, 512, 3, 1, 1, bias=False), + nn.BatchNorm2d(512), + nn.LeakyReLU(0.1), ) self.conv12 = nn.Sequential( - nn.Conv2d(512, 256, 1, 1, 0), nn.BatchNorm2d(256, eps=1e-3), nn.LeakyReLU() + nn.Conv2d(512, 256, 1, 1, 0, bias=False), + nn.BatchNorm2d(256), + nn.LeakyReLU(0.1), ) self.conv13 = 
nn.Sequential( - nn.Conv2d(256, 512, 3, 1, 1), nn.BatchNorm2d(512, eps=1e-3), nn.LeakyReLU() + nn.Conv2d(256, 512, 3, 1, 1, bias=False), + nn.BatchNorm2d(512), + nn.LeakyReLU(0.1), ) self.conv_b = nn.Sequential( - nn.Conv2d(512, 64, 1, 1, 0), nn.BatchNorm2d(64, eps=1e-3), nn.LeakyReLU() + nn.Conv2d(512, 64, 1, 1, 0, bias=False), + nn.BatchNorm2d(64), + nn.LeakyReLU(0.1), ) self.maxpool_a = nn.MaxPool2d(2, 2) self.conv_a1 = nn.Sequential( - nn.Conv2d(512, 1024, 3, 1, 1), - nn.BatchNorm2d(1024, eps=1e-3), - nn.LeakyReLU(), + nn.Conv2d(512, 1024, 3, 1, 1, bias=False), + nn.BatchNorm2d(1024), + nn.LeakyReLU(0.1), ) self.conv_a2 = nn.Sequential( - nn.Conv2d(1024, 512, 1, 1, 0), nn.BatchNorm2d(512, eps=1e-3), nn.LeakyReLU() + nn.Conv2d(1024, 512, 1, 1, 0, bias=False), + nn.BatchNorm2d(512), + nn.LeakyReLU(0.1), ) self.conv_a3 = nn.Sequential( - nn.Conv2d(512, 1024, 3, 1, 1), - nn.BatchNorm2d(1024, eps=1e-3), - nn.LeakyReLU(), + nn.Conv2d(512, 1024, 3, 1, 1, bias=False), + nn.BatchNorm2d(1024), + nn.LeakyReLU(0.1), ) self.conv_a4 = nn.Sequential( - nn.Conv2d(1024, 512, 1, 1, 0), nn.BatchNorm2d(512, eps=1e-3), nn.LeakyReLU() + nn.Conv2d(1024, 512, 1, 1, 0, bias=False), + nn.BatchNorm2d(512), + nn.LeakyReLU(0.1), ) self.conv_a5 = nn.Sequential( - nn.Conv2d(512, 1024, 3, 1, 1), - nn.BatchNorm2d(1024, eps=1e-3), - nn.LeakyReLU(), + nn.Conv2d(512, 1024, 3, 1, 1, bias=False), + nn.BatchNorm2d(1024), + nn.LeakyReLU(0.1), ) self.conv_a6 = nn.Sequential( - nn.Conv2d(1024, 1024, 3, 1, 1), - nn.BatchNorm2d(1024, eps=1e-3), - nn.LeakyReLU(), + nn.Conv2d(1024, 1024, 3, 1, 1, bias=False), + nn.BatchNorm2d(1024), + nn.LeakyReLU(0.1), ) self.conv_a7 = nn.Sequential( - nn.Conv2d(1024, 1024, 3, 1, 1), - nn.BatchNorm2d(1024, eps=1e-3), - nn.LeakyReLU(), + nn.Conv2d(1024, 1024, 3, 1, 1, bias=False), + nn.BatchNorm2d(1024), + nn.LeakyReLU(0.1), ) self.conv_out1 = nn.Sequential( - nn.Conv2d(1280, 1024, 3, 1, 1), - nn.BatchNorm2d(1024, eps=1e-3), - nn.LeakyReLU(), + nn.Conv2d(1280, 1024, 3, 1, 
1, bias=False), + nn.BatchNorm2d(1024), + nn.LeakyReLU(0.1), ) self.conv_out2 = nn.Conv2d(1024, self.num_anchors * (5 + num_classes), 1, 1, 0) diff --git a/Applications/YOLOv2/jni/main.cpp b/Applications/YOLOv2/jni/main.cpp index bc3985adbd..018602e408 100644 --- a/Applications/YOLOv2/jni/main.cpp +++ b/Applications/YOLOv2/jni/main.cpp @@ -139,6 +139,7 @@ std::vector yoloBlock(const std::string &block_name, withKey("filters", filters), withKey("kernel_size", {kernel_size, kernel_size}), withKey("padding", padding), + withKey("disable_bias", "true"), withKey("input_layers", input_layer)}; return createLayer("conv2d", props); @@ -150,6 +151,7 @@ std::vector yoloBlock(const std::string &block_name, if (downsample) { LayerHandle a2 = createLayer("batch_normalization", {with_name("a2"), withKey("momentum", "0.9"), + withKey("epsilon", 0.00001), withKey("activation", "leaky_relu")}); LayerHandle a3 = createLayer( @@ -158,10 +160,10 @@ std::vector yoloBlock(const std::string &block_name, return {a1, a2, a3}; } else { - LayerHandle a2 = - createLayer("batch_normalization", - {withKey("name", block_name), withKey("momentum", "0.9"), - withKey("activation", "leaky_relu")}); + LayerHandle a2 = createLayer( + "batch_normalization", + {withKey("name", block_name), withKey("momentum", "0.9"), + withKey("epsilon", 0.00001), withKey("activation", "leaky_relu")}); return {a1, a2}; }