Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Wait for #2588][Application] update yolo v2 hyper params #2597

Merged
merged 2 commits into from
May 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 15 additions & 7 deletions Applications/YOLOv2/PyTorch/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
class YOLODataset(Dataset):
def __init__(self, img_dir, ann_dir):
super().__init__()
self.img_dir = img_dir
pattern = re.compile("\/(\d+)\.")
img_list = glob.glob(img_dir + "*")
ann_list = glob.glob(ann_dir + "*")
Expand All @@ -30,12 +31,11 @@ def __init__(self, img_dir, ann_dir):
ann_ids = list(map(lambda x: pattern.search(x).group(1), ann_list))
ids_list = list(set(img_ids) & set(ann_ids))

self.input_images = []
self.ids_list = []
self.bbox_gt = []
self.cls_gt = []

for ids in ids_list:
img = np.array(Image.open(img_dir + ids + ".jpg").resize((416, 416))) / 255
label_bbox = []
label_cls = []
with open(ann_dir + ids + ".txt", "rt", encoding="utf-8") as f:
Expand All @@ -47,19 +47,27 @@ def __init__(self, img_dir, ann_dir):
if len(label_cls) == 0:
continue

self.input_images.append(img)
self.ids_list.append(ids)
self.bbox_gt.append(label_bbox)
self.cls_gt.append(label_cls)

self.length = len(self.input_images)
self.input_images = np.array(self.input_images)
self.input_images = torch.FloatTensor(self.input_images).permute((0, 3, 1, 2))
self.length = len(self.ids_list)

def __len__(self):
return self.length

def __getitem__(self, idx):
return self.input_images[idx], self.bbox_gt[idx], self.cls_gt[idx]
img = (
torch.FloatTensor(
np.array(
Image.open(self.img_dir + self.ids_list[idx] + ".jpg").resize(
(416, 416)
)
)
).permute((2, 0, 1))
/ 255
)
return img, self.bbox_gt[idx], self.cls_gt[idx]


##
Expand Down
39 changes: 33 additions & 6 deletions Applications/YOLOv2/PyTorch/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,17 @@
import sys
import os

from torchconverter import save_bin
import torch
from PIL import Image, ImageDraw
from matplotlib import pyplot as plt
from torch import optim
from torch.utils.data import DataLoader
import torch
import numpy as np

from yolo import YoloV2
from yolo_loss import YoloV2_LOSS
from dataset import YOLODataset, collate_db
from torchconverter import save_bin

device = "cuda" if torch.cuda.is_available() else "cpu"

Expand Down Expand Up @@ -137,10 +140,9 @@ def get_util_path():
valid loss: {epoch_valid_loss / len(valid_loader):.4f}"
)


##
# @brief bbox post process function for inference


def post_process_for_bbox(bbox_p):
"""
@param bbox_p shape(batch_size, cell_h x cell_w, num_anchors, 4)
Expand Down Expand Up @@ -175,8 +177,32 @@ def post_process_for_bbox(bbox_p):
return bbox_p


def visualize_bbox(img_pred, bbox_preds):
    """
    @brief draw bounding boxes on an image and show it with matplotlib
    @param img_pred image tensor; it is moved to the CPU, multiplied by 255,
           permuted with (1, 2, 0) and cast to uint8 before becoming a PIL image
           (presumably channel-first with values in 0~1 -- confirm with caller)
    @param bbox_preds iterable of boxes; each box is scaled by 416 and read as
           (x_lefttop, y_lefttop, width, height)
    """
    img_array = (img_pred.to("cpu") * 255).permute((1, 2, 0)).numpy().astype(np.uint8)
    img = Image.fromarray(img_array)
    # One drawing context is enough: every rectangle is drawn on the same image.
    draw = ImageDraw.Draw(img)

    for bbox_pred in bbox_preds:
        scaled = [int(x * 416) for x in bbox_pred]

        # Skip boxes whose coordinates are all zero. Checking each coordinate
        # (instead of their sum) keeps boxes whose signed values cancel out.
        if not any(scaled):
            continue

        x_lefttop, y_lefttop, width, height = scaled[:4]
        draw.rectangle(
            [(x_lefttop, y_lefttop), (x_lefttop + width, y_lefttop + height)]
        )

    plt.imshow(img)
    plt.show()


# inference example using trained model
hypothesis = model(img).permute((0, 2, 3, 1))
hypothesis = model(img.to(device)).permute((0, 2, 3, 1))
hypothesis = hypothesis[0].reshape((1, out_size**2, num_anchors, 5 + num_classes))

# transform output
Expand All @@ -192,4 +218,5 @@ def post_process_for_bbox(bbox_p):

# result of inference (data range 0~1)
iou_mask = iou_pred > 0.5
print(bbox_pred * iou_mask, iou_pred * iou_mask, prob_pred * iou_mask)
bbox_pred = bbox_pred * iou_mask
visualize_bbox(img, bbox_pred.reshape(-1, 4))
108 changes: 66 additions & 42 deletions Applications/YOLOv2/PyTorch/yolo.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,98 +20,122 @@ def __init__(self, num_classes, num_anchors=5):
self.num_classes = num_classes
self.num_anchors = num_anchors
self.conv1 = nn.Sequential(
nn.Conv2d(3, 32, 3, 1, 1),
nn.BatchNorm2d(32, eps=1e-3),
nn.LeakyReLU(),
nn.Conv2d(3, 32, 3, 1, 1, bias=False),
nn.BatchNorm2d(32),
nn.LeakyReLU(0.1),
nn.MaxPool2d(2, 2),
)
self.conv2 = nn.Sequential(
nn.Conv2d(32, 64, 3, 1, 1),
nn.BatchNorm2d(64, eps=1e-3),
nn.LeakyReLU(),
nn.Conv2d(32, 64, 3, 1, 1, bias=False),
nn.BatchNorm2d(64),
nn.LeakyReLU(0.1),
nn.MaxPool2d(2, 2),
)
self.conv3 = nn.Sequential(
nn.Conv2d(64, 128, 3, 1, 1), nn.BatchNorm2d(128, eps=1e-3), nn.LeakyReLU()
nn.Conv2d(64, 128, 3, 1, 1, bias=False),
nn.BatchNorm2d(128),
nn.LeakyReLU(0.1),
)
self.conv4 = nn.Sequential(
nn.Conv2d(128, 64, 1, 1, 0), nn.BatchNorm2d(64, eps=1e-3), nn.LeakyReLU()
nn.Conv2d(128, 64, 1, 1, 0, bias=False),
nn.BatchNorm2d(64),
nn.LeakyReLU(0.1),
)
self.conv5 = nn.Sequential(
nn.Conv2d(64, 128, 3, 1, 1),
nn.BatchNorm2d(128, eps=1e-3),
nn.LeakyReLU(),
nn.Conv2d(64, 128, 3, 1, 1, bias=False),
nn.BatchNorm2d(128),
nn.LeakyReLU(0.1),
nn.MaxPool2d(2, 2),
)
self.conv6 = nn.Sequential(
nn.Conv2d(128, 256, 3, 1, 1), nn.BatchNorm2d(256, eps=1e-3), nn.LeakyReLU()
nn.Conv2d(128, 256, 3, 1, 1, bias=False),
nn.BatchNorm2d(256),
nn.LeakyReLU(0.1),
)
self.conv7 = nn.Sequential(
nn.Conv2d(256, 128, 1, 1, 0), nn.BatchNorm2d(128, eps=1e-3), nn.LeakyReLU()
nn.Conv2d(256, 128, 1, 1, 0, bias=False),
nn.BatchNorm2d(128),
nn.LeakyReLU(0.1),
)
self.conv8 = nn.Sequential(
nn.Conv2d(128, 256, 3, 1, 1),
nn.BatchNorm2d(256, eps=1e-3),
nn.LeakyReLU(),
nn.Conv2d(128, 256, 3, 1, 1, bias=False),
nn.BatchNorm2d(256),
nn.LeakyReLU(0.1),
nn.MaxPool2d(2, 2),
)
self.conv9 = nn.Sequential(
nn.Conv2d(256, 512, 3, 1, 1), nn.BatchNorm2d(512, eps=1e-3), nn.LeakyReLU()
nn.Conv2d(256, 512, 3, 1, 1, bias=False),
nn.BatchNorm2d(512),
nn.LeakyReLU(0.1),
)
self.conv10 = nn.Sequential(
nn.Conv2d(512, 256, 1, 1, 0), nn.BatchNorm2d(256, eps=1e-3), nn.LeakyReLU()
nn.Conv2d(512, 256, 1, 1, 0, bias=False),
nn.BatchNorm2d(256),
nn.LeakyReLU(0.1),
)
self.conv11 = nn.Sequential(
nn.Conv2d(256, 512, 3, 1, 1), nn.BatchNorm2d(512, eps=1e-3), nn.LeakyReLU()
nn.Conv2d(256, 512, 3, 1, 1, bias=False),
nn.BatchNorm2d(512),
nn.LeakyReLU(0.1),
)
self.conv12 = nn.Sequential(
nn.Conv2d(512, 256, 1, 1, 0), nn.BatchNorm2d(256, eps=1e-3), nn.LeakyReLU()
nn.Conv2d(512, 256, 1, 1, 0, bias=False),
nn.BatchNorm2d(256),
nn.LeakyReLU(0.1),
)
self.conv13 = nn.Sequential(
nn.Conv2d(256, 512, 3, 1, 1), nn.BatchNorm2d(512, eps=1e-3), nn.LeakyReLU()
nn.Conv2d(256, 512, 3, 1, 1, bias=False),
nn.BatchNorm2d(512),
nn.LeakyReLU(0.1),
)

self.conv_b = nn.Sequential(
nn.Conv2d(512, 64, 1, 1, 0), nn.BatchNorm2d(64, eps=1e-3), nn.LeakyReLU()
nn.Conv2d(512, 64, 1, 1, 0, bias=False),
nn.BatchNorm2d(64),
nn.LeakyReLU(0.1),
)

self.maxpool_a = nn.MaxPool2d(2, 2)
self.conv_a1 = nn.Sequential(
nn.Conv2d(512, 1024, 3, 1, 1),
nn.BatchNorm2d(1024, eps=1e-3),
nn.LeakyReLU(),
nn.Conv2d(512, 1024, 3, 1, 1, bias=False),
nn.BatchNorm2d(1024),
nn.LeakyReLU(0.1),
)
self.conv_a2 = nn.Sequential(
nn.Conv2d(1024, 512, 1, 1, 0), nn.BatchNorm2d(512, eps=1e-3), nn.LeakyReLU()
nn.Conv2d(1024, 512, 1, 1, 0, bias=False),
nn.BatchNorm2d(512),
nn.LeakyReLU(0.1),
)
self.conv_a3 = nn.Sequential(
nn.Conv2d(512, 1024, 3, 1, 1),
nn.BatchNorm2d(1024, eps=1e-3),
nn.LeakyReLU(),
nn.Conv2d(512, 1024, 3, 1, 1, bias=False),
nn.BatchNorm2d(1024),
nn.LeakyReLU(0.1),
)
self.conv_a4 = nn.Sequential(
nn.Conv2d(1024, 512, 1, 1, 0), nn.BatchNorm2d(512, eps=1e-3), nn.LeakyReLU()
nn.Conv2d(1024, 512, 1, 1, 0, bias=False),
nn.BatchNorm2d(512),
nn.LeakyReLU(0.1),
)
self.conv_a5 = nn.Sequential(
nn.Conv2d(512, 1024, 3, 1, 1),
nn.BatchNorm2d(1024, eps=1e-3),
nn.LeakyReLU(),
nn.Conv2d(512, 1024, 3, 1, 1, bias=False),
nn.BatchNorm2d(1024),
nn.LeakyReLU(0.1),
)
self.conv_a6 = nn.Sequential(
nn.Conv2d(1024, 1024, 3, 1, 1),
nn.BatchNorm2d(1024, eps=1e-3),
nn.LeakyReLU(),
nn.Conv2d(1024, 1024, 3, 1, 1, bias=False),
nn.BatchNorm2d(1024),
nn.LeakyReLU(0.1),
)
self.conv_a7 = nn.Sequential(
nn.Conv2d(1024, 1024, 3, 1, 1),
nn.BatchNorm2d(1024, eps=1e-3),
nn.LeakyReLU(),
nn.Conv2d(1024, 1024, 3, 1, 1, bias=False),
nn.BatchNorm2d(1024),
nn.LeakyReLU(0.1),
)

self.conv_out1 = nn.Sequential(
nn.Conv2d(1280, 1024, 3, 1, 1),
nn.BatchNorm2d(1024, eps=1e-3),
nn.LeakyReLU(),
nn.Conv2d(1280, 1024, 3, 1, 1, bias=False),
nn.BatchNorm2d(1024),
nn.LeakyReLU(0.1),
)

self.conv_out2 = nn.Conv2d(1024, self.num_anchors * (5 + num_classes), 1, 1, 0)
Expand Down
10 changes: 6 additions & 4 deletions Applications/YOLOv2/jni/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ std::vector<LayerHandle> yoloBlock(const std::string &block_name,
withKey("filters", filters),
withKey("kernel_size", {kernel_size, kernel_size}),
withKey("padding", padding),
withKey("disable_bias", "true"),
withKey("input_layers", input_layer)};

return createLayer("conv2d", props);
Expand All @@ -150,6 +151,7 @@ std::vector<LayerHandle> yoloBlock(const std::string &block_name,
if (downsample) {
LayerHandle a2 = createLayer("batch_normalization",
{with_name("a2"), withKey("momentum", "0.9"),
withKey("epsilon", 0.00001),
withKey("activation", "leaky_relu")});

LayerHandle a3 = createLayer(
Expand All @@ -158,10 +160,10 @@ std::vector<LayerHandle> yoloBlock(const std::string &block_name,

return {a1, a2, a3};
} else {
LayerHandle a2 =
createLayer("batch_normalization",
{withKey("name", block_name), withKey("momentum", "0.9"),
withKey("activation", "leaky_relu")});
LayerHandle a2 = createLayer(
"batch_normalization",
{withKey("name", block_name), withKey("momentum", "0.9"),
withKey("epsilon", 0.00001), withKey("activation", "leaky_relu")});

return {a1, a2};
}
Expand Down