diff --git a/models/__init__.py b/models/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/models/common.py b/models/common.py new file mode 100644 index 0000000..4211db4 --- /dev/null +++ b/models/common.py @@ -0,0 +1,395 @@ +# YOLOv5 common modules + +import math +from copy import copy +from pathlib import Path + +import numpy as np +import pandas as pd +import requests +import torch +import torch.nn as nn +from PIL import Image +from torch.cuda import amp + +from utils.datasets import letterbox +from utils.general import non_max_suppression, make_divisible, scale_coords, increment_path, xyxy2xywh, save_one_box +from utils.plots import colors, plot_one_box +from utils.torch_utils import time_synchronized + + +def autopad(k, p=None): # kernel, padding + # Pad to 'same' + if p is None: + p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad + return p + + +def DWConv(c1, c2, k=1, s=1, act=True): + # Depthwise convolution + return Conv(c1, c2, k, s, g=math.gcd(c1, c2), act=act) + + +class Conv(nn.Module): + # Standard convolution + def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups + super(Conv, self).__init__() + self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False) + self.bn = nn.BatchNorm2d(c2) + self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity()) + + def forward(self, x): + return self.act(self.bn(self.conv(x))) + + def fuseforward(self, x): + return self.act(self.conv(x)) + + +class TransformerLayer(nn.Module): + # Transformer layer https://arxiv.org/abs/2010.11929 (LayerNorm layers removed for better performance) + def __init__(self, c, num_heads): + super().__init__() + self.q = nn.Linear(c, c, bias=False) + self.k = nn.Linear(c, c, bias=False) + self.v = nn.Linear(c, c, bias=False) + self.ma = nn.MultiheadAttention(embed_dim=c, num_heads=num_heads) + self.fc1 = nn.Linear(c, c, bias=False) + self.fc2 = nn.Linear(c, c, bias=False) + + def forward(self, x): + x = self.ma(self.q(x), self.k(x), self.v(x))[0] + x + x = self.fc2(self.fc1(x)) + x + return x + + +class TransformerBlock(nn.Module): + # Vision Transformer https://arxiv.org/abs/2010.11929 + def __init__(self, c1, c2, num_heads, num_layers): + super().__init__() + self.conv = None + if c1 != c2: + self.conv = Conv(c1, c2) + self.linear = nn.Linear(c2, c2) # learnable position embedding + self.tr = nn.Sequential(*[TransformerLayer(c2, num_heads) for _ in range(num_layers)]) + self.c2 = c2 + + def forward(self, x): + if self.conv is not None: + x = self.conv(x) + b, _, w, h = x.shape + p = x.flatten(2) + p = p.unsqueeze(0) + p = p.transpose(0, 3) + p = p.squeeze(3) + e = self.linear(p) + x = p + e + + x = self.tr(x) + x = x.unsqueeze(3) + x = x.transpose(0, 3) + x = x.reshape(b, self.c2, w, h) + return x + + +class Bottleneck(nn.Module): + # Standard bottleneck + def __init__(self, c1, c2, shortcut=True, g=1, e=0.5): # ch_in, ch_out, shortcut, groups, expansion + super(Bottleneck, self).__init__() + c_ = int(c2 * e) # hidden channels + self.cv1 = Conv(c1, c_, 1, 1) + self.cv2 = Conv(c_, c2, 3, 1, g=g) + self.add = shortcut and c1 == c2 + + def forward(self, x): + return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x)) + + +class BottleneckCSP(nn.Module): + # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks + def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion + super(BottleneckCSP, self).__init__() + c_ = int(c2 * e) # hidden channels + self.cv1 = Conv(c1, c_, 1, 1) + self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False) + self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False) + self.cv4 = Conv(2 * c_, c2, 1, 1) + self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3) + self.act = nn.LeakyReLU(0.1, inplace=True) + self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)]) + + def forward(self, x): + y1 = self.cv3(self.m(self.cv1(x))) + y2 = self.cv2(x) + return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1)))) + + +class C3(nn.Module): + # CSP Bottleneck with 3 convolutions + def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion + super(C3, self).__init__() + c_ = int(c2 * e) # hidden channels + self.cv1 = Conv(c1, c_, 1, 1) + self.cv2 = Conv(c1, c_, 1, 1) + self.cv3 = Conv(2 * c_, c2, 1) # act=FReLU(c2) + self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)]) + # self.m = nn.Sequential(*[CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)]) + + def forward(self, x): + return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), dim=1)) + + +class C3TR(C3): + # C3 module with TransformerBlock() + def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): + super().__init__(c1, c2, n, shortcut, g, e) + c_ = int(c2 * e) + self.m = TransformerBlock(c_, c_, 4, n) + + +class SPP(nn.Module): + # Spatial pyramid pooling layer used in YOLOv3-SPP + def __init__(self, c1, c2, k=(5, 9, 13)): + super(SPP, self).__init__() + c_ = c1 // 2 # hidden channels + self.cv1 = Conv(c1, c_, 1, 1) + self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1) + self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k]) + + def forward(self, x): + x = self.cv1(x) + return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1)) + + +class Focus(nn.Module): + # Focus wh information into c-space + def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups + super(Focus, self).__init__() + self.conv = Conv(c1 * 4, c2, k, s, p, g, act) + # self.contract = Contract(gain=2) + + def forward(self, x): # x(b,c,w,h) -> y(b,4c,w/2,h/2) + return self.conv(torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1)) + # return self.conv(self.contract(x)) + + +class Contract(nn.Module): + # Contract width-height into channels, i.e. x(1,64,80,80) to x(1,256,40,40) + def __init__(self, gain=2): + super().__init__() + self.gain = gain + + def forward(self, x): + N, C, H, W = x.size() # assert (H / s == 0) and (W / s == 0), 'Indivisible gain' + s = self.gain + x = x.view(N, C, H // s, s, W // s, s) # x(1,64,40,2,40,2) + x = x.permute(0, 3, 5, 1, 2, 4).contiguous() # x(1,2,2,64,40,40) + return x.view(N, C * s * s, H // s, W // s) # x(1,256,40,40) + + +class Expand(nn.Module): + # Expand channels into width-height, i.e. x(1,64,80,80) to x(1,16,160,160) + def __init__(self, gain=2): + super().__init__() + self.gain = gain + + def forward(self, x): + N, C, H, W = x.size() # assert C / s ** 2 == 0, 'Indivisible gain' + s = self.gain + x = x.view(N, s, s, C // s ** 2, H, W) # x(1,2,2,16,80,80) + x = x.permute(0, 3, 4, 1, 5, 2).contiguous() # x(1,16,80,2,80,2) + return x.view(N, C // s ** 2, H * s, W * s) # x(1,16,160,160) + + +class Concat(nn.Module): + # Concatenate a list of tensors along dimension + def __init__(self, dimension=1): + super(Concat, self).__init__() + self.d = dimension + + def forward(self, x): + return torch.cat(x, self.d) + + +class NMS(nn.Module): + # Non-Maximum Suppression (NMS) module + conf = 0.25 # confidence threshold + iou = 0.45 # IoU threshold + classes = None # (optional list) filter by class + max_det = 1000 # maximum number of detections per image + + def __init__(self): + super(NMS, self).__init__() + + def forward(self, x): + return non_max_suppression(x[0], self.conf, iou_thres=self.iou, classes=self.classes, max_det=self.max_det) + + +class AutoShape(nn.Module): + # input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS + conf = 0.25 # NMS confidence threshold + iou = 0.45 # NMS IoU threshold + classes = None # (optional list) filter by class + max_det = 1000 # maximum number of detections per image + + def __init__(self, model): + super(AutoShape, self).__init__() + self.model = model.eval() + + def autoshape(self): + print('AutoShape already enabled, skipping... ') # model already converted to model.autoshape() + return self + + @torch.no_grad() + def forward(self, imgs, size=640, augment=False, profile=False): + # Inference from various sources. For height=640, width=1280, RGB images example inputs are: + # filename: imgs = 'data/images/zidane.jpg' + # URI: = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/zidane.jpg' + # OpenCV: = cv2.imread('image.jpg')[:,:,::-1] # HWC BGR to RGB x(640,1280,3) + # PIL: = Image.open('image.jpg') # HWC x(640,1280,3) + # numpy: = np.zeros((640,1280,3)) # HWC + # torch: = torch.zeros(16,3,320,640) # BCHW (scaled to size=640, 0-1 values) + # multiple: = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...] # list of images + + t = [time_synchronized()] + p = next(self.model.parameters()) # for device and type + if isinstance(imgs, torch.Tensor): # torch + with amp.autocast(enabled=p.device.type != 'cpu'): + return self.model(imgs.to(p.device).type_as(p), augment, profile) # inference + + # Pre-process + n, imgs = (len(imgs), imgs) if isinstance(imgs, list) else (1, [imgs]) # number of images, list of images + shape0, shape1, files = [], [], [] # image and inference shapes, filenames + for i, im in enumerate(imgs): + f = f'image{i}' # filename + if isinstance(im, str): # filename or uri + im, f = np.asarray(Image.open(requests.get(im, stream=True).raw if im.startswith('http') else im)), im + elif isinstance(im, Image.Image): # PIL Image + im, f = np.asarray(im), getattr(im, 'filename', f) or f + files.append(Path(f).with_suffix('.jpg').name) + if im.shape[0] < 5: # image in CHW + im = im.transpose((1, 2, 0)) # reverse dataloader .transpose(2, 0, 1) + im = im[:, :, :3] if im.ndim == 3 else np.tile(im[:, :, None], 3) # enforce 3ch input + s = im.shape[:2] # HWC + shape0.append(s) # image shape + g = (size / max(s)) # gain + shape1.append([y * g for y in s]) + imgs[i] = im if im.data.contiguous else np.ascontiguousarray(im) # update + shape1 = [make_divisible(x, int(self.stride.max())) for x in np.stack(shape1, 0).max(0)] # inference shape + x = [letterbox(im, new_shape=shape1, auto=False)[0] for im in imgs] # pad + x = np.stack(x, 0) if n > 1 else x[0][None] # stack + x = np.ascontiguousarray(x.transpose((0, 3, 1, 2))) # BHWC to BCHW + x = torch.from_numpy(x).to(p.device).type_as(p) / 255. # uint8 to fp16/32 + t.append(time_synchronized()) + + with amp.autocast(enabled=p.device.type != 'cpu'): + # Inference + y = self.model(x, augment, profile)[0] # forward + t.append(time_synchronized()) + + # Post-process + y = non_max_suppression(y, self.conf, iou_thres=self.iou, classes=self.classes, max_det=self.max_det) # NMS + for i in range(n): + scale_coords(shape1, y[i][:, :4], shape0[i]) + + t.append(time_synchronized()) + return Detections(imgs, y, files, t, self.names, x.shape) + + +class Detections: + # detections class for YOLOv5 inference results + def __init__(self, imgs, pred, files, times=None, names=None, shape=None): + super(Detections, self).__init__() + d = pred[0].device # device + gn = [torch.tensor([*[im.shape[i] for i in [1, 0, 1, 0]], 1., 1.], device=d) for im in imgs] # normalizations + self.imgs = imgs # list of images as numpy arrays + self.pred = pred # list of tensors pred[0] = (xyxy, conf, cls) + self.names = names # class names + self.files = files # image filenames + self.xyxy = pred # xyxy pixels + self.xywh = [xyxy2xywh(x) for x in pred] # xywh pixels + self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)] # xyxy normalized + self.xywhn = [x / g for x, g in zip(self.xywh, gn)] # xywh normalized + self.n = len(self.pred) # number of images (batch size) + self.t = tuple((times[i + 1] - times[i]) * 1000 / self.n for i in range(3)) # timestamps (ms) + self.s = shape # inference BCHW shape + + def display(self, pprint=False, show=False, save=False, crop=False, render=False, save_dir=Path('')): + for i, (im, pred) in enumerate(zip(self.imgs, self.pred)): + str = f'image {i + 1}/{len(self.pred)}: {im.shape[0]}x{im.shape[1]} ' + if pred is not None: + for c in pred[:, -1].unique(): + n = (pred[:, -1] == c).sum() # detections per class + str += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, " # add to string + if show or save or render or crop: + for *box, conf, cls in pred: # xyxy, confidence, class + label = f'{self.names[int(cls)]} {conf:.2f}' + if crop: + save_one_box(box, im, file=save_dir / 'crops' / self.names[int(cls)] / self.files[i]) + else: # all others + plot_one_box(box, im, label=label, color=colors(cls)) + + im = Image.fromarray(im.astype(np.uint8)) if isinstance(im, np.ndarray) else im # from np + if pprint: + print(str.rstrip(', ')) + if show: + im.show(self.files[i]) # show + if save: + f = self.files[i] + im.save(save_dir / f) # save + print(f"{'Saved' * (i == 0)} {f}", end=',' if i < self.n - 1 else f' to {save_dir}\n') + if render: + self.imgs[i] = np.asarray(im) + + def print(self): + self.display(pprint=True) # print results + print(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {tuple(self.s)}' % self.t) + + def show(self): + self.display(show=True) # show results + + def save(self, save_dir='runs/hub/exp'): + save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/hub/exp', mkdir=True) # increment save_dir + self.display(save=True, save_dir=save_dir) # save results + + def crop(self, save_dir='runs/hub/exp'): + save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/hub/exp', mkdir=True) # increment save_dir + self.display(crop=True, save_dir=save_dir) # crop results + print(f'Saved results to {save_dir}\n') + + def render(self): + self.display(render=True) # render results + return self.imgs + + def pandas(self): + # return detections as pandas DataFrames, i.e. print(results.pandas().xyxy[0]) + new = copy(self) # return copy + ca = 'xmin', 'ymin', 'xmax', 'ymax', 'confidence', 'class', 'name' # xyxy columns + cb = 'xcenter', 'ycenter', 'width', 'height', 'confidence', 'class', 'name' # xywh columns + for k, c in zip(['xyxy', 'xyxyn', 'xywh', 'xywhn'], [ca, ca, cb, cb]): + a = [[x[:5] + [int(x[5]), self.names[int(x[5])]] for x in x.tolist()] for x in getattr(self, k)] # update + setattr(new, k, [pd.DataFrame(x, columns=c) for x in a]) + return new + + def tolist(self): + # return a list of Detections objects, i.e. 'for result in results.tolist():' + x = [Detections([self.imgs[i]], [self.pred[i]], self.names, self.s) for i in range(self.n)] + for d in x: + for k in ['imgs', 'pred', 'xyxy', 'xyxyn', 'xywh', 'xywhn']: + setattr(d, k, getattr(d, k)[0]) # pop out of list + return x + + def __len__(self): + return self.n + + +class Classify(nn.Module): + # Classification head, i.e. x(b,c1,20,20) to x(b,c2) + def __init__(self, c1, c2, k=1, s=1, p=None, g=1): # ch_in, ch_out, kernel, stride, padding, groups + super(Classify, self).__init__() + self.aap = nn.AdaptiveAvgPool2d(1) # to x(b,c1,1,1) + self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g) # to x(b,c2,1,1) + self.flat = nn.Flatten() + + def forward(self, x): + z = torch.cat([self.aap(y) for y in (x if isinstance(x, list) else [x])], 1) # cat if list + return self.flat(self.conv(z)) # flatten to x(b,c2) diff --git a/models/experimental.py b/models/experimental.py new file mode 100644 index 0000000..d4a4f93 --- /dev/null +++ b/models/experimental.py @@ -0,0 +1,140 @@ +# YOLOv5 experimental modules + +import numpy as np +import torch +import torch.nn as nn + +from models.common import Conv, DWConv +from utils.google_utils import attempt_download + + +class CrossConv(nn.Module): + # Cross Convolution Downsample + def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False): + # ch_in, ch_out, kernel, stride, groups, expansion, shortcut + super(CrossConv, self).__init__() + c_ = int(c2 * e) # hidden channels + self.cv1 = Conv(c1, c_, (1, k), (1, s)) + self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g) + self.add = shortcut and c1 == c2 + + def forward(self, x): + return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x)) + + +class Sum(nn.Module): + # Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070 + def __init__(self, n, weight=False): # n: number of inputs + super(Sum, self).__init__() + self.weight = weight # apply weights boolean + self.iter = range(n - 1) # iter object + if weight: + self.w = nn.Parameter(-torch.arange(1., n) / 2, requires_grad=True) # layer weights + + def forward(self, x): + y = x[0] # no weight + if self.weight: + w = torch.sigmoid(self.w) * 2 + for i in self.iter: + y = y + x[i + 1] * w[i] + else: + for i in self.iter: + y = y + x[i + 1] + return y + + +class GhostConv(nn.Module): + # Ghost Convolution https://github.com/huawei-noah/ghostnet + def __init__(self, c1, c2, k=1, s=1, g=1, act=True): # ch_in, ch_out, kernel, stride, groups + super(GhostConv, self).__init__() + c_ = c2 // 2 # hidden channels + self.cv1 = Conv(c1, c_, k, s, None, g, act) + self.cv2 = Conv(c_, c_, 5, 1, None, c_, act) + + def forward(self, x): + y = self.cv1(x) + return torch.cat([y, self.cv2(y)], 1) + + +class GhostBottleneck(nn.Module): + # Ghost Bottleneck https://github.com/huawei-noah/ghostnet + def __init__(self, c1, c2, k=3, s=1): # ch_in, ch_out, kernel, stride + super(GhostBottleneck, self).__init__() + c_ = c2 // 2 + self.conv = nn.Sequential(GhostConv(c1, c_, 1, 1), # pw + DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(), # dw + GhostConv(c_, c2, 1, 1, act=False)) # pw-linear + self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False), + Conv(c1, c2, 1, 1, act=False)) if s == 2 else nn.Identity() + + def forward(self, x): + return self.conv(x) + self.shortcut(x) + + +class MixConv2d(nn.Module): + # Mixed Depthwise Conv https://arxiv.org/abs/1907.09595 + def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True): + super(MixConv2d, self).__init__() + groups = len(k) + if equal_ch: # equal c_ per group + i = torch.linspace(0, groups - 1E-6, c2).floor() # c2 indices + c_ = [(i == g).sum() for g in range(groups)] # intermediate channels + else: # equal weight.numel() per group + b = [c2] + [0] * groups + a = np.eye(groups + 1, groups, k=-1) + a -= np.roll(a, 1, axis=1) + a *= np.array(k) ** 2 + a[0] = 1 + c_ = np.linalg.lstsq(a, b, rcond=None)[0].round() # solve for equal weight indices, ax = b + + self.m = nn.ModuleList([nn.Conv2d(c1, int(c_[g]), k[g], s, k[g] // 2, bias=False) for g in range(groups)]) + self.bn = nn.BatchNorm2d(c2) + self.act = nn.LeakyReLU(0.1, inplace=True) + + def forward(self, x): + return x + self.act(self.bn(torch.cat([m(x) for m in self.m], 1))) + + +class Ensemble(nn.ModuleList): + # Ensemble of models + def __init__(self): + super(Ensemble, self).__init__() + + def forward(self, x, augment=False): + y = [] + for module in self: + y.append(module(x, augment)[0]) + # y = torch.stack(y).max(0)[0] # max ensemble + # y = torch.stack(y).mean(0) # mean ensemble + y = torch.cat(y, 1) # nms ensemble + return y, None # inference, train output + + +def attempt_load(weights, map_location=None, inplace=True): + from models.yolo import Detect, Model + + # Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a + model = Ensemble() + for w in weights if isinstance(weights, list) else [weights]: + ckpt = torch.load(attempt_download(w), map_location=map_location) # load + model.append(ckpt['ema' if ckpt.get('ema') else 'model'].float().fuse().eval()) # FP32 model + + # Compatibility updates + for m in model.modules(): + if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU, Detect, Model]: + m.inplace = inplace # pytorch 1.7.0 compatibility + elif type(m) is Conv: + m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatibility + + if len(model) == 1: + print("Condition one: model length is 1") + + return model[-1] # return model + else: + print("Condition two: model length is large than one") + + print(f'Ensemble created with {weights}\n') + for k in ['names']: + setattr(model, k, getattr(model[-1], k)) + model.stride = model[torch.argmax(torch.tensor([m.stride.max() for m in model])).int()].stride # max stride + return model # return ensemble diff --git a/models/hub/anchors.yaml b/models/hub/anchors.yaml new file mode 100644 index 0000000..a07a4dc --- /dev/null +++ b/models/hub/anchors.yaml @@ -0,0 +1,58 @@ +# Default YOLOv5 anchors for COCO data + + +# P5 ------------------------------------------------------------------------------------------------------------------- +# P5-640: +anchors_p5_640: + - [ 10,13, 16,30, 33,23 ] # P3/8 + - [ 30,61, 62,45, 59,119 ] # P4/16 + - [ 116,90, 156,198, 373,326 ] # P5/32 + + +# P6 ------------------------------------------------------------------------------------------------------------------- +# P6-640: thr=0.25: 0.9964 BPR, 5.54 anchors past thr, n=12, img_size=640, metric_all=0.281/0.716-mean/best, past_thr=0.469-mean: 9,11, 21,19, 17,41, 43,32, 39,70, 86,64, 65,131, 134,130, 120,265, 282,180, 247,354, 512,387 +anchors_p6_640: + - [ 9,11, 21,19, 17,41 ] # P3/8 + - [ 43,32, 39,70, 86,64 ] # P4/16 + - [ 65,131, 134,130, 120,265 ] # P5/32 + - [ 282,180, 247,354, 512,387 ] # P6/64 + +# P6-1280: thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1280, metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792 +anchors_p6_1280: + - [ 19,27, 44,40, 38,94 ] # P3/8 + - [ 96,68, 86,152, 180,137 ] # P4/16 + - [ 140,301, 303,264, 238,542 ] # P5/32 + - [ 436,615, 739,380, 925,792 ] # P6/64 + +# P6-1920: thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1920, metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 28,41, 67,59, 57,141, 144,103, 129,227, 270,205, 209,452, 455,396, 358,812, 653,922, 1109,570, 1387,1187 +anchors_p6_1920: + - [ 28,41, 67,59, 57,141 ] # P3/8 + - [ 144,103, 129,227, 270,205 ] # P4/16 + - [ 209,452, 455,396, 358,812 ] # P5/32 + - [ 653,922, 1109,570, 1387,1187 ] # P6/64 + + +# P7 ------------------------------------------------------------------------------------------------------------------- +# P7-640: thr=0.25: 0.9962 BPR, 6.76 anchors past thr, n=15, img_size=640, metric_all=0.275/0.733-mean/best, past_thr=0.466-mean: 11,11, 13,30, 29,20, 30,46, 61,38, 39,92, 78,80, 146,66, 79,163, 149,150, 321,143, 157,303, 257,402, 359,290, 524,372 +anchors_p7_640: + - [ 11,11, 13,30, 29,20 ] # P3/8 + - [ 30,46, 61,38, 39,92 ] # P4/16 + - [ 78,80, 146,66, 79,163 ] # P5/32 + - [ 149,150, 321,143, 157,303 ] # P6/64 + - [ 257,402, 359,290, 524,372 ] # P7/128 + +# P7-1280: thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1280, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 19,22, 54,36, 32,77, 70,83, 138,71, 75,173, 165,159, 148,334, 375,151, 334,317, 251,626, 499,474, 750,326, 534,814, 1079,818 +anchors_p7_1280: + - [ 19,22, 54,36, 32,77 ] # P3/8 + - [ 70,83, 138,71, 75,173 ] # P4/16 + - [ 165,159, 148,334, 375,151 ] # P5/32 + - [ 334,317, 251,626, 499,474 ] # P6/64 + - [ 750,326, 534,814, 1079,818 ] # P7/128 + +# P7-1920: thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1920, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 29,34, 81,55, 47,115, 105,124, 207,107, 113,259, 247,238, 222,500, 563,227, 501,476, 376,939, 749,711, 1126,489, 801,1222, 1618,1227 +anchors_p7_1920: + - [ 29,34, 81,55, 47,115 ] # P3/8 + - [ 105,124, 207,107, 113,259 ] # P4/16 + - [ 247,238, 222,500, 563,227 ] # P5/32 + - [ 501,476, 376,939, 749,711 ] # P6/64 + - [ 1126,489, 801,1222, 1618,1227 ] # P7/128 diff --git a/models/hub/yolov3-spp.yaml b/models/hub/yolov3-spp.yaml new file mode 100644 index 0000000..38dcc44 --- /dev/null +++ b/models/hub/yolov3-spp.yaml @@ -0,0 +1,51 @@ +# parameters +nc: 80 # number of classes +depth_multiple: 1.0 # model depth multiple +width_multiple: 1.0 # layer channel multiple + +# anchors +anchors: + - [10,13, 16,30, 33,23] # P3/8 + - [30,61, 62,45, 59,119] # P4/16 + - [116,90, 156,198, 373,326] # P5/32 + +# darknet53 backbone +backbone: + # [from, number, module, args] + [[-1, 1, Conv, [32, 3, 1]], # 0 + [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 + [-1, 1, Bottleneck, [64]], + [-1, 1, Conv, [128, 3, 2]], # 3-P2/4 + [-1, 2, Bottleneck, [128]], + [-1, 1, Conv, [256, 3, 2]], # 5-P3/8 + [-1, 8, Bottleneck, [256]], + [-1, 1, Conv, [512, 3, 2]], # 7-P4/16 + [-1, 8, Bottleneck, [512]], + [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32 + [-1, 4, Bottleneck, [1024]], # 10 + ] + +# YOLOv3-SPP head +head: + [[-1, 1, Bottleneck, [1024, False]], + [-1, 1, SPP, [512, [5, 9, 13]]], + [-1, 1, Conv, [1024, 3, 1]], + [-1, 1, Conv, [512, 1, 1]], + [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large) + + [-2, 1, Conv, [256, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 8], 1, Concat, [1]], # cat backbone P4 + [-1, 1, Bottleneck, [512, False]], + [-1, 1, Bottleneck, [512, False]], + [-1, 1, Conv, [256, 1, 1]], + [-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium) + + [-2, 1, Conv, [128, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 6], 1, Concat, [1]], # cat backbone P3 + [-1, 1, Bottleneck, [256, False]], + [-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small) + + [[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) + ] diff --git a/models/hub/yolov3-tiny.yaml b/models/hub/yolov3-tiny.yaml new file mode 100644 index 0000000..ff7638c --- /dev/null +++ b/models/hub/yolov3-tiny.yaml @@ -0,0 +1,41 @@ +# parameters +nc: 80 # number of classes +depth_multiple: 1.0 # model depth multiple +width_multiple: 1.0 # layer channel multiple + +# anchors +anchors: + - [10,14, 23,27, 37,58] # P4/16 + - [81,82, 135,169, 344,319] # P5/32 + +# YOLOv3-tiny backbone +backbone: + # [from, number, module, args] + [[-1, 1, Conv, [16, 3, 1]], # 0 + [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 1-P1/2 + [-1, 1, Conv, [32, 3, 1]], + [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 3-P2/4 + [-1, 1, Conv, [64, 3, 1]], + [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 5-P3/8 + [-1, 1, Conv, [128, 3, 1]], + [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 7-P4/16 + [-1, 1, Conv, [256, 3, 1]], + [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 9-P5/32 + [-1, 1, Conv, [512, 3, 1]], + [-1, 1, nn.ZeroPad2d, [[0, 1, 0, 1]]], # 11 + [-1, 1, nn.MaxPool2d, [2, 1, 0]], # 12 + ] + +# YOLOv3-tiny head +head: + [[-1, 1, Conv, [1024, 3, 1]], + [-1, 1, Conv, [256, 1, 1]], + [-1, 1, Conv, [512, 3, 1]], # 15 (P5/32-large) + + [-2, 1, Conv, [128, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 8], 1, Concat, [1]], # cat backbone P4 + [-1, 1, Conv, [256, 3, 1]], # 19 (P4/16-medium) + + [[19, 15], 1, Detect, [nc, anchors]], # Detect(P4, P5) + ] diff --git a/models/hub/yolov3.yaml b/models/hub/yolov3.yaml new file mode 100644 index 0000000..f2e7613 --- /dev/null +++ b/models/hub/yolov3.yaml @@ -0,0 +1,51 @@ +# parameters +nc: 80 # number of classes +depth_multiple: 1.0 # model depth multiple +width_multiple: 1.0 # layer channel multiple + +# anchors +anchors: + - [10,13, 16,30, 33,23] # P3/8 + - [30,61, 62,45, 59,119] # P4/16 + - [116,90, 156,198, 373,326] # P5/32 + +# darknet53 backbone +backbone: + # [from, number, module, args] + [[-1, 1, Conv, [32, 3, 1]], # 0 + [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 + [-1, 1, Bottleneck, [64]], + [-1, 1, Conv, [128, 3, 2]], # 3-P2/4 + [-1, 2, Bottleneck, [128]], + [-1, 1, Conv, [256, 3, 2]], # 5-P3/8 + [-1, 8, Bottleneck, [256]], + [-1, 1, Conv, [512, 3, 2]], # 7-P4/16 + [-1, 8, Bottleneck, [512]], + [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32 + [-1, 4, Bottleneck, [1024]], # 10 + ] + +# YOLOv3 head +head: + [[-1, 1, Bottleneck, [1024, False]], + [-1, 1, Conv, [512, [1, 1]]], + [-1, 1, Conv, [1024, 3, 1]], + [-1, 1, Conv, [512, 1, 1]], + [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large) + + [-2, 1, Conv, [256, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 8], 1, Concat, [1]], # cat backbone P4 + [-1, 1, Bottleneck, [512, False]], + [-1, 1, Bottleneck, [512, False]], + [-1, 1, Conv, [256, 1, 1]], + [-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium) + + [-2, 1, Conv, [128, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 6], 1, Concat, [1]], # cat backbone P3 + [-1, 1, Bottleneck, [256, False]], + [-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small) + + [[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) + ] diff --git a/models/hub/yolov5-fpn.yaml b/models/hub/yolov5-fpn.yaml new file mode 100644 index 0000000..e772bff --- /dev/null +++ b/models/hub/yolov5-fpn.yaml @@ -0,0 +1,42 @@ +# parameters +nc: 80 # number of classes +depth_multiple: 1.0 # model depth multiple +width_multiple: 1.0 # layer channel multiple + +# anchors +anchors: + - [10,13, 16,30, 33,23] # P3/8 + - [30,61, 62,45, 59,119] # P4/16 + - [116,90, 156,198, 373,326] # P5/32 + +# YOLOv5 backbone +backbone: + # [from, number, module, args] + [[-1, 1, Focus, [64, 3]], # 0-P1/2 + [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 + [-1, 3, Bottleneck, [128]], + [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 + [-1, 9, BottleneckCSP, [256]], + [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 + [-1, 9, BottleneckCSP, [512]], + [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 + [-1, 1, SPP, [1024, [5, 9, 13]]], + [-1, 6, BottleneckCSP, [1024]], # 9 + ] + +# YOLOv5 FPN head +head: + [[-1, 3, BottleneckCSP, [1024, False]], # 10 (P5/32-large) + + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 6], 1, Concat, [1]], # cat backbone P4 + [-1, 1, Conv, [512, 1, 1]], + [-1, 3, BottleneckCSP, [512, False]], # 14 (P4/16-medium) + + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 4], 1, Concat, [1]], # cat backbone P3 + [-1, 1, Conv, [256, 1, 1]], + [-1, 3, BottleneckCSP, [256, False]], # 18 (P3/8-small) + + [[18, 14, 10], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) + ] diff --git a/models/hub/yolov5-p2.yaml b/models/hub/yolov5-p2.yaml new file mode 100644 index 0000000..0633a90 --- /dev/null +++ b/models/hub/yolov5-p2.yaml @@ -0,0 +1,54 @@ +# parameters +nc: 80 # number of classes +depth_multiple: 1.0 # model depth multiple +width_multiple: 1.0 # layer channel multiple + +# anchors +anchors: 3 + +# YOLOv5 backbone +backbone: + # [from, number, module, args] + [ [ -1, 1, Focus, [ 64, 3 ] ], # 0-P1/2 + [ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4 + [ -1, 3, C3, [ 128 ] ], + [ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8 + [ -1, 9, C3, [ 256 ] ], + [ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16 + [ -1, 9, C3, [ 512 ] ], + [ -1, 1, Conv, [ 1024, 3, 2 ] ], # 7-P5/32 + [ -1, 1, SPP, [ 1024, [ 5, 9, 13 ] ] ], + [ -1, 3, C3, [ 1024, False ] ], # 9 + ] + +# YOLOv5 head +head: + [ [ -1, 1, Conv, [ 512, 1, 1 ] ], + [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], + [ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4 + [ -1, 3, C3, [ 512, False ] ], # 13 + + [ -1, 1, Conv, [ 256, 1, 1 ] ], + [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], + [ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3 + [ -1, 3, C3, [ 256, False ] ], # 17 (P3/8-small) + + [ -1, 1, Conv, [ 128, 1, 1 ] ], + [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], + [ [ -1, 2 ], 1, Concat, [ 1 ] ], # cat backbone P2 + [ -1, 1, C3, [ 128, False ] ], # 21 (P2/4-xsmall) + + [ -1, 1, Conv, [ 128, 3, 2 ] ], + [ [ -1, 18 ], 1, Concat, [ 1 ] ], # cat head P3 + [ -1, 3, C3, [ 256, False ] ], # 24 (P3/8-small) + + [ -1, 1, Conv, [ 256, 3, 2 ] ], + [ [ -1, 14 ], 1, Concat, [ 1 ] ], # cat head P4 + [ -1, 3, C3, [ 512, False ] ], # 27 (P4/16-medium) + + [ -1, 1, Conv, [ 512, 3, 2 ] ], + [ [ -1, 10 ], 1, Concat, [ 1 ] ], # cat head P5 + [ -1, 3, C3, [ 1024, False ] ], # 30 (P5/32-large) + + [ [ 24, 27, 30 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5) + ] diff --git a/models/hub/yolov5-p6.yaml b/models/hub/yolov5-p6.yaml new file mode 100644 index 0000000..3728a11 --- /dev/null +++ b/models/hub/yolov5-p6.yaml @@ -0,0 +1,56 @@ +# parameters +nc: 80 # number of classes +depth_multiple: 1.0 # model depth multiple +width_multiple: 1.0 # layer channel multiple + +# anchors +anchors: 3 + +# YOLOv5 backbone +backbone: + # [from, number, module, args] + [ [ -1, 1, Focus, [ 64, 3 ] ], # 0-P1/2 + [ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4 + [ -1, 3, C3, [ 128 ] ], + [ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8 + [ -1, 9, C3, [ 256 ] ], + [ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16 + [ -1, 9, C3, [ 512 ] ], + [ -1, 1, Conv, [ 768, 3, 2 ] ], # 7-P5/32 + [ -1, 3, C3, [ 768 ] ], + [ -1, 1, Conv, [ 1024, 3, 2 ] ], # 9-P6/64 + [ -1, 1, SPP, [ 1024, [ 3, 5, 7 ] ] ], + [ -1, 3, C3, [ 1024, False ] ], # 11 + ] + +# YOLOv5 head +head: + [ [ -1, 1, Conv, [ 768, 1, 1 ] ], + [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], + [ [ -1, 8 ], 1, Concat, [ 1 ] ], # cat backbone P5 + [ -1, 3, C3, [ 768, False ] ], # 15 + + [ -1, 1, Conv, [ 512, 1, 1 ] ], + [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], + [ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4 + [ -1, 3, C3, [ 512, False ] ], # 19 + + [ -1, 1, Conv, [ 256, 1, 1 ] ], + [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], + [ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3 + [ -1, 3, C3, [ 256, False ] ], # 23 (P3/8-small) + + [ -1, 1, Conv, [ 256, 3, 2 ] ], + [ [ -1, 20 ], 1, Concat, [ 1 ] ], # cat head P4 + [ -1, 3, C3, [ 512, False ] ], # 26 (P4/16-medium) + + [ -1, 1, Conv, [ 512, 3, 2 ] ], + [ [ -1, 16 ], 1, Concat, [ 1 ] ], # cat head P5 + [ -1, 3, C3, [ 768, False ] ], # 29 (P5/32-large) + + [ -1, 1, Conv, [ 768, 3, 2 ] ], + [ [ -1, 12 ], 1, Concat, [ 1 ] ], # cat head P6 + [ -1, 3, C3, [ 1024, False ] ], # 32 (P5/64-xlarge) + + [ [ 23, 26, 29, 32 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5, P6) + ] diff --git a/models/hub/yolov5-p7.yaml b/models/hub/yolov5-p7.yaml new file mode 100644 index 0000000..ca8f849 --- /dev/null +++ b/models/hub/yolov5-p7.yaml @@ -0,0 +1,67 @@ +# parameters +nc: 80 # number of classes +depth_multiple: 1.0 # model depth multiple +width_multiple: 1.0 # layer channel multiple + +# anchors +anchors: 3 + +# YOLOv5 backbone +backbone: + # [from, number, module, args] + [ [ -1, 1, Focus, [ 64, 3 ] ], # 0-P1/2 + [ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4 + [ -1, 3, C3, [ 128 ] ], + [ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8 + [ -1, 9, C3, [ 256 ] ], + [ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16 + [ -1, 9, C3, [ 512 ] ], + [ -1, 1, Conv, [ 768, 3, 2 ] ], # 7-P5/32 + [ -1, 3, C3, [ 768 ] ], + [ -1, 1, Conv, [ 1024, 3, 2 ] ], # 9-P6/64 + [ -1, 3, C3, [ 1024 ] ], + [ -1, 1, Conv, [ 1280, 3, 2 ] ], # 11-P7/128 + [ -1, 1, SPP, [ 1280, [ 3, 5 ] ] ], + [ -1, 3, C3, [ 1280, False ] ], # 13 + ] + +# YOLOv5 head +head: + [ [ -1, 1, Conv, [ 1024, 1, 1 ] ], + [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], + [ [ -1, 10 ], 1, Concat, [ 1 ] ], # cat backbone P6 + [ -1, 3, C3, [ 1024, False ] ], # 17 + + [ -1, 1, Conv, [ 768, 1, 1 ] ], + [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], + [ [ -1, 8 ], 1, Concat, [ 1 ] ], # cat backbone P5 + [ -1, 3, C3, [ 768, False ] ], # 21 + + [ -1, 1, Conv, [ 512, 1, 1 ] ], + [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], + [ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4 + [ -1, 3, C3, [ 512, False ] ], # 25 + + [ -1, 1, Conv, [ 256, 1, 1 ] ], + [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], + [ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3 + [ -1, 3, C3, [ 256, False ] ], # 29 (P3/8-small) + + [ -1, 1, Conv, [ 256, 3, 2 ] ], + [ [ -1, 26 ], 1, Concat, [ 1 ] ], # cat head P4 + [ -1, 3, C3, [ 512, False ] ], # 32 (P4/16-medium) + + [ -1, 1, Conv, [ 512, 3, 2 ] ], + [ [ -1, 22 ], 1, Concat, [ 1 ] ], # cat head P5 + [ -1, 3, C3, [ 768, False ] ], # 35 (P5/32-large) + + [ -1, 1, Conv, [ 768, 3, 2 ] ], + [ [ -1, 18 ], 1, Concat, [ 1 ] ], # cat head P6 + [ -1, 3, C3, [ 1024, False ] ], # 38 (P6/64-xlarge) + + [ -1, 1, Conv, [ 1024, 3, 2 ] ], + [ [ -1, 14 ], 1, Concat, [ 1 ] ], # cat head P7 + [ -1, 3, C3, [ 1280, False ] ], # 41 (P7/128-xxlarge) + + [ [ 29, 32, 35, 38, 41 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5, P6, P7) + ] diff --git a/models/hub/yolov5-panet.yaml b/models/hub/yolov5-panet.yaml new file mode 100644 index 0000000..340f95a --- /dev/null +++ b/models/hub/yolov5-panet.yaml @@ -0,0 +1,48 @@ +# parameters +nc: 80 # number of classes +depth_multiple: 1.0 # model depth multiple +width_multiple: 1.0 # layer channel multiple + +# anchors +anchors: + - [10,13, 16,30, 33,23] # P3/8 + - [30,61, 62,45, 59,119] # P4/16 + - [116,90, 156,198, 373,326] # P5/32 + +# YOLOv5 backbone +backbone: + # [from, number, module, args] + [[-1, 1, Focus, [64, 3]], # 0-P1/2 + [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 + [-1, 3, BottleneckCSP, [128]], + [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 + [-1, 9, BottleneckCSP, [256]], + [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 + [-1, 9, BottleneckCSP, [512]], + [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 + [-1, 1, SPP, [1024, [5, 9, 13]]], + [-1, 3, BottleneckCSP, [1024, False]], # 9 + ] + +# YOLOv5 PANet head +head: + [[-1, 1, Conv, [512, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 6], 1, Concat, [1]], # cat backbone P4 + [-1, 3, BottleneckCSP, [512, False]], # 13 + + [-1, 1, Conv, [256, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 4], 1, Concat, [1]], # cat backbone P3 + [-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small) + + [-1, 1, Conv, [256, 3, 2]], + [[-1, 14], 1, Concat, [1]], # cat head P4 + [-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium) + + [-1, 1, Conv, [512, 3, 2]], + [[-1, 10], 1, Concat, [1]], # cat head P5 + [-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large) + + [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) + ] diff --git a/models/hub/yolov5l6.yaml b/models/hub/yolov5l6.yaml new file mode 100644 index 0000000..11298b0 --- /dev/null +++ b/models/hub/yolov5l6.yaml @@ -0,0 +1,60 @@ +# parameters +nc: 80 # number of classes +depth_multiple: 1.0 # model depth multiple +width_multiple: 1.0 # layer channel multiple + +# anchors +anchors: + - [ 19,27, 44,40, 38,94 ] # P3/8 + - [ 96,68, 86,152, 180,137 ] # P4/16 + - [ 140,301, 303,264, 238,542 ] # P5/32 + - [ 436,615, 739,380, 925,792 ] # P6/64 + +# YOLOv5 backbone +backbone: + # [from, number, module, args] + [ [ -1, 1, Focus, [ 64, 3 ] ], # 0-P1/2 + [ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4 + [ -1, 3, C3, [ 128 ] ], + [ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8 + [ -1, 9, C3, [ 256 ] ], + [ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16 + [ -1, 9, C3, [ 512 ] ], + [ -1, 1, Conv, [ 768, 3, 2 ] ], # 7-P5/32 + [ -1, 3, C3, [ 768 ] ], + [ -1, 1, Conv, [ 1024, 3, 2 ] ], # 9-P6/64 + [ -1, 1, SPP, [ 1024, [ 3, 5, 7 ] ] ], + [ -1, 3, C3, [ 1024, False ] ], # 11 + ] + +# YOLOv5 head +head: + [ [ -1, 1, Conv, [ 768, 1, 1 ] ], + [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], + [ [ -1, 8 ], 1, Concat, [ 1 ] ], # cat backbone P5 + [ -1, 3, C3, [ 768, False ] ], # 15 + + [ -1, 1, Conv, [ 512, 1, 1 ] ], + [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], + [ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4 + [ -1, 3, C3, [ 512, False ] ], # 19 + + [ -1, 1, Conv, [ 256, 1, 1 ] ], + [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], + [ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3 + [ -1, 3, C3, [ 256, False ] ], # 23 (P3/8-small) + + [ -1, 1, Conv, [ 256, 3, 2 ] ], + [ [ -1, 20 ], 1, Concat, [ 1 ] ], # cat head P4 + [ -1, 3, C3, [ 512, False ] ], # 26 (P4/16-medium) + + [ -1, 1, Conv, [ 512, 3, 2 ] ], + [ [ -1, 16 ], 1, Concat, [ 1 ] ], # cat head P5 + [ -1, 3, C3, [ 768, False ] ], # 29 (P5/32-large) + + [ -1, 1, Conv, [ 768, 3, 2 ] ], + [ [ -1, 12 ], 1, Concat, [ 1 ] ], # cat head P6 + [ -1, 3, C3, [ 1024, False ] ], # 32 (P6/64-xlarge) + + [ [ 23, 26, 29, 32 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5, P6) + ] diff --git a/models/hub/yolov5m6.yaml b/models/hub/yolov5m6.yaml new file mode 100644 index 0000000..48afc86 --- /dev/null +++ b/models/hub/yolov5m6.yaml @@ -0,0 +1,60 @@ +# parameters +nc: 80 # number of classes +depth_multiple: 0.67 # model depth multiple +width_multiple: 0.75 # layer channel multiple + +# anchors +anchors: + - [ 19,27, 44,40, 38,94 ] # P3/8 + - [ 96,68, 86,152, 180,137 ] # P4/16 + - [ 140,301, 303,264, 238,542 ] # P5/32 + - [ 436,615, 739,380, 925,792 ] # P6/64 + +# YOLOv5 backbone +backbone: + # [from, number, module, args] + [ [ -1, 1, Focus, [ 64, 3 ] ], # 0-P1/2 + [ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4 + [ -1, 3, C3, [ 128 ] ], + [ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8 + [ -1, 9, C3, [ 256 ] ], + [ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16 + [ -1, 9, C3, [ 512 ] ], + [ -1, 1, Conv, [ 768, 3, 2 ] ], # 7-P5/32 + [ -1, 3, C3, [ 768 ] ], + [ -1, 1, Conv, [ 1024, 3, 2 ] ], # 9-P6/64 + [ -1, 1, SPP, [ 1024, [ 3, 5, 7 ] ] ], + [ -1, 3, C3, [ 1024, False ] ], # 11 + ] + +# YOLOv5 head +head: + [ [ -1, 1, Conv, [ 768, 1, 1 ] ], + [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], + [ [ -1, 8 ], 1, Concat, [ 1 ] ], # cat backbone P5 + [ -1, 3, C3, [ 768, False ] ], # 15 + + [ -1, 1, Conv, [ 512, 1, 1 ] ], + [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], + [ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4 + [ -1, 3, C3, [ 512, False ] ], # 19 + + [ -1, 1, Conv, [ 256, 1, 1 ] ], + [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], + [ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3 + [ -1, 3, C3, [ 256, False ] ], # 23 (P3/8-small) + + [ -1, 1, Conv, [ 256, 3, 2 ] ], + [ [ -1, 20 ], 1, Concat, [ 1 ] ], # cat head P4 + [ -1, 3, C3, [ 512, False ] ], # 26 (P4/16-medium) + + [ -1, 1, Conv, [ 512, 3, 2 ] ], + [ [ -1, 16 ], 1, Concat, [ 1 ] ], # cat head P5 + [ -1, 3, C3, [ 768, False ] ], # 29 (P5/32-large) + + [ -1, 1, Conv, [ 768, 3, 2 ] ], + [ [ -1, 12 ], 1, Concat, [ 1 ] ], # cat head P6 + [ -1, 3, C3, [ 1024, False ] ], # 32 (P6/64-xlarge) + + [ [ 23, 26, 29, 32 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5, P6) + ] diff --git a/models/hub/yolov5s-transformer.yaml b/models/hub/yolov5s-transformer.yaml new file mode 100644 index 0000000..f2d6667 --- /dev/null +++ b/models/hub/yolov5s-transformer.yaml @@ -0,0 +1,48 @@ +# parameters +nc: 80 # number of classes +depth_multiple: 0.33 # model depth multiple +width_multiple: 0.50 # layer channel multiple + +# anchors +anchors: + - [10,13, 16,30, 33,23] # P3/8 + - [30,61, 62,45, 59,119] # P4/16 + - [116,90, 156,198, 373,326] # P5/32 + +# YOLOv5 backbone +backbone: + # [from, number, module, args] + [[-1, 1, Focus, [64, 3]], # 0-P1/2 + [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 + [-1, 3, C3, [128]], + [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 + [-1, 9, C3, [256]], + [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 + [-1, 9, C3, [512]], + [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 + [-1, 1, SPP, [1024, [5, 9, 13]]], + [-1, 3, C3TR, [1024, False]], # 9 <-------- C3TR() Transformer module + ] + +# YOLOv5 head +head: + [[-1, 1, Conv, [512, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 6], 1, Concat, [1]], # cat backbone P4 + [-1, 3, C3, [512, False]], # 13 + + [-1, 1, Conv, [256, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 4], 1, Concat, [1]], # cat backbone P3 + [-1, 3, C3, [256, False]], # 17 (P3/8-small) + + [-1, 1, Conv, [256, 3, 2]], + [[-1, 14], 1, Concat, [1]], # cat head P4 + [-1, 3, C3, [512, False]], # 20 (P4/16-medium) + + [-1, 1, Conv, [512, 3, 2]], + [[-1, 10], 1, Concat, [1]], # cat head P5 + [-1, 3, C3, [1024, False]], # 23 (P5/32-large) + + [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) + ] diff --git a/models/hub/yolov5s6.yaml b/models/hub/yolov5s6.yaml new file mode 100644 index 0000000..1df577a --- /dev/null +++ b/models/hub/yolov5s6.yaml @@ -0,0 +1,60 @@ +# parameters +nc: 80 # number of classes +depth_multiple: 0.33 # model depth multiple +width_multiple: 0.50 # layer channel multiple + +# anchors +anchors: + - [ 19,27, 44,40, 38,94 ] # P3/8 + - [ 96,68, 86,152, 180,137 ] # P4/16 + - [ 140,301, 303,264, 238,542 ] # P5/32 + - [ 436,615, 739,380, 925,792 ] # P6/64 + +# YOLOv5 backbone +backbone: + # [from, number, module, args] + [ [ -1, 1, Focus, [ 64, 3 ] ], # 0-P1/2 + [ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4 + [ -1, 3, C3, [ 128 ] ], + [ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8 + [ -1, 9, C3, [ 256 ] ], + [ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16 + [ -1, 9, C3, [ 512 ] ], + [ -1, 1, Conv, [ 768, 3, 2 ] ], # 7-P5/32 + [ -1, 3, C3, [ 768 ] ], + [ -1, 1, Conv, [ 1024, 3, 2 ] ], # 9-P6/64 + [ -1, 1, SPP, [ 1024, [ 3, 5, 7 ] ] ], + [ -1, 3, C3, [ 1024, False ] ], # 11 + ] + +# YOLOv5 head +head: + [ [ -1, 1, Conv, [ 768, 1, 1 ] ], + [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], + [ [ -1, 8 ], 1, Concat, [ 1 ] ], # cat backbone P5 + [ -1, 3, C3, [ 768, False ] ], # 15 + + [ -1, 1, Conv, [ 512, 1, 1 ] ], + [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], + [ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4 + [ -1, 3, C3, [ 512, False ] ], # 19 + + [ -1, 1, Conv, [ 256, 1, 1 ] ], + [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], + [ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3 + [ -1, 3, C3, [ 256, False ] ], # 23 (P3/8-small) + + [ -1, 1, Conv, [ 256, 3, 2 ] ], + [ [ -1, 20 ], 1, Concat, [ 1 ] ], # cat head P4 + [ -1, 3, C3, [ 512, False ] ], # 26 (P4/16-medium) + + [ -1, 1, Conv, [ 512, 3, 2 ] ], + [ [ -1, 16 ], 1, Concat, [ 1 ] ], # cat head P5 + [ -1, 3, C3, [ 768, False ] ], # 29 (P5/32-large) + + [ -1, 1, Conv, [ 768, 3, 2 ] ], + [ [ -1, 12 ], 1, Concat, [ 1 ] ], # cat head P6 + [ -1, 3, C3, [ 1024, False ] ], # 32 (P6/64-xlarge) + + [ [ 23, 26, 29, 32 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5, P6) + ] diff --git a/models/hub/yolov5x6.yaml b/models/hub/yolov5x6.yaml new file mode 100644 index 0000000..5ebc021 --- /dev/null +++ b/models/hub/yolov5x6.yaml @@ -0,0 +1,60 @@ +# parameters +nc: 80 # number of classes +depth_multiple: 1.33 # model depth multiple +width_multiple: 1.25 # layer channel multiple + +# anchors +anchors: + - [ 19,27, 44,40, 38,94 ] # P3/8 + - [ 96,68, 86,152, 180,137 ] # P4/16 + - [ 140,301, 303,264, 238,542 ] # P5/32 + - [ 436,615, 739,380, 925,792 ] # P6/64 + +# YOLOv5 backbone +backbone: + # [from, number, module, args] + [ [ -1, 1, Focus, [ 64, 3 ] ], # 0-P1/2 + [ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4 + [ -1, 3, C3, [ 128 ] ], + [ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8 + [ -1, 9, C3, [ 256 ] ], + [ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16 + [ -1, 9, C3, [ 512 ] ], + [ -1, 1, Conv, [ 768, 3, 2 ] ], # 7-P5/32 + [ -1, 3, C3, [ 768 ] ], + [ -1, 1, Conv, [ 1024, 3, 2 ] ], # 9-P6/64 + [ -1, 1, SPP, [ 1024, [ 3, 5, 7 ] ] ], + [ -1, 3, C3, [ 1024, False ] ], # 11 + ] + +# YOLOv5 head +head: + [ [ -1, 1, Conv, [ 768, 1, 1 ] ], + [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], + [ [ -1, 8 ], 1, Concat, [ 1 ] ], # cat backbone P5 + [ -1, 3, C3, [ 768, False ] ], # 15 + + [ -1, 1, Conv, [ 512, 1, 1 ] ], + [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], + [ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4 + [ -1, 3, C3, [ 512, False ] ], # 19 + + [ -1, 1, Conv, [ 256, 1, 1 ] ], + [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], + [ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3 + [ -1, 3, C3, [ 256, False ] ], # 23 (P3/8-small) + + [ -1, 1, Conv, [ 256, 3, 2 ] ], + [ [ -1, 20 ], 1, Concat, [ 1 ] ], # cat head P4 + [ -1, 3, C3, [ 512, False ] ], # 26 (P4/16-medium) + + [ -1, 1, Conv, [ 512, 3, 2 ] ], + [ [ -1, 16 ], 1, Concat, [ 1 ] ], # cat head P5 + [ -1, 3, C3, [ 768, False ] ], # 29 (P5/32-large) + + [ -1, 1, Conv, [ 768, 3, 2 ] ], + [ [ -1, 12 ], 1, Concat, [ 1 ] ], # cat head P6 + [ -1, 3, C3, [ 1024, False ] ], # 32 (P6/64-xlarge) + + [ [ 23, 26, 29, 32 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5, P6) + ] diff --git a/models/yolo.py b/models/yolo.py new file mode 100644 index 0000000..ba45ef1 --- /dev/null +++ b/models/yolo.py @@ -0,0 +1,319 @@ +"""YOLOv5-specific modules + +Usage: + $ python path/to/models/yolo.py --cfg yolov5s.yaml +""" + +import argparse +import logging +import sys +from copy import deepcopy +from pathlib import Path + +FILE = Path(__file__).absolute() +sys.path.append(FILE.parents[1].as_posix()) # add yolov5/ to path + +from models.common import * +from models.experimental import * +from utils.autoanchor import check_anchor_order +from utils.general import make_divisible, check_file, set_logging +from utils.plots import feature_visualization +from utils.torch_utils import time_synchronized, fuse_conv_and_bn, model_info, scale_img, initialize_weights, \ + select_device, copy_attr + +try: + import thop # for FLOPs computation +except ImportError: + thop = None + +logger = logging.getLogger(__name__) + + +class Detect(nn.Module): + stride = None # strides computed during build + onnx_dynamic = False # ONNX export parameter + + def __init__(self, nc=80, anchors=(), ch=(), inplace=True): # detection layer + super(Detect, self).__init__() + self.nc = nc # number of classes + self.no = nc + 5 # number of outputs per anchor + self.nl = len(anchors) # number of detection layers + self.na = len(anchors[0]) // 2 # number of anchors + self.grid = [torch.zeros(1)] * self.nl # init grid + a = torch.tensor(anchors).float().view(self.nl, -1, 2) + self.register_buffer('anchors', a) # shape(nl,na,2) + self.register_buffer('anchor_grid', a.clone().view(self.nl, 1, -1, 1, 1, 2)) # shape(nl,1,na,1,1,2) + self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv + self.inplace = inplace # use in-place ops (e.g. slice assignment) + + def forward(self, x): + # x = x.copy() # for profiling + z = [] # inference output + for i in range(self.nl): + x[i] = self.m[i](x[i]) # conv + bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85) + x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous() + + if not self.training: # inference + if self.grid[i].shape[2:4] != x[i].shape[2:4] or self.onnx_dynamic: + self.grid[i] = self._make_grid(nx, ny).to(x[i].device) + + y = x[i].sigmoid() + y_out = y.clone() # for fixing bug in https://github.com/ultralytics/yolov5/issues/671 + if self.inplace: + # y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i] # xy + y_out[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i] # xy + # y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh + y_out[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh + else: # for YOLOv5 on AWS Inferentia https://github.com/ultralytics/yolov5/pull/2953 + xy = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i] # xy + wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i].view(1, self.na, 1, 1, 2) # wh + # y = torch.cat((xy, wh, y[..., 4:]), -1) + y_out = torch.cat((xy, wh, y[..., 4:]), -1) + # z.append(y.view(bs, -1, self.no)) + z.append(y_out.view(bs, -1, self.no)) + + return x if self.training else (torch.cat(z, 1), x) + + @staticmethod + def _make_grid(nx=20, ny=20): + yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)]) + return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float() + + +class Model(nn.Module): + def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, anchors=None): # model, input channels, number of classes + super(Model, self).__init__() + if isinstance(cfg, dict): + self.yaml = cfg # model dict + else: # is *.yaml + import yaml # for torch hub + self.yaml_file = Path(cfg).name + with open(cfg) as f: + self.yaml = yaml.safe_load(f) # model dict + + # Define model + ch = self.yaml['ch'] = self.yaml.get('ch', ch) # input channels + if nc and nc != self.yaml['nc']: + logger.info(f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}") + self.yaml['nc'] = nc # override yaml value + if anchors: + logger.info(f'Overriding model.yaml anchors with anchors={anchors}') + self.yaml['anchors'] = round(anchors) # override yaml value + self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch]) # model, savelist + self.names = [str(i) for i in range(self.yaml['nc'])] # default names + self.inplace = self.yaml.get('inplace', True) + # logger.info([x.shape for x in self.forward(torch.zeros(1, ch, 64, 64))]) + + # Build strides, anchors + m = self.model[-1] # Detect() + if isinstance(m, Detect): + s = 256 # 2x min stride + m.inplace = self.inplace + m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))]) # forward + m.anchors /= m.stride.view(-1, 1, 1) + check_anchor_order(m) + self.stride = m.stride + self._initialize_biases() # only run once + # logger.info('Strides: %s' % m.stride.tolist()) + + # Init weights, biases + initialize_weights(self) + self.info() + logger.info('') + + def forward(self, x, augment=False, profile=False): + if augment: + return self.forward_augment(x) # augmented inference, None + else: + return self.forward_once(x, profile) # single-scale inference, train + + def forward_augment(self, x): + img_size = x.shape[-2:] # height, width + s = [1, 0.83, 0.67] # scales + f = [None, 3, None] # flips (2-ud, 3-lr) + y = [] # outputs + for si, fi in zip(s, f): + xi = scale_img(x.flip(fi) if fi else x, si, gs=int(self.stride.max())) + yi = self.forward_once(xi)[0] # forward + # cv2.imwrite(f'img_{si}.jpg', 255 * xi[0].cpu().numpy().transpose((1, 2, 0))[:, :, ::-1]) # save + yi = self._descale_pred(yi, fi, si, img_size) + y.append(yi) + return torch.cat(y, 1), None # augmented inference, train + + def forward_once(self, x, profile=False, feature_vis=False): + y, dt = [], [] # outputs + for m in self.model: + if m.f != -1: # if not from previous layer + x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers + + if profile: + o = thop.profile(m, inputs=(x,), verbose=False)[0] / 1E9 * 2 if thop else 0 # FLOPs + t = time_synchronized() + for _ in range(10): + _ = m(x) + dt.append((time_synchronized() - t) * 100) + if m == self.model[0]: + logger.info(f"{'time (ms)':>10s} {'GFLOPs':>10s} {'params':>10s} {'module'}") + logger.info(f'{dt[-1]:10.2f} {o:10.2f} {m.np:10.0f} {m.type}') + + x = m(x) # run + y.append(x if m.i in self.save else None) # save output + + if feature_vis and m.type == 'models.common.SPP': + feature_visualization(x, m.type, m.i) + + if profile: + logger.info('%.1fms total' % sum(dt)) + return x + + def _descale_pred(self, p, flips, scale, img_size): + # de-scale predictions following augmented inference (inverse operation) + if self.inplace: + p[..., :4] /= scale # de-scale + if flips == 2: + p[..., 1] = img_size[0] - p[..., 1] # de-flip ud + elif flips == 3: + p[..., 0] = img_size[1] - p[..., 0] # de-flip lr + else: + x, y, wh = p[..., 0:1] / scale, p[..., 1:2] / scale, p[..., 2:4] / scale # de-scale + if flips == 2: + y = img_size[0] - y # de-flip ud + elif flips == 3: + x = img_size[1] - x # de-flip lr + p = torch.cat((x, y, wh, p[..., 4:]), -1) + return p + + def _initialize_biases(self, cf=None): # initialize biases into Detect(), cf is class frequency + # https://arxiv.org/abs/1708.02002 section 3.3 + # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1. + m = self.model[-1] # Detect() module + for mi, s in zip(m.m, m.stride): # from + b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85) + b.data[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image) + b.data[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum()) # cls + mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) + + def _print_biases(self): + m = self.model[-1] # Detect() module + for mi in m.m: # from + b = mi.bias.detach().view(m.na, -1).T # conv.bias(255) to (3,85) + logger.info( + ('%6g Conv2d.bias:' + '%10.3g' * 6) % (mi.weight.shape[1], *b[:5].mean(1).tolist(), b[5:].mean())) + + # def _print_weights(self): + # for m in self.model.modules(): + # if type(m) is Bottleneck: + # logger.info('%10.3g' % (m.w.detach().sigmoid() * 2)) # shortcut weights + + def fuse(self): # fuse model Conv2d() + BatchNorm2d() layers + logger.info('Fusing layers... ') + for m in self.model.modules(): + if type(m) is Conv and hasattr(m, 'bn'): + m.conv = fuse_conv_and_bn(m.conv, m.bn) # update conv + delattr(m, 'bn') # remove batchnorm + m.forward = m.fuseforward # update forward + self.info() + return self + + def nms(self, mode=True): # add or remove NMS module + present = type(self.model[-1]) is NMS # last layer is NMS + if mode and not present: + logger.info('Adding NMS... ') + m = NMS() # module + m.f = -1 # from + m.i = self.model[-1].i + 1 # index + self.model.add_module(name='%s' % m.i, module=m) # add + self.eval() + elif not mode and present: + logger.info('Removing NMS... ') + self.model = self.model[:-1] # remove + return self + + def autoshape(self): # add AutoShape module + logger.info('Adding AutoShape... ') + m = AutoShape(self) # wrap model + copy_attr(m, self, include=('yaml', 'nc', 'hyp', 'names', 'stride'), exclude=()) # copy attributes + return m + + def info(self, verbose=False, img_size=640): # print model information + model_info(self, verbose, img_size) + + +def parse_model(d, ch): # model_dict, input_channels(3) + logger.info('\n%3s%18s%3s%10s %-40s%-30s' % ('', 'from', 'n', 'params', 'module', 'arguments')) + anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple'] + na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors + no = na * (nc + 5) # number of outputs = anchors * (classes + 5) + + layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out + for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']): # from, number, module, args + m = eval(m) if isinstance(m, str) else m # eval strings + for j, a in enumerate(args): + try: + args[j] = eval(a) if isinstance(a, str) else a # eval strings + except: + pass + + n = max(round(n * gd), 1) if n > 1 else n # depth gain + if m in [Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, DWConv, MixConv2d, Focus, CrossConv, BottleneckCSP, + C3, C3TR]: + c1, c2 = ch[f], args[0] + if c2 != no: # if not output + c2 = make_divisible(c2 * gw, 8) + + args = [c1, c2, *args[1:]] + if m in [BottleneckCSP, C3, C3TR]: + args.insert(2, n) # number of repeats + n = 1 + elif m is nn.BatchNorm2d: + args = [ch[f]] + elif m is Concat: + c2 = sum([ch[x] for x in f]) + elif m is Detect: + args.append([ch[x] for x in f]) + if isinstance(args[1], int): # number of anchors + args[1] = [list(range(args[1] * 2))] * len(f) + elif m is Contract: + c2 = ch[f] * args[0] ** 2 + elif m is Expand: + c2 = ch[f] // args[0] ** 2 + else: + c2 = ch[f] + + m_ = nn.Sequential(*[m(*args) for _ in range(n)]) if n > 1 else m(*args) # module + t = str(m)[8:-2].replace('__main__.', '') # module type + np = sum([x.numel() for x in m_.parameters()]) # number params + m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params + logger.info('%3s%18s%3s%10.0f %-40s%-30s' % (i, f, n, np, t, args)) # print + save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist + layers.append(m_) + if i == 0: + ch = [] + ch.append(c2) + return nn.Sequential(*layers), sorted(save) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--cfg', type=str, default='yolov5s.yaml', help='model.yaml') + parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu') + opt = parser.parse_args() + opt.cfg = check_file(opt.cfg) # check file + set_logging() + device = select_device(opt.device) + + # Create model + model = Model(opt.cfg).to(device) + model.train() + + # Profile + # img = torch.rand(8 if torch.cuda.is_available() else 1, 3, 320, 320).to(device) + # y = model(img, profile=True) + + # Tensorboard (not working https://github.com/ultralytics/yolov5/issues/2898) + # from torch.utils.tensorboard import SummaryWriter + # tb_writer = SummaryWriter('.') + # logger.info("Run 'tensorboard --logdir=models' to view tensorboard at http://localhost:6006/") + # tb_writer.add_graph(torch.jit.trace(model, img, strict=False), []) # add model graph + # tb_writer.add_image('test', img[0], dataformats='CWH') # add model to tensorboard diff --git a/models/yolov5l.yaml b/models/yolov5l.yaml new file mode 100644 index 0000000..71ebf86 --- /dev/null +++ b/models/yolov5l.yaml @@ -0,0 +1,48 @@ +# parameters +nc: 80 # number of classes +depth_multiple: 1.0 # model depth multiple +width_multiple: 1.0 # layer channel multiple + +# anchors +anchors: + - [10,13, 16,30, 33,23] # P3/8 + - [30,61, 62,45, 59,119] # P4/16 + - [116,90, 156,198, 373,326] # P5/32 + +# YOLOv5 backbone +backbone: + # [from, number, module, args] + [[-1, 1, Focus, [64, 3]], # 0-P1/2 + [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 + [-1, 3, C3, [128]], + [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 + [-1, 9, C3, [256]], + [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 + [-1, 9, C3, [512]], + [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 + [-1, 1, SPP, [1024, [5, 9, 13]]], + [-1, 3, C3, [1024, False]], # 9 + ] + +# YOLOv5 head +head: + [[-1, 1, Conv, [512, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 6], 1, Concat, [1]], # cat backbone P4 + [-1, 3, C3, [512, False]], # 13 + + [-1, 1, Conv, [256, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 4], 1, Concat, [1]], # cat backbone P3 + [-1, 3, C3, [256, False]], # 17 (P3/8-small) + + [-1, 1, Conv, [256, 3, 2]], + [[-1, 14], 1, Concat, [1]], # cat head P4 + [-1, 3, C3, [512, False]], # 20 (P4/16-medium) + + [-1, 1, Conv, [512, 3, 2]], + [[-1, 10], 1, Concat, [1]], # cat head P5 + [-1, 3, C3, [1024, False]], # 23 (P5/32-large) + + [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) + ] diff --git a/models/yolov5m.yaml b/models/yolov5m.yaml new file mode 100644 index 0000000..3c749c9 --- /dev/null +++ b/models/yolov5m.yaml @@ -0,0 +1,48 @@ +# parameters +nc: 80 # number of classes +depth_multiple: 0.67 # model depth multiple +width_multiple: 0.75 # layer channel multiple + +# anchors +anchors: + - [10,13, 16,30, 33,23] # P3/8 + - [30,61, 62,45, 59,119] # P4/16 + - [116,90, 156,198, 373,326] # P5/32 + +# YOLOv5 backbone +backbone: + # [from, number, module, args] + [[-1, 1, Focus, [64, 3]], # 0-P1/2 + [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 + [-1, 3, C3, [128]], + [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 + [-1, 9, C3, [256]], + [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 + [-1, 9, C3, [512]], + [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 + [-1, 1, SPP, [1024, [5, 9, 13]]], + [-1, 3, C3, [1024, False]], # 9 + ] + +# YOLOv5 head +head: + [[-1, 1, Conv, [512, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 6], 1, Concat, [1]], # cat backbone P4 + [-1, 3, C3, [512, False]], # 13 + + [-1, 1, Conv, [256, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 4], 1, Concat, [1]], # cat backbone P3 + [-1, 3, C3, [256, False]], # 17 (P3/8-small) + + [-1, 1, Conv, [256, 3, 2]], + [[-1, 14], 1, Concat, [1]], # cat head P4 + [-1, 3, C3, [512, False]], # 20 (P4/16-medium) + + [-1, 1, Conv, [512, 3, 2]], + [[-1, 10], 1, Concat, [1]], # cat head P5 + [-1, 3, C3, [1024, False]], # 23 (P5/32-large) + + [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) + ] diff --git a/models/yolov5s.yaml b/models/yolov5s.yaml new file mode 100644 index 0000000..aca669d --- /dev/null +++ b/models/yolov5s.yaml @@ -0,0 +1,48 @@ +# parameters +nc: 80 # number of classes +depth_multiple: 0.33 # model depth multiple +width_multiple: 0.50 # layer channel multiple + +# anchors +anchors: + - [10,13, 16,30, 33,23] # P3/8 + - [30,61, 62,45, 59,119] # P4/16 + - [116,90, 156,198, 373,326] # P5/32 + +# YOLOv5 backbone +backbone: + # [from, number, module, args] + [[-1, 1, Focus, [64, 3]], # 0-P1/2 + [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 + [-1, 3, C3, [128]], + [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 + [-1, 9, C3, [256]], + [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 + [-1, 9, C3, [512]], + [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 + [-1, 1, SPP, [1024, [5, 9, 13]]], + [-1, 3, C3, [1024, False]], # 9 + ] + +# YOLOv5 head +head: + [[-1, 1, Conv, [512, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 6], 1, Concat, [1]], # cat backbone P4 + [-1, 3, C3, [512, False]], # 13 + + [-1, 1, Conv, [256, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 4], 1, Concat, [1]], # cat backbone P3 + [-1, 3, C3, [256, False]], # 17 (P3/8-small) + + [-1, 1, Conv, [256, 3, 2]], + [[-1, 14], 1, Concat, [1]], # cat head P4 + [-1, 3, C3, [512, False]], # 20 (P4/16-medium) + + [-1, 1, Conv, [512, 3, 2]], + [[-1, 10], 1, Concat, [1]], # cat head P5 + [-1, 3, C3, [1024, False]], # 23 (P5/32-large) + + [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) + ] diff --git a/models/yolov5x.yaml b/models/yolov5x.yaml new file mode 100644 index 0000000..d3babdf --- /dev/null +++ b/models/yolov5x.yaml @@ -0,0 +1,48 @@ +# parameters +nc: 80 # number of classes +depth_multiple: 1.33 # model depth multiple +width_multiple: 1.25 # layer channel multiple + +# anchors +anchors: + - [10,13, 16,30, 33,23] # P3/8 + - [30,61, 62,45, 59,119] # P4/16 + - [116,90, 156,198, 373,326] # P5/32 + +# YOLOv5 backbone +backbone: + # [from, number, module, args] + [[-1, 1, Focus, [64, 3]], # 0-P1/2 + [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 + [-1, 3, C3, [128]], + [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 + [-1, 9, C3, [256]], + [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 + [-1, 9, C3, [512]], + [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 + [-1, 1, SPP, [1024, [5, 9, 13]]], + [-1, 3, C3, [1024, False]], # 9 + ] + +# YOLOv5 head +head: + [[-1, 1, Conv, [512, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 6], 1, Concat, [1]], # cat backbone P4 + [-1, 3, C3, [512, False]], # 13 + + [-1, 1, Conv, [256, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 4], 1, Concat, [1]], # cat backbone P3 + [-1, 3, C3, [256, False]], # 17 (P3/8-small) + + [-1, 1, Conv, [256, 3, 2]], + [[-1, 14], 1, Concat, [1]], # cat head P4 + [-1, 3, C3, [512, False]], # 20 (P4/16-medium) + + [-1, 1, Conv, [512, 3, 2]], + [[-1, 10], 1, Concat, [1]], # cat head P5 + [-1, 3, C3, [1024, False]], # 23 (P5/32-large) + + [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) + ]