diff --git a/cfg/training/yolov7-tiny.yaml b/cfg/training/yolov7-tiny.yaml
index 3679b0d557..39e92a264d 100644
--- a/cfg/training/yolov7-tiny.yaml
+++ b/cfg/training/yolov7-tiny.yaml
@@ -1,6 +1,6 @@
 # parameters
-nc: 80 # number of classes
-depth_multiple: 1.0 # model depth multiple
+nc: 2 # number of classes
+depth_multiple: 1.0 # model depth multiple @@ HK TODO:
 width_multiple: 1.0 # layer channel multiple

 # anchors
diff --git a/data/coco_2_tir.yaml b/data/coco_2_tir.yaml
new file mode 100644
index 0000000000..a5f810798d
--- /dev/null
+++ b/data/coco_2_tir.yaml
@@ -0,0 +1,21 @@
+# COCO 2017 dataset http://cocodataset.org
+
+# download command/URL (optional)
+path: /home/hanoch/projects/tir_frames_rois/yolo7_tir_coco_classes_data_all #/home/hanoch/projects/tir_frames_rois/yolo7_tir_data_all #/home/hanochk/tir_frames_rois/yolo7_tir_data
+# train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
+train: ./yolov7/tir_od/training_set.txt #./yolov7/tir_od/val_set_1_file.txt #./yolov7/tir_od/training_set.txt #./yolov7/tir_od/training_set.txt # 118287 images
+val: ./yolov7/tir_od/validation_set.txt # ./yolov7/tir_od/validation_set.txt #./yolov7/tir_od/val_tir_od.txt #./yolov7/tir_od/validation_set.txt # 5000 images
+
+# number of classes
+nc: 80
+
+# class names
+names: [ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
+         'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
+         'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
+         'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
+         'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
+         'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
+         'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
+         'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
+         'hair drier', 'toothbrush' ]
diff --git a/data/hyp.scratch.tiny.yaml b/data/hyp.scratch.tiny.yaml
index b0dc14ae1b..eec5871260 100644
--- a/data/hyp.scratch.tiny.yaml
+++ b/data/hyp.scratch.tiny.yaml
@@ -1,4 +1,4 @@
-lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3)
+lr0: 0.0005 # initial learning rate (SGD=1E-2, Adam=1E-3)
 lrf: 0.01 # final OneCycleLR learning rate (lr0 * lrf)
 momentum: 0.937 # SGD momentum/Adam beta1
 weight_decay: 0.0005 # optimizer weight decay 5e-4
@@ -14,12 +14,12 @@ iou_t: 0.20 # IoU training threshold
 anchor_t: 4.0 # anchor-multiple threshold
 # anchors: 3 # anchors per output layer (0 to ignore)
 fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5)
-hsv_h: 0.015 # image HSV-Hue augmentation (fraction)
-hsv_s: 0.7 # image HSV-Saturation augmentation (fraction)
-hsv_v: 0.4 # image HSV-Value augmentation (fraction)
+hsv_h: 0.0 # image HSV-Hue augmentation (fraction)
+hsv_s: 0.0 # image HSV-Saturation augmentation (fraction)
+hsv_v: 0.0 # image HSV-Value augmentation (fraction)
 degrees: 0.0 # image rotation (+/- deg)
-translate: 0.1 # image translation (+/- fraction)
-scale: 0.5 # image scale (+/- gain)
+translate: 0.0 # image translation (+/- fraction)
+scale: 0.0 # image scale (+/- gain)
 shear: 0.0 # image shear (+/- deg)
 perspective: 0.0 # image perspective (+/- fraction), range 0-0.001
 flipud: 0.0 # image flip up-down (probability)
diff --git a/data/hyp.tir_od.tiny.yaml b/data/hyp.tir_od.tiny.yaml
new file mode 100644
index 0000000000..bb3983cf89
--- /dev/null
+++ b/data/hyp.tir_od.tiny.yaml
@@ -0,0 +1,36 @@
+lr0: 0.001 #0.001 # initial learning rate (SGD=1E-2, Adam=1E-3)
+lrf: 0.01 # final OneCycleLR learning rate (lr0 * lrf)
+momentum: 0.937 # SGD momentum/Adam beta1
+weight_decay: 0.005 # optimizer weight decay 5e-4; it resolves the mAP issue seen in the overfitting test
+warmup_epochs: 3.0 # warmup epochs (fractions ok)
+warmup_momentum: 0.8 # warmup initial momentum
+warmup_bias_lr: 0.001 #0.001 # warmup initial bias lr
+box: 0.05 # box loss gain
+cls: 0.5 # cls loss gain
+cls_pw: 1.0 # cls BCELoss positive_weight
+obj: 1.0 # obj loss gain (scale with pixels)
+obj_pw: 1.0 # obj BCELoss positive_weight
+iou_t: 0.20 # IoU training threshold
+anchor_t: 4.0 # anchor-multiple threshold
+anchors: 2 # anchors per output layer (0 to ignore) @@HK was 3
+fl_gamma: 1.5 #1.5 # focal loss gamma (efficientDet default gamma=1.5)
+hsv_h: 0.0 # image HSV-Hue augmentation (fraction)
+hsv_s: 0.0 # image HSV-Saturation augmentation (fraction)
+hsv_v: 0.0 # image HSV-Value augmentation (fraction)
+degrees: 0.0 # image rotation (+/- deg)
+translate: 0.0 # image translation (+/- fraction)
+scale: 0.0 # image scale (+/- gain)
+shear: 0.0 # image shear (+/- deg)
+perspective: 0.0 # image perspective (+/- fraction), range 0-0.001
+flipud: 0.3 # image flip up-down (probability)
+fliplr: 0.5 # image flip left-right (probability)
+mosaic: 0.0 # image mosaic (probability)
+mixup: 0.0 # image mixup (probability)
+copy_paste: 0.0 # image copy paste (probability)
+paste_in: 0.0 # image copy paste (probability), use 0 for faster training : cutout
+loss_ota: 0 #1 # use ComputeLossOTA, use 0 for faster training
+inversion: 0.5 # opposite temperature (polarity inversion)
+drc_per_ch_percentile: 0.3 #[0, 0.2, 0.5]
+img_percentile_removal: 0.3
+beta : 0.3
+random_perspective : 0
diff --git a/data/hyp.tir_od.tiny_aug.yaml b/data/hyp.tir_od.tiny_aug.yaml
new file mode 100644
index 0000000000..20190a9451
--- /dev/null
+++ b/data/hyp.tir_od.tiny_aug.yaml
@@ -0,0 +1,36 @@
+lr0: 0.005 #0.001 # initial learning rate (SGD=1E-2, Adam=1E-3)
+lrf: 0.01 # final OneCycleLR learning rate (lr0 * lrf)
+momentum: 0.937 # SGD momentum/Adam beta1
+weight_decay: 0.005 # optimizer weight decay 5e-4; it resolves the mAP issue seen in the overfitting test
+warmup_epochs: 3.0 # warmup epochs (fractions ok)
+warmup_momentum: 0.8 # warmup initial momentum
+warmup_bias_lr: 0.001 #0.001 # warmup initial bias lr
+box: 0.05 # box loss gain
+cls: 0.5 # cls loss gain
+cls_pw: 1.0 # cls BCELoss positive_weight
+obj: 1.0 # obj loss gain (scale with pixels)
+obj_pw: 1.0 # obj BCELoss positive_weight
+iou_t: 0.20 # IoU training threshold
+anchor_t: 4.0 # anchor-multiple threshold
+anchors: 2 # anchors per output layer (0 to ignore) @@HK was 3
+fl_gamma: 1.5 #1.5 # focal loss gamma (efficientDet default gamma=1.5)
+hsv_h: 0.0 # image HSV-Hue augmentation (fraction)
+hsv_s: 0.0 # image HSV-Saturation augmentation (fraction)
+hsv_v: 0.0 # image HSV-Value augmentation (fraction)
+degrees: 0 # image rotation (+/- deg)
+translate: 0 # image translation (+/- fraction)
+scale: 0 # image scale (+/- gain)
+shear: 0.0 # image shear (+/- deg)
+perspective: 0.0 # image perspective (+/- fraction), range 0-0.001
+flipud: 0.3 # image flip up-down (probability)
+fliplr: 0.5 # image flip left-right (probability)
+mosaic: 0.5 # image mosaic (probability)
+mixup: 0.15 # image mixup (probability)
+copy_paste: 0.0 # image copy paste (probability)
+paste_in: 0.0 # image copy paste (probability), use 0 for faster training : cutout
+loss_ota: 0 #1 # use ComputeLossOTA, use 0 for faster training
+inversion: 0.5 # opposite temperature (polarity inversion)
+tir_channel_expansion: 0.3 #[0, 0.2, 0.5]
+img_percentile_removal: 0.3
+beta : 0.3
+random_perspective : 0
diff --git a/data/hyp.tir_od.tiny_aug_no_inversion.yaml b/data/hyp.tir_od.tiny_aug_no_inversion.yaml
new file mode 100644
index 0000000000..6d2ac9f2f8
--- /dev/null
+++ b/data/hyp.tir_od.tiny_aug_no_inversion.yaml
@@ -0,0 +1,36 @@
+lr0: 0.005 #0.001 # initial learning rate (SGD=1E-2, Adam=1E-3)
+lrf: 0.01 # final OneCycleLR learning rate (lr0 * lrf)
+momentum: 0.937 # SGD momentum/Adam beta1
+weight_decay: 0.005 # optimizer weight decay 5e-4; it resolves the mAP issue seen in the overfitting test
+warmup_epochs: 3.0 # warmup epochs (fractions ok)
+warmup_momentum: 0.8 # warmup initial momentum
+warmup_bias_lr: 0.001 #0.001 # warmup initial bias lr
+box: 0.05 # box loss gain
+cls: 0.5 # cls loss gain
+cls_pw: 1.0 # cls BCELoss positive_weight
+obj: 1.0 # obj loss gain (scale with pixels)
+obj_pw: 1.0 # obj BCELoss positive_weight
+iou_t: 0.20 # IoU training threshold
+anchor_t: 4.0 # anchor-multiple threshold
+anchors: 2 # anchors per output layer (0 to ignore) @@HK was 3
+fl_gamma: 1.5 #1.5 # focal loss gamma (efficientDet default gamma=1.5)
+hsv_h: 0.0 # image HSV-Hue augmentation (fraction)
+hsv_s: 0.0 # image HSV-Saturation augmentation (fraction)
+hsv_v: 0.0 # image HSV-Value augmentation (fraction)
+degrees: 0 # image rotation (+/- deg)
+translate: 0 # image translation (+/- fraction)
+scale: 0 # image scale (+/- gain)
+shear: 0.0 # image shear (+/- deg)
+perspective: 0.0 # image perspective (+/- fraction), range 0-0.001
+flipud: 0.3 # image flip up-down (probability)
+fliplr: 0.5 # image flip left-right (probability)
+mosaic: 0.5 # image mosaic (probability)
+mixup: 0.15 # image mixup (probability)
+copy_paste: 0.0 # image copy paste (probability)
+paste_in: 0.0 # image copy paste (probability), use 0 for faster training : cutout
+loss_ota: 0 #1 # use ComputeLossOTA, use 0 for faster training
+inversion: 0 # opposite temperature (polarity inversion)
+tir_channel_expansion: 0.3 #[0, 0.2, 0.5]
+img_percentile_removal: 0.3
+beta : 0.3
+random_perspective : 0
diff --git a/data/hyp.tir_od.tiny_aug_no_inversion_paste_in.yaml b/data/hyp.tir_od.tiny_aug_no_inversion_paste_in.yaml
new file mode 100644
index 0000000000..2c31ac0e33
--- /dev/null
+++ b/data/hyp.tir_od.tiny_aug_no_inversion_paste_in.yaml
@@ -0,0 +1,36 @@
+lr0: 0.005 #0.001 # initial learning rate (SGD=1E-2, Adam=1E-3)
+lrf: 0.01 # final OneCycleLR learning rate (lr0 * lrf)
+momentum: 0.937 # SGD momentum/Adam beta1
+weight_decay: 0.005 # optimizer weight decay 5e-4; it resolves the mAP issue seen in the overfitting test
+warmup_epochs: 3.0 # warmup epochs (fractions ok)
+warmup_momentum: 0.8 # warmup initial momentum
+warmup_bias_lr: 0.001 #0.001 # warmup initial bias lr
+box: 0.05 # box loss gain
+cls: 0.5 # cls loss gain
+cls_pw: 1.0 # cls BCELoss positive_weight
+obj: 1.0 # obj loss gain (scale with pixels)
+obj_pw: 1.0 # obj BCELoss positive_weight
+iou_t: 0.20 # IoU training threshold
+anchor_t: 4.0 # anchor-multiple threshold
+anchors: 2 # anchors per output layer (0 to ignore) @@HK was 3
+fl_gamma: 1.5 #1.5 # focal loss gamma (efficientDet default gamma=1.5)
+hsv_h: 0.0 # image HSV-Hue augmentation (fraction)
+hsv_s: 0.0 # image HSV-Saturation augmentation (fraction)
+hsv_v: 0.0 # image HSV-Value augmentation (fraction)
+degrees: 0 # image rotation (+/- deg)
+translate: 0 # image translation (+/- fraction)
+scale: 0 # image scale (+/- gain)
+shear: 0.0 # image shear (+/- deg)
+perspective: 0.0 # image perspective (+/- fraction), range 0-0.001
+flipud: 0.3 # image flip up-down (probability)
+fliplr: 0.5 # image flip left-right (probability)
+mosaic: 0.5 # image mosaic (probability)
+mixup: 0.15 # image mixup (probability)
+copy_paste: 0.0 # image copy paste (probability)
+paste_in: 0.3 # image copy paste (probability), use 0 for faster training : cutout
+loss_ota: 0 #1 # use ComputeLossOTA, use 0 for faster training
+inversion: 0 # opposite temperature (polarity inversion)
+tir_channel_expansion: 0.3 #[0, 0.2, 0.5]
+img_percentile_removal: 0.3
+beta : 0.3
+random_perspective : 0
diff --git a/data/hyp.tir_od.tiny_lr0_0p005.yaml b/data/hyp.tir_od.tiny_lr0_0p005.yaml
new file mode 100644
index 0000000000..5574ec64a5
--- /dev/null
+++ b/data/hyp.tir_od.tiny_lr0_0p005.yaml
@@ -0,0 +1,36 @@
+lr0: 0.005 #0.001 # initial learning rate (SGD=1E-2, Adam=1E-3)
+lrf: 0.01 # final OneCycleLR learning rate (lr0 * lrf)
+momentum: 0.937 # SGD momentum/Adam beta1
+weight_decay: 0.005 # optimizer weight decay 5e-4; it resolves the mAP issue seen in the overfitting test
+warmup_epochs: 3.0 # warmup epochs (fractions ok)
+warmup_momentum: 0.8 # warmup initial momentum
+warmup_bias_lr: 0.001 #0.001 # warmup initial bias lr
+box: 0.05 # box loss gain
+cls: 0.5 # cls loss gain
+cls_pw: 1.0 # cls BCELoss positive_weight
+obj: 1.0 # obj loss gain (scale with pixels)
+obj_pw: 1.0 # obj BCELoss positive_weight
+iou_t: 0.20 # IoU training threshold
+anchor_t: 4.0 # anchor-multiple threshold
+anchors: 2 # anchors per output layer (0 to ignore) @@HK was 3
+fl_gamma: 1.5 #1.5 # focal loss gamma (efficientDet default gamma=1.5)
+hsv_h: 0.0 # image HSV-Hue augmentation (fraction)
+hsv_s: 0.0 # image HSV-Saturation augmentation (fraction)
+hsv_v: 0.0 # image HSV-Value augmentation (fraction)
+degrees: 0.0 # image rotation (+/- deg)
+translate: 0.0 # image translation (+/- fraction)
+scale: 0.0 # image scale (+/- gain)
+shear: 0.0 # image shear (+/- deg)
+perspective: 0.0 # image perspective (+/- fraction), range 0-0.001
+flipud: 0.3 # image flip up-down (probability)
+fliplr: 0.5 # image flip left-right (probability)
+mosaic: 0.0 # image mosaic (probability)
+mixup: 0.0 # image mixup (probability)
+copy_paste: 0.0 # image copy paste (probability)
+paste_in: 0.0 # image copy paste (probability), use 0 for faster training : cutout
+loss_ota: 0 #1 # use ComputeLossOTA, use 0 for faster training
+inversion: 0.5 # opposite temperature (polarity inversion)
+drc_per_ch_percentile: [0, 0.2, 0.5]
+img_percentile_removal: 0.3
+beta : 0.3
+random_perspective : 0
diff --git a/data/hyp.tir_od.tiny_lr0_0p01.yaml b/data/hyp.tir_od.tiny_lr0_0p01.yaml
new file mode 100644
index 0000000000..44e1b490dc
--- /dev/null
+++ b/data/hyp.tir_od.tiny_lr0_0p01.yaml
@@ -0,0 +1,36 @@
+lr0: 0.01 #0.001 # initial learning rate (SGD=1E-2, Adam=1E-3)
+lrf: 0.01 # final OneCycleLR learning rate (lr0 * lrf)
+momentum: 0.937 # SGD momentum/Adam beta1
+weight_decay: 0.005 # optimizer weight decay 5e-4; it resolves the mAP issue seen in the overfitting test
+warmup_epochs: 3.0 # warmup epochs (fractions ok)
+warmup_momentum: 0.8 # warmup initial momentum
+warmup_bias_lr: 0.001 #0.001 # warmup initial bias lr
+box: 0.05 # box loss gain
+cls: 0.5 # cls loss gain
+cls_pw: 1.0 # cls BCELoss positive_weight
+obj: 1.0 # obj loss gain (scale with pixels)
+obj_pw: 1.0 # obj BCELoss positive_weight
+iou_t: 0.20 # IoU training threshold
+anchor_t: 4.0 # anchor-multiple threshold
+anchors: 2 # anchors per output layer (0 to ignore) @@HK was 3
+fl_gamma: 1.5 #1.5 # focal loss gamma (efficientDet default gamma=1.5)
+hsv_h: 0.0 # image HSV-Hue augmentation (fraction)
+hsv_s: 0.0 # image HSV-Saturation augmentation (fraction)
+hsv_v: 0.0 # image HSV-Value augmentation (fraction)
+degrees: 0.0 # image rotation (+/- deg)
+translate: 0.0 # image translation (+/- fraction)
+scale: 0.0 # image scale (+/- gain)
+shear: 0.0 # image shear (+/- deg)
+perspective: 0.0 # image perspective (+/- fraction), range 0-0.001
+flipud: 0.3 # image flip up-down (probability)
+fliplr: 0.5 # image flip left-right (probability)
+mosaic: 0.0 # image mosaic (probability)
+mixup: 0.0 # image mixup (probability)
+copy_paste: 0.0 # image copy paste (probability)
+paste_in: 0.0 # image copy paste (probability), use 0 for faster training : cutout
+loss_ota: 0 #1 # use ComputeLossOTA, use 0 for faster training
+inversion: 0.5 # opposite temperature (polarity inversion)
+drc_per_ch_percentile: [0, 0.2, 0.5]
+img_percentile_removal: 0.3
+beta : 0.3
+random_perspective : 0
diff --git a/data/hyp.tir_od.tiny_lr0_0p05.yaml b/data/hyp.tir_od.tiny_lr0_0p05.yaml
new file mode 100644
index 0000000000..464ea2731e
--- /dev/null
+++ b/data/hyp.tir_od.tiny_lr0_0p05.yaml
@@ -0,0 +1,36 @@
+lr0: 0.05 #0.001 # initial learning rate (SGD=1E-2, Adam=1E-3)
+lrf: 0.01 # final OneCycleLR learning rate (lr0 * lrf)
+momentum: 0.937 # SGD momentum/Adam beta1
+weight_decay: 0.005 # optimizer weight decay 5e-4; it resolves the mAP issue seen in the overfitting test
+warmup_epochs: 3.0 # warmup epochs (fractions ok)
+warmup_momentum: 0.8 # warmup initial momentum
+warmup_bias_lr: 0.001 #0.001 # warmup initial bias lr
+box: 0.05 # box loss gain
+cls: 0.5 # cls loss gain
+cls_pw: 1.0 # cls BCELoss positive_weight
+obj: 1.0 # obj loss gain (scale with pixels)
+obj_pw: 1.0 # obj BCELoss positive_weight
+iou_t: 0.20 # IoU training threshold
+anchor_t: 4.0 # anchor-multiple threshold
+anchors: 2 # anchors per output layer (0 to ignore) @@HK was 3
+fl_gamma: 1.5 #1.5 # focal loss gamma (efficientDet default gamma=1.5)
+hsv_h: 0.0 # image HSV-Hue augmentation (fraction)
+hsv_s: 0.0 # image HSV-Saturation augmentation (fraction)
+hsv_v: 0.0 # image HSV-Value augmentation (fraction)
+degrees: 0.0 # image rotation (+/- deg)
+translate: 0.0 # image translation (+/- fraction)
+scale: 0.0 # image scale (+/- gain)
+shear: 0.0 # image shear (+/- deg)
+perspective: 0.0 # image perspective (+/- fraction), range 0-0.001
+flipud: 0.3 # image flip up-down (probability)
+fliplr: 0.5 # image flip left-right (probability)
+mosaic: 0.0 # image mosaic (probability)
+mixup: 0.0 # image mixup (probability)
+copy_paste: 0.0 # image copy paste (probability)
+paste_in: 0.0 # image copy paste (probability), use 0 for faster training : cutout
+loss_ota: 0 #1 # use ComputeLossOTA, use 0 for faster training
+inversion: 0.5 # opposite temperature (polarity inversion)
+drc_per_ch_percentile: [0, 0.2, 0.5]
+img_percentile_removal: 0.3
+beta : 0.3
+random_perspective : 0
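Note: the seven hyp files above extend the stock YOLOv7 hyperparameter schema with TIR-specific keys (inversion, drc_per_ch_percentile / tir_channel_expansion, img_percentile_removal, beta, random_perspective). A minimal sketch of how a dataloader could consume two of these keys, assuming the YAML paths above; augment_tir is a hypothetical helper, not code from this patch:

import random
import numpy as np
import yaml

with open('data/hyp.tir_od.tiny.yaml') as f:
    hyp = yaml.safe_load(f)

def augment_tir(img, hyp):
    # polarity inversion: swap hot/cold with probability hyp['inversion'] (assumed semantics)
    if random.random() < hyp.get('inversion', 0):
        img = img.max() - img
    # clip gray-level outliers at the hyp['img_percentile_removal'] percentiles
    p = hyp.get('img_percentile_removal', 0)
    lo, hi = np.percentile(img, p), np.percentile(img, 100 - p)
    return np.clip(img, lo, hi)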
diff --git a/data/tir_od.yaml b/data/tir_od.yaml
new file mode 100644
index 0000000000..3a2ad0b959
--- /dev/null
+++ b/data/tir_od.yaml
@@ -0,0 +1,17 @@
+# COCO 2017 dataset http://cocodataset.org
+
+# download command/URL (optional)
+# Make a symbolic link:
+# sudo ln -s ~hanoch/projects/tir_frames_rois /mnt/Data/hanoch/tir_frames_rois
+path: /mnt/Data/hanoch/tir_frames_rois/yolo7_tir_data_all #/home/hanoch/projects/tir_frames_rois/tir_car_44person_31 #/home/hanochk/tir_frames_rois/yolo7_tir_data
+# train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
+train: ./yolov7/tir_od/training_set.txt #./yolov7/tir_od/training_set.txt # ./yolov7/tir_od/tir_tiff_car_person_min_size_44_31_training_set.txt #./yolov7/tir_od/training_set.txt #./yolov7/tir_od/training_set.txt # 118287 images
+val: ./yolov7/tir_od/validation_set.txt #./yolov7/tir_od/tir_tiff_car_person_min_size_44_31_validation_set.txt #./yolov7/tir_od/validation_set.txt #./yolov7/tir_od/val_tir_od.txt #./yolov7/tir_od/validation_set.txt # 5000 images
+test: ./yolov7/tir_od/tir_tiff_tiff_folder_test_set.txt # data at tir_tiff_tiff_files
+#test: ./tir_od/test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794
+
+# number of classes
+nc: 2
+
+# class names
+names: ['car', 'person']
\ No newline at end of file
diff --git a/data/tir_od_test_set.yaml b/data/tir_od_test_set.yaml
new file mode 100644
index 0000000000..f05056ade1
--- /dev/null
+++ b/data/tir_od_test_set.yaml
@@ -0,0 +1,16 @@
+# COCO 2017 dataset http://cocodataset.org
+
+# download command/URL (optional)
+
+path: /home/hanoch/projects/tir_frames_rois/tir_tiff_tiff_files #/home/hanoch/projects/tir_frames_rois/tir_car_44person_31 #/home/hanochk/tir_frames_rois/yolo7_tir_data
+# train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
+#train: ./yolov7/tir_od/training_set.txt #./yolov7/tir_od/training_set.txt # ./yolov7/tir_od/tir_tiff_car_person_min_size_44_31_training_set.txt #./yolov7/tir_od/training_set.txt #./yolov7/tir_od/training_set.txt # 118287 images
+#val: ./yolov7/tir_od/validation_set.txt #./yolov7/tir_od/tir_tiff_car_person_min_size_44_31_validation_set.txt #./yolov7/tir_od/validation_set.txt #./yolov7/tir_od/val_tir_od.txt #./yolov7/tir_od/validation_set.txt # 5000 images
+test: ./yolov7/tir_od/test_set/tir_tiff_tiff_folder_test_set.txt # data at tir_tiff_tiff_files
+#test: ./tir_od/test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794
+
+# number of classes
+nc: 2
+
+# class names
+names: ['car', 'person']
\ No newline at end of file
diff --git a/detect.py b/detect.py
index 5e0c4416a4..11eadbef14 100644
--- a/detect.py
+++ b/detect.py
@@ -1,4 +1,5 @@
 import argparse
+import copy
 import time
 from pathlib import Path
@@ -6,9 +7,12 @@
 import torch
 import torch.backends.cudnn as cudnn
 from numpy import random
-
+import os
+import tifffile
+import copy
+import numpy as np
 from models.experimental import attempt_load
-from utils.datasets import LoadStreams, LoadImages
+from utils.datasets import LoadStreams, LoadImages, scaling_image
 from utils.general import check_img_size, check_requirements, check_imshow, non_max_suppression, apply_classifier, \
     scale_coords, xyxy2xywh, strip_optimizer, set_logging, increment_path
 from utils.plots import plot_one_box
@@ -54,7 +58,7 @@ def detect(save_img=False):
         cudnn.benchmark = True  # set True to speed up constant image size inference
         dataset = LoadStreams(source, img_size=imgsz, stride=stride)
     else:
-        dataset = LoadImages(source, img_size=imgsz, stride=stride)
+        dataset = LoadImages(source, img_size=imgsz, stride=stride, scaling_type=opt.norm_type)

     # Get names and colors
     names = model.module.names if hasattr(model, 'module') else model.names
@@ -68,9 +72,18 @@
     t0 = time.time()
     for path, img, im0s, vid_cap in dataset:
+
+        if os.path.basename(path).split('.')[1] == 'tiff':
+            im0s = np.repeat(im0s[:, :, np.newaxis], 3, axis=2)  # convert gray level to RGB by replication
+            im0s = scaling_image(im0s, scaling_type=opt.norm_type)
+            if im0s.max() <= 1:
+                im0s = im0s * 255
+
+        # im0s = copy.deepcopy(np.uint8(img.transpose(1, 2, 0) * 255.0))
+
         img = torch.from_numpy(img).to(device)
         img = img.half() if half else img.float()  # uint8 to fp16/32
-        img /= 255.0  # 0 - 255 to 0.0 - 1.0
+        # img /= 255.0  # 0 - 255 to 0.0 - 1.0
         if img.ndimension() == 3:
             img = img.unsqueeze(0)
@@ -139,7 +152,13 @@
             # Save results (image with detections)
             if save_img:
                 if dataset.mode == 'image':
-                    cv2.imwrite(save_path, im0)
+                    print(save_path, os.path.basename(save_path).split('.'))
+                    if os.path.basename(save_path).split('.')[1] == 'tiff':
+                        save_path = os.path.join(os.path.dirname(save_path), os.path.basename(save_path).split('.')[0] + '.png')
+                        cv2.imwrite(save_path, im0)
+                    else:
+                        cv2.imwrite(save_path, im0)
                     print(f" The image with the result is saved in: {save_path}")
                 else:  # 'video' or 'stream'
                     if vid_path != save_path:  # new video
@@ -183,6 +202,11 @@
     parser.add_argument('--name', default='exp', help='save results to project/name')
     parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
     parser.add_argument('--no-trace', action='store_true', help='don`t trace model')
+    parser.add_argument('--norm-type', type=str, default='standardization',
+                        choices=['standardization', 'single_image_0_to_1', 'single_image_mean_std', 'single_image_percentile_0_255',
+                                 'single_image_percentile_0_1', 'remove+global_outlier_0_1'],
+                        help='Normalization approach')
+
     opt = parser.parse_args()
     print(opt)
     #check_requirements(exclude=('pycocotools', 'thop'))
@@ -194,3 +218,12 @@
                 strip_optimizer(opt.weights)
         else:
             detect()
+
+"""
+python detect.py --weights yolov7.pt --conf 0.25 --img-size 640 --source inference/images/horses.jpg
+python -u ./yolov7/detect.py --weights ./yolov7/yolov7.pt --conf 0.25 --img-size 640 --device 0 --save-txt --source /home/hanoch/projects/tir_frames_rois/png/Rotem_test_22c_dec18.png
+python -u ./yolov7/detect.py --weights ./yolov7/yolov7.pt --conf 0.25 --img-size 640 --device 0 --save-txt --norm-type single_image_percentile_0_1 --source /home/hanoch/projects/tir_frames_rois/yolo7_tir_data_all/TIR10_v20_Dec18_Test22C_20181127_223533_FS_210F_0001_5500_ROTEM_left_roi_220_4707.tiff
+--weights ./yolov7/yolov7.pt --conf 0.25 --img-size 640 --device 0 --save-txt --norm-type single_image_percentile_0_1 --source /home/hanoch/projects/tir_frames_rois/yolo7_tir_data_all/TIR10_v20_Dec18_Test22C_20181127_223533_FS_210F_0001_5500_ROTEM_left_roi_220_4707.tiff
+--weights ./yolov7/yolov7.pt --conf 0.25 --img-size 640 --device 0 --save-txt --norm-type single_image_percentile_0_1 --source /home/hanoch/projects/tir_frames_rois/yolo7_tir_data_all/TIR10_V50_OCT21_Test46A_ML_RD_IL_2021_08_05_14_48_05_FS_210_XGA_630_922_DENIS_right_roi_210_881.tiff
+--weights ./yolov7/yolov7.pt --conf 0.25 --img-size 640 --device 0 --save-txt --norm-type single_image_percentile_0_1 --source /home/hanoch/projects/tir_frames_rois/yolo7_tir_data_all/TIR135_V80_JUL23_Test55A_SY_RD_US_2023_01_18_07_29_38_FS_50_XGA_0001_3562_Shahar_left_roi_50_1348.tiff
+"""
\ No newline at end of file
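Note: the detect.py change above repeats the single TIR channel three times and replaces the fixed /255 scaling with scaling_image (added to utils/datasets.py at the end of this diff). A standalone sketch of that preprocessing path, assuming a 16-bit single-channel input; 'frame.tiff' is a placeholder name:

import numpy as np
import tifffile
from utils.datasets import scaling_image

im0s = tifffile.imread('frame.tiff')                 # (H, W) uint16 TIR frame
im0s = np.repeat(im0s[:, :, np.newaxis], 3, axis=2)  # gray level -> 3 channels by replication
im0s = scaling_image(im0s, scaling_type='single_image_percentile_0_1')
if im0s.max() <= 1:                                  # rescale to 0-255 for drawing/saving with OpenCV
    im0s = im0s * 255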
diff --git a/models/common.py b/models/common.py
index edb5edc9fe..aab60cd973 100644
--- a/models/common.py
+++ b/models/common.py
@@ -8,7 +8,7 @@
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
-from torchvision.ops import DeformConv2d
+# from torchvision.ops import DeformConv2d
 from PIL import Image
 from torch.cuda import amp
diff --git a/models/yolo.py b/models/yolo.py
index 95a019c6ae..ad0d327ed6 100644
--- a/models/yolo.py
+++ b/models/yolo.py
@@ -20,7 +20,7 @@
     thop = None

-class Detect(nn.Module):
+class Detect(nn.Module):  # classifier head
     stride = None  # strides computed during build
     export = False  # onnx export
     end2end = False
@@ -93,7 +93,9 @@ def convert(self, z):
             box @= convert_matrix
             return (box, score)

-
+# IDetect adds YOLOR implicit knowledge modeling. Where the Detect block just uses a 1x1 Conv to produce the anchor outputs, HK
+# IDetect first passes the inputs through an ImplicitA block, then the Conv block, and then an ImplicitM block.
+# https://www.kaggle.com/code/jobayerhossain/yolov7-explanation-and-implementation-from-scratch
 class IDetect(nn.Module):
     stride = None  # strides computed during build
     export = False  # onnx export
@@ -575,7 +577,7 @@
         # Init weights, biases
         initialize_weights(self)
-        self.info()
+        self.info(verbose=False)
         logger.info('')

     def forward(self, x, augment=False, profile=False):
@@ -706,7 +708,7 @@ def fuse(self):  # fuse model Conv2d() + BatchNorm2d() layers
         elif isinstance(m, (IDetect, IAuxDetect)):
             m.fuse()
             m.forward = m.fuseforward
-        self.info()
+        self.info(verbose=True)
         return self

     def nms(self, mode=True):  # add or remove NMS module
diff --git a/requirements.txt b/requirements.txt
index f4d218218a..a3a047d8e6 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,8 +1,38 @@
 # Usage: pip install -r requirements.txt
+absl-py==2.1.0
+asttokens==2.4.1
+attrs==24.2.0
+certifi==2024.7.4
+charset-normalizer==3.3.2
+clearml==1.16.4
+contourpy==1.2.1
+cycler==0.12.1
+decorator==5.1.1
+et-xmlfile==1.1.0
+exceptiongroup==1.2.2
+executing==2.0.1
+filelock==3.15.4
+fonttools==4.53.1
+fsspec==2024.6.1
+furl==2.1.3
+grpcio==1.65.5
+idna==3.7
+imageio==2.35.1
+jedi==0.19.1
+Jinja2==3.1.4
+joblib==1.4.2
+jsonschema==4.23.0
+jsonschema-specifications==2023.12.1
+kiwisolver==1.4.5
+lazy_loader==0.4
+Markdown==3.7
+MarkupSafe==2.1.5
+mpmath==1.3.0
+networkx==3.3

 # Base ----------------------------------------
 matplotlib>=3.2.2
-numpy>=1.18.5,<1.24.0
+#numpy>=1.18.5,<1.24.0
 opencv-python>=4.1.1
 Pillow>=7.1.2
 PyYAML>=5.3.1
@@ -37,3 +67,31 @@ thop  # FLOPs computation
 # albumentations>=1.0.3
 # pycocotools>=2.0  # COCO mAP
 # roboflow
+
+openpyxl==3.1.5
+orderedmultidict==1.0.1
+packaging==24.1
+parso==0.8.4
+pathlib2==2.3.7.post1
+pexpect==4.9.0
+prompt_toolkit==3.0.47
+ptyprocess==0.7.0
+pure_eval==0.2.3
+Pygments==2.18.0
+PyJWT==2.8.0
+pyparsing==3.1.2
+python-dateutil==2.9.0.post0
+pytz==2024.1
+PyYAML==6.0.2
+referencing==0.35.1
+requests==2.32.3
+rpds-py==0.20.0
+scikit-image==0.24.0
+scikit-learn==1.5.1
+six==1.16.0
+slicerator==1.1.0
+stack-data==0.6.3
+sympy==1.13.2
+tensorboard-data-server==0.7.2
+threadpoolctl==3.5.0
+tifffile==2024.8.10
\ No newline at end of file
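Back-reference for the IDetect note in models/yolo.py above: the two implicit-knowledge blocks it mentions look roughly like this in the upstream YOLOv7/YOLOR code (paraphrased for reference, not part of this patch). ImplicitA adds a learned per-channel bias before the 1x1 detection conv; ImplicitM rescales its output:

import torch
import torch.nn as nn

class ImplicitA(nn.Module):
    def __init__(self, channel, mean=0., std=.02):
        super().__init__()
        self.implicit = nn.Parameter(torch.zeros(1, channel, 1, 1))  # learned additive prior
        nn.init.normal_(self.implicit, mean=mean, std=std)

    def forward(self, x):
        return self.implicit + x

class ImplicitM(nn.Module):
    def __init__(self, channel, mean=1., std=.02):
        super().__init__()
        self.implicit = nn.Parameter(torch.ones(1, channel, 1, 1))  # learned multiplicative prior
        nn.init.normal_(self.implicit, mean=mean, std=std)

    def forward(self, x):
        return self.implicit * x

# per output scale i, IDetect roughly computes im[i](m[i](ia[i](x[i]))) where Detect computes only m[i](x[i])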
diff --git a/test.py b/test.py
index 17b48060be..9189f74cf8 100644
--- a/test.py
+++ b/test.py
@@ -14,7 +14,7 @@
 from utils.general import coco80_to_coco91_class, check_dataset, check_file, check_img_size, check_requirements, \
     box_iou, non_max_suppression, scale_coords, xyxy2xywh, xywh2xyxy, set_logging, increment_path, colorstr
 from utils.metrics import ap_per_class, ConfusionMatrix
-from utils.plots import plot_images, output_to_target, plot_study_txt
+from utils.plots import plot_images, output_to_target, plot_study_txt, append_to_txt
 from utils.torch_utils import select_device, time_synchronized, TracedModel
@@ -62,8 +62,9 @@ def test(data,
         if trace:
             model = TracedModel(model, device, imgsz)
+        #torch.backends.cudnn.benchmark = True  ## uses the inbuilt cudnn auto-tuner to find the fastest convolution algorithms

-    # Half
-    half = device.type != 'cpu' and half_precision  # half precision only supported on CUDA
+    half = device.type != 'cpu' and half_precision  # half precision only supported on CUDA @@ HK: TODO what are the consequences
     if half:
         model.half()
@@ -87,7 +88,12 @@
     if device.type != 'cpu':
         model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))  # run once
     task = opt.task if opt.task in ('train', 'val', 'test') else 'val'  # path to train/val/test images
-    dataloader = create_dataloader(data[task], imgsz, batch_size, gs, opt, pad=0.5, rect=True,
+    hyp = dict()
+    hyp['person_size_small_medium_th'] = 32 * 32
+    hyp['car_size_small_medium_th'] = 44 * 44
+    hyp['img_percentile_removal'] = 0.3
+    hyp['beta'] = 0.3
+    dataloader = create_dataloader(data[task], imgsz, batch_size, gs, opt, hyp, pad=0.5, rect=False,  # rect was True  # HK@@@ TODO: why pad=0.5? only effective when rect=True at test time? https://github.com/ultralytics/ultralytics/issues/13271
                                    prefix=colorstr(f'{task}: '))[0]

     if v5_metric:
@@ -101,19 +107,21 @@
     p, r, f1, mp, mr, map50, map, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0.
     loss = torch.zeros(3, device=device)
     jdict, stats, ap, ap_class, wandb_images = [], [], [], [], []
+    stats_person_small, stats_person_medium = [], []
     for batch_i, (img, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)):
         img = img.to(device, non_blocking=True)
-        img = img.half() if half else img.float()  # uint8 to fp16/32
-        img /= 255.0  # 0 - 255 to 0.0 - 1.0
+        img = img.half() if half else img.float()  # uint8 to fp16/32
+        # img /= 255.0  # 0 - 255 to 0.0 - 1.0  # already done inside the dataloader
         targets = targets.to(device)
         nb, _, height, width = img.shape  # batch size, channels, height, width

         with torch.no_grad():
             # Run model
             t = time_synchronized()
-            out, train_out = model(img, augment=augment)  # inference and training outputs
+            out, train_out = model(img, augment=augment)  # inference (4 box coords, obj conf, cls conf) and training outputs (batch_size, anchors per scale, grid_y, grid_x, 4 bbox + 1 objectness + n_classes) over 3 different scale outputs, e.g. (2,2,80,80,7) and (2,2,40,40,7): 640/8=80, 640/16=40
             t0 += time_synchronized() - t
+            # out for the 80-class COCO head: [1, 25200, 85] = [batch, proposals over 3 scales, 4 box coords + 1 obj score + n classes]

             # Compute loss
             if compute_loss:
                 loss += compute_loss([x.float() for x in train_out], targets)[1][:3]  # box, obj, cls
@@ -122,11 +130,11 @@
             targets[:, 2:] *= torch.Tensor([width, height, width, height]).to(device)  # to pixels
             lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else []  # for autolabelling
             t = time_synchronized()
-            out = non_max_suppression(out, conf_thres=conf_thres, iou_thres=iou_thres, labels=lb, multi_label=True)
+            out = non_max_suppression(out, conf_thres=conf_thres, iou_thres=iou_thres, labels=lb, multi_label=True)  # thresholds per class: list of detections, an (n, 6) tensor per image [xyxy, conf, cls]
             t1 += time_synchronized() - t

         # Statistics per image
-        for si, pred in enumerate(out):
+        for si, pred in enumerate(out):  # [bbox coords, objectness logit, class]
             labels = targets[targets[:, 0] == si, 1:]
             nl = len(labels)
             tcls = labels[:, 0].tolist() if nl else []  # target class
@@ -139,7 +147,7 @@
                 continue

             # Predictions
-            predn = pred.clone()
+            predn = pred.clone()  # predn rows: [x1, y1, x2, y2, conf, cls]; top 300 kept after NMS
             scale_coords(img[si].shape[1:], predn[:, :4], shapes[si][0], shapes[si][1])  # native-space pred

             # Append to text file
@@ -208,11 +216,18 @@
                         if len(detected) == nl:  # all targets already located in image
                             break

-            # Append statistics (correct, conf, pcls, tcls)
+            # Append statistics (correct, conf=objectness, pcls, tcls). The predicted class is the argmax over all class logits; the confidence threshold is applied to the objectness only
             stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))
+            if not training:
+                # assert len(pred[:, :4]) == 1
+                x, y, w, h = xyxy2xywh(pred[:, :4])[0]  # note: size bucket is decided by the first prediction only
+                if w * h < hyp['person_size_small_medium_th']:
+                    stats_person_small.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))
+                else:
+                    stats_person_medium.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))

         # Plot images
-        if plots and batch_i < 3:
+        if 1 or (plots and batch_i < 10):  # forced on for every batch (was: plots and batch_i < 3)
             f = save_dir / f'test_batch{batch_i}_labels.jpg'  # labels
             Thread(target=plot_images, args=(img, targets, paths, f, names), daemon=True).start()
             f = save_dir / f'test_batch{batch_i}_pred.jpg'  # predictions
@@ -222,8 +237,14 @@
     stats = [np.concatenate(x, 0) for x in zip(*stats)]  # to numpy
     if len(stats) and stats[0].any():
         p, r, ap, f1, ap_class = ap_per_class(*stats, plot=plots, v5_metric=v5_metric, save_dir=save_dir, names=names)
+        if not training:
+            p_med, r_med, ap_med, f1_med, ap_class_med = ap_per_class(*stats_person_medium, plot=plots, v5_metric=v5_metric, save_dir=save_dir, names=names)
+            ap50_med, ap_med = ap_med[:, 0], ap_med.mean(1)  # AP@0.5, AP@0.5:0.95
+            mp_med, mr_med, map50_med, map_med = p_med.mean(), r_med.mean(), ap50_med.mean(), ap_med.mean()
+
         ap50, ap = ap[:, 0], ap.mean(1)  # AP@0.5, AP@0.5:0.95
         mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean()
+
         nt = np.bincount(stats[3].astype(np.int64), minlength=nc)  # number of targets per class
     else:
         nt = torch.zeros(1)
@@ -232,10 +253,14 @@
     pf = '%20s' + '%12i' * 2 + '%12.3g' * 4  # print format
     print(pf % ('all', seen, nt.sum(), mp, mr, map50, map))
+    file_path = os.path.join(save_dir, 'class_stats.txt')  # 'Class', 'Images', 'Labels', 'P', 'R', 'mAP@.5', 'mAP@.5:.95'
+    append_to_txt(file_path, 'all', seen, nt.sum(), mp, mr, map50, map)
+
     # Print results per class
-    if (verbose or (nc < 50 and not training)) and nc > 1 and len(stats):
+    if 1 or ((verbose or (nc < 50 and not training)) and nc > 1 and len(stats)):  # forced on
         for i, c in enumerate(ap_class):
             print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i]))
+            append_to_txt(file_path, names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i])

     # Print speeds
     t = tuple(x / seen * 1E3 for x in (t0, t1, t0 + t1)) + (imgsz, imgsz, batch_size)  # tuple
@@ -252,7 +277,7 @@
         wandb_logger.log({"Bounding Box Debugger/Images": wandb_images})

     # Save JSON
-    if save_json and len(jdict):
+    if save_json and len(jdict):  # @@ HK TODO:
         w = Path(weights[0] if isinstance(weights, list) else weights).stem if weights is not None else ''  # weights
         anno_json = './coco/annotations/instances_val2017.json'  # annotations json
         pred_json = str(save_dir / f"{w}_predictions.json")  # predictions json
@@ -286,7 +311,6 @@
             maps[c] = ap[i]
     return (mp, mr, map50, map, *(loss.cpu() / len(dataloader)).tolist()), maps, t

-
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(prog='test.py')
     parser.add_argument('--weights', nargs='+', type=str, default='yolov7.pt', help='model.pt path(s)')
@@ -294,7 +318,7 @@
     parser.add_argument('--batch-size', type=int, default=32, help='size of each image batch')
     parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)')
     parser.add_argument('--conf-thres', type=float, default=0.001, help='object confidence threshold')
-    parser.add_argument('--iou-thres', type=float, default=0.65, help='IOU threshold for NMS')
+    parser.add_argument('--iou-thres', type=float, default=0.6, help='IOU threshold for NMS')
     parser.add_argument('--task', default='val', help='train, val, test, speed or study')
     parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
     parser.add_argument('--single-cls', action='store_true', help='treat as single-class dataset')
@@ -309,7 +333,25 @@
     parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
     parser.add_argument('--no-trace', action='store_true', help='don`t trace model')
     parser.add_argument('--v5-metric', action='store_true', help='assume maximum recall as 1.0 in AP calculation')
+    parser.add_argument('--norm-type', type=str, default='standardization',
+                        choices=['standardization', 'single_image_0_to_1', 'single_image_mean_std', 'single_image_percentile_0_255',
+                                 'single_image_percentile_0_1', 'remove+global_outlier_0_1'],
+                        help='Normalization approach')
+
+    parser.add_argument('--no-tir-signal', action='store_true', help='')
+
+    parser.add_argument('--tir-channel-expansion', action='store_true', help='drc_per_ch_percentile')
+
+    parser.add_argument('--input-channels', type=int, default=3, help='')
+
     opt = parser.parse_args()
+
+    if opt.tir_channel_expansion:  # operates over 3 channels
+        opt.input_channels = 3
+
+    if opt.tir_channel_expansion and opt.norm_type != 'single_image_percentile_0_1':  # operates over 3 channels
+        print('Not a good combination')
+
     opt.save_json |= opt.data.endswith('coco.yaml')
     opt.data = check_file(opt.data)  # check file
     print(opt)
@@ -351,3 +393,12 @@
             np.savetxt(f, y, fmt='%10.4g')  # save
         os.system('zip -r study.zip study_*.txt')
         plot_study_txt(x=x)  # plot
+"""
+
+--weights ./yolov7/yolov7.pt --device 0 --batch-size 16 --data data/coco_2_tir.yaml --img-size 640 --conf 0.6 --verbose --save-txt --save-hybrid --norm-type single_image_percentile_0_1
+test based on the RGB COCO model:
+--weights ./yolov7/yolov7.pt --device 0 --batch-size 64 --data data/coco_2_tir.yaml --img-size 640 --conf 0.25 --verbose --save-txt --norm-type single_image_percentile_0_1 --project test --task train
+
+--weights ./yolov7/yolov7.pt --device 0 --batch-size 64 --data data/tir_od.yaml --img-size 640 --conf 0.25 --verbose --save-txt --norm-type single_image_percentile_0_1 --project test --task val
+
+"""
\ No newline at end of file
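Note: the size-split statistics added to test.py bucket each image's detections as "small" or "medium" by the area of the first predicted box, which is exact only when an image has a single detection (hence the commented-out assert). A self-contained sketch of that bucketing rule; the names here are illustrative, not the repo's:

def size_bucket(pred_xywh, area_th=32 * 32):
    # area_th mirrors hyp['person_size_small_medium_th'] hard-coded above
    x, y, w, h = pred_xywh[0]  # first prediction only, as in the patch
    return 'small' if w * h < area_th else 'medium'

print(size_bucket([(10.0, 12.0, 20.0, 30.0)]))  # 'small', since 600 < 1024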
diff --git a/tools/merge_results.py b/tools/merge_results.py
new file mode 100644
index 0000000000..8bec949bd5
--- /dev/null
+++ b/tools/merge_results.py
@@ -0,0 +1,151 @@
+import os
+import pandas as pd
+from argparse import ArgumentParser
+import yaml
+
+def process_class_stats(file_path):
+    columns = ['class_name', 'num_files', 'num_objects', 'precision', 'recall', 'map50', 'map']
+
+    # Read the text file into a pandas DataFrame
+    # df = pd.read_csv(file_path, delim_whitespace=True)
+    df = pd.read_csv(file_path, delim_whitespace=True, names=columns, header=None)
+
+    # Find the index where the last repetition of 'all' starts
+    last_all_index = df[df['class_name'] == 'all'].index[-1]
+
+    # Slice the DataFrame from the last 'all' row downward
+    sliced_df = df.iloc[last_all_index:]
+
+    # Create a new DataFrame with num_objects and renamed map50 columns
+    result_df = sliced_df[['class_name', 'num_objects', 'map50']].copy()
+    result_df = result_df.set_index('class_name')
+    # Rename the 'map50' column to 'class_name_map50' for each class_name
+    # result_df['class_name_map50'] = result_df['class_name'] + '_map50'
+    result_df = result_df[['map50']].T
+
+    # Rename the columns by appending '_map50'
+    result_df.columns = [f"{col}_map50" for col in result_df.columns]
+    # Select only the required columns
+    # result_df = result_df[['num_objects', 'class_name_map50']]
+
+    # Write the result to a CSV file
+    # result_df.to_csv(output_csv, index=False)
+
+    return result_df
+
+def main(args: list = None):
+    parser = ArgumentParser()
+    parser.add_argument('--path', type=str, default='/home/hanoch/projects/tir_od/runs/train', metavar='PATH',
+                        help="if given, all output of the training will be in this folder. "
+                             "The exception is the tensorboard logs.")
+
+    parser.add_argument('--task', default='train', help='train, val, test, speed or study')
+
+    args = parser.parse_args(args)
+    if 0:
+        path = '/hdd/hanoch/data/objects-data-bbox-20191106-simple-sharded-part/tile_data/test_eileen_best_qual/csv'
+        filenames = [os.path.join(path, x) for x in os.listdir(path)
+                     if x.endswith('csv')]
+
+        df_acm = pd.DataFrame()
+        for file in filenames:
+            df = pd.read_csv(file, index_col=False)
+            file_patt = df.full_file_name[0].split('/')[-1].split('.')[0].split('_')[1:]
+            df['file_name'] = file_patt[0] + '_' + "_".join(df.full_file_name[0].split('/')[-1].split('.')[0].split('_')[1:])
+            df['val'] = 0
+            df_acm = df_acm.append((df))
+
+        cols = df_acm.columns.to_list()
+        cols2 = [cols[-2]] + cols[2:-2] + [cols[-1]]
+        cols3 = cols2[:-3] + cols2[-2:]
+        df_acm = df_acm[cols3]
+        df_acm.to_csv(os.path.join(path, 'merged.csv'), index=False)
+
+    else:
+        path = args.path
+        path_result = '/home/hanoch/projects/tir_od'
+        results_columns = ['Epoch', 'gpu_mem', 'box_loss', 'obj_loss', 'cls_loss', 'total_loss', 'labels', 'img_size',
+                           'P', 'R', 'mAP@.5', 'mAP@.5:.95', 'val_box_loss', 'val_obj_loss', 'val_cls_loss']
+
+        # from pathlib import Path
+        # Path(os.path.join(path, 'merged')).mkdir(parents=True, exist_ok=True)
+        #
+        # filenames = [os.path.join(path, x) for x in os.listdir(path)
+        #              if x.endswith('csv')]
+        #
+        # df_acm = pd.DataFrame()
+        # for file in filenames:
+        #     df = pd.read_csv(file, index_col=False)
+        #     if 1:
+        #         df.columns = df.iloc[0]
+        #         df = df[1:2]
+        #     print(file)
+        #     df_acm = df_acm.append((df))
+        #
+        # # df_acm = df_acm.reindex(sorted(df_acm.columns), axis=1)
+        # df_acm.to_csv(os.path.join(path, 'merged', 'merged.csv'), index=False)
+
+        # List to hold the data
+        data = []
+        root_dir = path
+        # Iterate through all the subfolders
+        for subdir, dirs, files in os.walk(root_dir):
+            # Check if 'results.txt' and 'hyp.yaml' exist in the current subdir
+            results_path = os.path.join(subdir, 'results.txt')
+            hyp_path = os.path.join(subdir, 'hyp.yaml')
+            opt_path = os.path.join(subdir, 'opt.yaml')
+            per_class_results = os.path.join(subdir, 'class_stats.txt')
+
+            if os.path.exists(results_path) and os.path.exists(hyp_path) and os.path.exists(opt_path):
+                # Get the last line from 'results.txt'
+                with open(results_path, 'r') as results_file:
+                    last_line = results_file.readlines()[-1].strip()
+
+                # Split the last line into the corresponding fields
+                results_values = last_line.split()
+
+                # Ensure that the last line contains the expected number of fields
+                if len(results_values) == len(results_columns):
+                    results_data = dict(zip(results_columns, results_values))
+                else:
+                    print(f"Warning: Unexpected format in {results_path}, skipping.")
+                    continue
+
+                # Load the 'hyp.yaml' file
+                with open(hyp_path, 'r') as hyp_file:
+                    hyp_data = yaml.safe_load(hyp_file)
+
+                with open(opt_path, 'r') as opt_file:
+                    opt_data = yaml.safe_load(opt_file)
+
+                df_per_class_results = pd.DataFrame()
+                if os.path.exists(per_class_results):
+                    df_per_class_results = process_class_stats(per_class_results)
+
+                # Add the result and the 'hyp.yaml' content into the data list
+                row = {
+                    'subdir': subdir,
+                }
+                # Update the row with the parsed results.txt values
+                row.update(results_data)
+                # Update the row with the hyperparameters from the 'hyp.yaml'
+                row.update(hyp_data)
+
+                row.update(opt_data)
+                if not df_per_class_results.empty:
+                    row.update(df_per_class_results.to_dict(orient='list'))
+
+                data.append(row)
+
+        # Convert the list of dictionaries to a pandas DataFrame
+        df = pd.DataFrame(data)
+
+        # Save the DataFrame to a CSV file
+        output_csv = 'runs_' + str(args.task) + '_summary.csv'
+        # df.to_csv(output_csv, index=False)
+        df.to_csv(os.path.join(path_result, output_csv), index=False)
+
+        print(f"Data successfully written to {os.path.join(path_result, output_csv)}")
+
+if __name__ == '__main__':
+    main()
diff --git a/tools/merge_run_summary_csv.sh b/tools/merge_run_summary_csv.sh
new file mode 100644
index 0000000000..b47a8f41a3
--- /dev/null
+++ b/tools/merge_run_summary_csv.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+source /home/hanoch/.virtualenvs/tir_od/bin/activate
+if [ -z "$1" ] ; then
+    python -u /home/hanoch/projects/tir_od/yolov7/tools/merge_results.py
+else
+    python -u /home/hanoch/projects/tir_od/yolov7/tools/merge_results.py --path "$1"
+fi
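Usage sketch for the merge tool above: class_stats.txt is written row by row by append_to_txt in test.py (class_name, num_files, num_objects, precision, recall, map50, map), and process_class_stats keeps only the block from the last 'all' row down, returning a single-row frame of <class>_map50 columns. The run directory below is a placeholder:

from tools.merge_results import process_class_stats

df = process_class_stats('runs/train/exp/class_stats.txt')
print(df.columns.tolist())  # e.g. ['all_map50', 'car_map50', 'person_map50']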
diff --git a/train.py b/train.py
index 86c7e48d5a..3b5e8f5084 100644
--- a/train.py
+++ b/train.py
@@ -37,6 +37,44 @@
 logger = logging.getLogger(__name__)

+from clearml import Task, Logger
+"""
+api {
+    web_server: https://app.railvision.hosted.allegro.ai/
+    api_server: https://api.railvision.hosted.allegro.ai
+    files_server: https://files.railvision.hosted.allegro.ai
+    credentials {
+        "access_key"="Q8ICCH7QKGVW433QT2OYE28HCVJZ10"
+        "secret_key"="A5u8JB-sgmF7Sdgs8H61i3GXPihF1WSO8Pxn44PnKhBNxfJ8eb1wZQ8J-RGB2Z7zAQk"
+    }
+}
+"""
+task = Task.init(
+    project_name="TIR_OD",
+    task_name="train yolov7 with augmented data"
+)
+
+def compare_models_basic(model1, model2):
+    for ix, (p1, p2) in enumerate(zip(model1.parameters(), model2.parameters())):
+        if p1.data.ne(p2.data).sum() > 0:
+            print('Models are different', ix, p1.data.ne(p2.data).sum())
+            return False
+    return True
+
+
+def compare_models(model1, model2):
+    # Iterate through named layers and parameters of both models
+    for (name1, param1), (name2, param2) in zip(model1.named_parameters(), model2.named_parameters()):
+        if name1 != name2:
+            print(f"Layer names differ: {name1} vs {name2}")
+
+        # Compare the parameters
+        if not torch.equal(param1, param2):
+            print('Difference found in layer {}: {}'.format(name1, param1.data.ne(param2.data).sum()))
+
+    return
+    # print("No differences found in any layer.")

 def train(hyp, opt, device, tb_writer=None):
     logger.info(colorstr('hyperparameters: ') + ', '.join(f'{k}={v}' for k, v in hyp.items()))
@@ -56,6 +94,8 @@
     with open(save_dir / 'opt.yaml', 'w') as f:
         yaml.dump(vars(opt), f, sort_keys=False)

+
     # Configure
     plots = not opt.evolve  # create plots
     cuda = device.type != 'cpu'
@@ -64,6 +104,9 @@
         data_dict = yaml.load(f, Loader=yaml.SafeLoader)  # data dict
     is_coco = opt.data.endswith('coco.yaml')

+    with open(save_dir / 'data.yaml', 'w') as f:
+        yaml.dump(data_dict, f, sort_keys=False)
+
     # Logging- Doing this before checking the dataset. Might update data_dict
     loggers = {'wandb': None}  # loggers dict
     if rank in [-1, 0]:
@@ -85,19 +128,19 @@
         with torch_distributed_zero_first(rank):
             attempt_download(weights)  # download if not found locally
         ckpt = torch.load(weights, map_location=device)  # load checkpoint
-        model = Model(opt.cfg or ckpt['model'].yaml, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device)  # create
+        model = Model(opt.cfg or ckpt['model'].yaml, ch=opt.input_channels, nc=nc, anchors=hyp.get('anchors')).to(device)  # create the model structure from the yaml, not from the checkpoint
         exclude = ['anchor'] if (opt.cfg or hyp.get('anchors')) and not opt.resume else []  # exclude keys
         state_dict = ckpt['model'].float().state_dict()  # to FP32
         state_dict = intersect_dicts(state_dict, model.state_dict(), exclude=exclude)  # intersect
         model.load_state_dict(state_dict, strict=False)  # load
         logger.info('Transferred %g/%g items from %s' % (len(state_dict), len(model.state_dict()), weights))  # report
     else:
-        model = Model(opt.cfg, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device)  # create
+        model = Model(opt.cfg, ch=opt.input_channels, nc=nc, anchors=hyp.get('anchors')).to(device)  # create
     with torch_distributed_zero_first(rank):
         check_dataset(data_dict)  # check
     train_path = data_dict['train']
     test_path = data_dict['val']
-
+    images_parent_folder = data_dict['path']
     # Freeze
     freeze = [f'model.{x}.' for x in (freeze if len(freeze) > 1 else range(freeze[0]))]  # parameter names to freeze (full or partial)
     for k, v in model.named_parameters():
@@ -115,7 +158,7 @@
     pg0, pg1, pg2 = [], [], []  # optimizer parameter groups
     for k, v in model.named_modules():
         if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter):
-            pg2.append(v.bias)  # biases
+            pg2.append(v.bias)  # biases  # their weight decay also needs to be set to zero
         if isinstance(v, nn.BatchNorm2d):
             pg0.append(v.weight)  # no decay
         elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter):
@@ -177,14 +220,25 @@
             if hasattr(v.rbr_dense, 'vector'):
                 pg0.append(v.rbr_dense.vector)

-    if opt.adam:
-        optimizer = optim.Adam(pg0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999))  # adjust beta1 to momentum
+    if opt.adam:  # @@ HK: AdamW() fixes Adam's coupled weight-decay (L2 loss) bug
+        optimizer = optim.AdamW(pg0, lr=hyp['lr0'], weight_decay=0, betas=(hyp['momentum'], 0.999))  # adjust beta1 to momentum
     else:
-        optimizer = optim.SGD(pg0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True)
+        optimizer = optim.SGD(pg0, lr=hyp['lr0'], weight_decay=0, momentum=hyp['momentum'], nesterov=True)

     optimizer.add_param_group({'params': pg1, 'weight_decay': hyp['weight_decay']})  # add pg1 with weight_decay
-    optimizer.add_param_group({'params': pg2})  # add pg2 (biases)
+    optimizer.add_param_group({'params': pg2, 'weight_decay': 0})  # add pg2 (biases)
     logger.info('Optimizer groups: %g .bias, %g conv.weight, %g other' % (len(pg2), len(pg1), len(pg0)))
+
+    # validate that we considered every parameter
+    # param_dict = {pn: p for pn, p in model.named_parameters()}
+    # inter_params = set(pg1) & set(pg0) & set(pg1)
+    # union_params = set(pg1) | set(pg0) | set(pg1)
+    # assert len(inter_params) == 0, "parameters %s made it into both decay/no_decay sets!" % (str(inter_params),)
+    # assert len(
+    #     param_dict.keys() - union_params) == 0, "parameters %s were not separated into either decay/no_decay set!" \
+    #                                             % (str(param_dict.keys() - union_params),)
+
     del pg0, pg1, pg2

     # Scheduler https://arxiv.org/pdf/1812.01187.pdf
@@ -195,6 +249,9 @@
         lf = one_cycle(1, hyp['lrf'], epochs)  # cosine 1->hyp['lrf']
     scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
     # plot_lr_scheduler(optimizer, scheduler, epochs)
+    # from utils.plots import plot_lr_scheduler
+    # plot_lr_scheduler(optimizer, scheduler, epochs, save_dir='/home/hanoch/projects/tir_od')
+
     # EMA
     ema = ModelEMA(model) if rank in [-1, 0] else None
@@ -245,17 +302,22 @@
     dataloader, dataset = create_dataloader(train_path, imgsz, batch_size, gs, opt,
                                             hyp=hyp, augment=True, cache=opt.cache_images, rect=opt.rect, rank=rank,
                                             world_size=opt.world_size, workers=opt.workers,
-                                            image_weights=opt.image_weights, quad=opt.quad, prefix=colorstr('train: '))
+                                            image_weights=opt.image_weights, quad=opt.quad, prefix=colorstr('train: '),
+                                            rel_path_images=images_parent_folder, num_cls=data_dict['nc'])
     mlc = np.concatenate(dataset.labels, 0)[:, 0].max()  # max label class
     nb = len(dataloader)  # number of batches
     assert mlc < nc, 'Label class %g exceeds nc=%g in %s. Possible class labels are 0-%g' % (mlc, nc, opt.data, nc - 1)

     # Process 0
     if rank in [-1, 0]:
-        testloader = create_dataloader(test_path, imgsz_test, batch_size * 2, gs, opt,  # testloader
-                                       hyp=hyp, cache=opt.cache_images and not opt.notest, rect=True, rank=-1,
+        testloader, test_dataset = create_dataloader(test_path, imgsz_test, batch_size * 2, gs, opt,  # testloader
+                                       hyp=hyp, cache=opt.cache_images and not opt.notest, rect=False, rank=-1,  # @@@ rect was True -- why?
                                        world_size=opt.world_size, workers=opt.workers,
-                                       pad=0.5, prefix=colorstr('val: '))[0]
+                                       pad=0.5, prefix=colorstr('val: '),
+                                       rel_path_images=images_parent_folder, num_cls=data_dict['nc'])
+
+        mlc = np.concatenate(test_dataset.labels, 0)[:, 0].max()  # max label class
+        assert mlc < nc, 'Label class %g exceeds nc=%g in %s. Possible class labels are 0-%g' % (mlc, nc, opt.data, nc - 1)

         if not opt.resume:
             labels = np.concatenate(dataset.labels, 0)
@@ -270,7 +332,8 @@
             # Anchors
             if not opt.noautoanchor:
                 check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz)
-            model.half().float()  # pre-reduce anchor precision
+            if 1:
+                model.half().float()  # pre-reduce anchor precision  # TODO HK: why?

     # DDP mode
     if cuda and rank != -1:
@@ -303,7 +366,10 @@
                 f'Using {dataloader.num_workers} dataloader workers\n'
                 f'Logging results to {save_dir}\n'
                 f'Starting training for {epochs} epochs...')
-    torch.save(model, wdir / 'init.pt')
+
+    if (not opt.nosave):
+        torch.save(model, wdir / 'init.pt')
+
     for epoch in range(start_epoch, epochs):  # epoch ------------------------------------------------------------------
         model.train()
@@ -335,7 +401,8 @@
         optimizer.zero_grad()
         for i, (imgs, targets, paths, _) in pbar:  # batch -------------------------------------------------------------
             ni = i + nb * epoch  # number integrated batches (since train start)
-            imgs = imgs.to(device, non_blocking=True).float() / 255.0  # uint8 to float32, 0-255 to 0.0-1.0
+            # imgs = imgs.to(device, non_blocking=True).float() / 255.0  # uint8 to float32, 0-255 to 0.0-1.0  @@HK TODO: is that standardization?
+            imgs = imgs.to(device, non_blocking=True).float()

             # Warmup
             if ni <= nw:
@@ -350,7 +417,7 @@
             # Multi-scale
             if opt.multi_scale:
-                sz = random.randrange(imgsz * 0.5, imgsz * 1.5 + gs) // gs * gs  # size
+                sz = random.randrange(int(imgsz * 0.5), int(imgsz * 1.5 + gs)) // gs * gs  # size
                 sf = sz / max(imgs.shape[2:])  # scale factor
                 if sf != 1:
                     ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]]  # new shape (stretched to gs-multiple)
@@ -390,7 +457,7 @@
             # Plot
             if plots and ni < 10:
                 f = save_dir / f'train_batch{ni}.jpg'  # filename
-                Thread(target=plot_images, args=(imgs, targets, paths, f), daemon=True).start()
+                Thread(target=plot_images, args=(imgs, targets, paths, f, opt.input_channels), daemon=True).start()
                 # if tb_writer:
                 #     tb_writer.add_image(f, result, dataformats='HWC', global_step=epoch)
                 #     tb_writer.add_graph(torch.jit.trace(model, imgs, strict=False), [])  # add model graph
@@ -403,8 +470,10 @@
         # Scheduler
         lr = [x['lr'] for x in optimizer.param_groups]  # for tensorboard
+        # print("Lr : ", 10*'+', lr)
         scheduler.step()
-
+        if 1:  # @@ HK
+            plots = True
         # DDP process 0 or single-GPU
         if rank in [-1, 0]:
             # mAP
@@ -415,6 +484,7 @@
                 results, maps, times = test.test(data_dict,
                                                  batch_size=batch_size * 2,
                                                  imgsz=imgsz_test,
+                                                 save_json=True,
                                                  model=ema.ema,
                                                  single_cls=opt.single_cls,
                                                  dataloader=testloader,
@@ -562,8 +632,24 @@
     parser.add_argument('--artifact_alias', type=str, default="latest", help='version of dataset artifact to be used')
     parser.add_argument('--freeze', nargs='+', type=int, default=[0], help='Freeze layers: backbone of yolov7=50, first3=0 1 2')
     parser.add_argument('--v5-metric', action='store_true', help='assume maximum recall as 1.0 in AP calculation')
+    parser.add_argument('--norm-type', type=str, default='standardization',
+                        choices=['standardization', 'single_image_0_to_1', 'single_image_mean_std', 'single_image_percentile_0_255',
+                                 'single_image_percentile_0_1', 'remove+global_outlier_0_1'],
+                        help='Normalization approach')
+
+    parser.add_argument('--no-tir-signal', action='store_true', help='')
+
+    parser.add_argument('--tir-channel-expansion', action='store_true', help='drc_per_ch_percentile')
+
+    parser.add_argument('--input-channels', type=int, default=3, help='')
+
     opt = parser.parse_args()

+    if opt.tir_channel_expansion:  # operates over 3 channels
+        opt.input_channels = 3
+
+    if opt.tir_channel_expansion and opt.norm_type != 'single_image_percentile_0_1':  # operates over 3 channels
+        print('Not a good combination')
+
     # Set DDP variables
     opt.world_size = int(os.environ['WORLD_SIZE']) if 'WORLD_SIZE' in os.environ else 1
     opt.global_rank = int(os.environ['RANK']) if 'RANK' in os.environ else -1
@@ -703,3 +789,34 @@
         plot_evolution(yaml_file)
         print(f'Hyperparameter evolution complete. Best results saved as: {yaml_file}\n'
               f'Command to train a new model with these hyperparameters: $ python train.py --hyp {yaml_file}')
+
+
+"""
+TODO:
+Anchors:
+    hyp['anchor_t'] = 4 lets the aspect ratio be <= 4 => TODO check if valid
+    I've reduced the anchors to 2 per output layer: anchors: 2
+Sampler: torch_weighted : WeightedRandomSampler
+PP-YOLO bumps the batch size up from 64 to 192. Of course, this is hard to implement if you have GPU memory constraints.
+
+
+****** DON'T FORGET to delete cache files upon changing data ************
+
+python train.py --workers 8 --device 'cpu' --batch-size 32 --data data/coco.yaml --img 640 640 --cfg cfg/training/yolov7.yaml --weights 'v7' --name yolov7 --hyp data/hyp.scratch.p5.yaml
+--workers 8 --device cpu --batch-size 32 --data data/tir_od.yaml --img 640 640 --cfg cfg/training/yolov7.yaml --weights 'v7' --name yolov7 --cache-images --hyp data/hyp.tir_od.tiny.yaml --adam --norm-type single_image_percentile_0_1
+--workers 8 --device cpu --batch-size 32 --data data/tir_od.yaml --img 640 640 --cfg cfg/training/yolov7-tiny.yaml --weights 'v7' --name yolov7 --cache-images --hyp data/hyp.tir_od.tiny.yaml --adam --norm-type single_image_percentile_0_1 --input-channels 1 --multi-scale
+--multi-scale training with resized image resolution is not good for TIR
+Training based on a given model, without the prototype yaml from --cfg:
+
+--workers 8 --device 0 --batch-size 16 --data data/coco_2_tir.yaml --img 640 640 --weights ./yolov7/yolov7.pt --name yolov7 --hyp data/hyp.tir_od.tiny.yaml --adam --norm-type single_image_percentile_0_1 --input-channels 3 --linear-lr --noautoanchor
+
+--workers 8 --device 0 --batch-size 16 --data data/tir_od.yaml --img 640 640 --weights ./yolov7/yolov7-tiny.pt --name yolov7 --hyp data/hyp.tir_od.tiny.yaml --adam --norm-type single_image_percentile_0_1 --input-channels 3 --linear-lr --noautoanchor
+
+===========================================================================
+FT: you need the --cfg of the arch yaml because nc (number of classes) changes
+--workers 8 --device 0 --batch-size 16 --data data/tir_od.yaml --img 640 640 --weights ./yolov7/yolov7-tiny.pt --cfg cfg/training/yolov7-tiny.yaml --name yolov7 --hyp data/hyp.tir_od.tiny.yaml --adam --norm-type single_image_percentile_0_1 --input-channels 3 --linear-lr
+
+
+--workers 8 --device 0 --batch-size 16 --data data/tir_od.yaml --img 640 640 --weights ./yolov7/yolov7-tiny.pt --cfg cfg/training/yolov7-tiny.yaml --name yolov7 --hyp hyp.tir_od.tiny_aug.yaml --adam --norm-type single_image_mean_std --input-channels 3 --linear-lr --epochs 2
+
+"""
\ No newline at end of file
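Note: the optimizer change above makes the decay handling explicit: decoupled weight decay (AdamW) is applied only to conv/linear weights (pg1), while BatchNorm gains (pg0) and biases (pg2) get weight_decay=0. A minimal self-contained sketch of the same three-group layout, with illustrative values standing in for hyp:

import torch.nn as nn
import torch.optim as optim

model = nn.Sequential(nn.Conv2d(3, 8, 3), nn.BatchNorm2d(8), nn.Conv2d(8, 2, 1))
pg0, pg1, pg2 = [], [], []
for m in model.modules():
    if hasattr(m, 'bias') and isinstance(m.bias, nn.Parameter):
        pg2.append(m.bias)                 # biases: never decayed
    if isinstance(m, nn.BatchNorm2d):
        pg0.append(m.weight)               # BN gains: never decayed
    elif hasattr(m, 'weight') and isinstance(m.weight, nn.Parameter):
        pg1.append(m.weight)               # conv weights: the only decayed group

optimizer = optim.AdamW(pg0, lr=1e-3, weight_decay=0, betas=(0.937, 0.999))
optimizer.add_param_group({'params': pg1, 'weight_decay': 5e-3})
optimizer.add_param_group({'params': pg2, 'weight_decay': 0})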
        exclude = ['anchor'] if (opt.cfg or hyp.get('anchors')) and not opt.resume else []  # exclude keys
        state_dict = ckpt['model'].float().state_dict()  # to FP32
        state_dict = intersect_dicts(state_dict, model.state_dict(), exclude=exclude)  # intersect
        model.load_state_dict(state_dict, strict=False)  # load
        logger.info('Transferred %g/%g items from %s' % (len(state_dict), len(model.state_dict()), weights))  # report
    else:
-        model = Model(opt.cfg, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device)  # create
+        model = Model(opt.cfg, ch=opt.input_channels, nc=nc, anchors=hyp.get('anchors')).to(device)  # create
    with torch_distributed_zero_first(rank):
        check_dataset(data_dict)  # check
    train_path = data_dict['train']
@@ -335,7 +335,7 @@ def train(hyp, opt, device, tb_writer=None):
        optimizer.zero_grad()
        for i, (imgs, targets, paths, _) in pbar:  # batch -------------------------------------------------------------
            ni = i + nb * epoch  # number integrated batches (since train start)
-            imgs = imgs.to(device, non_blocking=True).float() / 255.0  # uint8 to float32, 0-255 to 0.0-1.0
+            imgs = imgs.to(device, non_blocking=True).float()  # / 255.0 # uint8 to float32, 0-255 to 0.0-1.0 -- scaling now handled in the dataset

            # Warmup
            if ni <= nw:
@@ -558,6 +558,13 @@ def train(hyp, opt, device, tb_writer=None):
    parser.add_argument('--save_period', type=int, default=-1, help='Log model after every "save_period" epoch')
    parser.add_argument('--artifact_alias', type=str, default="latest", help='version of dataset artifact to be used')
    parser.add_argument('--v5-metric', action='store_true', help='assume maximum recall as 1.0 in AP calculation')
+    parser.add_argument('--tir-od', action='store_true', help='TIR Object Detection')
+    parser.add_argument('--norm-type', type=str, default='standardization',
+                        choices=['standardization', 'single_image_0_to_1', 'single_image_mean_std', 'single_image_percentile_0_1', 'remove+global_outlier_0_1'],
+                        help='Normalization approach')
+    parser.add_argument('--input-channels', type=int, default=3, help='number of model input channels; the Model(...) calls above read opt.input_channels')
+
+
    opt = parser.parse_args()

    # Set DDP variables
@@ -697,3 +704,11 @@ def train(hyp, opt, device, tb_writer=None):
        plot_evolution(yaml_file)
        print(f'Hyperparameter evolution complete. Best results saved as: {yaml_file}\n'
              f'Command to train a new model with these hyperparameters: $ python train.py --hyp {yaml_file}')
+
+
+"""
+python train.py --workers 8 --device 0 --batch-size 32 --data data/coco.yaml --img 640 640 --cfg cfg/training/yolov7.yaml --weights '' --name yolov7 --hyp data/hyp.scratch.p5.yaml
+
+--workers 8 --device cpu --batch-size 32 --data data/tir_od.yaml --img 640 640 --cfg cfg/training/yolov7.yaml --weights 'v7' --name yolov7 --cache-images --hyp data/hyp.tir_od.tiny.yaml --adam
+
+"""
\ No newline at end of file
diff --git a/utils/autoanchor.py b/utils/autoanchor.py
index f491032e53..1a1a5f14f4 100644
--- a/utils/autoanchor.py
+++ b/utils/autoanchor.py
@@ -44,7 +44,7 @@ def metric(k):  # compute metric
        print('. Attempting to improve anchors, please wait...')
        na = m.anchor_grid.numel() // 2  # number of anchors
        try:
-            anchors = kmean_anchors(dataset, n=na, img_size=imgsz, thr=thr, gen=1000, verbose=False)
+            anchors = kmean_anchors(dataset, n=na, img_size=imgsz, thr=thr, gen=1000, verbose=True)
        except Exception as e:
            print(f'{prefix}ERROR: {e}')
        new_bpr = metric(anchors)[0]
diff --git a/utils/datasets.py b/utils/datasets.py
index 5fe4f7bcc2..1efbac5f47 100644
--- a/utils/datasets.py
+++ b/utils/datasets.py
@@ -29,8 +29,10 @@ from utils.general import check_requirements, xyxy2xywh, xywh2xyxy, xywhn2xyxy, xyn2xy, segment2box, segments2boxes, \
    resample_segments, clean_str
from utils.torch_utils import torch_distributed_zero_first
-
+# @@HK: pip install torch==2.3.0 torchvision==0.18.0 torchaudio==2.3.0 resolves the "...h\lib\fbgemm.dll" (or one of its dependencies) load error on Windows
# Parameters
+def flatten(lst): return [x for l in lst for x in l]
+
help_url = 'https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data'
img_formats = ['bmp', 'jpg', 'jpeg', 'png', 'tif', 'tiff', 'dng', 'webp', 'mpo']  # acceptable image suffixes
vid_formats = ['mov', 'avi', 'mp4', 'mpg', 'mpeg', 'm4v', 'wmv', 'mkv']  # acceptable video suffixes
@@ -60,10 +62,53 @@ def exif_size(img):
        pass
    return s

+def scaling_image(img, scaling_type, percentile=0.03, beta=0.3):
+    if scaling_type == 'standardization':  # default by repo
+        img = img / 255.0
+
+    elif scaling_type == 'single_image_0_to_1':
+        max_val = np.max(img.ravel())
+        min_val = np.min(img.ravel())
+        img = np.double(img - min_val) / np.double(max_val - min_val)
+        img = np.minimum(np.maximum(img, 0), 1)
+
+    elif scaling_type == 'single_image_mean_std':
+        img = (img - img.ravel().mean()) / img.ravel().std()
+
+    elif scaling_type == 'single_image_percentile_0_1':
+        min_val = np.percentile(img.ravel(), percentile)
+        max_val = np.percentile(img.ravel(), 100 - percentile)
+        img = np.double(img - min_val) / np.double(max_val - min_val)
+        img = np.minimum(np.maximum(img, 0), 1)
+
+    elif scaling_type == 'single_image_percentile_0_255':
+        ImgMin = np.percentile(img, percentile)
+        ImgMax = np.percentile(img, 100 - percentile)
+        ImgDRC = (np.double(img - ImgMin) / np.double(ImgMax - ImgMin)) * 255
+        img_temp = (np.uint8(np.minimum(np.maximum(ImgDRC, 0), 255)))
+        return img_temp
+
+    elif scaling_type == 'remove+global_outlier_0_1':
+        img = np.double(img - img.min() * beta) / np.double(img.max() * (1 - beta) - img.min() * beta)  # beta acts as the [percentile]
+        img = np.double(np.minimum(np.maximum(img, 0), 1))
+    elif scaling_type == 'normalization_uint16':
+        raise ValueError("normalization norm-image method not implemented yet.")
+    elif scaling_type == 'normalization':
+        raise ValueError("normalization norm-image method not implemented yet.")
+    else:
+        raise ValueError("Unknown norm-image method")
+
+    return img

def create_dataloader(path, imgsz, batch_size, stride, opt, hyp=None, augment=False, cache=False, pad=0.0, rect=False,
-                      rank=-1, world_size=1, workers=8, image_weights=False, quad=False, prefix=''):
+                      rank=-1, world_size=1, workers=8, image_weights=False, quad=False, prefix='', rel_path_images='', num_cls=-1):
    # Make sure only the first process in DDP processes the dataset first, so the others can use the cache
    with torch_distributed_zero_first(rank):
        dataset = LoadImagesAndLabels(path, imgsz, batch_size,
@@ -75,7 +120,13 @@ def create_dataloader(path, imgsz, batch_size, stride, opt, hyp=None, augment=Fa
                                      stride=int(stride),
                                      pad=pad,
                                      image_weights=image_weights,
-                                      prefix=prefix)
+                                      prefix=prefix,
+                                      rel_path_images=rel_path_images,
+                                      scaling_type=opt.norm_type,
+                                      input_channels=opt.input_channels,
+                                      num_cls=num_cls,
+                                      tir_channel_expansion=opt.tir_channel_expansion,
+                                      no_tir_signal=opt.no_tir_signal)

    batch_size = min(batch_size, len(dataset))
    nw = min([os.cpu_count() // world_size, batch_size if batch_size > 1 else 0, workers])  # number of workers
@@ -126,7 +177,9 @@ def __iter__(self):

class LoadImages:  # for inference
-    def __init__(self, path, img_size=640, stride=32):
+    def __init__(self, path, img_size=640, stride=32,
+                 scaling_type='standardization', img_percentile_removal=0.3, beta=0.3, input_channels=3):
+
        p = str(Path(path).absolute())  # os-agnostic absolute path
        if '*' in p:
            files = sorted(glob.glob(p, recursive=True))  # glob
@@ -154,6 +207,11 @@ def __init__(self, path, img_size=640, stride=32):
        assert self.nf > 0, f'No images or videos found in {p}. ' \
                            f'Supported formats are:\nimages: {img_formats}\nvideos: {vid_formats}'

+        self.scaling_type = scaling_type
+        self.percentile = img_percentile_removal
+        self.beta = beta
+        self.input_channels = input_channels
+
    def __iter__(self):
        self.count = 0
        return self
@@ -183,17 +241,45 @@ def __next__(self):
        else:
            # Read image
            self.count += 1
-            img0 = cv2.imread(path)  # BGR
+            # img0 = cv2.imread(path)  # BGR
+            # 16-bit unsigned TIR frames (tif/tiff) must be read unchanged
+            if os.path.basename(path).split('.')[-1] in ('tif', 'tiff'):
+                img0 = cv2.imread(path, -1)
+            else:
+                img0 = cv2.imread(path)  # BGR
+
            assert img0 is not None, 'Image Not Found ' + path
            #print(f'image {self.count}/{self.nf} {path}: ', end='')

            # Padded resize
            img = letterbox(img0, self.img_size, stride=self.stride)[0]

+            if img.ndim > 2:  # color input: channel permute, no replication
+                # Convert
+                img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
+            else:
+                img = np.repeat(img[np.newaxis, :, :], self.input_channels, axis=0)  # convert GL (gray-level) to RGB by replication
+
+            # print('\n image file', self.img_files[index])
+            if 0:  # debug: histogram before scaling
+                import matplotlib.pyplot as plt
+                plt.figure()
+                plt.hist(img.ravel(), bins=128)
+                plt.savefig(os.path.join('/home/hanoch/projects/tir_od/outputs', os.path.basename(path).split('.')[0] + 'pre'))
+
+            img = scaling_image(img, scaling_type=self.scaling_type,
+                                percentile=self.percentile, beta=self.beta)
+
+            if 0:  # debug: histogram after scaling
+                import matplotlib.pyplot as plt
+                plt.figure()
+                plt.hist(img.ravel(), bins=128)
+                plt.savefig(os.path.join('/home/hanoch/projects/tir_od/outputs', os.path.basename(path).split('.')[0] + 'post'))
+
            img = np.ascontiguousarray(img)
+
            return path, img, img0, self.cap

    def new_video(self, path):
@@ -352,16 +438,26 @@ def img2label_paths(img_paths):

class LoadImagesAndLabels(Dataset):  # for training/testing
    def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False,
-                 cache_images=False, single_cls=False, stride=32, pad=0.0, prefix=''):
+                 cache_images=False, single_cls=False, stride=32, pad=0.0, prefix='', rel_path_images='',
+                 scaling_type='standardization', input_channels=3,
+                 num_cls=-1, tir_channel_expansion=False, no_tir_signal=False):
+
        self.img_size = img_size
        self.augment = augment
        self.hyp = hyp
        self.image_weights = image_weights
        self.rect = False if image_weights else rect
-        self.mosaic = self.augment and not self.rect  # load 4 images at a time into a mosaic (only during training)
+        self.mosaic = self.augment and not self.rect  # load 4 images at a time into a mosaic (only during training) @@HK TODO: disable mosaic implicitly by setting the mosaic prob to 0
        self.mosaic_border = [-img_size // 2, -img_size // 2]
        self.stride = stride
-        self.path = path
+        self.path = path
+        self.scaling_type = scaling_type
+        self.percentile = hyp['img_percentile_removal']
+        self.beta = hyp['beta']
+        self.input_channels = input_channels  # if the image is GL (gray-level) but the NN expects RGB, replicate channels
+        self.tir_channel_expansion = tir_channel_expansion
+        self.is_tir_signal = not (no_tir_signal)
+
+        #self.albumentations = Albumentations() if augment else None

        try:
@@ -375,7 +471,11 @@ def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, r
                    with open(p, 'r') as t:
                        t = t.read().strip().splitlines()
                        parent = str(p.parent) + os.sep
-                        f += [x.replace('./', parent) if x.startswith('./') else x for x in t]  # local to global path
+                        if bool(rel_path_images):
+                            f += [os.path.join(rel_path_images, x.replace('./', '')).rstrip() if x.startswith('./') else x for x in t]  # local to global path
+                        else:
+                            f += [x.replace('./', parent) if x.startswith('./') else x for x in t]  # local to global path
+
                        # f += [p.parent / x.lstrip(os.sep) for x in t]  # local to global path (pathlib)
                else:
                    raise Exception(f'{prefix}{p} does not exist')
@@ -385,7 +485,7 @@ def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, r
        except Exception as e:
            raise Exception(f'{prefix}Error loading data from {path}: {e}\nSee {help_url}')

-        # Check cache
+        # Check cache (HK: the cache stores labels/annotations only)
        self.label_files = img2label_paths(self.img_files)  # labels
        cache_path = (p if p.is_file() else Path(self.label_files[0]).parent).with_suffix('.cache')  # cached labels
        if cache_path.is_file():
            #if cache['hash'] != get_hash(self.label_files + self.img_files) or 'version' not in cache:  # changed
            #    cache, exists = self.cache_labels(cache_path, prefix), False  # re-cache
        else:
-            cache, exists = self.cache_labels(cache_path, prefix), False  # cache
+            cache, exists = self.cache_labels(num_cls, cache_path, prefix), False  # cache

        # Display cache
        nf, nm, ne, nc, n = cache.pop('results')  # found, missing, empty, corrupted, total
@@ -443,7 +543,7 @@ def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, r
            elif mini > 1:
                shapes[i] = [1, 1 / mini]

-        self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(int) * stride
+        self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(int) * stride  # pad=0.5 (see https://github.com/ultralytics/ultralytics/issues/13271): the 0.5 pad keeps the aspect ratio and leaves a border so augmentations don't lose features at the image edges (e.g. 640x640 resized to 672x672); it can also shift mAP slightly (a .pt model scored marginally higher than its ONNX export there)
        # Cache images into memory for faster training (WARNING: large datasets may exceed system RAM)
        self.imgs = [None] * n
@@ -467,7 +567,7 @@ def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, r
                pbar.desc = f'{prefix}Caching images ({gb / 1E9:.1f}GB)'
            pbar.close()

-    def cache_labels(self, path=Path('./labels.cache'), prefix=''):
+    def cache_labels(self, num_cls, path=Path('./labels.cache'), prefix=''):
        # Cache dataset labels, check images and read shapes
        x = {}  # dict
        nm, nf, ne, nc = 0, 0, 0, 0  # number missing, found, empty, duplicate
@@ -497,6 +597,10 @@ def cache_labels(self, path=Path('./labels.cache'), prefix=''):
                    assert (l >= 0).all(), 'negative labels'
                    assert (l[:, 1:] <= 1).all(), 'non-normalized or out of bounds coordinate labels'
                    assert np.unique(l, axis=0).shape[0] == l.shape[0], 'duplicate labels'
+                    assert (l[:, 0].max() < num_cls), 'class label out of range -- invalid'  # the max class id can't exceed the number of classes
+                    # print(l[:, 0])
+
+
                else:
                    ne += 1  # label empty
                    l = np.zeros((0, 5), dtype=np.float32)
@@ -551,7 +655,7 @@ def __getitem__(self, index):
            else:
                img2, labels2 = load_mosaic9(self, random.randint(0, len(self.labels) - 1))
            r = np.random.beta(8.0, 8.0)  # mixup ratio, alpha=beta=8.0
-            img = (img * r + img2 * (1 - r)).astype(np.uint8)
+            img = (img * r + img2 * (1 - r)).astype(img.dtype)  #.astype(np.uint8) -- keep the source dtype (uint16 for TIR)
            labels = np.concatenate((labels, labels2), 0)

        else:
@@ -570,18 +674,23 @@ def __getitem__(self, index):
        if self.augment:
            # Augment imagespace
            if not mosaic:
-                img, labels = random_perspective(img, labels,
-                                                 degrees=hyp['degrees'],
-                                                 translate=hyp['translate'],
-                                                 scale=hyp['scale'],
-                                                 shear=hyp['shear'],
-                                                 perspective=hyp['perspective'])
-
+                if hyp['random_perspective']:
+                    img, labels = random_perspective(img, labels,
+                                                     degrees=hyp['degrees'],
+                                                     translate=hyp['translate'],
+                                                     scale=hyp['scale'],
+                                                     shear=hyp['shear'],
+                                                     perspective=hyp['perspective'])
+                if random.random() < hyp['inversion']:
+                    img = inversion_aug(img)
+                # GL gain/attenuation
+                # Squeeze pdf: (x - mu) * scl + mu

            #img, labels = self.albumentations(img, labels)
+            if hyp['hsv_h'] > 0 or hyp['hsv_s'] > 0 or hyp['hsv_v'] > 0:
                # Augment colorspace
-            augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v'])
+                augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v'])

            # Apply cutouts
            # if random.random() < 0.9:
@@ -622,10 +731,52 @@ def __getitem__(self, index):
        if nL:
            labels_out[:, 1:] = torch.from_numpy(labels)

-        # Convert
-        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
-        img = np.ascontiguousarray(img)
+        if self.tir_channel_expansion:
+            img = np.repeat(img[np.newaxis, :, :], 3, axis=0)  # convert GL to RGB by replication
+            img_ce = np.zeros_like(img).astype('float64')
+
+            # CH0: full-range scaling (percentile=0)
+            img_chan = scaling_image(img[0, :, :], scaling_type=self.scaling_type,
+                                     percentile=0, beta=self.beta)
+            img_ce[0, :, :] = img_chan.astype('float64')
+
+            # CH1: DRC scaling with percentile outlier removal
+            img_chan = scaling_image(img[1, :, :], scaling_type=self.scaling_type,
+                                     percentile=self.percentile, beta=self.beta)
+            img_ce[1, :, :] = img_chan.astype('float64')
+
+            # CH2: inverted copy of the DRC channel
+            img_chan = inversion_aug(img_ce[1, :, :])  # invert the DRC one
+            img_ce[2, :, :] = img_chan.astype('float64')
+            img = img_ce
+            # tifffile.imwrite(os.path.join('/home/hanoch/projects/tir_od', 'img_ce.tiff'), 255*img.transpose(1,2,0).astype('uint8'))
+        else:
+            if self.is_tir_signal:
+                img = np.repeat(img[np.newaxis, :, :], self.input_channels, axis=0)  # convert GL to RGB by replication
+            else:
+                # Convert
+                img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
+            # else:
+            #     # img = img[np.newaxis, ...]  # unsqueeze
+            #     img = np.repeat(img[np.newaxis, :, :], self.input_channels, axis=0)  # convert GL to RGB by replication
+
+        # print('\n image file', self.img_files[index])
+        if 0:  # debug: histogram before scaling
+            import matplotlib.pyplot as plt
+            plt.figure()
+            plt.hist(img.ravel(), bins=128)
+            plt.savefig(os.path.join('/home/hanoch/projects/tir_od/outputs', os.path.basename(self.img_files[index]).split('.')[0] + 'pre_' + str(self.scaling_type)))
+
+        img = scaling_image(img, scaling_type=self.scaling_type,
+                            percentile=self.percentile, beta=self.beta)
+        if 0:  # debug: histogram after scaling
+            import matplotlib.pyplot as plt
+            plt.figure()
+            plt.hist(img.ravel(), bins=128)
+            plt.savefig(os.path.join('/home/hanoch/projects/tir_od/outputs', os.path.basename(self.img_files[index]).split('.')[0] + 'post_' + str(self.scaling_type)))
+
+        # print('\n 1st', img.shape)
+        img = np.ascontiguousarray(img)
+        # print('\n 2nd', img.shape)

        return torch.from_numpy(img), labels_out, self.img_files[index], shapes

    @staticmethod
@@ -668,7 +819,12 @@ def load_image(self, index):
    img = self.imgs[index]
    if img is None:  # not cached
        path = self.img_files[index]
-        img = cv2.imread(path)  # BGR
+        # 16-bit unsigned TIR frames (tif/tiff) are read unchanged
+        if os.path.basename(path).split('.')[-1] in ('tif', 'tiff'):
+            img = cv2.imread(path, -1)
+            img = img[:, :, np.newaxis]  # (640, 640, 1)
+        else:
+            img = cv2.imread(path)  # BGR
        assert img is not None, 'Image Not Found ' + path
        h0, w0 = img.shape[:2]  # orig hw
        r = self.img_size / max(h0, w0)  # resize image to img_size
@@ -693,6 +849,16 @@ def augment_hsv(img, hgain=0.5, sgain=0.5, vgain=0.5):
    img_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))).astype(dtype)
    cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)  # no return needed

+def inversion_aug(img):
+    if img.dtype == np.uint16 or img.dtype == np.uint8:  # unsigned integer images: complement against the dtype max
+        img = np.iinfo(img.dtype).max - img
+        return img
+    elif img.dtype == np.float32 or img.dtype == np.float64:
+        img = 1.0 - img
+        return img
+    else:
+        raise ValueError("image type is not supported (uint8, uint16, float32/64): {}".format(img.dtype))
+

def hist_equalize(img, clahe=True, bgr=False):
    # Equalize histogram on BGR image 'img' with img.shape(n,m,3) and range 0-255
@@ -715,10 +881,17 @@ def load_mosaic(self, index):
    for i, index in enumerate(indices):
        # Load image
        img, _, (h, w) = load_image(self, index)
-
+        # if 1:
+        #     img = np.repeat(img[:, :, np.newaxis], 3, axis=2)
+        # if img.ndim < 3:  # TIR => unsqueeze the 1-channel GL to RGB
+        #     tir_signal = True
+        #     img = img[:, :, np.newaxis]
        # place img in img4
        if i == 0:  # top left
-            img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8)  # base image with 4 tiles
+            if self.is_tir_signal:
+                img4 = np.full((s * 2, s * 2, img.shape[2]), 0, dtype=np.uint16)  # base image with 4 tiles; fill 0 (coldest) rather than gray 114
+            else:
+                img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8)  # base image with 4 tiles
            x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc  # xmin, ymin, xmax, ymax (large image)
            x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h  # xmin, ymin, xmax, ymax (small image)
        elif i == 1:  # top right
@@ -773,10 +946,17 @@ def load_mosaic9(self, index):
    for i, index in enumerate(indices):
        # Load image
        img, _, (h, w) = load_image(self, index)
+        # if img.ndim < 3:  # TIR => unsqueeze the 1-channel GL to RGB
+        #     tir_signal = True
+        #     img = img[:, :, np.newaxis]
        # place img in img9
        if i == 0:  # center
-            img9 = np.full((s * 3, s * 3, img.shape[2]), 114, dtype=np.uint8)  # base image with 9 tiles
+            if self.is_tir_signal:
+                img9 = np.full((s * 3, s * 3, img.shape[2]), 0, dtype=np.uint16)  # base image with 9 tiles; fill 0 (coldest) rather than gray 114
+            else:
+                img9 = np.full((s * 3, s * 3, img.shape[2]), 114, dtype=np.uint8)  # base image with 9 tiles
+
            h0, w0 = h, w
            c = s, s, s + w, s + h  # xmin, ymin, xmax, ymax (base) coordinates
        elif i == 1:  # top
diff --git a/utils/general.py b/utils/general.py
index decdcc64ec..0c28236adc 100644
--- a/utils/general.py
+++ b/utils/general.py
@@ -871,6 +871,7 @@ def apply_classifier(x, model, img, im0):
                im = im[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
                im = np.ascontiguousarray(im, dtype=np.float32)  # uint8 to float32
                im /= 255.0  # 0 - 255 to 0.0 - 1.0
+                raise  # image already normalized in the datasets class; this path must not rescale again
                ims.append(im)

            pred_cls2 = model(torch.Tensor(ims).to(d.device)).argmax(1)  # classifier prediction
diff --git a/utils/loss.py b/utils/loss.py
index 2b1d968f8f..8ae8cd15f9 100644
--- a/utils/loss.py
+++ b/utils/loss.py
@@ -418,7 +418,7 @@ def backward(ctx, out_grad1):
        g1, = ctx.saved_tensors
        return g1 * out_grad1, None, None

-
+# Dual obj and cls losses and outputs, inherited from Joseph Redmon's original YOLOv3
class ComputeLoss:
    # Compute losses
    def __init__(self, model, autobalance=False):
diff --git a/utils/plots.py b/utils/plots.py
index fdd8d0e853..0497eec62c 100644
--- a/utils/plots.py
+++ b/utils/plots.py
@@ -111,7 +111,7 @@ def output_to_target(output):
    return np.array(targets)


-def plot_images(images, targets, paths=None, fname='images.jpg', names=None, max_size=640, max_subplots=16):
+def plot_images(images, targets, paths=None, fname='images.jpg', input_channels=3, names=None, max_size=640, max_subplots=16):
    # Plot image grid with labels

    if isinstance(images, torch.Tensor):
@@ -148,7 +148,14 @@ def plot_images(images, targets, paths=None, fname='images.jpg', names=None, max
        if scale_factor < 1:
            img = cv2.resize(img, (w, h))

-        mosaic[block_y:block_y + h, block_x:block_x + w, :] = img
+        if img.ndim > 2:  # color image: place as-is
+            mosaic[block_y:block_y + h, block_x:block_x + w, :] = img
+        else:
+            # GL (gray-level) image: replicate the single channel to 3 for plotting
+            mosaic[block_y:block_y + h, block_x:block_x + w, :] = np.repeat(img[np.newaxis, :, :], 3, axis=0).transpose(1, 2, 0)
+
+
        if len(targets) > 0:
            image_targets = targets[targets[:, 0] == i]
            boxes = xywh2xyxy(image_targets[:, 2:6]).T
@@ -487,3 +494,17 @@ def plot_skeleton_kpts(im, kpts, steps, orig_shape=None):
        if pos2[0] % 640 == 0 or pos2[1] % 640 == 0 or pos2[0] < 0 or pos2[1] < 0:
            continue
        cv2.line(im, pos1, pos2, (int(r), int(g), int(b)), thickness=2)
+
+
+def append_to_txt(file_path, class_name, images, labels, P, R, map_5, map_5_95):
+    # Format the new line with the provided values
+    formatted_line = f"{class_name:<10} {images:<10} {labels:<10} {P:<10.4f} {R:<10.4f} {map_5:<10.4f} {map_5_95:<10.4f}\n"
+
+    # Open the file in append mode and write the new line
+    with open(file_path, 'a') as file:
+        file.write(formatted_line)
+
+
+# # Example usage:
+# file_path = 'results.txt'
+# append_to_txt(file_path, 'Dog', 500, 1500, 0.85, 0.80, 0.90, 0.75)
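

Reference sketches accompanying the patch (illustrative only, not part of the diff). First, the per-image percentile normalization that --norm-type single_image_percentile_0_1 selects, as a minimal standalone function; the helper name percentile_scale and the epsilon guard against flat frames are my additions:

import numpy as np

def percentile_scale(img: np.ndarray, percentile: float = 0.3) -> np.ndarray:
    # Mirrors the 'single_image_percentile_0_1' branch of scaling_image:
    # the lower/upper `percentile` tails of the intensity histogram are
    # treated as outliers, and the remaining dynamic range is mapped to [0, 1].
    lo = np.percentile(img, percentile)
    hi = np.percentile(img, 100 - percentile)
    out = (img.astype(np.float64) - lo) / max(hi - lo, 1e-12)  # guard against a flat frame
    return np.clip(out, 0.0, 1.0)

# Example: a synthetic 16-bit frame with a hot-pixel outlier
frame = np.random.randint(20000, 22000, (64, 64), dtype=np.uint16)
frame[0, 0] = 65535  # dead/hot pixel would otherwise compress the range
scaled = percentile_scale(frame, percentile=0.3)
assert 0.0 <= scaled.min() and scaled.max() <= 1.0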
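The temperature-polarity inversion used as a TIR-specific augmentation (hyp['inversion'], applied via inversion_aug) is just an intensity complement. A sketch under the assumption that float frames have already been scaled to [0, 1]; invert_polarity is a hypothetical name:

import numpy as np

def invert_polarity(img: np.ndarray) -> np.ndarray:
    # Hot-white <-> hot-black inversion, as in inversion_aug above.
    if img.dtype in (np.uint8, np.uint16):
        return np.iinfo(img.dtype).max - img  # integer images: complement against dtype max
    if img.dtype in (np.float32, np.float64):
        return 1.0 - img  # assumes the image is already scaled to [0, 1]
    raise ValueError(f'unsupported dtype {img.dtype}')

rng = np.random.default_rng(0)
img = rng.integers(0, 2**16, (4, 4), dtype=np.uint16)
if rng.random() < 0.5:  # hyp['inversion'] plays the role of this probability in the patch
    img = invert_polarity(img)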
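The gray-level-to-multichannel replication that lets 1-channel TIR frames feed a first conv (and COCO-pretrained weights) expecting 3 input channels is the np.repeat trick the patch applies in LoadImages and LoadImagesAndLabels. Factored into a standalone helper (expand_gl_to_channels is a hypothetical name):

import numpy as np

def expand_gl_to_channels(img: np.ndarray, input_channels: int = 3) -> np.ndarray:
    # Replicate a single-channel (H, W) gray-level frame to (C, H, W),
    # matching the CHW layout the model consumes.
    assert img.ndim == 2, 'expected a single-channel frame'
    return np.repeat(img[np.newaxis, :, :], input_channels, axis=0)

tir = np.zeros((640, 640), dtype=np.uint16)
chw = expand_gl_to_channels(tir, input_channels=3)  # -> shape (3, 640, 640)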
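Finally, the TODO block in train.py mentions a torch WeightedRandomSampler. A typical wiring would look like the following sketch; the per-image weights here are entirely hypothetical (in practice they would be derived from class frequencies):

import torch
from torch.utils.data import WeightedRandomSampler

# Hypothetical per-image weights: upweight images containing rare classes.
image_weights = torch.tensor([0.2, 0.2, 0.8, 1.0])  # one weight per dataset index
sampler = WeightedRandomSampler(weights=image_weights,
                                num_samples=len(image_weights),
                                replacement=True)
# loader = DataLoader(dataset, batch_size=16, sampler=sampler)  # sampler replaces shuffle=True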