From 94eb99ee714735a52512d8aa35488bfd7eb3039d Mon Sep 17 00:00:00 2001
From: TannyLe <130630658+tannyle289@users.noreply.github.com>
Date: Tue, 3 Sep 2024 21:02:36 +0200
Subject: [PATCH] Add crop image functionality and min_width, min_height conditions in project_config.yaml

---
 Dockerfile                         |   8 +-
 condition.py                       | 116 ++++++++++++++++++++++++++---
 exports/flat/flat_export.py        |  13 +++-
 exports/yolov8/yolov8_export.py    |  13 +++-
 projects/helmet/helmet_config.yaml |   9 ++-
 projects/helmet/helmet_project.py  |  27 ++++++-
 services/harvest_service.py        |   5 +-
 single-shot.py                     |  27 +++----
 8 files changed, 177 insertions(+), 41 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 6ed0853..fc1bd53 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -40,8 +40,8 @@ WORKDIR /ml
 COPY . .
 
 # Environment variables
-ENV MEDIA_SAVEPATH "/ml/data/input/input_video.mp4"
-
+# Feature parameters
+ENV PROJECT_NAME=""
 
 # Dataset parameters
 ENV DATASET_FORMAT="base"
@@ -79,9 +79,6 @@ ENV S3_ACCESS_KEY=""
 ENV S3_SECRET_KEY=""
 ENV S3_BUCKET=""
 
-# Feature parameters
-ENV PROJECT_NAME=""
-
 ENV CREATE_BBOX_FRAME "False"
 ENV SAVE_BBOX_FRAME "False"
 ENV BBOX_FRAME_SAVEPATH "/ml/data/output/output_bbox_frame.jpg"
@@ -100,7 +97,6 @@ ENV MAX_NUMBER_OF_PREDICTIONS ""
 ENV MIN_DISTANCE ""
 ENV MIN_STATIC_DISTANCE ""
 ENV MIN_DETECTIONS ""
-ENV ALLOWED_CLASSIFICATIONS "0, 1, 2, 3, 5, 7, 14, 15, 16, 24, 26, 28"
 ENV IOU ""
 ENV FRAMES_SKIP_AFTER_DETECT ""
 ENV MIN_DETECTIONS ""
diff --git a/condition.py b/condition.py
index 720df0d..dc1f817 100644
--- a/condition.py
+++ b/condition.py
@@ -5,7 +5,7 @@
 var = VariableClass()
 
 
-def process_frame(frame, project, video_out='', frames_out=''):
+def process_frame(frame, project, cv2=None, frames_out=''):
     # Perform object classification on the frame.
     # persist=True -> The tracking results are stored in the model.
     # persist should be kept True, as this provides unique IDs for each detection.
@@ -32,7 +32,7 @@ def process_frame(frame, project, video_out='', frames_out=''):
         total_time_class_prediction += time.time() - start_time_class_prediction
 
         if len(cur_results[0]) == 0:
-            return frame, total_time_class_prediction, False, labels_and_boxes
+            return None, labels_and_boxes, None, total_time_class_prediction, False
 
         total_results.append(cur_results[0])
 
@@ -49,15 +49,16 @@ def process_frame(frame, project, video_out='', frames_out=''):
     # Since we have over 1k videos per day, the dataset we collect need to be high-quality
     # Valid image need to:
     #       + Have at least MIN_DETECTIONS objects detected:
-    #       + Have to have helmet (since we are lacking of helmet dataset)
+    #       + Have to satisfy the project.condition_func which defines custom condition logic for every specific project.
     if project.condition_func(total_results):
         for index, results in enumerate(total_results):
             # As a convention we will store all result labels under model1's
             # The other models' will be mapped accordingly
             if not combined_results:
-                combined_results += [(box.xywhn, box.cls, box.conf) for box in results.boxes]
+                combined_results += [(box.xywhn, box.xyxy, box.cls, box.conf) for box in results.boxes]
             else:
-                combined_results += [(box.xywhn, project.map_to_first_model(index, box.cls), box.conf) for box in results.boxes]
+                combined_results += [(box.xywhn, box.xyxy, project.map_to_first_model(index, box.cls), box.conf) for box
+                                     in results.boxes]
 
         # sort results based on descending confidences
         sorted_combined_results = sorted(combined_results, key=lambda x: x[2], reverse=True)
@@ -68,7 +69,7 @@ def process_frame(frame, project, video_out='', frames_out=''):
         for element in sorted_combined_results:
             add_flag = True
             for res in combined_results:
-                if res[1] == element[1]:
+                if res[2] == element[2]:  # classes comparison
                     if (abs(res[0][0][0] - element[0][0][0]) < 0.01 and
                             (abs(res[0][0][1] - element[0][0][1]) < 0.01)):
                         add_flag = False
@@ -78,8 +79,103 @@ def process_frame(frame, project, video_out='', frames_out=''):
         # If the combined result has at least MIN_DETECTIONS boxes found (Could belong to either class)
         if len(combined_results) >= var.MIN_DETECTIONS:
             print("Condition met, we are gathering the labels and boxes and return results")
-            for xywhn, cls, _ in combined_results:
-                labels_and_boxes += f'{int(cls)} {xywhn[0, 0].item()} {xywhn[0, 1].item()} {xywhn[0, 2].item()} {xywhn[0, 3].item()}\n'
-            return frame, total_time_class_prediction, True, labels_and_boxes
+            # Crop the frame to keep only the area of interest and reduce storage waste
+            cropped_frame, cropped_coordinate = __crop_frame__(frame, combined_results)
 
-    return frame, total_time_class_prediction, False, labels_and_boxes
+            # If you want to check whether the labels are transformed and
+            # applied correctly to the cropped frame, uncomment the line below
+            labeled_frame = None
+            # labeled_frame = __get_labeled_frame__(cropped_frame, cropped_coordinate, cv2, combined_results)
+
+            # Transform the labels and boxes accordingly
+            labels_and_boxes = __transform_labels__(cropped_frame, cropped_coordinate, combined_results)
+            total_time_class_prediction += time.time() - start_time_class_prediction
+            return cropped_frame, labels_and_boxes, labeled_frame, total_time_class_prediction, True
+
+    return None, labels_and_boxes, None, total_time_class_prediction, False
+
+
+def __crop_frame__(frame, combined_results, padding=100):
+    """
+    Crop the frame to keep only the area of interest, removing background that contains no detections.
+
+    Args:
+        frame: The original frame to be processed.
+        combined_results: List of results detected by models.
+        padding: Extra pixel padding added around the detections so objects are not cut off.
+    """
+    # If the combined result has at least MIN_DETECTIONS boxes found
+    if len(combined_results) >= var.MIN_DETECTIONS:
+        # Initialize bounding box limits
+        x1_min, y1_min, x2_max, y2_max = float('inf'), float('inf'), float('-inf'), float('-inf')
+
+        for _, xyxy, _, _ in combined_results:
+            x1, y1, x2, y2 = xyxy[0]
+            x1_min, y1_min = min(x1_min, x1), min(y1_min, y1)
+            x2_max, y2_max = max(x2_max, x2), max(y2_max, y2)
+
+        # Apply padding to the bounding box
+        orig_height, orig_width = frame.shape[:2]
+        x1_min = int(max(0, x1_min - padding))
+        y1_min = int(max(0, y1_min - padding))
+        x2_max = int(min(orig_width, x2_max + padding))
+        y2_max = int(min(orig_height, y2_max + padding))
+
+        # Crop the frame to the union bounding box with padding
+        cropped_frame = frame[y1_min:y2_max, x1_min:x2_max]
+
+        return cropped_frame, (x1_min, y1_min, x2_max, y2_max)
+
+
+def __transform_labels__(cropped_frame, cropped_coordinate, combined_results):
+    """
+    Transform the label and box coordinates to match the cropped frame.
+
+    Args:
+        cropped_frame: The cropped frame that the labels are transformed to match.
+        cropped_coordinate: Coordinates of the cropped region within the original frame (xyxy format).
+        combined_results: List of results detected by models.
+    """
+    labels_and_boxes = ''
+    frame_height, frame_width = cropped_frame.shape[:2]  # shape is (height, width)
+
+    for _, xyxy, cls, conf in combined_results:
+        x1, y1, x2, y2 = xyxy[0]
+        x1, y1, x2, y2 = int(abs(x1 - cropped_coordinate[0])), int(abs(y1 - cropped_coordinate[1])), int(abs(x2 - cropped_coordinate[0])), int(abs(y2 - cropped_coordinate[1]))
+
+        x_center = (x1 + x2) / 2
+        y_center = (y1 + y2) / 2
+
+        # Calculate the xywhn values (requirement for ultralytics YOLO models dataset)
+        x_center_norm = x_center / frame_width
+        y_center_norm = y_center / frame_height
+        width_norm = (x2 - x1) / frame_width
+        height_norm = (y2 - y1) / frame_height
+
+        labels_and_boxes += f'{int(cls)} {x_center_norm} {y_center_norm} {width_norm} {height_norm}\n'
+
+    return labels_and_boxes
+
+
+def __get_labeled_frame__(cropped_frame, cropped_coordinate, cv2, combined_results):
+    """
+
+    Return the cropped frame with the transformed labels drawn on it.
+
+    Args:
+        cropped_frame: The cropped frame to draw the labels on.
+        cropped_coordinate: Coordinates of the cropped region within the original frame (xyxy format).
+        cv2: The OpenCV module (cv2), passed in by the caller.
+        combined_results: List of results detected by models.
+    """
+    labeled_frame = cropped_frame.copy()
+    for _, xyxy, cls, _ in combined_results:
+        x1, y1, x2, y2 = xyxy[0]
+        x1, y1, x2, y2 = int(abs(x1 - cropped_coordinate[0])), int(abs(y1 - cropped_coordinate[1])), int(abs(x2 - cropped_coordinate[0])), int(abs(y2 - cropped_coordinate[1]))
+        print(f"Box: {xyxy}, Class: {int(cls)}")
+        print(f"Width: {x2 - x1} and height: {y2 - y1}")
+        cv2.rectangle(labeled_frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
+        cv2.putText(labeled_frame, f'{int(cls)}', (x1 - 10, y1 - 20),
+                    cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 255, 0), 2)
+
+    return labeled_frame
diff --git a/exports/flat/flat_export.py b/exports/flat/flat_export.py
index 07a960f..0e888bb 100644
--- a/exports/flat/flat_export.py
+++ b/exports/flat/flat_export.py
@@ -25,6 +25,7 @@ def __init__(self, name):
         self.proj_dir = pjoin(_cur_dir, f'../../data/{name}')
         self.proj_dir = pabspath(self.proj_dir)  # normalise the link
         self.result_dir_path = None
+        self.result_labeled_dir_path = None
 
     def initialize_save_dir(self):
         """
@@ -36,6 +37,9 @@ def initialize_save_dir(self):
         self.result_dir_path = pjoin(self.proj_dir, f'{self._var.DATASET_FORMAT}-v{self._var.DATASET_VERSION}')
         os.makedirs(self.result_dir_path, exist_ok=True)
 
+        self.result_labeled_dir_path = pjoin(self.proj_dir,
+                                             f'{self._var.DATASET_FORMAT}-v{self._var.DATASET_VERSION}-labeled')
+
         if os.path.exists(self.result_dir_path):
             print('Successfully initialize save directory!')
             return True
@@ -43,7 +47,7 @@ def initialize_save_dir(self):
             print('Something wrong happened!')
             return False
 
-    def save_frame(self, frame, predicted_frames, cv2, labels_and_boxes):
+    def save_frame(self, frame, predicted_frames, cv2, labels_and_boxes, labeled_frame=None):
         """
         See iflat_export.py
 
@@ -57,6 +61,13 @@ def save_frame(self, frame, predicted_frames, cv2, labels_and_boxes):
         cv2.imwrite(
             f'{self.result_dir_path}/{unix_time}.png',
             frame)
+
+        if labeled_frame is not None and labeled_frame.any():
+            os.makedirs(self.result_labeled_dir_path, exist_ok=True)
+
+            cv2.imwrite(
+                f'{self.result_labeled_dir_path}/{unix_time}.png',
+                labeled_frame)
 
         # Save labels and boxes
         with open(f'{self.result_dir_path}/{unix_time}.txt', 'w') as my_file:
diff --git a/exports/yolov8/yolov8_export.py b/exports/yolov8/yolov8_export.py
index 05044fe..0579132 100644
--- a/exports/yolov8/yolov8_export.py
+++ b/exports/yolov8/yolov8_export.py
@@ -28,6 +28,7 @@ def __init__(self, name):
         self.label_dir_path = None
         self.yaml_path = None
         self.result_dir_path = None
+        self.result_labeled_dir_path = None
 
     def initialize_save_dir(self):
         """
@@ -47,6 +48,9 @@ def initialize_save_dir(self):
 
         self.yaml_path = pjoin(self.result_dir_path, 'data.yaml')
 
+        self.result_labeled_dir_path = pjoin(self.proj_dir,
+                                             f'{self._var.DATASET_FORMAT}-v{self._var.DATASET_VERSION}-labeled')
+
         if (os.path.exists(self.result_dir_path)
                 and os.path.exists(self.image_dir_path)
                 and os.path.exists(self.label_dir_path)):
@@ -56,7 +60,7 @@ def initialize_save_dir(self):
             print('Something wrong happened!')
             return False
 
-    def save_frame(self, frame, predicted_frames, cv2, labels_and_boxes):
+    def save_frame(self, frame, predicted_frames, cv2, labels_and_boxes, labeled_frame=None):
         """
         See iyolov8_export.py
 
@@ -70,6 +74,13 @@ def save_frame(self, frame, predicted_frames, cv2, labels_and_boxes):
         cv2.imwrite(
             f'{self.image_dir_path}/{unix_time}.png',
             frame)
+
+        if labeled_frame is not None and labeled_frame.any():
+            os.makedirs(self.result_labeled_dir_path, exist_ok=True)
+
+            cv2.imwrite(
+                f'{self.result_labeled_dir_path}/{unix_time}.png',
+                labeled_frame)
 
         # Save labels and boxes
         with open(f'{self.label_dir_path}/{unix_time}.txt', 'w') as my_file:
diff --git a/projects/helmet/helmet_config.yaml b/projects/helmet/helmet_config.yaml
index cec2e58..494424c 100644
--- a/projects/helmet/helmet_config.yaml
+++ b/projects/helmet/helmet_config.yaml
@@ -1,10 +1,11 @@
 models:
   - helmet_dectector_1k_16b_150e.pt
-  - yolov8n.pt
-  - yolov8n.pt
+  - yolov8x.pt
 allowed_classes:
   - [0, 1, 2]
   - [0]
-  - [0]
-temp: "/tmp/video.mp4" # System will temporarily download video from Integration to process
+min_height: 100
+min_width: 30
+
+temp: "/tmp/video.mp4" # System will temporarily download the video from the integration platform (S3, Roboflow) to this path for processing
diff --git a/projects/helmet/helmet_project.py b/projects/helmet/helmet_project.py
index 836f2bd..27b7b1e 100644
--- a/projects/helmet/helmet_project.py
+++ b/projects/helmet/helmet_project.py
@@ -27,13 +27,22 @@ def __init__(self):
         super().__init__()
         self._config = self.__read_config__(config_path)
         self.temp_path = self._config.get('temp')
+        self.min_width = int(self._config.get('min_width')) if self._config.get('min_width') else 0
+        self.min_height = int(self._config.get('min_height')) if self._config.get('min_height') else 0
         self.models, self.models_allowed_classes = self.connect_models()
         self.mapping = self.class_mapping(self.models)
         self.create_proj_save_dir()
 
     def condition_func(self, total_results):
         """
-        See ihelmet_project.py
+        Apply the custom condition for the helmet project.
+        For each frame processed by all models, every condition below has to be satisfied:
+        - All models have to return results
+        - Model0 has at least one PERSON detection
+        - Model1 has at least one PERSON detection
+        - Model0 has at least one HELMET detection
+        - Every PERSON bounding box detected by any model has a height greater than min_height
+        - Every PERSON bounding box detected by any model has a width greater than min_width
 
         Returns:
             None
@@ -42,9 +51,19 @@ def condition_func(self, total_results):
         person_model1 = self.mapping[person_model0][1]  # Mapping person from model1 to model0
         helmet_model0 = 1
 
-        return (any(box.cls == person_model0 for box in total_results[0].boxes)
-                and any(box.cls == helmet_model0 for box in total_results[0].boxes)
-                and any(box.cls == person_model1 for box in total_results[1].boxes))
+        has_person_model0 = any(box.cls == person_model0 for box in total_results[0].boxes)
+        has_helmet_model0 = any(box.cls == helmet_model0 for box in total_results[0].boxes)
+        has_person_model1 = any(box.cls == person_model1 for box in total_results[1].boxes)
+        has_minimum_width_height_model0 = all(box.xywh[0, 2] > self.min_width
+                                              and box.xywh[0, 3] > self.min_height for box in total_results[0].boxes
+                                              if box.cls == person_model0)
+        has_minimum_width_height_model1 = all(box.xywh[0, 2] > self.min_width
+                                              and box.xywh[0, 3] > self.min_height for box in total_results[1].boxes
+                                              if box.cls == person_model1)
+        if has_person_model0 and has_helmet_model0 and has_person_model1 and has_minimum_width_height_model0 and has_minimum_width_height_model1:
+            return True
+        else:
+            return False
 
     def class_mapping(self, models):
         """
diff --git a/services/harvest_service.py b/services/harvest_service.py
index 72f761b..ce920d6 100644
--- a/services/harvest_service.py
+++ b/services/harvest_service.py
@@ -191,6 +191,7 @@ def evaluate(self, video):
                                                skip_frames_counter)
         # Free all resources
         cv2.destroyAllWindows()
+        return self.export.result_dir_path
 
     def get_frame(self, cap: cv2.VideoCapture, skip_frames_counter):
@@ -219,10 +220,10 @@ def predict_frame(self, frame, skip_frames_counter):
             int: The updated skip frames counter.
         """
         if self.frame_number > 0 and self.frame_skip_factor > 0 and self.frame_number % self.frame_skip_factor == 0:
-            frame, total_time_class_prediction, condition_met, labels_and_boxes = con_process_frame(frame, self.project)
+            frame, labels_and_boxes, labeled_frame, total_time_class_prediction, condition_met = con_process_frame(frame, self.project, cv2)
             if condition_met:
-                self.predicted_frames = self.export.save_frame(frame, self.predicted_frames, cv2, labels_and_boxes)
+                self.predicted_frames = self.export.save_frame(frame, self.predicted_frames, cv2, labels_and_boxes, labeled_frame)
                 skip_frames_counter = self._var.FRAMES_SKIP_AFTER_DETECT
             print(f'Currently in frame: {self.frame_number}')
             self.frame_number += 1
diff --git a/single-shot.py b/single-shot.py
index c4f04ba..5e57f9c 100644
--- a/single-shot.py
+++ b/single-shot.py
@@ -1,6 +1,7 @@
 # This script is used to look for objects under a specific condition (at least 5 persons etc)
 # The script reads a video from a message queue, classifies the objects in the video, and does a condition check.
 # If condition is met, the video is being forwarded to a remote vault.
+from exports.export_factory import ExportFactory
 from integrations.integration_factory import IntegrationFactory
 from projects.project_factory import ProjectFactory
 from services.harvest_service import HarvestService
@@ -13,30 +14,30 @@
 
 
 def init():
+    # Service and Project initializations
+    project = ProjectFactory().init()
+    integration = IntegrationFactory().init()
+    export = ExportFactory().init()
     harvest_service = HarvestService()
-    model1, model2 = harvest_service.connect_models()
-
-    project = ProjectFactory().init('helmet')
-    # Mapping classes of 2 models
-    mapping = project.class_mapping(model1, model2)
-    integration = IntegrationFactory().init()
+    # Register components to the service
+    harvest_service.register('project', project)
+    harvest_service.register('integration', integration)
+    harvest_service.register('export', export)
 
     # Open video-capture/recording using the video-path. Throw FileNotFoundError if cap is unable to open.
-    cap = harvest_service.open_video()
     time_verbose = TimeVerbose()
+    video = harvest_service.open_video()
 
     if var.LOGGING:
         print(f'5. Classifying frames')
     if var.TIME_VERBOSE:
         time_verbose.add_preprocessing_time()
 
-    save_dir = harvest_service.process(
-        cap,
-        model1,
-        model2,
-        project.condition_func,
-        mapping)
+    # Evaluate the video
+    save_dir = harvest_service.evaluate(video)
+
+    # Upload the dataset if enabled
     if var.DATASET_UPLOAD:
         integration.upload_dataset(save_dir)
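
Illustrative sketch (not part of the patch): the snippet below walks through the crop-and-relabel arithmetic that the new __crop_frame__ and __transform_labels__ helpers introduce in condition.py, using a made-up frame size, box, and class id. It assumes plain NumPy arrays and the (xywhn, xyxy, cls, conf) tuple layout added above; the abs()/int() calls from the patch are omitted here because the example box lies fully inside the crop.

import numpy as np

# Hypothetical values: a 720x1280 frame and one person box in absolute xyxy pixels.
frame = np.zeros((720, 1280, 3), dtype=np.uint8)
x1, y1, x2, y2 = 400.0, 200.0, 600.0, 500.0
cls = 0
padding = 100

# Union box with padding, clamped to the frame bounds (same arithmetic as __crop_frame__).
orig_height, orig_width = frame.shape[:2]
crop_x1 = int(max(0, x1 - padding))
crop_y1 = int(max(0, y1 - padding))
crop_x2 = int(min(orig_width, x2 + padding))
crop_y2 = int(min(orig_height, y2 + padding))
cropped_frame = frame[crop_y1:crop_y2, crop_x1:crop_x2]

# Shift the box into the crop's coordinate system and normalise to YOLO xywhn
# (same arithmetic as __transform_labels__; note shape[:2] is (height, width)).
crop_height, crop_width = cropped_frame.shape[:2]
bx1, by1, bx2, by2 = x1 - crop_x1, y1 - crop_y1, x2 - crop_x1, y2 - crop_y1
x_center_norm = ((bx1 + bx2) / 2) / crop_width
y_center_norm = ((by1 + by2) / 2) / crop_height
width_norm = (bx2 - bx1) / crop_width
height_norm = (by2 - by1) / crop_height

# One line of the YOLO label file that save_frame would write: "0 0.5 0.5 0.5 0.6"
print(f'{cls} {x_center_norm} {y_center_norm} {width_norm} {height_norm}')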