From 94eb99ee714735a52512d8aa35488bfd7eb3039d Mon Sep 17 00:00:00 2001
From: TannyLe <130630658+tannyle289@users.noreply.github.com>
Date: Tue, 3 Sep 2024 21:02:36 +0200
Subject: [PATCH] Add crop image functionality and min_width, min_height conditions in project_config.yaml

---
 Dockerfile                         |   8 +-
 condition.py                       | 116 ++++++++++++++++++++++++++---
 exports/flat/flat_export.py        |  13 +++-
 exports/yolov8/yolov8_export.py    |  13 +++-
 projects/helmet/helmet_config.yaml |   9 ++-
 projects/helmet/helmet_project.py  |  27 ++++++-
 services/harvest_service.py        |   5 +-
 single-shot.py                     |  27 +++----
 8 files changed, 177 insertions(+), 41 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 6ed0853..fc1bd53 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -40,8 +40,8 @@ WORKDIR /ml
 COPY . .
 
 # Environment variables
-ENV MEDIA_SAVEPATH "/ml/data/input/input_video.mp4"
-
+# Feature parameters
+ENV PROJECT_NAME=""
 
 # Dataset parameters
 ENV DATASET_FORMAT="base"
@@ -79,9 +79,6 @@ ENV S3_ACCESS_KEY=""
 ENV S3_SECRET_KEY=""
 ENV S3_BUCKET=""
 
-# Feature parameters
-ENV PROJECT_NAME=""
-
 ENV CREATE_BBOX_FRAME "False"
 ENV SAVE_BBOX_FRAME "False"
 ENV BBOX_FRAME_SAVEPATH "/ml/data/output/output_bbox_frame.jpg"
@@ -100,7 +97,6 @@ ENV MAX_NUMBER_OF_PREDICTIONS ""
 ENV MIN_DISTANCE ""
 ENV MIN_STATIC_DISTANCE ""
 ENV MIN_DETECTIONS ""
-ENV ALLOWED_CLASSIFICATIONS "0, 1, 2, 3, 5, 7, 14, 15, 16, 24, 26, 28"
 ENV IOU ""
 ENV FRAMES_SKIP_AFTER_DETECT ""
 ENV MIN_DETECTIONS ""
diff --git a/condition.py b/condition.py
index 720df0d..dc1f817 100644
--- a/condition.py
+++ b/condition.py
@@ -5,7 +5,7 @@
 var = VariableClass()
 
 
-def process_frame(frame, project, video_out='', frames_out=''):
+def process_frame(frame, project, cv2=None, frames_out=''):
     # Perform object classification on the frame.
     # persist=True -> The tracking results are stored in the model.
     # persist should be kept True, as this provides unique IDs for each detection.
@@ -32,7 +32,7 @@ def process_frame(frame, project, video_out='', frames_out=''):
         total_time_class_prediction += time.time() - start_time_class_prediction
 
         if len(cur_results[0]) == 0:
-            return frame, total_time_class_prediction, False, labels_and_boxes
+            return None, labels_and_boxes, None, total_time_class_prediction, False
 
         total_results.append(cur_results[0])
 
@@ -49,15 +49,16 @@ def process_frame(frame, project, video_out='', frames_out=''):
     # Since we have over 1k videos per day, the dataset we collect need to be high-quality
     # Valid image need to:
     #       + Have at least MIN_DETECTIONS objects detected:
-    #       + Have to have helmet (since we are lacking of helmet dataset)
+    #       + Have to satisfy the project.condition_func which defines custom condition logic for every specific project.
     if project.condition_func(total_results):
         for index, results in enumerate(total_results):
             # As a convention we will store all result labels under model1's
             # The other models' will be mapped accordingly
             if not combined_results:
-                combined_results += [(box.xywhn, box.cls, box.conf) for box in results.boxes]
+                combined_results += [(box.xywhn, box.xyxy, box.cls, box.conf) for box in results.boxes]
             else:
-                combined_results += [(box.xywhn, project.map_to_first_model(index, box.cls), box.conf) for box in results.boxes]
+                combined_results += [(box.xywhn, box.xyxy, project.map_to_first_model(index, box.cls), box.conf) for box
+                                     in results.boxes]
 
         # sort results based on descending confidences
         sorted_combined_results = sorted(combined_results, key=lambda x: x[2], reverse=True)
@@ -68,7 +69,7 @@ def process_frame(frame, project, video_out='', frames_out=''):
         for element in sorted_combined_results:
             add_flag = True
             for res in combined_results:
-                if res[1] == element[1]:
+                if res[2] == element[2]:  # classes comparison
                     if (abs(res[0][0][0] - element[0][0][0]) < 0.01 and
                             (abs(res[0][0][1] - element[0][0][1]) < 0.01)):
                         add_flag = False
@@ -78,8 +79,103 @@ def process_frame(frame, project, video_out='', frames_out=''):
         # If the combined result has at least MIN_DETECTIONS boxes found (Could belong to either class)
         if len(combined_results) >= var.MIN_DETECTIONS:
             print("Condition met, we are gathering the labels and boxes and return results")
-            for xywhn, cls, _ in combined_results:
-                labels_and_boxes += f'{int(cls)} {xywhn[0, 0].item()} {xywhn[0, 1].item()} {xywhn[0, 2].item()} {xywhn[0, 3].item()}\n'
-            return frame, total_time_class_prediction, True, labels_and_boxes
+            # Crop the frame to keep only the area of interest and reduce storage waste
+            cropped_frame, cropped_coordinate = __crop_frame__(frame, combined_results)
 
-    return frame, total_time_class_prediction, False, labels_and_boxes
+            # If you want to check whether the labels are transformed and
+            # applied correctly to the cropped frame, uncomment the line below
+            labeled_frame = None
+            # labeled_frame = __get_labeled_frame__(cropped_frame, cropped_coordinate, cv2, combined_results)
+
+            # Transform the labels and boxes accordingly
+            labels_and_boxes = __transform_labels__(cropped_frame, cropped_coordinate, combined_results)
+            total_time_class_prediction += time.time() - start_time_class_prediction
+            return cropped_frame, labels_and_boxes, labeled_frame, total_time_class_prediction, True
+
+    return None, labels_and_boxes, None, total_time_class_prediction, False
+
+
+def __crop_frame__(frame, combined_results, padding=100):
+    """
+    Crop the frame to keep only the area of interest, removing background that contains no detections.
+
+    Args:
+        frame: The original frame to be processed.
+        combined_results: List of results detected by models.
+        padding: Extra pixel padding added around the detections so objects are not cut off.
+    """
+    # If the combined result has at least MIN_DETECTIONS boxes found
+    if len(combined_results) >= var.MIN_DETECTIONS:
+        # Initialize bounding box limits
+        x1_min, y1_min, x2_max, y2_max = float('inf'), float('inf'), float('-inf'), float('-inf')
+
+        for _, xyxy, _, _ in combined_results:
+            x1, y1, x2, y2 = xyxy[0]
+            x1_min, y1_min = min(x1_min, x1), min(y1_min, y1)
+            x2_max, y2_max = max(x2_max, x2), max(y2_max, y2)
+
+        # Apply padding to the bounding box
+        orig_height, orig_width = frame.shape[:2]
+        x1_min = int(max(0, x1_min - padding))
+        y1_min = int(max(0, y1_min - padding))
+        x2_max = int(min(orig_width, x2_max + padding))
+        y2_max = int(min(orig_height, y2_max + padding))
+
+        # Crop the frame to the union bounding box with padding
+        cropped_frame = frame[y1_min:y2_max, x1_min:x2_max]
+
+        return cropped_frame, (x1_min, y1_min, x2_max, y2_max)
+
+
+def __transform_labels__(cropped_frame, cropped_coordinate, combined_results):
+    """
+    Transform the label and box coordinates to match the cropped frame.
+
+    Args:
+        cropped_frame: The cropped frame that the labels are transformed to match.
+        cropped_coordinate: Coordinates of the cropped region within the original frame (xyxy format).
+        combined_results: List of results detected by models.
+    """
+    labels_and_boxes = ''
+    frame_height, frame_width = cropped_frame.shape[:2]  # shape is (height, width)
+
+    for _, xyxy, cls, conf in combined_results:
+        x1, y1, x2, y2 = xyxy[0]
+        x1, y1, x2, y2 = int(abs(x1 - cropped_coordinate[0])), int(abs(y1 - cropped_coordinate[1])), int(abs(x2 - cropped_coordinate[0])), int(abs(y2 - cropped_coordinate[1]))
+
+        x_center = (x1 + x2) / 2
+        y_center = (y1 + y2) / 2
+
+        # Calculate the xywhn values (requirement for ultralytics YOLO models dataset)
+        x_center_norm = x_center / frame_width
+        y_center_norm = y_center / frame_height
+        width_norm = (x2 - x1) / frame_width
+        height_norm = (y2 - y1) / frame_height
+
+        labels_and_boxes += f'{int(cls)} {x_center_norm} {y_center_norm} {width_norm} {height_norm}\n'
+
+    return labels_and_boxes
+
+
+def __get_labeled_frame__(cropped_frame, cropped_coordinate, cv2, combined_results):
+    """
+
+    Return the cropped frame with the transformed labels drawn on it.
+
+    Args:
+        cropped_frame: The cropped frame to draw the labels on.
+        cropped_coordinate: Coordinates of the cropped region within the original frame (xyxy format).
+        cv2: The OpenCV module (cv2), passed in by the caller.
+        combined_results: List of results detected by models.
+    """
+    labeled_frame = cropped_frame.copy()
+    for _, xyxy, cls, _ in combined_results:
+        x1, y1, x2, y2 = xyxy[0]
+        x1, y1, x2, y2 = int(abs(x1 - cropped_coordinate[0])), int(abs(y1 - cropped_coordinate[1])), int(abs(x2 - cropped_coordinate[0])), int(abs(y2 - cropped_coordinate[1]))
+        print(f"Box: {xyxy}, Class: {int(cls)}")
+        print(f"Width: {x2 - x1} and height: {y2 - y1}")
+        cv2.rectangle(labeled_frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
+        cv2.putText(labeled_frame, f'{int(cls)}', (x1 - 10, y1 - 20),
+                    cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 255, 0), 2)
+
+    return labeled_frame
diff --git a/exports/flat/flat_export.py b/exports/flat/flat_export.py
index 07a960f..0e888bb 100644
--- a/exports/flat/flat_export.py
+++ b/exports/flat/flat_export.py
@@ -25,6 +25,7 @@ def __init__(self, name):
         self.proj_dir = pjoin(_cur_dir, f'../../data/{name}')
         self.proj_dir = pabspath(self.proj_dir)  # normalise the link
         self.result_dir_path = None
+        self.result_labeled_dir_path = None
 
     def initialize_save_dir(self):
         """
@@ -36,6 +37,9 @@ def initialize_save_dir(self):
         self.result_dir_path = pjoin(self.proj_dir, f'{self._var.DATASET_FORMAT}-v{self._var.DATASET_VERSION}')
         os.makedirs(self.result_dir_path, exist_ok=True)
 
+        self.result_labeled_dir_path = pjoin(self.proj_dir,
+                                             f'{self._var.DATASET_FORMAT}-v{self._var.DATASET_VERSION}-labeled')
+
         if os.path.exists(self.result_dir_path):
             print('Successfully initialize save directory!')
             return True
@@ -43,7 +47,7 @@ def initialize_save_dir(self):
             print('Something wrong happened!')
             return False
 
-    def save_frame(self, frame, predicted_frames, cv2, labels_and_boxes):
+    def save_frame(self, frame, predicted_frames, cv2, labels_and_boxes, labeled_frame=None):
         """
         See iflat_export.py
 
@@ -57,6 +61,13 @@ def save_frame(self, frame, predicted_frames, cv2, labels_and_boxes):
         cv2.imwrite(
             f'{self.result_dir_path}/{unix_time}.png',
             frame)
+
+        if labeled_frame is not None and labeled_frame.any():
+            os.makedirs(self.result_labeled_dir_path, exist_ok=True)
+
+            cv2.imwrite(
+                f'{self.result_labeled_dir_path}/{unix_time}.png',
+                labeled_frame)
 
         # Save labels and boxes
         with open(f'{self.result_dir_path}/{unix_time}.txt', 'w') as my_file:
diff --git a/exports/yolov8/yolov8_export.py b/exports/yolov8/yolov8_export.py
index 05044fe..0579132 100644
--- a/exports/yolov8/yolov8_export.py
+++ b/exports/yolov8/yolov8_export.py
@@ -28,6 +28,7 @@ def __init__(self, name):
         self.label_dir_path = None
         self.yaml_path = None
         self.result_dir_path = None
+        self.result_labeled_dir_path = None
 
     def initialize_save_dir(self):
         """
@@ -47,6 +48,9 @@ def initialize_save_dir(self):
 
         self.yaml_path = pjoin(self.result_dir_path, 'data.yaml')
 
+        self.result_labeled_dir_path = pjoin(self.proj_dir,
+                                             f'{self._var.DATASET_FORMAT}-v{self._var.DATASET_VERSION}-labeled')
+
         if (os.path.exists(self.result_dir_path)
                 and os.path.exists(self.image_dir_path)
                 and os.path.exists(self.label_dir_path)):
@@ -56,7 +60,7 @@ def initialize_save_dir(self):
             print('Something wrong happened!')
             return False
 
-    def save_frame(self, frame, predicted_frames, cv2, labels_and_boxes):
+    def save_frame(self, frame, predicted_frames, cv2, labels_and_boxes, labeled_frame=None):
         """
         See iyolov8_export.py
 
@@ -70,6 +74,13 @@ def save_frame(self, frame, predicted_frames, cv2, labels_and_boxes):
         cv2.imwrite(
             f'{self.image_dir_path}/{unix_time}.png',
             frame)
+
+        if labeled_frame is not None and labeled_frame.any():
+            os.makedirs(self.result_labeled_dir_path, exist_ok=True)
+
+            cv2.imwrite(
+                f'{self.result_labeled_dir_path}/{unix_time}.png',
+                labeled_frame)
 
         # Save labels and boxes
         with open(f'{self.label_dir_path}/{unix_time}.txt', 'w') as my_file:
diff --git a/projects/helmet/helmet_config.yaml b/projects/helmet/helmet_config.yaml
index cec2e58..494424c 100644
--- a/projects/helmet/helmet_config.yaml
+++ b/projects/helmet/helmet_config.yaml
@@ -1,10 +1,11 @@
 models:
   - helmet_dectector_1k_16b_150e.pt
-  - yolov8n.pt
-  - yolov8n.pt
+  - yolov8x.pt
 allowed_classes:
   - [0, 1, 2]
   - [0]
-  - [0]
-temp: "/tmp/video.mp4" # System will temporarily download video from Integration to process
+min_height: 100
+min_width: 30
+
+temp: "/tmp/video.mp4" # System will temporarily download the video from the integration platform (S3, Roboflow) to this path for processing
diff --git a/projects/helmet/helmet_project.py b/projects/helmet/helmet_project.py
index 836f2bd..27b7b1e 100644
--- a/projects/helmet/helmet_project.py
+++ b/projects/helmet/helmet_project.py
@@ -27,13 +27,22 @@ def __init__(self):
         super().__init__()
         self._config = self.__read_config__(config_path)
         self.temp_path = self._config.get('temp')
+        self.min_width = int(self._config.get('min_width')) if self._config.get('min_width') else 0
+        self.min_height = int(self._config.get('min_height')) if self._config.get('min_height') else 0
         self.models, self.models_allowed_classes = self.connect_models()
         self.mapping = self.class_mapping(self.models)
         self.create_proj_save_dir()
 
     def condition_func(self, total_results):
         """
-        See ihelmet_project.py
+        Apply the custom condition for the helmet project.
+        For each frame processed by all models, every condition below has to be satisfied:
+        - All models have to return results
+        - Model0 has at least one PERSON detection
+        - Model1 has at least one PERSON detection
+        - Model0 has at least one HELMET detection
+        - Every PERSON bounding box detected by any model has a height greater than min_height
+        - Every PERSON bounding box detected by any model has a width greater than min_width
 
         Returns:
             None
@@ -42,9 +51,19 @@ def condition_func(self, total_results):
         person_model1 = self.mapping[person_model0][1]  # Mapping person from model1 to model0
         helmet_model0 = 1
 
-        return (any(box.cls == person_model0 for box in total_results[0].boxes)
-                and any(box.cls == helmet_model0 for box in total_results[0].boxes)
-                and any(box.cls == person_model1 for box in total_results[1].boxes))
+        has_person_model0 = any(box.cls == person_model0 for box in total_results[0].boxes)
+        has_helmet_model0 = any(box.cls == helmet_model0 for box in total_results[0].boxes)
+        has_person_model1 = any(box.cls == person_model1 for box in total_results[1].boxes)
+        has_minimum_width_height_model0 = all(box.xywh[0, 2] > self.min_width
+                                              and box.xywh[0, 3] > self.min_height for box in total_results[0].boxes
+                                              if box.cls == person_model0)
+        has_minimum_width_height_model1 = all(box.xywh[0, 2] > self.min_width
+                                              and box.xywh[0, 3] > self.min_height for box in total_results[1].boxes
+                                              if box.cls == person_model1)
+        if has_person_model0 and has_helmet_model0 and has_person_model1 and has_minimum_width_height_model0 and has_minimum_width_height_model1:
+            return True
+        else:
+            return False
 
     def class_mapping(self, models):
         """
diff --git a/services/harvest_service.py b/services/harvest_service.py
index 72f761b..ce920d6 100644
--- a/services/harvest_service.py
+++ b/services/harvest_service.py
@@ -191,6 +191,7 @@ def evaluate(self, video):
                                                skip_frames_counter)
         # Free all resources
         cv2.destroyAllWindows()
+        return self.export.result_dir_path
 
     def get_frame(self, cap: cv2.VideoCapture, skip_frames_counter):
@@ -219,10 +220,10 @@ def predict_frame(self, frame, skip_frames_counter):
             int: The updated skip frames counter.
         """
         if self.frame_number > 0 and self.frame_skip_factor > 0 and self.frame_number % self.frame_skip_factor == 0:
-            frame, total_time_class_prediction, condition_met, labels_and_boxes = con_process_frame(frame, self.project)
+            frame, labels_and_boxes, labeled_frame, total_time_class_prediction, condition_met = con_process_frame(frame, self.project, cv2)
             if condition_met:
-                self.predicted_frames = self.export.save_frame(frame, self.predicted_frames, cv2, labels_and_boxes)
+                self.predicted_frames = self.export.save_frame(frame, self.predicted_frames, cv2, labels_and_boxes, labeled_frame)
                 skip_frames_counter = self._var.FRAMES_SKIP_AFTER_DETECT
             print(f'Currently in frame: {self.frame_number}')
             self.frame_number += 1
diff --git a/single-shot.py b/single-shot.py
index c4f04ba..5e57f9c 100644
--- a/single-shot.py
+++ b/single-shot.py
@@ -1,6 +1,7 @@
 # This script is used to look for objects under a specific condition (at least 5 persons etc)
 # The script reads a video from a message queue, classifies the objects in the video, and does a condition check.
 # If condition is met, the video is being forwarded to a remote vault.
+from exports.export_factory import ExportFactory
 from integrations.integration_factory import IntegrationFactory
 from projects.project_factory import ProjectFactory
 from services.harvest_service import HarvestService
@@ -13,30 +14,30 @@
 
 
 def init():
+    # Service and Project initializations
+    project = ProjectFactory().init()
+    integration = IntegrationFactory().init()
+    export = ExportFactory().init()
     harvest_service = HarvestService()
-    model1, model2 = harvest_service.connect_models()
-
-    project = ProjectFactory().init('helmet')
-    # Mapping classes of 2 models
-    mapping = project.class_mapping(model1, model2)
-    integration = IntegrationFactory().init()
+    # Register components to the service
+    harvest_service.register('project', project)
+    harvest_service.register('integration', integration)
+    harvest_service.register('export', export)
 
     # Open video-capture/recording using the video-path. Throw FileNotFoundError if cap is unable to open.
-    cap = harvest_service.open_video()
     time_verbose = TimeVerbose()
+    video = harvest_service.open_video()
 
     if var.LOGGING:
         print(f'5. Classifying frames')
     if var.TIME_VERBOSE:
         time_verbose.add_preprocessing_time()
 
-    save_dir = harvest_service.process(
-        cap,
-        model1,
-        model2,
-        project.condition_func,
-        mapping)
+    # Evaluate the video
+    save_dir = harvest_service.evaluate(video)
+
+    # Upload the dataset if enabled
     if var.DATASET_UPLOAD:
         integration.upload_dataset(save_dir)
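
Illustrative sketch (not part of the patch): the snippet below walks through the crop-and-relabel arithmetic that the new __crop_frame__ and __transform_labels__ helpers introduce in condition.py, using a made-up frame size, box, and class id. It assumes plain NumPy arrays and the (xywhn, xyxy, cls, conf) tuple layout added above; the abs()/int() calls from the patch are omitted here because the example box lies fully inside the crop.

import numpy as np

# Hypothetical values: a 720x1280 frame and one person box in absolute xyxy pixels.
frame = np.zeros((720, 1280, 3), dtype=np.uint8)
x1, y1, x2, y2 = 400.0, 200.0, 600.0, 500.0
cls = 0
padding = 100

# Union box with padding, clamped to the frame bounds (same arithmetic as __crop_frame__).
orig_height, orig_width = frame.shape[:2]
crop_x1 = int(max(0, x1 - padding))
crop_y1 = int(max(0, y1 - padding))
crop_x2 = int(min(orig_width, x2 + padding))
crop_y2 = int(min(orig_height, y2 + padding))
cropped_frame = frame[crop_y1:crop_y2, crop_x1:crop_x2]

# Shift the box into the crop's coordinate system and normalise to YOLO xywhn
# (same arithmetic as __transform_labels__; note shape[:2] is (height, width)).
crop_height, crop_width = cropped_frame.shape[:2]
bx1, by1, bx2, by2 = x1 - crop_x1, y1 - crop_y1, x2 - crop_x1, y2 - crop_y1
x_center_norm = ((bx1 + bx2) / 2) / crop_width
y_center_norm = ((by1 + by2) / 2) / crop_height
width_norm = (bx2 - bx1) / crop_width
height_norm = (by2 - by1) / crop_height

# One line of the YOLO label file that save_frame would write: "0 0.5 0.5 0.5 0.6"
print(f'{cls} {x_center_norm} {y_center_norm} {width_norm} {height_norm}')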