Initial support for Hailo devices
Add some examples using HEFs from /usr/share/hailo-models.

Improve handling of networks with multiple outputs so that they can be
used with or without batching. The pose network post-processing is
updated slightly to cope with these changes.

Signed-off-by: David Plowman <[email protected]>
davidplowman authored and naushir committed Aug 29, 2024
1 parent 1d22cf9 commit 94de9bd
Showing 8 changed files with 727 additions and 0 deletions.
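For orientation before the diffs: both new examples drive the device through the Hailo context manager. A minimal sketch of that pattern, assuming run() takes a single frame as the examples below do (the batched call is an assumption drawn from the commit message, not shown in this diff):

import numpy as np

from picamera2.devices import Hailo

with Hailo("/usr/share/hailo-models/yolov8s_h8l.hef") as hailo:
    model_h, model_w, _ = hailo.get_input_shape()
    frame = np.zeros((model_h, model_w, 3), dtype=np.uint8)  # one RGB frame
    outputs = hailo.run(frame)            # unbatched call, as in the examples
    batch = np.stack([frame, frame])
    outputs = hailo.run(batch)            # hypothetical batched call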
1 change: 1 addition & 0 deletions .gitignore
@@ -51,3 +51,4 @@ docs/_build/
.idea
/.spyproject
.spyproject
hailort.log
80 changes: 80 additions & 0 deletions examples/hailo/coco.txt
@@ -0,0 +1,80 @@
person
bicycle
car
motorcycle
airplane
bus
train
truck
boat
traffic light
fire hydrant
stop sign
parking meter
bench
bird
cat
dog
horse
sheep
cow
elephant
bear
zebra
giraffe
backpack
umbrella
handbag
tie
suitcase
frisbee
skis
snowboard
sports ball
kite
baseball bat
baseball glove
skateboard
surfboard
tennis racket
bottle
wine glass
cup
fork
knife
spoon
bowl
banana
apple
sandwich
orange
broccoli
carrot
hot dog
pizza
donut
cake
chair
couch
potted plant
bed
dining table
toilet
tv
laptop
mouse
remote
keyboard
cell phone
microwave
oven
toaster
sink
refrigerator
book
clock
vase
scissors
teddy bear
hair drier
toothbrush
81 changes: 81 additions & 0 deletions examples/hailo/detect.py
@@ -0,0 +1,81 @@
#!/usr/bin/env python3

"""Example module for Hailo Detection."""

import argparse

import cv2

from picamera2 import MappedArray, Picamera2, Preview
from picamera2.devices import Hailo


def extract_detections(hailo_output, w, h, class_names, threshold=0.5):
"""Extract detections from the HailoRT-postprocess output."""
results = []
for class_id, detections in enumerate(hailo_output):
for detection in detections:
score = detection[4]
if score >= threshold:
y0, x0, y1, x1 = detection[:4]
bbox = (int(x0 * w), int(y0 * h), int(x1 * w), int(y1 * h))
results.append([class_names[class_id], bbox, score])
return results


def draw_objects(request):
current_detections = detections
if current_detections:
with MappedArray(request, "main") as m:
for class_name, bbox, score in current_detections:
x0, y0, x1, y1 = bbox
                label = f"{class_name} {int(score * 100)}%"
cv2.rectangle(m.array, (x0, y0), (x1, y1), (0, 255, 0, 0), 2)
cv2.putText(m.array, label, (x0 + 5, y0 + 15),
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0, 0), 1, cv2.LINE_AA)


if __name__ == "__main__":
# Parse command-line arguments.
parser = argparse.ArgumentParser(description="Detection Example")
parser.add_argument("-m", "--model", help="Path for the HEF model.",
default="/usr/share/hailo-models/yolov8s_h8l.hef")
parser.add_argument("-l", "--labels", default="coco.txt",
help="Path to a text file containing labels.")
parser.add_argument("-s", "--score_thresh", type=float, default=0.5,
help="Score threshold, must be a float between 0 and 1.")
args = parser.parse_args()

# Get the Hailo model, the input size it wants, and the size of our preview stream.
with Hailo(args.model) as hailo:
model_h, model_w, _ = hailo.get_input_shape()
video_w, video_h = 1280, 960

# Load class names from the labels file
with open(args.labels, 'r', encoding="utf-8") as f:
class_names = f.read().splitlines()

# The list of detected objects to draw.
detections = None

# Configure and start Picamera2.
with Picamera2() as picam2:
main = {'size': (video_w, video_h), 'format': 'XRGB8888'}
lores = {'size': (model_w, model_h), 'format': 'RGB888'}
controls = {'FrameRate': 30}
config = picam2.create_preview_configuration(main, lores=lores, controls=controls)
picam2.configure(config)

picam2.start_preview(Preview.QTGL, x=0, y=0, width=video_w, height=video_h)
picam2.start()
picam2.pre_callback = draw_objects

# Process each low resolution camera frame.
while True:
frame = picam2.capture_array('lores')

# Run inference on the preprocessed frame
results = hailo.run(frame)

# Extract detections from the inference results
detections = extract_detections(results[0], video_w, video_h, class_names, args.score_thresh)
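To make the post-processing layout concrete: extract_detections above assumes results[0] is a per-class list of detections, each in the form [y0, x0, y1, x1, score] with normalized coordinates. A small check with invented values (class names and numbers are illustrative only):

# Hypothetical raw output for a 2-class model: one detection for class 0.
sample_output = [
    [[0.1, 0.2, 0.5, 0.6, 0.9]],  # class 0: [y0, x0, y1, x1, score]
    [],                           # class 1: no detections
]
print(extract_detections(sample_output, 1280, 960, ["person", "bicycle"], 0.5))
# -> [['person', (256, 96, 768, 480), 0.9]]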
91 changes: 91 additions & 0 deletions examples/hailo/pose.py
@@ -0,0 +1,91 @@
#!/usr/bin/env python3

import argparse

import cv2
from pose_utils import postproc_yolov8_pose

from picamera2 import MappedArray, Picamera2, Preview
from picamera2.devices import Hailo

parser = argparse.ArgumentParser(description='Pose estimation using Hailo')
parser.add_argument('-m', '--model', help="HEF file path", default="/usr/share/hailo-models/yolov8s_pose_h8l_pi.hef")
args = parser.parse_args()

NOSE, L_EYE, R_EYE, L_EAR, R_EAR, L_SHOULDER, R_SHOULDER, L_ELBOW, R_ELBOW, \
L_WRIST, R_WRIST, L_HIP, R_HIP, L_KNEE, R_KNEE, L_ANKLE, R_ANKLE = range(17)

JOINT_PAIRS = [[NOSE, L_EYE], [L_EYE, L_EAR], [NOSE, R_EYE], [R_EYE, R_EAR],
[L_SHOULDER, R_SHOULDER],
[L_SHOULDER, L_ELBOW], [L_ELBOW, L_WRIST], [R_SHOULDER, R_ELBOW], [R_ELBOW, R_WRIST],
[L_SHOULDER, L_HIP], [R_SHOULDER, R_HIP], [L_HIP, R_HIP],
[L_HIP, L_KNEE], [R_HIP, R_KNEE], [L_KNEE, L_ANKLE], [R_KNEE, R_ANKLE]]


def visualize_pose_estimation_result(results, image, model_size, detection_threshold=0.5, joint_threshold=0.5):
image_size = (image.shape[1], image.shape[0])

def scale_coord(coord):
return tuple([int(c * t / f) for c, f, t in zip(coord, model_size, image_size)])

bboxes, scores, keypoints, joint_scores = (
results['bboxes'], results['scores'], results['keypoints'], results['joint_scores'])
box, score, keypoint, keypoint_score = bboxes[0], scores[0], keypoints[0], joint_scores[0]

for detection_box, detection_score, detection_keypoints, detection_keypoints_score in (
zip(box, score, keypoint, keypoint_score)):
if detection_score < detection_threshold:
continue

coord_min = scale_coord(detection_box[:2])
coord_max = scale_coord(detection_box[2:])
cv2.rectangle(image, coord_min, coord_max, (255, 0, 0), 1)
cv2.putText(image, str(detection_score), coord_min, cv2.FONT_HERSHEY_SIMPLEX, 0.5, (36, 255, 12), 1)

joint_visible = detection_keypoints_score > joint_threshold

detection_keypoints = detection_keypoints.reshape(17, 2)
for joint, joint_score in zip(detection_keypoints, detection_keypoints_score):
if joint_score > joint_threshold:
cv2.circle(image, scale_coord(joint), 4, (255, 0, 255), -1)

for joint0, joint1 in JOINT_PAIRS:
if joint_visible[joint0] and joint_visible[joint1]:
cv2.line(image, scale_coord(detection_keypoints[joint0]),
scale_coord(detection_keypoints[joint1]), (255, 0, 255), 3)


def draw_predictions(request):
with MappedArray(request, 'main') as m:
predictions = last_predictions
if predictions:
visualize_pose_estimation_result(predictions, m.array, model_size)


# ---------------- Start of the example --------------------- #

last_predictions = None

with Hailo(args.model) as hailo:
main_size = (1024, 768)
model_h, model_w, _ = hailo.get_input_shape()
model_size = lores_size = (model_w, model_h)

with Picamera2() as picam2:
main = {'size': main_size, 'format': 'XRGB8888'}
lores = {'size': lores_size, 'format': 'RGB888'}
config = picam2.create_video_configuration(main, lores=lores)
picam2.configure(config)

picam2.start_preview(Preview.QTGL, x=0, y=0, width=main_size[0], height=main_size[1])
picam2.start()
picam2.pre_callback = draw_predictions

while True:
frame = picam2.capture_array('lores')

# Do pose estimation.
raw_detections = hailo.run(frame)

# Tidy up the predictions. num_of_classes is always 1 (?).
last_predictions = postproc_yolov8_pose(1, raw_detections, model_size)
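The visualizer expects postproc_yolov8_pose to return batch-indexed arrays under the keys 'bboxes', 'scores', 'keypoints' and 'joint_scores', as read at the top of visualize_pose_estimation_result. A shape sketch with invented values (one detection, all joints below threshold, so only the box is drawn):

import numpy as np

# All numbers here are made up; coordinates are (x, y) in model space.
fake_predictions = {
    'bboxes': np.array([[[10.0, 20.0, 200.0, 300.0]]]),  # (batch, det, 4)
    'scores': np.array([[0.9]]),                         # (batch, det)
    'keypoints': np.zeros((1, 1, 17, 2)),                # (batch, det, 17, 2)
    'joint_scores': np.zeros((1, 1, 17)),                # (batch, det, 17)
}
image = np.zeros((480, 640, 3), dtype=np.uint8)
visualize_pose_estimation_result(fake_predictions, image, model_size=(640, 640))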
