Add Face Detection Raspberry Pi Sample #231

Merged (2 commits) on Oct 3, 2023
72 changes: 72 additions & 0 deletions examples/face_detector/raspberry_pi/README.md
@@ -0,0 +1,72 @@
# MediaPipe Face Detection example with Raspberry Pi

This example uses [MediaPipe](https://github.com/google/mediapipe) with Python on
a Raspberry Pi to perform real-time face detection using images streamed from
the Pi Camera. It draws a bounding box around each detected face in the camera
preview (when the detection score is above a given threshold).

## Set up your hardware

Before you begin, you need to
[set up your Raspberry Pi](https://projects.raspberrypi.org/en/projects/raspberry-pi-setting-up)
with the 64-bit version of Raspberry Pi OS (preferably updated to Buster).

If you use the Pi Camera, you also need to
[connect and configure it](https://www.raspberrypi.org/documentation/configuration/camera.md).
This code also works with a USB camera connected to the Raspberry Pi.

To see the results from the camera, you need a monitor connected to the
Raspberry Pi. It's fine to use SSH to access the Pi shell (no keyboard needs to
be attached to the Pi); you only need a monitor attached to the Pi to see the
camera stream.
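
Before running the example, you can quickly verify that OpenCV can reach the
camera at all. The following is a minimal sketch, not part of this PR; it
assumes OpenCV is installed and that the camera enumerates at ID 0:

```
# Hypothetical helper (not part of this sample): check that OpenCV can open
# a camera before running detect.py.
import cv2

cap = cv2.VideoCapture(0)  # 0 is usually the first camera; otherwise try 1, 2, ...
if not cap.isOpened():
  raise SystemExit('No camera found at ID 0. Check the cable and raspi-config.')
success, frame = cap.read()
cap.release()
print('Camera OK, frame shape:', frame.shape if success else 'read failed')
```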

## Install MediaPipe

You can install the required dependencies using the `setup.sh` script provided
with this project.

## Download the examples repository

First, clone this Git repo onto your Raspberry Pi.
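
For example (the repository URL below is an assumption; substitute the URL of
the repository that hosts this sample):

```
git clone https://github.com/googlesamples/mediapipe.git
```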

Then run the setup script to install the required dependencies and download the
TFLite model:

```
cd mediapipe/examples/face_detector/raspberry_pi
sh setup.sh
```

## Run the example

```
python3 detect.py \
--model detector.tflite
```

You should see the camera feed appear on the monitor attached to your Raspberry
Pi. When people appear in front of the camera, a box is drawn around each
detected face, together with its detection score. The number of frames per
second (FPS) is shown at the top-left corner of the screen. Because the
pipeline does more than model inference (for example, visualizing the detection
results), you can expect a higher FPS if the inference pipeline runs in
headless mode, without visualization, as in the sketch below.
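
For reference, a headless variant of the main loop could look like the
following sketch. This is an illustration, not code from this PR: the
`detector` below is assumed to be created in `LIVE_STREAM` mode exactly as in
`detect.py`, and the sketch simply skips the drawing and `cv2.imshow` steps:

```
# Sketch: measure inference throughput without visualization. Assumes
# `detector` was created with vision.FaceDetector.create_from_options
# (LIVE_STREAM mode, with a result callback) as in detect.py.
import time

import cv2
import mediapipe as mp

cap = cv2.VideoCapture(0)
frames = 0
start = time.time()
while cap.isOpened() and frames < 200:
  success, image = cap.read()
  if not success:
    break
  rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
  mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_image)
  detector.detect_async(mp_image, time.time_ns() // 1_000_000)
  frames += 1
cap.release()
print('Approx. FPS without visualization:', frames / (time.time() - start))
```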

* You can optionally specify the `model` parameter to set the TensorFlow Lite
model to be used:
    *   The default value is `detector.tflite`.
    *   Supported models: TensorFlow Lite face detection models **with
        metadata**, e.g. the models from
        [MediaPipe Models](https://developers.google.com/mediapipe/solutions/vision/face_detector/index#models).
* You can optionally specify the `minDetectionConfidence` parameter to adjust the
minimum confidence score for face detection to be considered successful:
    *   Supported value: A floating-point number in the range [0.0, 1.0].
* Default value: `0.5`
* You can optionally specify the `minSuppressionThreshold` parameter to adjust the
minimum non-maximum-suppression threshold for face detection to be considered overlapped:
    *   Supported value: A floating-point number in the range [0.0, 1.0].
* Default value: `0.5`
* Example usage:
```
python3 detect.py \
--model detector.tflite \
--minDetectionConfidence 0.3 \
--minSuppressionThreshold 0.5
```
171 changes: 171 additions & 0 deletions examples/face_detector/raspberry_pi/detect.py
@@ -0,0 +1,171 @@
# Copyright 2023 The MediaPipe Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Main scripts to run face detector."""

import argparse
import sys
import time

import cv2
import mediapipe as mp

from mediapipe.tasks import python
from mediapipe.tasks.python import vision

from utils import visualize

# Global variables to calculate FPS
COUNTER, FPS = 0, 0
START_TIME = time.time()
DETECTION_RESULT = None


def run(model: str, min_detection_confidence: float,
min_suppression_threshold: float, camera_id: int, width: int,
height: int) -> None:
"""Continuously run inference on images acquired from the camera.

Args:
model: Name of the TFLite face detection model.
min_detection_confidence: The minimum confidence score for the face
detection to be considered successful.
min_suppression_threshold: The minimum non-maximum-suppression threshold for
face detection to be considered overlapped.
camera_id: The camera id to be passed to OpenCV.
width: The width of the frame captured from the camera.
height: The height of the frame captured from the camera.
"""

# Start capturing video input from the camera
cap = cv2.VideoCapture(camera_id)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, width)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, height)

# Visualization parameters
row_size = 50 # pixels
left_margin = 24 # pixels
text_color = (0, 0, 0) # black
font_size = 1
font_thickness = 1
fps_avg_frame_count = 10

def save_result(result: vision.FaceDetectorResult, unused_output_image: mp.Image,
timestamp_ms: int):
global FPS, COUNTER, START_TIME, DETECTION_RESULT

# Calculate the FPS
if COUNTER % fps_avg_frame_count == 0:
FPS = fps_avg_frame_count / (time.time() - START_TIME)
START_TIME = time.time()

DETECTION_RESULT = result
COUNTER += 1

# Initialize the face detection model
base_options = python.BaseOptions(model_asset_path=model)
options = vision.FaceDetectorOptions(base_options=base_options,
running_mode=vision.RunningMode.LIVE_STREAM,
min_detection_confidence=min_detection_confidence,
min_suppression_threshold=min_suppression_threshold,
result_callback=save_result)
detector = vision.FaceDetector.create_from_options(options)


# Continuously capture images from the camera and run inference
while cap.isOpened():
success, image = cap.read()
if not success:
sys.exit(
'ERROR: Unable to read from webcam. Please verify your webcam settings.'
)

    # Flip the image horizontally for a natural selfie-view display.
    image = cv2.flip(image, 1)

# Convert the image from BGR to RGB as required by the TFLite model.
rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_image)

    # Run face detection asynchronously using the model. In LIVE_STREAM mode
    # the timestamp (here in milliseconds) must be monotonically increasing.
    detector.detect_async(mp_image, time.time_ns() // 1_000_000)

# Show the FPS
fps_text = 'FPS = {:.1f}'.format(FPS)
text_location = (left_margin, row_size)
current_frame = image
cv2.putText(current_frame, fps_text, text_location, cv2.FONT_HERSHEY_DUPLEX,
font_size, text_color, font_thickness, cv2.LINE_AA)

    if DETECTION_RESULT:
      current_frame = visualize(current_frame, DETECTION_RESULT)

cv2.imshow('face_detection', current_frame)

# Stop the program if the ESC key is pressed.
if cv2.waitKey(1) == 27:
break

detector.close()
cap.release()
cv2.destroyAllWindows()


def main():
parser = argparse.ArgumentParser(
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument(
'--model',
help='Path of the face detection model.',
required=False,
default='detector.tflite')
parser.add_argument(
'--minDetectionConfidence',
help='The minimum confidence score for the face detection to be '
      'considered successful.',
required=False,
type=float,
default=0.5)
parser.add_argument(
'--minSuppressionThreshold',
help='The minimum non-maximum-suppression threshold for face detection '
'to be considered overlapped.',
required=False,
type=float,
default=0.5)
  # Finding the camera ID can be very reliant on platform-dependent methods.
  # One common approach is to use the fact that camera IDs are usually indexed
  # sequentially by the OS, starting from 0. Here, we use OpenCV and create a
  # VideoCapture object for each potential ID with 'cap = cv2.VideoCapture(i)'.
  # If 'cap' is None or 'cap.isOpened()' is False, that camera ID is not
  # available.
parser.add_argument(
'--cameraId', help='Id of camera.', required=False, type=int, default=0)
parser.add_argument(
'--frameWidth',
help='Width of frame to capture from camera.',
required=False,
type=int,
default=1280)
parser.add_argument(
'--frameHeight',
help='Height of frame to capture from camera.',
required=False,
type=int,
default=720)
args = parser.parse_args()

run(args.model, args.minDetectionConfidence, args.minSuppressionThreshold,
int(args.cameraId), args.frameWidth, args.frameHeight)


if __name__ == '__main__':
main()
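
As an aside, the camera-ID probing approach described in the comment above the
`--cameraId` argument could look like this minimal sketch (an illustration
only, not part of the sample):

```
# Hypothetical helper illustrating the camera-probing comment in detect.py.
import cv2

def list_camera_ids(max_ids: int = 5) -> list:
  """Returns the IDs in [0, max_ids) that OpenCV can open."""
  available = []
  for i in range(max_ids):
    cap = cv2.VideoCapture(i)
    if cap is not None and cap.isOpened():
      available.append(i)
      cap.release()
  return available

if __name__ == '__main__':
  print('Available camera IDs:', list_camera_ids())
```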
1 change: 1 addition & 0 deletions examples/face_detector/raspberry_pi/requirements.txt
@@ -0,0 +1 @@
mediapipe
5 changes: 5 additions & 0 deletions examples/face_detector/raspberry_pi/setup.sh
@@ -0,0 +1,5 @@
# Install Python dependencies.
python3 -m pip install pip --upgrade
python3 -m pip install -r requirements.txt

wget -q -O detector.tflite https://storage.googleapis.com/mediapipe-models/face_detector/blaze_face_short_range/float16/1/blaze_face_short_range.tflite
56 changes: 56 additions & 0 deletions examples/face_detector/raspberry_pi/utils.py
@@ -0,0 +1,56 @@
# Copyright 2023 The MediaPipe Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import cv2
import numpy as np


MARGIN = 10 # pixels
ROW_SIZE = 30 # pixels
FONT_SIZE = 1
FONT_THICKNESS = 1
TEXT_COLOR = (0, 0, 0) # black


def visualize(
image,
detection_result
) -> np.ndarray:
"""Draws bounding boxes on the input image and return it.
Args:
image: The input RGB image.
detection_result: The list of all "Detection" entities to be visualized.
Returns:
Image with bounding boxes.
"""
for detection in detection_result.detections:
# Draw bounding_box
bbox = detection.bounding_box
start_point = bbox.origin_x, bbox.origin_y
end_point = bbox.origin_x + bbox.width, bbox.origin_y + bbox.height
# Use the orange color for high visibility.
cv2.rectangle(image, start_point, end_point, (0, 165, 255), 3)

# Draw label and score
category = detection.categories[0]
category_name = (category.category_name if category.category_name is not
None else '')
probability = round(category.score, 2)
result_text = category_name + ' (' + str(probability) + ')'
text_location = (MARGIN + bbox.origin_x,
MARGIN + ROW_SIZE + bbox.origin_y)
cv2.putText(image, result_text, text_location, cv2.FONT_HERSHEY_DUPLEX,
FONT_SIZE, TEXT_COLOR, FONT_THICKNESS, cv2.LINE_AA)

return image
2 changes: 1 addition & 1 deletion examples/object_detection/raspberry_pi/detect.py
@@ -36,7 +36,7 @@ def run(model: str, max_results: int, score_threshold: float,

Args:
model: Name of the TFLite object detection model.
max_results: Max of classification results.
max_results: Max number of detection results.
score_threshold: The score threshold of detection results.
camera_id: The camera id to be passed to OpenCV.
width: The width of the frame captured from the camera.