From 755c784bda2a3f3c3d3e9929056470a382472e85 Mon Sep 17 00:00:00 2001 From: mdraw Date: Thu, 19 Mar 2020 00:22:18 +0100 Subject: [PATCH] readme: Add docs on high-res media issues, --scale --- README.md | 17 +++++++++++++++-- deface/deface.py | 2 +- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index ca0b058..266886e 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,7 @@ If you want to try out anonymizing a video using the default settings, you just $ deface myvideos/vid1.mp4 -This will show you a live preview of the output and write the the output video to the new file `myvideos/vid1_anonymized.mp4`. +This will write the output to the new video file `myvideos/vid1_anonymized.mp4`. ### Live capture demo @@ -75,7 +75,7 @@ optional arguments: --thresh T, -t T Detection threshold (tune this to trade off between false positive and false negative rate). Default: 0.2. --scale WxH, -s WxH Downscale images for network inference to this size - (format: WxH, example: --scale=640x360). + (format: WxH, example: --scale 640x360). --preview, -p Enable live preview GUI (can decrease performance). --boxes Use boxes instead of ellipse masks. --draw-scores Draw detection scores onto outputs. @@ -138,6 +138,19 @@ If you are interested in seeing the faceness score (a score between 0 and 1 that This option can be useful to figure out an optimal value for the detection threshold that can then be set through the `--thresh` option. +### High-resolution media and performance issues + +Since `deface` tries to detect faces in the unscaled full-res version of input files by default, this can lead to performance issues on high-res inputs (>> 720p). In extreme cases, even detection accuracy can suffer because the detector neural network has not been trained on ultra-high-res images. 
+ +To counter these performance issues, `deface` supports downsampling its inputs on-the-fly before detecting faces, and subsequently rescaling detection results to the original resolution. Downsampling only applies to the detection process, whereas the final output resolution remains the same as the input resolution. + +This feature is controlled through the `--scale` option, which expects a value of the form `WxH`, where `W` and `H` are the desired width and height of downscaled input representations. +It is very important to make sure the aspect ratio of the inputs remains intact when using this option, because otherwise, distorted images are fed into the detector, resulting in decreased accuracy. + +For example, if your inputs have the common aspect ratio 16:9, you can instruct the detector to run in 360p resolution by specifying `--scale 640x360`. +If the results at this fairly low resolution are not good enough, detection at 720p input resolution (`--scale 1280x720`) may work better. + + ## Hardware acceleration Depending on your available hardware, you can often speed up neural network inference by enabling the optional [ONNX Runtime](https://microsoft.github.io/onnxruntime/) backend of `deface`. diff --git a/deface/deface.py b/deface/deface.py index 8000660..35e28a9 100755 --- a/deface/deface.py +++ b/deface/deface.py @@ -212,7 +212,7 @@ def parse_cli_args(): help='Detection threshold (tune this to trade off between false positive and false negative rate). Default: 0.2.') parser.add_argument( '--scale', '-s', default=None, metavar='WxH', - help='Downscale images for network inference to this size (format: WxH, example: --scale=640x360).') + help='Downscale images for network inference to this size (format: WxH, example: --scale 640x360).') parser.add_argument( '--preview', '-p', default=False, action='store_true', help='Enable live preview GUI (can decrease performance).')