From 1fb4f75b21846fc59bbfea344835c5e0c0b3e19c Mon Sep 17 00:00:00 2001 From: shiyu22 Date: Mon, 11 Apr 2022 15:51:23 +0800 Subject: [PATCH] Update to milvus2.0.2 and towhee0.6 Signed-off-by: shiyu22 Update video/quick-deploy Signed-off-by: shiyu22 Update image_search/object_detection Signed-off-by: shiyu22 Update video-analysis/object-detection Signed-off-by: shiyu22 Update image-search-webserver docker Signed-off-by: shiyu22 Update image-search/object-detection docker Signed-off-by: shiyu22 Update video server image Signed-off-by: shiyu22 Update test_docker_compose Signed-off-by: shiyu22 Update port to 5000 Signed-off-by: shiyu22 Update README Signed-off-by: shiyu22 Update test_main Signed-off-by: shiyu22 --- .../object_detection/README.md | 8 +- .../object_detection/docker-compose.yaml | 8 +- .../object_detection/server/__init__.py | 0 .../object_detection/server/requirements.txt | 7 +- .../object_detection/server/src/config.py | 2 +- .../object_detection/server/src/encode.py | 42 +- .../server/src/encode_resnet50.py | 36 -- .../object_detection/server/src/main.py | 2 +- .../server/src/yolov3_detector/__init__.py | 0 .../src/yolov3_detector/data/prepare_model.sh | 26 -- .../server/src/yolov3_detector/paddle_yolo.py | 167 -------- .../server/src/yolov3_detector/yolo_infer.py | 367 ------------------ .../object_detection/test_docker_compose.py | 12 +- .../quick_deploy/README.md | 2 +- .../quick_deploy/docker-compose.yaml | 4 +- .../quick_deploy/server/Dockerfile | 2 + .../quick_deploy/server/requirements.txt | 5 +- .../quick_deploy/server/src/config.py | 2 +- .../quick_deploy/server/src/encode.py | 12 +- .../server/src/encode_tf_resnet50.py | 26 -- .../object_detection/README.md | 4 +- .../object_detection/docker-compose.yaml | 4 +- .../object_detection/server/Dockerfile | 6 +- .../object_detection/server/requirements.txt | 4 +- .../object_detection/server/src/config.py | 2 +- .../object_detection/server/src/encode.py | 42 +- .../server/src/encode_resnet50.py | 32 -- .../server/src/yolov3_detector/__init__.py | 0 .../src/yolov3_detector/data/prepare_model.sh | 26 -- .../server/src/yolov3_detector/paddle_yolo.py | 165 -------- .../server/src/yolov3_detector/yolo_infer.py | 366 ----------------- .../object_detection/test_docker_compose.py | 10 +- .../quick_deploy/README.md | 2 +- .../quick_deploy/docker-compose.yaml | 4 +- .../quick_deploy/server/requirements.txt | 9 +- .../quick_deploy/server/src/config.py | 2 +- .../quick_deploy/server/src/encode.py | 14 +- .../server/src/resnet50_encode.py | 33 -- .../quick_deploy/server/src/test_main.py | 2 +- .../quick_deploy/test_docker_compose.py | 2 +- 40 files changed, 124 insertions(+), 1335 deletions(-) delete mode 100644 solutions/reverse_image_search/object_detection/server/__init__.py delete mode 100644 solutions/reverse_image_search/object_detection/server/src/encode_resnet50.py delete mode 100644 solutions/reverse_image_search/object_detection/server/src/yolov3_detector/__init__.py delete mode 100644 solutions/reverse_image_search/object_detection/server/src/yolov3_detector/data/prepare_model.sh delete mode 100644 solutions/reverse_image_search/object_detection/server/src/yolov3_detector/paddle_yolo.py delete mode 100644 solutions/reverse_image_search/object_detection/server/src/yolov3_detector/yolo_infer.py delete mode 100644 solutions/reverse_image_search/quick_deploy/server/src/encode_tf_resnet50.py delete mode 100644 solutions/video_similarity_search/object_detection/server/src/encode_resnet50.py delete mode 100644 
solutions/video_similarity_search/object_detection/server/src/yolov3_detector/__init__.py delete mode 100644 solutions/video_similarity_search/object_detection/server/src/yolov3_detector/data/prepare_model.sh delete mode 100644 solutions/video_similarity_search/object_detection/server/src/yolov3_detector/paddle_yolo.py delete mode 100644 solutions/video_similarity_search/object_detection/server/src/yolov3_detector/yolo_infer.py delete mode 100644 solutions/video_similarity_search/quick_deploy/server/src/resnet50_encode.py diff --git a/solutions/reverse_image_search/object_detection/README.md b/solutions/reverse_image_search/object_detection/README.md index 3717f90dd..d0594efff 100644 --- a/solutions/reverse_image_search/object_detection/README.md +++ b/solutions/reverse_image_search/object_detection/README.md @@ -2,7 +2,7 @@ ## Overview -This demo uses the [towhee](https://github.com/towhee-io/towhee) pipelines to detect objects in images and extract feature vectors of images, and then uses Milvus to build an image similarity search system. +This demo uses the [towhee](https://github.com/towhee-io/towhee) operators to detect objects in images and extract feature vectors of images, and then uses Milvus to build an image similarity search system. The following is the system diagram. @@ -61,7 +61,7 @@ And show all containers with `docker ps`, and you can use `docker logs img-searc ```bash CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES -4cc6e60eb295 milvusbootcamp/imgsearch-with-objdet:towhee "/bin/sh -c 'python3…" 56 seconds ago Up 55 seconds 0.0.0.0:5010->5010/tcp img-obj-det-webserver +4cc6e60eb295 milvusbootcamp/imgsearch-with-objdet:towhee "/bin/sh -c 'python3…" 56 seconds ago Up 55 seconds 0.0.0.0:5000->5000/tcp img-obj-det-webserver 40f4ea99fd22 milvusdb/milvus:v2.0.0-rc8-20211104-d1f4106 "/tini -- milvus run…" 57 seconds ago Up 55 seconds 0.0.0.0:19530->19530/tcp milvus-standalone 60ed080afac1 minio/minio:RELEASE.2020-12-03T00-03-10Z "/usr/bin/docker-ent…" 57 seconds ago Up 56 seconds (healthy) 9000/tcp milvus-minio 5d9cdfba872b mysql:5.7 "docker-entrypoint.s…" 57 seconds ago Up 56 seconds 0.0.0.0:3306->3306/tcp, 33060/tcp img-obj-det-mysql @@ -122,7 +122,7 @@ $ python main.py - **API docs** -Visit 127.0.0.1:5010/docs in your browser to use all the APIs. +Visit `127.0.0.1:5000/docs` in your browser to use all the APIs. ![fastapi](pic/fastapi.png) @@ -144,7 +144,7 @@ Visit 127.0.0.1:5010/docs in your browser to use all the APIs. ```bash # Modify API_URL to the IP address and port of the server. 
-$ export API_URL='http://127.0.0.1:5010' +$ export API_URL='http://127.0.0.1:5000' $ docker run -d -p 8001:80 \ -e API_URL=${API_URL} \ milvusbootcamp/img-search-client:1.0 diff --git a/solutions/reverse_image_search/object_detection/docker-compose.yaml b/solutions/reverse_image_search/object_detection/docker-compose.yaml index 2fa5eeee0..cf71fd21d 100644 --- a/solutions/reverse_image_search/object_detection/docker-compose.yaml +++ b/solutions/reverse_image_search/object_detection/docker-compose.yaml @@ -33,7 +33,7 @@ services: standalone: container_name: milvus-standalone - image: milvusdb/milvus:v2.0.0-rc8-20211104-d1f4106 + image: milvusdb/milvus:v2.0.2 networks: app_net: ipv4_address: 172.16.238.10 @@ -62,7 +62,7 @@ services: webserver: container_name: img-obj-det-webserver - image: milvusbootcamp/imgsearch-with-objdet:towhee + image: milvusbootcamp/img-search-with-object:towhee0.6 networks: app_net: ipv4_address: 172.16.238.12 @@ -76,7 +76,7 @@ services: - standalone - mysql ports: - - "5010:5010" + - "5000:5000" webclient: container_name: img-obj-det-webclient @@ -85,7 +85,7 @@ services: app_net: ipv4_address: 172.16.238.13 environment: - API_URL: 'http://127.0.0.1:5010' + API_URL: 'http://127.0.0.1:5000' ports: - "8001:80" healthcheck: diff --git a/solutions/reverse_image_search/object_detection/server/__init__.py b/solutions/reverse_image_search/object_detection/server/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/solutions/reverse_image_search/object_detection/server/requirements.txt b/solutions/reverse_image_search/object_detection/server/requirements.txt index cb8841094..813a0359f 100644 --- a/solutions/reverse_image_search/object_detection/server/requirements.txt +++ b/solutions/reverse_image_search/object_detection/server/requirements.txt @@ -1,4 +1,4 @@ -pymilvus==2.0.1 +pymilvus==2.0.2 diskcache==5.2.1 uvicorn==0.13.4 PyMySQL==1.0.2 @@ -14,4 +14,7 @@ torch==1.9.0 pillow==9.0.1 matplotlib seaborn -towhee==0.2.0 +towhee==0.6.0 +scipy +opencv-python +pyyaml diff --git a/solutions/reverse_image_search/object_detection/server/src/config.py b/solutions/reverse_image_search/object_detection/server/src/config.py index 99621628c..52629aec6 100644 --- a/solutions/reverse_image_search/object_detection/server/src/config.py +++ b/solutions/reverse_image_search/object_detection/server/src/config.py @@ -3,7 +3,7 @@ ############### Milvus Configuration ############### MILVUS_HOST = os.getenv("MILVUS_HOST", "127.0.0.1") MILVUS_PORT = int(os.getenv("MILVUS_PORT", "19530")) -VECTOR_DIMENSION = int(os.getenv("VECTOR_DIMENSION", "1000")) +VECTOR_DIMENSION = int(os.getenv("VECTOR_DIMENSION", "2048")) INDEX_FILE_SIZE = int(os.getenv("INDEX_FILE_SIZE", "1024")) METRIC_TYPE = os.getenv("METRIC_TYPE", "L2") DEFAULT_TABLE = os.getenv("DEFAULT_TABLE", "milvus_obj_det") diff --git a/solutions/reverse_image_search/object_detection/server/src/encode.py b/solutions/reverse_image_search/object_detection/server/src/encode.py index afaf0dcc7..30e0a223e 100644 --- a/solutions/reverse_image_search/object_detection/server/src/encode.py +++ b/solutions/reverse_image_search/object_detection/server/src/encode.py @@ -1,25 +1,33 @@ -from towhee import pipeline +import towhee +import cv2 from numpy import linalg as LA -# os.environ['KERAS_HOME'] = os.path.abspath(os.path.join('.', 'data')) - class CustomOperator: - """ - Say something about the ExampleCalass... - - Args: - args_0 (`type`): - ... 
- """ - def __init__(self): - #self.img_embedding = pipeline('image-embedding') - self.yolo_embedding = pipeline('shiyu/img_object_embedding_pytorch_yolov5_resnet50') - def execute(self, img_path): - objs = self.yolo_embedding(img_path) + boxes, _, _ = towhee.glob(img_path) \ + .image_decode() \ + .object_detection.yolov5() \ + .to_list()[0] + + imgs = self.get_imgs_list(img_path, boxes) + embeddings = towhee.dc(imgs) \ + .image_embedding.timm(model_name='resnet50') \ + .to_list() + norm_objs = [] - for feat in objs: - norm_feat = feat[0] / LA.norm(feat[0]) + for feat in embeddings: + norm_feat = feat / LA.norm(feat) norm_objs.append(norm_feat.tolist()) return norm_objs + + + @staticmethod + def get_imgs_list(img_path, boxes): + img_list = [] + img = cv2.imread(img_path) + + for box in boxes: + tmp_obj = img[int(box[1]):int(box[3]), int(box[0]):int(box[2])] + img_list.append(towhee._types.Image(tmp_obj, 'BGR')) + return img_list diff --git a/solutions/reverse_image_search/object_detection/server/src/encode_resnet50.py b/solutions/reverse_image_search/object_detection/server/src/encode_resnet50.py deleted file mode 100644 index ec9d21f17..000000000 --- a/solutions/reverse_image_search/object_detection/server/src/encode_resnet50.py +++ /dev/null @@ -1,36 +0,0 @@ -import numpy as np -from tensorflow.keras.applications.resnet50 import ResNet50 -from tensorflow.keras.applications.resnet50 import preprocess_input as preprocess_input_resnet50 -from tensorflow.keras.preprocessing import image -from numpy import linalg as LA - - -# os.environ['KERAS_HOME'] = os.path.abspath(os.path.join('.', 'data')) - - -class CustomOperator: - """ - Say something about the ExampleCalass... - - Args: - args_0 (`type`): - ... - """ - def __init__(self): - self.input_shape = (224, 224, 3) - self.weight = 'imagenet' - self.pooling = 'max' - self.model_resnet50 = ResNet50(weights='imagenet', - input_shape=(self.input_shape[0], self.input_shape[1], self.input_shape[2]), - pooling=self.pooling, include_top=False) - self.model_resnet50.predict(np.zeros((1, 224, 224, 3))) - - def execute(self, img_path): - # Return the embedding([[list]]) of the images - img = image.load_img(img_path, target_size=(self.input_shape[0], self.input_shape[1])) - img = image.img_to_array(img) - img = np.expand_dims(img, axis=0) - img = preprocess_input_resnet50(img) - feat = self.model_resnet50.predict(img) - norm_feat = feat[0] / LA.norm(feat[0]) - return norm_feat.tolist() diff --git a/solutions/reverse_image_search/object_detection/server/src/main.py b/solutions/reverse_image_search/object_detection/server/src/main.py index 6f7475722..c5c37aa06 100644 --- a/solutions/reverse_image_search/object_detection/server/src/main.py +++ b/solutions/reverse_image_search/object_detection/server/src/main.py @@ -123,4 +123,4 @@ async def search_images(image: UploadFile = File(...), table_name: str = None): if __name__ == '__main__': - uvicorn.run(app=app, host='0.0.0.0', port=5010) + uvicorn.run(app=app, host='0.0.0.0', port=5000) diff --git a/solutions/reverse_image_search/object_detection/server/src/yolov3_detector/__init__.py b/solutions/reverse_image_search/object_detection/server/src/yolov3_detector/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/solutions/reverse_image_search/object_detection/server/src/yolov3_detector/data/prepare_model.sh b/solutions/reverse_image_search/object_detection/server/src/yolov3_detector/data/prepare_model.sh deleted file mode 100644 index e05d7167c..000000000 --- 
a/solutions/reverse_image_search/object_detection/server/src/yolov3_detector/data/prepare_model.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/bash -ex -## model url from mask_rcnn -url='https://github.com/ABNER-1/omnisearch-operators/releases/download/v1.0/yolov3_darknet.tar.gz' -file='yolov3_darknet.tar.gz' -dir='yolov3_darknet' - -if [[ ! -d "${dir}" ]]; then - if [[ ! -f "${file}" ]]; then - echo "[INFO] Model tar package does not exist, begin to download..." - wget ${url} - echo "[INFO] Model tar package download successfully!" - fi - - echo "[INFO] Model directory does not exist, begin to untar..." - tar -zxvf ${file} - rm ${file} - echo "[INFO] Model directory untar successfully!" -fi - -if [[ -d "${dir}" ]];then - echo "[INFO] Model has been prepared successfully!" - exit 0 -fi - -echo "[ERROR] Failed to prepare model due to unexpected reason!" -exit 1 diff --git a/solutions/reverse_image_search/object_detection/server/src/yolov3_detector/paddle_yolo.py b/solutions/reverse_image_search/object_detection/server/src/yolov3_detector/paddle_yolo.py deleted file mode 100644 index ee4d08e41..000000000 --- a/solutions/reverse_image_search/object_detection/server/src/yolov3_detector/paddle_yolo.py +++ /dev/null @@ -1,167 +0,0 @@ -import os -import uuid -import base64 -import logging -import sys -import time -import numpy as np -import yaml -import cv2 -import paddle.fluid as fluid -import paddle -from yolov3_detector.yolo_infer import offset_to_lengths -from yolov3_detector.yolo_infer import coco17_category_info, bbox2out -from yolov3_detector.yolo_infer import Preprocess - -sys.path.append("..") -from config import DATA_PATH, COCO_MODEL_PATH, YOLO_CONFIG_PATH - -paddle.enable_static() - - -# def temp_directory(): -# return os.path.abspath(os.path.join('.', 'data')) - - -# COCO_MODEL_PATH = os.path.join(temp_directory(), "yolov3_darknet") -# YOLO_CONFIG_PATH = os.path.join(COCO_MODEL_PATH, "yolo.yml") - - -class BoundingBox: - def __init__(self, x1, y1, x2, y2, score, label=None): - self.x1 = x1 - self.x2 = x2 - self.y1 = y1 - self.y2 = y2 - self.score = score - self.label = label - - -def cv2base64(image, fps, path): - try: - tmp_file_name = os.path.join(path, "object/%010d-%s.jpg" % (fps, uuid.uuid1())) - cv2.imwrite(tmp_file_name, image) - with open(tmp_file_name, "rb") as f: - base64_data = base64.b64encode(f.read()) - base64_data = base64_data.decode("utf-8") - return base64_data - except Exception as e: - err_msg = "Convert cv2 object to base64 failed: " - logging.error(err_msg, e, exc_info=True) - raise e - - -class YOLO_v3: - def __init__(self): - self.model_init = False - self.fps = 0 - self.user_config = self.get_operator_config() - self.model_path = COCO_MODEL_PATH - self.config_path = YOLO_CONFIG_PATH - with open(self.config_path) as f: - self.conf = yaml.safe_load(f) - - self.infer_prog, self.feed_var_names, self.fetch_targets = fluid.io.load_inference_model( - dirname=self.model_path, - executor=self.executor, - model_filename='__model__', - params_filename='__params__') - self.clsid2catid, self.catid2name = coco17_category_info(False) - self.execute(np.zeros((300, 300, 3), dtype='float32'), DATA_PATH) - - def get_operator_config(self): - try: - config = {} - self.device_str = os.environ.get("device_id", "/cpu:0") - if "gpu" not in self.device_str.lower(): - self.place = fluid.CPUPlace() - else: - gpu_device_id = int(self.device_str.split(':')[-1]) - gpu_mem_limit = float(os.environ.get("gpu_mem_limit", 0.3)) - os.environ['FLAGS_fraction_of_gpu_memory_to_use'] = str( - 
gpu_mem_limit) - config["gpu_memory_limit"] = gpu_mem_limit - self.place = fluid.CUDAPlace(gpu_device_id) - self.executor = fluid.Executor(self.place) - return config - except Exception as e: - logging.error("unexpected error happen during read config", - exc_info=True) - raise e - - def get_bboxes(self, bbox_results, threshold=0.5): - bboxes = [[]] - for item in bbox_results: - box, score, cls = item["bbox"], item["score"], item["category_id"] - idx = item["image_id"] - if score > threshold: - assert idx == 0, "get_bboxes function now must input image = 1" - bboxes[idx].append(BoundingBox(x1=box[0], y1=box[1], - x2=box[0] + box[2], - y2=box[1] + box[3], - score=score, - label=self.catid2name[int(cls)])) - return bboxes - - @staticmethod - def get_obj_image(self, images, bboxes, path): - obj_images = [] - for i, frame_bboxes in enumerate(bboxes): - frame_object = [] - for j, bbox in enumerate(frame_bboxes): - tmp_obj = images[i][int(bbox.y1):int( - bbox.y2), int(bbox.x1):int(bbox.x2)] - frame_object.append(cv2base64(tmp_obj, self.fps, path)) - - self.fps += 1 - obj_images.append(frame_object) - return obj_images - - def execute(self, image, path): - image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) - - img_data = Preprocess(image, - self.conf['arch'], - self.conf['Preprocess']) - data_dict = {k: v for k, v in zip(self.feed_var_names, img_data)} - outs = self.executor.run(self.infer_prog, - feed=data_dict, - fetch_list=self.fetch_targets, - return_numpy=False) - out = outs[-1] - lod = out.lod() - lengths = offset_to_lengths(lod) - np_data = np.array(out) - - res = {'bbox': (np_data, lengths), 'im_id': np.array([[0]])} - bbox_results = bbox2out([res], self.clsid2catid, False) - bboxes = self.get_bboxes(bbox_results, 0.5) - image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) - objs = self.get_obj_image(self, [image], bboxes, path) - return objs[0] - - -def run(detector, path): - result_images = [] - images = os.listdir(path) - images.sort() - start = time.time() - if not os.path.exists(path + '/object'): - os.mkdir(path + '/object') - - try: - object_num = [] - for image_path in images: - if not image_path.endswith(".jpg"): - continue - # print(path + '/' + image_path) - image = cv2.imread(path + '/' + image_path) - images = detector.execute(image, path) - result_images.append(images) - object_num.append(len(images)) - except Exception as e: - logging.error("something error: %s", str(e), exc_info=True) - end = time.time() - logging.info('%s cost: {:.3f}s, get %d results'.format(end - start), - "yolov3 detector", len(result_images)) - return result_images, object_num diff --git a/solutions/reverse_image_search/object_detection/server/src/yolov3_detector/yolo_infer.py b/solutions/reverse_image_search/object_detection/server/src/yolov3_detector/yolo_infer.py deleted file mode 100644 index 2bd22fbda..000000000 --- a/solutions/reverse_image_search/object_detection/server/src/yolov3_detector/yolo_infer.py +++ /dev/null @@ -1,367 +0,0 @@ -import numpy as np -import cv2 -import copy - - -def offset_to_lengths(lod): - offset = lod[0] - lengths = [offset[i + 1] - offset[i] for i in range(len(offset) - 1)] - return [lengths] - - -def get_extra_info(im, arch, shape, scale): - info = [] - if 'YOLO' in arch: - im_size = np.array([shape[:2]]).astype('int32') - info.append(im_size) - return info - - -class Resize(object): - def __init__(self, target_size, max_size=0, interp=cv2.INTER_LINEAR): - super(Resize, self).__init__() - self.target_size = target_size - self.max_size = max_size - self.interp = interp - - def 
__call__(self, im, arch): - origin_shape = im.shape[:2] - im_c = im.shape[2] - scale_set = {'RCNN', 'RetinaNet'} - if self.max_size != 0 and arch in scale_set: - im_size_min = np.min(origin_shape[0:2]) - im_size_max = np.max(origin_shape[0:2]) - im_scale = float(self.target_size) / float(im_size_min) - if np.round(im_scale * im_size_max) > self.max_size: - im_scale = float(self.max_size) / float(im_size_max) - im_scale_x = im_scale - im_scale_y = im_scale - resize_w = int(im_scale_x * float(origin_shape[1])) - resize_h = int(im_scale_y * float(origin_shape[0])) - else: - im_scale_x = float(self.target_size) / float(origin_shape[1]) - im_scale_y = float(self.target_size) / float(origin_shape[0]) - im = cv2.resize( - im, - None, - None, - fx=im_scale_x, - fy=im_scale_y, - interpolation=self.interp) - # padding im - if self.max_size != 0 and arch in scale_set: - padding_im = np.zeros( - (self.max_size, self.max_size, im_c), dtype=np.float32) - im_h, im_w = im.shape[:2] - padding_im[:im_h, :im_w, :] = im - im = padding_im - return im, im_scale_x - - -class Normalize(object): - def __init__(self, mean, std, is_scale=True): - super(Normalize, self).__init__() - self.mean = mean - self.std = std - self.is_scale = is_scale - - def __call__(self, im): - im = im.astype(np.float32, copy=False) - if self.is_scale: - im = im / 255.0 - im -= self.mean - im /= self.std - return im - - -class Permute(object): - def __init__(self, to_bgr=False): - self.to_bgr = to_bgr - - def __call__(self, im): - im = im.transpose((2, 0, 1)).copy() - if self.to_bgr: - im = im[[2, 1, 0], :, :] - return im - - -class PadStride(object): - def __init__(self, stride=0): - # assert stride >= 0, "Unsupported stride: {}, the stride in PadStride must be greater or equal to 0".format( - # stride) - self.coarsest_stride = stride - - def __call__(self, im): - coarsest_stride = self.coarsest_stride - if coarsest_stride == 0: - return im - im_c, im_h, im_w = im.shape - pad_h = int(np.ceil(float(im_h) / coarsest_stride) * coarsest_stride) - pad_w = int(np.ceil(float(im_w) / coarsest_stride) * coarsest_stride) - padding_im = np.zeros((im_c, pad_h, pad_w), dtype=np.float32) - padding_im[:, :im_h, :im_w] = im - return padding_im - - -def Preprocess(img_path, arch, config): - img = img_path - # img = DecodeImage(img_path) - orig_shape = img.shape - scale = 1. - data = [] - data_config = copy.deepcopy(config) - for data_aug_conf in data_config: - obj = data_aug_conf.pop('type') - preprocess = eval(obj)(**data_aug_conf) - if obj == 'Resize': - img, scale = preprocess(img, arch) - else: - img = preprocess(img) - - img = img[np.newaxis, :] # N, C, H, W - data.append(img) - extra_info = get_extra_info(img, arch, orig_shape, scale) - data += extra_info - return data - - -def coco17_category_info(with_background=True): - """ - Get class id to category id map and category id - to category name map of COCO2017 dataset - - Args: - with_background (bool, default True): - whether load background as class 0. 
- """ - clsid2catid = { - 1: 1, - 2: 2, - 3: 3, - 4: 4, - 5: 5, - 6: 6, - 7: 7, - 8: 8, - 9: 9, - 10: 10, - 11: 11, - 12: 13, - 13: 14, - 14: 15, - 15: 16, - 16: 17, - 17: 18, - 18: 19, - 19: 20, - 20: 21, - 21: 22, - 22: 23, - 23: 24, - 24: 25, - 25: 27, - 26: 28, - 27: 31, - 28: 32, - 29: 33, - 30: 34, - 31: 35, - 32: 36, - 33: 37, - 34: 38, - 35: 39, - 36: 40, - 37: 41, - 38: 42, - 39: 43, - 40: 44, - 41: 46, - 42: 47, - 43: 48, - 44: 49, - 45: 50, - 46: 51, - 47: 52, - 48: 53, - 49: 54, - 50: 55, - 51: 56, - 52: 57, - 53: 58, - 54: 59, - 55: 60, - 56: 61, - 57: 62, - 58: 63, - 59: 64, - 60: 65, - 61: 67, - 62: 70, - 63: 72, - 64: 73, - 65: 74, - 66: 75, - 67: 76, - 68: 77, - 69: 78, - 70: 79, - 71: 80, - 72: 81, - 73: 82, - 74: 84, - 75: 85, - 76: 86, - 77: 87, - 78: 88, - 79: 89, - 80: 90 - } - - catid2name = { - 0: 'background', - 1: 'person', - 2: 'bicycle', - 3: 'car', - 4: 'motorcycle', - 5: 'airplane', - 6: 'bus', - 7: 'train', - 8: 'truck', - 9: 'boat', - 10: 'traffic light', - 11: 'fire hydrant', - 13: 'stop sign', - 14: 'parking meter', - 15: 'bench', - 16: 'bird', - 17: 'cat', - 18: 'dog', - 19: 'horse', - 20: 'sheep', - 21: 'cow', - 22: 'elephant', - 23: 'bear', - 24: 'zebra', - 25: 'giraffe', - 27: 'backpack', - 28: 'umbrella', - 31: 'handbag', - 32: 'tie', - 33: 'suitcase', - 34: 'frisbee', - 35: 'skis', - 36: 'snowboard', - 37: 'sports ball', - 38: 'kite', - 39: 'baseball bat', - 40: 'baseball glove', - 41: 'skateboard', - 42: 'surfboard', - 43: 'tennis racket', - 44: 'bottle', - 46: 'wine glass', - 47: 'cup', - 48: 'fork', - 49: 'knife', - 50: 'spoon', - 51: 'bowl', - 52: 'banana', - 53: 'apple', - 54: 'sandwich', - 55: 'orange', - 56: 'broccoli', - 57: 'carrot', - 58: 'hot dog', - 59: 'pizza', - 60: 'donut', - 61: 'cake', - 62: 'chair', - 63: 'couch', - 64: 'potted plant', - 65: 'bed', - 67: 'dining table', - 70: 'toilet', - 72: 'tv', - 73: 'laptop', - 74: 'mouse', - 75: 'remote', - 76: 'keyboard', - 77: 'cell phone', - 78: 'microwave', - 79: 'oven', - 80: 'toaster', - 81: 'sink', - 82: 'refrigerator', - 84: 'book', - 85: 'clock', - 86: 'vase', - 87: 'scissors', - 88: 'teddy bear', - 89: 'hair drier', - 90: 'toothbrush' - } - - if not with_background: - clsid2catid = {k - 1: v for k, v in clsid2catid.items()} - - return clsid2catid, catid2name - - -def clip_bbox(bbox): - xmin = max(min(bbox[0], 1.), 0.) - ymin = max(min(bbox[1], 1.), 0.) - xmax = max(min(bbox[2], 1.), 0.) - ymax = max(min(bbox[3], 1.), 0.) - return xmin, ymin, xmax, ymax - - -def bbox2out(results, clsid2catid, is_bbox_normalized=False): - """ - Args: - results: request a dict, should include: `bbox`, `im_id`, - if is_bbox_normalized=True, also need `im_shape`. - clsid2catid: class id to category id map of COCO2017 dataset. - is_bbox_normalized: whether or not bbox is normalized. 
- """ - xywh_res = [] - for t in results: - bboxes = t['bbox'][0] - lengths = t['bbox'][1][0] - im_ids = np.array(t['im_id'][0]).flatten() - if bboxes.shape == (1, 1) or bboxes is None: - continue - - k = 0 - for i in range(len(lengths)): - num = lengths[i] - im_id = int(im_ids[i]) - for j in range(num): - dt = bboxes[k] - clsid, score, xmin, ymin, xmax, ymax = dt.tolist() - catid = (clsid2catid[int(clsid)]) - - if is_bbox_normalized: - xmin, ymin, xmax, ymax = \ - clip_bbox([xmin, ymin, xmax, ymax]) - w = xmax - xmin - h = ymax - ymin - im_shape = t['im_shape'][0][i].tolist() - im_height, im_width = int(im_shape[0]), int(im_shape[1]) - xmin *= im_width - ymin *= im_height - w *= im_width - h *= im_height - else: - w = xmax - xmin + 1 - h = ymax - ymin + 1 - - bbox = [xmin, ymin, w, h] - coco_res = { - 'image_id': im_id, - 'category_id': catid, - 'bbox': bbox, - 'score': score - } - xywh_res.append(coco_res) - k += 1 - return xywh_res diff --git a/solutions/reverse_image_search/object_detection/test_docker_compose.py b/solutions/reverse_image_search/object_detection/test_docker_compose.py index 1e80b20e6..aedd986b5 100644 --- a/solutions/reverse_image_search/object_detection/test_docker_compose.py +++ b/solutions/reverse_image_search/object_detection/test_docker_compose.py @@ -12,24 +12,24 @@ def get_file(): def test_load_img(): get_file() response = requests.post( - "http://127.0.0.1:5010/img/load", + "http://127.0.0.1:5000/img/load", json={"File": "/data/example_img"} ) assert response.status_code == 200 assert response.json() == "Successfully loaded data!" def test_progress(): - response = requests.get("http://127.0.0.1:5010/progress") + response = requests.get("http://127.0.0.1:5000/progress") assert response.status_code == 200 assert response.json() == "current: 20, total: 20" def test_count(): - response = requests.post("http://127.0.0.1:5010/img/count") + response = requests.post("http://127.0.0.1:5000/img/count") assert response.status_code == 200 def test_get_img(): response = requests.get( - 'http://127.0.0.1:5010/data?image_path=%2Fdata%2Fexample_img%2Ftest.jpg' + 'http://127.0.0.1:5000/data?image_path=%2Fdata%2Fexample_img%2Ftest.jpg' ) assert response.status_code == 200 @@ -37,12 +37,12 @@ def test_search(): _test_upload_file = './data/example_img/test.jpg' _files = {'image': open(_test_upload_file, 'rb')} response = requests.post( - 'http://127.0.0.1:5010/img/search', + 'http://127.0.0.1:5000/img/search', files = _files ) assert response.status_code == 200 def test_drop(): - response = requests.post("http://127.0.0.1:5010/img/drop") + response = requests.post("http://127.0.0.1:5000/img/drop") assert response.status_code == 200 diff --git a/solutions/reverse_image_search/quick_deploy/README.md b/solutions/reverse_image_search/quick_deploy/README.md index 03ae32142..14ff987c4 100644 --- a/solutions/reverse_image_search/quick_deploy/README.md +++ b/solutions/reverse_image_search/quick_deploy/README.md @@ -1,6 +1,6 @@ # Reverse Image Search Based on Milvus & Towhee -This demo uses [towhee](https://github.com/towhee-io/towhee) pipeline (image-embedding) to extract image features by ResNet50, and uses Milvus to build a system that can perform reverse image search. +This demo uses [towhee](https://github.com/towhee-io/towhee) image embedding operator to extract image features by ResNet50, and uses Milvus to build a system that can perform reverse image search. 
The system architecture is as below: diff --git a/solutions/reverse_image_search/quick_deploy/docker-compose.yaml b/solutions/reverse_image_search/quick_deploy/docker-compose.yaml index 917c9b680..e80be3e0f 100644 --- a/solutions/reverse_image_search/quick_deploy/docker-compose.yaml +++ b/solutions/reverse_image_search/quick_deploy/docker-compose.yaml @@ -33,7 +33,7 @@ services: standalone: container_name: milvus-standalone - image: milvusdb/milvus:v2.0.0-rc8-20211104-d1f4106 + image: milvusdb/milvus:v2.0.2 networks: app_net: ipv4_address: 172.16.238.10 @@ -62,7 +62,7 @@ services: webserver: container_name: img-search-webserver - image: milvusbootcamp/img-search-server:towhee + image: milvusbootcamp/img-search-server:towhee0.6 networks: app_net: ipv4_address: 172.16.238.12 diff --git a/solutions/reverse_image_search/quick_deploy/server/Dockerfile b/solutions/reverse_image_search/quick_deploy/server/Dockerfile index 38a520b09..8dcba75e2 100644 --- a/solutions/reverse_image_search/quick_deploy/server/Dockerfile +++ b/solutions/reverse_image_search/quick_deploy/server/Dockerfile @@ -1,6 +1,8 @@ FROM python:3.7-slim-buster RUN pip3 install --upgrade pip +RUN apt-get update +RUN apt-get install ffmpeg libsm6 libxext6 -y WORKDIR /app/src COPY . /app diff --git a/solutions/reverse_image_search/quick_deploy/server/requirements.txt b/solutions/reverse_image_search/quick_deploy/server/requirements.txt index befba1a08..7bc8cc22c 100644 --- a/solutions/reverse_image_search/quick_deploy/server/requirements.txt +++ b/solutions/reverse_image_search/quick_deploy/server/requirements.txt @@ -1,10 +1,11 @@ PyMySQL==1.0.2 +scipy diskcache==5.2.1 uvicorn==0.13.4 numpy==1.21.3 pydantic==1.8.2 -pymilvus==2.0.1 -towhee==0.2.0 +pymilvus==2.0.2 +towhee==0.6.0 fastapi==0.65.2 python-multipart==0.0.5 pillow==8.4.0 diff --git a/solutions/reverse_image_search/quick_deploy/server/src/config.py b/solutions/reverse_image_search/quick_deploy/server/src/config.py index ff0d89534..1644cefb5 100644 --- a/solutions/reverse_image_search/quick_deploy/server/src/config.py +++ b/solutions/reverse_image_search/quick_deploy/server/src/config.py @@ -3,7 +3,7 @@ ############### Milvus Configuration ############### MILVUS_HOST = os.getenv("MILVUS_HOST", "127.0.0.1") MILVUS_PORT = int(os.getenv("MILVUS_PORT", "19530")) -VECTOR_DIMENSION = int(os.getenv("VECTOR_DIMENSION", "1000")) +VECTOR_DIMENSION = int(os.getenv("VECTOR_DIMENSION", "2048")) INDEX_FILE_SIZE = int(os.getenv("INDEX_FILE_SIZE", "1024")) METRIC_TYPE = os.getenv("METRIC_TYPE", "L2") DEFAULT_TABLE = os.getenv("DEFAULT_TABLE", "milvus_img_search") diff --git a/solutions/reverse_image_search/quick_deploy/server/src/encode.py b/solutions/reverse_image_search/quick_deploy/server/src/encode.py index c9b016213..d947b5f0b 100644 --- a/solutions/reverse_image_search/quick_deploy/server/src/encode.py +++ b/solutions/reverse_image_search/quick_deploy/server/src/encode.py @@ -1,5 +1,5 @@ from numpy import linalg as LA -from towhee import pipeline +import towhee from PIL import Image @@ -11,12 +11,12 @@ class Resnet50: args_0 (`type`): ... 
""" - def __init__(self): - self.embedding_pipeline = pipeline('image-embedding') def resnet50_extract_feat(self, img_path): + feat = towhee.glob(img_path) \ + .image_decode() \ + .image_embedding.timm(model_name='resnet50') \ + .to_list() # Return the normalized embedding([[vec]]) of image - img = Image.open(img_path) - feat = self.embedding_pipeline(img) norm_feat = feat / LA.norm(feat) - return norm_feat.tolist()[0][0] + return norm_feat.tolist()[0] diff --git a/solutions/reverse_image_search/quick_deploy/server/src/encode_tf_resnet50.py b/solutions/reverse_image_search/quick_deploy/server/src/encode_tf_resnet50.py deleted file mode 100644 index 8495b4652..000000000 --- a/solutions/reverse_image_search/quick_deploy/server/src/encode_tf_resnet50.py +++ /dev/null @@ -1,26 +0,0 @@ -import numpy as np -from tensorflow.keras.applications.resnet50 import ResNet50 -from tensorflow.keras.applications.resnet50 import preprocess_input as preprocess_input_resnet50 -from tensorflow.keras.preprocessing import image -from numpy import linalg as LA - - -class Resnet50: - def __init__(self): - self.input_shape = (224, 224, 3) - self.weight = 'imagenet' - self.pooling = 'max' - self.model_resnet50 = ResNet50(weights='imagenet', - input_shape=(self.input_shape[0], self.input_shape[1], self.input_shape[2]), - pooling=self.pooling, include_top=False) - self.model_resnet50.predict(np.zeros((1, 224, 224, 3))) - - def resnet50_extract_feat(self, img_path): - # Return the normalized embedding([[list]]) of the images - img = image.load_img(img_path, target_size=(self.input_shape[0], self.input_shape[1])) - img = image.img_to_array(img) - img = np.expand_dims(img, axis=0) - img = preprocess_input_resnet50(img) - feat = self.model_resnet50.predict(img) - norm_feat = feat[0] / LA.norm(feat[0]) - return norm_feat.tolist() diff --git a/solutions/video_similarity_search/object_detection/README.md b/solutions/video_similarity_search/object_detection/README.md index 4c969e32b..c556cec78 100644 --- a/solutions/video_similarity_search/object_detection/README.md +++ b/solutions/video_similarity_search/object_detection/README.md @@ -2,7 +2,7 @@ ## Overview -This demo uses **Milvus** to detect objects in a video based on a dataset of object images with known information. To get images of objects in videos, it uses OpenCV to extract video frames and then uses towhee pipelines to detect objects in each frame. It uses YOLOV5 to detect objects in images and ResNet50 to get feature vectors of images. Finally, it can detect object and get object information easily by similarity search in Milvus. Let's have fun playing with it! +This demo uses [**Milvus**](https://milvus.io/) to detect objects in a video based on a dataset of object images with known information. To get images of objects in videos, it uses OpenCV to extract video frames and then uses [**Towhee**](https://towhee.io/) to detect objects in each frame. It uses YOLOV5 to detect objects in images and ResNet50 to get feature vectors of images. Finally, it can detect object and get object information easily by similarity search in Milvus. Let's have fun playing with it! arch @@ -118,7 +118,7 @@ The next step is to start the system server. It provides HTTP backend services, - **The API docs** - Type localhost:5000/docs in your browser to see all the APIs. + Type `127.0.0.1:5000/docs` in your browser to see all the APIs. 
arch diff --git a/solutions/video_similarity_search/object_detection/docker-compose.yaml b/solutions/video_similarity_search/object_detection/docker-compose.yaml index 0b7970760..f4e4c9b91 100644 --- a/solutions/video_similarity_search/object_detection/docker-compose.yaml +++ b/solutions/video_similarity_search/object_detection/docker-compose.yaml @@ -33,7 +33,7 @@ services: standalone: container_name: milvus-standalone - image: milvusdb/milvus:v2.0.0-rc8-20211104-d1f4106 + image: milvusdb/milvus:v2.0.2 networks: app_net: ipv4_address: 172.16.238.10 @@ -62,7 +62,7 @@ services: webserver: container_name: videoobj-search-webserver - image: milvusbootcamp/video-object-detect-server:towhee + image: milvusbootcamp/video-object-detect-server:towhee0.6 networks: app_net: ipv4_address: 172.16.238.12 diff --git a/solutions/video_similarity_search/object_detection/server/Dockerfile b/solutions/video_similarity_search/object_detection/server/Dockerfile index 2b6688ac0..e35e4f505 100644 --- a/solutions/video_similarity_search/object_detection/server/Dockerfile +++ b/solutions/video_similarity_search/object_detection/server/Dockerfile @@ -3,10 +3,8 @@ FROM tensorflow/tensorflow:2.5.0 WORKDIR /app/src COPY . /app -ENV TF_XLA_FLAGS --tf_xla_cpu_global_jit -RUN mkdir -p /root/.keras/models && mv /app/data/models/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5 /root/.keras/models/ - -RUN apt-get update +RUN apt-get -y update +RUN apt-get install -y ffmpeg RUN apt-get install -y libsm6 libxext6 libxrender-dev libgl1-mesa-glx RUN pip3 install -r /app/requirements.txt diff --git a/solutions/video_similarity_search/object_detection/server/requirements.txt b/solutions/video_similarity_search/object_detection/server/requirements.txt index 6561522e8..1221512a8 100644 --- a/solutions/video_similarity_search/object_detection/server/requirements.txt +++ b/solutions/video_similarity_search/object_detection/server/requirements.txt @@ -5,7 +5,7 @@ fastapi uvicorn pandas aiofiles -pymilvus==2.0.0rc7 +pymilvus==2.0.2 diskcache python-multipart pyyaml @@ -15,4 +15,4 @@ matplotlib seaborn torchvision==0.10.0 torch==1.9.0 -towhee==0.2.0 +towhee==0.6.0 diff --git a/solutions/video_similarity_search/object_detection/server/src/config.py b/solutions/video_similarity_search/object_detection/server/src/config.py index ba6445e4f..2e472b99b 100644 --- a/solutions/video_similarity_search/object_detection/server/src/config.py +++ b/solutions/video_similarity_search/object_detection/server/src/config.py @@ -3,7 +3,7 @@ ############### Milvus Configuration ############### MILVUS_HOST = os.getenv("MILVUS_HOST", "localhost") MILVUS_PORT = int(os.getenv("MILVUS_PORT", "19530")) -VECTOR_DIMENSION = int(os.getenv("VECTOR_DIMENSION", "1000")) +VECTOR_DIMENSION = int(os.getenv("VECTOR_DIMENSION", "2048")) INDEX_FILE_SIZE = int(os.getenv("INDEX_FILE_SIZE", "1024")) METRIC_TYPE = os.getenv("METRIC_TYPE", "L2") DEFAULT_TABLE = os.getenv("DEFAULT_TABLE", "video_obj_det") diff --git a/solutions/video_similarity_search/object_detection/server/src/encode.py b/solutions/video_similarity_search/object_detection/server/src/encode.py index 9bce47b4d..cece469d3 100644 --- a/solutions/video_similarity_search/object_detection/server/src/encode.py +++ b/solutions/video_similarity_search/object_detection/server/src/encode.py @@ -1,6 +1,7 @@ from PIL import Image from numpy import linalg as LA -from towhee import pipeline +import towhee +import cv2 class CustomOperator: """ @@ -10,22 +11,41 @@ class CustomOperator: args_0 (`type`): ... 
""" - def __init__(self): - self.resnet_embedding=pipeline('image-embedding') - self.yolo_embedding = pipeline('shiyu/img_object_embedding_pytorch_yolov5_resnet50') - def execute(self, img_path): # Get an image embedding with resnet50 pipeline - img = Image.open(img_path) - feat = self.resnet_embedding(img) - norm_feat = feat[0] / LA.norm(feat[0]) + feat = towhee.glob(img_path) \ + .image_decode() \ + .image_embedding.timm(model_name='resnet50') \ + .to_list() + # Return the normalized embedding([[vec]]) of image + norm_feat = feat / LA.norm(feat) return norm_feat.tolist()[0] def yolo(self, img_path): # Get objects' embeddings of an image - objs = self.yolo_embedding(img_path) + boxes, _, _ = towhee.glob(img_path) \ + .image_decode() \ + .object_detection.yolov5() \ + .to_list()[0] + + imgs = self.get_imgs_list(img_path, boxes) + embeddings = towhee.dc(imgs) \ + .image_embedding.timm(model_name='resnet50') \ + .to_list() + norm_objs = [] - for feat in objs: - norm_feat = feat[0] / LA.norm(feat[0]) + for feat in embeddings: + norm_feat = feat / LA.norm(feat) norm_objs.append(norm_feat.tolist()) return norm_objs + + + @staticmethod + def get_imgs_list(img_path, boxes): + img_list = [] + img = cv2.imread(img_path) + + for box in boxes: + tmp_obj = img[int(box[1]):int(box[3]), int(box[0]):int(box[2])] + img_list.append(towhee._types.Image(tmp_obj, 'BGR')) + return img_list diff --git a/solutions/video_similarity_search/object_detection/server/src/encode_resnet50.py b/solutions/video_similarity_search/object_detection/server/src/encode_resnet50.py deleted file mode 100644 index 036e9228a..000000000 --- a/solutions/video_similarity_search/object_detection/server/src/encode_resnet50.py +++ /dev/null @@ -1,32 +0,0 @@ -import numpy as np -from tensorflow.keras.applications.resnet50 import ResNet50 -from tensorflow.keras.applications.resnet50 import preprocess_input as preprocess_input_resnet50 -from tensorflow.keras.preprocessing import image -from numpy import linalg as LA - -class CustomOperator: - """ - Say something about the ExampleCalass... - - Args: - args_0 (`type`): - ... 
- """ - def __init__(self): - self.input_shape = (224, 224, 3) - self.weight = 'imagenet' - self.pooling = 'max' - self.model_resnet50 = ResNet50(weights='imagenet', - input_shape=(self.input_shape[0], self.input_shape[1], self.input_shape[2]), - pooling=self.pooling, include_top=False) - self.model_resnet50.predict(np.zeros((1, 224, 224, 3))) - - def execute(self, img_path): - # Return the embedding([[list]]) of the images - img = image.load_img(img_path, target_size=(self.input_shape[0], self.input_shape[1])) - img = image.img_to_array(img) - img = np.expand_dims(img, axis=0) - img = preprocess_input_resnet50(img) - feat = self.model_resnet50.predict(img) - norm_feat = feat[0] / LA.norm(feat[0]) - return norm_feat.tolist() diff --git a/solutions/video_similarity_search/object_detection/server/src/yolov3_detector/__init__.py b/solutions/video_similarity_search/object_detection/server/src/yolov3_detector/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/solutions/video_similarity_search/object_detection/server/src/yolov3_detector/data/prepare_model.sh b/solutions/video_similarity_search/object_detection/server/src/yolov3_detector/data/prepare_model.sh deleted file mode 100644 index e05d7167c..000000000 --- a/solutions/video_similarity_search/object_detection/server/src/yolov3_detector/data/prepare_model.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/bash -ex -## model url from mask_rcnn -url='https://github.com/ABNER-1/omnisearch-operators/releases/download/v1.0/yolov3_darknet.tar.gz' -file='yolov3_darknet.tar.gz' -dir='yolov3_darknet' - -if [[ ! -d "${dir}" ]]; then - if [[ ! -f "${file}" ]]; then - echo "[INFO] Model tar package does not exist, begin to download..." - wget ${url} - echo "[INFO] Model tar package download successfully!" - fi - - echo "[INFO] Model directory does not exist, begin to untar..." - tar -zxvf ${file} - rm ${file} - echo "[INFO] Model directory untar successfully!" -fi - -if [[ -d "${dir}" ]];then - echo "[INFO] Model has been prepared successfully!" - exit 0 -fi - -echo "[ERROR] Failed to prepare model due to unexpected reason!" 
-exit 1 diff --git a/solutions/video_similarity_search/object_detection/server/src/yolov3_detector/paddle_yolo.py b/solutions/video_similarity_search/object_detection/server/src/yolov3_detector/paddle_yolo.py deleted file mode 100644 index 2842dea21..000000000 --- a/solutions/video_similarity_search/object_detection/server/src/yolov3_detector/paddle_yolo.py +++ /dev/null @@ -1,165 +0,0 @@ -# pylint: disable=W -import os -import uuid -import base64 -import logging -import sys -import time -import numpy as np -import yaml -import cv2 -import paddle.fluid as fluid -import paddle -from yolov3_detector.yolo_infer import offset_to_lengths -from yolov3_detector.yolo_infer import coco17_category_info, bbox2out -from yolov3_detector.yolo_infer import Preprocess - -sys.path.append("..") -from config import DATA_PATH, COCO_MODEL_PATH, YOLO_CONFIG_PATH - -paddle.enable_static() -# def temp_directory(): -# return os.path.abspath(os.path.join('.', 'data')) - - -# COCO_MODEL_PATH = os.path.join(temp_directory(), "yolov3_darknet") -# YOLO_CONFIG_PATH = os.path.join(COCO_MODEL_PATH, "yolo.yml") - -class BoundingBox: - def __init__(self, x1, y1, x2, y2, score, label=None): - self.x1 = x1 - self.x2 = x2 - self.y1 = y1 - self.y2 = y2 - self.score = score - self.label = label - - -def cv2base64(image, fps, path): - try: - tmp_file_name = os.path.join(path, "object/%010d-%s.jpg" % (fps, uuid.uuid1())) - cv2.imwrite(tmp_file_name, image) - with open(tmp_file_name, "rb") as f: - base64_data = base64.b64encode(f.read()) - base64_data = base64_data.decode("utf-8") - return base64_data - except Exception as e: - err_msg = "Convert cv2 object to base64 failed: " - logging.error(err_msg, e, exc_info=True) - raise e - - -class YOLO_v3: - def __init__(self): - self.model_init = False - self.fps = 0 - self.user_config = self.get_operator_config() - self.model_path = COCO_MODEL_PATH - self.config_path = YOLO_CONFIG_PATH - with open(self.config_path) as f: - self.conf = yaml.safe_load(f) - - self.infer_prog, self.feed_var_names, self.fetch_targets = fluid.io.load_inference_model( - dirname=self.model_path, - executor=self.executor, - model_filename='__model__', - params_filename='__params__') - self.clsid2catid, self.catid2name = coco17_category_info(False) - self.execute(np.zeros((300, 300, 3), dtype='float32'), DATA_PATH) - - def get_operator_config(self): - try: - config = {} - self.device_str = os.environ.get("device_id", "/cpu:0") - if "gpu" not in self.device_str.lower(): - self.place = fluid.CPUPlace() - else: - gpu_device_id = int(self.device_str.split(':')[-1]) - gpu_mem_limit = float(os.environ.get("gpu_mem_limit", 0.3)) - os.environ['FLAGS_fraction_of_gpu_memory_to_use'] = str( - gpu_mem_limit) - config["gpu_memory_limit"] = gpu_mem_limit - self.place = fluid.CUDAPlace(gpu_device_id) - self.executor = fluid.Executor(self.place) - return config - except Exception as e: - logging.error("unexpected error happen during read config", - exc_info=True) - raise e - - def get_bboxes(self, bbox_results, threshold=0.5): - bboxes = [[]] - for item in bbox_results: - box, score, cls = item["bbox"], item["score"], item["category_id"] - idx = item["image_id"] - if score > threshold: - assert idx == 0, "get_bboxes function now must input image = 1" - bboxes[idx].append(BoundingBox(x1=box[0], y1=box[1], - x2=box[0] + box[2], - y2=box[1] + box[3], - score=score, - label=self.catid2name[int(cls)])) - return bboxes - - @staticmethod - def get_obj_image(self, images, bboxes, path): - obj_images = [] - for i, frame_bboxes in 
enumerate(bboxes): - frame_object = [] - for j, bbox in enumerate(frame_bboxes): - tmp_obj = images[i][int(bbox.y1):int( - bbox.y2), int(bbox.x1):int(bbox.x2)] - frame_object.append(cv2base64(tmp_obj, self.fps, path)) - - self.fps += 1 - obj_images.append(frame_object) - return obj_images - - def execute(self, image, path): - image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) - - img_data = Preprocess(image, - self.conf['arch'], - self.conf['Preprocess']) - data_dict = {k: v for k, v in zip(self.feed_var_names, img_data)} - outs = self.executor.run(self.infer_prog, - feed=data_dict, - fetch_list=self.fetch_targets, - return_numpy=False) - out = outs[-1] - lod = out.lod() - lengths = offset_to_lengths(lod) - np_data = np.array(out) - - res = {'bbox': (np_data, lengths), 'im_id': np.array([[0]])} - bbox_results = bbox2out([res], self.clsid2catid, False) - bboxes = self.get_bboxes(bbox_results, 0.5) - image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) - objs = self.get_obj_image(self, [image], bboxes, path) - return objs[0] - - -def run(detector, path): - result_images = [] - images = os.listdir(path) - images.sort() - start = time.time() - if not os.path.exists(path + '/object'): - os.mkdir(path + '/object') - - try: - object_num = [] - for image_path in images: - if not image_path.endswith(".jpg"): - continue - # print(path + '/' + image_path) - image = cv2.imread(path + '/' + image_path) - images = detector.execute(image, path) - result_images.append(images) - object_num.append(len(images)) - except Exception as e: - logging.error("something error: %s", str(e), exc_info=True) - end = time.time() - logging.info('%s cost: {:.3f}s, get %d results'.format(end - start), - "yolov3 detector", len(result_images)) - return result_images, object_num diff --git a/solutions/video_similarity_search/object_detection/server/src/yolov3_detector/yolo_infer.py b/solutions/video_similarity_search/object_detection/server/src/yolov3_detector/yolo_infer.py deleted file mode 100644 index 5363c6246..000000000 --- a/solutions/video_similarity_search/object_detection/server/src/yolov3_detector/yolo_infer.py +++ /dev/null @@ -1,366 +0,0 @@ -# pylint: disable=W -import numpy as np -import cv2 -import copy - - -def offset_to_lengths(lod): - offset = lod[0] - lengths = [offset[i + 1] - offset[i] for i in range(len(offset) - 1)] - return [lengths] - -def get_extra_info(im, arch, shape, scale): - info = [] - if 'YOLO' in arch: - im_size = np.array([shape[:2]]).astype('int32') - info.append(im_size) - return info - - -class Resize(object): - def __init__(self, target_size, max_size=0, interp=cv2.INTER_LINEAR): - super(Resize, self).__init__() - self.target_size = target_size - self.max_size = max_size - self.interp = interp - - def __call__(self, im, arch): - origin_shape = im.shape[:2] - im_c = im.shape[2] - scale_set = {'RCNN', 'RetinaNet'} - if self.max_size != 0 and arch in scale_set: - im_size_min = np.min(origin_shape[0:2]) - im_size_max = np.max(origin_shape[0:2]) - im_scale = float(self.target_size) / float(im_size_min) - if np.round(im_scale * im_size_max) > self.max_size: - im_scale = float(self.max_size) / float(im_size_max) - im_scale_x = im_scale - im_scale_y = im_scale - resize_w = int(im_scale_x * float(origin_shape[1])) - resize_h = int(im_scale_y * float(origin_shape[0])) - else: - im_scale_x = float(self.target_size) / float(origin_shape[1]) - im_scale_y = float(self.target_size) / float(origin_shape[0]) - im = cv2.resize( - im, - None, - None, - fx=im_scale_x, - fy=im_scale_y, - interpolation=self.interp) - if 
self.max_size != 0 and arch in scale_set: - padding_im = np.zeros( - (self.max_size, self.max_size, im_c), dtype=np.float32) - im_h, im_w = im.shape[:2] - padding_im[:im_h, :im_w, :] = im - im = padding_im - return im, im_scale_x - - -class Normalize(object): - def __init__(self, mean, std, is_scale=True): - super(Normalize, self).__init__() - self.mean = mean - self.std = std - self.is_scale = is_scale - - def __call__(self, im): - im = im.astype(np.float32, copy=False) - if self.is_scale: - im = im / 255.0 - im -= self.mean - im /= self.std - return im - - -class Permute(object): - def __init__(self, to_bgr=False): - self.to_bgr = to_bgr - - def __call__(self, im): - im = im.transpose((2, 0, 1)).copy() - if self.to_bgr: - im = im[[2, 1, 0], :, :] - return im - - -class PadStride(object): - def __init__(self, stride=0): - # assert stride >= 0, "Unsupported stride: {}, the stride in PadStride must be greater or equal to 0".format( - # stride) - self.coarsest_stride = stride - - def __call__(self, im): - coarsest_stride = self.coarsest_stride - if coarsest_stride == 0: - return im - im_c, im_h, im_w = im.shape - pad_h = int(np.ceil(float(im_h) / coarsest_stride) * coarsest_stride) - pad_w = int(np.ceil(float(im_w) / coarsest_stride) * coarsest_stride) - padding_im = np.zeros((im_c, pad_h, pad_w), dtype=np.float32) - padding_im[:, :im_h, :im_w] = im - return padding_im - - -def Preprocess(img_path, arch, config): - img = img_path - # img = DecodeImage(img_path) - orig_shape = img.shape - scale = 1. - data = [] - data_config = copy.deepcopy(config) - for data_aug_conf in data_config: - obj = data_aug_conf.pop('type') - preprocess = eval(obj)(**data_aug_conf) - if obj == 'Resize': - img, scale = preprocess(img, arch) - else: - img = preprocess(img) - - img = img[np.newaxis, :] # N, C, H, W - data.append(img) - extra_info = get_extra_info(img, arch, orig_shape, scale) - data += extra_info - return data - - -def coco17_category_info(with_background=True): - """ - Get class id to category id map and category id - to category name map of COCO2017 dataset - - Args: - with_background (bool, default True): - whether load background as class 0. 
- """ - clsid2catid = { - 1: 1, - 2: 2, - 3: 3, - 4: 4, - 5: 5, - 6: 6, - 7: 7, - 8: 8, - 9: 9, - 10: 10, - 11: 11, - 12: 13, - 13: 14, - 14: 15, - 15: 16, - 16: 17, - 17: 18, - 18: 19, - 19: 20, - 20: 21, - 21: 22, - 22: 23, - 23: 24, - 24: 25, - 25: 27, - 26: 28, - 27: 31, - 28: 32, - 29: 33, - 30: 34, - 31: 35, - 32: 36, - 33: 37, - 34: 38, - 35: 39, - 36: 40, - 37: 41, - 38: 42, - 39: 43, - 40: 44, - 41: 46, - 42: 47, - 43: 48, - 44: 49, - 45: 50, - 46: 51, - 47: 52, - 48: 53, - 49: 54, - 50: 55, - 51: 56, - 52: 57, - 53: 58, - 54: 59, - 55: 60, - 56: 61, - 57: 62, - 58: 63, - 59: 64, - 60: 65, - 61: 67, - 62: 70, - 63: 72, - 64: 73, - 65: 74, - 66: 75, - 67: 76, - 68: 77, - 69: 78, - 70: 79, - 71: 80, - 72: 81, - 73: 82, - 74: 84, - 75: 85, - 76: 86, - 77: 87, - 78: 88, - 79: 89, - 80: 90 - } - - catid2name = { - 0: 'background', - 1: 'person', - 2: 'bicycle', - 3: 'car', - 4: 'motorcycle', - 5: 'airplane', - 6: 'bus', - 7: 'train', - 8: 'truck', - 9: 'boat', - 10: 'traffic light', - 11: 'fire hydrant', - 13: 'stop sign', - 14: 'parking meter', - 15: 'bench', - 16: 'bird', - 17: 'cat', - 18: 'dog', - 19: 'horse', - 20: 'sheep', - 21: 'cow', - 22: 'elephant', - 23: 'bear', - 24: 'zebra', - 25: 'giraffe', - 27: 'backpack', - 28: 'umbrella', - 31: 'handbag', - 32: 'tie', - 33: 'suitcase', - 34: 'frisbee', - 35: 'skis', - 36: 'snowboard', - 37: 'sports ball', - 38: 'kite', - 39: 'baseball bat', - 40: 'baseball glove', - 41: 'skateboard', - 42: 'surfboard', - 43: 'tennis racket', - 44: 'bottle', - 46: 'wine glass', - 47: 'cup', - 48: 'fork', - 49: 'knife', - 50: 'spoon', - 51: 'bowl', - 52: 'banana', - 53: 'apple', - 54: 'sandwich', - 55: 'orange', - 56: 'broccoli', - 57: 'carrot', - 58: 'hot dog', - 59: 'pizza', - 60: 'donut', - 61: 'cake', - 62: 'chair', - 63: 'couch', - 64: 'potted plant', - 65: 'bed', - 67: 'dining table', - 70: 'toilet', - 72: 'tv', - 73: 'laptop', - 74: 'mouse', - 75: 'remote', - 76: 'keyboard', - 77: 'cell phone', - 78: 'microwave', - 79: 'oven', - 80: 'toaster', - 81: 'sink', - 82: 'refrigerator', - 84: 'book', - 85: 'clock', - 86: 'vase', - 87: 'scissors', - 88: 'teddy bear', - 89: 'hair drier', - 90: 'toothbrush' - } - - if not with_background: - clsid2catid = {k - 1: v for k, v in clsid2catid.items()} - - return clsid2catid, catid2name - - -def clip_bbox(bbox): - xmin = max(min(bbox[0], 1.), 0.) - ymin = max(min(bbox[1], 1.), 0.) - xmax = max(min(bbox[2], 1.), 0.) - ymax = max(min(bbox[3], 1.), 0.) - return xmin, ymin, xmax, ymax - - -def bbox2out(results, clsid2catid, is_bbox_normalized=False): - """ - Args: - results: request a dict, should include: `bbox`, `im_id`, - if is_bbox_normalized=True, also need `im_shape`. - clsid2catid: class id to category id map of COCO2017 dataset. - is_bbox_normalized: whether or not bbox is normalized. 
- """ - xywh_res = [] - for t in results: - bboxes = t['bbox'][0] - lengths = t['bbox'][1][0] - im_ids = np.array(t['im_id'][0]).flatten() - if bboxes.shape == (1, 1) or bboxes is None: - continue - - k = 0 - for i in range(len(lengths)): - num = lengths[i] - im_id = int(im_ids[i]) - for j in range(num): - dt = bboxes[k] - clsid, score, xmin, ymin, xmax, ymax = dt.tolist() - catid = (clsid2catid[int(clsid)]) - - if is_bbox_normalized: - xmin, ymin, xmax, ymax = \ - clip_bbox([xmin, ymin, xmax, ymax]) - w = xmax - xmin - h = ymax - ymin - im_shape = t['im_shape'][0][i].tolist() - im_height, im_width = int(im_shape[0]), int(im_shape[1]) - xmin *= im_width - ymin *= im_height - w *= im_width - h *= im_height - else: - w = xmax - xmin + 1 - h = ymax - ymin + 1 - - bbox = [xmin, ymin, w, h] - coco_res = { - 'image_id': im_id, - 'category_id': catid, - 'bbox': bbox, - 'score': score - } - xywh_res.append(coco_res) - k += 1 - return xywh_res diff --git a/solutions/video_similarity_search/object_detection/test_docker_compose.py b/solutions/video_similarity_search/object_detection/test_docker_compose.py index f4da281ba..9122f009c 100644 --- a/solutions/video_similarity_search/object_detection/test_docker_compose.py +++ b/solutions/video_similarity_search/object_detection/test_docker_compose.py @@ -2,11 +2,6 @@ import gdown import zipfile -def test_drop(): - response = requests.post("http://127.0.0.1:5000/image/drop") - assert response.status_code == 200 - - def get_file(): url = 'https://drive.google.com/uc?id=12AzMujXPw_UjnS63LuwwCOyjkZYVEp3Y' gdown.download(url) @@ -51,3 +46,8 @@ def test_search(): files = _files ) assert response.status_code == 200 + +def test_drop(): + response = requests.post("http://127.0.0.1:5000/image/drop") + assert response.status_code == 200 + diff --git a/solutions/video_similarity_search/quick_deploy/README.md b/solutions/video_similarity_search/quick_deploy/README.md index 226c137ba..9686de897 100644 --- a/solutions/video_similarity_search/quick_deploy/README.md +++ b/solutions/video_similarity_search/quick_deploy/README.md @@ -2,7 +2,7 @@ ## Overview -This demo uses OpenCV to extract video frames. Then it uses towhee image-embedding pipeline (ResNet50) to get the feature vector of each frame. Finally, it uses **Milvus** to save and search the data, which makes it very easy to build a system for video similarity search. So let's have fun playing with it! +This demo uses OpenCV to extract video frames. Then it uses [**Towhee**](https://towhee.io/) image-embedding (ResNet50) opeator to get the feature vector of each frame. Finally, it uses [**Milvus**](https://milvus.io/) to save and search the data, which makes it very easy to build a system for video similarity search. So let's have fun playing with it! 
 
 ## Data source
 
diff --git a/solutions/video_similarity_search/quick_deploy/docker-compose.yaml b/solutions/video_similarity_search/quick_deploy/docker-compose.yaml
index 9e5855f6c..3a0333f09 100644
--- a/solutions/video_similarity_search/quick_deploy/docker-compose.yaml
+++ b/solutions/video_similarity_search/quick_deploy/docker-compose.yaml
@@ -33,7 +33,7 @@ services:
   standalone:
     container_name: milvus-standalone
-    image: milvusdb/milvus:v2.0.0-rc8-20211104-d1f4106
+    image: milvusdb/milvus:v2.0.2
     networks:
       app_net:
         ipv4_address: 172.16.238.10
@@ -63,7 +63,7 @@ services:
   webserver:
     container_name: video-webserver
-    image: milvusbootcamp/video_search_webserver:towhee
+    image: milvusbootcamp/video-search-server:towhee0.6
     networks:
       app_net:
         ipv4_address: 172.16.238.12
diff --git a/solutions/video_similarity_search/quick_deploy/server/requirements.txt b/solutions/video_similarity_search/quick_deploy/server/requirements.txt
index 8a7986998..3de8ce1c3 100644
--- a/solutions/video_similarity_search/quick_deploy/server/requirements.txt
+++ b/solutions/video_similarity_search/quick_deploy/server/requirements.txt
@@ -1,13 +1,14 @@
 #Keras==2.6.0
 #tensorflow==2.6.0
-#opencv-python
-Pillow==8.3.1
+opencv-python
+Pillow==9.0.0
 pymysql
 fastapi
 uvicorn
 pandas
 aiofiles
-pymilvus==2.0.1
+pymilvus==2.0.2
 diskcache
 python-multipart
-towhee==0.2.0
+towhee==0.6.0
+scipy
diff --git a/solutions/video_similarity_search/quick_deploy/server/src/config.py b/solutions/video_similarity_search/quick_deploy/server/src/config.py
index 116941703..92e9ede03 100644
--- a/solutions/video_similarity_search/quick_deploy/server/src/config.py
+++ b/solutions/video_similarity_search/quick_deploy/server/src/config.py
@@ -3,7 +3,7 @@
 ############### Milvus Configuration ###############
 MILVUS_HOST = os.getenv("MILVUS_HOST", "127.0.0.1")
 MILVUS_PORT = int(os.getenv("MILVUS_PORT", "19530"))
-VECTOR_DIMENSION = int(os.getenv("VECTOR_DIMENSION", "1000"))
+VECTOR_DIMENSION = int(os.getenv("VECTOR_DIMENSION", "2048"))
 INDEX_FILE_SIZE = int(os.getenv("INDEX_FILE_SIZE", "1024"))
 METRIC_TYPE = os.getenv("METRIC_TYPE", "L2")
 DEFAULT_TABLE = os.getenv("DEFAULT_TABLE", "milvus_video_search")
diff --git a/solutions/video_similarity_search/quick_deploy/server/src/encode.py b/solutions/video_similarity_search/quick_deploy/server/src/encode.py
index b29c6c3bc..fd36371ee 100644
--- a/solutions/video_similarity_search/quick_deploy/server/src/encode.py
+++ b/solutions/video_similarity_search/quick_deploy/server/src/encode.py
@@ -1,4 +1,4 @@
-from towhee import pipeline
+import towhee
 from PIL import Image
 from numpy import linalg as LA
 
@@ -11,12 +11,12 @@ class Resnet50:
         args_0 (`type`):
             ...
""" - def __init__(self): - self.img_embedding = pipeline('image-embedding') def resnet50_extract_feat(self, img_path): - # Return the normalized embedding of the images - img = Image.open(img_path) - feat = self.img_embedding(img) + feat = towhee.glob(img_path) \ + .image_decode() \ + .image_embedding.timm(model_name='resnet50') \ + .to_list() + # Return the normalized embedding([[vec]]) of image norm_feat = feat / LA.norm(feat) - return norm_feat.tolist()[0][0] + return norm_feat.tolist()[0] diff --git a/solutions/video_similarity_search/quick_deploy/server/src/resnet50_encode.py b/solutions/video_similarity_search/quick_deploy/server/src/resnet50_encode.py deleted file mode 100644 index 648649ee0..000000000 --- a/solutions/video_similarity_search/quick_deploy/server/src/resnet50_encode.py +++ /dev/null @@ -1,33 +0,0 @@ -import numpy as np -from tensorflow.keras.applications.resnet50 import ResNet50 -from tensorflow.keras.applications.resnet50 import preprocess_input as preprocess_input_resnet50 -from tensorflow.keras.preprocessing import image -from numpy import linalg as LA - - -class Resnet50: - """ - Say something about the ExampleCalass... - - Args: - args_0 (`type`): - ... - """ - def __init__(self): - self.input_shape = (224, 224, 3) - self.weight = 'imagenet' - self.pooling = 'max' - self.model_resnet50 = ResNet50(weights='imagenet', - input_shape=(self.input_shape[0], self.input_shape[1], self.input_shape[2]), - pooling=self.pooling, include_top=False) - self.model_resnet50.predict(np.zeros((1, 224, 224, 3))) - - def resnet50_extract_feat(self, img_path): - # Return the normalized embedding([[list]]) of the images - img = image.load_img(img_path, target_size=(self.input_shape[0], self.input_shape[1])) - img = image.img_to_array(img) - img = np.expand_dims(img, axis=0) - img = preprocess_input_resnet50(img) - feat = self.model_resnet50.predict(img) - norm_feat = feat[0] / LA.norm(feat[0]) - return norm_feat.tolist() diff --git a/solutions/video_similarity_search/quick_deploy/server/src/test_main.py b/solutions/video_similarity_search/quick_deploy/server/src/test_main.py index b887e5d06..ccab5ef66 100644 --- a/solutions/video_similarity_search/quick_deploy/server/src/test_main.py +++ b/solutions/video_similarity_search/quick_deploy/server/src/test_main.py @@ -38,7 +38,7 @@ def test_count(): #assert response.json() == 346 def test_search(): - _test_upload_file = './examle-gif/100-gif/tumblr_ku4lzkM5fg1qa47qco1_250.gif' + _test_upload_file = '../../../pic/show.png' _files = {'image': open(_test_upload_file, 'rb')} response = client.post("/video/search", files=_files) # response = client.post( diff --git a/solutions/video_similarity_search/quick_deploy/test_docker_compose.py b/solutions/video_similarity_search/quick_deploy/test_docker_compose.py index 3e6f45df9..003b4321d 100644 --- a/solutions/video_similarity_search/quick_deploy/test_docker_compose.py +++ b/solutions/video_similarity_search/quick_deploy/test_docker_compose.py @@ -40,7 +40,7 @@ def test_get_img(): assert response.status_code == 200 def test_search(): - _test_upload_file = './data/100-gif/tumblr_l3x3x5EH061qa4rk1o1_250.gif' + _test_upload_file = '../pic/show.png' _files = {'image': open(_test_upload_file, 'rb')} response = requests.post( 'http://127.0.0.1:5000/video/search',