Skip to content
This repository has been archived by the owner on Apr 16, 2023. It is now read-only.

Predict app #121

Open
wants to merge 14 commits into
base: dev
Choose a base branch
from
124 changes: 39 additions & 85 deletions autofocus/predict/app/app.py
Original file line number Diff line number Diff line change
@@ -1,109 +1,63 @@
import mimetypes
import os
import time
from zipfile import ZipFile

from flask import Flask, jsonify, make_response, request
from werkzeug import secure_filename
from flask import Flask, jsonify, request

from .model import predict_multiple, predict_single
from .utils import allowed_file, filter_image_files, list_zip_files
from .models.File import File
from .models.Predictor import Predictor
from .models.ZipArchive import ZipArchive
from .requests.PredictRequestValidator import PredictRequestValidator
from .requests.PredictZipRequestValidator import PredictZipRequestValidator

# We are going to upload the files to the server as part of the request, so set tmp folder here.
UPLOAD_FOLDER = "/tmp/"
ALLOWED_EXTENSIONS = set(
k for k, v in mimetypes.types_map.items() if v.startswith("image/")
)

app = Flask(__name__)
app.config.from_object(__name__)
app.config["UPLOAD_FOLDER"] = UPLOAD_FOLDER


@app.route("/predict", methods=["GET", "POST"])
@app.route("/predict", methods=["POST"])
def classify_single():
"""Classify a single image"""
if request.method == "POST":
file = request.files["file"]
# Validate request
validator = PredictRequestValidator(request)
if not validator.validate():
validator.abort()

if not file:
return "No file sent."
# Get File object
file = File(request.files["file"], app.config["UPLOAD_FOLDER"])

filename = secure_filename(file.filename)
# Predict probabilities
app.logger.info("Classifying image %s" % (file.getPath()))
t = time.time()
predictor = Predictor()
predictor.predict(file)
dt = time.time() - t
app.logger.info("Execution time: %0.2f" % (dt * 1000.0))

if allowed_file(filename, ALLOWED_EXTENSIONS):
file_path = os.path.join(app.config["UPLOAD_FOLDER"], filename)
# this isn't super-optimal since it's saving the file to the server
file.save(file_path)
# Return zipped probabilities
return jsonify(predictor.getProbabilities())

app.logger.info("Classifying image %s" % (file_path))

# Get the predictions (output of the softmax) for this image
t = time.time()
predictions = predict_single(file_path)
dt = time.time() - t
app.logger.info("Execution time: %0.2f" % (dt * 1000.0))

os.remove(file_path)

return jsonify(predictions)
else:
return "File type not allowed. File must be of type {allowed}".format(
allowed=ALLOWED_EXTENSIONS
)


@app.route("/predict_zip", methods=["GET", "POST"])
@app.route("/predict_zip", methods=["POST"])
def classify_zip():
"""Classify all images from a zip file"""
if request.method == "POST":
file = request.files["file"]

if not file:
return "No file sent."

if not file.filename.split(".")[-1] == "zip":
return ".zip is the only compression format currently supported"

filename = secure_filename(file.filename)
zip_file_path = os.path.join(app.config["UPLOAD_FOLDER"], filename)
file.save(zip_file_path)

zip_file = ZipFile(zip_file_path)
zip_file_list = list_zip_files(zip_file_path)
all_images = filter_image_files(zip_file_list, ALLOWED_EXTENSIONS)

if len(all_images) == 0:
return "No image files detected in the zip file"

# loop through images
start = 0
increment = 500
all_images_len = len(all_images)

while start < all_images_len:
end = start + increment
if end > len(all_images):
end = len(all_images)

# extract filenames
curr_file_list = all_images[start:end]
for filename in curr_file_list:
zip_file.extract(filename, path=app.config["UPLOAD_FOLDER"])

curr_file_list = [
os.path.join(app.config["UPLOAD_FOLDER"], x) for x in curr_file_list
]

predictions = predict_multiple(curr_file_list)

# remove files
for curr_file in curr_file_list:
os.remove(curr_file)

return make_response(jsonify(predictions))

start = end + 1
# Validate request
validator = PredictZipRequestValidator(request)
if not validator.validate():
validator.abort()

file = ZipArchive(request.files["file"], app.config["UPLOAD_FOLDER"])
if not file.hasImages():
validator.error["file"] = "No image files detected in the zip file."
validator.abort()

# Extract files
files = file.extractAll(app.config["UPLOAD_FOLDER"], file.listAllImages())

# Make prediction
predictor = Predictor()
return jsonify(predictor.predict_multiple(files))


@app.route("/hello")
Expand Down
29 changes: 0 additions & 29 deletions autofocus/predict/app/model.py

This file was deleted.

66 changes: 66 additions & 0 deletions autofocus/predict/app/models/File.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import os

from werkzeug import secure_filename


class File:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't need our own file class -- Python has built-in ways to do these things.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For our use case in particular, I wrote the File class so that it removes the file when the object is destroyed. This removes the need to clean up manually every time a file is handled.

I think maybe the naming here is the problem, so I renamed this class to be 'TemporaryFile'.

I would like to keep this class, since the functionality here is pretty awesome and makes the handling very simple. However, it is your call if you don't see the value.

What do you think?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"""
Store a file and remove it upon destruction

Parameters:
path: The path to the file
name: Secured filename (Can be empty)
"""

def __init__(self, file=None, upload_path=None):
    """
    Create a File, optionally backed by an uploaded file

    When no upload is passed nothing is stored; the path can be
    assigned afterwards with setPath.

    Parameters:
        file: Uploaded file object from flask (optional)
        upload_path: Directory in which the upload is saved
    """
    if not file:
        return
    self.setFromUploadedFile(file, upload_path)

def __del__(self):
    """
    Destructor of File

    Remove the file from the server, if there is one.

    A File created without an upload never gets a path, and the file
    may already have been deleted elsewhere; both cases are treated as
    "nothing to clean up" instead of raising inside the destructor.
    """
    path = getattr(self, "path", None)
    if path is None:
        return
    try:
        os.remove(path)
    except FileNotFoundError:
        # Already removed: the goal (no leftover file) is met.
        pass

def setFromUploadedFile(self, file, upload_path=None):
    """
    Store an uploaded file on disk and remember where it went

    The filename is passed through secure_filename before use. The
    file is written to upload_path when one is given, otherwise to a
    path made of the secured name alone.

    Parameters:
        file: Uploaded file object from flask
        upload_path: Directory in which to save the file (optional)
    """
    self.name = secure_filename(file.filename)
    self.path = (
        os.path.join(upload_path, self.name) if upload_path else self.name
    )
    file.save(self.path)

def setPath(self, path):
    """
    Point this File at a file that already exists on disk

    Parameters:
        path: Location of the file
    """
    self.path = path

def getPath(self):
    """
    Give back the location this File currently points at

    Returns:
        string: Path to the file
    """
    return self.path
55 changes: 55 additions & 0 deletions autofocus/predict/app/models/Predictor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
from pathlib import Path

from fastai.vision import load_learner, open_image


# Directory holding the model file: the "models" folder located two
# directories above the one that contains this module.
MODEL_DIR = Path(__file__).resolve().parents[2] / "models"
# File name of the model to load from MODEL_DIR.
MODEL_NAME = "multilabel_model_20190407.pkl"
# Loaded once, when this module is imported, and shared by every Predictor.
model = load_learner(MODEL_DIR, MODEL_NAME)
# Class labels taken from the loaded model; getProbabilities pairs them with
# the predicted probabilities.
CLASSES = model.data.classes


class Predictor:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we make these methods top-level functions? I don't see much benefit in having them together in a class rather than just a module, it requires another line of code for callers, and it seems to me less idiomatic Python.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I made them top-level functions now.
While developing the class I thought about adopting this class for other predictions as well. That was my thought behind this.

"""
Predicts probabilities with the model based on given files

Parameters:
probabilities: Array of probabilities calculated in predict
"""

def predict(self, file):
    """
    Run the model on one image and keep the resulting probabilities

    The result is stored on the instance (self.probabilities); use
    getProbabilities to read it back keyed by class.

    Parameters:
        file: File object of image file
    """
    outputs = model.predict(open_image(file.getPath()))
    # The model returns three values; only the last one (the per-class
    # probabilities) is kept.
    _, _, probs = outputs
    self.probabilities = [value.item() for value in probs]

def predict_multiple(self, files):
    """
    Run the model on every file and collect the results per key

    Parameters:
        files: Dict with File objects of image file

    Returns:
        dict: Dictionary of probabilities for each file in files
    """

    def probabilities_for(image_file):
        # predict stores its result on self; read it back right away
        # before the next prediction overwrites it.
        self.predict(image_file)
        return self.getProbabilities()

    return {key: probabilities_for(files[key]) for key in files}

def getProbabilities(self):
    """
    Return the stored probabilities keyed by class

    Returns:
        dict: A dictionary of classes to probabilities
    """
    return {
        label: probability
        for label, probability in zip(CLASSES, self.probabilities)
    }
88 changes: 88 additions & 0 deletions autofocus/predict/app/models/ZipArchive.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
import os
from zipfile import ZipFile

from .File import File
from ..requests.Validator import ALLOWED_IMAGE_FILES
from ..utils import allowed_file


class ZipArchive:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we need our own ZipArchive class -- the standard library has these capabilities https://docs.python.org/3/library/zipfile.html

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The same applies here as for 'File' (now 'TemporaryFile'). It adds more functionality to the built-in functions and can be reused.

"""
Archive of a zip file

This class is to store and access a zip file.

Parameters:
file: The storage of the zip file (gets removed from the os upon destructor call)
zip: Opened zip file
"""

def __init__(self, file, upload_folder=None):
    """
    Constructor of ZipArchive

    Save the uploaded archive to disk, then open it for reading.

    Parameters:
        file: Uploaded file from flask
        upload_folder: The folder to save the zip file
    """
    stored = File(file, upload_folder)
    self.file = stored
    self.zip = ZipFile(stored.getPath())

def listFiles(self):
    """
    Name every entry stored in the zip

    Returns:
        array: Array of filenames
    """
    entries = self.zip.infolist()
    return [entry.filename for entry in entries]

def listAllImages(self, extensions=ALLOWED_IMAGE_FILES):
    """
    Name every entry in the zip that counts as an image

    An entry is kept when allowed_file accepts it for the given
    extensions.

    Parameters:
        extensions: Array of allowed image extensions

    Returns:
        array: Array of filenames matching the extension
    """
    candidates = self.listFiles()
    return [name for name in candidates if allowed_file(name, extensions)]

def hasImages(self, extensions=ALLOWED_IMAGE_FILES):
    """
    Tell whether the zip contains at least one image

    Parameters:
        extensions: Array of allowed image extensions

    Returns:
        boolean: True if zip has images
    """
    images = self.listAllImages(extensions)
    return bool(images)

def extractAll(self, path=None, members=None):
    """
    Extract the given members and wrap each one in a File object

    Each returned File removes its extracted file from disk once the
    object is destroyed.

    Parameters:
        path: Directory to extract into (when omitted, the zipfile
            default location is used)
        members: Iterable of member names to extract (when omitted,
            every non-directory entry of the archive)

    Returns:
        dict: Mapping of member name to its extracted File object
    """
    if members is None:
        # Directory entries are skipped because File cleans up with
        # os.remove, which cannot delete a directory.
        members = [
            info.filename for info in self.zip.infolist() if not info.is_dir()
        ]
    extractedFiles = {}
    # Single pass, so a one-shot iterable of members is not exhausted by the
    # extraction before the File objects are built.
    for member in members:
        # extract() returns the sanitized path it actually wrote to, which
        # can differ from a plain join of path and member name (absolute
        # names, ".." components).
        extracted_path = self.zip.extract(member, path)
        file = File()
        file.setPath(extracted_path)
        extractedFiles[member] = file
    return extractedFiles
Empty file.
Loading