experiments updates
solivr committed Apr 3, 2018
1 parent 905e84b commit 87c6274
Showing 7 changed files with 220 additions and 22 deletions.
2 changes: 2 additions & 0 deletions exps/DIVA/__init__.py
@@ -0,0 +1,2 @@
#!/usr/bin/env python
__author__ = 'solivr'
24 changes: 11 additions & 13 deletions exps/Ornaments/ornaments_evaluation.py
@@ -12,7 +12,7 @@


def ornament_evaluate_folder(output_folder: str, validation_dir: str, debug_folder: str=None,
- verbose: bool=False) -> dict:
+ verbose: bool=False, min_area: float=0.0, miou_threshold: float=0.8) -> dict:

if debug_folder is not None:
os.makedirs(debug_folder, exist_ok=True)
@@ -28,21 +28,22 @@ def ornament_evaluate_folder(output_folder: str, validation_dir: str, debug_fold
post_processed_img = post_processed_img / np.maximum(np.max(post_processed_img), 1)

label_image = imread(os.path.join(validation_dir, 'labels', '{}.png'.format(basename)), mode='L')
- label_image = label_image / np.max(label_image)
+ label_image = label_image / np.max(label_image) if np.max(label_image) > 0 else label_image

# Upsample processed image to compare it to original image
target_shape = (label_image.shape[1], label_image.shape[0])
bin_upscaled = cv2.resize(np.uint8(post_processed_img), target_shape, interpolation=cv2.INTER_NEAREST)

- pred_boxes = find_box(np.uint8(bin_upscaled), mode='min_rectangle', min_area=0, n_max_boxes=np.inf)
- label_boxes = find_box(np.uint8(label_image), mode='min_rectangle', min_area=0, n_max_boxes=np.inf)
+ pred_boxes = find_box(np.uint8(bin_upscaled), mode='min_rectangle', min_area=min_area, n_max_boxes=np.inf)
+ label_boxes = find_box(np.uint8(label_image), mode='min_rectangle', min_area=min_area, n_max_boxes=np.inf)

if debug_folder is not None:
- imsave(os.path.join(debug_folder, '{}_bin.png'.format(basename)), np.uint8(bin_upscaled*255))
- orig_img = imread(os.path.join(validation_dir, 'images', '{}.jpg'.format(basename)), mode='RGB')
- cv2.polylines(orig_img, [label_boxes[:, None, :]], True, (0, 255, 0), thickness=15)
+ # imsave(os.path.join(debug_folder, '{}_bin.png'.format(basename)), np.uint8(bin_upscaled*255))
+ # orig_img = imread(os.path.join(validation_dir, 'images', '{}.jpg'.format(basename)), mode='RGB')
+ orig_img = imread(os.path.join(validation_dir, 'images', '{}.png'.format(basename)), mode='RGB')
+ cv2.polylines(orig_img, [lb[:, None, :] for lb in label_boxes], True, (0, 255, 0), thickness=15)
if pred_boxes is not None:
- cv2.polylines(orig_img, [pred_boxes[:, None, :]], True, (0, 0, 255), thickness=15)
+ cv2.polylines(orig_img, [pb[:, None, :] for pb in pred_boxes], True, (0, 0, 255), thickness=15)
imsave(os.path.join(debug_folder, '{}_boxes.jpg'.format(basename)), orig_img)

def intersection_over_union(cnt1, cnt2):
@@ -53,7 +54,7 @@ def intersection_over_union(cnt1, cnt2):

return np.sum(mask1 & mask2) / np.sum(mask1 | mask2)

- def compute_metric_boxes(predicted_boxes: np.array, label_boxes: np.array, threshold: float=0.6):
+ def compute_metric_boxes(predicted_boxes: np.array, label_boxes: np.array, threshold: float=miou_threshold):
# Todo test this fn
metric = Metrics()
if label_boxes is None:
@@ -97,7 +98,4 @@ def compute_metric_boxes(predicted_boxes: np.array, label_boxes: np.array, thres
'recall': global_metrics.recall,
'f_measure': global_metrics.f_measure,
'mIOU': global_metrics.mIOU
- }
-
-
-
+ }
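
compute_metric_boxes now takes its IoU threshold from the new miou_threshold argument (default 0.8) instead of the hard-coded 0.6. As a rough illustration of the box-matching test it relies on, a minimal self-contained sketch with made-up coordinates (the repository's intersection_over_union presumably rasterizes the contours in a similar way; the Metrics bookkeeping is not reproduced):

    import numpy as np
    import cv2

    def quad_iou(quad1, quad2, shape):
        # Rasterize both quadrilaterals and compare the filled masks
        mask1 = np.zeros(shape, dtype=np.uint8)
        mask2 = np.zeros(shape, dtype=np.uint8)
        cv2.fillConvexPoly(mask1, quad1.astype(np.int32), 1)
        cv2.fillConvexPoly(mask2, quad2.astype(np.int32), 1)
        union = np.sum((mask1 | mask2) > 0)
        return np.sum((mask1 & mask2) > 0) / union if union > 0 else 0.0

    # Hypothetical boxes (4 corner points, (x, y)) on a 100x100 page
    pred_box = np.array([[10, 10], [60, 10], [60, 60], [10, 60]])
    label_box = np.array([[20, 20], [70, 20], [70, 70], [20, 70]])
    iou = quad_iou(pred_box, label_box, (100, 100))
    # The prediction counts as a correct detection only if iou >= miou_threshold
    print('IoU = {:.2f}'.format(iou))
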
68 changes: 68 additions & 0 deletions exps/Ornaments/ornaments_process_eval.py
@@ -0,0 +1,68 @@
#!/usr/bin/env python
__author__ = 'solivr'

import argparse
import os
import json
from tqdm import tqdm
import numpy as np
from glob import glob
from ornaments_evaluation import ornament_evaluate_folder
from ornaments_post_processing import ornaments_post_processing_fn
import tempfile


PARAMS = {"threshold": 0.6, "ksize_open": [0, 0], "ksize_close": [0, 0]}
MIOU_THRESHOLD = 0.8
MIN_AREA = 0.005

if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('-d', '--npy_directory', type=str, required=True,
help='Directory containing the .npy probability maps')
parser.add_argument('-gt', '--gt_directory', type=str, required=True,
help='Directory containing images and labels for evaluation')
parser.add_argument('-o', '--output_directory', type=str, required=True,
help='Output directory')
parser.add_argument('-p', '--params_file', type=str, required=False,
help='JSON params file')
args = parser.parse_args()
args = vars(args)

output_dir = args.get('output_directory')
npy_dir = args.get('npy_directory')
# os.makedirs(output_dir)

if args.get('params_file') is None:
print('No params file provided, using default PARAMS')
params_list = [PARAMS]
else:
with open(args.get('params_file'), 'r') as f:
configs_data = json.load(f)
# If the file contains a list of configurations
if 'configs' in configs_data.keys():
params_list = configs_data['configs']
assert isinstance(params_list, list)
# Or if there is a single configuration
else:
params_list = [configs_data]

npy_files = glob(os.path.join(npy_dir, '*.npy'))
for params in params_list:
new_output_dir = output_dir + 'th{}_a{}_{}'.format(MIOU_THRESHOLD, MIN_AREA, np.random.randint(0, 1000))
os.makedirs(new_output_dir)

with tempfile.TemporaryDirectory() as tmpdir:
for filename in tqdm(npy_files):
probs = np.load(filename)
_ = ornaments_post_processing_fn(probs/np.max(probs), **params,
output_basename=os.path.join(tmpdir,
os.path.basename(filename).split('.')[0]))

measures = ornament_evaluate_folder(tmpdir, args.get('gt_directory'), min_area=MIN_AREA,
miou_threshold=MIOU_THRESHOLD, debug_folder=new_output_dir)

with open(os.path.join(new_output_dir, 'validation_scores.json'), 'w') as f:
json.dump(measures, f)
with open(os.path.join(new_output_dir, 'post_process_params.json'), 'w') as f:
json.dump(params, f)
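
The -p/--params_file option accepts either a single post-processing configuration or several of them under a 'configs' key, with the same keys as the PARAMS default above. A hypothetical way to produce such a file (file name and values are placeholders, not tuned settings):

    import json

    # Two candidate post-processing settings to evaluate in a single run
    configs = {'configs': [
        {'threshold': 0.6, 'ksize_open': [0, 0], 'ksize_close': [0, 0]},
        {'threshold': -1, 'ksize_open': [5, 5], 'ksize_close': [5, 5]},
    ]}
    with open('ornaments_configs.json', 'w') as f:
        json.dump(configs, f, indent=2)

Running the script with something like python ornaments_process_eval.py -d <npy_dir> -gt <gt_dir> -o <output_prefix> -p ornaments_configs.json then writes one validation_scores.json / post_process_params.json pair per configuration, each in its own randomly suffixed output directory.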
12 changes: 6 additions & 6 deletions exps/Ornaments/ornaments_process_set.py
@@ -57,7 +57,7 @@ def find_ornament(img_filenames, dir_predictions, post_process_params, output_di
target_shape = (orig_img.shape[1], orig_img.shape[0])
bin_upscaled = cv2.resize(np.uint8(page_bin), target_shape, interpolation=cv2.INTER_NEAREST)
if debug:
- imsave(os.path.join(output_dir, '{}_bin.jpg'.format(basename)), bin_upscaled)
+ imsave(os.path.join(output_dir, '{}_bin.png'.format(basename)), bin_upscaled)
pred_box = boxes_detection.find_box(np.uint8(bin_upscaled), mode='min_rectangle',
min_area=0.005, n_max_boxes=10)
if pred_box is not None:
@@ -81,7 +81,7 @@ def find_ornament(img_filenames, dir_predictions, post_process_params, output_di
parser.add_argument('--post_process_params', type=str, default=None,
help='JSON file containing the params for post-processing')
parser.add_argument('--gpu', type=str, default='0', help='Which GPU to use')
- parser.add_argument('-p', '--predict_only', default=False, action='store_true',
+ parser.add_argument('-pp', '--post_process_only', default=False, action='store_true',
help='Run only the post-processing step (skip prediction)')
args = parser.parse_args()
args = vars(args)
@@ -106,11 +106,11 @@ def find_ornament(img_filenames, dir_predictions, post_process_params, output_di
# Prediction
with tempfile.TemporaryDirectory() as tmpdirname:
npy_directory = output_dir
- predict_on_set(input_files, model_dir, npy_directory)
+ if not args.get('post_process_only'):
+ predict_on_set(input_files, model_dir, npy_directory)

- if not args.get('predict_only'):
- npy_files = glob(os.path.join(npy_directory, '*.npy'))
- find_ornament(input_files, npy_directory, post_process_params, output_dir)
+ npy_files = glob(os.path.join(npy_directory, '*.npy'))
+ find_ornament(input_files, npy_directory, post_process_params, output_dir)
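
With the renamed flag the logic is inverted with respect to the old -p/--predict_only: -pp/--post_process_only skips the TensorFlow prediction pass, and post-processing always runs on whatever .npy predictions already sit in the output directory. A minimal stand-alone sketch of that gating (print placeholders stand in for the real predict_on_set and find_ornament calls):

    import argparse
    import os
    from glob import glob

    parser = argparse.ArgumentParser()
    parser.add_argument('-pp', '--post_process_only', default=False, action='store_true')
    args = vars(parser.parse_args(['-pp']))  # hypothetical command line: skip prediction

    npy_directory = './out'  # placeholder directory of saved .npy probability maps
    if not args.get('post_process_only'):
        print('would run predict_on_set(...) and save new .npy files')

    npy_files = glob(os.path.join(npy_directory, '*.npy'))
    print('would run find_ornament(...) on {} saved predictions'.format(len(npy_files)))
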



2 changes: 2 additions & 0 deletions exps/cBAD/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#!/usr/bin/env python
__author__ = 'solivr'
23 changes: 20 additions & 3 deletions scripts/DIVA/diva_process_set.py
@@ -14,11 +14,12 @@
from doc_seg.post_processing.segmentation import diva_post_processing_fn
from doc_seg.evaluation.segmentation import to_original_color_code, parse_diva_tool_output
import argparse
+ import json


TILE_SIZE = 400
DIVA_JAR = '/home/datasets/DIVA_Layout_Analysis_Evaluator/out/artifacts/LayoutAnalysisEvaluator.jar'
- PARAMS = {'thresholds': [0.6, 0.6, 0.6], 'min_cc': 100}
+ PARAMS = {'thresholds': [0.5, 0.5, 0.5], 'min_cc': 50}


def predict_on_set(filenames_to_predict, model_dir, output_dir):
@@ -73,6 +74,8 @@ def evaluate_on_set(files_to_evaluate, post_process_params, output_dir, gt_dir):
parser.add_argument('--gpu', type=str, default='0', help='Which GPU to use')
parser.add_argument('-e', '--eval_only', default=False, action='store_true',
help='Whether or not to run the prediction step')
+ parser.add_argument('-p', '--params_file', type=str, default=None,
+ help='JSON params file')
args = parser.parse_args()
args = vars(args)

@@ -91,6 +94,20 @@ def evaluate_on_set(files_to_evaluate, post_process_params, output_dir, gt_dir):
npy_files = glob(os.path.join(output_dir, '*.npy'))

# TODO Get params from file
+ if args.get('params_file') is None:
+ params_list = [PARAMS]
+ else:
+ with open(args.get('params_file'), 'r') as f:
+ configs_data = json.load(f)
+ # If the file contains a list of configurations
+ if 'configs' in configs_data.keys():
+ params_list = configs_data['configs']
+ assert isinstance(params_list, list)
+ # Or if there is a single configuration
+ else:
+ params_list = [configs_data]

gt_dir = args.get('ground_truth_dir')
- mean_iu = evaluate_on_set(npy_files, PARAMS, output_dir, gt_dir)
- print('MEAN IU : {}'.format(mean_iu))
+ for params in params_list:
+ mean_iu = evaluate_on_set(npy_files, params, output_dir, gt_dir)
+ print('MEAN IU : {}'.format(mean_iu))
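
With the params_file handling added here, the DIVA script can score several post-processing settings in one run, using the same single-config / 'configs'-list schema as the Ornaments script above. A hypothetical sketch for generating a small parameter grid to sweep (file name and values are placeholders):

    import itertools
    import json

    # Cartesian product of per-class threshold triplets and minimum connected-component sizes
    thresholds = [[0.4, 0.4, 0.4], [0.5, 0.5, 0.5], [0.6, 0.6, 0.6]]
    min_ccs = [50, 100]
    configs = [{'thresholds': t, 'min_cc': c}
               for t, c in itertools.product(thresholds, min_ccs)]

    with open('diva_configs.json', 'w') as f:
        json.dump({'configs': configs}, f, indent=2)
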
111 changes: 111 additions & 0 deletions scripts/ornaments/ornaments_process_set.py
@@ -1,3 +1,114 @@
#!/usr/bin/env python
__author__ = 'solivr'

import tensorflow as tf
import sys
import os
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir, os.path.pardir)))
from doc_seg.loader import LoadedModel
from doc_seg.post_processing import ornaments_post_processing_fn, boxes_detection
from doc_seg.evaluation.extraction import format_quad_to_string
from tqdm import tqdm
import numpy as np
import argparse
import json
import tempfile
from glob import glob
from scipy.misc import imread, imsave
import cv2


def predict_on_set(filenames_to_predict, model_dir, output_dir):
"""
:param filenames_to_predict:
:param model_dir:
:param output_dir:
:return:
"""
with tf.Session():
m = LoadedModel(model_dir, 'filename')
for filename in tqdm(filenames_to_predict, desc='Prediction'):
pred = m.predict(filename)['probs'][0]
np.save(os.path.join(output_dir, os.path.basename(filename).split('.')[0]),
np.uint8(255 * pred))


def find_ornament(img_filenames, dir_predictions, post_process_params, output_dir, debug=False):
"""
:param img_filenames:
:param dir_predictions:
:param post_process_params:
:param output_dir:
:return:
"""

with open(os.path.join(output_dir, 'pages.txt'), 'w') as f:
for filename in tqdm(img_filenames, 'Post-processing'):
orig_img = imread(filename, mode='RGB')
basename = os.path.basename(filename).split('.')[0]

filename_pred = os.path.join(dir_predictions, basename + '.npy')
pred = np.load(filename_pred)
page_bin = ornaments_post_processing_fn(pred / np.max(pred), **post_process_params)

target_shape = (orig_img.shape[1], orig_img.shape[0])
bin_upscaled = cv2.resize(np.uint8(page_bin), target_shape, interpolation=cv2.INTER_NEAREST)
if debug:
imsave(os.path.join(output_dir, '{}_bin.jpg'.format(basename)), bin_upscaled)
pred_box = boxes_detection.find_box(np.uint8(bin_upscaled), mode='min_rectangle',
min_area=0, n_max_boxes=10)
if pred_box is not None:
for box in pred_box:
cv2.polylines(orig_img, [box[:, None, :]], True, (0, 0, 255), thickness=15)
else:
pred_box = []
print('No box found in {}'.format(filename))
imsave(os.path.join(output_dir, '{}_boxes.jpg'.format(basename)), orig_img)

f.write('{},{}\n'.format(filename, [format_quad_to_string(box) for box in pred_box]))


if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('-m', '--model_dir', type=str, required=True,
help='Directory of the model (should be of type ''*/export/<timestamp>)')
parser.add_argument('-i', '--input_files', type=str, required=True, nargs='+',
help='Folder containing the images to evaluate the model on')
parser.add_argument('-o', '--output_dir', type=str, required=True,
help='Folder containing the outputs (.npy predictions and visualization errors)')
parser.add_argument('--post_process_params', type=str, default=None,
help='JSON file containing the params for post-processing')
parser.add_argument('--gpu', type=str, default='0', help='Which GPU to use')
# parser.add_argument('-pp', '--post_proces_only', default=False, action='store_true',
# help='Whether to make or not the prediction')
args = parser.parse_args()
args = vars(args)

os.environ["CUDA_VISIBLE_DEVICES"] = args.get('gpu')
model_dir = args.get('model_dir')
input_files = args.get('input_files')
if len(input_files) == 0:
raise FileNotFoundError

output_dir = args.get('output_dir')
os.makedirs(output_dir, exist_ok=True)
post_process_params = args.get('post_process_params')

if post_process_params:
with open(post_process_params, 'r') as f:
post_process_params = json.load(f)
post_process_params = post_process_params['params']
else:
post_process_params = {"threshold": -1, "ksize_open": [5, 5], "ksize_close": [5, 5]}

# Prediction
with tempfile.TemporaryDirectory() as tmpdirname:
npy_directory = tmpdirname
predict_on_set(input_files, model_dir, npy_directory)

npy_files = glob(os.path.join(npy_directory, '*.npy'))
find_ornament(input_files, npy_directory, post_process_params, output_dir)
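
find_ornament upsamples the low-resolution binary prediction back to the original image size with nearest-neighbour interpolation before detecting and drawing boxes. A minimal stand-alone sketch of that step on synthetic data (boxes_detection.find_box is not reproduced; cv2.minAreaRect simply stands in for its 'min_rectangle' mode):

    import numpy as np
    import cv2

    orig_img = np.zeros((400, 300, 3), dtype=np.uint8)  # synthetic full-size page
    page_bin = np.zeros((100, 75), dtype=np.uint8)      # low-resolution binary prediction
    page_bin[20:60, 10:50] = 1                          # fake ornament region

    # cv2.resize expects (width, height), hence the swapped axes
    target_shape = (orig_img.shape[1], orig_img.shape[0])
    bin_upscaled = cv2.resize(page_bin, target_shape, interpolation=cv2.INTER_NEAREST)

    # Minimal rotated bounding box around the detected region
    ys, xs = np.where(bin_upscaled > 0)
    points = np.stack([xs, ys], axis=1).astype(np.float32)
    box = cv2.boxPoints(cv2.minAreaRect(points)).astype(np.int32)

    cv2.polylines(orig_img, [box[:, None, :]], True, (0, 0, 255), thickness=15)
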


