-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathprepare_stage1.5_draw_bbox.py
121 lines (93 loc) · 4.36 KB
/
prepare_stage1.5_draw_bbox.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import json
import re
from PIL import Image
import argparse
import os
from tqdm import tqdm
def get_args():
    """Parse the command-line options of this script.

    All three options are declared ``required``. The rest of the script opens
    or creates these paths unconditionally, so a missing option would
    otherwise arrive as ``None`` and fail later with an unrelated-looking
    ``TypeError``; argparse now reports the missing option up front.

    Returns:
        argparse.Namespace: carries the string attributes ``input_jsonl``,
        ``original_images_jsonl`` and ``output_dir``.

    Raises:
        SystemExit: raised by argparse (exit code 2) when an option is
            missing or malformed.
    """
    parser = argparse.ArgumentParser(description="Process images based on bounding box adjustments.")
    parser.add_argument(
        '--input-jsonl',
        type=str,
        required=True,
        help='Input JSON Lines file path with image data and bounding boxes.',
    )
    parser.add_argument(
        '--original-images-jsonl',
        type=str,
        required=True,
        help='Original STVQA JSONL file path.',
    )
    parser.add_argument(
        '--output-dir',
        type=str,
        required=True,
        help='Directory path for saving cropped images.',
    )
    return parser.parse_args()
def _slide_into_image(low, high, extent):
    """Translate the pixel interval ``[low, high]`` so it lies inside ``[0, extent - 1]``."""
    if low < 0:
        # Overhangs the left/top edge: push the whole interval right/down.
        high = high - low
        low = 0
    if high > extent - 1:
        # Overhangs the right/bottom edge: push the whole interval back.
        # When the interval is (almost) as long as the image this shift would
        # drive ``low`` below zero, so it is clamped to keep the box in-bounds.
        low = max(low - high + extent - 1, 0)
        high = extent - 1
    return low, high


def adjust_bbox_expand(bbox_ratios, width, height, expand_ratio=1.5, min_size=448):
    """Grow a relative bounding box into a square window that fits the image.

    The box is converted to pixel coordinates, and a square whose side is
    ``max(box_width, box_height) * expand_ratio`` (never smaller than
    ``min_size``) is centred on it. If the square overhangs an image edge it
    is slid back inside; the result is converted back to ratios.

    Args:
        bbox_ratios: ``[x_min, y_min, x_max, y_max]`` expressed as fractions
            of the image width (x) and height (y).
        width: Image width in pixels.
        height: Image height in pixels.
        expand_ratio: Multiplier applied to the longer side of the box.
        min_size: Minimum side length of the square, in pixels.

    Returns:
        list: ``[x_min, y_min, x_max, y_max]`` as fractions of the image
        size. ``[0, 0, 1, 1]`` (the whole image) is returned when the square
        would be larger than either image dimension.
    """
    x_min = bbox_ratios[0] * width
    y_min = bbox_ratios[1] * height
    x_max = bbox_ratios[2] * width
    y_max = bbox_ratios[3] * height

    center_x = (x_min + x_max) / 2
    center_y = (y_min + y_max) / 2

    side = max(max(x_max - x_min, y_max - y_min) * expand_ratio, min_size)
    if side > width or side > height:
        # The square cannot fit: fall back to the full image.
        return [0, 0, 1, 1]

    half = side / 2
    new_x_min, new_x_max = _slide_into_image(center_x - half, center_x + half, width)
    new_y_min, new_y_max = _slide_into_image(center_y - half, center_y + half, height)

    return [new_x_min / width, new_y_min / height, new_x_max / width, new_y_max / height]
from PIL import Image, ImageDraw
def draw_bbox_with_ratios(image_path, bbox_ratios, bbox_image_path):
    """Draw a red rectangle on an image and save the annotated copy.

    Args:
        image_path: Path of the image to annotate.
        bbox_ratios: ``(x_min, y_min, x_max, y_max)`` as fractions of the
            image width (x) and height (y).
        bbox_image_path: Destination path of the annotated image; the output
            format is chosen by Pillow from this path's extension.
    """
    with Image.open(image_path) as img:
        width, height = img.size
        # Work on an RGB copy: the outline colour is an RGB triple, and
        # palette/alpha images cannot be written to JPEG destinations.
        canvas = img.convert("RGB")

    x_min = bbox_ratios[0] * width
    y_min = bbox_ratios[1] * height
    x_max = bbox_ratios[2] * width
    y_max = bbox_ratios[3] * height

    draw = ImageDraw.Draw(canvas)
    draw.rectangle([x_min, y_min, x_max, y_max], outline=(255, 0, 0), width=5)

    canvas.save(bbox_image_path)
# Matches the first "[x0, y0, x1, y1]" list of decimal numbers in a model answer.
_BBOX_PATTERN = re.compile(r'\[\-?\d+\.\d+, \-?\d+\.\d+, \-?\d+\.\d+, \-?\d+\.\d+\]')


def _extract_bbox_ratios(text):
    """Return the first bbox found in *text* as four floats, or the full-image box."""
    found = _BBOX_PATTERN.search(text)
    if found is None:
        return [0, 0, 1, 1]
    return [float(part) for part in found.group(0).strip('[]').split(',')]


def process_images(input_jsonl, original_images_jsonl, output_dir,
                   image_root='playground/data/eval/data', max_samples=5000):
    """Draw each predicted bounding box on its source image and save the result.

    Line ``i`` of ``input_jsonl`` is paired with line ``i`` of
    ``original_images_jsonl``. The first ``[x0, y0, x1, y1]`` list found in
    the prediction's ``text`` field is used as the box (ratios of the image
    size); when none is found the box defaults to the whole image. Samples
    whose box is degenerate (``x0 >= x1`` or ``y0 >= y1``) are skipped and
    produce no output file.

    Args:
        input_jsonl: JSON Lines file whose records have a ``text`` field.
        original_images_jsonl: JSON Lines file whose records have ``image``
            (path relative to ``image_root``) and ``question_id`` fields.
        output_dir: Directory receiving the annotated images, created if
            missing. Files are named ``<question_id>_<image basename>``.
        image_root: Directory that the ``image`` paths are relative to.
        max_samples: Upper bound on the number of line pairs processed;
            ``None`` processes every available pair.
    """
    with open(input_jsonl, 'r', encoding='utf-8') as file:
        lines = file.readlines()
    with open(original_images_jsonl, 'r', encoding='utf-8') as file:
        original_stvqa_lines = file.readlines()

    # Create the destination itself (not its parent) once, up front;
    # os.path.dirname() of a bare directory name is '' and cannot be created.
    os.makedirs(output_dir, exist_ok=True)

    count = min(len(lines), len(original_stvqa_lines))
    if max_samples is not None:
        count = min(count, max_samples)

    pairs = zip(lines[:count], original_stvqa_lines[:count])
    for raw_prediction, raw_original in tqdm(pairs, total=count):
        prediction = json.loads(raw_prediction)
        original_line = json.loads(raw_original)

        image_path = os.path.join(image_root, original_line['image'])
        save_path = os.path.join(output_dir, f'{original_line["question_id"]}_{original_line["image"].split("/")[-1]}')

        bbox_ratios = _extract_bbox_ratios(prediction['text'])
        if bbox_ratios[0] < bbox_ratios[2] and bbox_ratios[1] < bbox_ratios[3]:
            # The image is opened (and closed) inside the drawing helper, so
            # no file handle is kept open across iterations.
            draw_bbox_with_ratios(image_path, tuple(bbox_ratios), save_path)
if __name__ == "__main__":
    # Script entry point: read the CLI options and hand them to the pipeline.
    cli_options = get_args()
    process_images(
        input_jsonl=cli_options.input_jsonl,
        original_images_jsonl=cli_options.original_images_jsonl,
        output_dir=cli_options.output_dir,
    )