import os
import cv2
import shutil
import numpy as np
from multiprocessing import Pool
import logging

# Configure logging
logging.basicConfig(filename='dataset_processing.log', level=logging.INFO,
                    format='%(asctime)s %(levelname)s:%(message)s')
# Define individual dataset paths
DATASET_PATHS = [
    r"D:\NUST-EME-FA24\Deep learning\PROJECT\dataset\DB1_B",
    r"D:\NUST-EME-FA24\Deep learning\PROJECT\dataset\DB2_B",
    r"D:\NUST-EME-FA24\Deep learning\PROJECT\dataset\DB3_B",
    r"D:\NUST-EME-FA24\Deep learning\PROJECT\dataset\DB4_B"
]
# Define output directories
OUTPUT_BASE_PATH = r"D:\NUST-EME-FA24\Deep learning\PROJECT\processed_dataset_final"
CLEAN_DATASET_PATH = os.path.join(OUTPUT_BASE_PATH, 'clean_dataset')
NOISY_DATASET_PATH = os.path.join(OUTPUT_BASE_PATH, 'noisy_dataset')
MIXED_NOISY_DATASET_PATH = os.path.join(OUTPUT_BASE_PATH, 'mixed_noisy_dataset')
MIXED_CLEAN_DATASET_PATH = os.path.join(OUTPUT_BASE_PATH, 'mixed_clean_dataset')
# Desired uniform size, given as (width, height), the order cv2.resize expects
UNIFORM_SIZE = (300, 480)
# Noise types to generate; each has a matching function in NOISE_FUNCTIONS below
NOISE_TYPES = [
    'gaussian_blur',
    'motion_blur',
    'missing_fingerprint',
    'cut_fingerprint',
    'partial_fingerprint',
    'knify_cut',
    'low_ink',
    'synthetic_artifacts',
    'gaussian_noise',
    'salt_pepper_noise',
    'double_fingerprint',
    'heavy_ink',
    'merged_ridges'
]
# Mapping of noise types to their functions; populated below, once the
# augmentation functions have been defined
NOISE_FUNCTIONS = {}
def create_directories():
    """Create necessary directories for the clean and noisy datasets."""
    for path in [CLEAN_DATASET_PATH, NOISY_DATASET_PATH, MIXED_NOISY_DATASET_PATH, MIXED_CLEAN_DATASET_PATH]:
        if not os.path.exists(path):
            os.makedirs(path)
            logging.info(f"Created directory: {path}")
    # Create subdirectories for each noise type
    for noise in NOISE_TYPES:
        noise_dir = os.path.join(NOISY_DATASET_PATH, noise)
        if not os.path.exists(noise_dir):
            os.makedirs(noise_dir)
            logging.info(f"Created noise sub-directory: {noise_dir}")
def is_noisy_image(image):
    """
    Detect whether an image is already noisy.
    Uses variance of the Laplacian for blurriness and standard deviation
    for low contrast (heavy ink or a dark impression).
    """
    # Variance of Laplacian (blurriness)
    variance = cv2.Laplacian(image, cv2.CV_64F).var()
    if variance < 100:  # Threshold for blurriness
        return True
    # Contrast (standard deviation)
    contrast = image.std()
    if contrast < 50:  # Threshold for low contrast (heavy ink or dark image)
        return True
    return False
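# The two thresholds above (Laplacian variance < 100, standard deviation < 50)
# are heuristics that may need tuning per sensor and dataset. A minimal sketch
# of an optional helper for inspecting both scores on sample images; the
# function name is hypothetical and not part of the original pipeline.
def noise_scores(image):
    """Return (laplacian_variance, contrast) so the thresholds can be tuned."""
    return cv2.Laplacian(image, cv2.CV_64F).var(), image.std()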
# Noise augmentation functions
def apply_gaussian_blur(image):
    # Blur with a 15x15 Gaussian kernel (sigma chosen automatically by OpenCV)
    return cv2.GaussianBlur(image, (15, 15), 0)
def apply_motion_blur(image):
    # Build a horizontal motion-blur kernel: a single centered row of ones,
    # normalized so overall brightness is preserved
    kernel_size = 15
    kernel = np.zeros((kernel_size, kernel_size))
    kernel[(kernel_size - 1) // 2, :] = np.ones(kernel_size)
    kernel = kernel / kernel_size
    return cv2.filter2D(image, -1, kernel)
def apply_missing_fingerprint(image):
    # White out a random rectangular area to simulate a missing fingerprint region
    h, w = image.shape
    top_left_x = np.random.randint(0, w // 3)
    top_left_y = np.random.randint(0, h // 3)
    width = np.random.randint(w // 4, w // 2)
    height = np.random.randint(h // 4, h // 2)
    image_missing = image.copy()
    cv2.rectangle(image_missing, (top_left_x, top_left_y),
                  (top_left_x + width, top_left_y + height), 255, -1)  # Filled white rectangle
    return image_missing
def apply_cut_fingerprint(image):
    # Simulate a cut by whiting out a strip along one edge; the strip size
    # scales with the relevant dimension (width for left/right, height for top/bottom)
    h, w = image.shape
    side = np.random.choice(['left', 'right', 'top', 'bottom'])
    image_cut = image.copy()
    if side == 'left':
        cut = np.random.randint(w // 10, w // 5)
        cv2.rectangle(image_cut, (0, 0), (cut, h), 255, -1)
    elif side == 'right':
        cut = np.random.randint(w // 10, w // 5)
        cv2.rectangle(image_cut, (w - cut, 0), (w, h), 255, -1)
    elif side == 'top':
        cut = np.random.randint(h // 10, h // 5)
        cv2.rectangle(image_cut, (0, 0), (w, cut), 255, -1)
    else:  # bottom
        cut = np.random.randint(h // 10, h // 5)
        cv2.rectangle(image_cut, (0, h - cut), (w, h), 255, -1)
    return image_cut
def apply_partial_fingerprint(image):
    # Keep only a random rectangular region; everything outside the mask
    # becomes black (cv2.rectangle clips coordinates that fall off the image)
    h, w = image.shape
    start_x = np.random.randint(0, w // 2)
    start_y = np.random.randint(0, h // 2)
    end_x = start_x + np.random.randint(w // 4, w // 2)
    end_y = start_y + np.random.randint(h // 4, h // 2)
    mask = np.zeros_like(image)
    cv2.rectangle(mask, (start_x, start_y), (end_x, end_y), 255, -1)
    return cv2.bitwise_and(image, mask)
def apply_knify_cut(image):
    # Draw random white lines to simulate irregular cuts across the print
    h, w = image.shape
    image_knify = image.copy()
    num_lines = np.random.randint(5, 15)
    for _ in range(num_lines):
        x1, y1 = np.random.randint(0, w), np.random.randint(0, h)
        x2, y2 = np.random.randint(0, w), np.random.randint(0, h)
        thickness = np.random.randint(1, 3)
        cv2.line(image_knify, (x1, y1), (x2, y2), 255, thickness)
    return image_knify
def apply_low_ink(image):
    # Reduce brightness by 50% to simulate a low-ink impression
    return cv2.convertScaleAbs(image, alpha=0.5, beta=0)
def apply_synthetic_artifacts(image):
    # Add short white scratches at random positions (cv2.line clips
    # endpoints that fall outside the image)
    h, w = image.shape
    image_artifacts = image.copy()
    num_scratches = np.random.randint(3, 7)
    for _ in range(num_scratches):
        x1 = np.random.randint(0, w)
        y1 = np.random.randint(0, h)
        x2 = x1 + np.random.randint(-20, 20)
        y2 = y1 + np.random.randint(-20, 20)
        thickness = np.random.randint(1, 3)
        cv2.line(image_artifacts, (x1, y1), (x2, y2), 255, thickness)
    return image_artifacts
def apply_gaussian_noise(image):
    # Add zero-mean Gaussian noise; work in float so negative samples are not
    # wrapped by a uint8 cast, then clip back into the valid 8-bit range
    mean = 0
    sigma = 25
    gauss = np.random.normal(mean, sigma, image.shape)
    return np.clip(image.astype(np.float64) + gauss, 0, 255).astype(np.uint8)
def apply_salt_pepper_noise(image):
    # Flip a small fraction of pixels to pure white (salt) or black (pepper)
    s_vs_p = 0.5
    amount = 0.02
    image_sp = image.copy()
    # Salt
    num_salt = int(np.ceil(amount * image.size * s_vs_p))
    coords = tuple(np.random.randint(0, i, num_salt) for i in image.shape)
    image_sp[coords] = 255
    # Pepper
    num_pepper = int(np.ceil(amount * image.size * (1. - s_vs_p)))
    coords = tuple(np.random.randint(0, i, num_pepper) for i in image.shape)
    image_sp[coords] = 0
    return image_sp
def apply_double_fingerprint(image):
    """
    Overlay two fingerprint images to simulate double fingerprints.
    Since there is no second fingerprint, blend the image with its mirror.
    """
    mirrored = cv2.flip(image, 1)  # Horizontal flip
    return cv2.addWeighted(image, 0.5, mirrored, 0.5, 0)
def apply_heavy_ink(image):
    """
    Create a dark, heavy-ink fingerprint by increasing contrast and reducing
    brightness. convertScaleAbs already saturates to the 0-255 uint8 range.
    """
    return cv2.convertScaleAbs(image, alpha=1.5, beta=-50)
def apply_merged_ridges(image):
    """
    Make fingerprint ridges appear merged or less distinct by applying a
    morphological closing with a small rectangular kernel.
    """
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    return cv2.morphologyEx(image, cv2.MORPH_CLOSE, kernel, iterations=2)
# Populate the NOISE_FUNCTIONS mapping declared above
NOISE_FUNCTIONS = {
    'gaussian_blur': apply_gaussian_blur,
    'motion_blur': apply_motion_blur,
    'missing_fingerprint': apply_missing_fingerprint,
    'cut_fingerprint': apply_cut_fingerprint,
    'partial_fingerprint': apply_partial_fingerprint,
    'knify_cut': apply_knify_cut,
    'low_ink': apply_low_ink,
    'synthetic_artifacts': apply_synthetic_artifacts,
    'gaussian_noise': apply_gaussian_noise,
    'salt_pepper_noise': apply_salt_pepper_noise,
    'double_fingerprint': apply_double_fingerprint,
    'heavy_ink': apply_heavy_ink,
    'merged_ridges': apply_merged_ridges
}
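# NOISE_TYPES drives directory creation while NOISE_FUNCTIONS drives the
# augmentation loop, so the two must stay in sync. A minimal, hypothetical
# sanity check (not part of the original script) that could be called at
# startup, e.g. from main():
def check_noise_registry():
    missing = set(NOISE_TYPES) - set(NOISE_FUNCTIONS)
    extra = set(NOISE_FUNCTIONS) - set(NOISE_TYPES)
    if missing or extra:
        raise RuntimeError(f"Noise registry mismatch: missing={missing}, extra={extra}")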
def process_image(image_info):
    """Process a single image: save the clean copy and apply every noise augmentation."""
    image_path, dataset_name = image_info
    try:
        # Read image in grayscale
        image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        if image is None:
            logging.error(f"Failed to read image: {image_path}")
            return
        # Resize to the uniform (width, height)
        image_resized = cv2.resize(image, UNIFORM_SIZE)
        # If the image is already noisy, move the original out of the source
        # dataset into a dedicated directory and skip augmentation
        if is_noisy_image(image_resized):
            noisy_existing_dir = os.path.join(NOISY_DATASET_PATH, 'existing_noisy')
            # exist_ok avoids a race when several worker processes hit this first
            os.makedirs(noisy_existing_dir, exist_ok=True)
            shutil.move(image_path, os.path.join(noisy_existing_dir, os.path.basename(image_path)))
            logging.info(f"Moved existing noisy image: {image_path}")
            return
        # Save the clean image, prefixed with the dataset name to avoid collisions
        clean_image_name = f"{dataset_name}_{os.path.basename(image_path)}"
        clean_image_path = os.path.join(CLEAN_DATASET_PATH, clean_image_name)
        cv2.imwrite(clean_image_path, image_resized)
        logging.info(f"Saved clean image: {clean_image_path}")
        # Apply every noise augmentation to the clean image
        for noise_type, func in NOISE_FUNCTIONS.items():
            noisy_img = func(image_resized)
            noise_dir = os.path.join(NOISY_DATASET_PATH, noise_type)
            noisy_image_name = f"{dataset_name}_{noise_type}_{os.path.basename(image_path)}"
            noisy_image_path = os.path.join(noise_dir, noisy_image_name)
            cv2.imwrite(noisy_image_path, noisy_img)
            logging.info(f"Saved noisy image: {noisy_image_path}")
            # Also copy into the combined mixed_noisy_dataset
            mixed_noisy_image_path = os.path.join(MIXED_NOISY_DATASET_PATH, noisy_image_name)
            shutil.copy(noisy_image_path, mixed_noisy_image_path)
            logging.info(f"Copied to mixed noisy dataset: {mixed_noisy_image_path}")
    except Exception as e:
        logging.error(f"Error processing {image_path}: {e}")
def create_mixed_clean_dataset():
    """Combine all clean images into the mixed_clean_dataset directory."""
    try:
        for img_name in os.listdir(CLEAN_DATASET_PATH):
            if img_name.lower().endswith(('.tif', '.tiff', '.png', '.jpg', '.jpeg')):
                src_path = os.path.join(CLEAN_DATASET_PATH, img_name)
                dst_path = os.path.join(MIXED_CLEAN_DATASET_PATH, img_name)
                shutil.copy(src_path, dst_path)
                logging.info(f"Copied to mixed clean dataset: {dst_path}")
    except Exception as e:
        logging.error(f"Error creating mixed clean dataset: {e}")
def main():
    """Main function to orchestrate dataset processing."""
    create_directories()
    # Gather every image from every dataset before processing
    images_to_process = []
    for dataset_path in DATASET_PATHS:
        if not os.path.exists(dataset_path):
            logging.warning(f"Dataset path does not exist: {dataset_path}")
            continue
        dataset_name = os.path.basename(dataset_path)
        logging.info(f"Processing dataset: {dataset_name}")
        for img_name in os.listdir(dataset_path):
            if img_name.lower().endswith(('.tif', '.tiff', '.png', '.jpg', '.jpeg')):
                image_path = os.path.join(dataset_path, img_name)
                images_to_process.append((image_path, dataset_name))
    # Process images in parallel, one worker per CPU core
    with Pool(processes=os.cpu_count()) as pool:
        pool.map(process_image, images_to_process)
    # Combine all clean images into the mixed clean dataset
    create_mixed_clean_dataset()
    logging.info("Dataset processing completed.")
if __name__ == "__main__":
    # The guard is required for multiprocessing on Windows (spawn start method)
    main()
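
# Expected layout under OUTPUT_BASE_PATH after a run, derived from the
# constants and naming patterns above:
#   clean_dataset/           <dataset>_<image>          (resized clean images)
#   noisy_dataset/<noise>/   <dataset>_<noise>_<image>  (one folder per noise type,
#                            plus existing_noisy/ for images flagged by is_noisy_image)
#   mixed_noisy_dataset/     all noisy variants in a single folder
#   mixed_clean_dataset/     all clean images in a single folder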