-
Notifications
You must be signed in to change notification settings - Fork 0
/
isl detector.py
113 lines (93 loc) · 3.78 KB
/
isl detector.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import cv2
import mediapipe as mp
import copy
import itertools
from tensorflow import keras
import numpy as np
import pandas as pd
import string
# load the saved model from file
model = keras.models.load_model("model.h5")
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_hands = mp.solutions.hands
alphabet = ['1','2','3','4','5','6','7','8','9']
alphabet += list(string.ascii_uppercase)
# functions
def calc_landmark_list(image, landmarks):
image_width, image_height = image.shape[1], image.shape[0]
landmark_point = []
# Keypoint
for _, landmark in enumerate(landmarks.landmark):
landmark_x = min(int(landmark.x * image_width), image_width - 1)
landmark_y = min(int(landmark.y * image_height), image_height - 1)
# landmark_z = landmark.z
landmark_point.append([landmark_x, landmark_y])
return landmark_point
def pre_process_landmark(landmark_list):
temp_landmark_list = copy.deepcopy(landmark_list)
# Convert to relative coordinates
base_x, base_y = 0, 0
for index, landmark_point in enumerate(temp_landmark_list):
if index == 0:
base_x, base_y = landmark_point[0], landmark_point[1]
temp_landmark_list[index][0] = temp_landmark_list[index][0] - base_x
temp_landmark_list[index][1] = temp_landmark_list[index][1] - base_y
# Convert to a one-dimensional list
temp_landmark_list = list(
itertools.chain.from_iterable(temp_landmark_list))
# Normalization
max_value = max(list(map(abs, temp_landmark_list)))
def normalize_(n):
return n / max_value
temp_landmark_list = list(map(normalize_, temp_landmark_list))
return temp_landmark_list
# For webcam input:
cap = cv2.VideoCapture(0)
with mp_hands.Hands(
model_complexity=0,
max_num_hands=2,
min_detection_confidence=0.5,
min_tracking_confidence=0.5) as hands:
while cap.isOpened():
success, image = cap.read()
# Flip the image horizontally for a selfie-view display.
image = cv2.flip(image, 1)
if not success:
print("Ignoring empty camera frame.")
# If loading a video, use 'break' instead of 'continue'.
continue
# To improve performance, optionally mark the image as not writeable to pass by reference.
image.flags.writeable = False
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
results = hands.process(image)
# Draw the hand annotations on the image.
image.flags.writeable = True
image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
debug_image = copy.deepcopy(image)
if results.multi_hand_landmarks:
for hand_landmarks, handedness in zip(results.multi_hand_landmarks,results.multi_handedness):
landmark_list = calc_landmark_list(debug_image, hand_landmarks)
# Conversion to relative coordinates / normalized coordinates
pre_processed_landmark_list = pre_process_landmark(landmark_list)
# Draw the landmarks
mp_drawing.draw_landmarks(
image,
hand_landmarks,
mp_hands.HAND_CONNECTIONS,
mp_drawing_styles.get_default_hand_landmarks_style(),
mp_drawing_styles.get_default_hand_connections_style())
df = pd.DataFrame(pre_processed_landmark_list).transpose()
# predict the sign language
predictions = model.predict(df, verbose=0)
# get the predicted class for each sample
predicted_classes = np.argmax(predictions, axis=1)
label = alphabet[predicted_classes[0]]
cv2.putText(image, label, (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 0, 255), 2)
print(alphabet[predicted_classes[0]])
print("------------------------")
# output image
cv2.imshow('Indian sign language detector', image)
if cv2.waitKey(5) & 0xFF == 27:
break
cap.release()