inference.py
import torch
import torchaudio
from cnn_model import CNN
from urbansounddataset import UrbanSoundDataset
from train_model import AUDIO_DIR, ANNOTATION_FILE, SAMPLE_RATE, NUM_SAMPLES, MODEL_DIR
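
# The ten UrbanSound8K classes, ordered by classID (0-9) as used in the
# annotation file, so a predicted index maps directly to its label.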
class_mapping = [
"air_conditioner",
"car_horn",
"children_playing",
"dog_bark",
"drilling",
"engine_idling",
"gun_shot",
"jackhammer",
"siren",
"street_music"
]

def predict(model, input_tensor, target, class_mapping):
    model.eval()
    with torch.no_grad():
        output = model(input_tensor)
        # output is a Tensor of shape (1, 10), e.g. [[0.1, 0.01, ..., 0.6]],
        # holding one score per class
        _, pred_index = torch.max(output, dim=1)
        predicted = class_mapping[pred_index.item()]
        expected = class_mapping[target]
    return predicted, expected

if __name__ == '__main__':
    # run inference on CPU
    device = torch.device('cpu')

    # load the trained weights back into the model
    cnn = CNN()
    cnn.load_state_dict(torch.load(MODEL_DIR + 'model.pt', map_location=device))

    # create the transform & dataset: a 64-bin mel spectrogram computed with a
    # 1024-sample FFT window and a 512-sample hop
    mel_spectrogram = torchaudio.transforms.MelSpectrogram(
        sample_rate=SAMPLE_RATE,
        n_fft=1024,
        hop_length=512,
        n_mels=64
    )
    dataset = UrbanSoundDataset(
        ANNOTATION_FILE,
        AUDIO_DIR,
        mel_spectrogram,
        SAMPLE_RATE,
        NUM_SAMPLES,
        device
    )

    # get one sample from the dataset for inference
    index = 0
    input_tensor, target = dataset[index][0], dataset[index][1]
    # the CNN expects a 4-dim input: (batch_size, channels, frequency, time)
    input_tensor = input_tensor.unsqueeze(0)

    # make an inference
    predicted, expected = predict(cnn, input_tensor, target, class_mapping)
    print(f"Predicted: '{predicted}', expected: '{expected}'")