cnn_ctc.py
import keras
from keras.layers import Input, Conv2D, BatchNormalization, MaxPooling2D
from keras.layers import Reshape, Dense, Dropout, Lambda
from keras.optimizers import Adam
from keras import backend as K
from keras.models import Model
from keras.utils import multi_gpu_model
import tensorflow as tf

# Let the TF 1.x session grow GPU memory on demand instead of pre-allocating it all.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
keras.backend.tensorflow_backend.set_session(tf.Session(config=config))

def am_hparams():
    params = tf.contrib.training.HParams(
        # vocab
        vocab_size=50,
        lr=0.0008,
        gpu_nums=0,
        is_training=True)
    return params

# ============================= Build the model ====================================
class Am():
    """CNN + CTC acoustic model."""
    def __init__(self, args):
        self.vocab_size = args.vocab_size
        self.gpu_nums = args.gpu_nums
        self.lr = args.lr
        self.is_training = args.is_training
        self._model_init()
        if self.is_training:
            self._ctc_init()
            self.opt_init()

    def _model_init(self):
        self.inputs = Input(name='the_inputs', shape=(None, 32, 1))  # 32 is the input feature width
        self.h1 = cnn_cell(32, self.inputs)
        self.h2 = cnn_cell(64, self.h1)
        self.h3 = cnn_cell(128, self.h2)
        self.h4 = cnn_cell(128, self.h3, pool=False)
        self.h5 = cnn_cell(128, self.h4, pool=False)
        # 32 / 8 * 128 = 512: three poolings shrink the feature width from 32 to 4, with 128 channels
        self.h6 = Reshape((-1, 512))(self.h5)
        self.h6 = Dropout(0.2)(self.h6)
        self.h7 = dense(256)(self.h6)
        self.h7 = Dropout(0.2)(self.h7)
        self.outputs = dense(self.vocab_size, activation='softmax')(self.h7)
        self.model = Model(inputs=self.inputs, outputs=self.outputs)
        self.model.summary()

    def _ctc_init(self):
        self.labels = Input(name='the_labels', shape=[None], dtype='float32')
        self.input_length = Input(name='input_length', shape=[1], dtype='int64')
        self.label_length = Input(name='label_length', shape=[1], dtype='int64')
        self.loss_out = Lambda(ctc_lambda, output_shape=(1,), name='ctc')(
            [self.labels, self.outputs, self.input_length, self.label_length])
        self.ctc_model = Model(inputs=[self.labels, self.inputs,
                                       self.input_length, self.label_length],
                               outputs=self.loss_out)

    def opt_init(self):
        opt = Adam(lr=self.lr, beta_1=0.9, beta_2=0.999, decay=0.01, epsilon=10e-8)
        if self.gpu_nums > 1:
            self.ctc_model = multi_gpu_model(self.ctc_model, gpus=self.gpu_nums)
        self.ctc_model.compile(loss={'ctc': lambda y_true, output: output}, optimizer=opt)  # , metrics=['accuracy']

# ============================ Model components =================================
def conv2d(size):
    return Conv2D(size, (3, 3), use_bias=True, activation='relu',
                  padding='same', kernel_initializer='he_normal')


def norm(x):
    return BatchNormalization(axis=-1)(x)


def maxpool(x):
    return MaxPooling2D(pool_size=(2, 2), strides=None, padding="valid")(x)


def dense(units, activation="relu"):
    return Dense(units, activation=activation, use_bias=True,
                 kernel_initializer='he_normal')

# Two conv + batch-norm layers; when pool=True the output time and feature dims are halved.
def cnn_cell(size, x, pool=True):
    x = norm(conv2d(size)(x))
    x = norm(conv2d(size)(x))
    if pool:
        x = maxpool(x)
    return x

def ctc_lambda(args):
    labels, y_pred, input_length, label_length = args
    y_pred = y_pred[:, :, :]  # keep all time steps (this slice is a no-op)
    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)
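

# Illustrative usage sketch (not part of the original file). It only builds the
# model from the default hyper-parameters; the feature shape and fit() call in
# the comments are assumptions about the surrounding training pipeline, which
# must supply (batch, time, 32, 1) features plus per-sample sequence lengths.
if __name__ == '__main__':
    hp = am_hparams()   # vocab_size=50, lr=0.0008, gpu_nums=0, is_training=True
    am = Am(hp)         # prints the summary and builds am.model and am.ctc_model
    # Training would use the CTC-wrapped model with a dummy target, since the
    # compiled loss simply returns the Lambda('ctc') output, e.g.:
    #   am.ctc_model.fit([labels, feats, input_length, label_length],
    #                    np.zeros((batch_size, 1)), batch_size=..., epochs=...)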