This repository has been archived by the owner on Mar 3, 2024. It is now read-only.

Commit

#55 Merge outputs as feature
* #55 Merge outputs as feature

* Update README.md
CyberZHG authored May 11, 2019
1 parent 9db7ad2 commit 2a4a981
Showing 6 changed files with 167 additions and 23 deletions.
26 changes: 5 additions & 21 deletions README.md
@@ -107,7 +107,7 @@ model.fit_generator(


# Use the trained model
inputs, output_layer = get_model( # `output_layer` is the last feature extraction layer (the last transformer)
inputs, output_layer = get_model(
    token_num=len(token_dict),
    head_num=5,
    transformer_num=12,
@@ -116,25 +116,9 @@ inputs, output_layer = get_model( # `output_layer` is the last feature extracti
    seq_len=20,
    pos_num=20,
    dropout_rate=0.05,
    training=False,  # The input layers and output layer will be returned if `training` is `False`
    trainable=False,  # Whether the model is trainable. The default value is the same as `training`
)
```

### Custom Feature Extraction

```python
def _custom_layers(x, trainable=True):
    return keras.layers.LSTM(
        units=768,
        trainable=trainable,
        return_sequences=True,
        name='LSTM',
    )(x)

model = get_model(
    token_num=200,
    embed_dim=768,
    custom_layers=_custom_layers,
    training=False,  # The input layers and output layer will be returned if `training` is `False`
    trainable=False,  # Whether the model is trainable. The default value is the same as `training`
    output_layer_num=4,  # The number of layers whose outputs will be concatenated as a single output.
                         # Only available when `training` is `False`.
)
```
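
Continuing from the README example above: when `training` is `False`, the returned `inputs` and `output_layer` can be wrapped into a standalone Keras model for feature extraction. A minimal sketch (not part of this commit; the dummy indices and the `keras`/`numpy` imports are assumptions):

```python
import numpy as np
import keras

# Wrap the returned input layers and the (possibly concatenated) feature layer
# into a standalone Keras model.
feature_model = keras.models.Model(inputs=inputs, outputs=output_layer)

# Dummy token and segment indices padded to `seq_len=20` (illustrative values only).
token_input = np.zeros((1, 20))
segment_input = np.zeros((1, 20))
features = feature_model.predict([token_input, segment_input])
# With `output_layer_num=4`, the last dimension of `features` is 4 * embed_dim,
# because the outputs of the last 4 transformer blocks are concatenated.
print(features.shape)
```
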
128 changes: 128 additions & 0 deletions README.zh-CN.md
@@ -0,0 +1,128 @@
# Keras BERT

[![Travis](https://travis-ci.org/CyberZHG/keras-bert.svg)](https://travis-ci.org/CyberZHG/keras-bert)
[![Coverage](https://coveralls.io/repos/github/CyberZHG/keras-bert/badge.svg?branch=master)](https://coveralls.io/github/CyberZHG/keras-bert)
[![FOSSA Status](https://app.fossa.com/api/projects/git%2Bgithub.com%2FCyberZHG%2Fkeras-bert.svg?type=shield)](https://app.fossa.com/projects/git%2Bgithub.com%2FCyberZHG%2Fkeras-bert?ref=badge_shield)

An unofficial implementation of [BERT](https://arxiv.org/pdf/1810.04805.pdf) that can load the official pre-trained models for feature extraction and prediction.

## Install

```bash
pip install keras-bert
```

## Usage

### Using the official models

The [feature extraction demo](./demo/load_model/load_and_extract.py) uses the official pre-trained `chinese_L-12_H-768_A-12` model and produces the same results as the official tool.

The [prediction demo](./demo/load_model/load_and_predict.py) shows how to fill in masked tokens and predict whether one sentence follows the other.


### Tokenization

The `Tokenizer` class handles tokenization, including text normalization and greedy longest-match-first splitting of the English parts; Chinese characters in the CJK range are split into single characters.

```python
from keras_bert import Tokenizer

token_dict = {
    '[CLS]': 0,
    '[SEP]': 1,
    'un': 2,
    '##aff': 3,
    '##able': 4,
    '[UNK]': 5,
}
tokenizer = Tokenizer(token_dict)
print(tokenizer.tokenize('unaffable'))  # The result is: `['[CLS]', 'un', '##aff', '##able', '[SEP]']`

indices, segments = tokenizer.encode('unaffable')
print(indices)  # Token indices: `[0, 2, 3, 4, 1]`
print(segments)  # Segment indices: `[0, 0, 0, 0, 0]`

print(tokenizer.tokenize(first='unaffable', second='钢'))
# The result is: `['[CLS]', 'un', '##aff', '##able', '[SEP]', '钢', '[SEP]']`
indices, segments = tokenizer.encode(first='unaffable', second='钢', max_len=10)
print(indices)  # Token indices: `[0, 2, 3, 4, 1, 5, 1, 0, 0, 0]`
print(segments)  # Segment indices: `[0, 0, 0, 0, 0, 1, 1, 1, 1, 1]`
```

### Training & using

The official code is recommended for pre-training. This repository also ships a training procedure; when `training` is `True`, an Adam optimizer with warmup is used:

```python
import keras
from keras_bert import get_base_dict, get_model, gen_batch_inputs


# Toy input example:
sentence_pairs = [
    [['all', 'work', 'and', 'no', 'play'], ['makes', 'jack', 'a', 'dull', 'boy']],
    [['from', 'the', 'day', 'forth'], ['my', 'arm', 'changed']],
    [['and', 'a', 'voice', 'echoed'], ['power', 'give', 'me', 'more', 'power']],
]


# Build a custom token dictionary
token_dict = get_base_dict()  # Initialize special tokens such as `[CLS]`
for pairs in sentence_pairs:
    for token in pairs[0] + pairs[1]:
        if token not in token_dict:
            token_dict[token] = len(token_dict)
token_list = list(token_dict.keys())  # Used for selecting a random word


# Build and train the model
model = get_model(
    token_num=len(token_dict),
    head_num=5,
    transformer_num=12,
    embed_dim=25,
    feed_forward_dim=100,
    seq_len=20,
    pos_num=20,
    dropout_rate=0.05,
)
model.summary()

def _generator():
    while True:
        yield gen_batch_inputs(
            sentence_pairs,
            token_dict,
            token_list,
            seq_len=20,
            mask_rate=0.3,
            swap_sentence_rate=1.0,
        )

model.fit_generator(
    generator=_generator(),
    steps_per_epoch=1000,
    epochs=100,
    validation_data=_generator(),
    validation_steps=100,
    callbacks=[
        keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)
    ],
)


# Use the trained model
inputs, output_layer = get_model(
    token_num=len(token_dict),
    head_num=5,
    transformer_num=12,
    embed_dim=25,
    feed_forward_dim=100,
    seq_len=20,
    pos_num=20,
    dropout_rate=0.05,
    training=False,      # When `training` is `False`, the input layers and the feature output layer are returned
    trainable=False,     # Whether the model is trainable; the default is the same as `training`
    output_layer_num=4,  # The outputs of the last 4 layers are concatenated as a single output; only available when `training` is `False`
)
```
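
The same option is available when loading one of the official pre-trained checkpoints instead of training from scratch. A minimal sketch, with hypothetical paths to a downloaded `chinese_L-12_H-768_A-12` checkpoint:

```python
from keras_bert import load_trained_model_from_checkpoint

# Hypothetical paths to the downloaded official checkpoint.
config_path = 'chinese_L-12_H-768_A-12/bert_config.json'
checkpoint_path = 'chinese_L-12_H-768_A-12/bert_model.ckpt'

model = load_trained_model_from_checkpoint(
    config_path,
    checkpoint_path,
    training=False,       # only the feature-extraction part of the network is returned
    trainable=False,
    output_layer_num=4,   # concatenate the outputs of the last 4 transformer blocks
)
model.summary()
```
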
12 changes: 12 additions & 0 deletions keras_bert/bert.py
@@ -43,6 +43,7 @@ def get_model(token_num,
custom_layers=None,
training=True,
trainable=None,
output_layer_num=1,
decay_steps=100000,
warmup_steps=10000,
lr=1e-4):
@@ -67,6 +68,8 @@
:param training: The built model will be returned if it is `True`, otherwise the input layers and the last feature
extraction layer will be returned.
:param trainable: Whether the model is trainable.
:param output_layer_num: The number of layers whose outputs will be concatenated as a single output.
Only available when `training` is `False`.
:param decay_steps: Learning rate will decay linearly to zero in decay steps.
:param warmup_steps: Learning rate will increase linearly to lr in first warmup steps.
:param lr: Learning rate.
@@ -101,6 +104,15 @@
trainable=trainable,
)
    if not training:
        if output_layer_num > 1:
            if output_layer_num > transformer_num:
                output_layer_num = transformer_num
            model = keras.models.Model(inputs=inputs[:2], outputs=transformed)
            outputs = []
            for i in range(output_layer_num):
                layer = model.get_layer(name='Encoder-{}-FeedForward-Norm'.format(transformer_num - i))
                outputs.append(layer.output)
            transformed = keras.layers.Concatenate(name='Encoder-Output')(list(reversed(outputs)))
        return inputs[:2], transformed
mlm_dense_layer = keras.layers.Dense(
units=embed_dim,
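
The added branch above relies on the encoder outputs being named `Encoder-{i}-FeedForward-Norm`. A minimal sketch (not from the repository; all parameter values are illustrative) of reaching one of those layers by name from a model built with `training=False` and the default `output_layer_num=1`, which mirrors what the new code does internally before concatenating:

```python
import keras
from keras_bert import get_model

inputs, output_layer = get_model(
    token_num=30,
    head_num=5,
    transformer_num=12,
    embed_dim=25,
    feed_forward_dim=100,
    seq_len=20,
    pos_num=20,
    dropout_rate=0.05,
    training=False,  # default `output_layer_num=1`: `output_layer` is the last encoder output
)

# Build a model over the single output, then pull an intermediate encoder output by name.
model = keras.models.Model(inputs=inputs, outputs=output_layer)
penultimate = model.get_layer(name='Encoder-11-FeedForward-Norm').output
extractor = keras.models.Model(inputs=inputs, outputs=penultimate)
print(keras.backend.int_shape(penultimate))  # expected: (None, 20, 25)
```
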
15 changes: 14 additions & 1 deletion keras_bert/loader.py
@@ -21,12 +21,15 @@ def _loader(name):
def build_model_from_config(config_file,
training=False,
trainable=None,
output_layer_num=1,
seq_len=None):
"""Build the model from config file.
:param config_file: The path to the JSON configuration file.
:param training: If training, the whole model will be returned.
:param trainable: Whether the model is trainable.
:param output_layer_num: The number of layers whose outputs will be concatenated as a single output.
Only available when `training` is `False`.
:param seq_len: If it is not None and it is shorter than the value in the config file, the weights in
position embeddings will be sliced to fit the new length.
:return: model and config
@@ -47,6 +50,7 @@
feed_forward_dim=config['intermediate_size'],
training=training,
trainable=trainable,
output_layer_num=output_layer_num,
)
if not training:
inputs, outputs = model
@@ -140,6 +144,7 @@ def load_trained_model_from_checkpoint(config_file,
checkpoint_file,
training=False,
trainable=None,
output_layer_num=1,
seq_len=None):
"""Load trained official model from checkpoint.
@@ -148,10 +153,18 @@
:param training: If training, the whole model will be returned.
Otherwise, the MLM and NSP parts will be ignored.
:param trainable: Whether the model is trainable. The default value is the same with `training`.
:param output_layer_num: The number of layers whose outputs will be concatenated as a single output.
Only available when `training` is `False`.
:param seq_len: If it is not None and it is shorter than the value in the config file, the weights in
position embeddings will be sliced to fit the new length.
:return: model
"""
model, config = build_model_from_config(config_file, training=training, trainable=trainable, seq_len=seq_len)
    model, config = build_model_from_config(
        config_file,
        training=training,
        trainable=trainable,
        output_layer_num=output_layer_num,
        seq_len=seq_len,
    )
    load_model_weights_from_checkpoint(model, config, checkpoint_file, training=training)
    return model
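
For completeness, the lower-level `build_model_from_config` helper accepts the same argument. A minimal sketch (the config path is hypothetical; no checkpoint weights are loaded here):

```python
from keras_bert.loader import build_model_from_config

model, config = build_model_from_config(
    'uncased_L-12_H-768_A-12/bert_config.json',
    training=False,
    output_layer_num=2,  # concatenate the outputs of the last 2 transformer blocks
)
model.summary()
print(config['num_hidden_layers'])  # architecture details come straight from the JSON config
```
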
2 changes: 1 addition & 1 deletion setup.py
@@ -11,7 +11,7 @@

setup(
name='keras-bert',
version='0.41.0',
version='0.42.0',
packages=find_packages(),
url='https://github.com/CyberZHG/keras-bert',
license='MIT',
7 changes: 7 additions & 0 deletions tests/test_loader.py
@@ -25,3 +25,10 @@ def test_load_training(self):
model_path = os.path.join(current_path, 'test_checkpoint', 'model.ckpt-20')
model = load_trained_model_from_checkpoint(config_path, model_path, training=True)
model.summary()

    def test_load_output_layer_num(self):
        current_path = os.path.dirname(os.path.abspath(__file__))
        config_path = os.path.join(current_path, 'test_checkpoint', 'config.json')
        model_path = os.path.join(current_path, 'test_checkpoint', 'model.ckpt-20')
        model = load_trained_model_from_checkpoint(config_path, model_path, training=False, output_layer_num=4)
        model.summary()
