commit 39c0867 (0 parents): showing 4 changed files with 191 additions and 0 deletions.
@@ -0,0 +1,94 @@
# -*- coding: utf-8 -*-
"""
Created on Thu Aug 6 12:45:35 2020
@author: Abhilash
"""

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import backend as k
from tensorflow.keras.layers import LSTM, Dense, Flatten, Bidirectional
from tensorflow.keras.activations import softmax, relu, elu, sigmoid
from tensorflow.keras.optimizers import Adagrad
from tensorflow.keras.initializers import glorot_uniform
from tensorflow.keras.regularizers import l2
from tensorflow.keras.constraints import min_max_norm
from tensorflow.keras.layers import Embedding, Input
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Layer
from tensorflow.keras.models import Sequential, Model

def compute_dot(z, kernel):
    '''A simple dot product implementation with keras.backend.'''
    return k.dot(z, kernel)

class MiniAttentionBlock(Layer):
    '''A Keras/TensorFlow implementation of Hierarchical Attention Networks for Document Classification (Yang et al., 2016).
    Link: [https://www.cs.cmu.edu/~./hovy/papers/16HLT-hierarchical-attention-networks.pdf]
    The input to this layer is a 3D tensor of shape (samples, steps, features).
    The output of this layer is a 2D tensor of shape (samples, features).
    This layer can be used after the Keras Embedding layer, or on top of an LSTM/Bidirectional-LSTM/GRU
    layer with return_sequences=True, and before the Dense layer (after the recurrent layers).
    It is recommended to place it either after the Embedding layer or after the recurrent layer and
    before the Dense layer.
    '''
    def __init__(self, W_init, u_init, b_init, W_reg, u_reg, b_reg, W_const, u_const, b_const, bias=True, **kwargs):
        '''Initializes the weights and biases for the attention layer.
        The weights have initializers, regularizers and constraints, denoted by W_<exp>
        where <exp> can be init, reg or const. These are consistent with Keras initializers,
        regularizers and constraints. The same applies to the bias and output vectors (b and u).'''
        self.W_init = W_init
        self.u_init = u_init
        self.b_init = b_init
        self.W_reg = W_reg
        self.u_reg = u_reg
        self.b_reg = b_reg
        self.W_const = W_const
        self.u_const = u_const
        self.b_const = b_const
        self.bias = bias
        super(MiniAttentionBlock, self).__init__(**kwargs)

    def build(self, input_shape):
        '''Creates the W, b and u weights for the attention block. The input of the MiniAttention
        block is a 3D tensor.'''
        assert len(input_shape) == 3
        self.W = self.add_weight(shape=(input_shape[-1], input_shape[-1]), initializer=self.W_init,
                                 regularizer=self.W_reg, constraint=self.W_const, name="attention_weight")
        if self.bias:
            self.Bias = self.add_weight(shape=(input_shape[-1],), initializer=self.b_init,
                                        regularizer=self.b_reg, constraint=self.b_const, name="attention_bias")
        self.u = self.add_weight(shape=(input_shape[-1],), initializer=self.u_init,
                                 regularizer=self.u_reg, constraint=self.u_const, name="attention_context")
        super(MiniAttentionBlock, self).build(input_shape)

    def call(self, inp):
        '''Implements the un-masked attention layer. The weights are computed along with the biases (if any)
        and passed through a tanh activation. Attention scores are computed by a dot product with the u vector,
        passed through an exponential unit and then normalized over the time steps. The final output is the
        attention-weighted sum of the input tensor over the time steps.'''
        weights = compute_dot(inp, self.W)
        if self.bias:
            weights += self.Bias
        # apply tanh
        weights = k.tanh(weights)
        # attention scores: dot product with the context vector u
        f_weights = compute_dot(weights, k.expand_dims(self.u))
        f_weights = k.squeeze(f_weights, axis=-1)
        f_weights = k.exp(f_weights)
        # normalize over the time steps
        f_weights /= (k.sum(f_weights, axis=1, keepdims=True) + k.epsilon())
        # weighted sum of the input over the time steps
        f_weights = k.expand_dims(f_weights)
        output_weights = inp * f_weights
        return k.sum(output_weights, axis=1)
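
A minimal sketch, not part of this commit, of how the 3D-to-2D shape contract described in the docstring could be checked. It assumes the MiniAttentionBlock class above is in scope and applies it to a random (samples, steps, features) tensor with explicit initializer, regularizer and constraint arguments.

# Hypothetical sanity check for the (samples, steps, features) -> (samples, features) contract.
import numpy as np
import tensorflow as tf
from tensorflow import keras

attention = MiniAttentionBlock(
    W_init=keras.initializers.glorot_uniform(), u_init=keras.initializers.glorot_uniform(),
    b_init='zeros', W_reg=keras.regularizers.l2(1e-4), u_reg=None, b_reg=None,
    W_const=keras.constraints.MinMaxNorm(0.0, 1.0), u_const=None, b_const=None, bias=True)
x = tf.constant(np.random.rand(2, 10, 8), dtype=tf.float32)  # 2 samples, 10 steps, 8 features
y = attention(x)
print(y.shape)  # expected: (2, 8)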
@@ -0,0 +1,47 @@
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import backend as k
from tensorflow.keras.layers import LSTM, Dense, Flatten, Bidirectional
from tensorflow.keras.activations import softmax, relu, elu, sigmoid
from tensorflow.keras.optimizers import Adagrad
from tensorflow.keras.initializers import glorot_uniform
from tensorflow.keras.regularizers import l2
from tensorflow.keras.constraints import min_max_norm
from tensorflow.keras.layers import Embedding, Input
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Layer
from tensorflow.keras.models import Sequential, Model
import MiniAttention

'''An example of using the MiniAttention layer with the Keras Functional (Model) and Sequential APIs.'''

def network(max_features):
    inp = Input(shape=(100,))
    z = Embedding(max_features, 128)(inp)
    # return_sequences=True so the attention layer receives a 3D (samples, steps, features) tensor
    z = Bidirectional(LSTM(128, return_sequences=True))(z)
    z = MiniAttention.MiniAttentionBlock(keras.initializers.he_normal(), None, None, None, None, None, None, None, None)(z)
    z = Dense(128, activation='relu')(z)
    z = Dense(1, activation='sigmoid')(z)
    model = Model(inputs=inp, outputs=z)
    model.compile(loss='binary_crossentropy', metrics=['accuracy'], optimizer='Adagrad')
    model.summary()
    return model

def network_sequential(max_features):
    model = Sequential()
    model.add(Embedding(max_features, 128, input_shape=(100,)))
    # the recurrent layer comes first (with return_sequences=True) so the attention layer gets a 3D tensor
    model.add(LSTM(128, return_sequences=True))
    model.add(MiniAttention.MiniAttentionBlock(None, None, None, None, None, None, None, None, None))
    model.add(Dense(8, activation='relu'))
    model.add(Dense(4, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', metrics=['accuracy'], optimizer='Adagrad')
    model.summary()
    return model

network(10000)
network_sequential(10000)
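
The example imports Tokenizer and pad_sequences but never shows how raw text reaches the Input(shape=(100,)) layer. The sketch below is a hypothetical addition, not part of the commit, showing one way to tokenize and pad text to length 100 and fit the functional model; it assumes the network() function above, and the sample sentences and labels are made up.

# Hypothetical end-to-end sketch: raw text -> padded integer sequences -> MiniAttention model.
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

texts = ["the movie was great", "the plot was dull and slow"]  # toy corpus (made up)
labels = np.array([1, 0])

max_features = 10000
tokenizer = Tokenizer(num_words=max_features)
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)
padded = pad_sequences(sequences, maxlen=100)  # matches Input(shape=(100,))

model = network(max_features)
model.fit(padded, labels, epochs=2, batch_size=2)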
@@ -0,0 +1,7 @@
# -*- coding: utf-8 -*-
"""
Created on Thu Aug 6 13:05:47 2020
@author: 45063883
"""

@@ -0,0 +1,43 @@
# -*- coding: utf-8 -*-
"""
Created on Thu Aug 6 16:02:43 2020
@author: 45063883
"""

"""
Created on Fri Apr 10 20:45:49 2020
@author: Abhilash
"""

from setuptools import setup

setup(
    name='MiniAttention',
    packages=['MiniAttention'],
    version='0.1',
    license='MIT',
    description='A mini hierarchical attention layer built for document classification, compatible with Keras and TensorFlow',
    author='ABHILASH MAJUMDER',
    author_email='[email protected]',
    url='https://github.com/abhilash1910/MiniAttention',
    download_url='https://github.com/abhilash1910/MiniAttention/archive/v_01.tar.gz',
    keywords=['Document Classification', 'Attention Layer', 'Hierarchical Attention', 'Word Level Attention', 'Keras', 'TensorFlow'],
    install_requires=[
        'numpy',
        'matplotlib',
        'keras',
        'tensorflow',
        'pandas'
    ],
classifiers=[ | ||
'Development Status :: 3 - Alpha', | ||
'Intended Audience :: Developers', | ||
'Topic :: Software Development :: Build Tools', | ||
'License :: OSI Approved :: MIT License', | ||
'Programming Language :: Python :: 3.5', | ||
'Programming Language :: Python :: 3.6', | ||
], | ||
) |