diff --git a/keras_transformer/attention.py b/keras_transformer/attention.py
index ee6cb83..598c04d 100644
--- a/keras_transformer/attention.py
+++ b/keras_transformer/attention.py
@@ -36,7 +36,7 @@ def __init__(self, num_heads: int, use_masking: bool,
         if (compression_window_size is not None
                 and compression_window_size <= 0):
             assert ValueError(
-                f"Too small compression window ({compression_window_size})")
+                "Too small compression window ({})".format(compression_window_size))
         self.compression_window_size = compression_window_size
         super().__init__(**kwargs)
 
@@ -83,10 +83,7 @@ def build_output_params(self, d_model):
 
     def validate_model_dimensionality(self, d_model: int):
         if d_model % self.num_heads != 0:
-            raise ValueError(
-                f'The size of the last dimension of the input '
-                f'({d_model}) must be evenly divisible by the number'
-                f'of the attention heads {self.num_heads}')
+            raise ValueError('The size of the last dimension of the input ({}) must be evenly divisible by the number of the attention heads {}'.format(d_model, self.num_heads))
 
     def attention(self, pre_q, pre_v, pre_k, out_seq_len: int, d_model: int,
                   training=None):
@@ -233,10 +230,7 @@ def build(self, input_shape):
                 '(for keys/values and queries)')
         values_dim, query_dim = input_shape[0][-1], input_shape[1][-1]
         if query_dim != values_dim:
-            raise ValueError(
-                f'Both keys/value and query inputs must be '
-                f'of the same dimensionality, instead of '
-                f'{values_dim} and {query_dim}.')
+            raise ValueError('Both keys/value and query inputs must be of the same dimensionality, instead of {} and {}.'.format(values_dim, query_dim))
         d_model = query_dim
         self.validate_model_dimensionality(d_model)
         # These weights are concatenated matrices W_k and W_v which