Fix t5 tokenizer presets
mattdangerw committed Nov 30, 2023
1 parent fcf3398 commit 6a756ed
Showing 2 changed files with 19 additions and 12 deletions.
24 changes: 12 additions & 12 deletions keras_nlp/models/t5/t5_presets.py
@@ -41,8 +41,8 @@
         "preprocessor_config": {},
         "weights_url": "https://storage.googleapis.com/keras-nlp/models/t5_small_multi/v1/model.weights.h5",
         "weights_hash": "2e10b5f72405d464ee55026b07e60741",
-        "vocabulary_url": "https://storage.googleapis.com/keras-nlp/models/t5_small_multi/v1/vocab.spm",
-        "vocabulary_hash": "9d15ef55d09d5a425ceb63fa31f7cae3",
+        "spm_proto_url": "https://storage.googleapis.com/keras-nlp/models/t5_small_multi/v1/vocab.spm",
+        "spm_proto_hash": "9d15ef55d09d5a425ceb63fa31f7cae3",
     },
     "t5_base_multi": {
         "metadata": {
@@ -70,8 +70,8 @@
         "preprocessor_config": {},
         "weights_url": "https://storage.googleapis.com/keras-nlp/models/t5_base_multi/v1/model.weights.h5",
         "weights_hash": "bed6ef276cfe83d1323467051211978d",
-        "vocabulary_url": "https://storage.googleapis.com/keras-nlp/models/t5_base_multi/v1/vocab.spm",
-        "vocabulary_hash": "9d15ef55d09d5a425ceb63fa31f7cae3",
+        "spm_proto_url": "https://storage.googleapis.com/keras-nlp/models/t5_base_multi/v1/vocab.spm",
+        "spm_proto_hash": "9d15ef55d09d5a425ceb63fa31f7cae3",
     },
     "t5_large_multi": {
         "metadata": {
@@ -99,8 +99,8 @@
         "preprocessor_config": {},
         "weights_url": "https://storage.googleapis.com/keras-nlp/models/t5_large_multi/v1/model.weights.h5",
         "weights_hash": "7854a05c2e6812899bf6f0f104792cda",
-        "vocabulary_url": "https://storage.googleapis.com/keras-nlp/models/t5_large_multi/v1/vocab.spm",
-        "vocabulary_hash": "9d15ef55d09d5a425ceb63fa31f7cae3",
+        "spm_proto_url": "https://storage.googleapis.com/keras-nlp/models/t5_large_multi/v1/vocab.spm",
+        "spm_proto_hash": "9d15ef55d09d5a425ceb63fa31f7cae3",
     },
     "flan_small_multi": {
         "metadata": {
@@ -129,8 +129,8 @@
         "preprocessor_config": {},
         "weights_url": "https://storage.googleapis.com/keras-nlp/models/flan_small_multi/v1/model.weights.h5",
         "weights_hash": "aa0fbaddb1759ef313bbc4f9e4f1e197",
-        "vocabulary_url": "https://storage.googleapis.com/keras-nlp/models/flan_small_multi/v1/vocab.spm",
-        "vocabulary_hash": "9d15ef55d09d5a425ceb63fa31f7cae3",
+        "spm_proto_url": "https://storage.googleapis.com/keras-nlp/models/flan_small_multi/v1/vocab.spm",
+        "spm_proto_hash": "9d15ef55d09d5a425ceb63fa31f7cae3",
     },
     "flan_base_multi": {
         "metadata": {
@@ -158,8 +158,8 @@
         "preprocessor_config": {},
         "weights_url": "https://storage.googleapis.com/keras-nlp/models/flan_base_multi/v1/model.weights.h5",
         "weights_hash": "84a10bec83fd093931bb2a6264115d31",
-        "vocabulary_url": "https://storage.googleapis.com/keras-nlp/models/flan_base_multi/v1/vocab.spm",
-        "vocabulary_hash": "9d15ef55d09d5a425ceb63fa31f7cae3",
+        "spm_proto_url": "https://storage.googleapis.com/keras-nlp/models/flan_base_multi/v1/vocab.spm",
+        "spm_proto_hash": "9d15ef55d09d5a425ceb63fa31f7cae3",
     },
     "flan_large_multi": {
         "metadata": {
@@ -187,7 +187,7 @@
         "preprocessor_config": {},
         "weights_url": "https://storage.googleapis.com/keras-nlp/models/flan_large_multi/v1/model.weights.h5",
         "weights_hash": "513f530ce790efa7e261c0ef965f3697",
-        "vocabulary_url": "https://storage.googleapis.com/keras-nlp/models/flan_large_multi/v1/vocab.spm",
-        "vocabulary_hash": "9d15ef55d09d5a425ceb63fa31f7cae3",
+        "spm_proto_url": "https://storage.googleapis.com/keras-nlp/models/flan_large_multi/v1/vocab.spm",
+        "spm_proto_hash": "9d15ef55d09d5a425ceb63fa31f7cae3",
     },
 }
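
Why the rename matters: the tokenizer preset loader evidently reads the SentencePiece proto back through the `spm_proto_url`/`spm_proto_hash` keys, so presets still using the older `vocabulary_url`/`vocabulary_hash` names could not be resolved by `from_preset`. Below is a minimal sketch of that resolution step, assuming Keras-style file caching; the helper name is hypothetical and not the actual keras-nlp internals:

import copy

from tensorflow import keras


def load_spm_proto(preset_config):
    # Hypothetical helper for illustration; the real lookup happens inside
    # the tokenizer's `from_preset` classmethod in keras-nlp.
    config = copy.deepcopy(preset_config)
    # The loader reads `spm_proto_url`/`spm_proto_hash`; under the old
    # `vocabulary_url`/`vocabulary_hash` names this lookup would fail.
    path = keras.utils.get_file(
        origin=config["spm_proto_url"],
        file_hash=config["spm_proto_hash"],
    )
    with open(path, "rb") as f:
        return f.read()  # serialized SentencePiece proto bytes
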
7 changes: 7 additions & 0 deletions keras_nlp/models/t5/t5_tokenizer.py
@@ -11,8 +11,11 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import copy
 
 from keras_nlp.api_export import keras_nlp_export
+from keras_nlp.models.t5.t5_presets import backbone_presets
 from keras_nlp.tokenizers.sentence_piece_tokenizer import SentencePieceTokenizer
+from keras_nlp.utils.python_utils import classproperty
 
 
@@ -96,3 +99,7 @@ def set_proto(self, proto):
             self.end_token_id = None
             self.pad_token_id = None
             self.start_token_id = None
+
+    @classproperty
+    def presets(cls):
+        return copy.deepcopy(backbone_presets)

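With the `presets` classproperty in place, `T5Tokenizer` advertises its preset table the way other keras-nlp tokenizers do: `classproperty` makes `presets` readable on the class itself, and `copy.deepcopy` hands each caller a private copy so the module-level `backbone_presets` dict cannot be mutated from outside. A short usage sketch, assuming the keras-nlp API of this era; the sample sentence is arbitrary:

import keras_nlp

# The classproperty exposes the preset names registered in t5_presets.py,
# e.g. "t5_small_multi" through "flan_large_multi".
print(sorted(keras_nlp.models.T5Tokenizer.presets))

# ...which is what lets from_preset() resolve a name to the vocab file,
# download it, and construct a ready-to-use tokenizer.
tokenizer = keras_nlp.models.T5Tokenizer.from_preset("t5_small_multi")
token_ids = tokenizer("The quick brown fox jumped over the lazy dog.")

Returning a deep copy is a small defensive choice: `from_preset` and user code can pop keys off the returned config without corrupting the shared preset table.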