From 61fcfc2e3c1c8afa82707ac88acf2851d3bd9d53 Mon Sep 17 00:00:00 2001
From: yujun <50394665+JunnYu@users.noreply.github.com>
Date: Wed, 26 May 2021 19:47:10 +0800
Subject: [PATCH] Update configuration_roformer.py

---
 src/roformer/configuration_roformer.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/roformer/configuration_roformer.py b/src/roformer/configuration_roformer.py
index 47bced6..500c1fa 100644
--- a/src/roformer/configuration_roformer.py
+++ b/src/roformer/configuration_roformer.py
@@ -78,7 +78,7 @@ class RoFormerConfig(PretrainedConfig):
         gradient_checkpointing (:obj:`bool`, `optional`, defaults to :obj:`False`):
             If :obj:`True`, use gradient checkpointing to save memory at the expense of slower backward pass.
 
-        Example::
+    Example::
 
         >>> from transformers import RoFormerModel, RoFormerConfig
 
@@ -96,7 +96,7 @@ class RoFormerConfig(PretrainedConfig):
     def __init__(
         self,
         vocab_size=50000,
-        embedding_size=768,
+        embedding_size=None,
         hidden_size=768,
         num_hidden_layers=12,
         num_attention_heads=12,
@@ -117,7 +117,7 @@ def __init__(
         super().__init__(pad_token_id=pad_token_id, **kwargs)
 
         self.vocab_size = vocab_size
-        self.embedding_size = embedding_size
+        self.embedding_size = hidden_size if embedding_size is None else embedding_size
         self.hidden_size = hidden_size
         self.num_hidden_layers = num_hidden_layers
         self.num_attention_heads = num_attention_heads
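
For reference, a minimal sketch of what this change means for callers, assuming RoFormerConfig is imported from this repository's configuration module (the import path below is inferred from the file path in the diff and may differ):

    # Import path inferred from src/roformer/configuration_roformer.py; adjust if packaged differently.
    from roformer.configuration_roformer import RoFormerConfig

    # Leaving embedding_size unset (None) now falls back to hidden_size.
    config = RoFormerConfig(hidden_size=768)
    assert config.embedding_size == 768

    # An explicit embedding_size still takes precedence over the fallback.
    config_small = RoFormerConfig(hidden_size=768, embedding_size=384)
    assert config_small.embedding_size == 384

Since the old defaults were embedding_size=768 and hidden_size=768, configs that never set embedding_size behave exactly as before; the None fallback only removes the need to keep the two values in sync manually.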