Fix HF tests with Pin (mosaicml#3248)
* fix tests

* pin hub

* ignore warnings
mvpatel2000 authored May 2, 2024
1 parent acefd01 commit 79d6405
Showing 7 changed files with 13 additions and 121 deletions.
4 changes: 2 additions & 2 deletions composer/models/huggingface.py
@@ -71,8 +71,8 @@ class HuggingFaceModel(ComposerModel):
         import transformers
         from composer.models import HuggingFaceModel

-        hf_model = transformers.AutoModelForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)
-        hf_tokenizer = transformers.AutoTokenizer.from_pretrained('bert-base-uncased')
+        hf_model = transformers.AutoModelForSequenceClassification.from_pretrained('google-bert/bert-base-uncased', num_labels=2)
+        hf_tokenizer = transformers.AutoTokenizer.from_pretrained('google-bert/bert-base-uncased')
         model = HuggingFaceModel(hf_model, hf_tokenizer)
     """
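For orientation, the wrapper built in this docstring can be passed straight to Composer's Trainer. A minimal end-to-end sketch (not part of this commit; the dict-style toy dataloader is a stand-in for real data):

```python
import torch
import transformers
from torch.utils.data import DataLoader

from composer import Trainer
from composer.models import HuggingFaceModel

hf_model = transformers.AutoModelForSequenceClassification.from_pretrained('google-bert/bert-base-uncased', num_labels=2)
hf_tokenizer = transformers.AutoTokenizer.from_pretrained('google-bert/bert-base-uncased')
model = HuggingFaceModel(hf_model, hf_tokenizer)

# Toy dict-style samples: HuggingFaceModel expects mapping batches whose keys
# match the wrapped model's forward() signature ('labels' enables the HF loss).
samples = [{
    'input_ids': torch.randint(0, hf_tokenizer.vocab_size, (32,)),
    'attention_mask': torch.ones(32, dtype=torch.long),
    'labels': torch.randint(0, 2, ()),
} for _ in range(16)]

trainer = Trainer(model=model, train_dataloader=DataLoader(samples, batch_size=8), max_duration='1ba')
trainer.fit()
```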
2 changes: 1 addition & 1 deletion docs/source/composer_model.rst
@@ -210,7 +210,7 @@ and make it compatible with our trainer.
     # huggingface model
     model = AutoModelForSequenceClassification.from_pretrained(
-        'bert-base-uncased',
+        'google-bert/bert-base-uncased',
         num_labels=2)

     # list of torchmetrics
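The hunk's trailing context stops at the torchmetrics list. A hypothetical continuation in the same spirit (the metric choice and the `use_logits` flag are assumptions, not content from this page):

```python
from torchmetrics.classification import MulticlassAccuracy

from composer.models import HuggingFaceModel

# Hypothetical metric list for the 2-label classifier above.
metrics = [MulticlassAccuracy(num_classes=2, average='micro')]
composer_model = HuggingFaceModel(model, use_logits=True, metrics=metrics)
```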
4 changes: 2 additions & 2 deletions examples/finetune_huggingface.ipynb
@@ -77,8 +77,8 @@
     "import transformers\n",
     "\n",
     "# Create a BERT sequence classification model using Hugging Face transformers\n",
-    "model = transformers.AutoModelForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)\n",
-    "tokenizer = transformers.AutoTokenizer.from_pretrained('bert-base-uncased') "
+    "model = transformers.AutoModelForSequenceClassification.from_pretrained('google-bert/bert-base-uncased', num_labels=2)\n",
+    "tokenizer = transformers.AutoTokenizer.from_pretrained('google-bert/bert-base-uncased') "
    ]
   },
   {
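The rename is safe for existing code because the Hub redirects the legacy un-namespaced ids to their namespaced homes. A quick network-dependent check (a sketch, assuming `model_info().id` reports the canonical repo id):

```python
from huggingface_hub import model_info

# The legacy id should resolve to the namespaced repo, so the rename is a no-op for downloads.
assert model_info('bert-base-uncased').id == 'google-bert/bert-base-uncased'
```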
4 changes: 3 additions & 1 deletion pyproject.toml
@@ -159,7 +159,9 @@ filterwarnings = [
     # Ignore autograd kernel warning inside DeepSpeed
     '''ignore:.*an autograd kernel was not registered to the Autograd key.*:UserWarning''',
     # Ignore save_state_dict / load_state_dict deprecation warnings
-    '''ignore:'.*_state_dict' is deprecated and will be removed in future versions.*:UserWarning'''
+    '''ignore:'.*_state_dict' is deprecated and will be removed in future versions.*:UserWarning''',
+    # Ignore HF deprecation which affects their own libraries
+    '''ignore:.*`resume_download` is deprecated and will be removed in version.*:FutureWarning'''
 ]

# Coverage
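pytest's `ignore:<regex>:<category>` entries apply `re.match` against the warning message. A standalone sketch of the equivalent runtime filter (the exact `huggingface_hub` message wording is an assumption):

```python
import re
import warnings

pattern = r'.*`resume_download` is deprecated and will be removed in version.*'

# Runtime equivalent of the pyproject entry above.
warnings.filterwarnings('ignore', message=pattern, category=FutureWarning)

# The regex should match the message huggingface_hub is assumed to emit.
msg = '`resume_download` is deprecated and will be removed in version 1.0.0.'
assert re.match(pattern, msg)
```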
1 change: 1 addition & 0 deletions setup.py
@@ -181,6 +181,7 @@ def package_files(prefix: str, directory: str, extension: str):
 extra_deps['nlp'] = [
     'transformers>=4.11,!=4.34.0,<4.41',
     'datasets>=2.4,<3',
+    'huggingface-hub>=0.21.2,<0.23',
 ]

 extra_deps['peft'] = [
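The new pin holds `huggingface-hub` to the 0.21.2–0.22.x range. To check whether an installed version satisfies it, a small sketch using the `packaging` library (not part of the commit):

```python
from packaging.specifiers import SpecifierSet
from packaging.version import Version

pin = SpecifierSet('>=0.21.2,<0.23')

assert Version('0.22.2') in pin      # allowed by the pin
assert Version('0.23.0') not in pin  # excluded by the upper bound
```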
111 changes: 0 additions & 111 deletions tests/datasets/test_streaming_datasets_train.py

This file was deleted.

8 changes: 4 additions & 4 deletions tests/fixtures/fixtures.py
@@ -146,7 +146,7 @@ def _session_tiny_bert_model(_session_tiny_bert_config):  # type: ignore
 def tiny_bert_tokenizer_helper():
     transformers = pytest.importorskip('transformers')

-    return transformers.AutoTokenizer.from_pretrained('bert-base-uncased')
+    return transformers.AutoTokenizer.from_pretrained('google-bert/bert-base-uncased')


 @pytest.fixture(scope='session')
@@ -162,7 +162,7 @@ def tiny_bert_config_helper():
         'num_hidden_layers': 2,
         'intermediate_size': 512,
     }
-    return transformers.AutoConfig.from_pretrained('bert-base-uncased', **tiny_overrides)
+    return transformers.AutoConfig.from_pretrained('google-bert/bert-base-uncased', **tiny_overrides)


 @pytest.fixture(scope='session')
@@ -302,7 +302,7 @@ def tiny_t5_config_helper():
     transformers = pytest.importorskip('transformers')

     tiny_overrides = {'d_ff': 128, 'd_model': 64, 'num_layers': 2, 'num_decoder_layers': 2, 'num_heads': 2}
-    return transformers.AutoConfig.from_pretrained('t5-small', **tiny_overrides)
+    return transformers.AutoConfig.from_pretrained('google-t5/t5-small', **tiny_overrides)
@@ -313,7 +313,7 @@ def _session_tiny_t5_config():  # type: ignore
 def tiny_t5_tokenizer_helper():
     transformers = pytest.importorskip('transformers')

-    hf_tokenizer = transformers.AutoTokenizer.from_pretrained('t5-small', model_max_length=512)
+    hf_tokenizer = transformers.AutoTokenizer.from_pretrained('google-t5/t5-small', model_max_length=512)
     return hf_tokenizer
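These `*_helper` functions back session-scoped fixtures so each tiny tokenizer/config is built once per test session. A sketch of the consuming pattern, modeled on the `_session_tiny_bert_model` fixture visible above (the deep-copy step is an assumption about how per-test isolation is kept):

```python
import copy

import pytest


@pytest.fixture(scope='session')
def _session_tiny_bert_tokenizer():  # built once per session
    return tiny_bert_tokenizer_helper()


@pytest.fixture
def tiny_bert_tokenizer(_session_tiny_bert_tokenizer):
    # Deep-copy so a test can mutate its tokenizer without leaking state.
    return copy.deepcopy(_session_tiny_bert_tokenizer)
```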
