Skip to content

Commit

Permalink
Change the import of kenlm from github to pypi (huggingface#19770)
Browse files Browse the repository at this point in the history
* Change the import of kenlm from github to pypi

* Change the import of kenlm from github to pypi in circleci config

* Fix code quality issues

* Fix isort issue, add kenlm in extras for audio

* Add kenlm to deps

* Add kenlm to deps

* Commit 'make fixup' changes

* Remove version from kenlm deps

* commit make fixup changes

* Remove manual installation of kenlm

* Remove manual installation of kenlm

* Remove manual installation of kenlm
  • Loading branch information
raghavanone authored Oct 26, 2022
1 parent aeae978 commit 7829c89
Show file tree
Hide file tree
Showing 9 changed files with 25 additions and 16 deletions.
6 changes: 0 additions & 6 deletions .circleci/create_circleci_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,6 @@ def job_name(self):
"pip install .[sklearn,tf-cpu,torch,testing,sentencepiece,torch-speech,vision]",
TORCH_SCATTER_INSTALL,
"pip install tensorflow_probability",
"pip install https://github.com/kpu/kenlm/archive/master.zip",
"pip install git+https://github.com/huggingface/accelerate",
],
marker="is_pt_tf_cross_test",
Expand All @@ -143,7 +142,6 @@ def job_name(self):
"pip install --upgrade pip",
"pip install .[sklearn,flax,torch,testing,sentencepiece,torch-speech,vision]",
TORCH_SCATTER_INSTALL,
"pip install https://github.com/kpu/kenlm/archive/master.zip",
"pip install git+https://github.com/huggingface/accelerate",
],
marker="is_pt_flax_cross_test",
Expand All @@ -158,7 +156,6 @@ def job_name(self):
"pip install --upgrade pip",
"pip install .[sklearn,torch,testing,sentencepiece,torch-speech,vision,timm]",
TORCH_SCATTER_INSTALL,
"pip install https://github.com/kpu/kenlm/archive/master.zip",
"pip install git+https://github.com/huggingface/accelerate",
],
pytest_num_workers=3,
Expand All @@ -172,7 +169,6 @@ def job_name(self):
"pip install --upgrade pip",
"pip install .[sklearn,tf-cpu,testing,sentencepiece,tf-speech,vision]",
"pip install tensorflow_probability",
"pip install https://github.com/kpu/kenlm/archive/master.zip",
],
pytest_options={"rA": None},
)
Expand All @@ -184,7 +180,6 @@ def job_name(self):
"sudo apt-get -y update && sudo apt-get install -y libsndfile1-dev espeak-ng",
"pip install --upgrade pip",
"pip install .[flax,testing,sentencepiece,flax-speech,vision]",
"pip install https://github.com/kpu/kenlm/archive/master.zip",
],
pytest_options={"rA": None},
)
Expand All @@ -197,7 +192,6 @@ def job_name(self):
"pip install --upgrade pip",
"pip install .[sklearn,torch,testing,sentencepiece,torch-speech,vision,timm]",
TORCH_SCATTER_INSTALL,
"pip install https://github.com/kpu/kenlm/archive/master.zip",
],
pytest_options={"rA": None},
tests_to_run="tests/pipelines/"
Expand Down
2 changes: 1 addition & 1 deletion docker/transformers-all-latest-gpu/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ RUN python3 -m pip install --no-cache-dir git+https://github.com/onnx/tensorflow
RUN python3 -m pip install --no-cache-dir torch-scatter -f https://data.pyg.org/whl/torch-$(python3 -c "from torch import version; print(version.__version__.split('+')[0])")+$CUDA.html
RUN python3 -m pip install --no-cache-dir intel_extension_for_pytorch==$INTEL_TORCH_EXT+cpu -f https://software.intel.com/ipex-whl-stable

RUN python3 -m pip install --no-cache-dir git+https://github.com/facebookresearch/detectron2.git pytesseract https://github.com/kpu/kenlm/archive/master.zip
RUN python3 -m pip install --no-cache-dir git+https://github.com/facebookresearch/detectron2.git pytesseract
RUN python3 -m pip install -U "itsdangerous<2.1.0"

RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate
Expand Down
2 changes: 1 addition & 1 deletion docker/transformers-doc-builder/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ RUN apt-get -y update && apt-get install -y libsndfile1-dev && apt install -y te
RUN python3 -m pip install --no-cache-dir ./transformers[deepspeed]

RUN python3 -m pip install --no-cache-dir torch-scatter -f https://data.pyg.org/whl/torch-$(python -c "from torch import version; print(version.__version__.split('+')[0])")+cpu.html
RUN python3 -m pip install --no-cache-dir torchvision git+https://github.com/facebookresearch/detectron2.git pytesseract https://github.com/kpu/kenlm/archive/master.zip
RUN python3 -m pip install --no-cache-dir torchvision git+https://github.com/facebookresearch/detectron2.git pytesseract
RUN python3 -m pip install --no-cache-dir pytorch-quantization --extra-index-url https://pypi.ngc.nvidia.com
RUN python3 -m pip install -U "itsdangerous<2.1.0"

Expand Down
2 changes: 1 addition & 1 deletion docker/transformers-pytorch-gpu/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ RUN [ ${#TORCH_AUDIO} -gt 0 ] && VERSION='torchaudio=='TORCH_AUDIO'.*' || VERSI
RUN python3 -m pip uninstall -y tensorflow flax

RUN python3 -m pip install --no-cache-dir torch-scatter -f https://data.pyg.org/whl/torch-$(python3 -c "from torch import version; print(version.__version__.split('+')[0])")+cu113.html
RUN python3 -m pip install --no-cache-dir git+https://github.com/facebookresearch/detectron2.git pytesseract https://github.com/kpu/kenlm/archive/master.zip
RUN python3 -m pip install --no-cache-dir git+https://github.com/facebookresearch/detectron2.git pytesseract
RUN python3 -m pip install -U "itsdangerous<2.1.0"

# When installing in editable mode, `transformers` is not recognized as a package.
Expand Down
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@
"jax>=0.2.8,!=0.3.2,<=0.3.6",
"jaxlib>=0.1.65,<=0.3.6",
"jieba",
"kenlm",
"nltk",
"numpy>=1.17",
"onnxconverter-common",
Expand Down Expand Up @@ -274,7 +275,7 @@ def run(self):
extras["integrations"] = extras["optuna"] + extras["ray"] + extras["sigopt"]

extras["serving"] = deps_list("pydantic", "uvicorn", "fastapi", "starlette")
extras["audio"] = deps_list("librosa", "pyctcdecode", "phonemizer")
extras["audio"] = deps_list("librosa", "pyctcdecode", "phonemizer", "kenlm")
# `pip install ".[speech]"` is deprecated and `pip install ".[torch-speech]"` should be used instead
extras["speech"] = deps_list("torchaudio") + extras["audio"]
extras["torch-speech"] = deps_list("torchaudio") + extras["audio"]
Expand Down
1 change: 1 addition & 0 deletions src/transformers/dependency_versions_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
"jax": "jax>=0.2.8,!=0.3.2,<=0.3.6",
"jaxlib": "jaxlib>=0.1.65,<=0.3.6",
"jieba": "jieba",
"kenlm": "kenlm",
"nltk": "nltk",
"numpy": "numpy>=1.17",
"onnxconverter-common": "onnxconverter-common",
Expand Down
20 changes: 14 additions & 6 deletions src/transformers/pipelines/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,14 @@
from ..models.auto.tokenization_auto import TOKENIZER_MAPPING, AutoTokenizer
from ..tokenization_utils import PreTrainedTokenizer
from ..tokenization_utils_fast import PreTrainedTokenizerFast
from ..utils import HUGGINGFACE_CO_RESOLVE_ENDPOINT, is_tf_available, is_torch_available, logging
from ..utils import (
HUGGINGFACE_CO_RESOLVE_ENDPOINT,
is_kenlm_available,
is_pyctcdecode_available,
is_tf_available,
is_torch_available,
logging,
)
from .audio_classification import AudioClassificationPipeline
from .automatic_speech_recognition import AutomaticSpeechRecognitionPipeline
from .base import (
Expand Down Expand Up @@ -837,11 +844,12 @@ def pipeline(

kwargs["decoder"] = decoder
except ImportError as e:
logger.warning(
f"Could not load the `decoder` for {model_name}. Defaulting to raw CTC. Try to install"
" `pyctcdecode` and `kenlm`: (`pip install pyctcdecode`, `pip install"
f" https://github.com/kpu/kenlm/archive/master.zip`): Error: {e}"
)
logger.warning(f"Could not load the `decoder` for {model_name}. Defaulting to raw CTC. Error: {e}")
if not is_kenlm_available():
logger.warning("Try to install `kenlm`: `pip install kenlm")

if not is_pyctcdecode_available():
logger.warning("Try to install `pyctcdecode`: `pip install pyctcdecode")

if task == "translation" and model.config.task_specific_params:
for key in model.config.task_specific_params:
Expand Down
1 change: 1 addition & 0 deletions src/transformers/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@
is_in_notebook,
is_ipex_available,
is_jumanpp_available,
is_kenlm_available,
is_librosa_available,
is_more_itertools_available,
is_ninja_available,
Expand Down
4 changes: 4 additions & 0 deletions src/transformers/utils/import_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,10 @@
TORCH_ONNX_DICT_INPUTS_MINIMUM_VERSION = version.parse("1.8")


def is_kenlm_available():
    """
    Report whether the `kenlm` package can be imported, without importing it.

    Returns:
        `bool`: `True` if an import spec for `kenlm` can be located (or the module is already loaded), `False`
        otherwise.
    """
    try:
        return importlib.util.find_spec("kenlm") is not None
    except ValueError:
        # `find_spec` raises ValueError when "kenlm" is already present in `sys.modules` but its `__spec__` is
        # `None` or missing (e.g. a stub module injected by a test). The module object exists in that case, so
        # `import kenlm` would succeed; report it as available instead of letting the probe crash.
        return True


def is_torch_available():
    """Return the module-level `_torch_available` flag."""
    return _torch_available

Expand Down

0 comments on commit 7829c89

Please sign in to comment.