Skip to content

Commit

Permalink
fix: only reinstall hf model when cache files are changed
Browse files: browse the repository at this point in the history
  • Loading branch information
percevalw committed May 21, 2024
1 parent 3ea89f0 commit bc4423b
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 2 deletions.
8 changes: 6 additions & 2 deletions edsnlp/core/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -1186,13 +1186,15 @@ def load_from_huggingface(
len(repo_id.split("/")) == 2
), "Invalid repo_id format (expected 'owner/repo_name' format)"
path = None
mtime = None
try:
path = snapshot_download(
repo_id,
local_files_only=auto_update,
token=token,
revision=revision,
)
mtime = max(os.path.getmtime(x) for x in Path(path).rglob("*"))
except FileNotFoundError:
pass

Expand All @@ -1205,13 +1207,15 @@ def load_from_huggingface(
token=token,
revision=revision,
)
should_install = True
new_mtime = max(os.path.getmtime(x) for x in Path(path).rglob("*"))
should_install = new_mtime != mtime

if should_install or not any(
p.startswith(module_name) and p.endswith(".dist-info") for p in os.listdir(path)
):
pip = sys.executable.rsplit("/", 1)[0] + "/pip"
subprocess.run(
["pip", "install", path, "--target", path, "--no-deps", "--upgrade"]
[pip, "install", path, "--target", path, "--no-deps", "--upgrade"]
)

if path not in sys.path:
Expand Down
7 changes: 7 additions & 0 deletions tests/test_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -374,6 +374,13 @@ def test_huggingface():
assert doc.ents[0].text == "paracetamol"
assert doc.ents[1].text == "500mg"

# Try loading it twice for coverage
edsnlp.load(
"AP-HP/dummy-ner",
auto_update=True,
install_dependencies=True,
)

subprocess.run(["pip", "uninstall", "dummy-pip-package", "-y"], check=True)


Expand Down

0 comments on commit bc4423b

Please sign in to comment.