Skip to content

Commit

Permalink
Merge pull request #493 from kaushikacharya/nmslib
Browse files Browse the repository at this point in the history
Updating nmslib hyperparameters guide url
  • Loading branch information
dakinggg authored Sep 13, 2023
2 parents 23f443f + 9bdc0f8 commit 3da29c2
Showing 1 changed file with 4 additions and 4 deletions.
8 changes: 4 additions & 4 deletions scispacy/candidate_generation.py
Original file line number Diff line number Diff line change
@@ -378,12 +378,12 @@ def create_tfidf_ann_index(
tfidf_vectorizer_path = f"{out_path}/tfidf_vectorizer.joblib"
ann_index_path = f"{out_path}/nmslib_index.bin"
tfidf_vectors_path = f"{out_path}/tfidf_vectors_sparse.npz"
-uml_concept_aliases_path = f"{out_path}/concept_aliases.json"
+umls_concept_aliases_path = f"{out_path}/concept_aliases.json"

kb = kb or UmlsKnowledgeBase()

# nmslib hyperparameters (very important)
-# guide: https://github.com/nmslib/nmslib/blob/master/python_bindings/parameters.md
+# guide: https://github.com/nmslib/nmslib/blob/master/manual/methods.md
# Default values resulted in very low recall.

# set to the maximum recommended value. Improves recall at the expense of longer indexing time.
@@ -445,9 +445,9 @@ def create_tfidf_ann_index(
assert len(concept_aliases) == numpy.size(concept_alias_tfidfs, 0)

print(
-f"Saving list of concept ids and tfidfs vectors to {uml_concept_aliases_path} and {tfidf_vectors_path}"
+f"Saving list of concept ids and tfidfs vectors to {umls_concept_aliases_path} and {tfidf_vectors_path}"
)
-json.dump(concept_aliases, open(uml_concept_aliases_path, "w"))
+json.dump(concept_aliases, open(umls_concept_aliases_path, "w"))
scipy.sparse.save_npz(
tfidf_vectors_path, concept_alias_tfidfs.astype(numpy.float16)
)
Expand Down

0 comments on commit 3da29c2

Please sign in to comment.