This repository has been archived by the owner on Nov 30, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[NLP-1955] Implement SpacyCore for spaCy 3.0+ (#16)
* feat: add spacy2 and spacy3 nox sessions * tests: implement spacy3 tests and update spacy2 tests * tests: refactor spacy tests * feat: implement spacy 3.0 support * fix: lint and typing * docs: update spacy documentation * refactor: spacy2 example * feat: implement spacy3 example * tests: lint spacy tests * docs: fixes * refactor: consistent references to hmrb in code
- Loading branch information
Kristian Boda
authored
May 14, 2021
1 parent
c69a015
commit 038484c
Showing
6 changed files
with
240 additions
and
42 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
import spacy | ||
|
||
# Load the spaCy pipeline named "en_core_web_sm"; the hmrb component is
# added to this pipeline further down in the script.
nlp = spacy.load("en_core_web_sm")
# Demo text: three short sentences that are run through the pipeline at the
# end of the script.
sentences = "I love gorillas. Peter loves gorillas. Jane loves Tarzan."
|
||
|
||
def conj_be(subj: str) -> str:
    """Return the present-tense form of "to be" that agrees with *subj*.

    The comparison is case-insensitive so that a sentence-initial pronoun
    such as "You" or "We" is conjugated correctly.

    Args:
        subj: Surface text of the subject (a pronoun or a proper noun).

    Returns:
        "am" for "I", "are" for "you"/"we"/"they", and "is" for anything
        else (third-person singular pronouns and proper nouns).
    """
    pronoun = subj.lower()
    if pronoun == "i":
        return "am"
    # Second person and the plural pronouns all take "are".
    if pronoun in {"you", "we", "they"}:
        return "are"
    return "is"
|
||
|
||
@spacy.registry.callbacks("gorilla_callback")
def gorilla_clb(seq: list, span: slice, data: dict) -> None:
    """Print a "gorilla person" sentence about the first matched token.

    Registered in spaCy's callbacks registry as "gorilla_callback".

    Args:
        seq: Indexable sequence whose items expose a ``text`` attribute.
        span: Slice describing the match; only ``span.start`` is read.
        data: Not used by this callback.
    """
    first_token = seq[span.start]
    subject = first_token.text
    # Pick the verb form that agrees with the subject.
    verb = conj_be(subject)
    print(f"{subject} {verb} a gorilla person.")
|
||
|
||
@spacy.registry.callbacks("lover_callback")
def lover_clb(seq: list, span: slice, data: dict) -> None:
    """Print who is the love interest of whom for a matched span.

    Registered in spaCy's callbacks registry as "lover_callback".

    Args:
        seq: Sliceable sequence whose items expose a ``text`` attribute.
        span: Slice describing the match within ``seq``.
        data: Not used by this callback.
    """
    matched = seq[span]
    # The last matched token is the one being loved; the first is the lover.
    beloved = matched[-1].text
    admirer = seq[span.start].text
    print(f"{beloved} is a love interest of {admirer}.")
|
||
|
||
# Rule text passed to the "hmrb" component as the "rules" config entry.
# It holds two "Law" entries, each naming a callback key ("loves_gorilla",
# "loves_someone") followed by a parenthesised pattern of three token
# constraints. The attribute names used in the constraints (pos, lemma,
# lower) are the same keys that jsonify_span emits for every token.
grammar = """
Law:
- callback: "loves_gorilla"
(
((pos: "PROPN") or (pos: "PRON"))
(lemma: "love")
(lemma: "gorilla")
)
Law:
- callback: "loves_someone"
(
(pos: "PROPN")
(lower: "loves")
(pos: "PROPN")
)
"""
|
||
|
||
@spacy.registry.augmenters("jsonify_span")
def jsonify_span(span):
    """Convert each token of *span* into a plain attribute dictionary.

    Registered in spaCy's augmenters registry as "jsonify_span".

    Args:
        span: Iterable of tokens exposing ``lemma_``, ``pos_`` and ``lower_``.

    Returns:
        A list with one dict per token, keyed by "lemma", "pos" and "lower".
    """
    records = []
    for tok in span:
        records.append(
            {"lemma": tok.lemma_, "pos": tok.pos_, "lower": tok.lower_}
        )
    return records
|
||
|
||
# The imported name is not referenced below.
# NOTE(review): presumably the import is needed for its side effect of
# registering the "hmrb" pipeline factory used by add_pipe -- confirm.
from hmrb.core import SpacyCore  # noqa: E402, F401

# Configuration handed to the "hmrb" pipeline component.
conf = {
    "rules": grammar,
    # Callback key used in the grammar -> registry path of the function
    # registered above with @spacy.registry.callbacks(...).
    "callbacks": {
        "loves_gorilla": "callbacks.gorilla_callback",
        "loves_someone": "callbacks.lover_callback",
    },
    # Registry path of the function registered above with
    # @spacy.registry.augmenters(...).
    "map_doc": "augmenters.jsonify_span",
    # NOTE(review): presumably orders matches by length -- confirm in hmrb docs.
    "sort_length": True,
}

# Add the component to the pipeline, then run the demo text through it.
# The return value of the call is discarded; the registered callbacks print.
nlp.add_pipe("hmrb", config=conf)
nlp(sentences)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters