From 2cbe3518b5fc6a79fe20d24725e4dc92baaeb03f Mon Sep 17 00:00:00 2001
From: Lars Kjeldgaard <lars.kjeldgaard@eb.dk>
Date: Mon, 23 Aug 2021 17:14:06 +0200
Subject: [PATCH] compute confidence scores

---
 NEWS.md                              |  4 ++++
 setup.py                             |  2 +-
 src/NERDA/models.py                  | 15 +++++++++++--
 src/NERDA/predictions.py             | 33 ++++++++++++++++++++++++----
 tests/unit_tests/test_predictions.py |  6 +++++
 5 files changed, 53 insertions(+), 7 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index 23d4d63..e2a60e0 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,7 @@
+# NERDA 0.9.7
+
+* return confidence scores for predictions of all tokens, e.g. model.predict(x, return_confidence=True).
+
 # NERDA 0.9.6
 
 * compute Precision, Recall and Accuracy (optional) with evaluate_performance().
diff --git a/setup.py b/setup.py
index f4ca177..8dbac5f 100644
--- a/setup.py
+++ b/setup.py
@@ -5,7 +5,7 @@
 
 setuptools.setup(
     name="NERDA",
-    version="0.9.6",
+    version="0.9.7",
     author="Lars Kjeldgaard, Lukas Christian Nielsen",
     author_email="lars.kjeldgaard@eb.dk",
     description="A Framework for Finetuning Transformers for Named-Entity Recognition",
diff --git a/src/NERDA/models.py b/src/NERDA/models.py
index b3ed55d..bbd8537 100644
--- a/src/NERDA/models.py
+++ b/src/NERDA/models.py
@@ -266,7 +266,9 @@ def half(self):
         self.network.half()
         self.halved = True
 
-    def predict(self, sentences: List[List[str]], **kwargs) -> List[List[str]]:
+    def predict(self, sentences: List[List[str]],
+                return_confidence: bool = False,
+                **kwargs) -> List[List[str]]:
         """Predict Named Entities in Word-Tokenized Sentences
 
         Predicts word-tokenized sentences with trained model.
@@ -275,6 +277,9 @@ def predict(self, sentences: List[List[str]], **kwargs) -> List[List[str]]:
             sentences (List[List[str]]): word-tokenized sentences.
             kwargs: arbitrary keyword arguments. For instance
                 'batch_size' and 'num_workers'.
+            return_confidence (bool, optional): if True, return
+            confidence scores for all predicted tokens. Defaults
+            to False.
 
         Returns:
             List[List[str]]: Predicted tags for sentences - one
@@ -288,15 +293,20 @@ def predict(self, sentences: List[List[str]], **kwargs) -> List[List[str]]:
                        device = self.device,
                        tag_encoder = self.tag_encoder,
                        tag_outside = self.tag_outside,
+                       return_confidence = return_confidence,
                        **kwargs)
 
-    def predict_text(self, text: str, **kwargs) -> list:
+    def predict_text(self, text: str,
+                     return_confidence: bool = False, **kwargs) -> list:
         """Predict Named Entities in a Text
 
         Args:
             text (str): text to predict entities in.
             kwargs: arbitrary keyword arguments. For instance
                 'batch_size' and 'num_workers'.
+            return_confidence (bool, optional): if True, return
+            confidence scores for all predicted tokens. Defaults
+            to False.
 
         Returns:
             tuple: word-tokenized sentences and predicted
@@ -310,6 +320,7 @@ def predict_text(self, text: str, **kwargs) -> list:
                             device = self.device,
                             tag_encoder = self.tag_encoder,
                             tag_outside = self.tag_outside,
+                            return_confidence=return_confidence,
                             **kwargs)
 
     def evaluate_performance(self, dataset: dict,
diff --git a/src/NERDA/predictions.py b/src/NERDA/predictions.py
index 1f12bf1..3091ae2 100644
--- a/src/NERDA/predictions.py
+++ b/src/NERDA/predictions.py
@@ -12,6 +12,10 @@
 import transformers
 import sklearn.preprocessing
 
+def sigmoid_transform(x):
+    prob = 1/(1 + np.exp(-x))
+    return prob
+
 def predict(network: torch.nn.Module,
             sentences: List[List[str]],
             transformer_tokenizer: transformers.PreTrainedTokenizer,
@@ -23,6 +27,7 @@ def predict(network: torch.nn.Module,
             batch_size: int = 8,
             num_workers: int = 1,
             return_tensors: bool = False,
+            return_confidence: bool = False,
             pad_sequences: bool = True) -> List[List[str]]:
     """Compute predictions.
 
@@ -48,6 +53,9 @@ def predict(network: torch.nn.Module,
         num_workers (int, optional): Number of workers. Defaults
             to 1.
         return_tensors (bool, optional): if True, return tensors.
+        return_confidence (bool, optional): if True, return
+        confidence scores for all predicted tokens. Defaults
+        to False.
         pad_sequences (bool, optional): if True, pad sequences.
             Defaults to True.
 
@@ -79,6 +87,7 @@ def predict(network: torch.nn.Module,
                            pad_sequences = pad_sequences)
 
     predictions = []
+    probabilities = []
    tensors = []
 
     with torch.no_grad():
@@ -90,18 +99,25 @@ def predict(network: torch.nn.Module,
             for i in range(outputs.shape[0]):
 
                 # extract prediction and transform.
-                preds = tag_encoder.inverse_transform(
-                    outputs[i].argmax(-1).cpu().numpy()
-                )
+
+                # find max by row.
+                values, indices = outputs[i].max(dim=1)
+
+                preds = tag_encoder.inverse_transform(indices.cpu().numpy())
+                probs = sigmoid_transform(values.cpu().numpy())
 
                 if return_tensors:
                     tensors.append(outputs)
 
                 # subset predictions for original word tokens.
                 preds = [prediction for prediction, offset in zip(preds.tolist(), dl.get('offsets')[i]) if offset]
+                if return_confidence:
+                    probs = [prob for prob, offset in zip(probs.tolist(), dl.get('offsets')[i]) if offset]
 
                 # Remove special tokens ('CLS' + 'SEP').
                 preds = preds[1:-1]
+                if return_confidence:
+                    probs = probs[1:-1]
 
                 # make sure resulting predictions have same length as
                 # original sentence.
@@ -109,8 +125,12 @@ def predict(network: torch.nn.Module,
 
                 # TODO: Move assert statement to unit tests. Does not work
                 # in boundary.
                 # assert len(preds) == len(sentences[i])
-                predictions.append(preds)
+                predictions.append(preds)
+                if return_confidence:
+                    probabilities.append(probs)
 
+    if return_confidence:
+        return predictions, probabilities
     if return_tensors:
         return tensors
 
@@ -128,6 +148,7 @@ def predict_text(network: torch.nn.Module,
                  batch_size: int = 8,
                  num_workers: int = 1,
                  pad_sequences: bool = True,
+                 return_confidence: bool = False,
                  sent_tokenize: Callable = sent_tokenize,
                  word_tokenize: Callable = word_tokenize) -> tuple:
     """Compute Predictions for Text.
@@ -154,6 +175,9 @@ def predict_text(network: torch.nn.Module,
             to 1.
         pad_sequences (bool, optional): if True, pad sequences.
             Defaults to True.
+        return_confidence (bool, optional): if True, return
+        confidence scores for predicted tokens. Defaults
+        to False.
 
     Returns:
         tuple: sentence- and word-tokenized text with corresponding
@@ -170,6 +194,7 @@ def predict_text(network: torch.nn.Module,
                           transformer_config = transformer_config,
                           max_len = max_len,
                           device = device,
+                          return_confidence = return_confidence,
                           batch_size = batch_size,
                           num_workers = num_workers,
                           pad_sequences = pad_sequences,
diff --git a/tests/unit_tests/test_predictions.py b/tests/unit_tests/test_predictions.py
index 000c251..0e65a24 100644
--- a/tests/unit_tests/test_predictions.py
+++ b/tests/unit_tests/test_predictions.py
@@ -40,6 +40,12 @@ def test_predict_maxlen_exceed():
     sentences = [nltk.word_tokenize(text)]
     model.predict(sentences)
 
+# test confidence scores
+words, preds = model.predict_text(text_single, return_confidence=True)
+
+def test_confs_len():
+    assert len(preds[0])==len(preds[1])
+
 predictions_text_single = model.predict_text(text_single)
 
 def test_predict_text_format():