From 73590cf549847d6b3b1410b62c1ff9777fcc8699 Mon Sep 17 00:00:00 2001 From: marvin84 Date: Thu, 21 Sep 2023 12:20:08 +0200 Subject: [PATCH 01/12] added onnx FS and deleted the old FH label scorer, latter will be added later on with the current version and once integrated in master branch --- rasr/feature_scorer.py | 62 ++++++++++++++++++++++++++++++------------ 1 file changed, 44 insertions(+), 18 deletions(-) diff --git a/rasr/feature_scorer.py b/rasr/feature_scorer.py index 50a0fe52..d988b8b9 100644 --- a/rasr/feature_scorer.py +++ b/rasr/feature_scorer.py @@ -7,6 +7,7 @@ "ReturnnScorer", "InvAlignmentPassThroughFeatureScorer", "PrecomputedHybridFeatureScorer", + "OnnxFeatureScorer", ] from sisyphus import * @@ -126,25 +127,50 @@ def __init__(self, prior_mixtures, scale=1.0, priori_scale=0.0, prior_file=None) self.config.normalize_mixture_weights = False -class TFLabelContextFeatureScorer(FeatureScorer): +class OnnxFeatureScorer(rasr.FeatureScorer): def __init__( self, - fs_tf_config, - contextPriorFile, - diphonePriorFile, - prior_mixtures, - prior_scale, + mixtures, + model, + io_map, + *args, + label_log_posterior_scale=1.0, + label_prior_scale=0.7, + label_log_prior_file=None, + apply_log_on_output=False, + negate_output=True, + intra_op_threads=1, + inter_op_threads=1, + **kwargs, ): - super().__init__() + """ + :param str mixtures: path to a *.mix file e.g. output of either EstimateMixturesJob or CreateDummyMixturesJob + :param str model: path of a model e.g. output of ExportPyTorchModelToOnnxJob + :param dict io_map: mapping between internal rasr identifiers and the model related input/output + :param float label_log_posterior_scale: scales for the log probability of a label e.g. 1.0 is recommended + :param float label_prior_scale: scale for the prior log probability of a label reasonable e.g. values in [0.1, 0.7] interval + :param str label_log_prior_file: xml file containing log prior probabilities e.g. estimated from the model via povey method + :param bool apply_log_on_output: whether to apply the log-function on the output, usefull if the model outputs softmax instead of log-softmax + :param bool negate_output: wheter negate output (because the model outputs log softmax and not negative log softmax + """ + super().__init__(*args, **kwargs) - self.config = RasrConfig() - self.config.feature_scorer_type = "tf-label-context-scorer" - self.config.file = prior_mixtures - self.config.num_label_contexts = 46 - self.config.prior_scale = prior_scale - self.config.context_prior = contextPriorFile - self.config.diphone_prior = diphonePriorFile - self.config.normalize_mixture_weights = False - self.config.loader = fs_tf_config.loader - self.config.input_map = fs_tf_config.input_map - self.config.output_map = fs_tf_config.output_map + self.config.feature_scorer_type = "onnx-feature-scorer" + self.config.file = mixtures + self.config.scale = label_log_posterior_scale + self.config.priori_scale = label_prior_scale + if label_log_prior_file is not None: + self.config.prior_file = label_log_prior_file + + self.config.session.file = model + + if label_log_prior_file: + self.config.apply_log_on_output = apply_log_on_output + if not negate_output: + self.config.negate_output = negate_output + + self.post_config.session.intra_op_num_threads = intra_op_threads + self.post_config.session.inter_op_num_threads = inter_op_threads + + for k, v in io_map.items(): + self.config.io_map[k] = v From f21f63f8c876f629a88f577cefc63050fbd38ee0 Mon Sep 17 00:00:00 2001 From: marvin84 Date: Thu, 21 Sep 2023 12:59:47 +0200 Subject: [PATCH 02/12] minor bug --- rasr/feature_scorer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rasr/feature_scorer.py b/rasr/feature_scorer.py index d988b8b9..db967027 100644 --- a/rasr/feature_scorer.py +++ b/rasr/feature_scorer.py @@ -127,7 +127,7 @@ def __init__(self, prior_mixtures, scale=1.0, priori_scale=0.0, prior_file=None) self.config.normalize_mixture_weights = False -class OnnxFeatureScorer(rasr.FeatureScorer): +class OnnxFeatureScorer(FeatureScorer): def __init__( self, mixtures, From b39e4942c48f8d3e95d7bc56508394c374ea84e8 Mon Sep 17 00:00:00 2001 From: marvin84 Date: Thu, 21 Sep 2023 13:14:43 +0200 Subject: [PATCH 03/12] improved docstring --- rasr/feature_scorer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rasr/feature_scorer.py b/rasr/feature_scorer.py index db967027..c7d43506 100644 --- a/rasr/feature_scorer.py +++ b/rasr/feature_scorer.py @@ -146,7 +146,8 @@ def __init__( """ :param str mixtures: path to a *.mix file e.g. output of either EstimateMixturesJob or CreateDummyMixturesJob :param str model: path of a model e.g. output of ExportPyTorchModelToOnnxJob - :param dict io_map: mapping between internal rasr identifiers and the model related input/output + :param dict io_map: mapping between internal rasr identifiers and the model related input/output. Default key values + are "features" and "output", and optionally "features-size" :param float label_log_posterior_scale: scales for the log probability of a label e.g. 1.0 is recommended :param float label_prior_scale: scale for the prior log probability of a label reasonable e.g. values in [0.1, 0.7] interval :param str label_log_prior_file: xml file containing log prior probabilities e.g. estimated from the model via povey method From a0354bfc07f3ffadd18a6aa53fb3f1ea0137466e Mon Sep 17 00:00:00 2001 From: marvin84 Date: Thu, 21 Sep 2023 13:22:44 +0200 Subject: [PATCH 04/12] Onnxruntime docstring --- rasr/feature_scorer.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/rasr/feature_scorer.py b/rasr/feature_scorer.py index c7d43506..8b12fedc 100644 --- a/rasr/feature_scorer.py +++ b/rasr/feature_scorer.py @@ -153,6 +153,8 @@ def __init__( :param str label_log_prior_file: xml file containing log prior probabilities e.g. estimated from the model via povey method :param bool apply_log_on_output: whether to apply the log-function on the output, usefull if the model outputs softmax instead of log-softmax :param bool negate_output: wheter negate output (because the model outputs log softmax and not negative log softmax + :param int intra_op_threads: Onnxruntime session's number of parallel threads within each operator + :param int inter_op_threads: Onnxruntime session's number of parallel threads between operators used only for parallel execution mode """ super().__init__(*args, **kwargs) From 5638b95de7babc6e955ba519d7e9f415dbb3bbb0 Mon Sep 17 00:00:00 2001 From: marvin84 Date: Thu, 21 Sep 2023 13:26:21 +0200 Subject: [PATCH 05/12] params as kwargs --- rasr/feature_scorer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rasr/feature_scorer.py b/rasr/feature_scorer.py index 8b12fedc..b1ee0500 100644 --- a/rasr/feature_scorer.py +++ b/rasr/feature_scorer.py @@ -130,10 +130,10 @@ def __init__(self, prior_mixtures, scale=1.0, priori_scale=0.0, prior_file=None) class OnnxFeatureScorer(FeatureScorer): def __init__( self, + *, mixtures, model, io_map, - *args, label_log_posterior_scale=1.0, label_prior_scale=0.7, label_log_prior_file=None, From 0269b08bed33485d6a11d310c3fcd2d5c7656273 Mon Sep 17 00:00:00 2001 From: marvin84 Date: Thu, 21 Sep 2023 15:53:32 +0200 Subject: [PATCH 06/12] bene comments --- rasr/feature_scorer.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/rasr/feature_scorer.py b/rasr/feature_scorer.py index b1ee0500..b75d39e5 100644 --- a/rasr/feature_scorer.py +++ b/rasr/feature_scorer.py @@ -144,15 +144,16 @@ def __init__( **kwargs, ): """ - :param str mixtures: path to a *.mix file e.g. output of either EstimateMixturesJob or CreateDummyMixturesJob - :param str model: path of a model e.g. output of ExportPyTorchModelToOnnxJob + :param str|tk.Path mixtures: path to a *.mix file e.g. output of either EstimateMixturesJob or CreateDummyMixturesJob + :param str|tk.Path model: path of a model e.g. output of ExportPyTorchModelToOnnxJob :param dict io_map: mapping between internal rasr identifiers and the model related input/output. Default key values - are "features" and "output", and optionally "features-size" + are "features" and "output", and optionally "features-size", e.g. + io_map = {"features": "data", "output": "classes"} :param float label_log_posterior_scale: scales for the log probability of a label e.g. 1.0 is recommended :param float label_prior_scale: scale for the prior log probability of a label reasonable e.g. values in [0.1, 0.7] interval - :param str label_log_prior_file: xml file containing log prior probabilities e.g. estimated from the model via povey method + :param str|tk.Path label_log_prior_file: xml file containing log prior probabilities e.g. estimated from the model via povey method :param bool apply_log_on_output: whether to apply the log-function on the output, usefull if the model outputs softmax instead of log-softmax - :param bool negate_output: wheter negate output (because the model outputs log softmax and not negative log softmax + :param bool negate_output: whether negate output (because the model outputs log softmax and not negative log softmax :param int intra_op_threads: Onnxruntime session's number of parallel threads within each operator :param int inter_op_threads: Onnxruntime session's number of parallel threads between operators used only for parallel execution mode """ From 718808b429e0a0bf496e6893e81c3cb7c842528e Mon Sep 17 00:00:00 2001 From: marvin84 Date: Thu, 21 Sep 2023 16:07:34 +0200 Subject: [PATCH 07/12] typing --- rasr/feature_scorer.py | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/rasr/feature_scorer.py b/rasr/feature_scorer.py index b75d39e5..a685a36c 100644 --- a/rasr/feature_scorer.py +++ b/rasr/feature_scorer.py @@ -16,6 +16,8 @@ import os +from typing import Union, Dict, Bool + from .config import * from i6_core.util import get_returnn_root @@ -131,22 +133,22 @@ class OnnxFeatureScorer(FeatureScorer): def __init__( self, *, - mixtures, - model, - io_map, - label_log_posterior_scale=1.0, - label_prior_scale=0.7, - label_log_prior_file=None, - apply_log_on_output=False, - negate_output=True, - intra_op_threads=1, - inter_op_threads=1, + mixtures: Union[str, tk.Path], + model: Union[str, tk.Path], + io_map: Dict[str, str], + label_log_posterior_scale: float = 1.0, + label_prior_scale: float = 0.7, + label_log_prior_file: Union[str, tk.Path] = None, + apply_log_on_output: Bool = False, + negate_output: Bool = True, + intra_op_threads: int = 1, + inter_op_threads: int = 1, **kwargs, ): """ - :param str|tk.Path mixtures: path to a *.mix file e.g. output of either EstimateMixturesJob or CreateDummyMixturesJob - :param str|tk.Path model: path of a model e.g. output of ExportPyTorchModelToOnnxJob - :param dict io_map: mapping between internal rasr identifiers and the model related input/output. Default key values + :param mixtures: path to a *.mix file e.g. output of either EstimateMixturesJob or CreateDummyMixturesJob + :param model: path of a model e.g. output of ExportPyTorchModelToOnnxJob + :param io_map: mapping between internal rasr identifiers and the model related input/output. Default key values are "features" and "output", and optionally "features-size", e.g. io_map = {"features": "data", "output": "classes"} :param float label_log_posterior_scale: scales for the log probability of a label e.g. 1.0 is recommended From ee176df217c345c2bf1dc0e4145f089dfc1dbd26 Mon Sep 17 00:00:00 2001 From: marvin84 Date: Thu, 21 Sep 2023 16:08:05 +0200 Subject: [PATCH 08/12] more typing --- rasr/feature_scorer.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/rasr/feature_scorer.py b/rasr/feature_scorer.py index a685a36c..1e6a5ef1 100644 --- a/rasr/feature_scorer.py +++ b/rasr/feature_scorer.py @@ -151,13 +151,13 @@ def __init__( :param io_map: mapping between internal rasr identifiers and the model related input/output. Default key values are "features" and "output", and optionally "features-size", e.g. io_map = {"features": "data", "output": "classes"} - :param float label_log_posterior_scale: scales for the log probability of a label e.g. 1.0 is recommended - :param float label_prior_scale: scale for the prior log probability of a label reasonable e.g. values in [0.1, 0.7] interval - :param str|tk.Path label_log_prior_file: xml file containing log prior probabilities e.g. estimated from the model via povey method - :param bool apply_log_on_output: whether to apply the log-function on the output, usefull if the model outputs softmax instead of log-softmax - :param bool negate_output: whether negate output (because the model outputs log softmax and not negative log softmax - :param int intra_op_threads: Onnxruntime session's number of parallel threads within each operator - :param int inter_op_threads: Onnxruntime session's number of parallel threads between operators used only for parallel execution mode + :param label_log_posterior_scale: scales for the log probability of a label e.g. 1.0 is recommended + :param label_prior_scale: scale for the prior log probability of a label reasonable e.g. values in [0.1, 0.7] interval + :param label_log_prior_file: xml file containing log prior probabilities e.g. estimated from the model via povey method + :param apply_log_on_output: whether to apply the log-function on the output, usefull if the model outputs softmax instead of log-softmax + :param negate_output: whether negate output (because the model outputs log softmax and not negative log softmax + :param intra_op_threads: Onnxruntime session's number of parallel threads within each operator + :param inter_op_threads: Onnxruntime session's number of parallel threads between operators used only for parallel execution mode """ super().__init__(*args, **kwargs) From 1d22545b4bf96d552ea45468db5b4e908de193e5 Mon Sep 17 00:00:00 2001 From: marvin84 Date: Thu, 21 Sep 2023 17:16:24 +0200 Subject: [PATCH 09/12] nich and eugen comments --- rasr/feature_scorer.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/rasr/feature_scorer.py b/rasr/feature_scorer.py index 1e6a5ef1..4b4b5c5d 100644 --- a/rasr/feature_scorer.py +++ b/rasr/feature_scorer.py @@ -133,14 +133,14 @@ class OnnxFeatureScorer(FeatureScorer): def __init__( self, *, - mixtures: Union[str, tk.Path], - model: Union[str, tk.Path], + mixtures: tk.Path, + model: tk.Path, io_map: Dict[str, str], label_log_posterior_scale: float = 1.0, label_prior_scale: float = 0.7, - label_log_prior_file: Union[str, tk.Path] = None, - apply_log_on_output: Bool = False, - negate_output: Bool = True, + label_log_prior_file: tk.Path = None, + apply_log_on_output: bool = False, + negate_output: bool = True, intra_op_threads: int = 1, inter_op_threads: int = 1, **kwargs, @@ -170,7 +170,7 @@ def __init__( self.config.session.file = model - if label_log_prior_file: + if apply_log_on_output: self.config.apply_log_on_output = apply_log_on_output if not negate_output: self.config.negate_output = negate_output From 33edfc8190805b62dfec7d32413708583a658311 Mon Sep 17 00:00:00 2001 From: marvin84 Date: Thu, 21 Sep 2023 17:17:07 +0200 Subject: [PATCH 10/12] removed Bool import --- rasr/feature_scorer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rasr/feature_scorer.py b/rasr/feature_scorer.py index 4b4b5c5d..849f1349 100644 --- a/rasr/feature_scorer.py +++ b/rasr/feature_scorer.py @@ -16,7 +16,7 @@ import os -from typing import Union, Dict, Bool +from typing import Union, Dict from .config import * from i6_core.util import get_returnn_root From 3f31fefaf7158bea67c54497601dbcf7b02de079 Mon Sep 17 00:00:00 2001 From: marvin84 Date: Thu, 21 Sep 2023 17:24:25 +0200 Subject: [PATCH 11/12] willi comment --- rasr/feature_scorer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rasr/feature_scorer.py b/rasr/feature_scorer.py index 849f1349..90a5db84 100644 --- a/rasr/feature_scorer.py +++ b/rasr/feature_scorer.py @@ -16,7 +16,7 @@ import os -from typing import Union, Dict +from typing import Union, Dict, Optional from .config import * from i6_core.util import get_returnn_root @@ -138,7 +138,7 @@ def __init__( io_map: Dict[str, str], label_log_posterior_scale: float = 1.0, label_prior_scale: float = 0.7, - label_log_prior_file: tk.Path = None, + label_log_prior_file: Optional[tk.Path] = None, apply_log_on_output: bool = False, negate_output: bool = True, intra_op_threads: int = 1, From c95cc86b4947d90180313b94ca2178ca45984e4a Mon Sep 17 00:00:00 2001 From: marvin84 Date: Thu, 21 Sep 2023 17:37:46 +0200 Subject: [PATCH 12/12] eugen comments --- rasr/feature_scorer.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/rasr/feature_scorer.py b/rasr/feature_scorer.py index 90a5db84..6a390068 100644 --- a/rasr/feature_scorer.py +++ b/rasr/feature_scorer.py @@ -137,7 +137,7 @@ def __init__( model: tk.Path, io_map: Dict[str, str], label_log_posterior_scale: float = 1.0, - label_prior_scale: float = 0.7, + label_prior_scale: float, label_log_prior_file: Optional[tk.Path] = None, apply_log_on_output: bool = False, negate_output: bool = True, @@ -169,11 +169,8 @@ def __init__( self.config.prior_file = label_log_prior_file self.config.session.file = model - - if apply_log_on_output: - self.config.apply_log_on_output = apply_log_on_output - if not negate_output: - self.config.negate_output = negate_output + self.config.apply_log_on_output = apply_log_on_output + self.config.negate_output = negate_output self.post_config.session.intra_op_num_threads = intra_op_threads self.post_config.session.inter_op_num_threads = inter_op_threads