rth · rth · Jun 15, 2021 · Jun 15, 2021 · Jun 15, 2021
diff --git a/Cargo.toml b/Cargo.toml
@@ -38,14 +38,14 @@ overflow-checks = false
 [dependencies]
 regex = "1"
 lazy_static = "1.4.0"
-seahash = "4.0.0"
-itertools = "0.9"
+seahash = "4.1.0"
+itertools = "0.10.1"
 ndarray = "0.13.0"
 serde = { version = "1.0", features = ["derive"] }
 sprs = {version  = "0.7.1", default-features = false}
-unicode-segmentation = "1.6.0"
+unicode-segmentation = "1.7.1"
 thiserror = "1.0"
-hashbrown = { version = "0.8", features = ["rayon"] }
+hashbrown = { version = "0.11.2", features = ["rayon"] }
 rayon = {version = "1.3", optional = true}
 dict_derive = {version = "0.2", optional = true}
 pyo3 = {version = "0.10.1", optional = true}

diff --git a/benchmarks/bench_sentence_tokenizers.py b/benchmarks/bench_sentence_tokenizers.py
@@ -24,8 +24,14 @@ def regexp_tokenizer(txt):
 
     db = [
         (r"Python re.split('(?<=[!.?])', ...)", regexp_tokenizer),
-        ("UnicodeSentenceTokenizer()", UnicodeSentenceTokenizer().tokenize,),
-        ("PunctuationTokenizer()", PunctuationTokenizer().tokenize,),
+        (
+            "UnicodeSentenceTokenizer()",
+            UnicodeSentenceTokenizer().tokenize,
+        ),
+        (
+            "PunctuationTokenizer()",
+            PunctuationTokenizer().tokenize,
+        ),
     ]
 
     for label, func in db:

diff --git a/ci/azure/install.sh b/ci/azure/install.sh
@@ -24,6 +24,8 @@ rustup default nightly-2020-06-01
 
 cd python/
 python -m pip install -r ../ci/requirements-build.txt
+
+cargo tree
 python setup.py bdist_wheel
 
 pip install --pre --no-index --find-links dist/ vtext
diff --git a/python/vtext/tests/test_tokenize_sentence.py b/python/vtext/tests/test_tokenize_sentence.py
@@ -48,7 +48,9 @@ def test_punctuation_sentence_tokenizer():
 
 @hypothesis.given(st.text())
 @pytest.mark.parametrize(
-    "tokenizer", [UnicodeSentenceTokenizer(), PunctuationTokenizer()], ids=_pytest_ids,
+    "tokenizer",
+    [UnicodeSentenceTokenizer(), PunctuationTokenizer()],
+    ids=_pytest_ids,
 )
 def test_tokenize_edge_cases(tokenizer, txt):
     tokens = tokenizer.tokenize(txt)