Skip to content

Commit

Permalink
Merge branch 'master' into doctest_fail_fast
Browse files: browse the repository at this point in the history
  • Loading branch information
AA-Turner committed Mar 2, 2025
2 parents 5647e7d + 287ee20 commit dea1427
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 15 deletions.
2 changes: 2 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,7 @@ Features added
Bugs fixed
----------

* #13392: Fix argument type for ``jieba.load_userdict()``.

Testing
-------
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ docs = [
"sphinxcontrib-websupport",
]
lint = [
"ruff==0.9.7",
"ruff==0.9.9",
"mypy==1.15.0",
"sphinx-lint>=0.9",
"types-colorama==0.4.15.20240311",
Expand All @@ -102,7 +102,7 @@ lint = [
"types-Pygments==2.19.0.20250219",
"types-requests==2.32.0.20241016", # align with requests
"types-urllib3==1.26.25.14",
"pyright==1.1.394",
"pyright==1.1.395",
"pytest>=8.0",
"pypi-attestations==0.0.21",
"betterproto==2.0.0b6",
Expand Down
36 changes: 23 additions & 13 deletions sphinx/search/zh.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,33 @@

import re
from pathlib import Path
from typing import TYPE_CHECKING

import snowballstemmer

from sphinx.search import SearchLanguage

if TYPE_CHECKING:
from collections.abc import Iterator

try:
import jieba # type: ignore[import-not-found]

JIEBA = True
JIEBA_DEFAULT_DICT = Path(jieba.__file__).parent / jieba.DEFAULT_DICT_NAME
from jieba import cut_for_search
from jieba import load_userdict as jieba_load_userdict
except ImportError:
JIEBA = False
JIEBA_DEFAULT_DICT = Path()
JIEBA_DEFAULT_DICT = ''

def jieba_load_userdict(f: str) -> None:
pass

def cut_for_search(sentence: str, HMM: bool = True) -> Iterator[str]:
yield from ()

else:
JIEBA_DEFAULT_DICT = (
Path(jieba.__file__, '..', jieba.DEFAULT_DICT_NAME).resolve().as_posix()
)
del jieba

english_stopwords = {
'a', 'and', 'are', 'as', 'at',
Expand Down Expand Up @@ -231,18 +245,14 @@ def __init__(self, options: dict[str, str]) -> None:
self.latin_terms: set[str] = set()

def init(self, options: dict[str, str]) -> None:
if JIEBA:
dict_path = options.get('dict', JIEBA_DEFAULT_DICT)
if dict_path and Path(dict_path).is_file():
jieba.load_userdict(dict_path)
dict_path = options.get('dict', JIEBA_DEFAULT_DICT)
if dict_path and Path(dict_path).is_file():
jieba_load_userdict(str(dict_path))

self.stemmer = snowballstemmer.stemmer('english')

def split(self, input: str) -> list[str]:
if JIEBA:
chinese: list[str] = list(jieba.cut_for_search(input))
else:
chinese = []
chinese: list[str] = list(cut_for_search(input))

latin1 = [term.strip() for term in self.latin1_letters.findall(input)]
self.latin_terms.update(latin1)
Expand Down

0 comments on commit dea1427

Please sign in to comment.