Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat!: ユーザー辞書データに改行やnull文字が入っていた場合にエラーとする #1522

Merged
merged 11 commits into from
Feb 7, 2025
25 changes: 25 additions & 0 deletions test/unit/user_dict/test_user_dict_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,20 @@ def test_convert_to_zenkaku() -> None:
assert surface == true_surface


def test_remove_newlines_and_null() -> None:
    """UserDictWord removes newlines and null characters from surface."""
    # Build constructor kwargs whose surface embeds LF, CR and NUL.
    word_fields = generate_model()
    word_fields["surface"] = "te\n\r\x00st"

    word = UserDictWord(**word_fields)

    # The validated surface must no longer contain the control characters.
    # NOTE(review): model.py also defines convert_to_zenkaku for surface; if
    # it runs as a validator on this field, the expected value may need to be
    # full-width -- confirm.
    assert word.surface == "test"


def test_count_mora() -> None:
"""UserDictWord は mora_count=None を上書きする。"""
# Inputs
Expand Down Expand Up @@ -126,6 +140,17 @@ def test_invalid_pronunciation_not_katakana() -> None:
UserDictWord(**test_value)


def test_invalid_pronunciation_newlines_and_null() -> None:
    """UserDictWord rejects pronunciation with newlines or null characters."""
    # Build constructor kwargs whose pronunciation embeds LF, CR and NUL.
    word_fields = generate_model()
    word_fields["pronunciation"] = "ボイ\n\r\x00ボ"

    # Constructing the model is expected to fail validation.
    with pytest.raises(ValidationError):
        UserDictWord(**word_fields)


def test_invalid_pronunciation_invalid_sutegana() -> None:
"""UserDictWord は無効な pronunciation をエラーとする。"""
# Inputs
Expand Down
5 changes: 5 additions & 0 deletions voicevox_engine/user_dict/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,11 @@ def convert_to_zenkaku(cls, surface: str) -> str:
)
)

@field_validator("surface")
Hiroshiba marked this conversation as resolved.
Show resolved Hide resolved
@classmethod
def remove_newlines_and_null(cls, surface: str) -> str:
    """Return ``surface`` with LF, CR and NUL characters removed.

    Args:
        surface: The surface string to clean.

    Returns:
        A copy of ``surface`` in which every ``"\\n"``, ``"\\r"`` and
        ``"\\x00"`` character has been deleted; all other characters are
        kept in their original order.
    """
    # The third argument of str.maketrans lists characters to delete, so
    # one translate() pass replaces three chained replace() calls.
    return surface.translate(str.maketrans("", "", "\n\r\x00"))

@field_validator("pronunciation", mode="before")
@classmethod
def check_is_katakana(cls, pronunciation: str) -> str:
Expand Down