Skip to content

Commit

Permalink
Fix the parsing of long flag strings
Browse files Browse the repository at this point in the history
Long flag strings such as "ABCD" were getting parsed as "AA", "BB",
"CC", "DD", rather than the correct "ABCD". Fix this, add a unit and
integration test, and do some mild reorganization.
  • Loading branch information
tgross35 committed Jun 12, 2024
1 parent fe17d09 commit 0b835ce
Show file tree
Hide file tree
Showing 6 changed files with 184 additions and 64 deletions.
48 changes: 25 additions & 23 deletions zspell/src/affix/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,19 +63,6 @@ pub enum FlagType {
}

impl FlagType {
/// Convert a string flag to its u32 representation
pub(crate) fn str_to_flag(self, flag: &str) -> Result<Flag, ParseErrorKind> {
match self {
// Single ascii char
FlagType::Ascii => Self::parse_as_ascii(flag),
// Single unicode character
FlagType::Utf8 => Self::parse_as_utf8(flag),
// Two asii chars
FlagType::Long => Self::parse_as_long(flag),
FlagType::Number => Self::parse_as_number(flag),
}
}

/// Parse a string to multiple flags as they are defined in the dictionary
/// file
///
Expand All @@ -89,24 +76,40 @@ impl FlagType {
FlagType::Long => {
let mut ret = Vec::with_capacity(s.len() / 2);
let mut iter = s.chars();
for ch in s.chars() {

while let Some(ch) = iter.next() {
let ch_next = iter.next().ok_or(ParseErrorKind::FlagParse(self))?;
dbg!(ch, ch_next);
ret.push(Self::parse_chars_long([ch, ch_next])?);
}

Ok(ret)
}
}
}

fn parse_as_ascii(flag: &str) -> Result<Flag, ParseErrorKind> {
if flag.len() == 1 {
Ok(Flag(flag.bytes().next().unwrap().into()))
/// Convert a single string flag to its u32 representation
pub(crate) fn str_to_flag(self, flag: &str) -> Result<Flag, ParseErrorKind> {
match self {
// Single ascii char
FlagType::Ascii => Self::parse_str_ascii(flag),
// Single unicode character
FlagType::Utf8 => Self::parse_str_utf8(flag),
// Two asii chars
FlagType::Long => Self::parse_str_long(flag),
FlagType::Number => Self::parse_str_number(flag),
}
}

fn parse_str_ascii(flag: &str) -> Result<Flag, ParseErrorKind> {
if flag.len() == 1 && flag.as_bytes()[0].is_ascii() {
Ok(Flag::new_ascii(flag.as_bytes()[0]))
} else {
Err(ParseErrorKind::FlagParse(Self::Ascii))
}
}

fn parse_as_utf8(flag: &str) -> Result<Flag, ParseErrorKind> {
fn parse_str_utf8(flag: &str) -> Result<Flag, ParseErrorKind> {
let mut chars = flag.chars();
let err = Err(ParseErrorKind::FlagParse(Self::Utf8));

Expand All @@ -118,21 +121,20 @@ impl FlagType {
return err;
}

Ok(Flag(ch.into()))
Ok(Flag::new_utf8(ch))
}

/// Parse two ascii characters
fn parse_as_long(flag: &str) -> Result<Flag, ParseErrorKind> {
fn parse_str_long(flag: &str) -> Result<Flag, ParseErrorKind> {
if flag.len() != 2 || flag.chars().any(|c| !c.is_ascii()) {
Err(ParseErrorKind::FlagParse(Self::Long))
} else {
let v = u16::from_ne_bytes(flag[0..=1].as_bytes().try_into().unwrap());
Ok(Flag(v.into()))
Ok(Flag::new_long(flag))
}
}

/// Parse as a number
fn parse_as_number(flag: &str) -> Result<Flag, ParseErrorKind> {
fn parse_str_number(flag: &str) -> Result<Flag, ParseErrorKind> {
flag.parse()
.map_err(|_| ParseErrorKind::FlagParse(Self::Number))
.map(Flag)
Expand Down
45 changes: 43 additions & 2 deletions zspell/src/dict/flags.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,54 @@
use std::fmt::Display;
use std::fmt::{self, Display};
use std::sync::Arc;

use super::rule::AfxRule;

/// A flag representation is either an ASCII char, unicode char, or number. We can fit
/// any of those in a u32.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct Flag(pub u32);

impl Flag {
pub fn new_ascii(ch: u8) -> Self {
debug_assert!(ch.is_ascii());
Self(ch.into())
}

pub fn new_utf8(ch: char) -> Self {
Self(ch.into())
}

/// Must be a 2-character string
pub fn new_long(s: &str) -> Self {
debug_assert!(s.len() == 2, "invalid string length: {s}");
debug_assert!(
s.chars().all(|ch| ch.is_ascii()),
"invalid string characters: {s}"

Check warning on line 26 in zspell/src/dict/flags.rs

View check run for this annotation

Codecov / codecov/patch

zspell/src/dict/flags.rs#L26

Added line #L26 was not covered by tests
);

let num = u16::from_le_bytes(s[..=1].as_bytes().try_into().unwrap());

Self(num.into())
}

pub fn new_number(num: u32) -> Self {
Self(num)
}

Check warning on line 36 in zspell/src/dict/flags.rs

View check run for this annotation

Codecov / codecov/patch

zspell/src/dict/flags.rs#L34-L36

Added lines #L34 - L36 were not covered by tests
}

impl fmt::Debug for Flag {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
if let Ok(single_flag) = u8::try_from(self.0) {
write!(f, "{}", char::from(single_flag))
} else if let Ok(long_flag) = u16::try_from(self.0) {
let [a, b] = long_flag.to_le_bytes();
write!(f, "{}{}", char::from(a), char::from(b))

Check warning on line 45 in zspell/src/dict/flags.rs

View check run for this annotation

Codecov / codecov/patch

zspell/src/dict/flags.rs#L40-L45

Added lines #L40 - L45 were not covered by tests
} else {
write!(f, "{:#06x}", self.0)

Check warning on line 47 in zspell/src/dict/flags.rs

View check run for this annotation

Codecov / codecov/patch

zspell/src/dict/flags.rs#L47

Added line #L47 was not covered by tests
}
}

Check warning on line 49 in zspell/src/dict/flags.rs

View check run for this annotation

Codecov / codecov/patch

zspell/src/dict/flags.rs#L49

Added line #L49 was not covered by tests
}

/// A representation of a flag value
#[non_exhaustive]
#[derive(Debug, Clone, Hash, PartialEq, Eq)]
Expand Down
98 changes: 68 additions & 30 deletions zspell/src/dict/tests_parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,31 +6,60 @@ use super::*;
fn test_dict_entry_ok() {
let f1 = FlagType::Utf8;
let f2 = FlagType::Ascii;
let f3 = FlagType::Long;

let s1 = "abcd";
let s2 = "abcd # comment";
let s3 = "abcd/ABC";
let s4 = "abcd/ABC # comment";
let s5 = "abcd/ABC ip:m1 tp:m2";
let s6 = "abcd/ABC ip:m1 tp:m2 # comment";
let s7 = "abcd ip:m1 tp:m2";
let s8 = "abcd ip:m1 tp:m2 # comment";
let s_0f0m_1 = "abcd";
let s_0f0m_2 = "abcd # comment";
let s_4f0m_1 = "abcd/ABCD";
let s_4f0m_2 = "abcd/ABCD # comment";
let s_4f2m_1 = "abcd/ABCD ip:m1 tp:m2";
let s_4f2m_2 = "abcd/ABCD ip:m1 tp:m2 # comment";
let s_0f2m_1 = "abcd ip:m1 tp:m2";
let s_0f2m_2 = "abcd ip:m1 tp:m2 # comment";

let r1 = DictEntry::new("abcd", &[], &[]);
let r2 = DictEntry::new(
// No flags
let r_0f0m = DictEntry::new("abcd", &[], &[]);

// All flags
let r_4f0m = DictEntry::new(
"abcd",
&[Flag('A'.into()), Flag('B'.into()), Flag('C'.into())],
&[
Flag::new_ascii(b'A'),
Flag::new_ascii(b'B'),
Flag::new_ascii(b'C'),
Flag::new_ascii(b'D'),
],
&[],
);
let r3 = DictEntry::new(

let r_2f0m = DictEntry::new("abcd", &[Flag::new_long("AB"), Flag::new_long("CD")], &[]);

// All flags plus morph info
let r_4f2m = DictEntry::new(
"abcd",
&[
Flag::new_ascii(b'A'),
Flag::new_ascii(b'B'),
Flag::new_ascii(b'C'),
Flag::new_ascii(b'D'),
],
&[
MorphInfo::InflecPfx("m1".into()),
MorphInfo::TermPfx("m2".into()),
],
);

let r_2f2m = DictEntry::new(
"abcd",
&[Flag('A'.into()), Flag('B'.into()), Flag('C'.into())],
&[Flag::new_long("AB"), Flag::new_long("CD")],
&[
MorphInfo::InflecPfx("m1".into()),
MorphInfo::TermPfx("m2".into()),
],
);
let r4 = DictEntry::new(

// No flags, including morph info
let r_0f2m = DictEntry::new(
"abcd",
&[],
&[
Expand All @@ -39,23 +68,32 @@ fn test_dict_entry_ok() {
],
);

assert_eq!(DictEntry::parse_single(s1, f1, 0), Ok(r1.clone()));
assert_eq!(DictEntry::parse_single(s2, f1, 0), Ok(r1.clone()));
assert_eq!(DictEntry::parse_single(s3, f1, 0), Ok(r2.clone()));
assert_eq!(DictEntry::parse_single(s4, f1, 0), Ok(r2.clone()));
assert_eq!(DictEntry::parse_single(s5, f1, 0), Ok(r3.clone()));
assert_eq!(DictEntry::parse_single(s6, f1, 0), Ok(r3.clone()));
assert_eq!(DictEntry::parse_single(s7, f1, 0), Ok(r4.clone()));
assert_eq!(DictEntry::parse_single(s8, f1, 0), Ok(r4.clone()));
assert_eq!(DictEntry::parse_single(s_0f0m_1, f1, 0), Ok(r_0f0m.clone()));
assert_eq!(DictEntry::parse_single(s_0f0m_2, f1, 0), Ok(r_0f0m.clone()));
assert_eq!(DictEntry::parse_single(s_4f0m_1, f1, 0), Ok(r_4f0m.clone()));
assert_eq!(DictEntry::parse_single(s_4f0m_2, f1, 0), Ok(r_4f0m.clone()));
assert_eq!(DictEntry::parse_single(s_4f2m_1, f1, 0), Ok(r_4f2m.clone()));
assert_eq!(DictEntry::parse_single(s_4f2m_2, f1, 0), Ok(r_4f2m.clone()));
assert_eq!(DictEntry::parse_single(s_0f2m_1, f1, 0), Ok(r_0f2m.clone()));
assert_eq!(DictEntry::parse_single(s_0f2m_2, f1, 0), Ok(r_0f2m.clone()));

assert_eq!(DictEntry::parse_single(s_0f0m_1, f2, 0), Ok(r_0f0m.clone()));
assert_eq!(DictEntry::parse_single(s_0f0m_2, f2, 0), Ok(r_0f0m.clone()));
assert_eq!(DictEntry::parse_single(s_4f0m_1, f2, 0), Ok(r_4f0m.clone()));
assert_eq!(DictEntry::parse_single(s_4f0m_2, f2, 0), Ok(r_4f0m.clone()));
assert_eq!(DictEntry::parse_single(s_4f2m_1, f2, 0), Ok(r_4f2m.clone()));
assert_eq!(DictEntry::parse_single(s_4f2m_1, f2, 0), Ok(r_4f2m.clone()));
assert_eq!(DictEntry::parse_single(s_0f2m_2, f2, 0), Ok(r_0f2m.clone()));
assert_eq!(DictEntry::parse_single(s_0f2m_2, f2, 0), Ok(r_0f2m.clone()));

assert_eq!(DictEntry::parse_single(s1, f2, 0), Ok(r1.clone()));
assert_eq!(DictEntry::parse_single(s2, f2, 0), Ok(r1));
assert_eq!(DictEntry::parse_single(s3, f2, 0), Ok(r2.clone()));
assert_eq!(DictEntry::parse_single(s4, f2, 0), Ok(r2));
assert_eq!(DictEntry::parse_single(s5, f2, 0), Ok(r3.clone()));
assert_eq!(DictEntry::parse_single(s6, f2, 0), Ok(r3));
assert_eq!(DictEntry::parse_single(s7, f2, 0), Ok(r4.clone()));
assert_eq!(DictEntry::parse_single(s8, f2, 0), Ok(r4));
assert_eq!(DictEntry::parse_single(s_0f0m_1, f3, 0), Ok(r_0f0m.clone()));
assert_eq!(DictEntry::parse_single(s_0f0m_2, f3, 0), Ok(r_0f0m));
assert_eq!(DictEntry::parse_single(s_4f0m_1, f3, 0), Ok(r_2f0m.clone()));
assert_eq!(DictEntry::parse_single(s_4f0m_2, f3, 0), Ok(r_2f0m));
assert_eq!(DictEntry::parse_single(s_4f2m_1, f3, 0), Ok(r_2f2m.clone()));
assert_eq!(DictEntry::parse_single(s_4f2m_1, f3, 0), Ok(r_2f2m));
assert_eq!(DictEntry::parse_single(s_0f2m_1, f3, 0), Ok(r_0f2m.clone()));
assert_eq!(DictEntry::parse_single(s_0f2m_2, f3, 0), Ok(r_0f2m));
}

#[test]
Expand Down
24 changes: 17 additions & 7 deletions zspell/test-suite/b-affix-forward-gen-num-flags.test
Original file line number Diff line number Diff line change
Expand Up @@ -13,36 +13,46 @@ SFX 999 0 bb .
SFX 12345 N 1
SFX 12345 0 cc .

SFX 1234 N 1
SFX 1234 0 dd .

==== dic ====
4
xxx/1
yyy/1,999,12345
zzz/999,12345
www/1
xxx/1,999,12345
yyy/999,12345
zzz/999,1234


==== valid ====
www
xxx
yyy
zzz
wwwaa
xxxaa
yyyaa
xxxbb
xxxcc
yyybb
yyycc
zzzbb
zzzcc
zzzdd


==== invalid ====
%% Nothing to see here
nothing

==== wordlist ====
www
xxx
yyy
zzz
wwwaa
xxxaa
yyyaa
xxxbb
xxxcc
yyybb
yyycc
zzzbb
zzzcc
zzzdd
29 changes: 29 additions & 0 deletions zspell/test-suite/b-flag-long.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
%% Verify that multicharacter flags work

==== afx ====
FLAG long

NEEDAFFIX ()
FORBIDDENWORD {}
KEEPCASE ||
NOSUGGEST --

%% Test same first character but different second
SFX -+ Y 1
SFX -+ 0 aa .

==== dic ====
foo/--
bar/||--
baz/-+

==== valid ====
foo bar baz bazaa

==== wordlist ====
baz
bazaa

==== nosuggest ====
foo
bar
4 changes: 2 additions & 2 deletions zspell/test-util/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ impl TestManager {
// check exhaustive matches)
for line in content_iter
.next()
.expect("Section title with no content")
.unwrap_or_else(|| panic!("Section title '{sec_title}' has no content"))
.lines()
{
match determine_line(line) {
Expand Down Expand Up @@ -261,7 +261,7 @@ impl TestManager {

if valid_fail_msg.is_some() || invalid_fail_msg.is_some() {
panic!(
"{}{}",
"{}{}\ndictionary: {dict:#?}",

Check warning on line 264 in zspell/test-util/src/lib.rs

View check run for this annotation

Codecov / codecov/patch

zspell/test-util/src/lib.rs#L264

Added line #L264 was not covered by tests
valid_fail_msg.unwrap_or_default(),
invalid_fail_msg.unwrap_or_default()
);
Expand Down

0 comments on commit 0b835ce

Please sign in to comment.