From 723bdbc249ff8e2485c93af89356fca766146988 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=B3man=20Joost?= Date: Sat, 4 Jan 2025 20:17:17 +1000 Subject: [PATCH] fix: parsing of (text encoded) addresses --- src/Data/IMF.hs | 1 + src/Data/IMF/Syntax.hs | 1 + src/Data/IMF/Text.hs | 10 ++++++++-- tests/Headers.hs | 38 ++++++++++++++++++++++++++++++++++---- 4 files changed, 44 insertions(+), 6 deletions(-) diff --git a/src/Data/IMF.hs b/src/Data/IMF.hs index 4c5867d..9a6255c 100644 --- a/src/Data/IMF.hs +++ b/src/Data/IMF.hs @@ -150,6 +150,7 @@ module Data.IMF , Address(..) , address , addressList + , addressSpec , AddrSpec(..) , Domain(..) , Mailbox(..) diff --git a/src/Data/IMF/Syntax.hs b/src/Data/IMF/Syntax.hs index 3a593e5..7993496 100644 --- a/src/Data/IMF/Syntax.hs +++ b/src/Data/IMF/Syntax.hs @@ -43,6 +43,7 @@ module Data.IMF.Syntax , crlf , vchar , word + , dquote , quotedString , dotAtomText , dotAtom diff --git a/src/Data/IMF/Text.hs b/src/Data/IMF/Text.hs index 8070ab2..c1e92c3 100644 --- a/src/Data/IMF/Text.hs +++ b/src/Data/IMF/Text.hs @@ -39,6 +39,7 @@ import Control.Applicative ((<|>), optional) import Data.CaseInsensitive import Data.Foldable (fold) import Data.List (intersperse) +import Data.Char (isLetter) import qualified Data.Text as T import qualified Data.Text.Encoding as T import qualified Data.Text.Lazy as LT @@ -49,7 +50,7 @@ import Data.List.NonEmpty (intersperse) import Data.MIME.Charset (decodeLenient) import Data.IMF (Mailbox(..), Address(..), AddrSpec(..), Domain(..)) -import Data.IMF.Syntax +import Data.IMF.Syntax hiding (word) renderMailboxes :: [Mailbox] -> T.Text @@ -99,6 +100,11 @@ mailbox = Mailbox <$> optional displayName <*> angleAddr readMailbox :: String -> Either String Mailbox readMailbox = parseOnly (mailbox <* endOfInput) . T.pack +word :: Parser T.Text +word = optionalFWS *> foldMany1 (singleton . toChar <$> A.satisfy classes) + where + classes c = isLetter c || isAtext c + -- | Version of 'phrase' that does not process encoded-word -- (we are parsing Text so will assume that the input does not -- contain encoded words. TODO this is probably wrong :) @@ -106,7 +112,7 @@ phrase :: Parser T.Text phrase = foldMany1Sep (singleton ' ') word displayName :: Parser T.Text -displayName = phrase +displayName = phrase <|> quotedString mailboxList :: Parser [Mailbox] mailboxList = mailbox `sepBy` char ',' diff --git a/tests/Headers.hs b/tests/Headers.hs index f93533a..f1c2449 100644 --- a/tests/Headers.hs +++ b/tests/Headers.hs @@ -23,6 +23,7 @@ import Data.List.NonEmpty (NonEmpty((:|))) import Data.String (IsString) import Data.Word (Word8) +import qualified Data.Text as T import qualified Data.ByteString as B import qualified Data.ByteString.Lazy as L import qualified Data.ByteString.Lazy.Char8 as L8 @@ -47,6 +48,7 @@ renderField = Builder.toLazyByteString . buildField unittests :: TestTree unittests = testGroup "Headers" [ parsesMailboxesSuccessfully + , parsesMailboxesNonASCIISuccessfully , parsesTextMailboxesSuccessfully , parsesAddressesSuccessfully , parsesTextAddressesSuccessfully @@ -136,6 +138,26 @@ rendersAddressesToTextSuccessfully = , "undisclosed-recipients:;") ] +nonASCIIDisplayNameFixtures :: IsString s => [(String, Either String Mailbox -> Assertion, s)] +nonASCIIDisplayNameFixtures = + [ + ( "Czech" + , (Right (Mailbox (Just "Lud\283k Tiberiu") (AddrSpec "foo" (DomainDotAtom $ "bar" :| ["test"]))) @=?) + , "Luděk Tiberiu") + , ( "Chinese" + , (Right (Mailbox (Just "佐藤 直樹") (AddrSpec "foo" (DomainDotAtom $ "bar" :| ["test"]))) @=?) + , "佐藤 直樹") + , ("Japanese" + ,(Right (Mailbox (Just "鈴木 一郎") (AddrSpec "foo" (DomainDotAtom $ "bar" :| ["test"]))) @=?) + , "鈴木 一郎") + , ("Korean" + , (Right (Mailbox (Just "김철수") (AddrSpec "foo" (DomainDotAtom $ "bar" :| ["test"]))) @=?) + , "김철수") + , ("Apostrophy" + , (Right (Mailbox (Just "O'Neill Mc Carthy") (AddrSpec "foo" (DomainDotAtom $ "bar" :| ["test"]))) @=?) + , "O'Neill Mc Carthy") + ] + -- | Note some examples are taken from https://tools.ietf.org/html/rfc3696#section-3 mailboxFixtures :: IsString s => [(String, Either String Mailbox -> Assertion, s)] mailboxFixtures = @@ -176,21 +198,29 @@ mailboxFixtures = , assertBool "Parse error expected" . isLeft , "foo@,bar,com") , ( "displayName without quotes but with spaces" - , (Right (Mailbox (Just "John Doe") (AddrSpec "jdoe" (DomainDotAtom $ "machine" :| ["example"]))) @=?) - , "John Doe " + , (Right (Mailbox (Just "John. (Doe)") (AddrSpec "jdoe" (DomainDotAtom $ "machine" :| ["example"]))) @=?) + , "\"John. (Doe)\" " ) ] parsesMailboxesSuccessfully :: TestTree parsesMailboxesSuccessfully = - testGroup "parsing mailboxes" $ + testGroup "parsing mailboxes (text)" $ (\(desc,f,input) -> testCase desc $ f (AText.parseOnly AddressText.mailbox input)) <$> mailboxFixtures +parsesMailboxesNonASCIISuccessfully :: TestTree +parsesMailboxesNonASCIISuccessfully = + testGroup "parsing mailboxes (nonASCII)" $ + (\(desc, assertion, input) -> + testCase desc $ assertion (AText.parseOnly AddressText.mailbox (input <> " "))) <$> + nonASCIIDisplayNameFixtures + + parsesTextMailboxesSuccessfully :: TestTree parsesTextMailboxesSuccessfully = - testGroup "parsing mailboxes (text)" $ + testGroup "parsing mailboxes" $ (\(desc,f,input) -> testCase desc $ f (parseOnly (mailbox defaultCharsets) input)) <$> mailboxFixtures