Skip to content

Commit

Permalink
Update header parsing to decode encoded words after parsing the header (RFC 2047)
Browse files Browse the repository at this point in the history
  • Loading branch information
andrewtimberlake committed Nov 6, 2024
1 parent 68ab800 commit f8a1565
Show file tree
Hide file tree
Showing 2 changed files with 163 additions and 27 deletions.
120 changes: 93 additions & 27 deletions lib/mail/parsers/rfc_2822.ex
Original file line number Diff line number Diff line change
Expand Up @@ -302,18 +302,22 @@ defmodule Mail.Parsers.RFC2822 do
end)
end

defp parse_headers(message, [], _opts), do: message
# Folds each raw header line into the message's existing headers: every line is
# turned into a {key, value} pair by parse_header/2 and stored with put_header/3.
# Returns the message with its :headers field replaced by the accumulated result.
defp parse_headers(message, headers, opts) do
  parsed =
    Enum.reduce(headers, message.headers, fn raw_header, acc ->
      {key, value} = parse_header(raw_header, opts)
      put_header(acc, key, value)
    end)

  %{message | headers: parsed}
end

defp parse_headers(message, [header | tail], opts) do
def parse_header(header, opts) do
[name, body] = String.split(header, ":", parts: 2)
key = String.downcase(name)
decoded = parse_encoded_word(body, opts)

headers =
put_header(message.headers, key, String.downcase(name) |> parse_header_value(decoded))

message = %{message | headers: headers}
parse_headers(message, tail, opts)
value = parse_header_value(key, body)
decoded = decode_header_value(key, value, opts)
{key, decoded}
end

defp put_header(headers, "received" = key, value),
Expand Down Expand Up @@ -372,6 +376,48 @@ defmodule Mail.Parsers.RFC2822 do
defp parse_header_value(_key, value),
do: value

# Decodes RFC 2047 encoded words inside a header value that has already been
# parsed into its structured form. Clauses are tried top to bottom, so the
# key-specific shapes must come before the generic list/tuple/binary ones.

# Absent value: nothing to decode.
defp decode_header_value(_key, nil, _opts),
  do: nil

# A value that is already a DateTime struct is passed through untouched.
defp decode_header_value(_key, %DateTime{} = datetime, _opts),
  do: datetime

# "received" values are returned as-is.
defp decode_header_value("received", value, _opts),
  do: value

# Address lists: decode only the display name of {name, email} entries; bare
# address strings are kept unchanged. This clause has to precede the
# structured-value clause below, because a list such as
# ["a@example.com", {"Name", "b@example.com"}] also satisfies that clause's
# guard and would then have its email decoded instead of its name (and would
# raise on any later bare-string entry).
defp decode_header_value(key, addresses, opts)
     when key in ["to", "cc", "from", "reply-to"] and is_list(addresses) do
  Enum.map(addresses, fn
    {name, email} -> {parse_encoded_word(name, opts), email}
    email -> email
  end)
end

# Structured values shaped like [value, {param, param_value}, ...]: decode the
# main value and every parameter value, leaving parameter names alone.
defp decode_header_value(_key, [value | [param | _params] = params], opts)
     when is_binary(value) and is_tuple(param) do
  decoded = parse_encoded_word(value, opts)

  params =
    Enum.map(params, fn {param, param_value} ->
      {param, parse_encoded_word(param_value, opts)}
    end)

  [decoded | params]
end

# Single {name, email} address: only the display name is decoded.
defp decode_header_value(_key, {name, email}, opts),
  do: {parse_encoded_word(name, opts), email}

# A "from" value that is neither a tuple nor a list is returned unchanged.
defp decode_header_value("from", value, _opts), do: value

# Everything else is handed to the encoded-word decoder as-is.
defp decode_header_value(_key, value, opts),
  do: parse_encoded_word(value, opts)

# See https://tools.ietf.org/html/rfc2047
defp parse_encoded_word("", _opts), do: ""

Expand Down Expand Up @@ -404,39 +450,59 @@ defmodule Mail.Parsers.RFC2822 do
defp parse_encoded_word(<<char::utf8, rest::binary>>, opts),
do: <<char::utf8, parse_encoded_word(rest, opts)::binary>>

defp parse_structured_header_value(string, value \\ nil, sub_types \\ [], acc \\ "")
defp parse_structured_header_value(
string,
value \\ nil,
sub_types \\ [],
part \\ :value,
acc \\ ""
)

defp parse_structured_header_value("", value, [{key, nil} | sub_types], acc),
defp parse_structured_header_value("", value, [{key, nil} | sub_types], _part, acc),
do: [value | Enum.reverse([{key, acc} | sub_types])]

defp parse_structured_header_value("", nil, [], acc),
defp parse_structured_header_value("", nil, [], _part, acc),
do: acc

defp parse_structured_header_value("", value, sub_types, ""),
defp parse_structured_header_value("", value, sub_types, _part, ""),
do: [value | Enum.reverse(sub_types)]

defp parse_structured_header_value("", value, [], acc),
defp parse_structured_header_value("", value, [], _part, acc),
do: [value, String.trim(acc)]

defp parse_structured_header_value("", value, sub_types, acc),
do: parse_structured_header_value("", value, sub_types, String.trim(acc))
defp parse_structured_header_value("", value, sub_types, part, acc),
do: parse_structured_header_value("", value, sub_types, part, String.trim(acc))

defp parse_structured_header_value(<<"\"", rest::binary>>, value, sub_types, acc) do
defp parse_structured_header_value(<<"\"", rest::binary>>, value, sub_types, part, acc) do
{string, rest} = parse_quoted_string(rest)
parse_structured_header_value(rest, value, sub_types, <<acc::binary, string::binary>>)
parse_structured_header_value(rest, value, sub_types, part, <<acc::binary, string::binary>>)
end

defp parse_structured_header_value(<<";", rest::binary>>, nil, sub_types, acc),
do: parse_structured_header_value(rest, acc, sub_types, "")

defp parse_structured_header_value(<<";", rest::binary>>, value, [{key, nil} | sub_types], acc),
do: parse_structured_header_value(rest, value, [{key, acc} | sub_types], "")
defp parse_structured_header_value(<<";", rest::binary>>, nil, sub_types, part, acc)
when part in [:value, :param_value],
do: parse_structured_header_value(rest, acc, sub_types, :param_name, "")

defp parse_structured_header_value(<<"=", rest::binary>>, value, sub_types, acc),
do: parse_structured_header_value(rest, value, [{key_to_atom(acc), nil} | sub_types], "")
defp parse_structured_header_value(
<<";", rest::binary>>,
value,
[{key, nil} | sub_types],
:param_value,
acc
),
do: parse_structured_header_value(rest, value, [{key, acc} | sub_types], :param_name, "")

defp parse_structured_header_value(<<char::utf8, rest::binary>>, value, sub_types, acc),
do: parse_structured_header_value(rest, value, sub_types, <<acc::binary, char::utf8>>)
defp parse_structured_header_value(<<"=", rest::binary>>, value, sub_types, :param_name, acc),
do:
parse_structured_header_value(
rest,
value,
[{key_to_atom(acc), nil} | sub_types],
:param_value,
""
)

defp parse_structured_header_value(<<char::utf8, rest::binary>>, value, sub_types, part, acc),
do: parse_structured_header_value(rest, value, sub_types, part, <<acc::binary, char::utf8>>)

defp parse_quoted_string(string, acc \\ "")

Expand Down
70 changes: 70 additions & 0 deletions test/mail/parsers/rfc_2822_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -939,6 +939,76 @@ defmodule Mail.Parsers.RFC2822Test do
assert message.headers["content-type"] == ["text/html", {"charset", "us-ascii"}]
end

# The Base64 payload decodes to "john.doe@redacte...", so the display name itself
# contains "@" and "." characters. The assertion checks that it comes back as the
# name half of the {name, address} tuple rather than being read as address syntax.
test "parses encoded word containing 'special' characters RFC 2047§6.2" do
  message =
    parse_email("""
    From: =?UTF-8?B?am9obi5kb2VAcmVkYWN0ZS4uLg==?= <[email protected]>
    """)

  assert message.headers["from"] == {"john.doe@redacte...", "[email protected]"}
end

# Runs four header fixtures modelled on the examples section of RFC 2047 and
# checks the decoded display names, address lists and subjects.
test "correct handling of encoded words according to RFC 2047 (examples)" do
# Fixture 1: Q-encoded display names in From/To/CC, plus a subject made of two
# adjacent B-encoded words that must be joined without the separating whitespace.
message =
parse_email("""
From: =?US-ASCII?Q?Keith_Moore?= <[email protected]>
To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= <[email protected]>
CC: =?ISO-8859-1?Q?Andr=E9?= Pirard <[email protected]>
Subject: =?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=
=?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=
""")

# Expected names keep the raw ISO-8859-1 bytes (e.g. \xF8) produced by decoding.
assert message.headers["from"] == {"Keith Moore", "[email protected]"}
assert message.headers["to"] == [{"Keld J\xF8rn Simonsen", "[email protected]"}]
assert message.headers["cc"] == [{"Andr\xE9 Pirard", "[email protected]"}]
assert message.headers["subject"] == "If you can read this you understand the example."

# Fixture 2: encoded From name alongside plain addresses and a plain subject,
# which must come back unchanged.
message =
parse_email("""
From: =?ISO-8859-1?Q?Olle_J=E4rnefors?= <[email protected]>
To: [email protected], [email protected]
Subject: Time for ISO 10646?
""")

assert message.headers["from"] == {"Olle J\xE4rnefors", "[email protected]"}
assert message.headers["to"] == ["[email protected]", "[email protected]"]
assert message.headers["subject"] == "Time for ISO 10646?"

# Fixture 3: unencoded named recipient, bare Cc addresses, and an encoded From
# name containing two non-ASCII characters.
message =
parse_email("""
To: Dave Crocker <[email protected]>
Cc: [email protected], [email protected]
From: =?ISO-8859-1?Q?Patrik_F=E4ltstr=F6m?= <[email protected]>
Subject: Re: RFC-HDR care and feeding
""")

assert message.headers["from"] == {"Patrik F\xE4ltstr\xF6m", "[email protected]"}
assert message.headers["to"] == [{"Dave Crocker", "[email protected]"}]
assert message.headers["cc"] == ["[email protected]", "[email protected]"]
assert message.headers["subject"] == "Re: RFC-HDR care and feeding"

# Fixture 4: From carries an encoded word inside a parenthesised comment on a
# continuation line, and To spans two lines with three named recipients.
message =
parse_email("""
From: Nathaniel Borenstein <[email protected]>
(=?iso-8859-8?b?7eXs+SDv4SDp7Oj08A==?=)
To: Greg Vaudreuil <[email protected]>, Ned Freed
<[email protected]>, Keith Moore <[email protected]>
Subject: Test of new header generator
MIME-Version: 1.0
Content-type: text/plain; charset=ISO-8859-1
""")

# The parenthesised comment is expected to be absent from the parsed From value.
assert message.headers["from"] == {"Nathaniel Borenstein", "[email protected]"}

assert message.headers["to"] == [
{"Greg Vaudreuil", "[email protected]"},
{"Ned Freed", "[email protected]"},
{"Keith Moore", "[email protected]"}
]

assert message.headers["subject"] == "Test of new header generator"
end

# Test helper: passes the raw email text through convert_crlf/1 (defined elsewhere
# in this test module) and hands the result to Mail.Parsers.RFC2822.parse/2 with
# the given options.
defp parse_email(email, opts \\ []) do
  email
  |> convert_crlf()
  |> Mail.Parsers.RFC2822.parse(opts)
end

Expand Down

0 comments on commit f8a1565

Please sign in to comment.