diff --git a/Cargo.lock b/Cargo.lock index a0c19c6..6722dcd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -590,6 +590,15 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "html-escape" +version = "0.2.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d1ad449764d627e22bfd7cd5e8868264fc9236e07c752972b4080cd351cb476" +dependencies = [ + "utf8-width", +] + [[package]] name = "html2text" version = "0.12.5" @@ -1272,6 +1281,7 @@ dependencies = [ "crossterm", "diligent-date-parser", "directories", + "html-escape", "html2text", "num_cpus", "opml", @@ -1727,6 +1737,12 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" +[[package]] +name = "utf8-width" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86bd8d4e895da8537e5315b8254664e6b769c4ff3db18321b297a1e7004392e3" + [[package]] name = "utf8parse" version = "0.2.1" diff --git a/Cargo.toml b/Cargo.toml index a94fbeb..c0a728c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,6 +19,7 @@ crossterm = "0.27" diligent-date-parser = "0.1" directories = "5" html2text = "0.12" +html-escape = "0.2.13" num_cpus = "1.16" opml = "1.1" r2d2 = "0.8" diff --git a/src/rss.rs b/src/rss.rs index dce7ab4..e9aef23 100644 --- a/src/rss.rs +++ b/src/rss.rs @@ -5,6 +5,7 @@ use crate::modes::ReadMode; use anyhow::{bail, Context, Result}; use atom_syndication as atom; use chrono::prelude::{DateTime, Utc}; +use html_escape::decode_html_entities_to_string; use rss::Channel; use rusqlite::params; use rusqlite::types::{FromSql, ToSqlOutput}; @@ -155,11 +156,25 @@ struct IncomingEntry { impl From<&atom::Entry> for IncomingEntry { fn from(entry: &atom::Entry) -> Self { Self { - title: Some(entry.title().to_string()), - author: entry.authors().first().map(|author| author.name.to_owned()), + title: { + let mut title = String::new(); + decode_html_entities_to_string(entry.title(), &mut title); + Some(title) + }, + author: entry.authors().first().map(|entry_author| { + let mut author = String::new(); + decode_html_entities_to_string(&entry_author.name, &mut author); + author + }), pub_date: entry.published().map(|date| date.with_timezone(&Utc)), description: None, - content: entry.content().and_then(|content| content.value.to_owned()), + content: entry.content().and_then(|entry_content| { + entry_content.value().map(|entry_content| { + let mut content = String::new(); + decode_html_entities_to_string(entry_content, &mut content); + content + }) + }), link: entry.links().first().map(|link| link.href().to_string()), } } @@ -168,13 +183,27 @@ impl From<&atom::Entry> for IncomingEntry { impl From<&rss::Item> for IncomingEntry { fn from(entry: &rss::Item) -> Self { Self { - title: entry.title().map(|title| title.to_owned()), - author: entry.author().map(|author| author.to_owned()), + title: entry.title().map(|entry_title| { + let mut title = String::new(); + decode_html_entities_to_string(entry_title, &mut title); + title + }), + author: entry.author().map(|entry_author| { + let mut author = String::new(); + decode_html_entities_to_string(entry_author, &mut author); + author + }), pub_date: entry.pub_date().and_then(parse_datetime), - description: entry - .description() - .map(|description| description.to_owned()), - content: entry.content().map(|content| content.to_owned()), + description: entry.description().map(|entry_description| { + let mut description = String::new(); + decode_html_entities_to_string(entry_description, &mut description); + description + }), + content: entry.content().map(|entry_content| { + let mut content = String::new(); + decode_html_entities_to_string(entry_content, &mut content); + content + }), link: entry.link().map(|link| link.to_owned()), } }