Skip to content

Commit

Permalink
rss: decode HTML special chars when ingesting atom/rss feed contents
Browse files Browse the repository at this point in the history
Without this change, the main UI could show feed info and entries with
raw HTML entities, as in "&ldquo;". Decoding them at the source is
the best approach, since the decoded text gets stored right in the DB.

Signed-off-by: Gustavo Lima Chaves <[email protected]>
  • Loading branch information
glima committed Apr 21, 2024
1 parent 7440f6a commit 50b9678
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 9 deletions.
16 changes: 16 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ crossterm = "0.27"
diligent-date-parser = "0.1"
directories = "5"
html2text = "0.12"
html-escape = "0.2.13"
num_cpus = "1.16"
opml = "1.1"
r2d2 = "0.8"
Expand Down
47 changes: 38 additions & 9 deletions src/rss.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use crate::modes::ReadMode;
use anyhow::{bail, Context, Result};
use atom_syndication as atom;
use chrono::prelude::{DateTime, Utc};
use html_escape::decode_html_entities_to_string;
use rss::Channel;
use rusqlite::params;
use rusqlite::types::ToSqlOutput;
Expand Down Expand Up @@ -91,11 +92,25 @@ impl From<&atom::Entry> for Entry {
Self {
id: -1,
feed_id: -1,
title: Some(entry.title().to_string()),
author: entry.authors().first().map(|author| author.name.to_owned()),
title: {
let mut title = String::new();
decode_html_entities_to_string(entry.title(), &mut title);
Some(title)
},
author: entry.authors().first().map(|entry_author| {
let mut author = String::new();
decode_html_entities_to_string(&entry_author.name, &mut author);
author
}),
pub_date: entry.published().map(|date| date.with_timezone(&Utc)),
description: None,
content: entry.content().and_then(|content| content.value.to_owned()),
content: entry.content().and_then(|entry_content| {
entry_content.value().map(|entry_content| {
let mut content = String::new();
decode_html_entities_to_string(entry_content, &mut content);
content
})
}),
link: entry.links().first().map(|link| link.href().to_string()),
read_at: None,
inserted_at: Utc::now(),
Expand All @@ -109,13 +124,27 @@ impl From<&rss::Item> for Entry {
Self {
id: -1,
feed_id: -1,
title: entry.title().map(|title| title.to_owned()),
author: entry.author().map(|author| author.to_owned()),
title: entry.title().map(|entry_title| {
let mut title = String::new();
decode_html_entities_to_string(entry_title, &mut title);
title
}),
author: entry.author().map(|entry_author| {
let mut author = String::new();
decode_html_entities_to_string(entry_author, &mut author);
author
}),
pub_date: entry.pub_date().and_then(parse_datetime),
description: entry
.description()
.map(|description| description.to_owned()),
content: entry.content().map(|content| content.to_owned()),
description: entry.description().map(|entry_description| {
let mut description = String::new();
decode_html_entities_to_string(entry_description, &mut description);
description
}),
content: entry.content().map(|entry_content| {
let mut content = String::new();
decode_html_entities_to_string(entry_content, &mut content);
content
}),
link: entry.link().map(|link| link.to_owned()),
read_at: None,
inserted_at: Utc::now(),
Expand Down

0 comments on commit 50b9678

Please sign in to comment.