Skip to content

Commit

Permalink
Added a paywalled article indicator.
Browse files Browse the repository at this point in the history
  • Loading branch information
dobicinaitis committed May 22, 2024
1 parent a320df7 commit 8d57808
Show file tree
Hide file tree
Showing 9 changed files with 90 additions and 10 deletions.
15 changes: 8 additions & 7 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -37,20 +37,21 @@ gitVersioning.apply {

dependencies {
implementation("info.picocli:picocli:4.7.5")
implementation('io.micronaut.picocli:micronaut-picocli:5.2.0')
implementation("io.micronaut.picocli:micronaut-picocli:5.2.0")
implementation("com.github.tulskiy:jkeymaster:1.3")
implementation("ch.qos.logback:logback-classic:1.4.14")
implementation('org.fusesource.jansi:jansi:2.4.1')
implementation('com.apptasticsoftware:rssreader:3.6.0')
implementation('org.telegram:telegrambots:6.9.7.1')
implementation("org.fusesource.jansi:jansi:2.4.1")
implementation("com.apptasticsoftware:rssreader:3.6.0")
implementation("org.telegram:telegrambots:6.9.7.1")
implementation("com.google.code.gson:gson:2.10.1")
implementation('com.google.guava:guava:32.1.3-jre')
implementation("com.google.guava:guava:32.1.3-jre")
implementation("dev.failsafe:failsafe:3.3.2")
testImplementation('org.junit-pioneer:junit-pioneer:2.2.0')
implementation("org.jsoup:jsoup:1.17.2")
testImplementation("org.junit-pioneer:junit-pioneer:2.2.0")
testImplementation("org.wiremock:wiremock:3.4.2")
annotationProcessor("org.projectlombok:lombok")
annotationProcessor("info.picocli:picocli-codegen")
testImplementation('org.projectlombok:lombok:1.18.32')
testImplementation("org.projectlombok:lombok:1.18.32")
testAnnotationProcessor("org.projectlombok:lombok")
compileOnly("org.projectlombok:lombok")
runtimeOnly("org.yaml:snakeyaml")
Expand Down
1 change: 1 addition & 0 deletions src/main/java/dev/dobicinaitis/feedreader/dto/Article.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ public class Article implements Comparable<Article> {
private String link;
private String imageUrl;
private ZonedDateTime publicationDate;
private boolean paywalled;

@Override
public int compareTo(Article other) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ public enum TitleEmoji {
PHOTO("\uD83D\uDCF7"), // 📷
PHOTO_AND_VIDEO(" \uD83D\uDCF7\uD83C\uDFAC"), // 📷🎬
NOTICE("❗"), //❗️
ARTICLE("\uD83D\uDCF0"); // 📰
ARTICLE("\uD83D\uDCF0"), // 📰
PAYWALL("\uD83D\uDCB0"); // 💰

private final String unicode;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@
import dev.dobicinaitis.feedreader.util.UrlUtils;
import lombok.Setter;
import lombok.extern.slf4j.Slf4j;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;

import java.io.File;
import java.io.FileWriter;
Expand All @@ -25,6 +28,7 @@
public class SyncService {

public static final String TITLE_TAG_SEPARATOR = "⟩";
public static final String PAYWALL_CSS_SELECTOR = "div.article__label.article__premium-flag";

private final TelegramService telegram;
private final FeedReaderService feedReader;
Expand Down Expand Up @@ -67,6 +71,10 @@ public void sync() {
log.info("Replacing shortened links with post-redirect ones.");
articles.parallelStream().forEach(article -> article.setLink(UrlUtils.getRedirectUrl(article.getLink())));

// Check if any of the new articles are subscription-only and set the paywalled flag accordingly.
log.info("Updating paywall flags.");
articles.parallelStream().forEach(article -> article.setPaywalled(hasPaywallLabel(article.getLink())));

log.info("Posting {} new articles to Telegram.", articles.size());
final Article lastPostedArticle = telegram.postArticles(articles);

Expand All @@ -90,6 +98,7 @@ private boolean isStatusFileUsed() {

/**
* Converts the RSS feed items to Article objects.
*
* @param items RSS feed items
* @return list of Article objects
*/
Expand Down Expand Up @@ -214,4 +223,27 @@ private void writeSyncStatusToFile(SyncStatus syncStatus) {
throw new FeedReaderRuntimeException(e);
}
}

/**
 * Parses the HTML source code of a URL to check for the presence of a paywall label.
 * <p>
 * The check is best-effort: a missing, blank or malformed URL, as well as a failure to download the page,
 * results in {@code false} instead of an exception, so that a single bad link cannot abort the caller.
 *
 * @param url article URL, may be {@code null} or blank
 * @return true if the page could be fetched and contains an element matching {@link #PAYWALL_CSS_SELECTOR},
 * false otherwise
 */
protected boolean hasPaywallLabel(final String url) {
    // Nothing to fetch; also avoids the IllegalArgumentException jsoup raises for empty URLs.
    if (url == null || url.isBlank()) {
        return false;
    }
    try {
        final Document htmlDocument = Jsoup.connect(url).get();
        final Element paywallLabelElement = htmlDocument.selectFirst(PAYWALL_CSS_SELECTOR);
        if (paywallLabelElement != null) {
            log.debug("Article is paywalled: {}", url);
            return true;
        }
    } catch (IOException e) {
        log.error("Failed to connect to URL: {}", url, e);
    } catch (IllegalArgumentException e) {
        // jsoup reports malformed URLs with an unchecked exception; treat them like an unreachable page.
        log.error("Invalid article URL: {}", url, e);
    }
    return false;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import com.google.common.util.concurrent.RateLimiter;
import dev.dobicinaitis.feedreader.dto.Article;
import dev.dobicinaitis.feedreader.dto.TitleEmoji;
import dev.dobicinaitis.feedreader.misc.LabelHolder;
import dev.dobicinaitis.feedreader.util.UrlUtils;
import dev.failsafe.Failsafe;
Expand Down Expand Up @@ -153,12 +154,13 @@ private boolean postTextOnlyArticle(final Article article) {
* @return caption
*/
private String prepareCaption(final Article article) {
final String paywallEmoji = article.isPaywalled() ? TitleEmoji.PAYWALL.getUnicode() : "";
return """
%s *%s*
%s
""".formatted(
article.getTitleEmoji().getUnicode(),
paywallEmoji + article.getTitleEmoji().getUnicode(),
escapeSpecialCharacters(article.getTitle()),
escapeSpecialCharacters(article.getDescription())
);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ public class TestFeedServer {
private static final String RSS_FILE = "test-rss.xml";
public static final String IMAGE_FILE_RC_200 = "image.gif";
public static final String IMAGE_FILE_RC_503 = "unavailable.jpg"; // does not exist
public static final String PAYWALLED_ARTICLE_ENDPOINT = "paywalled-article";
public static final String FREE_ARTICLE_ENDPOINT = "free-article";

private final WireMockServer server;

Expand Down Expand Up @@ -47,6 +49,16 @@ public TestFeedServer() {
.withStatus(503)
.withBody("Service Unavailable")
));
stubFor(get(urlEqualTo("/" + PAYWALLED_ARTICLE_ENDPOINT)).willReturn(aResponse()
.withStatus(200)
.withBodyFile("article-with-paywall-label.html")
.withHeader("Content-Type", "text/html")
));
stubFor(get(urlEqualTo("/" + FREE_ARTICLE_ENDPOINT)).willReturn(aResponse()
.withStatus(200)
.withBodyFile("article-without-paywall-label.html")
.withHeader("Content-Type", "text/html")
));
}

public void stop() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ void shouldReturnANewSyncStatusObjectIfFileIsEmpty() throws Exception {
}

@Test
void ShouldRemoveProcessedArticles() {
void shouldRemoveProcessedArticles() {
// given
final ZonedDateTime cutOffDate = ZonedDateTime.now();
final List<Article> articles = new ArrayList<>();
Expand All @@ -141,6 +141,16 @@ void ShouldRemoveProcessedArticles() {
assertEquals("new", articles.get(0).getTitle(), "The newest article should be left.");
}

@Test
void shouldCheckForPresenceOfAPaywallLabel() {
    // given: one URL per article page served by the test feed server
    final String premiumUrl = feedServer.getBaseUrl() + "/" + TestFeedServer.PAYWALLED_ARTICLE_ENDPOINT;
    final String openUrl = feedServer.getBaseUrl() + "/" + TestFeedServer.FREE_ARTICLE_ENDPOINT;

    // when, then: the page with the label is detected as paywalled
    final boolean premiumDetected = syncService.hasPaywallLabel(premiumUrl);
    assertTrue(premiumDetected, "Paywalled article should have a paywall label.");

    // when, then: the page without the label is not
    final boolean openDetected = syncService.hasPaywallLabel(openUrl);
    assertFalse(openDetected, "Free article should not have a paywall label.");
}

/**
* Prepares a test RSS feed item.
*
Expand Down Expand Up @@ -183,4 +193,5 @@ private File prepareStatusFile(String title, ZonedDateTime publicationDate) thro
Files.write(statusFile.toPath(), jsonString.getBytes());
return statusFile;
}

}
10 changes: 10 additions & 0 deletions src/test/resources/__files/article-with-paywall-label.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<!DOCTYPE html>
<!-- Test fixture: minimal article page containing a nested div that carries both the
     "article__label" and "article__premium-flag" classes. -->
<html lang="lv">
<article>
<div>
<div class="some-nested-div">
<div class="article__label article__premium-flag">Abonentiem</div>
</div>
</div>
</article>
</html>
10 changes: 10 additions & 0 deletions src/test/resources/__files/article-without-paywall-label.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<!DOCTYPE html>
<!-- Test fixture: minimal article page in which no element carries the
     "article__label" / "article__premium-flag" classes. -->
<html lang="lv">
<article>
<div>
<div class="some-nested-div">
<div>bla, bla, bla</div>
</div>
</div>
</article>
</html>

0 comments on commit 8d57808

Please sign in to comment.