Skip to content

Commit

Permalink
Merge pull request #87 from leozqin/full-text
Browse files Browse the repository at this point in the history
Add retrieve content flag and documentation for unit tests
  • Loading branch information
leozqin authored Dec 15, 2024
2 parents d3854b1 + c804681 commit a48a703
Show file tree
Hide file tree
Showing 10 changed files with 39 additions and 14 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/full_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ jobs:
- name: unit_test
run: |
source .venv/bin/activate
pytest -vvv -cov
make unit-test
make clean
- name: integration_test
run: |
Expand All @@ -46,4 +46,4 @@ jobs:
precis load-feeds
precis check-feeds
make run-ci
make test
make integration-test
8 changes: 6 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ build:
clean:
rm -r ${DATA_DIR}

.PHONY: test
test:
.PHONY: integration-test
integration-test:
go test tests/integration/*.go -v

.PHONY: unit-test
unit-test:
pytest -vvv -cov
7 changes: 6 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,17 @@ Then to develop, in one terminal start tailwind by doing `make tw`. Then, in oth

If you use `nix` or `nixos`, run `nix develop` to enter a dev shell and then follow the install instructions above (including creating and activating the venv).

### Unit Tests
Precis has unit tests written against `pytest`. They are automated to run during the pull request pipeline, but you can also run them locally.

Simply do `make unit-test` to run unit tests.

### Integration Tests
Precis has integration tests that are written in Go. They are automated to run during the pull request pipeline, but they can also be run locally.

First, install the version of Go specified in `go.mod`. I recommend using a Go version manager such as `gvm` or `g`.

Then, start the application using `make run`. Finally, run the integration tests with `make test`.
Then, start the application using `make run`. Finally, run the integration tests with `make integration-test`.

# Features
## OPML Import/Export
Expand Down
2 changes: 2 additions & 0 deletions app/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -324,6 +324,7 @@ async def update_feed(
preview_only: Annotated[bool, Form()] = False,
refresh_enabled: Annotated[bool, Form()] = False,
use_script: Annotated[bool, Form()] = False,
retrieve_content: Annotated[bool, Form()] = False,
):
try:
feed = Feed(
Expand All @@ -335,6 +336,7 @@ async def update_feed(
preview_only=preview_only,
refresh_enabled=refresh_enabled,
use_script=use_script,
retrieve_content=retrieve_content,
)

await bk.update_feed(feed=feed)
Expand Down
9 changes: 8 additions & 1 deletion app/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,14 @@ async def get_content(
return EntryContent(url=entry.url, banned=True)

try:
html = await self.get_html(url=entry.url, use_script=feed.use_script)
if not feed.retrieve_content:
logger.info(
"Feed is configured to not retrieve content, using rss content"
)
html = entry.content
else:
html = await self.get_html(url=entry.url, use_script=feed.use_script)

content = self.get_main_content(content=html)
if not html or not content:
return EntryContent(url=entry.url, unretrievable=True)
Expand Down
2 changes: 2 additions & 0 deletions app/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ class Feed(BaseModel):
preview_only: bool = False
refresh_enabled: bool = True
use_script: bool = False
retrieve_content: bool = True

@property
def rss(self) -> Type[FeedParserDict]:
Expand All @@ -37,6 +38,7 @@ class FeedEntry(BaseModel):
url: str
published_at: int
updated_at: int
content: str = None
authors: list[str] = []
preview: str = None

Expand Down
10 changes: 3 additions & 7 deletions app/rss.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from logging import getLogger
from pathlib import Path
from tempfile import SpooledTemporaryFile
from typing import List, Mapping, Type
from typing import List, Mapping

from opml import OpmlDocument, OpmlOutline
from ruamel.yaml import YAML
Expand Down Expand Up @@ -54,13 +54,15 @@ async def _process_feed_entry(
self, entry: Mapping, feed: Feed, start_ts: int
) -> True:
published_time = timegm(entry.published_parsed)
content = "".join(i.get("value", "") for i in entry.get("content", []))
feed_entry = FeedEntry(
**{
"title": entry.title,
"url": entry.link,
"published_at": timegm(entry.published_parsed),
"updated_at": timegm(entry.updated_parsed),
"preview": entry.summary,
"content": content if content != "" else None,
"feed_id": feed.id,
"authors": (
[i["name"] for i in entry.authors] if "authors" in entry else []
Expand Down Expand Up @@ -105,7 +107,6 @@ async def _check_feed(self, feed: Feed):
logger.info(f"Found {counter} new item(s) for feed {feed.name}")

async def check_feeds(self) -> List:

now = int(datetime.now(tz=timezone.utc).timestamp())
logger.info(f"Checking feeds starting at time {now}")

Expand All @@ -115,15 +116,13 @@ async def check_feeds(self) -> List:
await self._check_feed(feed=feed)

async def check_feed_by_id(self, id: str) -> List:

feed = self.db.get_feed(id=id)

logger.info(f"Manual refresh requested for feed {feed.name}")

await self._check_feed(feed=feed)

async def add_feed_entry(self, feed: Feed, entry: FeedEntry) -> None:

logger.info(f"Upserting entry from {feed.name}: {entry.title} - id {entry.id}")

self.db.upsert_feed_entry(feed=feed, entry=entry)
Expand Down Expand Up @@ -170,7 +169,6 @@ async def feeds_to_opml(self) -> OpmlDocument:
return out_path, file_name

async def opml_to_feeds(self, file: SpooledTemporaryFile):

opml = OpmlDocument.load(fp=file)

feeds = []
Expand All @@ -193,7 +191,6 @@ async def opml_to_feeds(self, file: SpooledTemporaryFile):
self.db.upsert_settings(settings=settings)

async def backup(self):

feeds = self.db.get_feeds()
settings: GlobalSettings = self.db.get_settings()
handlers = self.db.get_handlers()
Expand Down Expand Up @@ -229,7 +226,6 @@ async def backup(self):
return out_path, file_name

async def restore(self, file: SpooledTemporaryFile):

bk = load(file)

settings = GlobalSettings(db=self.db, **bk.get("settings", {}))
Expand Down
5 changes: 5 additions & 0 deletions app/templates/feed_config.html
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,11 @@ <h2 class="text-4xl lg:text-2xl my-5 justify-center flex">
<input name="use_script" type="checkbox" class="toggle" {% if feed.use_script !=False %} checked {%
endif %} />
</label>
<label class="label cursor-pointer gap-2">
<span class="label-text text-2xl lg:text-xl">Retrieve Content from URL</span>
<input name="retrieve_content" type="checkbox" class="toggle" {% if feed.retrieve_content !=False %} checked {%
endif %} />
</label>
</div>
<div>
<button type="submit"
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "precis"
version = "0.3.7"
version = "0.3.8"
description = "A framework for automating your media diet"
requires-python = ">=3.11"
license = {file = "LICENSE"}
Expand Down
4 changes: 4 additions & 0 deletions tests/integration/config/feeds.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,7 @@
- name: The Verge
category: tech
url: https://www.theverge.com/rss/index.xml
- name: How to Fix the Internet
category: tech
url: https://feeds.eff.org/howtofixtheinternet
retrieve_content: false

0 comments on commit a48a703

Please sign in to comment.