dhvcc · dhvcc · Feb 16, 2024 · Feb 13, 2024 · Feb 13, 2024 · Feb 13, 2024
diff --git a/rss_parser/_parser.py b/rss_parser/_parser.py
@@ -1,43 +1,57 @@
-from typing import ClassVar, Optional, Type
+from typing import Optional, Tuple, Type
 
 from xmltodict import parse
 
 from rss_parser.models import XMLBaseModel
+from rss_parser.models.atom import Atom
 from rss_parser.models.rss import RSS
 
 # >>> FUTURE
 # TODO: May be support generator based approach for big rss feeds
 # TODO: Add cli to parse to json
 # TODO: Possibly bundle as deb/rpm/exe
-# TODO: Atom support
-# TODO: Older RSS versions?
+# TODO: Older Atom versions
+# TODO: Older RSS versions
 
 
 class Parser:
-    """Parser for rss files."""
-
-    schema: ClassVar[Type[XMLBaseModel]] = RSS
+    """Parser for rss/atom files."""
 
     @staticmethod
-    def _check_atom(root: dict):
+    def check_schema(root: dict) -> Tuple[dict, Type[XMLBaseModel]]:
+        """
+        Pick the built-in schema that matches the parsed document.
+
+        :param root: dict produced by "to_xml"
+        :return: tuple of (data to validate, schema class to validate it with)
+        :raises KeyError: when the document has neither a "feed" nor an "rss" top-level key
+        """
         if "feed" in root:
-            raise NotImplementedError("ATOM feed is not currently supported")
+            # Atom declares "feed" as a field of its own, so it validates the whole document
+            return root, Atom
+        return root["rss"], RSS
 
     @staticmethod
     def to_xml(data: str, *args, **kwargs):
+        """Convert XML text to a dict with xmltodict; extra arguments are forwarded to it."""
         return parse(str(data), *args, **kwargs)
 
     @classmethod
-    def parse(cls, data: str, *, schema: Optional[Type[XMLBaseModel]] = None) -> XMLBaseModel:
+    def parse(cls, data: str, *, schema: Optional[Type[XMLBaseModel]] = None, root_key: str = "") -> XMLBaseModel:
         """
-        Parse XML data into schema (default: RSS 2.0).
+        Parse XML data into schema (default: RSS 2.0 or Atom).
 
         :param data: string of XML data that needs to be parsed
+        :param schema: model class to validate with; detected via "check_schema" when omitted
+        :param root_key: top-level key passed to a custom schema; "rss" if empty, whole document if the key is missing
         :return: "schema" object
         """
         root = cls.to_xml(data)
-        cls._check_atom(root)
-
-        schema = schema or cls.schema
+        # "schema" is a model class, not an instance, so "not supplied" is detected with an identity check
+        if schema is None:
+            root, schema = cls.check_schema(root)
+        else:
+            # Without an explicit key keep unwrapping "rss", as was done before Atom support
+            root = root.get(root_key or "rss", root)
 
-        return schema.parse_obj(root["rss"])
+        return schema.parse_obj(root)
diff --git a/rss_parser/models/atom/__init__.py b/rss_parser/models/atom/__init__.py
@@ -0,0 +1,3 @@
+from .atom import Atom
+
+__all__ = ("Atom",)
diff --git a/rss_parser/models/atom/atom.py b/rss_parser/models/atom/atom.py
@@ -0,0 +1,15 @@
+from typing import Optional
+
+from rss_parser.models import XMLBaseModel
+from rss_parser.models.atom.feed import Feed
+from rss_parser.models.types.tag import Tag
+from rss_parser.pydantic_proxy import import_v1_pydantic
+
+pydantic = import_v1_pydantic()
+
+
+class Atom(XMLBaseModel):
+    """Atom 1.0 document root: an optional "@version" attribute and the required "feed" element."""
+
+    version: Optional[Tag[str]] = pydantic.Field(alias="@version")
+    feed: Tag[Feed]
diff --git a/rss_parser/models/atom/entry.py b/rss_parser/models/atom/entry.py
@@ -0,0 +1,62 @@
+from typing import Optional
+
+from rss_parser.models import XMLBaseModel
+from rss_parser.models.types.date import DateTimeOrStr
+from rss_parser.models.types.tag import Tag
+from rss_parser.pydantic_proxy import import_v1_pydantic
+
+pydantic = import_v1_pydantic()
+
+
+class RequiredAtomEntryMixin(XMLBaseModel):
+    """Entry elements declared without a default, so they must be present."""
+
+    entry_id: Tag[str] = pydantic.Field(alias="id")
+    "Identifier for the entry."
+
+    title: Tag[str]
+    "The title of the entry."
+
+    # NOTE(review): kept as a raw string, unlike "published" which uses DateTimeOrStr - confirm this is intended
+    updated: Tag[str]
+    "Indicates when the entry was updated."
+
+
+class RecommendedAtomEntryMixin(XMLBaseModel):
+    """Recommended entry elements; each one is None when absent."""
+
+    author: Optional[Tag[dict]] = None
+    "Email, name, and URI of the author of the entry."
+
+    link: Optional[Tag[list]] = None
+    "The URL of the entry."
+
+    content: Optional[Tag[dict]] = None
+    "The main content of the entry."
+
+    summary: Optional[Tag[str]] = None
+    "Conveys a short summary, abstract, or excerpt of the entry. Some feeds use this tag as the main content."
+
+
+class OptionalAtomEntryMixin(XMLBaseModel):
+    """Optional entry elements; each one is None when absent."""
+
+    category: Optional[Tag[dict]] = None
+    "Specifies a category that the entry belongs to."
+
+    contributor: Optional[Tag[dict]] = None
+    "Email, name, and URI of the contributors of the entry."
+
+    rights: Optional[Tag[str]] = None
+    "The copyright of the entry."
+
+    published: Optional[Tag[DateTimeOrStr]] = None
+    "Indicates when the entry was published."
+
+    # dict, like author/contributor: the element wraps child elements rather than plain text
+    source: Optional[Tag[dict]] = None
+    "Contains metadata from the source feed if this entry is a copy."
+
+
+class Entry(RequiredAtomEntryMixin, RecommendedAtomEntryMixin, OptionalAtomEntryMixin, XMLBaseModel):
+    """Atom entry element. Field reference: https://validator.w3.org/feed/docs/atom.html"""
diff --git a/rss_parser/models/atom/feed.py b/rss_parser/models/atom/feed.py
@@ -0,0 +1,69 @@
+from typing import Optional
+
+from rss_parser.models import XMLBaseModel
+from rss_parser.models.atom.entry import Entry
+from rss_parser.models.image import Image
+from rss_parser.models.types.date import DateTimeOrStr
+from rss_parser.models.types.only_list import OnlyList
+from rss_parser.models.types.tag import Tag
+from rss_parser.pydantic_proxy import import_v1_pydantic
+
+pydantic = import_v1_pydantic()
+
+
+class RequiredAtomFeedMixin(XMLBaseModel):
+    """Feed elements declared without a default, so they must be present."""
+
+    feed_id: Tag[str] = pydantic.Field(alias="id")
+    "Identifies the feed using a universally unique and permanent URI."
+
+    title: Tag[str]
+    "Contains a human readable title for the feed."
+
+    updated: Tag[DateTimeOrStr]
+    "Indicates the last time the feed was modified in a significant way."
+
+
+class RecommendedAtomFeedMixin(XMLBaseModel):
+    """Recommended feed elements; each one is None when absent."""
+
+    # An Atom author wraps name/email/uri child elements, so the content is a dict (same as Entry.author)
+    author: Optional[Tag[dict]] = None
+    "Name, email, and URI of one author of the feed. A feed may have multiple author elements."
+
+    link: Optional[Tag[list]] = None
+    "The URL to the feed. A feed may have multiple link elements."
+
+
+class OptionalAtomFeedMixin(XMLBaseModel):
+    """Optional feed elements; "entries" defaults to an empty list, the rest to None."""
+
+    entries: Optional[OnlyList[Tag[Entry]]] = pydantic.Field(alias="entry", default=[])
+    "The entries in the feed. A feed may have multiple entry elements."
+
+    category: Optional[Tag[str]] = None
+    "Specifies a category that the feed belongs to. A feed may have multiple category elements."
+
+    contributor: Optional[Tag[dict]] = None
+    "Name, email, and URI of one contributor to the feed. A feed may have multiple contributor elements."
+
+    generator: Optional[Tag[str]] = None
+    "Identifies the software used to generate the feed, for debugging and other purposes."
+
+    # NOTE(review): Atom icon/logo elements carry a bare IRI as text - confirm the Image model accepts plain text
+    icon: Optional[Tag[Image]] = None
+    "Identifies a small image which provides iconic visual identification for the feed. Icons should be square."
+
+    logo: Optional[Tag[Image]] = None
+    "Identifies a larger image which provides visual identification for the feed.\
+    Images should be twice as wide as they are tall."
+
+    rights: Optional[Tag[str]] = None
+    "The copyright of the feed."
+
+    subtitle: Optional[Tag[str]] = None
+    "Contains a human readable description or subtitle for the feed."
+
+
+class Feed(RequiredAtomFeedMixin, RecommendedAtomFeedMixin, OptionalAtomFeedMixin, XMLBaseModel):
+    """Atom feed element. Field reference: https://validator.w3.org/feed/docs/atom.html"""
diff --git a/rss_parser/models/rss/__init__.py b/rss_parser/models/rss/__init__.py
@@ -0,0 +1,3 @@
+from .rss import RSS
+
+__all__ = ("RSS",)
diff --git a/rss_parser/models/channel.py → rss_parser/models/rss/channel.py b/rss_parser/models/channel.py → rss_parser/models/rss/channel.py
@@ -2,8 +2,8 @@
 
 from rss_parser.models import XMLBaseModel
 from rss_parser.models.image import Image
-from rss_parser.models.item import Item
-from rss_parser.models.text_input import TextInput
+from rss_parser.models.rss.item import Item
+from rss_parser.models.rss.text_input import TextInput
 from rss_parser.models.types.date import DateTimeOrStr
 from rss_parser.models.types.only_list import OnlyList
 from rss_parser.models.types.tag import Tag

diff --git a/rss_parser/models/item.py → rss_parser/models/rss/item.py b/rss_parser/models/item.py → rss_parser/models/rss/item.py
diff --git a/rss_parser/models/rss.py → rss_parser/models/rss/rss.py b/rss_parser/models/rss.py → rss_parser/models/rss/rss.py
@@ -1,7 +1,7 @@
 from typing import Optional
 
 from rss_parser.models import XMLBaseModel
-from rss_parser.models.channel import Channel
+from rss_parser.models.rss.channel import Channel
 from rss_parser.models.types.tag import Tag
 from rss_parser.pydantic_proxy import import_v1_pydantic
 

diff --git a/rss_parser/models/text_input.py → rss_parser/models/rss/text_input.py b/rss_parser/models/text_input.py → rss_parser/models/rss/text_input.py
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -15,7 +15,1 @@ def sample_and_result(request):
             return sample.read(), loads(result.read())
-
-
-@pytest.fixture
-def atom_feed():
-    with open(sample_dir / "atom.xml") as f:
-        return f.read()
diff --git a/tests/samples/atom.json b/tests/samples/atom.json
@@ -0,0 +1,134 @@
+{
+  "feed": {
+    "attributes": {},
+    "content": {
+      "author": null,
+      "category": null,
+      "contributor": null,
+      "entries": [
+        {
+          "attributes": {},
+          "content": {
+            "author": {
+              "attributes": {},
+              "content": {
+                "email": "johndoe@example.com",
+                "name": "John Doe",
+                "uri": "http://example.org/"
+              }
+            },
+            "category": null,
+            "content": {
+              "attributes": {
+                "type": "xhtml",
+                "xml:base": "http://diveintomark.org/",
+                "xml:lang": "en"
+              },
+              "content": {
+                "@type": "xhtml",
+                "@xml:base": "http://diveintomark.org/",
+                "@xml:lang": "en",
+                "div": {
+                  "p": {
+                    "i": "[Update: The Atom draft is finished.]"
+                  }
+                }
+              }
+            },
+            "contributor": {
+              "attributes": {},
+              "content": {
+                "name": "John Doe"
+              }
+            },
+            "entry_id": {
+              "attributes": {},
+              "content": "tag:example.org,2003:3.2397"
+            },
+            "link": {
+              "attributes": {},
+              "content": [
+                {
+                  "@href": "http://example.org/2005/04/02/atom",
+                  "@rel": "alternate",
+                  "@type": "text/html"
+                },
+                {
+                  "@href": "http://example.org/audio/ph34r_my_podcast.mp3",
+                  "@length": "1337",
+                  "@rel": "enclosure",
+                  "@type": "audio/mpeg"
+                }
+              ]
+            },
+            "published": {
+              "attributes": {},
+              "content": "2003-12-13 08:29:29-04:00"
+            },
+            "rights": null,
+            "source": null,
+            "summary": null,
+            "title": {
+              "attributes": {},
+              "content": "Atom draft-07 snapshot"
+            },
+            "updated": {
+              "attributes": {},
+              "content": "2005-07-31T12:29:29Z"
+            }
+          }
+        }
+      ],
+      "feed_id": {
+        "attributes": {},
+        "content": "tag:example.org,2003:3"
+      },
+      "generator": {
+        "attributes": {
+          "uri": "http://www.example.com/",
+          "version": "1.0"
+        },
+        "content": "Example Toolkit"
+      },
+      "icon": null,
+      "link": {
+        "attributes": {},
+        "content": [
+          {
+            "@href": "http://example.org/",
+            "@hreflang": "en",
+            "@rel": "alternate",
+            "@type": "text/html"
+          },
+          {
+            "@href": "http://example.org/feed.atom",
+            "@rel": "self",
+            "@type": "application/atom+xml"
+          }
+        ]
+      },
+      "logo": null,
+      "rights": {
+        "attributes": {},
+        "content": "Copyright (c) 2003, John Doe"
+      },
+      "subtitle": {
+        "attributes": {
+          "type": "html"
+        },
+        "content": "A <em>lot</em> of effort\n            went into making this effortless"
+      },
+      "title": {
+        "attributes": {
+          "type": "text"
+        },
+        "content": "Title"
+      },
+      "updated": {
+        "attributes": {},
+        "content": "2005-07-31 12:29:29+00:00"
+      }
+    }
+  },
+  "version": null
+}
diff --git a/tests/test_parsing.py b/tests/test_parsing.py
@@ -7,7 +7,7 @@
 
 @pytest.mark.parametrize(
     "sample_and_result",
-    [["rss_2"], ["rss_2_no_category_attr"], ["apology_line"], ["rss_2_with_1_item"]],
+    [["rss_2"], ["rss_2_no_category_attr"], ["apology_line"], ["rss_2_with_1_item"], ["atom"]],
     indirect=True,
 )
 def test_parses_all_samples(sample_and_result):
@@ -38,7 +38,1 @@ def test_json_plain_ignores_attributes(sample_and_result):
     assert left == right
-
-
-def test_fails_atom_feed(atom_feed):
-    # Expect ATOM feed to fail since it's not supported
-    with pytest.raises(NotImplementedError):
-        Parser.parse(atom_feed)