Skip to content

Commit

Permalink
Split Parser and reorganise package
Browse files Browse the repository at this point in the history
Parser turns out to not really make sense as a superclass / ABC: it
really only has one useful method, and because parsers use delegation
there's no real way to override the utility methods / shortcuts, so
they're only useful on the caller / client side but they constrain the
implementor (who has to extend the ABC and then possibly deal with
multiple-inheritance shenanigans).

Making the core object just a callable protocol instead makes the
implementation somewhat simpler and more flexible (e.g. just a
function or HoF can be a "parser"), however the convenient utility
methods *are* important for end users and should not be discounted.

For that, keep a wrapper `Parser` object which can be wrapped around a
"parser" in order to provide the additional convenience (similar to
the free functions at the root). Importantly, `Parser` methods can
also be used as free functions by passing a "parser" as `self`: the
two are intended to be compatible. This doesn't work super well from
the typechecking perspective, but it works well enough.

Consideration was given to making the free functions at the package
root parametric on the parser e.g.

    def parse(ua: str, resolver: Optional[Resolver] = None, /) -> ParseResult:
        if resolver is None:
            from . import parser as resolver

        return resolver(ua, Domain.ALL).complete()

but that feels like it would be pretty error-prone, in the sense that
it would be too easy to forget to pass in the resolver, compared to
consistently resolving via a bespoke parser, or just installing a
parser globally.

Also move things around a bit:

- move matcher utility functions out of the core, un-prefix them since
  we're using `__all__` for visibility anyway
- move eager matchers out of the core, similar to the lazy matchers

Fixes #189
  • Loading branch information
masklinn committed Feb 27, 2024
1 parent 8d4e624 commit d523ecb
Show file tree
Hide file tree
Showing 18 changed files with 404 additions and 369 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ class EagerWriter(Writer):
__all__ = ["MATCHERS"]
from typing import Tuple, List
from .core import UserAgentMatcher, OSMatcher, DeviceMatcher
from .matchers import UserAgentMatcher, OSMatcher, DeviceMatcher
MATCHERS: Tuple[List[UserAgentMatcher], List[OSMatcher], List[DeviceMatcher]] = ([
"""
Expand Down
98 changes: 70 additions & 28 deletions src/ua_parser/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,29 +16,25 @@
This way importing anything but the top-level package should not be
necessary unless you want to *implement* a parser.
"""
from __future__ import annotations

__all__ = [
"BasicParser",
"CachingParser",
"BasicResolver",
"CachingResolver",
"Clearing",
"DefaultedParseResult",
"Device",
"DeviceMatcher",
"Domain",
"LRU",
"Locking",
"Matchers",
"OS",
"OSMatcher",
"ParseResult",
"Parser",
"Resolver",
"PartialParseResult",
"UserAgent",
"UserAgentMatcher",
"load_builtins",
"load_lazy_builtins",
"load_data",
"load_yaml",
"parse",
"parse_device",
"parse_os",
Expand All @@ -48,43 +44,89 @@
import contextlib
from typing import Callable, Optional

from .basic import Parser as BasicParser
from .caching import CachingParser, Clearing, Locking, LRU
from .basic import Resolver as BasicResolver
from .caching import CachingResolver, Clearing, Locking, LRU
from .core import (
DefaultedParseResult,
Device,
DeviceMatcher,
Domain,
Matchers,
OS,
OSMatcher,
Parser,
ParseResult,
PartialParseResult,
Resolver,
UserAgent,
UserAgentMatcher,
)
from .loaders import load_builtins, load_data, load_lazy_builtins, load_yaml
from .loaders import load_builtins, load_lazy_builtins

Re2Parser: Optional[Callable[[Matchers], Parser]] = None
Re2Resolver: Optional[Callable[[Matchers], Resolver]] = None
with contextlib.suppress(ImportError):
from .re2 import Parser as Re2Parser
from .re2 import Resolver as Re2Resolver


VERSION = (1, 0, 0)


class Parser:
    """Convenience wrapper around a resolver.

    A resolver is any callable taking ``(ua, domains)`` and returning a
    :class:`PartialParseResult`. This class stores one such callable,
    delegates ``__call__`` to it, and adds the ``parse*`` shortcut
    methods on top.
    """

    @classmethod
    def from_matchers(cls, m: Matchers, /) -> Parser:
        """Build a parser from a set of matchers.

        If ``Re2Resolver`` is available (not ``None``), it is used
        directly. Otherwise a :class:`BasicResolver` is wrapped in a
        :class:`CachingResolver` backed by ``Locking(LRU(200))``.
        """
        if Re2Resolver is not None:
            return cls(Re2Resolver(m))
        else:
            return cls(
                CachingResolver(
                    BasicResolver(m),
                    Locking(LRU(200)),
                )
            )

    def __init__(self, resolver: Resolver) -> None:
        """Wrap ``resolver``, the callable every lookup is delegated to."""
        self.resolver = resolver

    def __call__(self, ua: str, domains: Domain, /) -> PartialParseResult:
        """Parses the ``ua`` string, returning a parse result with *at least*
        the requested :class:`domains <Domain>` resolved (whether to success or
        failure).

        A parser may resolve more :class:`domains <Domain>` than
        requested, but it *must not* resolve less.
        """
        return self.resolver(ua, domains)

    def parse(self, ua: str) -> ParseResult:
        """Convenience method for parsing all domains, and falling back to
        default values for all failures.
        """
        return self(ua, Domain.ALL).complete()

    def parse_user_agent(self, ua: str) -> Optional[UserAgent]:
        """Convenience method for parsing only the :class:`UserAgent` domain.

        Returns the ``user_agent`` attribute of the partial result as-is;
        unlike :meth:`parse`, ``complete()`` is not called, so no default
        value is substituted here.
        """
        # NOTE(review): the return type is Optional, so this is presumably
        # None when no matcher succeeded -- confirm against PartialParseResult.
        return self(ua, Domain.USER_AGENT).user_agent

    def parse_os(self, ua: str) -> Optional[OS]:
        """Convenience method for parsing only the :class:`OS` domain.

        Returns the ``os`` attribute of the partial result as-is; unlike
        :meth:`parse`, ``complete()`` is not called, so no default value
        is substituted here.
        """
        # NOTE(review): presumably None when no matcher succeeded -- confirm
        # against PartialParseResult.
        return self(ua, Domain.OS).os

    def parse_device(self, ua: str) -> Optional[Device]:
        """Convenience method for parsing only the :class:`Device` domain.

        Returns the ``device`` attribute of the partial result as-is;
        unlike :meth:`parse`, ``complete()`` is not called, so no default
        value is substituted here.
        """
        # NOTE(review): presumably None when no matcher succeeded -- confirm
        # against PartialParseResult.
        return self(ua, Domain.DEVICE).device


parser: Parser


def __getattr__(name: str) -> Parser:
global parser
if name == "parser":
if Re2Parser is not None:
parser = Re2Parser(load_lazy_builtins())
else:
parser = CachingParser(
BasicParser(load_builtins()),
Locking(LRU(200)),
)
parser = Parser.from_matchers(
load_builtins() if Re2Resolver is None else load_lazy_builtins()
)
return parser
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

Expand All @@ -105,7 +147,7 @@ def parse(ua: str) -> ParseResult:
# parser, a `global` access fails to and we get a NameError
from . import parser

return parser.parse(ua)
return parser(ua, Domain.ALL).complete()


def parse_user_agent(ua: str) -> Optional[UserAgent]:
Expand All @@ -114,7 +156,7 @@ def parse_user_agent(ua: str) -> Optional[UserAgent]:
"""
from . import parser

return parser.parse_user_agent(ua)
return parser(ua, Domain.USER_AGENT).user_agent


def parse_os(ua: str) -> Optional[OS]:
Expand All @@ -123,7 +165,7 @@ def parse_os(ua: str) -> Optional[OS]:
"""
from . import parser

return parser.parse_os(ua)
return parser(ua, Domain.OS).os


def parse_device(ua: str) -> Optional[Device]:
Expand All @@ -132,4 +174,4 @@ def parse_device(ua: str) -> Optional[Device]:
"""
from . import parser

return parser.parse_device(ua)
return parser(ua, Domain.DEVICE).device
2 changes: 1 addition & 1 deletion src/ua_parser/_matchers.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ __all__ = ["MATCHERS"]

from typing import List, Tuple

from .core import DeviceMatcher, OSMatcher, UserAgentMatcher
from .matchers import DeviceMatcher, OSMatcher, UserAgentMatcher

MATCHERS: Tuple[
List[UserAgentMatcher],
Expand Down
9 changes: 4 additions & 5 deletions src/ua_parser/basic.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
__all__ = ["Resolver"]

from operator import methodcaller
from typing import List

Expand All @@ -7,13 +9,12 @@
Matcher,
Matchers,
OS,
Parser as AbstractParser,
PartialParseResult,
UserAgent,
)


class Parser(AbstractParser):
class Resolver:
"""A simple pure-python parser based around trying a number of regular
expressions in sequence for each domain, and returning a result
when one matches.
Expand All @@ -27,9 +28,7 @@ def __init__(
self,
matchers: Matchers,
) -> None:
self.user_agent_matchers = matchers[0]
self.os_matchers = matchers[1]
self.device_matchers = matchers[2]
self.user_agent_matchers, self.os_matchers, self.device_matchers = matchers

def __call__(self, ua: str, domains: Domain, /) -> PartialParseResult:
parse = methodcaller("__call__", ua)
Expand Down
20 changes: 10 additions & 10 deletions src/ua_parser/bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,18 @@
from typing import Any, Callable, Iterable, List, Optional

from . import (
BasicParser,
CachingParser,
BasicResolver,
CachingResolver,
Clearing,
Locking,
LRU,
Matchers,
Parser,
load_builtins,
load_yaml,
Resolver,
)
from .caching import Cache
from .re2 import Parser as Re2Parser
from .loaders import load_builtins, load_yaml
from .re2 import Resolver as Re2Resolver
from .user_agent_parser import Parse

CACHEABLE = {
Expand Down Expand Up @@ -222,19 +222,19 @@ def run_csv(args: argparse.Namespace) -> None:
def get_parser(
parser: str, cache: str, cachesize: int, rules: Matchers
) -> Callable[[str], Any]:
p: Parser
r: Resolver
if parser == "legacy":
return Parse
elif parser == "basic":
p = BasicParser(rules)
r = BasicResolver(rules)
elif parser == "re2":
p = Re2Parser(rules)
r = Re2Resolver(rules)
else:
sys.exit(f"unknown parser {parser!r}")

c: Callable[[int], Cache]
if cache == "none":
return p.parse
return Parser(r).parse
elif cache == "clearing":
c = Clearing
elif cache == "lru":
Expand All @@ -244,7 +244,7 @@ def get_parser(
else:
sys.exit(f"unknown cache algorithm {cache!r}")

return CachingParser(p, c(cachesize)).parse
return Parser(CachingResolver(r, c(cachesize))).parse


def run(
Expand Down
20 changes: 10 additions & 10 deletions src/ua_parser/caching.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
import abc
import threading
from collections import OrderedDict
from typing import Dict, Optional
from typing import Dict, Optional, Protocol

from .core import Domain, Parser, PartialParseResult
from .core import Domain, PartialParseResult, Resolver

__all__ = [
"CachingParser",
"CachingResolver",
"Cache",
"Clearing",
"Locking",
"LRU",
]


class Cache(abc.ABC):
class Cache(Protocol):
"""Cache abstract protocol. The :class:`CachingResolver` will look
values up, merge what was returned (possibly nothing) with what it
got from its actual parser, and *re-set the result*.
Expand All @@ -33,7 +33,7 @@ def __getitem__(self, key: str) -> Optional[PartialParseResult]:
...


class Clearing(Cache):
class Clearing:
"""A clearing cache, if the cache is full, just remove all the entries
and re-fill from scratch.
Expand Down Expand Up @@ -62,7 +62,7 @@ def __setitem__(self, key: str, value: PartialParseResult) -> None:
self.cache[key] = value


class LRU(Cache):
class LRU:
"""Cache following a least-recently used replacement policy: when
there is no more room in the cache, whichever entry was last seen
the least recently is removed.
Expand Down Expand Up @@ -103,7 +103,7 @@ def __setitem__(self, key: str, value: PartialParseResult) -> None:
self.cache.popitem(last=False)


class Locking(Cache):
class Locking:
"""Locking cache decorator. Takes a non-thread-safe cache and
ensures retrieving and setting entries is protected by a mutex.
Expand All @@ -122,7 +122,7 @@ def __setitem__(self, key: str, value: PartialParseResult) -> None:
self.cache[key] = value


class CachingParser(Parser):
class CachingResolver:
"""A wrapping parser which takes an underlying concrete :class:`Cache`
for the actual caching and cache strategy.
Expand All @@ -134,8 +134,8 @@ class CachingParser(Parser):
really, they're immutable).
"""

def __init__(self, parser: Parser, cache: Cache):
self.parser: Parser = parser
def __init__(self, parser: Resolver, cache: Cache):
self.parser: Resolver = parser
self.cache: Cache = cache

def __call__(self, ua: str, domains: Domain, /) -> PartialParseResult:
Expand Down
Loading

0 comments on commit d523ecb

Please sign in to comment.