Skip to content

Commit

Permalink
feat: implement query checking
Browse files Browse the repository at this point in the history
The change introduces a check_query callable which runs an extensible
compose pipeline of query checkers.

Note regarding QueryParseException: This custom exception is intended
to be a thin wrapper around a pyparsing ParseException that RDFLib
raises. This avoids introducing pyparsing as a dependency just to be able to
test against this exception. I feel like RDFLib should not raise a
pyparsing exception but provide a thin wrapper itself.
See RDFLib/rdflib#3057.

The check_query function runs in SPARQLModelAdapter to enable fast
failures on inapplicable queries. Note that this somewhat couples
QueryConstructor to SPARQLModelAdapter; QueryConstructor should be
marked private for this reason.

Possible handling of queries with outer-level solution modifiers is
discussed in issue #206.

Closes #116. Closes #126.
  • Loading branch information
lu-pl committed Jan 29, 2025
1 parent 4d15e06 commit d25b5af
Show file tree
Hide file tree
Showing 5 changed files with 101 additions and 2 deletions.
3 changes: 2 additions & 1 deletion rdfproxy/adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from rdfproxy.mapper import _ModelBindingsMapper
from rdfproxy.sparql_strategies import HttpxStrategy, SPARQLStrategy
from rdfproxy.utils._types import _TModelInstance
from rdfproxy.utils.checkers.query_checker import check_query
from rdfproxy.utils.models import Page, QueryParameters


Expand Down Expand Up @@ -40,7 +41,7 @@ def __init__(
sparql_strategy: type[SPARQLStrategy] = HttpxStrategy,
) -> None:
self._target = target
self._query = query
self._query = check_query(query)
self._model = model

self.sparql_strategy = sparql_strategy(self._target)
Expand Down
1 change: 1 addition & 0 deletions rdfproxy/constructor.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from rdfproxy.utils._types import _TModelInstance
from rdfproxy.utils.checkers.query_checker import check_query
from rdfproxy.utils.models import QueryParameters
from rdfproxy.utils.sparql_utils import (
add_solution_modifier,
Expand Down
15 changes: 15 additions & 0 deletions rdfproxy/utils/_exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,18 @@ class InvalidGroupingKeyException(Exception):

class QueryConstructionException(Exception):
    """Raised to signal that constructing a SPARQL query failed."""


class UnsupportedQueryException(Exception):
    """Raised to signal that a given SPARQL query is not supported."""


class QueryParseException(Exception):
    """Raised to signal that parsing a given SPARQL query failed.

    Serves as a wrapper for re-raising every exception that comes out of
    parsing a SPARQL query with RDFLib's parseQuery function.

    parseQuery raises a pyparsing.exceptions.ParseException; catching that
    type directly would require introducing pyparsing as a dependency
    just for testing.
    """
27 changes: 26 additions & 1 deletion rdfproxy/utils/_types.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
"""Type definitions for rdfproxy."""

from collections import UserString
from collections.abc import Iterable
from typing import Protocol, TypeAlias, TypeVar, runtime_checkable
from typing import Generic, Protocol, TypeAlias, TypeVar, runtime_checkable

from pydantic import BaseModel, ConfigDict as PydanticConfigDict
from rdflib.plugins.sparql.parser import parseQuery
from rdflib.plugins.sparql.parserutils import CompValue
from rdfproxy.utils._exceptions import QueryParseException


_TModelInstance = TypeVar("_TModelInstance", bound=BaseModel)
Expand Down Expand Up @@ -49,3 +53,24 @@ class ConfigDict(PydanticConfigDict, total=False):

group_by: str
model_bool: _TModelBoolValue


# Type variable for SPARQL query strings; bound to str, so str subclasses are admitted.
_TQuery = TypeVar("_TQuery", bound=str)


class ParsedSPARQL(Generic[_TQuery], UserString):
    """String-like container pairing a SPARQL query with its parse result.

    The query text is kept in ``data`` (the UserString payload) and the
    object obtained from RDFLib's parseQuery is kept in ``parse_object``.
    Construction raises QueryParseException if the query cannot be parsed.
    """

    def __init__(self, query: _TQuery) -> None:
        # data is assigned directly here rather than via UserString.__init__.
        self.data: _TQuery = query
        self.parse_object: CompValue = self._get_parse_object(query)

    @staticmethod
    def _get_parse_object(query: str) -> CompValue:
        """Parse the query and return the second item of parseQuery's result.

        Raises:
            QueryParseException: If parseQuery raises any exception; the
                original exception is attached as the cause.
        """
        try:
            parse_result = parseQuery(query)
        except Exception as error:
            raise QueryParseException(error) from error

        # parseQuery's result unpacks into two items; only the second is kept.
        _unused, parse_object = parse_result
        return parse_object
57 changes: 57 additions & 0 deletions rdfproxy/utils/checkers/query_checker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
"""Functionality for performing checks on SPARQL queries."""

import logging

from rdfproxy.utils._exceptions import UnsupportedQueryException
from rdfproxy.utils._types import ParsedSPARQL, _TQuery
from rdfproxy.utils.utils import compose_left


logger = logging.getLogger(__name__)


def _check_select_query(parsed_sparql: ParsedSPARQL) -> ParsedSPARQL:
    """Ensure that the parsed SPARQL query is a SELECT query.

    Intended to run as one component of the check_query pipeline.

    Returns:
        The unchanged parsed_sparql when the check passes.

    Raises:
        UnsupportedQueryException: If the parse object's name is not
            "SelectQuery".
    """
    logger.debug("Running SELECT query check.")

    query_kind = parsed_sparql.parse_object.name
    if query_kind == "SelectQuery":
        return parsed_sparql

    raise UnsupportedQueryException("Only SELECT queries are applicable.")


def _check_solution_modifiers(parsed_sparql: ParsedSPARQL) -> ParsedSPARQL:
    """Reject SPARQL queries that carry a solution modifier on the outer query.

    Intended to run as one component of the check_query pipeline.

    The parse object is probed for the attributes "limitoffset", "groupby",
    "having" and "orderby" (in that order); the first one whose value is
    not None causes the query to be rejected.

    Returns:
        The unchanged parsed_sparql when no solution modifier is found.

    Raises:
        UnsupportedQueryException: If any of the probed modifiers is present.
    """
    logger.debug("Running solution modifier check.")

    parse_object = parsed_sparql.parse_object

    for mod_name in ("limitoffset", "groupby", "having", "orderby"):
        mod = getattr(parse_object, mod_name)
        # Presence is decided by `is not None` alone. Truth-testing the value
        # as well would let a present-but-falsy modifier slip through.
        if mod is not None:
            logger.critical("Detected solution modifier '%s' in outer query.", mod)
            raise UnsupportedQueryException(
                "Solution modifiers for top-level queries are currently not supported."
            )

    return parsed_sparql


def check_query(query: _TQuery) -> _TQuery:
    """Validate a SPARQL query by sending it through a pipeline of checks.

    The query is wrapped in a ParsedSPARQL and handed to the callable that
    compose_left builds from the individual checkers (presumably applied
    left to right; see compose_left). Exceptions raised while parsing or
    by a checker propagate to the caller.

    Returns:
        The ``data`` attribute of the pipeline's result.
    """
    logger.debug("Running query check pipeline on '%s'", query)

    wrapped_query = ParsedSPARQL(query=query)
    run_checks = compose_left(
        _check_select_query,
        _check_solution_modifiers,
    )
    checked: ParsedSPARQL = run_checks(wrapped_query)

    return checked.data

0 comments on commit d25b5af

Please sign in to comment.