You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
526 lines
16 KiB
526 lines
16 KiB
""" |
|
Contains the querying interface. |
|
|
|
Starting with :class:`~tinydb.queries.Query` you can construct complex |
|
queries: |
|
|
|
>>> ((where('f1') == 5) & (where('f2') != 2)) | where('s').matches(r'^\\w+$') |
|
(('f1' == 5) and ('f2' != 2)) or ('s' ~= ^\\w+$ ) |
|
|
|
Queries are executed by using the ``__call__``: |
|
|
|
>>> q = where('val') == 5 |
|
>>> q({'val': 5}) |
|
True |
|
>>> q({'val': 1}) |
|
False |
|
""" |
|
|
|
import re |
|
import sys |
|
from typing import Mapping, Tuple, Callable, Any, Union, List, Optional |
|
|
|
from .utils import freeze |
|
|
|
if sys.version_info >= (3, 8): |
|
from typing import Protocol |
|
else: |
|
from typing_extensions import Protocol |
|
|
|
__all__ = ('Query', 'QueryLike', 'where') |
|
|
|
|
|
def is_sequence(obj):
    """
    Tell whether ``obj`` can be iterated over.

    Only the presence of an ``__iter__`` attribute is checked, so strings
    and mappings are reported as sequences as well.

    :param obj: The object to inspect.
    :return: ``True`` if ``obj`` has an ``__iter__`` attribute.
    """
    has_iter = hasattr(obj, '__iter__')
    return has_iter
|
|
|
|
|
class QueryLike(Protocol):
    """
    Structural type describing anything that can be used as a query.

    An object satisfies this protocol when it:

    1. is callable with a single `Mapping` argument and returns a boolean
       telling whether that value matches the query, and
    2. provides a stable hash, which is what query caching keys on.

    A query may additionally opt out of caching (e.g. because it performs
    some remote lookup) by offering an ``is_cacheable`` method that returns
    ``False``.

    The protocol exists so that MyPy can type-check the query pattern that
    TinyDB uses.

    See also https://mypy.readthedocs.io/en/stable/protocols.html#simple-user-defined-protocols
    """

    def __call__(self, value: Mapping) -> bool:
        ...

    def __hash__(self) -> int:
        ...
|
|
|
|
|
class QueryInstance:
    """
    A query instance.

    This is the object on which the actual query operations are performed. The
    :class:`~tinydb.queries.Query` class acts like a query builder and
    generates :class:`~tinydb.queries.QueryInstance` objects which will
    evaluate their query against a given document when called.

    Query instances can be combined using logical OR and AND and inverted using
    logical NOT.

    In order to be usable in a query cache, a query needs to have a stable hash
    value with the same query always returning the same hash. That way a query
    instance can be used as a key in a dictionary.
    """

    def __init__(self, test: Callable[[Mapping], bool], hashval: Optional[Tuple]):
        """
        :param test: The callable that is run against a document.
        :param hashval: A hashable description of the query, or ``None`` to
                        mark the query as non-cacheable.
        """
        self._test = test
        self._hash = hashval

    def is_cacheable(self) -> bool:
        """
        Tell whether this query carries a hash description (i.e. its
        ``hashval`` is not ``None``).
        """
        return self._hash is not None

    def __call__(self, value: Mapping) -> bool:
        """
        Evaluate the query to check if it matches a specified value.

        :param value: The value to check.
        :return: Whether the value matches this query.
        """
        return self._test(value)

    def __hash__(self) -> int:
        # We calculate the query hash by using the ``hashval`` object which
        # describes this query uniquely, so we can calculate a stable hash
        # value by simply hashing it
        return hash(self._hash)

    def __repr__(self):
        return 'QueryImpl{}'.format(self._hash)

    def __eq__(self, other: object):
        if not isinstance(other, QueryInstance):
            return False

        # A query without a hash description tells us nothing about what it
        # tests, so two such queries must not be considered equal merely
        # because both descriptions are ``None``. Fall back to identity.
        if self._hash is None or other._hash is None:
            return self is other

        return self._hash == other._hash

    # --- Query modifiers -----------------------------------------------------

    def __and__(self, other: 'QueryInstance') -> 'QueryInstance':
        """
        Combine two queries with a logical AND.

        The result is cacheable only if both operands are.
        """
        # We use a frozenset for the hash as the AND operation is commutative
        # (a & b == b & a) and the frozenset does not consider the order of
        # elements
        if self.is_cacheable() and other.is_cacheable():
            hashval = ('and', frozenset([self._hash, other._hash]))
        else:
            hashval = None
        return QueryInstance(lambda value: self(value) and other(value), hashval)

    def __or__(self, other: 'QueryInstance') -> 'QueryInstance':
        """
        Combine two queries with a logical OR.

        The result is cacheable only if both operands are.
        """
        # We use a frozenset for the hash as the OR operation is commutative
        # (a | b == b | a) and the frozenset does not consider the order of
        # elements
        if self.is_cacheable() and other.is_cacheable():
            hashval = ('or', frozenset([self._hash, other._hash]))
        else:
            hashval = None
        return QueryInstance(lambda value: self(value) or other(value), hashval)

    def __invert__(self) -> 'QueryInstance':
        """
        Negate the query. The result is cacheable only if this query is.
        """
        hashval = ('not', self._hash) if self.is_cacheable() else None
        return QueryInstance(lambda value: not self(value), hashval)
|
|
|
|
|
class Query(QueryInstance):
    """
    TinyDB Queries.

    Allows building queries for TinyDB databases. There are two main ways of
    using queries:

    1) ORM-like usage:

    >>> User = Query()
    >>> db.search(User.name == 'John Doe')
    >>> db.search(User['logged-in'] == True)

    2) Classical usage:

    >>> db.search(where('value') == True)

    Note that ``where(...)`` is a shorthand for ``Query(...)`` allowing for
    a more fluent syntax.

    Besides the methods documented here you can combine queries using the
    binary AND and OR operators:

    >>> # Binary AND:
    >>> db.search((where('field1').exists()) & (where('field2') == 5))
    >>> # Binary OR:
    >>> db.search((where('field1').exists()) | (where('field2') == 5))

    Queries are executed by calling the resulting object. They expect to get
    the document to test as the first argument and return ``True`` or
    ``False`` depending on whether the documents match the query or not.
    """

    def __init__(self) -> None:
        # The current path of fields to access when evaluating the object
        self._path: Tuple[Union[str, Callable], ...] = ()

        # Prevent empty queries to be evaluated
        def notest(_):
            raise RuntimeError('Empty query was evaluated')

        super().__init__(
            test=notest,
            hashval=(None,)
        )

    def __repr__(self):
        return '{}()'.format(type(self).__name__)

    def __hash__(self):
        # Defining ``__eq__`` below would otherwise make instances of this
        # class unhashable, so the parent implementation is restored
        # explicitly.
        return super().__hash__()

    def __getattr__(self, item: str):
        # Generate a new query object with the new query path
        # We use type(self) to get the class of the current query in case
        # someone uses a subclass of ``Query``
        query = type(self)()

        # Now we add the accessed item to the query path ...
        query._path = self._path + (item,)

        # ... and update the query hash
        query._hash = ('path', query._path) if self.is_cacheable() else None

        return query

    def __getitem__(self, item: str):
        # A different syntax for ``__getattr__``

        # We cannot call ``getattr(item)`` here as it would try to resolve
        # the name as a method name first, only then call our ``__getattr__``
        # method. By calling ``__getattr__`` directly, we make sure that
        # calling e.g. ``Query()['test']`` will always generate a query for a
        # document's ``test`` field instead of returning a reference to the
        # ``Query.test`` method
        return self.__getattr__(item)

    def _generate_test(
        self,
        test: Callable[[Any], bool],
        hashval: Tuple,
        allow_empty_path: bool = False
    ) -> QueryInstance:
        """
        Generate a query based on a test function that first resolves the query
        path.

        :param test: The test the query executes.
        :param hashval: The hash of the query.
        :param allow_empty_path: Whether the query may be built without any
                                 path element.
        :return: A :class:`~tinydb.queries.QueryInstance` object
        :raises ValueError: If the path is empty and ``allow_empty_path`` is
                            not set.
        """
        if not self._path and not allow_empty_path:
            raise ValueError('Query has no path')

        def runner(value):
            try:
                # Resolve the path
                for part in self._path:
                    if isinstance(part, str):
                        value = value[part]
                    else:
                        value = part(value)
            except (KeyError, TypeError):
                # The path could not be resolved: the document doesn't match
                return False
            else:
                # Perform the specified test
                return test(value)

        return QueryInstance(
            runner,
            (hashval if self.is_cacheable() else None)
        )

    def __eq__(self, rhs: Any):
        """
        Test a dict value for equality.

        >>> Query().f1 == 42

        :param rhs: The value to compare against
        """
        return self._generate_test(
            lambda value: value == rhs,
            ('==', self._path, freeze(rhs))
        )

    def __ne__(self, rhs: Any):
        """
        Test a dict value for inequality.

        >>> Query().f1 != 42

        :param rhs: The value to compare against
        """
        return self._generate_test(
            lambda value: value != rhs,
            ('!=', self._path, freeze(rhs))
        )

    # NOTE: the ordering comparisons freeze ``rhs`` for the hash just like
    # ``__eq__``/``__ne__`` do, so that e.g. a list on the right-hand side
    # does not produce an unhashable query.

    def __lt__(self, rhs: Any) -> QueryInstance:
        """
        Test a dict value for being lower than another value.

        >>> Query().f1 < 42

        :param rhs: The value to compare against
        """
        return self._generate_test(
            lambda value: value < rhs,
            ('<', self._path, freeze(rhs))
        )

    def __le__(self, rhs: Any) -> QueryInstance:
        """
        Test a dict value for being lower than or equal to another value.

        >>> where('f1') <= 42

        :param rhs: The value to compare against
        """
        return self._generate_test(
            lambda value: value <= rhs,
            ('<=', self._path, freeze(rhs))
        )

    def __gt__(self, rhs: Any) -> QueryInstance:
        """
        Test a dict value for being greater than another value.

        >>> Query().f1 > 42

        :param rhs: The value to compare against
        """
        return self._generate_test(
            lambda value: value > rhs,
            ('>', self._path, freeze(rhs))
        )

    def __ge__(self, rhs: Any) -> QueryInstance:
        """
        Test a dict value for being greater than or equal to another value.

        >>> Query().f1 >= 42

        :param rhs: The value to compare against
        """
        return self._generate_test(
            lambda value: value >= rhs,
            ('>=', self._path, freeze(rhs))
        )

    def exists(self) -> QueryInstance:
        """
        Test for a dict where a provided key exists.

        >>> Query().f1.exists()
        """
        return self._generate_test(
            lambda _: True,
            ('exists', self._path)
        )

    def matches(self, regex: str, flags: int = 0) -> QueryInstance:
        """
        Run a regex test against a dict value using ``re.match``.

        ``re.match`` anchors the pattern at the beginning of the value only;
        end the pattern with ``$`` if the whole string has to match.
        Non-string values never match.

        >>> Query().f1.matches(r'^\\w+$')

        :param regex: The regular expression to use for matching
        :param flags: regex flags to pass to ``re.match``
        """
        def test(value):
            if not isinstance(value, str):
                return False

            return re.match(regex, value, flags) is not None

        # ``flags`` change what the query matches, so they have to be part of
        # the hash; otherwise queries differing only in their flags would be
        # treated as the same query.
        return self._generate_test(
            test,
            ('matches', self._path, regex, flags)
        )

    def search(self, regex: str, flags: int = 0) -> QueryInstance:
        """
        Run a regex test against a dict value (only a substring has to
        match). Non-string values never match.

        >>> Query().f1.search(r'^\\w+$')

        :param regex: The regular expression to use for matching
        :param flags: regex flags to pass to ``re.search``
        """
        def test(value):
            if not isinstance(value, str):
                return False

            return re.search(regex, value, flags) is not None

        # See ``matches``: the flags are part of the query's identity.
        return self._generate_test(
            test,
            ('search', self._path, regex, flags)
        )

    def test(self, func: Callable[[Mapping], bool], *args) -> QueryInstance:
        """
        Run a user-defined test function against a dict value.

        >>> def test_func(val):
        ...     return val == 42
        ...
        >>> Query().f1.test(test_func)

        .. warning::

            The test function provided needs to be deterministic (returning the
            same value when provided with the same arguments), otherwise this
            may mess up the query cache that :class:`~tinydb.table.Table`
            implements.

        :param func: The function to call, passing the dict as the first
                     argument
        :param args: Additional arguments to pass to the test function
        """
        return self._generate_test(
            lambda value: func(value, *args),
            ('test', self._path, func, args)
        )

    def any(self, cond: Union[QueryInstance, List[Any]]) -> QueryInstance:
        """
        Check if a condition is met by any document in a list,
        where a condition can also be a sequence (e.g. list).

        >>> Query().f1.any(Query().f2 == 1)

        Matches::

            {'f1': [{'f2': 1}, {'f2': 0}]}

        >>> Query().f1.any([1, 2, 3])

        Matches::

            {'f1': [1, 2]}
            {'f1': [3, 4, 5]}

        :param cond: Either a query that at least one document has to match or
                     a list of which at least one document has to be contained
                     in the tested document.
        """
        if callable(cond):
            def test(value):
                return is_sequence(value) and any(cond(e) for e in value)

        else:
            def test(value):
                return is_sequence(value) and any(e in cond for e in value)

        return self._generate_test(
            test,
            ('any', self._path, freeze(cond))
        )

    def all(self, cond: Union['QueryInstance', List[Any]]) -> QueryInstance:
        """
        Check if a condition is met by all documents in a list,
        where a condition can also be a sequence (e.g. list).

        >>> Query().f1.all(Query().f2 == 1)

        Matches::

            {'f1': [{'f2': 1}, {'f2': 1}]}

        >>> Query().f1.all([1, 2, 3])

        Matches::

            {'f1': [1, 2, 3, 4, 5]}

        :param cond: Either a query that all documents have to match or a list
                     which has to be contained in the tested document.
        """
        if callable(cond):
            def test(value):
                return is_sequence(value) and all(cond(e) for e in value)

        else:
            def test(value):
                return is_sequence(value) and all(e in value for e in cond)

        return self._generate_test(
            test,
            ('all', self._path, freeze(cond))
        )

    def one_of(self, items: List[Any]) -> QueryInstance:
        """
        Check if the value is contained in a list or generator.

        >>> Query().f1.one_of(['value 1', 'value 2'])

        :param items: The list of items to check with
        """
        return self._generate_test(
            lambda value: value in items,
            ('one_of', self._path, freeze(items))
        )

    def fragment(self, document: Mapping) -> QueryInstance:
        """
        Check if the value contains every key of ``document`` with an equal
        value.

        Unlike the other tests this one may be used on a query without a
        path, in which case the whole document is tested.

        >>> Query().fragment({'f1': 1})
        >>> Query().f1.fragment({'f2': 1})

        :param document: The key/value pairs that have to be present
        """
        def test(value):
            for key in document:
                if key not in value or value[key] != document[key]:
                    return False

            return True

        # The path has to be part of the hash: the same fragment tested at
        # two different paths is two different queries.
        return self._generate_test(
            test,
            ('fragment', self._path, freeze(document)),
            allow_empty_path=True
        )

    def noop(self) -> QueryInstance:
        """
        Always evaluate to ``True``.

        Useful for having a base value when composing queries dynamically.
        """

        return QueryInstance(
            lambda value: True,
            ()
        )

    def map(self, fn: Callable[[Any], Any]) -> 'Query':
        """
        Add a function to the query path. Similar to __getattr__ but for
        arbitrary functions.

        :param fn: The callable applied to the value resolved so far
        :return: A new, non-cacheable query with ``fn`` appended to its path
        """
        query = type(self)()

        # Now we add the callable to the query path ...
        query._path = self._path + (fn,)

        # ... and kill the hash - callable objects can be mutable, so it's
        # harmful to cache their results.
        query._hash = None

        return query
|
|
|
def where(key: str) -> Query:
    """
    Build a query for the field ``key``.

    This is a shorthand for ``Query()[key]``.

    :param key: The name of the field to query
    """
    root = Query()
    return root[key]
|
|
|