You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
972 lines
28 KiB
972 lines
28 KiB
import re |
|
from copy import deepcopy |
|
from datetime import datetime |
|
from collections.abc import Mapping |
|
|
|
from ..errors import OperationFailure |
|
|
|
from .field_walker import FieldWalker |
|
from .weighted import ( |
|
Weighted, |
|
gravity, |
|
_cmp_decimal, |
|
) |
|
from ..types import ( |
|
bson, |
|
RE_PATTERN_TYPE, |
|
integer_types, |
|
string_types, |
|
is_duckument_type, |
|
is_integer_type, |
|
is_pattern_type, |
|
keep, |
|
compare_documents, |
|
re_str_flags_to_int, |
|
) |
|
|
|
|
|
def validate_sort_specifier(sort):
    """Check that a sort direction is the integer ``1`` or ``-1``.

    Args:
        sort: Candidate sort direction taken from a sort specification.

    Raises:
        OperationFailure: With ``code=2`` when `sort` is not an integer
            type, or is an integer other than ``1`` / ``-1``.
    """
    is_valid = is_integer_type(sort) and sort in (1, -1)
    if is_valid:
        return
    raise OperationFailure("bad sort specification", code=2)
|
|
|
|
|
def ordering(fieldwalkers, order, doc_type=None):
    """Sort `fieldwalkers` by the field paths listed in `order`.

    Sort keys are applied one after another, in the iteration order of
    `order`. After each pass every document is tagged with a "section" id:
    documents in the same section compared equal on every key processed so
    far. The next pass sorts by ``(section, value, position)`` so earlier
    keys keep priority and ties keep their previous relative order.

    Args:
        fieldwalkers (list): Objects exposing a ``doc`` attribute.
        order (Mapping): Field path mapped to a sort direction, ``1`` or
            ``-1`` (``-1`` sorts that key in reverse).
        doc_type: Forwarded to `FieldWalker` when walking each document.

    Returns:
        list: The members of `fieldwalkers` in sorted order. When `order`
        is empty the input object is returned unchanged.

    Raises:
        OperationFailure: Raised by `validate_sort_specifier` when a
            direction is not ``1`` or ``-1``.
    """
    total = len(fieldwalkers)
    pre_sect_stack = []

    for path, revr in order.items():
        validate_sort_specifier(revr)

        # 1 -> False (ascending), -1 -> True (descending)
        is_reverse = bool(1 - revr)
        value_stack = []

        for indx, fieldwalker in enumerate(fieldwalkers):
            # Walk the document again for this sort path.
            walked = FieldWalker(fieldwalker.doc, doc_type).go(path).get()
            values = list(walked.value.iter_flat())

            if values:
                # A multi-valued field is represented by its smallest
                # (ascending) or largest (descending) member.
                weighted = [Weighted(val) for val in values]
                value = max(weighted) if is_reverse else min(weighted)
            elif not walked.value.is_exists():
                value = Weighted(None)
            else:
                # [] less than None
                value = (0, ())

            # Section assigned by the previous sort key (0 on first pass).
            pre_sect = pre_sect_stack[indx] if pre_sect_stack else 0
            # The whole tuple is sorted with reverse=True in reverse mode,
            # so mirror section and position to keep them ascending.
            pre_sect = (total - pre_sect) if is_reverse else pre_sect
            indx = (total - indx) if is_reverse else indx

            value_stack.append((pre_sect, value, indx))

        value_stack.sort(reverse=is_reverse)

        ordereddoc = []
        sect_stack = []
        sect_id = -1
        last_key = None
        for pre_sect, value, indx in value_stack:
            # Undo the mirroring applied above.
            indx = (total - indx) if is_reverse else indx
            ordereddoc.append(fieldwalkers[indx])

            # A new section starts whenever the value changes OR the
            # document belongs to a different section of the previous
            # keys. Comparing the value alone would merge neighbours that
            # only tie on the current key, letting a later key reorder
            # documents across an earlier key's boundary.
            sect_key = (pre_sect, value)
            if not sect_key == last_key:
                sect_id += 1
            sect_stack.append(sect_id)
            last_key = sect_key

        # Feed this pass's result into the next sort key.
        fieldwalkers = ordereddoc
        pre_sect_stack = sect_stack

    return fieldwalkers
|
|
|
|
|
class LogicBox(list):
    """A callable operator/logic array for document filtering

    By defining a `theme`, the instance's bool processing logic will behave
    differently when being called, and return different operation result.

    Only operator functions or `LogicBox` type instance are valid member of
    the `LogicBox`.

    Args:
        theme (str): A document field path or logic name
            ($and/$or/$nor/$not/$elemMatch).
        implicitly (bool): When true, `__repr__` omits the
            ``LogicBox(theme)`` label.

    """

    def __init__(self, theme, implicitly=False):
        self.theme = theme
        self.implicitly = implicitly
        # Themes with dedicated logic; any other theme is treated as a
        # document field path (see `__call__`).
        self._logic = {
            "$and": self._call_and,
            "$or": self._call_or,
            "$nor": self._call_nor,
            "$not": self._call_not,
            "$elemMatch": self._call_elemMatch,
        }

    @property
    def __name__(self):
        # Lets a nested LogicBox be printed like an operator function.
        return self.__repr__()

    def __repr__(self):
        """Display `theme` and `LogicBox` or operators content within"""
        content = []
        name = "[{}]"
        if not self.implicitly:
            content = ["LogicBox({})".format(self.theme)]
            name = "{}"

        for i in self[:]:
            if callable(i):
                if hasattr(i, "_keep"):
                    # query ops
                    content.append("{}({})".format(i.__name__, i._keep()))
                else:
                    # LogicBox
                    content.append(i.__name__)
            else:
                content.append(i)

        # Strip the outer list brackets and the quotes added by str(list).
        return name.format(content)[1:-1].replace("'", "")

    def __call__(self, fieldwalker):
        """Recursively calling `LogicBox` or operators content within

        A short-circuit logic sub-structure, passing `FieldWalker` instance.

        Args:
            fieldwalker (FieldWalker): Received from `QueryFilter` instance.

        Returns:
            The truth value produced by the themed logic.

        """
        # Look the handler up first and call it outside of any `except`,
        # so a KeyError raised inside a member operator propagates instead
        # of being mistaken for "theme is a field path".
        handler = self._logic.get(self.theme)
        if handler is None:
            return self._call_field(fieldwalker)
        return handler(fieldwalker)

    def _gen(self, fieldwalker):
        """Lazily evaluate every member against `fieldwalker`."""
        return (cond(fieldwalker) for cond in self[:])

    def _call_field(self, fieldwalker):
        """Entering document field context before process"""
        with fieldwalker.go(self.theme).get():
            return all(self._gen(fieldwalker))

    @staticmethod
    def _element_iterator(elem):
        """Build an iterator factory yielding `elem`'s members, then `elem`.

        Non-iterable elements yield only themselves.
        """

        def elem_iter():
            try:
                for e in elem:
                    yield e
            except TypeError:
                pass
            # Deliberately NOT in a `finally`: yielding there makes the
            # generator raise "generator ignored GeneratorExit" when a
            # short-circuiting consumer abandons it midway.
            yield elem

        return elem_iter

    def _call_elemMatch(self, fieldwalker):
        """Return True if one element satisfies every member condition."""
        with fieldwalker.value as field_value:
            for elem in field_value.iter_elements():
                field_value.change_iter(self._element_iterator(elem))
                if all(self._gen(fieldwalker)):
                    return True
        return False

    def _call_and(self, fieldwalker):
        return all(self._gen(fieldwalker))

    def _call_or(self, fieldwalker):
        return any(self._gen(fieldwalker))

    def _call_nor(self, fieldwalker):
        return not any(self._gen(fieldwalker))

    def _call_not(self, fieldwalker):
        return not all(self._gen(fieldwalker))
|
|
|
|
|
class QueryFilter(object):
    """Document query filter

    Parsing MongoDB document query language, generate a callable instance for
    documents query filtering.

    Args:
        spec (dict): MongoDB document query language object.

    Raises:
        OperationFailure: When `spec` contains an unknown, unsupported or
            malformed operator.

    """

    def __init__(self, spec):
        # Top-level operators, work on top of fields.
        self.pathless_ops = {
            # Logical
            "$and": self.parse_logic("$and"),
            "$nor": self.parse_logic("$nor"),
            "$or": self.parse_logic("$or"),
            # Evaluation
            # Registered so it is recognised as a top-level operator, but
            # no parser exists; `parser` rejects it explicitly.
            "$expr": None,
        }

        # Field-level operators, need to work inside a field context.
        self.field_ops = {
            # Logical
            "$not": self._parse_not,
            # Comparison
            "$eq": parse_eq,
            "$gt": parse_gt,
            "$gte": parse_gte,
            "$in": parse_in,
            "$lt": parse_lt,
            "$lte": parse_lte,
            "$ne": parse_ne,
            "$nin": parse_nin,
            # Element
            "$exists": parse_exists,
            "$type": parse_type,
            # Array
            "$all": parse_all,
            "$elemMatch": self._parse_elemMatch,
            "$size": parse_size,
            # Evaluation
            "$jsonSchema": parse_jsonSchema,
            "$mod": parse_mod,
            "$regex": parse_regex,
        }

        # Start parsing query object
        self.conditions = self.parser(spec)
        self.__fieldwalker = None

        # ready to be called.

    def __repr__(self):
        return "QueryFilter({})".format(str(self.conditions))

    def __call__(self, doc, doc_type=None):
        """Recursively calling `LogicBox` or operators content within

        A short-circuit logic structure to determine the document can pass the
        filter or not.

        Args:
            doc (dict): Document received from database.
            doc_type: Forwarded to `FieldWalker`.

        Returns:
            bool: True when every parsed condition accepts the document.

        """
        self.__fieldwalker = FieldWalker(doc, doc_type)
        return all(cond(self.__fieldwalker) for cond in self.conditions)

    @property
    def fieldwalker(self):
        """The `FieldWalker` built by the most recent call, or None."""
        return self.__fieldwalker

    def parser(self, spec):
        """Top-level parser

        Returns:
            LogicBox: An implicit ``$and`` box holding one entry per key of
            `spec`.

        Raises:
            OperationFailure: For an unknown or unsupported ``$`` key.
        """
        # Implementation of implicitly $and operation, fundamental query
        # container.
        logic_box = LogicBox("$and", implicitly=True)

        for path, sub_spec in spec.items():
            if not path.startswith("$"):
                logic_box.append(self.subparser(path, sub_spec))
                continue

            # Only the table lookup is guarded, so a KeyError raised while
            # parsing the nested spec is not misreported as "unknown
            # top level operator".
            try:
                parse_op = self.pathless_ops[path]
            except KeyError:
                raise OperationFailure(
                    "unknown top level operator: {}".format(path)
                )
            if parse_op is None:
                # Known operator without an implementation; calling None
                # would otherwise surface as an opaque TypeError.
                raise OperationFailure(
                    "unsupported top level operator: {}".format(path)
                )
            logic_box.append(parse_op(sub_spec))

        return logic_box

    def subparser(self, path, sub_spec):
        """Field-level parser

        Args:
            path (str): Field path, or the logic name the resulting box is
                themed with (``$not`` / ``$elemMatch``).
            sub_spec: Operator document, or a plain value to equal-match.

        Returns:
            LogicBox: Box themed with `path`.

        Raises:
            OperationFailure: For unknown operators or invalid regex use.
        """
        # Implementation of field-level operation container.
        logic_box = LogicBox(path)

        # There are two processing directions at field level: filtering
        # with operators, or an implicit value $eq match. The direction is
        # decided by the expression value type and, if it is a document, by
        # whether its *first* key starts with "$".
        #
        # Example:
        #
        #   {"field.name": {"$ne": 5, "$exists": True}} -> by operators
        #   {"field.name": {"data": 5, "id": 2}}        -> value matching ($eq)
        #
        # When operator and non-operator keys are mixed, e.g.
        #   {"field.name": {"$eq": 5, "id": 2}}
        # the outcome depends on which key iterates *first*: either no
        # document matches (no error), or an
        # "OperationFailure: unknown operator" error is raised.
        #
        if isinstance(sub_spec, bson.Regex):
            sub_spec = {"$regex": sub_spec}

        if not _is_expression_obj(sub_spec):
            logic_box.append(parse_eq(sub_spec))
            return logic_box

        # Merge $regex and $options before handing over to the operators.
        if "$regex" in sub_spec:
            sub_spec = _modify_regex_optins(sub_spec)
        elif "$options" in sub_spec:
            raise OperationFailure("$options needs a $regex")

        regex_types = (RE_PATTERN_TYPE, bson.Regex)
        for op, value in sub_spec.items():
            # Regex can't do $ne directly
            if op == "$ne" and isinstance(value, regex_types):
                raise OperationFailure("Can't have RegEx as arg to $ne.")
            # is predictable ?
            if op in ("$gt", "$gte", "$lt", "$lte"):
                if isinstance(value, regex_types):
                    raise OperationFailure(
                        "Can't have RegEx as arg to predicate over "
                        "field {!r}.".format(path)
                    )

            # Guard the lookup only; errors raised by the operator parser
            # itself must not be rewritten as "unknown operator".
            try:
                parse_op = self.field_ops[op]
            except KeyError:
                raise OperationFailure("unknown operator: {}".format(op))
            logic_box.append(parse_op(value))

        return logic_box

    def parse_logic(self, theme):
        """Logical operator parser (un-themed)

        Returns:
            callable: Parser for the array given to `theme`; it returns a
            `LogicBox` themed with `theme`.
        """

        def _parse_logic(sub_spec):
            """Themed logical operator"""
            if not isinstance(sub_spec, list):
                raise OperationFailure("{} must be an array".format(theme))

            logic_box = LogicBox(theme)

            for cond in sub_spec:
                if not is_duckument_type(cond):
                    raise OperationFailure(
                        "$or/$and/$nor entries need to be full objects"
                    )

                logic_box.append(self.parser(cond))
            return logic_box

        return _parse_logic

    def _parse_not(self, sub_spec):
        """Parse the operand of a field-level ``$not``.

        Raises:
            OperationFailure: When the operand is neither a regex nor a
                document, or the document holds a disallowed operator.
        """
        # $not logic only available in field-level
        if isinstance(sub_spec, (RE_PATTERN_TYPE, bson.Regex)):
            return self.subparser("$not", {"$regex": sub_spec})

        elif is_duckument_type(sub_spec):
            for op in sub_spec:
                if op not in self.field_ops:
                    raise OperationFailure("unknown operator: {}".format(op))
                _not_subspec_op_check(op)

            return self.subparser("$not", sub_spec)

        else:
            raise OperationFailure("$not needs a regex or a document")

    def _parse_elemMatch(self, sub_spec):
        """Parse the operand of a field-level ``$elemMatch``.

        Only the first key of `sub_spec` decides whether the operand is
        treated as a set of field operators or as a sub-document query.

        Raises:
            OperationFailure: When the operand is not a document or its
                first key is an unknown ``$`` operator.
        """
        # $elemMatch only available in field-level
        if not is_duckument_type(sub_spec):
            raise OperationFailure("$elemMatch needs an Object")

        # NOTE(review): an empty `sub_spec` falls through and returns None,
        # which the enclosing LogicBox cannot call - confirm the intended
        # behaviour for `{"$elemMatch": {}}`.
        for op in sub_spec:
            if op in self.field_ops:
                return self.subparser("$elemMatch", sub_spec)
            elif not op.startswith("$") or op in self.pathless_ops:
                return parse_elemMatch(sub_spec)
            else:
                raise OperationFailure("unknown operator: {}".format(op))
|
|
|
|
|
def _is_expression_obj(sub_spec):
    """Tell whether `sub_spec` is an operator expression document.

    A document counts as an expression when its *first* key is a string
    starting with ``$``. Empty documents and documents whose first key is
    not a string are plain values, not expressions.

    Returns:
        bool
    """
    if not is_duckument_type(sub_spec):
        return False
    # `next` with a default: an empty document must not leak StopIteration.
    first_key = next(iter(sub_spec), None)
    return isinstance(first_key, string_types) and first_key.startswith("$")
|
|
|
|
|
def _not_validate_subspec_op_(op):
    """Reject ``$regex`` as an operator nested under ``$not``.

    Raises:
        OperationFailure: When `op` is ``"$regex"``.
    """
    if op != "$regex":
        return
    raise OperationFailure("$not cannot have a regex")


def _not_validate_subspec_op_v4(sub_spec):
    """Permissive variant: accepts any operator under ``$not``."""
    return None


# Variant currently used by `QueryFilter._parse_not`.
_not_subspec_op_check = _not_validate_subspec_op_
|
|
|
|
|
class _FALG(object):
    """Wrapper that carries integer regex flags.

    Exists only so `int` flags can pass the internal
    "flags must be string" type check; read them back via `retrieve`.
    """

    __slots__ = ("retrieve",)

    def __init__(self, int_flags):
        self.retrieve = int_flags
|
|
|
|
|
def _regex_options_(regex_flag, opt_flag):
    """Permissive variant: flags may come from both sources."""
    return None


def _regex_options_v42(regex_flag, opt_flag):
    """Reject flags supplied by both the regex object and ``$options``.

    Raises:
        OperationFailure: When both `regex_flag` and `opt_flag` are truthy.
    """
    both_given = bool(regex_flag) and bool(opt_flag)
    if both_given:
        raise OperationFailure("options set in both $regex and $options")


# Variant currently used by `_modify_regex_optins`.
_regex_options_check = _regex_options_v42
|
|
|
|
|
def _modify_regex_optins(sub_spec):
    """Merging $regex and $options values in query document

    Besides string type value, field $regex accept `bson.Regex` and
    `re._pattern_type` in pymongo. For re.flags and $options, based
    on the key order of dict, the second will override the first, if
    they both exist in the query document.

    Args:
        sub_spec: Field-level operator document containing ``$regex``.

    Returns:
        A deep copy of `sub_spec` without ``$options`` and with ``$regex``
        replaced by ``{"pattern": ..., "flags": ...}``. `sub_spec` itself
        is left as it was passed in.

    Raises:
        OperationFailure: Raised by `_regex_options_check`.
    """
    _re = None
    regex_flags = ""
    opt_flags = ""
    flags = ""

    for key, val in sub_spec.items():
        if key == "$options":
            opt_flags = val
            flags = opt_flags
        if key == "$regex" and isinstance(val, (RE_PATTERN_TYPE, bson.Regex)):
            regex_flags = _FALG(val.flags)
            flags = regex_flags
            _re = val

    # Validate before touching `sub_spec`, so a rejected query never
    # leaves the caller's document with its "$regex" blanked out.
    _regex_options_check(regex_flags, opt_flags)

    if _re:
        # The regex object can't be deep-copied, so park it outside the
        # document for the duration of the copy and always put it back.
        sub_spec["$regex"] = None
        try:
            new_sub_spec = deepcopy(sub_spec)
        finally:
            sub_spec["$regex"] = _re
        pattern = _re.pattern
    else:
        new_sub_spec = deepcopy(sub_spec)
        pattern = sub_spec["$regex"]

    new_sub_spec["$regex"] = {
        "pattern": pattern,
        "flags": flags,
    }

    # (monument): This is edge case, and only MongoDB 4.0 don't fail the
    #             operation.
    #
    # if (MONTY_MONGO_COMPAT_36
    #         and "#" in new_sub_spec["$regex"]["pattern"].rsplit("\n")[-1]):
    #     # (NOTE) davidlatwe:
    #     #   if pound(#) char exists in $regex string value and not ends with
    #     #   newline(\n), Mongo raise error. (but the message seems incomplete)
    #     raise OperationFailure("Regular expression is invalid: missing )")

    if "$options" in new_sub_spec:
        # Remove $options, Monty can't digest it
        del new_sub_spec["$options"]

    return new_sub_spec
|
|
|
|
|
""" |
|
Field-level Query Operators |
|
- Comparison |
|
""" |
|
|
|
|
|
def _is_comparable_ver4(val, qry):
    """Same-weight check that also accepts a MinKey/MaxKey query."""
    if gravity(val, weight_only=True) == gravity(qry, weight_only=True):
        return True
    return isinstance(qry, (bson.MinKey, bson.MaxKey))


def _is_comparable_ver3(val, qry):
    """Return whether `val` and `qry` have the same `gravity` weight."""
    val_weight = gravity(val, weight_only=True)
    qry_weight = gravity(qry, weight_only=True)
    return val_weight == qry_weight


# Variant currently used by the comparison operators.
_is_comparable = _is_comparable_ver4
|
|
|
|
|
def _eq_match(fieldwalker, query):
    """Equality test shared by ``$eq``, ``$ne``, ``$in`` and ``$nin``.

    Args:
        fieldwalker: Walker whose ``value`` holds the field's values.
        query: Value (or document) to look for.

    Returns:
        ``True`` on a match; the result of ``null_or_missing()`` when
        `query` is None; otherwise None.
    """
    if is_duckument_type(query):
        # Document queries only ever match document values.
        for candidate in fieldwalker.value:
            if is_duckument_type(candidate) and compare_documents(
                query, candidate
            ):
                return True
        return None

    if query is None:
        return fieldwalker.value.null_or_missing()

    if isinstance(query, bson.Decimal128):
        query = _cmp_decimal(query)

    for candidate in fieldwalker.value:
        if isinstance(candidate, bson.Decimal128):
            candidate = _cmp_decimal(candidate)

        if candidate == query and _is_comparable(candidate, query):
            return True
    return None
|
|
|
|
|
def parse_eq(query):
    """Build the ``$eq`` operator: delegates to `_eq_match`."""

    def _eq(fieldwalker):
        return _eq_match(fieldwalker, query)

    return keep(query)(_eq)
|
|
|
|
|
def parse_ne(query):
    """Build the ``$ne`` operator: the negation of `_eq_match`."""

    def _ne(fieldwalker):
        matched = _eq_match(fieldwalker, query)
        return not matched

    return keep(query)(_ne)
|
|
|
|
|
def parse_gt(query):
    """Build the ``$gt`` operator.

    The operator returns True as soon as one comparable field value weighs
    more than `query`, False when `query` is a Decimal128 NaN, and None
    when nothing matched.
    """

    def _gt(fieldwalker):
        for value in fieldwalker.value:
            if not _is_comparable(value, query):
                if isinstance(query, (bson.MinKey, bson.MaxKey)):
                    return True
                continue

            if query in bson.decimal128_NaN_ls:
                return False
            if Weighted(value) > Weighted(query):
                return True

    return keep(query)(_gt)
|
|
|
|
|
def parse_gte(query):
    """Build the ``$gte`` operator.

    Decimal128 NaN and infinity queries are special-cased before the
    weighted comparison; the operator returns None when nothing matched.
    """

    def _gte(fieldwalker):
        for value in fieldwalker.value:
            if not _is_comparable(value, query):
                if isinstance(query, (bson.MinKey, bson.MaxKey)):
                    return True
                continue

            if query in bson.decimal128_NaN_ls:
                # NaN is only "greater or equal" to another NaN.
                return value in bson.decimal128_NaN_ls
            if query == bson.decimal128_INF and not value == bson.decimal128_INF:
                return False
            if Weighted(value) >= Weighted(query):
                return True

    return keep(query)(_gte)
|
|
|
|
|
def parse_lt(query):
    """Build the ``$lt`` operator.

    The operator returns True as soon as one comparable field value weighs
    less than `query`, False when a comparable value is a Decimal128 NaN,
    and None when nothing matched.
    """

    def _lt(fieldwalker):
        for value in fieldwalker.value:
            if not _is_comparable(value, query):
                if isinstance(query, (bson.MinKey, bson.MaxKey)):
                    return True
                continue

            if value in bson.decimal128_NaN_ls:
                return False
            if Weighted(value) < Weighted(query):
                return True

    return keep(query)(_lt)
|
|
|
|
|
def parse_lte(query):
    """Build the ``$lte`` operator.

    Decimal128 NaN and infinity are special-cased before the weighted
    comparison; the operator returns None when nothing matched.
    """
    _dec_NaN_INF_ls = list(bson.decimal128_NaN_ls) + [bson.decimal128_INF]

    def _lte(fieldwalker):
        for value in fieldwalker.value:
            if not _is_comparable(value, query):
                if isinstance(query, (bson.MinKey, bson.MaxKey)):
                    return True
                continue

            if query in bson.decimal128_NaN_ls:
                # NaN is only "less or equal" to another NaN.
                return value in bson.decimal128_NaN_ls
            if query == bson.decimal128_INF and value in bson.decimal128_NaN_ls:
                return False
            if query not in _dec_NaN_INF_ls and value in _dec_NaN_INF_ls:
                return False
            if Weighted(value) <= Weighted(query):
                return True

    return keep(query)(_lte)
|
|
|
|
|
def _in_match(fieldwalker, query):
    """Helper function for $in and $nin

    Plain members of `query` are tested with `_eq_match`; regex members
    are searched against string field values.

    Returns:
        True on the first match, otherwise None.

    Raises:
        OperationFailure: When a `bson.Regex` member fails to compile.
    """
    patterns = []
    plain_values = []
    # Partition first, so every regex is compiled (and validated) before
    # any matching takes place.
    for member in query:
        if is_pattern_type(member):
            patterns.append(member)
        elif isinstance(member, bson.Regex):
            try:
                compiled = member.try_compile()
            except re.error as e:
                raise OperationFailure("Regular expression is invalid: {}".format(e))
            patterns.append(compiled)
        else:
            plain_values.append(member)

    if any(_eq_match(fieldwalker, member) for member in plain_values):
        return True

    for pattern in patterns:
        if any(
            isinstance(value, string_types) and pattern.search(value)
            for value in fieldwalker.value
        ):
            return True
    return None
|
|
|
|
|
def parse_in(query):
    """Build the ``$in`` operator.

    Raises:
        OperationFailure: When `query` is not a list or one of its members
            is an operator expression document.
    """
    if not isinstance(query, list):
        raise OperationFailure("$in needs an array")

    for member in query:
        if _is_expression_obj(member):
            raise OperationFailure("cannot nest $ under $in")

    def _in(fieldwalker):
        return _in_match(fieldwalker, query)

    return keep(query)(_in)
|
|
|
|
|
def parse_nin(query):
    """Build the ``$nin`` operator: the negation of `_in_match`.

    Raises:
        OperationFailure: When `query` is not a list or one of its members
            is an operator expression document.
    """
    if not isinstance(query, list):
        raise OperationFailure("$nin needs an array")

    for member in query:
        if _is_expression_obj(member):
            raise OperationFailure("cannot nest $ under $nin")

    def _nin(fieldwalker):
        matched = _in_match(fieldwalker, query)
        return not matched

    return keep(query)(_nin)
|
|
|
|
|
""" |
|
Field-level Query Operators |
|
- Array |
|
""" |
|
|
|
|
|
def parse_all(query):
    """Build the ``$all`` operator.

    `query` is either a list of plain values, or a list in which every
    member is an ``{"$elemMatch": ...}`` document. An empty list is
    accepted and matches no document.

    Raises:
        OperationFailure: When `query` is not a list, mixes ``$elemMatch``
            with other members, or contains another ``$`` expression.
    """
    # Operators that may not appear as the first key of an $all member;
    # $eq and $not are exempt.
    field_op_ls = set(QueryFilter({}).field_ops.keys())
    field_op_ls.remove("$eq")
    field_op_ls.remove("$not")

    if not isinstance(query, list):
        raise OperationFailure("$all needs an array")

    def first_key(member):
        # None for non-documents and for empty documents, so neither
        # `query[0]` on [] nor `next()` on {} can blow up.
        if not is_duckument_type(member):
            return None
        return next(iter(member), None)

    go_match = bool(query) and first_key(query[0]) == "$elemMatch"
    if go_match:
        for q in query:
            if first_key(q) != "$elemMatch":
                raise OperationFailure("$all/$elemMatch has to be consistent")
    else:
        for q in query:
            if first_key(q) in field_op_ls:
                raise OperationFailure("no $ expressions in $all")

    @keep(query)
    def _all(fieldwalker):
        if not query:
            # An empty $all array matches no documents.
            return False

        if go_match:
            for q in query:
                queryfilter = QueryFilter(q["$elemMatch"])
                doc_type = fieldwalker.doc_type
                for value in fieldwalker.value.iter_arrays():
                    if not any(queryfilter(v, doc_type) for v in value):
                        return False
            return True

        for q in query:
            if q not in fieldwalker.value:
                return False
        return True

    return _all
|
|
|
|
|
def parse_elemMatch(query):
    """Build the sub-document form of the ``$elemMatch`` operator.

    The operator returns True when one element of the field passes a
    `QueryFilter` built from `query`, otherwise None.
    """
    # (NOTE) $elemMatch in MontyDB may require document input to proceed
    #        further filter error.OperationFailure check, here we put one
    #        fake doc {}
    probe = QueryFilter(query)
    probe({})

    def _elemMatch(fieldwalker):
        queryfilter = QueryFilter(query)
        doc_type = fieldwalker.doc_type
        for elem in fieldwalker.value.iter_elements():
            if not queryfilter(elem, doc_type):
                continue
            return True

    return keep(query)(_elemMatch)
|
|
|
|
|
def parse_size(query):
    """Build the ``$size`` operator.

    The operator returns True when one array under the field has exactly
    `query` elements, otherwise None.

    Raises:
        OperationFailure: When `query` is a float, or not an ``int``.
    """
    if isinstance(query, float):
        raise OperationFailure("$size must be a whole number")
    elif not isinstance(query, int):
        raise OperationFailure("$size needs a number")

    def _size(fieldwalker):
        for array in fieldwalker.value.iter_arrays():
            if not len(array) == query:
                continue
            return True

    return keep(query)(_size)
|
|
|
|
|
""" |
|
Field-level Query Operators |
|
- Element |
|
""" |
|
|
|
|
|
def parse_exists(query):
    """Build the ``$exists`` operator.

    The operator compares the field's existence with the truthiness of
    `query`.
    """

    def _exists(fieldwalker):
        expected = bool(query)
        return fieldwalker.value.is_exists() == expected

    return keep(query)(_exists)
|
|
|
|
|
# String alias -> numeric BSON type code, as accepted by `$type`.
BSON_TYPE_ALIAS_ID = dict(
    [
        ("double", 1),
        ("string", 2),
        ("object", 3),
        ("array", 4),
        ("binData", 5),
        # undefined (Deprecated)
        ("objectId", 7),
        ("bool", 8),
        ("date", 9),
        ("null", 10),
        ("regex", 11),
        # dbPointer (Deprecated)
        ("javascript", 13),
        # symbol (Deprecated)
        ("javascriptWithScope", 15),
        ("int", 16),
        ("timestamp", 17),
        ("long", 18),
        ("decimal", 19),
        ("minKey", -1),
        ("maxKey", 127),
    ]
)


# Every numeric type code `$type` accepts.
_BSON_TYPE_ID = tuple(code for code in BSON_TYPE_ALIAS_ID.values())
|
|
|
|
|
def obj_to_bson_type_id(obj):
    """Map a Python object to its numeric BSON type code.

    The exact type of `obj` is looked up first; objects of other types are
    then classified with ``isinstance`` checks.

    Raises:
        TypeError: When no BSON type code can be determined for `obj`.
    """
    BSON_TYPE_ID = {
        float: 1,
        # string: 2,
        bson.SON: 3,
        dict: 3,
        list: 4,
        tuple: 4,
        bson.Binary: 5,
        # bytes: 5,
        # undefined (Deprecated)
        bson.ObjectId: 7,
        bool: 8,
        datetime: 9,
        type(None): 10,
        bson.Regex: 11,
        RE_PATTERN_TYPE: 11,
        # dbPointer (Deprecated)
        # javascript: 13,
        # symbol (Deprecated)
        # javascriptWithScope: 15,
        int: 16,
        bson.Timestamp: 17,
        bson.Int64: 18,
        bson.Decimal128: 19,
        bson.MinKey: -1,
        bson.MaxKey: 127,
    }

    exact_id = BSON_TYPE_ID.get(type(obj))
    if exact_id is not None:
        return exact_id

    # Fallbacks for types absent from the table (e.g. subclasses).
    if isinstance(obj, bson.Code):  # also an instance of string_types
        return 13 if obj.scope is None else 15
    if isinstance(obj, string_types):
        return 2
    if isinstance(obj, bytes):
        return 5
    if isinstance(obj, Mapping):
        return 3

    raise TypeError("Unknown data type: {!r}".format(type(obj)))
|
|
|
|
|
def parse_type(query):
    """Build the ``$type`` operator.

    `query` is a single type alias / numeric code, or a list of them. The
    operator returns the (possibly empty) set of requested type codes
    found among the field's values, or None when the field does not exist.

    Raises:
        OperationFailure: For an empty list, an unknown alias, an invalid
            numeric code, or a member that is neither string nor int.
    """

    def collect_type_ids(values):
        return {obj_to_bson_type_id(v) for v in values}

    def to_type_codes(specs):
        if len(specs) == 0:
            raise OperationFailure("$type must match at least one type")

        codes = []
        for spec in specs:
            if isinstance(spec, string_types):
                try:
                    code = BSON_TYPE_ALIAS_ID[spec]
                except KeyError:
                    raise OperationFailure("Unknown type name alias: {}".format(spec))
                codes.append(code)
            elif isinstance(spec, int):
                if spec not in _BSON_TYPE_ID:
                    raise OperationFailure("Invalid numerical type code: {}".format(spec))
                codes.append(spec)
            else:
                raise OperationFailure(
                    "type must be represented as a number or a string"
                )
        return codes

    # A lone alias/code is handled as a one-member list.
    if not isinstance(query, list):
        query = [query]
    query = set(to_type_codes(query))

    def _type(fieldwalker):
        if not fieldwalker.value.is_exists():
            return None
        return collect_type_ids(fieldwalker.value).intersection(query)

    return keep(query)(_type)
|
|
|
|
|
""" |
|
Field-level Query Operators |
|
- Evaluation |
|
""" |
|
|
|
|
|
def parse_regex(query):
    """Build the ``$regex`` operator.

    Args:
        query: A `bson.Regex`, or a mapping with ``"pattern"`` and
            ``"flags"`` entries as produced by `_modify_regex_optins`.

    Returns:
        callable: Operator returning True when one field value is matched
        by the compiled pattern, otherwise None.

    Raises:
        OperationFailure: When the pattern or flags have the wrong type,
            or the regular expression fails to compile.
    """
    if isinstance(query, bson.Regex):
        try:
            q = query.try_compile()
        except re.error as e:
            # Same error surface as regex members of $in / $nin.
            raise OperationFailure("Regular expression is invalid: {}".format(e))
    else:
        if not isinstance(query["pattern"], string_types):
            raise OperationFailure("$regex has to be a string")
        if not isinstance(query["flags"], (string_types, _FALG)):
            raise OperationFailure("$options has to be a string")

        if isinstance(query["flags"], _FALG):
            flags = query["flags"].retrieve
        else:
            flags = re_str_flags_to_int(query["flags"])

        try:
            q = re.compile(query["pattern"], flags)
        except re.error as e:
            raise OperationFailure("Regular expression is invalid: {}".format(e))

    # `re` refuses to search bytes with a str pattern (and vice versa);
    # values of the other text kind can never match, so skip them instead
    # of letting `search` raise TypeError.
    pattern_type = type(q.pattern)

    @keep(query)
    def _regex(fieldwalker):
        for value in fieldwalker.value:
            if (
                isinstance(value, (string_types, bytes))
                and isinstance(value, pattern_type)
                and q.search(value)
            ):
                return True

    return _regex
|
|
|
|
|
def _mod_remainder_not_num_():
    """Permissive variant: a non-numeric ``$mod`` remainder is tolerated."""
    return None


def _mod_remainder_not_num_v42():
    """Strict variant: a non-numeric ``$mod`` remainder is an error.

    Raises:
        OperationFailure: Always.
    """
    # mongo-4.2.19+
    # https://jira.mongodb.org/browse/SERVER-23664
    message = "malformed mod, remainder not a number"
    raise OperationFailure(message)


# Variant currently used by `parse_mod`.
_mod_remainder_not_num = _mod_remainder_not_num_v42
|
|
|
|
|
def parse_mod(query):
    """Build the ``$mod`` operator from ``[divisor, remainder]``.

    The operator returns True when one numeric (non-bool) field value
    satisfies ``int(value % divisor) == int(remainder)``, otherwise None.

    Raises:
        OperationFailure: When `query` is not a two-member list, the
            divisor is not a number or is zero, or (depending on
            `_mod_remainder_not_num`) the remainder is not a number.
    """
    if not isinstance(query, list):
        raise OperationFailure("malformed mod, needs to be an array")
    if len(query) < 2:
        raise OperationFailure("malformed mod, not enough elements")
    if len(query) > 2:
        raise OperationFailure("malformed mod, too many elements")

    divisor = query[0]
    remainder = query[1]

    num_types = (integer_types, float, bson.Decimal128)

    if not isinstance(divisor, num_types):
        raise OperationFailure("malformed mod, divisor not a number")
    if not isinstance(remainder, num_types):
        _mod_remainder_not_num()
        # Only reached with the permissive variant: fall back to zero.
        remainder = 0

    if isinstance(divisor, bson.Decimal128):
        divisor = divisor.to_decimal()
    if isinstance(remainder, bson.Decimal128):
        remainder = remainder.to_decimal()

    if divisor == 0:
        # Reject up front; otherwise the first numeric field value would
        # trigger a ZeroDivisionError in the middle of a query.
        raise OperationFailure("divisor cannot be 0")

    def mod_scan(field_value):
        for value in field_value:
            # bool is an int subclass but is not a number for $mod.
            if isinstance(value, bool) or not isinstance(value, num_types):
                continue
            if isinstance(value, bson.Decimal128):
                value = value.to_decimal()
            if int(value % divisor) == int(remainder):
                return True
        return False

    @keep(query)
    def _mod(fieldwalker):
        if mod_scan(fieldwalker.value):
            return True

    return _mod
|
|
|
|
|
def parse_jsonSchema(query):
    """Build the ``$jsonSchema`` operator (placeholder).

    The returned operator performs no validation and always returns None.
    """

    def _jsonSchema(fieldwalker):
        return None

    return keep(query)(_jsonSchema)
|
|
|