You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
918 lines
36 KiB
918 lines
36 KiB
# Copyright 2009-present MongoDB, Inc. |
|
# |
|
# Licensed under the Apache License, Version 2.0 (the "License"); |
|
# you may not use this file except in compliance with the License. |
|
# You may obtain a copy of the License at |
|
# |
|
# http://www.apache.org/licenses/LICENSE-2.0 |
|
# |
|
# Unless required by applicable law or agreed to in writing, software |
|
# distributed under the License is distributed on an "AS IS" BASIS, |
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
# See the License for the specific language governing permissions and |
|
# limitations under the License. |
|
|
|
"""Tools for using Python's :mod:`json` module with BSON documents. |
|
|
|
This module provides two helper methods `dumps` and `loads` that wrap the |
|
native :mod:`json` methods and provide explicit BSON conversion to and from |
|
JSON. :class:`~bson.json_util.JSONOptions` provides a way to control how JSON |
|
is emitted and parsed, with the default being the Relaxed Extended JSON format. |
|
:mod:`~bson.json_util` can also generate Canonical or legacy `Extended JSON`_ |
|
when :const:`CANONICAL_JSON_OPTIONS` or :const:`LEGACY_JSON_OPTIONS` is |
|
provided, respectively. |
|
|
|
.. _Extended JSON: https://github.com/mongodb/specifications/blob/master/source/extended-json.rst |
|
|
|
Example usage (deserialization): |
|
|
|
.. doctest:: |
|
|
|
>>> from bson.json_util import loads |
|
>>> loads( |
|
... '[{"foo": [1, 2]}, {"bar": {"hello": "world"}}, {"code": {"$scope": {}, "$code": "function x() { return 1; }"}}, {"bin": {"$type": "80", "$binary": "AQIDBA=="}}]' |
|
... ) |
|
[{'foo': [1, 2]}, {'bar': {'hello': 'world'}}, {'code': Code('function x() { return 1; }', {})}, {'bin': Binary(b'...', 128)}] |
|
|
|
Example usage with :const:`RELAXED_JSON_OPTIONS` (the default): |
|
|
|
.. doctest:: |
|
|
|
>>> from bson import Binary, Code |
|
>>> from bson.json_util import dumps |
|
>>> dumps( |
|
... [ |
|
... {"foo": [1, 2]}, |
|
... {"bar": {"hello": "world"}}, |
|
... {"code": Code("function x() { return 1; }")}, |
|
... {"bin": Binary(b"\x01\x02\x03\x04")}, |
|
... ] |
|
... ) |
|
'[{"foo": [1, 2]}, {"bar": {"hello": "world"}}, {"code": {"$code": "function x() { return 1; }"}}, {"bin": {"$binary": {"base64": "AQIDBA==", "subType": "00"}}}]' |
|
|
|
Example usage (with :const:`CANONICAL_JSON_OPTIONS`): |
|
|
|
.. doctest:: |
|
|
|
>>> from bson import Binary, Code |
|
>>> from bson.json_util import dumps, CANONICAL_JSON_OPTIONS |
|
>>> dumps( |
|
... [ |
|
... {"foo": [1, 2]}, |
|
... {"bar": {"hello": "world"}}, |
|
... {"code": Code("function x() { return 1; }")}, |
|
... {"bin": Binary(b"\x01\x02\x03\x04")}, |
|
... ], |
|
... json_options=CANONICAL_JSON_OPTIONS, |
|
... ) |
|
'[{"foo": [{"$numberInt": "1"}, {"$numberInt": "2"}]}, {"bar": {"hello": "world"}}, {"code": {"$code": "function x() { return 1; }"}}, {"bin": {"$binary": {"base64": "AQIDBA==", "subType": "00"}}}]' |
|
|
|
Example usage (with :const:`LEGACY_JSON_OPTIONS`): |
|
|
|
.. doctest:: |
|
|
|
>>> from bson import Binary, Code |
|
>>> from bson.json_util import dumps, LEGACY_JSON_OPTIONS |
|
>>> dumps( |
|
... [ |
|
... {"foo": [1, 2]}, |
|
... {"bar": {"hello": "world"}}, |
|
... {"code": Code("function x() { return 1; }", {})}, |
|
... {"bin": Binary(b"\x01\x02\x03\x04")}, |
|
... ], |
|
... json_options=LEGACY_JSON_OPTIONS, |
|
... ) |
|
'[{"foo": [1, 2]}, {"bar": {"hello": "world"}}, {"code": {"$code": "function x() { return 1; }", "$scope": {}}}, {"bin": {"$binary": "AQIDBA==", "$type": "00"}}]' |
|
|
|
Alternatively, you can manually pass the `default` to :func:`json.dumps`. |
|
It won't handle :class:`~bson.binary.Binary` and :class:`~bson.code.Code` |
|
instances (as they are extended strings you can't provide custom defaults), |
|
but it will be faster as there is less recursion. |
|
|
|
.. note:: |
|
If your application does not need the flexibility offered by |
|
:class:`JSONOptions` and spends a large amount of time in the `json_util` |
|
module, look to |
|
`python-bsonjs <https://pypi.python.org/pypi/python-bsonjs>`_ for a nice |
|
performance improvement. `python-bsonjs` is a fast BSON to MongoDB |
|
Extended JSON converter for Python built on top of |
|
`libbson <https://github.com/mongodb/libbson>`_. `python-bsonjs` works best |
|
with PyMongo when using :class:`~bson.raw_bson.RawBSONDocument`. |
|
""" |
|
|
|
import base64 |
|
import datetime |
|
import json |
|
import math |
|
import re |
|
import uuid |
|
from typing import Any, Dict, Mapping, Optional, Sequence, Tuple, Type, Union, cast |
|
|
|
from bson.binary import ALL_UUID_SUBTYPES, UUID_SUBTYPE, Binary, UuidRepresentation |
|
from bson.code import Code |
|
from bson.codec_options import CodecOptions, DatetimeConversion |
|
from bson.datetime_ms import ( |
|
EPOCH_AWARE, |
|
DatetimeMS, |
|
_datetime_to_millis, |
|
_max_datetime_ms, |
|
_millis_to_datetime, |
|
) |
|
from bson.dbref import DBRef |
|
from bson.decimal128 import Decimal128 |
|
from bson.int64 import Int64 |
|
from bson.max_key import MaxKey |
|
from bson.min_key import MinKey |
|
from bson.objectid import ObjectId |
|
from bson.regex import Regex |
|
from bson.son import RE_TYPE, SON |
|
from bson.timestamp import Timestamp |
|
from bson.tz_util import utc |
|
|
|
_RE_OPT_TABLE = { |
|
"i": re.I, |
|
"l": re.L, |
|
"m": re.M, |
|
"s": re.S, |
|
"u": re.U, |
|
"x": re.X, |
|
} |
|
|
|
|
|
class DatetimeRepresentation:
    """Integer constants selecting how datetimes are written as JSON."""

    LEGACY = 0
    """Legacy MongoDB Extended JSON datetime representation.

    :class:`datetime.datetime` instances are written as
    `{"$date": <dateAsMilliseconds>}`, where `dateAsMilliseconds` is a 64-bit
    signed integer counting milliseconds since the Unix epoch UTC. This was
    the default encoding before PyMongo version 3.4.

    .. versionadded:: 3.4
    """

    NUMBERLONG = 1
    """NumberLong datetime representation.

    :class:`datetime.datetime` instances are written as
    `{"$date": {"$numberLong": "<dateAsMilliseconds>"}}`, where
    `dateAsMilliseconds` is the string form of a 64-bit signed integer
    counting milliseconds since the Unix epoch UTC.

    .. versionadded:: 3.4
    """

    ISO8601 = 2
    """ISO-8601 datetime representation.

    :class:`datetime.datetime` instances at or after the Unix epoch UTC are
    written as `{"$date": "<ISO-8601>"}`. Instances before the Unix epoch UTC
    are written as if the representation were
    :const:`~DatetimeRepresentation.NUMBERLONG`.

    .. versionadded:: 3.4
    """
|
|
|
|
|
class JSONMode:
    """Integer constants selecting the Extended JSON flavour to emit."""

    LEGACY = 0
    """Legacy Extended JSON representation.

    With this mode :func:`~bson.json_util.dumps` emits PyMongo's legacy,
    non-standard JSON output. Prefer
    :const:`~bson.json_util.JSONMode.RELAXED` or
    :const:`~bson.json_util.JSONMode.CANONICAL`.

    .. versionadded:: 3.5
    """

    RELAXED = 1
    """Relaxed Extended JSON representation.

    With this mode :func:`~bson.json_util.dumps` emits Relaxed Extended JSON,
    a mostly JSON-like format. It suits things like a web API that sends a
    document (or a projection of one) using only ordinary JSON primitives.
    In particular ``int``, :class:`~bson.int64.Int64`, and ``float`` values
    appear as native JSON numbers. This output is also the most human
    readable, which makes it handy for debugging and documentation.

    .. seealso:: The specification for Relaxed `Extended JSON`_.

    .. versionadded:: 3.5
    """

    CANONICAL = 2
    """Canonical Extended JSON representation.

    With this mode :func:`~bson.json_util.dumps` emits Canonical Extended
    JSON, a type preserving format. It suits things like testing, where the
    expected types must be stated precisely in JSON. In particular ``int``,
    :class:`~bson.int64.Int64`, and ``float`` values are written with type
    wrappers.

    .. seealso:: The specification for Canonical `Extended JSON`_.

    .. versionadded:: 3.5
    """
|
|
|
|
|
class JSONOptions(CodecOptions):
    json_mode: int
    strict_number_long: bool
    datetime_representation: int
    strict_uuid: bool

    def __init__(self, *args: Any, **kwargs: Any):
        """Encapsulates JSON options for :func:`dumps` and :func:`loads`.

        :Parameters:
          - `strict_number_long`: If ``True``, :class:`~bson.int64.Int64` objects
            are encoded to MongoDB Extended JSON's *Strict mode* type
            `NumberLong`, ie ``'{"$numberLong": "<number>" }'``. Otherwise they
            will be encoded as an `int`. When omitted, the value implied by
            `json_mode` is used: ``False`` for :const:`~JSONMode.LEGACY` and
            :const:`~JSONMode.RELAXED`, ``True`` for
            :const:`~JSONMode.CANONICAL`.
          - `datetime_representation`: The representation to use when encoding
            instances of :class:`datetime.datetime`. When omitted, the value
            implied by `json_mode` is used:
            :const:`~DatetimeRepresentation.LEGACY` for
            :const:`~JSONMode.LEGACY`,
            :const:`~DatetimeRepresentation.ISO8601` for
            :const:`~JSONMode.RELAXED` and
            :const:`~DatetimeRepresentation.NUMBERLONG` for
            :const:`~JSONMode.CANONICAL`.
          - `strict_uuid`: If ``True``, :class:`uuid.UUID` object are encoded to
            MongoDB Extended JSON's *Strict mode* type `Binary`. Otherwise it
            will be encoded as ``'{"$uuid": "<hex>" }'``. When omitted, the
            value implied by `json_mode` is used: ``False`` for
            :const:`~JSONMode.LEGACY`, ``True`` otherwise.
          - `json_mode`: The :class:`JSONMode` to use when encoding BSON types to
            Extended JSON. Defaults to :const:`~JSONMode.RELAXED`.
          - `document_class`: BSON documents returned by :func:`loads` will be
            decoded to an instance of this class. Must be a subclass of
            :class:`collections.abc.MutableMapping`. Defaults to :class:`dict`.
          - `uuid_representation`: The :class:`~bson.binary.UuidRepresentation`
            to use when encoding and decoding instances of :class:`uuid.UUID`.
            Defaults to :const:`~bson.binary.UuidRepresentation.UNSPECIFIED`.
          - `tz_aware`: If ``True``, MongoDB Extended JSON's *Strict mode* type
            `Date` will be decoded to timezone aware instances of
            :class:`datetime.datetime`. Otherwise they will be naive. Defaults
            to ``False``.
          - `tzinfo`: A :class:`datetime.tzinfo` subclass that specifies the
            timezone from which :class:`~datetime.datetime` objects should be
            decoded. Defaults to :const:`~bson.tz_util.utc` when `tz_aware` is
            ``True``.
          - `datetime_conversion`: Specifies how UTC datetimes should be decoded
            within BSON. Valid options include 'datetime_ms' to return as a
            DatetimeMS, 'datetime' to return as a datetime.datetime and
            raising a ValueError for out-of-range values, 'datetime_auto' to
            return DatetimeMS objects when the underlying datetime is
            out-of-range and 'datetime_clamp' to clamp to the minimum and
            maximum possible datetimes. Defaults to 'datetime'. See
            :ref:`handling-out-of-range-datetimes` for details.
          - `args`: arguments to :class:`~bson.codec_options.CodecOptions`
          - `kwargs`: arguments to :class:`~bson.codec_options.CodecOptions`

        :Raises:
          :exc:`ValueError` if `json_mode` or `datetime_representation` is not
          one of the known constants, or if an explicit option contradicts
          what :const:`~JSONMode.RELAXED` or :const:`~JSONMode.CANONICAL`
          requires.

        .. seealso:: The specification for Relaxed and Canonical `Extended JSON`_.

        .. versionchanged:: 4.0
           The default for `json_mode` was changed from :const:`JSONMode.LEGACY`
           to :const:`JSONMode.RELAXED`.
           The default for `uuid_representation` was changed from
           :const:`~bson.binary.UuidRepresentation.PYTHON_LEGACY` to
           :const:`~bson.binary.UuidRepresentation.UNSPECIFIED`.

        .. versionchanged:: 3.5
           Accepts the optional parameter `json_mode`.

        .. versionchanged:: 4.0
           Changed default value of `tz_aware` to False.
        """
        # All state is set up in __new__; nothing is forwarded here.
        super().__init__()

    def __new__(
        cls: Type["JSONOptions"],
        strict_number_long: Optional[bool] = None,
        datetime_representation: Optional[int] = None,
        strict_uuid: Optional[bool] = None,
        json_mode: int = JSONMode.RELAXED,
        *args: Any,
        **kwargs: Any,
    ) -> "JSONOptions":
        # JSON decoding yields naive datetimes unless tz_aware is requested;
        # when it is, fall back to UTC if the caller named no tzinfo.
        kwargs["tz_aware"] = kwargs.get("tz_aware", False)
        if kwargs["tz_aware"]:
            kwargs["tzinfo"] = kwargs.get("tzinfo", utc)
        if datetime_representation not in (
            DatetimeRepresentation.LEGACY,
            DatetimeRepresentation.NUMBERLONG,
            DatetimeRepresentation.ISO8601,
            None,
        ):
            raise ValueError(
                "JSONOptions.datetime_representation must be one of LEGACY, "
                "NUMBERLONG, or ISO8601 from DatetimeRepresentation."
            )
        self = cast(JSONOptions, super().__new__(cls, *args, **kwargs))
        if json_mode not in (JSONMode.LEGACY, JSONMode.RELAXED, JSONMode.CANONICAL):
            raise ValueError(
                "JSONOptions.json_mode must be one of LEGACY, RELAXED, "
                "or CANONICAL from JSONMode."
            )
        self.json_mode = json_mode
        if self.json_mode == JSONMode.RELAXED:
            # RELAXED fixes all three encoding options; an explicit value is
            # accepted only if it agrees with the fixed one.
            if strict_number_long:
                raise ValueError("Cannot specify strict_number_long=True with JSONMode.RELAXED")
            if datetime_representation not in (None, DatetimeRepresentation.ISO8601):
                raise ValueError(
                    "datetime_representation must be DatetimeRepresentation."
                    "ISO8601 or omitted with JSONMode.RELAXED"
                )
            if strict_uuid not in (None, True):
                raise ValueError("Cannot specify strict_uuid=False with JSONMode.RELAXED")
            self.strict_number_long = False
            self.datetime_representation = DatetimeRepresentation.ISO8601
            self.strict_uuid = True
        elif self.json_mode == JSONMode.CANONICAL:
            # CANONICAL likewise fixes all three encoding options.
            if strict_number_long not in (None, True):
                raise ValueError("Cannot specify strict_number_long=False with JSONMode.CANONICAL")
            if datetime_representation not in (None, DatetimeRepresentation.NUMBERLONG):
                raise ValueError(
                    "datetime_representation must be DatetimeRepresentation."
                    "NUMBERLONG or omitted with JSONMode.CANONICAL"
                )
            if strict_uuid not in (None, True):
                raise ValueError("Cannot specify strict_uuid=False with JSONMode.CANONICAL")
            self.strict_number_long = True
            self.datetime_representation = DatetimeRepresentation.NUMBERLONG
            self.strict_uuid = True
        else:  # JSONMode.LEGACY
            # LEGACY is the only mode in which the three options are freely
            # configurable; start from the legacy defaults.
            self.strict_number_long = False
            self.datetime_representation = DatetimeRepresentation.LEGACY
            self.strict_uuid = False
            if strict_number_long is not None:
                self.strict_number_long = strict_number_long
            if datetime_representation is not None:
                self.datetime_representation = datetime_representation
            if strict_uuid is not None:
                self.strict_uuid = strict_uuid
        return self

    def _arguments_repr(self) -> str:
        """Return the JSON-specific options followed by the base class ones."""
        return (
            f"strict_number_long={self.strict_number_long!r}, "
            f"datetime_representation={self.datetime_representation!r}, "
            f"strict_uuid={self.strict_uuid!r}, json_mode={self.json_mode!r}, "
            f"{super()._arguments_repr()}"
        )

    def _options_dict(self) -> Dict[Any, Any]:
        """Return the base class options dict extended with the JSON options."""
        # TODO: PYTHON-2442 use _asdict() instead
        options_dict = super()._options_dict()
        options_dict.update(
            {
                "strict_number_long": self.strict_number_long,
                "datetime_representation": self.datetime_representation,
                "strict_uuid": self.strict_uuid,
                "json_mode": self.json_mode,
            }
        )
        return options_dict

    def with_options(self, **kwargs: Any) -> "JSONOptions":
        """
        Make a copy of this JSONOptions, overriding some options::

            >>> from bson.json_util import LEGACY_JSON_OPTIONS
            >>> LEGACY_JSON_OPTIONS.strict_uuid
            False
            >>> json_options = LEGACY_JSON_OPTIONS.with_options(strict_uuid=True)
            >>> json_options.strict_uuid
            True

        The copy is built by calling :class:`JSONOptions` again, so the same
        validation applies and :exc:`ValueError` is raised for combinations
        the resulting `json_mode` does not allow.

        .. versionadded:: 3.12
        """
        opts = self._options_dict()
        for opt in ("strict_number_long", "datetime_representation", "strict_uuid", "json_mode"):
            opts[opt] = kwargs.get(opt, getattr(self, opt))
        opts.update(kwargs)
        return JSONOptions(**opts)
|
|
|
|
|
LEGACY_JSON_OPTIONS: JSONOptions = JSONOptions(json_mode=JSONMode.LEGACY)
""":class:`JSONOptions` that encode to PyMongo's legacy JSON format.

.. seealso:: The documentation for :const:`bson.json_util.JSONMode.LEGACY`.

.. versionadded:: 3.5
"""

CANONICAL_JSON_OPTIONS: JSONOptions = JSONOptions(json_mode=JSONMode.CANONICAL)
""":class:`JSONOptions` that encode to Canonical Extended JSON.

.. seealso:: The documentation for :const:`bson.json_util.JSONMode.CANONICAL`.

.. versionadded:: 3.5
"""

RELAXED_JSON_OPTIONS: JSONOptions = JSONOptions(json_mode=JSONMode.RELAXED)
""":class:`JSONOptions` that encode to Relaxed Extended JSON.

.. seealso:: The documentation for :const:`bson.json_util.JSONMode.RELAXED`.

.. versionadded:: 3.5
"""

DEFAULT_JSON_OPTIONS: JSONOptions = RELAXED_JSON_OPTIONS
"""The :class:`JSONOptions` used for JSON encoding/decoding by default.

This is the same object as :const:`RELAXED_JSON_OPTIONS`.

.. versionchanged:: 4.0
   Changed from :const:`LEGACY_JSON_OPTIONS` to
   :const:`RELAXED_JSON_OPTIONS`.

.. versionadded:: 3.4
"""
|
|
|
|
|
def dumps(obj: Any, *args: Any, **kwargs: Any) -> str:
    """Serialize `obj` to a JSON string via :func:`json.dumps`.

    The object is first converted recursively so that every BSON type,
    including :class:`~bson.binary.Binary` and :class:`~bson.code.Code`, is
    replaced by its Extended JSON form. Remaining positional and keyword
    arguments are passed through to :func:`json.dumps`.

    :Parameters:
      - `json_options`: A :class:`JSONOptions` instance used to modify the
        encoding of MongoDB Extended JSON types. Defaults to
        :const:`DEFAULT_JSON_OPTIONS`.

    .. versionchanged:: 4.0
       Now outputs MongoDB Relaxed Extended JSON by default (using
       :const:`DEFAULT_JSON_OPTIONS`).

    .. versionchanged:: 3.4
       Accepts optional parameter `json_options`. See :class:`JSONOptions`.
    """
    # json_options is ours, not json.dumps's, so strip it before forwarding.
    options = kwargs.pop("json_options", DEFAULT_JSON_OPTIONS)
    converted = _json_convert(obj, options)
    return json.dumps(converted, *args, **kwargs)
|
|
|
|
|
def loads(s: Union[str, bytes, bytearray], *args: Any, **kwargs: Any) -> Any:
    """Parse `s` with :func:`json.loads`, decoding Extended JSON to BSON types.

    An ``object_pairs_hook`` performing the BSON type conversion is installed
    automatically (replacing any supplied by the caller). Remaining positional
    and keyword arguments are passed through to :func:`json.loads`.

    Raises ``TypeError``, ``ValueError``, ``KeyError``, or
    :exc:`~bson.errors.InvalidId` on invalid MongoDB Extended JSON.

    :Parameters:
      - `json_options`: A :class:`JSONOptions` instance used to modify the
        decoding of MongoDB Extended JSON types. Defaults to
        :const:`DEFAULT_JSON_OPTIONS`.

    .. versionchanged:: 4.0
       Now loads :class:`datetime.datetime` instances as naive by default. To
       load timezone aware instances utilize the `json_options` parameter.
       See :ref:`tz_aware_default_change` for an example.

    .. versionchanged:: 3.5
       Parses Relaxed and Canonical Extended JSON as well as PyMongo's legacy
       format. Now raises ``TypeError`` or ``ValueError`` when parsing JSON
       type wrappers with values of the wrong type or any extra keys.

    .. versionchanged:: 3.4
       Accepts optional parameter `json_options`. See :class:`JSONOptions`.
    """
    options = kwargs.pop("json_options", DEFAULT_JSON_OPTIONS)

    def _pairs_hook(pairs: Any) -> Any:
        # Bind the chosen options into the hook json.loads will call.
        return object_pairs_hook(pairs, options)

    kwargs["object_pairs_hook"] = _pairs_hook
    return json.loads(s, *args, **kwargs)
|
|
|
|
|
def _json_convert(obj: Any, json_options: JSONOptions = DEFAULT_JSON_OPTIONS) -> Any:
    """Recursively replace BSON types in `obj` with JSON-serializable values.

    Objects exposing ``items`` become a :class:`~bson.son.SON` with converted
    values; other iterables except ``str``/``bytes`` become a list of
    converted elements; anything else goes through :func:`default`, and is
    returned unchanged if :func:`default` rejects it with ``TypeError``.
    """
    if hasattr(obj, "items"):
        return SON((key, _json_convert(value, json_options)) for key, value in obj.items())
    if hasattr(obj, "__iter__") and not isinstance(obj, (str, bytes)):
        return [_json_convert(element, json_options) for element in obj]
    try:
        converted = default(obj, json_options)
    except TypeError:
        # Not a BSON type: leave it for json.dumps to handle (or reject).
        return obj
    return converted
|
|
|
|
|
def object_pairs_hook(
    pairs: Sequence[Tuple[str, Any]], json_options: JSONOptions = DEFAULT_JSON_OPTIONS
) -> Any:
    """Build a ``json_options.document_class`` from `pairs` and decode it.

    The document is handed to :func:`object_hook`, whose result is returned.
    """
    document = json_options.document_class(pairs)
    return object_hook(document, json_options)
|
|
|
|
|
def object_hook(dct: Mapping[str, Any], json_options: JSONOptions = DEFAULT_JSON_OPTIONS) -> Any:
    """Decode one parsed JSON object, turning type wrappers into BSON types.

    The wrapper keys are probed in a fixed order and the first one present
    decides which parser runs. A document containing none of them is returned
    unchanged.
    """
    if "$oid" in dct:
        return _parse_canonical_oid(dct)
    # A DBRef needs a string "$ref", an "$id", and a "$db" that is either
    # absent/None or a string.
    ref_is_str = isinstance(dct.get("$ref"), str)
    if ref_is_str and "$id" in dct and isinstance(dct.get("$db"), (str, type(None))):
        return _parse_canonical_dbref(dct)
    if "$date" in dct:
        return _parse_canonical_datetime(dct, json_options)
    if "$regex" in dct:
        return _parse_legacy_regex(dct)
    if "$minKey" in dct:
        return _parse_canonical_minkey(dct)
    if "$maxKey" in dct:
        return _parse_canonical_maxkey(dct)
    if "$binary" in dct:
        # The legacy form carries a sibling "$type" key; the canonical form
        # nests everything under "$binary".
        parse_binary = _parse_legacy_binary if "$type" in dct else _parse_canonical_binary
        return parse_binary(dct, json_options)
    if "$code" in dct:
        return _parse_canonical_code(dct)
    if "$uuid" in dct:
        return _parse_legacy_uuid(dct, json_options)
    if "$undefined" in dct:
        return None
    if "$numberLong" in dct:
        return _parse_canonical_int64(dct)
    if "$timestamp" in dct:
        stamp = dct["$timestamp"]
        return Timestamp(stamp["t"], stamp["i"])
    if "$numberDecimal" in dct:
        return _parse_canonical_decimal128(dct)
    if "$dbPointer" in dct:
        return _parse_canonical_dbpointer(dct)
    if "$regularExpression" in dct:
        return _parse_canonical_regex(dct)
    if "$symbol" in dct:
        return _parse_canonical_symbol(dct)
    if "$numberInt" in dct:
        return _parse_canonical_int32(dct)
    if "$numberDouble" in dct:
        return _parse_canonical_double(dct)
    return dct
|
|
|
|
|
def _parse_legacy_regex(doc: Any) -> Any: |
|
pattern = doc["$regex"] |
|
# Check if this is the $regex query operator. |
|
if not isinstance(pattern, (str, bytes)): |
|
return doc |
|
flags = 0 |
|
# PyMongo always adds $options but some other tools may not. |
|
for opt in doc.get("$options", ""): |
|
flags |= _RE_OPT_TABLE.get(opt, 0) |
|
return Regex(pattern, flags) |
|
|
|
|
|
def _parse_legacy_uuid(doc: Any, json_options: JSONOptions) -> Union[Binary, uuid.UUID]: |
|
"""Decode a JSON legacy $uuid to Python UUID.""" |
|
if len(doc) != 1: |
|
raise TypeError(f"Bad $uuid, extra field(s): {doc}") |
|
if not isinstance(doc["$uuid"], str): |
|
raise TypeError(f"$uuid must be a string: {doc}") |
|
if json_options.uuid_representation == UuidRepresentation.UNSPECIFIED: |
|
return Binary.from_uuid(uuid.UUID(doc["$uuid"])) |
|
else: |
|
return uuid.UUID(doc["$uuid"]) |
|
|
|
|
|
def _binary_or_uuid(data: Any, subtype: int, json_options: JSONOptions) -> Union[Binary, uuid.UUID]:
    """Wrap decoded binary `data` according to its BSON `subtype`.

    Non-UUID subtypes: subtype 0 returns `data` itself, any other subtype
    returns ``Binary(data, subtype)``. UUID subtypes return a
    :class:`~bson.binary.Binary` when the configured representation is
    UNSPECIFIED and otherwise the result of ``Binary.as_uuid``.
    """
    if subtype not in ALL_UUID_SUBTYPES:
        if subtype == 0:
            # cast() only informs the type checker; the raw data is returned.
            return cast(uuid.UUID, data)
        return Binary(data, subtype)

    # special handling for UUID
    representation = json_options.uuid_representation
    wrapped = Binary(data, subtype)
    if representation == UuidRepresentation.UNSPECIFIED:
        return wrapped
    if subtype == UUID_SUBTYPE:
        # Legacy behavior: use STANDARD with binary subtype 4.
        representation = UuidRepresentation.STANDARD
    elif representation == UuidRepresentation.STANDARD:
        # subtype == OLD_UUID_SUBTYPE
        # Legacy behavior: STANDARD is the same as PYTHON_LEGACY.
        representation = UuidRepresentation.PYTHON_LEGACY
    return wrapped.as_uuid(representation)
|
|
|
|
|
def _parse_legacy_binary(doc: Any, json_options: JSONOptions) -> Union[Binary, uuid.UUID]:
    """Decode a legacy ``{"$binary": <base64>, "$type": <subtype>}`` document.

    ``$type`` may be an int or a hexadecimal string. The input document is
    only read, never modified, so read-only mappings are accepted.
    """
    type_field = doc["$type"]
    if isinstance(type_field, int):
        # Normalize to the two-digit hex string form (kept in a local rather
        # than written back into the caller's document).
        type_field = "%02x" % type_field
    subtype = int(type_field, 16)
    if subtype >= 0xFFFFFF80:  # Handle mongoexport values
        # Keep only what follows the first six hex digits.
        subtype = int(type_field[6:], 16)
    data = base64.b64decode(doc["$binary"].encode())
    return _binary_or_uuid(data, subtype, json_options)
|
|
|
|
|
def _parse_canonical_binary(doc: Any, json_options: JSONOptions) -> Union[Binary, uuid.UUID]: |
|
binary = doc["$binary"] |
|
b64 = binary["base64"] |
|
subtype = binary["subType"] |
|
if not isinstance(b64, str): |
|
raise TypeError(f"$binary base64 must be a string: {doc}") |
|
if not isinstance(subtype, str) or len(subtype) > 2: |
|
raise TypeError(f"$binary subType must be a string at most 2 characters: {doc}") |
|
if len(binary) != 2: |
|
raise TypeError(f'$binary must include only "base64" and "subType" components: {doc}') |
|
|
|
data = base64.b64decode(b64.encode()) |
|
return _binary_or_uuid(data, int(subtype, 16), json_options) |
|
|
|
|
|
def _parse_canonical_datetime( |
|
doc: Any, json_options: JSONOptions |
|
) -> Union[datetime.datetime, DatetimeMS]: |
|
"""Decode a JSON datetime to python datetime.datetime.""" |
|
dtm = doc["$date"] |
|
if len(doc) != 1: |
|
raise TypeError(f"Bad $date, extra field(s): {doc}") |
|
# mongoexport 2.6 and newer |
|
if isinstance(dtm, str): |
|
# Parse offset |
|
if dtm[-1] == "Z": |
|
dt = dtm[:-1] |
|
offset = "Z" |
|
elif dtm[-6] in ("+", "-") and dtm[-3] == ":": |
|
# (+|-)HH:MM |
|
dt = dtm[:-6] |
|
offset = dtm[-6:] |
|
elif dtm[-5] in ("+", "-"): |
|
# (+|-)HHMM |
|
dt = dtm[:-5] |
|
offset = dtm[-5:] |
|
elif dtm[-3] in ("+", "-"): |
|
# (+|-)HH |
|
dt = dtm[:-3] |
|
offset = dtm[-3:] |
|
else: |
|
dt = dtm |
|
offset = "" |
|
|
|
# Parse the optional factional seconds portion. |
|
dot_index = dt.rfind(".") |
|
microsecond = 0 |
|
if dot_index != -1: |
|
microsecond = int(float(dt[dot_index:]) * 1000000) |
|
dt = dt[:dot_index] |
|
|
|
aware = datetime.datetime.strptime(dt, "%Y-%m-%dT%H:%M:%S").replace( |
|
microsecond=microsecond, tzinfo=utc |
|
) |
|
|
|
if offset and offset != "Z": |
|
if len(offset) == 6: |
|
hours, minutes = offset[1:].split(":") |
|
secs = int(hours) * 3600 + int(minutes) * 60 |
|
elif len(offset) == 5: |
|
secs = int(offset[1:3]) * 3600 + int(offset[3:]) * 60 |
|
elif len(offset) == 3: |
|
secs = int(offset[1:3]) * 3600 |
|
if offset[0] == "-": |
|
secs *= -1 |
|
aware = aware - datetime.timedelta(seconds=secs) |
|
|
|
if json_options.tz_aware: |
|
if json_options.tzinfo: |
|
aware = aware.astimezone(json_options.tzinfo) |
|
if json_options.datetime_conversion == DatetimeConversion.DATETIME_MS: |
|
return DatetimeMS(aware) |
|
return aware |
|
else: |
|
aware_tzinfo_none = aware.replace(tzinfo=None) |
|
if json_options.datetime_conversion == DatetimeConversion.DATETIME_MS: |
|
return DatetimeMS(aware_tzinfo_none) |
|
return aware_tzinfo_none |
|
return _millis_to_datetime(int(dtm), json_options) |
|
|
|
|
|
def _parse_canonical_oid(doc: Any) -> ObjectId: |
|
"""Decode a JSON ObjectId to bson.objectid.ObjectId.""" |
|
if len(doc) != 1: |
|
raise TypeError(f"Bad $oid, extra field(s): {doc}") |
|
return ObjectId(doc["$oid"]) |
|
|
|
|
|
def _parse_canonical_symbol(doc: Any) -> str: |
|
"""Decode a JSON symbol to Python string.""" |
|
symbol = doc["$symbol"] |
|
if len(doc) != 1: |
|
raise TypeError(f"Bad $symbol, extra field(s): {doc}") |
|
return str(symbol) |
|
|
|
|
|
def _parse_canonical_code(doc: Any) -> Code: |
|
"""Decode a JSON code to bson.code.Code.""" |
|
for key in doc: |
|
if key not in ("$code", "$scope"): |
|
raise TypeError(f"Bad $code, extra field(s): {doc}") |
|
return Code(doc["$code"], scope=doc.get("$scope")) |
|
|
|
|
|
def _parse_canonical_regex(doc: Any) -> Regex: |
|
"""Decode a JSON regex to bson.regex.Regex.""" |
|
regex = doc["$regularExpression"] |
|
if len(doc) != 1: |
|
raise TypeError(f"Bad $regularExpression, extra field(s): {doc}") |
|
if len(regex) != 2: |
|
raise TypeError( |
|
'Bad $regularExpression must include only "pattern"' |
|
'and "options" components: {}'.format(doc) |
|
) |
|
opts = regex["options"] |
|
if not isinstance(opts, str): |
|
raise TypeError( |
|
"Bad $regularExpression options, options must be string, was type %s" % (type(opts)) |
|
) |
|
return Regex(regex["pattern"], opts) |
|
|
|
|
|
def _parse_canonical_dbref(doc: Any) -> DBRef:
    """Decode a JSON DBRef to bson.dbref.DBRef.

    ``$ref``, ``$id`` and ``$db`` are removed from `doc`; whatever keys remain
    are forwarded to :class:`~bson.dbref.DBRef` as extra keyword arguments.
    """
    collection = doc.pop("$ref")
    ref_id = doc.pop("$id")
    database = doc.pop("$db", None)
    return DBRef(collection, ref_id, database=database, **doc)
|
|
|
|
|
def _parse_canonical_dbpointer(doc: Any) -> Any: |
|
"""Decode a JSON (deprecated) DBPointer to bson.dbref.DBRef.""" |
|
dbref = doc["$dbPointer"] |
|
if len(doc) != 1: |
|
raise TypeError(f"Bad $dbPointer, extra field(s): {doc}") |
|
if isinstance(dbref, DBRef): |
|
dbref_doc = dbref.as_doc() |
|
# DBPointer must not contain $db in its value. |
|
if dbref.database is not None: |
|
raise TypeError(f"Bad $dbPointer, extra field $db: {dbref_doc}") |
|
if not isinstance(dbref.id, ObjectId): |
|
raise TypeError(f"Bad $dbPointer, $id must be an ObjectId: {dbref_doc}") |
|
if len(dbref_doc) != 2: |
|
raise TypeError(f"Bad $dbPointer, extra field(s) in DBRef: {dbref_doc}") |
|
return dbref |
|
else: |
|
raise TypeError(f"Bad $dbPointer, expected a DBRef: {doc}") |
|
|
|
|
|
def _parse_canonical_int32(doc: Any) -> int: |
|
"""Decode a JSON int32 to python int.""" |
|
i_str = doc["$numberInt"] |
|
if len(doc) != 1: |
|
raise TypeError(f"Bad $numberInt, extra field(s): {doc}") |
|
if not isinstance(i_str, str): |
|
raise TypeError(f"$numberInt must be string: {doc}") |
|
return int(i_str) |
|
|
|
|
|
def _parse_canonical_int64(doc: Any) -> Int64: |
|
"""Decode a JSON int64 to bson.int64.Int64.""" |
|
l_str = doc["$numberLong"] |
|
if len(doc) != 1: |
|
raise TypeError(f"Bad $numberLong, extra field(s): {doc}") |
|
return Int64(l_str) |
|
|
|
|
|
def _parse_canonical_double(doc: Any) -> float: |
|
"""Decode a JSON double to python float.""" |
|
d_str = doc["$numberDouble"] |
|
if len(doc) != 1: |
|
raise TypeError(f"Bad $numberDouble, extra field(s): {doc}") |
|
if not isinstance(d_str, str): |
|
raise TypeError(f"$numberDouble must be string: {doc}") |
|
return float(d_str) |
|
|
|
|
|
def _parse_canonical_decimal128(doc: Any) -> Decimal128: |
|
"""Decode a JSON decimal128 to bson.decimal128.Decimal128.""" |
|
d_str = doc["$numberDecimal"] |
|
if len(doc) != 1: |
|
raise TypeError(f"Bad $numberDecimal, extra field(s): {doc}") |
|
if not isinstance(d_str, str): |
|
raise TypeError(f"$numberDecimal must be string: {doc}") |
|
return Decimal128(d_str) |
|
|
|
|
|
def _parse_canonical_minkey(doc: Any) -> MinKey: |
|
"""Decode a JSON MinKey to bson.min_key.MinKey.""" |
|
if type(doc["$minKey"]) is not int or doc["$minKey"] != 1: |
|
raise TypeError(f"$minKey value must be 1: {doc}") |
|
if len(doc) != 1: |
|
raise TypeError(f"Bad $minKey, extra field(s): {doc}") |
|
return MinKey() |
|
|
|
|
|
def _parse_canonical_maxkey(doc: Any) -> MaxKey: |
|
"""Decode a JSON MaxKey to bson.max_key.MaxKey.""" |
|
if type(doc["$maxKey"]) is not int or doc["$maxKey"] != 1: |
|
raise TypeError("$maxKey value must be 1: %s", (doc,)) |
|
if len(doc) != 1: |
|
raise TypeError(f"Bad $minKey, extra field(s): {doc}") |
|
return MaxKey() |
|
|
|
|
|
def _encode_binary(data: bytes, subtype: int, json_options: JSONOptions) -> Any:
    """Encode binary `data` with `subtype` as an Extended JSON ``$binary``.

    LEGACY mode yields ``{"$binary": <base64>, "$type": <hex>}``; every other
    mode yields ``{"$binary": {"base64": <base64>, "subType": <hex>}}``. The
    subtype is always rendered as at least two lowercase hex digits.
    """
    legacy = json_options.json_mode == JSONMode.LEGACY
    encoded = base64.b64encode(data).decode()
    subtype_hex = "%02x" % subtype
    if legacy:
        return SON([("$binary", encoded), ("$type", subtype_hex)])
    return {"$binary": SON([("base64", encoded), ("subType", subtype_hex)])}
|
|
|
|
|
def default(obj: Any, json_options: JSONOptions = DEFAULT_JSON_OPTIONS) -> Any:
    """Convert a single BSON value to its Extended JSON representation.

    :Parameters:
      - `obj`: The value to convert.
      - `json_options`: A :class:`JSONOptions` instance controlling the
        encoding. Defaults to :const:`DEFAULT_JSON_OPTIONS`.

    Returns a ``dict``/:class:`~bson.son.SON` type wrapper (or `obj` itself
    for ``bool``). Raises ``TypeError`` for any value this function has no
    encoding for, which lets it be used as the `default` callback of
    :func:`json.dumps`.
    """
    # We preserve key order when rendering SON, DBRef, etc. as JSON by
    # returning a SON for those types instead of a dict.
    if isinstance(obj, ObjectId):
        return {"$oid": str(obj)}
    if isinstance(obj, DBRef):
        return _json_convert(obj.as_doc(), json_options=json_options)
    if isinstance(obj, datetime.datetime):
        if json_options.datetime_representation == DatetimeRepresentation.ISO8601:
            # Naive datetimes are treated as UTC.
            if not obj.tzinfo:
                obj = obj.replace(tzinfo=utc)
                assert obj.tzinfo is not None
            # Only datetimes at or after the epoch use the ISO-8601 string;
            # earlier ones fall through to the millisecond forms below.
            if obj >= EPOCH_AWARE:
                off = obj.tzinfo.utcoffset(obj)
                if (off.days, off.seconds, off.microseconds) == (0, 0, 0):  # type: ignore
                    tz_string = "Z"
                else:
                    tz_string = obj.strftime("%z")
                # Millisecond precision; the fraction is omitted when zero.
                millis = int(obj.microsecond / 1000)
                fracsecs = ".%03d" % (millis,) if millis else ""
                return {
                    "$date": "{}{}{}".format(obj.strftime("%Y-%m-%dT%H:%M:%S"), fracsecs, tz_string)
                }

        millis = _datetime_to_millis(obj)
        if json_options.datetime_representation == DatetimeRepresentation.LEGACY:
            return {"$date": millis}
        return {"$date": {"$numberLong": str(millis)}}
    if isinstance(obj, DatetimeMS):
        if (
            json_options.datetime_representation == DatetimeRepresentation.ISO8601
            and 0 <= int(obj) <= _max_datetime_ms()
        ):
            return default(obj.as_datetime(), json_options)
        elif json_options.datetime_representation == DatetimeRepresentation.LEGACY:
            # NOTE(review): this emits the millisecond count as a *string*,
            # unlike the datetime branch above which emits an int. A string
            # "$date" is parsed as ISO-8601 on the way back in, so this value
            # does not appear to round-trip -- confirm whether it is intended.
            return {"$date": str(int(obj))}
        return {"$date": {"$numberLong": str(int(obj))}}
    if json_options.strict_number_long and isinstance(obj, Int64):
        return {"$numberLong": str(obj)}
    if isinstance(obj, (RE_TYPE, Regex)):
        # Option letters are emitted in alphabetical order.
        flags = ""
        if obj.flags & re.IGNORECASE:
            flags += "i"
        if obj.flags & re.LOCALE:
            flags += "l"
        if obj.flags & re.MULTILINE:
            flags += "m"
        if obj.flags & re.DOTALL:
            flags += "s"
        if obj.flags & re.UNICODE:
            flags += "u"
        if obj.flags & re.VERBOSE:
            flags += "x"
        if isinstance(obj.pattern, str):
            pattern = obj.pattern
        else:
            pattern = obj.pattern.decode("utf-8")
        if json_options.json_mode == JSONMode.LEGACY:
            return SON([("$regex", pattern), ("$options", flags)])
        return {"$regularExpression": SON([("pattern", pattern), ("options", flags)])}
    if isinstance(obj, MinKey):
        return {"$minKey": 1}
    if isinstance(obj, MaxKey):
        return {"$maxKey": 1}
    if isinstance(obj, Timestamp):
        return {"$timestamp": SON([("t", obj.time), ("i", obj.inc)])}
    if isinstance(obj, Code):
        if obj.scope is None:
            return {"$code": str(obj)}
        return SON([("$code", str(obj)), ("$scope", _json_convert(obj.scope, json_options))])
    if isinstance(obj, Binary):
        return _encode_binary(obj, obj.subtype, json_options)
    if isinstance(obj, bytes):
        return _encode_binary(obj, 0, json_options)
    if isinstance(obj, uuid.UUID):
        if json_options.strict_uuid:
            binval = Binary.from_uuid(obj, uuid_representation=json_options.uuid_representation)
            return _encode_binary(binval, binval.subtype, json_options)
        else:
            return {"$uuid": obj.hex}
    if isinstance(obj, Decimal128):
        return {"$numberDecimal": str(obj)}
    # bool is a subclass of int, so it must be handled before the int check.
    if isinstance(obj, bool):
        return obj
    if json_options.json_mode == JSONMode.CANONICAL and isinstance(obj, int):
        if -(2**31) <= obj < 2**31:
            return {"$numberInt": str(obj)}
        return {"$numberLong": str(obj)}
    if json_options.json_mode != JSONMode.LEGACY and isinstance(obj, float):
        if math.isnan(obj):
            return {"$numberDouble": "NaN"}
        elif math.isinf(obj):
            representation = "Infinity" if obj > 0 else "-Infinity"
            return {"$numberDouble": representation}
        elif json_options.json_mode == JSONMode.CANONICAL:
            # repr() will return the shortest string guaranteed to produce the
            # original value, when float() is called on it.
            return {"$numberDouble": str(repr(obj))}
    # An f-string is used because "%r" % obj mis-formats when obj is a tuple.
    raise TypeError(f"{obj!r} is not JSON serializable")
|
|
|