You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
196 lines
7.2 KiB
196 lines
7.2 KiB
# Copyright 2015-present MongoDB, Inc. |
|
# |
|
# Licensed under the Apache License, Version 2.0 (the "License"); |
|
# you may not use this file except in compliance with the License. |
|
# You may obtain a copy of the License at |
|
# |
|
# http://www.apache.org/licenses/LICENSE-2.0 |
|
# |
|
# Unless required by applicable law or agreed to in writing, software |
|
# distributed under the License is distributed on an "AS IS" BASIS, |
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
# See the License for the specific language governing permissions and |
|
# limitations under the License. |
|
|
|
"""Tools for representing raw BSON documents. |
|
|
|
Inserting and Retrieving RawBSONDocuments |
|
========================================= |
|
|
|
Example: Moving a document between different databases/collections |
|
|
|
.. doctest:: |
|
|
|
>>> import bson |
|
>>> from pymongo import MongoClient |
|
>>> from bson.raw_bson import RawBSONDocument |
|
>>> client = MongoClient(document_class=RawBSONDocument) |
|
>>> client.drop_database("db") |
|
>>> client.drop_database("replica_db") |
|
>>> db = client.db |
|
>>> result = db.test.insert_many( |
|
... [{"_id": 1, "a": 1}, {"_id": 2, "b": 1}, {"_id": 3, "c": 1}, {"_id": 4, "d": 1}] |
|
... ) |
|
>>> replica_db = client.replica_db |
|
>>> for doc in db.test.find(): |
|
... print(f"raw document: {doc.raw}") |
|
... print(f"decoded document: {bson.decode(doc.raw)}") |
|
... result = replica_db.test.insert_one(doc) |
|
... |
|
raw document: b'...' |
|
decoded document: {'_id': 1, 'a': 1} |
|
raw document: b'...' |
|
decoded document: {'_id': 2, 'b': 1} |
|
raw document: b'...' |
|
decoded document: {'_id': 3, 'c': 1} |
|
raw document: b'...' |
|
decoded document: {'_id': 4, 'd': 1} |
|
|
|
For use cases like moving documents across different databases or writing binary |
|
blobs to disk, using raw BSON documents provides better speed and avoids the |
|
overhead of decoding or encoding BSON. |
|
""" |
|
|
|
from typing import Any, Dict, ItemsView, Iterator, Mapping, Optional |
|
|
|
from bson import _get_object_size, _raw_to_dict |
|
from bson.codec_options import _RAW_BSON_DOCUMENT_MARKER |
|
from bson.codec_options import DEFAULT_CODEC_OPTIONS as DEFAULT |
|
from bson.codec_options import CodecOptions |
|
from bson.son import SON |
|
|
|
|
|
def _inflate_bson( |
|
bson_bytes: bytes, codec_options: CodecOptions, raw_array: bool = False |
|
) -> Dict[Any, Any]: |
|
"""Inflates the top level fields of a BSON document. |
|
|
|
:Parameters: |
|
- `bson_bytes`: the BSON bytes that compose this document |
|
- `codec_options`: An instance of |
|
:class:`~bson.codec_options.CodecOptions` whose ``document_class`` |
|
must be :class:`RawBSONDocument`. |
|
""" |
|
# Use SON to preserve ordering of elements. |
|
return _raw_to_dict( |
|
bson_bytes, 4, len(bson_bytes) - 1, codec_options, SON(), raw_array=raw_array |
|
) |
|
|
|
|
|
class RawBSONDocument(Mapping[str, Any]): |
|
"""Representation for a MongoDB document that provides access to the raw |
|
BSON bytes that compose it. |
|
|
|
Only when a field is accessed or modified within the document does |
|
RawBSONDocument decode its bytes. |
|
""" |
|
|
|
__slots__ = ("__raw", "__inflated_doc", "__codec_options") |
|
_type_marker = _RAW_BSON_DOCUMENT_MARKER |
|
|
|
def __init__(self, bson_bytes: bytes, codec_options: Optional[CodecOptions] = None) -> None: |
|
"""Create a new :class:`RawBSONDocument` |
|
|
|
:class:`RawBSONDocument` is a representation of a BSON document that |
|
provides access to the underlying raw BSON bytes. Only when a field is |
|
accessed or modified within the document does RawBSONDocument decode |
|
its bytes. |
|
|
|
:class:`RawBSONDocument` implements the ``Mapping`` abstract base |
|
class from the standard library so it can be used like a read-only |
|
``dict``:: |
|
|
|
>>> from bson import encode |
|
>>> raw_doc = RawBSONDocument(encode({'_id': 'my_doc'})) |
|
>>> raw_doc.raw |
|
b'...' |
|
>>> raw_doc['_id'] |
|
'my_doc' |
|
|
|
:Parameters: |
|
- `bson_bytes`: the BSON bytes that compose this document |
|
- `codec_options` (optional): An instance of |
|
:class:`~bson.codec_options.CodecOptions` whose ``document_class`` |
|
must be :class:`RawBSONDocument`. The default is |
|
:attr:`DEFAULT_RAW_BSON_OPTIONS`. |
|
|
|
.. versionchanged:: 3.8 |
|
:class:`RawBSONDocument` now validates that the ``bson_bytes`` |
|
passed in represent a single bson document. |
|
|
|
.. versionchanged:: 3.5 |
|
If a :class:`~bson.codec_options.CodecOptions` is passed in, its |
|
`document_class` must be :class:`RawBSONDocument`. |
|
""" |
|
self.__raw = bson_bytes |
|
self.__inflated_doc: Optional[Mapping[str, Any]] = None |
|
# Can't default codec_options to DEFAULT_RAW_BSON_OPTIONS in signature, |
|
# it refers to this class RawBSONDocument. |
|
if codec_options is None: |
|
codec_options = DEFAULT_RAW_BSON_OPTIONS |
|
elif not issubclass(codec_options.document_class, RawBSONDocument): |
|
raise TypeError( |
|
"RawBSONDocument cannot use CodecOptions with document " |
|
"class {}".format(codec_options.document_class) |
|
) |
|
self.__codec_options = codec_options |
|
# Validate the bson object size. |
|
_get_object_size(bson_bytes, 0, len(bson_bytes)) |
|
|
|
@property |
|
def raw(self) -> bytes: |
|
"""The raw BSON bytes composing this document.""" |
|
return self.__raw |
|
|
|
def items(self) -> ItemsView[str, Any]: |
|
"""Lazily decode and iterate elements in this document.""" |
|
return self.__inflated.items() |
|
|
|
@property |
|
def __inflated(self) -> Mapping[str, Any]: |
|
if self.__inflated_doc is None: |
|
# We already validated the object's size when this document was |
|
# created, so no need to do that again. |
|
# Use SON to preserve ordering of elements. |
|
self.__inflated_doc = self._inflate_bson(self.__raw, self.__codec_options) |
|
return self.__inflated_doc |
|
|
|
@staticmethod |
|
def _inflate_bson(bson_bytes: bytes, codec_options: CodecOptions) -> Mapping[Any, Any]: |
|
return _inflate_bson(bson_bytes, codec_options) |
|
|
|
def __getitem__(self, item: str) -> Any: |
|
return self.__inflated[item] |
|
|
|
def __iter__(self) -> Iterator[str]: |
|
return iter(self.__inflated) |
|
|
|
def __len__(self) -> int: |
|
return len(self.__inflated) |
|
|
|
def __eq__(self, other: Any) -> bool: |
|
if isinstance(other, RawBSONDocument): |
|
return self.__raw == other.raw |
|
return NotImplemented |
|
|
|
def __repr__(self) -> str: |
|
return "{}({!r}, codec_options={!r})".format( |
|
self.__class__.__name__, |
|
self.raw, |
|
self.__codec_options, |
|
) |
|
|
|
|
|
class _RawArrayBSONDocument(RawBSONDocument): |
|
"""A RawBSONDocument that only expands sub-documents and arrays when accessed.""" |
|
|
|
@staticmethod |
|
def _inflate_bson(bson_bytes: bytes, codec_options: CodecOptions) -> Mapping[Any, Any]: |
|
return _inflate_bson(bson_bytes, codec_options, raw_array=True) |
|
|
|
|
|
DEFAULT_RAW_BSON_OPTIONS: CodecOptions = DEFAULT.with_options(document_class=RawBSONDocument) |
|
_RAW_ARRAY_BSON_OPTIONS: CodecOptions = DEFAULT.with_options(document_class=_RawArrayBSONDocument) |
|
"""The default :class:`~bson.codec_options.CodecOptions` for |
|
:class:`RawBSONDocument`. |
|
"""
|
|
|