You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
341 lines
10 KiB
341 lines
10 KiB
|
|
import os |
|
import time |
|
from collections import defaultdict, OrderedDict |
|
from datetime import datetime |
|
|
|
from ..types import string_types, init_bson, bson as bson_ |
|
from ..client import MontyClient |
|
from ..errors import DuplicateKeyError |
|
|
|
|
|
def _collection(database, collection):
    """Return a MontyCollection handle for ``database``.``collection``.

    A fresh default MontyClient is created on every call.
    """
    return MontyClient()[database][collection]
|
|
|
|
|
def montyimport(database,
                collection,
                file,
                mode="insert",
                json_options=None,
                use_bson=False):
    """Imports content from an Extended JSON file into a MontyCollection instance

    Example:
        >>> from montydb import open_repo, utils
        >>> with open_repo("foo/bar"):
        >>>     utils.montyimport("db", "col", "/data/dump.json")
        >>>

    Args:
        database (str): Database name
        collection (str): Collection name to import to
        file (str): Input file path
        mode (str): Specifies how the import process should handle existing
                    documents in the database that match documents in the
                    import file.
                    Options: ["insert", "upsert", "merge"]
                    Default: "insert"
        json_options (JSONOptions): A JSONOptions instance used to modify
                                    the decoding of MongoDB Extended JSON
                                    types. Default None.
        use_bson (bool): Use the real `bson` package for decoding.

    """
    init_bson(use_bson)
    collection = _collection(database, collection)

    # The dump file holds one Extended JSON document per line; wrap the
    # lines into a JSON array so they can be decoded in a single pass.
    with open(file, "r") as fp:
        lines = [line.strip() for line in fp.readlines()]
        serialized = "[{}]".format(", ".join(lines))

    documents = bson_.json_loads(serialized, json_options=json_options)

    if mode == "insert":
        for doc in documents:
            try:
                collection.insert_one(doc)
            except DuplicateKeyError:
                print("Duplicate id: %s" % doc["_id"])

    elif mode == "upsert":
        for doc in documents:
            collection.replace_one({"_id": doc["_id"]}, doc, upsert=True)

    elif mode == "merge":
        for doc in documents:
            # BUGFIX: the previous form
            #   {"$setOnInsert": {k: v} for k, v in doc.items()}
            # was a dict comprehension that re-bound the single
            # "$setOnInsert" key on every iteration, keeping only the LAST
            # field of `doc`.  Nest the comprehension so the whole document
            # is set when it does not already exist.
            update = {"$setOnInsert": {k: v for k, v in doc.items()}}
            collection.update_one({"_id": doc["_id"]}, update, upsert=True)
|
|
|
|
|
def montyexport(database,
                collection,
                out,
                fields=None,
                query=None,
                json_options=None,
                use_bson=False):
    """Produces a JSON export of data stored in a MontyCollection instance

    Example:
        >>> from montydb import open_repo, utils
        >>> with open_repo("foo/bar"):
        >>>     utils.montyexport("db", "col", "/data/dump.json")
        >>>

    Args:
        database (str): Database name
        collection (str): Collection name to export from
        out (str): Output file path
        fields (str, list): Specifies a field name string or a list fields
                            to include in the export.
        query (dict): Provides a query document to return matching documents
                      in the export.
        json_options (JSONOptions): A JSONOptions instance used to modify
                                    the decoding of MongoDB Extended JSON
                                    types. Default None.
        use_bson (bool): Use the real `bson` package for encoding.

    """
    init_bson(use_bson)
    collection = _collection(database, collection)
    fields = fields or []

    # Ensure the output directory exists before writing.
    out = os.path.abspath(out)
    out_dir = os.path.dirname(out)
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    # A single field name is accepted as a convenience.
    if isinstance(fields, string_types):
        fields = [fields]

    # An empty projection dict means "no projection" (falls back to None).
    projection = {name: True for name in fields} or None

    # One Extended JSON document per line.
    with open(out, "w") as fp:
        for doc in collection.find(query, projection=projection):
            fp.write(bson_.json_dumps(doc, json_options=json_options) + "\n")
|
|
|
|
|
def montyrestore(database, collection, dumpfile):
    """Loads a binary database dump into a MontyCollection instance

    Should be able to accept the dump created by `mongodump`.
    bson required.

    Example:
        >>> from montydb import open_repo, utils
        >>> with open_repo("foo/bar"):
        >>>     utils.montyrestore("db", "col", "/data/dump.bson")
        >>>

    Args:
        database (str): Database name
        collection (str): Collection name to load into
        dumpfile (str): File path to load from

    """
    # Deferred import: `bson` is an optional dependency of this module.
    from bson import decode_all

    collection = _collection(database, collection)

    with open(dumpfile, "rb") as fp:
        payload = fp.read()

    try:
        collection.insert_many(decode_all(payload))
    except DuplicateKeyError:
        # Documents already present are silently skipped.
        pass
|
|
|
|
|
def montydump(database, collection, dumpfile):
    """Creates a binary export from a MontyCollection instance

    The export should be able to be accepted by `mongorestore`.
    bson required.

    Example:
        >>> from montydb import open_repo, utils
        >>> with open_repo("foo/bar"):
        >>>     utils.montydump("db", "col", "/data/dump.bson")
        >>>

    Args:
        database (str): Database name
        collection (str): Collection name to export from
        dumpfile (str): File path to export to

    """
    # Deferred import: `bson` is an optional dependency of this module.
    from bson import BSON

    collection = _collection(database, collection)

    # Ensure the output directory exists before writing.
    dumpfile = os.path.abspath(dumpfile)
    dump_dir = os.path.dirname(dumpfile)
    if not os.path.isdir(dump_dir):
        os.makedirs(dump_dir)

    # PERF: join all encoded documents once instead of the previous
    # `raw += BSON.encode(doc)` loop, which is quadratic in total size.
    raw = b"".join(BSON.encode(doc) for doc in collection.find())

    with open(dumpfile, "wb") as fp:
        fp.write(raw)
|
|
|
|
|
class MongoQueryRecorder(object):
    """Record MongoDB query results in a period of time

    :Important: Requires to access database profiler.

    This works via filtering the database profile data and reproduce the
    queries of `find` and `distinct` commands.

    bson required.

    Example:
        >>> from pymongo import MongoClient
        >>> from montydb.utils import MongoQueryRecorder
        >>> client = MongoClient()
        >>> recorder = MongoQueryRecorder(client["mydb"])
        >>> recorder.start()
        >>> # Make some queries or run the App...
        >>> recorder.stop()
        >>> recorder.extract()
        {<collection_1>: [<doc_1>, <doc_2>, ...], ...}

    Args:
        mongodb (pymongo.database.Database): An instance of mongo database
        namespace (str or regex, optional): A MongoDB namespace string/regex.
        user (str, optional): Name of authenticated user to record with.

    """

    def __init__(self, mongodb, namespace=None, user=None):
        self._mongodb = mongodb
        # Default namespace matches every collection of `mongodb`.
        self._namespace = namespace or {"$regex": mongodb.name + r"\..*"}
        self._user = user

        self._epoch = datetime(1970, 1, 1)
        self._rec_stime = None  # recording start time (UTC)
        self._rec_etime = None  # recording end time (UTC)

    def __repr__(self):
        return ("MongoQueryRecorder(mongodb=%s, namespace=%s, user=%s)"
                "" % (self._mongodb.name, self._namespace, self._user))

    def reset_profile(self, level=0):
        """Drop and reset database profile

        Args:
            level (int): Database profile level, default 0.

        """
        # Profiling must be off before `system.profile` can be dropped.
        self._mongodb.command({"profile": 0})
        self._mongodb.system.profile.drop()
        if level:
            self._mongodb.command({"profile": level})

    def current_level(self):
        """Return current database's profile level"""
        return self._mongodb.command({"profile": -1})["was"]

    def start(self):
        """Start recording and set database profile level to 2"""
        self._mongodb.command({"profile": 2})
        self._rec_stime = datetime.utcnow()
        time.sleep(0.1)  # Wait for db

    def stop(self):
        """Stop recording and set database profile level to 0"""
        time.sleep(0.1)  # Wait for db
        self._rec_etime = datetime.utcnow()
        self._mongodb.command({"profile": 0})

    def extract(self):
        """Collect documents via previous queries

        Via filtering the `[database].system.profile`, parsing previous
        commands to reproduce the query results.

        NOTE: Depend on the `namespace`, the result may across multiple
              collections.

        Returns:
            dict: A dict of {collection: list of documents}

        """
        from bson.codec_options import CodecOptions

        # Only profile entries produced inside the recording window, for
        # `find` queries that returned data and for `distinct` commands.
        profile_filter = {
            "$or": [
                {
                    "op": "query",
                    "command.find": {"$exists": True},
                    "nreturned": {"$gte": 1}
                },
                {
                    "op": "command",
                    "command.distinct": {"$exists": True}
                },
            ],
            "ns": self._namespace,
            "ts": {"$gte": self._rec_stime, "$lte": self._rec_etime},
        }

        if self._user is not None:
            profile_filter["user"] = self._user

        projection = {
            "op": 1,

            "command.find": 1,
            "command.filter": 1,
            "command.sort": 1,
            "command.limit": 1,

            "command.distinct": 1,
            "command.key": 1,
            "command.query": 1
        }

        # Decode profile docs as OrderedDict so logically-equal commands
        # compare equal and duplicates can be dropped below.
        profile = self._mongodb.system.profile
        code_opt = CodecOptions(document_class=OrderedDict)
        profile = profile.with_options(codec_options=code_opt)

        history = defaultdict(list)
        for log in profile.find(profile_filter, projection=projection):
            op = log.pop("op")
            if log not in history[op]:
                history[op].append(log)

        documents = defaultdict(dict)

        # Query - find
        for cmd in (log["command"] for log in history["query"]):
            col = cmd["find"]
            query_filter = cmd["filter"]
            limit = cmd.get("limit", 0)
            sort = None

            if limit:
                # Sort only changes WHICH documents come back when the
                # result set is truncated by a limit, so it is replayed
                # only in that case.
                # BUGFIX: the previous code iterated the sort dict without
                # `.items()` (yielding keys, not pairs) and could hand an
                # empty list to `find(sort=...)`; fall back to None when
                # no sort spec was profiled.
                sort = list(cmd.get("sort", {}).items()) or None

            for doc in self._mongodb[col].find(query_filter,
                                               sort=sort, limit=limit):
                doc_id = doc["_id"]
                if doc_id not in documents[col]:
                    documents[col][doc_id] = doc

        # Command - distinct
        for cmd in (log["command"] for log in history["command"]):
            col = cmd["distinct"]
            key = cmd["key"]
            query = cmd.get("query")

            # Reproduce one representative document per distinct value.
            for value in self._mongodb[col].distinct(key, query):
                doc = self._mongodb[col].find_one({key: value})
                doc_id = doc["_id"]
                if doc_id not in documents[col]:
                    documents[col][doc_id] = doc

        # Done
        return {col: list(docs.values()) for col, docs in documents.items()}
|
|
|