You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
224 lines
7.8 KiB
224 lines
7.8 KiB
# Copyright 2016 MongoDB, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
|
|
|
"""Tools for working with `collations`_.

.. _collations: https://www.mongodb.com/docs/manual/reference/collation/
"""
|
from typing import Any, Dict, Mapping, Optional, Union |
|
|
|
from pymongo import common |
|
|
|
|
|
class CollationStrength:
    """Named constants for the `strength` option of a
    :class:`~pymongo.collation.Collation`.

    The members are plain integers from 1 (:data:`PRIMARY`) through
    5 (:data:`IDENTICAL`).
    """

    PRIMARY = 1
    """Distinguish base (unadorned) characters."""

    SECONDARY = 2
    """Distinguish accents on characters."""

    TERTIARY = 3
    """Distinguish character case."""

    QUATERNARY = 4
    """Distinguish words with punctuation from words without it."""

    IDENTICAL = 5
    """Distinguish by unicode code point (characters are exactly identical)."""
|
|
|
|
|
class CollationAlternate:
    """Named constants for the `alternate` option of a
    :class:`~pymongo.collation.Collation`.

    The members are plain strings.
    """

    NON_IGNORABLE = "non-ignorable"
    """Treat spaces and punctuation as base characters."""

    SHIFTED = "shifted"
    """Do *not* treat spaces and punctuation as base characters.

    Spaces and punctuation are distinguished regardless when the
    :class:`~pymongo.collation.Collation` strength is at least
    :data:`~pymongo.collation.CollationStrength.QUATERNARY`.

    """
|
|
|
|
|
class CollationMaxVariable:
    """Named constants for the `max_variable` option of a
    :class:`~pymongo.collation.Collation`.

    The members are plain strings.
    """

    PUNCT = "punct"
    """Ignore both punctuation and spaces."""

    SPACE = "space"
    """Ignore spaces alone."""
|
|
|
|
|
class CollationCaseFirst:
    """Named constants for the `case_first` option of a
    :class:`~pymongo.collation.Collation`.

    The members are plain strings.
    """

    UPPER = "upper"
    """Uppercase characters sort first."""

    LOWER = "lower"
    """Lowercase characters sort first."""

    OFF = "off"
    """Use the default for the locale or collation strength."""
|
|
|
|
|
class Collation:
    """A set of collation options, held as a document.

    Only options that are explicitly supplied (i.e. not ``None``) are stored
    in the resulting document.

    :Parameters:
      - `locale`: (string) The locale of the collation. This should be a string
        that identifies an `ICU locale ID` exactly. For example, ``en_US`` is
        valid, but ``en_us`` and ``en-US`` are not. Consult the MongoDB
        documentation for a list of supported locales.
      - `caseLevel`: (optional) If ``True``, turn on case sensitivity if
        `strength` is 1 or 2 (case sensitivity is implied if `strength` is
        greater than 2). Defaults to ``False``.
      - `caseFirst`: (optional) Specify that either uppercase or lowercase
        characters take precedence. Must be one of the following values:

          * :data:`~CollationCaseFirst.UPPER`
          * :data:`~CollationCaseFirst.LOWER`
          * :data:`~CollationCaseFirst.OFF` (the default)

      - `strength`: (optional) Specify the comparison strength. This is also
        known as the ICU comparison level. This must be one of the following
        values:

          * :data:`~CollationStrength.PRIMARY`
          * :data:`~CollationStrength.SECONDARY`
          * :data:`~CollationStrength.TERTIARY` (the default)
          * :data:`~CollationStrength.QUATERNARY`
          * :data:`~CollationStrength.IDENTICAL`

        Each successive level builds upon the previous. For example, a
        `strength` of :data:`~CollationStrength.SECONDARY` differentiates
        characters based both on the unadorned base character and its accents.

      - `numericOrdering`: (optional) If ``True``, order numbers numerically
        instead of in collation order (defaults to ``False``).
      - `alternate`: (optional) Specify whether spaces and punctuation are
        considered base characters. This must be one of the following values:

          * :data:`~CollationAlternate.NON_IGNORABLE` (the default)
          * :data:`~CollationAlternate.SHIFTED`

      - `maxVariable`: (optional) When `alternate` is
        :data:`~CollationAlternate.SHIFTED`, this option specifies what
        characters may be ignored. This must be one of the following values:

          * :data:`~CollationMaxVariable.PUNCT` (the default)
          * :data:`~CollationMaxVariable.SPACE`

      - `normalization`: (optional) If ``True``, normalizes text into Unicode
        NFD. Defaults to ``False``.
      - `backwards`: (optional) If ``True``, accents on characters are
        considered from the back of the word to the front, as it is done in some
        French dictionary ordering traditions. Defaults to ``False``.
      - `kwargs`: (optional) Keyword arguments supplying any additional options
        to be sent with this Collation object. They are merged into the
        document as given.

    .. versionadded:: 3.4

    """

    __slots__ = ("__document",)

    def __init__(
        self,
        locale: str,
        caseLevel: Optional[bool] = None,
        caseFirst: Optional[str] = None,
        strength: Optional[int] = None,
        numericOrdering: Optional[bool] = None,
        alternate: Optional[str] = None,
        maxVariable: Optional[str] = None,
        normalization: Optional[bool] = None,
        backwards: Optional[bool] = None,
        **kwargs: Any,
    ) -> None:
        # ``locale`` is the only mandatory option, so it seeds the document.
        checked_locale = common.validate_string("locale", locale)
        self.__document: Dict[str, Any] = {"locale": checked_locale}

        # (document key, supplied value, validator) for every optional
        # setting, listed in the order they are validated and stored.
        optional_settings = (
            ("caseLevel", caseLevel, common.validate_boolean),
            ("caseFirst", caseFirst, common.validate_string),
            ("strength", strength, common.validate_integer),
            ("numericOrdering", numericOrdering, common.validate_boolean),
            ("alternate", alternate, common.validate_string),
            ("maxVariable", maxVariable, common.validate_string),
            ("normalization", normalization, common.validate_boolean),
            ("backwards", backwards, common.validate_boolean),
        )
        for key, supplied, validator in optional_settings:
            # ``None`` means "not specified": the key is omitted entirely.
            if supplied is not None:
                self.__document[key] = validator(key, supplied)

        # Additional options are merged in last, without validation.
        self.__document.update(kwargs)

    @property
    def document(self) -> Dict[str, Any]:
        """A copy of the document that represents this collation.

        .. note::
          :class:`Collation` is immutable. Each access returns a new shallow
          copy, so mutating the value of :attr:`document` does not mutate
          this :class:`Collation`.
        """
        return dict(self.__document)

    def __repr__(self) -> str:
        # Render every stored option as ``key=<repr of value>`` in
        # document order.
        rendered = ", ".join(f"{key}={value!r}" for key, value in self.document.items())
        return f"Collation({rendered})"

    def __eq__(self, other: Any) -> bool:
        # Two collations are equal when their documents are equal; other
        # types are left for Python's comparison fallback to handle.
        if not isinstance(other, Collation):
            return NotImplemented
        return self.document == other.document

    def __ne__(self, other: Any) -> bool:
        return not (self == other)
|
|
|
|
|
def validate_collation_or_none(
    value: Optional[Union[Mapping[str, Any], Collation]]
) -> Optional[Dict[str, Any]]:
    """Normalize a user-supplied collation into a document, or ``None``.

    :Parameters:
      - `value`: ``None``, a :class:`Collation`, or a mapping of collation
        options.

    :Returns:
      ``None`` when `value` is ``None``; a copy of the collation's document
      when `value` is a :class:`Collation`; `value` itself when it is a
      ``dict`` (or subclass); otherwise a new ``dict`` built from the mapping.

    :Raises:
      :exc:`TypeError` if `value` is none of the accepted types.
    """
    if value is None:
        return None
    if isinstance(value, Collation):
        return value.document
    if isinstance(value, dict):
        # dicts (including subclasses) are handed back untouched.
        return value
    if isinstance(value, Mapping):
        # The signature accepts any Mapping, so honor it at runtime too and
        # convert to a plain dict to satisfy the declared return type.
        return dict(value)
    raise TypeError("collation must be a dict, an instance of collation.Collation, or None.")
|
|
|