import re

try:  # Python 2: accept both ``str`` and ``unicode`` field/operator names
    _string_types = (basestring,)
except NameError:  # Python 3
    _string_types = (str,)

# Prefix operators of the domain mini-language; never valid as a field name.
_LOGICAL_OPERATORS = ('|', '&', '!')


def flatten(domain):
    """Yield every token of *domain*, descending into nested lists.

    Only ``list`` instances are descended into; tuples (and any other
    value) are yielded untouched, in their original left-to-right order.
    """
    for tok in domain:
        if isinstance(tok, list):
            for sub in flatten(tok):
                yield sub
        else:
            yield tok


def _is_leaf(tok):
    """Return True when the sequence *tok* looks like ``[field, op, value]``.

    A leaf has exactly three items, a string operator in second position
    and a first item that is not one of the logical prefix operators.
    This keeps a sub-domain such as ``['&', leaf, leaf]`` (also three
    items long) from being mistaken for a leaf.
    """
    return (
        len(tok) == 3
        and isinstance(tok[1], _string_types)
        and tok[0] not in _LOGICAL_OPERATORS
    )


def _make_mutable(domain):
    """Return a copy of *domain* in which every leaf is a ``list``.

    * tuples are converted with ``list(tok)``;
    * list leaves (``[field, op, value]``) are shallow-copied, so their
      value is left exactly as the caller wrote it;
    * any other list is treated as a nested sub-domain and processed
      recursively;
    * remaining tokens (the ``'|'``, ``'&'``, ``'!'`` operators) are
      copied through unchanged.

    The input is never modified.
    """
    out = []
    for tok in domain:
        if isinstance(tok, tuple):
            out.append(list(tok))
        elif isinstance(tok, list):
            if _is_leaf(tok):
                out.append(list(tok))
            else:
                # Nested sub-domain: its own leaves may still be tuples.
                out.append(_make_mutable(tok))
        else:
            out.append(tok)  # '|', '&', '!' ...
    return out


# Class of compiled regular expressions (``re._pattern_type`` is gone in
# recent Python versions, so it is derived from an actual instance).
pattern_type = type(re.compile(''))
def translate_domain(self, domain, orm_obj=None):
    """
    Convert a **domain** written in Polish notation (prefix form)
    into an equivalent **MongoDB filter**.

    Parameters
    ----------
    domain : list
        A domain in OpenERP/Odoo format. It may contain:

        * Leaf conditions written either as tuples
          ``('field', 'operator', value)`` **or** lists
          ``['field', 'operator', value]``.
        * Logical operators applied in *prefix* order:

              |   logical OR   (exactly two operands)
              &   logical AND  (exactly two operands)
              !   logical NOT  (exactly one operand)

        * Arbitrary nesting using sub-lists, e.g.
          ``['|', ('a', '=', 1), ['&', ('b', '>', 5), ('c', '<', 10)]]``

        ``None`` or an empty list yields an empty filter.

    orm_obj : orm_mongodb (optional)
        If provided, type-specific conversions are applied to each leaf:

        * *Date / datetime*: the value is passed through
          ``orm_obj.transform_date_field(field, value, 'write')``.
          On ``datetime`` columns a bare 10-character date string is first
          completed with `` 00:00:00`` for ``>``/``>=`` and `` 23:59:59``
          for ``<``/``<=``.
        * *Boolean*: the value is normalized with ``bool``; list/tuple
          operands (``in`` / ``not in``) are normalized item by item.
        * *exact_match* columns: when the resulting condition is a bare
          compiled regular expression (what ``ilike`` produces), it is
          replaced by the pattern text without its leading/trailing ``.*``
          wildcards, i.e. a plain equality match.

    Returns
    -------
    dict
        A MongoDB query document (``$or``, ``$and``, ``$nor`` plus whatever
        ``OPERATOR_MAPPING`` emits for the leaves), ready to be passed to
        ``collection.find(filter, ...)``.

    Raises
    ------
    ValueError
        If a token is neither an operator, a leaf nor a sub-list, or if a
        logical operator is not followed by enough operands.
    KeyError
        If a leaf uses an operator missing from ``OPERATOR_MAPPING``.

    Notes
    -----
    * The input domain is only read, never modified.
    * Consecutive expressions without an explicit logical operator are
      combined with an **implicit AND**.
    """
    logical_operators = ('|', '&', '!')

    def _strip_wildcards(pattern):
        # Remove whole '.*' prefixes/suffixes. str.lstrip('.*') would
        # strip any run of '.' and '*' characters, eating literal dots.
        while pattern.startswith('.*'):
            pattern = pattern[2:]
        while pattern.endswith('.*'):
            pattern = pattern[:-2]
        return pattern

    def _coerce_bool(val):
        # 'in' / 'not in' carry a sequence: keep it a sequence of booleans.
        if isinstance(val, (list, tuple)):
            return [bool(item) for item in val]
        return bool(val)

    def _is_leaf(tok):
        # ['!', '!', leaf] is three items long too, hence the operator test.
        return (
            isinstance(tok, (list, tuple))
            and len(tok) == 3
            and isinstance(tok[1], string_types)
            and tok[0] not in logical_operators
        )

    def _combine(filters):
        # Implicit AND; never emit '$and': [] (rejected by MongoDB).
        if not filters:
            return {}
        if len(filters) == 1:
            return filters[0]
        return {'$and': filters}

    # ---------- Helper: convert a single leaf ---------------------------
    def _build_leaf(leaf):
        field, op, val = leaf
        exact = False
        # Optional type coercions that depend on the model definition
        if orm_obj is not None:
            # ---- Date / Datetime coercion ----------------------------
            if field in orm_obj.get_date_fields():
                if (orm_obj._columns[field]._type == 'datetime'
                        and isinstance(val, string_types)
                        and len(val) == 10):
                    # Bare date on a datetime column: add a time component
                    if op in ('>', '>='):
                        val += ' 00:00:00'
                    elif op in ('<', '<='):
                        val += ' 23:59:59'
                val = orm_obj.transform_date_field(field, val, 'write')
            # ---- Boolean coercion ------------------------------------
            if field in orm_obj.get_bool_fields():
                val = _coerce_bool(val)
            # ---- exact_match detection -------------------------------
            col = orm_obj._columns.get(field)
            exact = bool(col and getattr(col, 'exact_match', False))
            if exact and isinstance(val, pattern_type):
                # Caller handed over a compiled regex as the value.
                val = _strip_wildcards(val.pattern)

        clause = self.OPERATOR_MAPPING[op](field, val)

        # The regex only exists once the operator mapping has run, so the
        # exact_match cleanup has to look at the built clause.
        if exact and isinstance(clause.get(field), pattern_type):
            clause[field] = _strip_wildcards(clause[field].pattern)
        return clause

    # ---------- Helper: recursive-descent parser ------------------------
    def _next_expr(stream, operator):
        # Parse the next complete expression as an operand of *operator*.
        try:
            tok = next(stream)
        except StopIteration:
            raise ValueError(
                "Domain operator '%s' is missing an operand" % operator)
        return _parse_token(tok, stream)

    def _parse_token(tok, stream):
        # 1. Logical operators in prefix form
        if tok == '|':
            left = _next_expr(stream, tok)
            return {'$or': [left, _next_expr(stream, tok)]}
        if tok == '&':
            left = _next_expr(stream, tok)
            return {'$and': [left, _next_expr(stream, tok)]}
        if tok == '!':
            return {'$nor': [_next_expr(stream, tok)]}

        # 2. Leaf (tuple or list) ('field', 'op', value)
        if _is_leaf(tok):
            return _build_leaf(tok)

        # 3. Sub-list: a complete nested domain (implicit AND inside)
        if isinstance(tok, list):
            return _combine(_parse_all(tok))

        # (tok,) so that tuple tokens do not break the %-formatting
        raise ValueError('Domain Token not supported: %s' % (tok,))

    def _parse_all(tokens):
        # One shared iterator: operators pull their operands from it, the
        # loop picks up whatever expression follows.
        stream = iter(tokens)
        return [_parse_token(tok, stream) for tok in stream]

    # --------------------------------------------------------------------
    if not domain:
        return {}
    return _combine(_parse_all(domain))
class TestModel(osv_mongodb.osv_mongodb):
    """MongoDB-backed model with one column per type exercised by the
    domain translation tests."""

    _name = 'comprehensive.domain.model'

    _columns = {
        # char column flagged for exact (non-regex) matching
        'name': fields.char('Name', size=64, exact_match=True),
        'num': fields.integer('Numeric'),
        'flag': fields.boolean('Flag'),
        'day': fields.date('Day'),
        'moment': fields.datetime('Moment'),
    }
    _defaults = {
        'flag': lambda *a: False
    }


def _mk(model, cursor, uid, **vals):
    """Create a record on *model* and return ``(record_id, values)``.

    The values default to a random UUID ``name``, ``num`` 0, the date
    ``2024-01-01`` (midnight for ``moment``) and ``flag`` False; keyword
    arguments override those defaults.
    """
    import uuid

    record_vals = {
        'name': '{}'.format(uuid.uuid4()),
        'num': 0,
        'day': '2024-01-01',
        'moment': '2024-01-01 00:00:00',
        'flag': False
    }
    # Caller-supplied values win over the defaults above.
    record_vals.update(vals)
    record_id = model.create(cursor, uid, record_vals)
    return record_id, record_vals
class TranslateDomainComprehensive(testing.MongoDBTestCase):
    """Checks of ``MDBConn.translate_domain`` called without a model."""

    def setUp(self):
        self.mdb = mongodb2.MDBConn()

    def _assert(self, domain, expected):
        # Translate *domain* and compare it with the expected filter.
        translated = self.mdb.translate_domain(domain)
        self.assertEqual(translated, expected)

    def test_simple_operators(self):
        # (domain operator, value, Mongo operator expected in the filter)
        cases = (
            ('=', 5, '$eq'),
            ('!=', 5, '$ne'),
            ('>', 5, '$gt'),
            ('>=', 5, '$gte'),
            ('<', 5, '$lt'),
            ('<=', 5, '$lte'),
            ('in', [1, 2], '$in'),
            ('not in', [1, 2], '$nin'),
        )
        for operator, value, mongo_operator in cases:
            self._assert([('x', operator, value)],
                         {'x': {mongo_operator: value}})

    def test_like_variants(self):
        prefix_cs = re.compile('fo.*')
        contains_cs = re.compile('.*fo.*')
        contains_ci = re.compile('.*fo.*', re.I)

        self._assert([('name', 'like', 'fo%')],
                     {'name': {'$regex': prefix_cs}})
        self._assert([('name', 'not like', '%fo%')],
                     {'name': {'$not': contains_cs}})
        self._assert([('name', 'ilike', '%fo%')],
                     {'name': contains_ci})
        self._assert([('name', 'not ilike', '%fo%')],
                     {'name': {'$not': contains_ci}})

    def test_and_or_not_flat(self):
        # Explicit OR
        self._assert(
            ['|', ('a', '=', 1), ('b', '>', 2)],
            {'$or': [{'a': {'$eq': 1}}, {'b': {'$gt': 2}}]}
        )
        # Explicit AND
        self._assert(
            ['&', ('a', '=', 1), ('b', '<', 5)],
            {'$and': [{'a': {'$eq': 1}}, {'b': {'$lt': 5}}]}
        )
        # NOT is expressed through $nor
        self._assert(
            ['!', ('a', '=', 1)],
            {'$nor': [{'a': {'$eq': 1}}]}
        )

    def test_and_implicit_multiple_leaves(self):
        # Leaves without an operator are AND-ed together.
        self._assert(
            [('a', '=', 1), ('b', '>', 2)],
            {'$and': [{'a': {'$eq': 1}}, {'b': {'$gt': 2}}]}
        )

    def test_nested_sub_lists(self):
        inner = ['&', ('b', '>', 2), ('c', '<', 10)]
        expected = {
            '$or': [
                {'a': {'$eq': 1}},
                {'$and': [{'b': {'$gt': 2}}, {'c': {'$lt': 10}}]}
            ]
        }
        self._assert(['|', ('a', '=', 1), inner], expected)


class MongoDomainCombinations(testing.MongoDBTestCase):
    """Searches on ``TestModel`` against three stored records."""

    def setUp(self):
        self.tx = Transaction().start(self.database)
        cursor = self.tx.cursor
        uid = self.tx.user

        # Register the model in the pool and initialise it.
        TestModel()
        osv.class_pool[TestModel._name].createInstance(
            self.openerp.pool, 'mongodb_backend', cursor
        )
        self.obj = self.openerp.pool.get(TestModel._name)
        self.obj._auto_init(cursor)

        # Fixture records; only their ids are needed afterwards.
        self.r1 = _mk(self.obj, cursor, uid,
                      name='FOO', num=1, flag=False,
                      day='2025-05-05', moment='2025-05-05 02:00:00')[0]
        self.r2 = _mk(self.obj, cursor, uid,
                      name='BAR', num=7, flag=True,
                      day='2025-05-06', moment='2025-05-06 20:00:00')[0]
        self.r3 = _mk(self.obj, cursor, uid,
                      name='BAZ', num=5, flag=False,
                      day='2025-05-04', moment='2025-05-04 23:00:00')[0]

    def tearDown(self):
        from mongodb_backend.mongodb2 import mdbpool
        mdbpool.get_db().drop_collection("comprehensive_domain_model")
        self.tx.stop()

    def _search_ids(self, domain):
        # Run a search with the transaction's cursor and user.
        return self.obj.search(self.tx.cursor, self.tx.user, domain)

    # --------------------------------------------------------------
    # Boolean and exact_match
    # --------------------------------------------------------------
    def test_boolean_and_exact_match(self):
        # exact_match: must be an exact comparison (no regex)
        self.assertEqual(self._search_ids([('name', '=', 'FOO')]), [self.r1])

        # boolean
        flagged = self._search_ids([('flag', '=', True)])
        self.assertEqual(set(flagged), {self.r2})

        unflagged = self._search_ids([('flag', '=', False)])
        self.assertEqual(set(unflagged), {self.r1, self.r3})

    # --------------------------------------------------------------
    # Complex logic operators
    # --------------------------------------------------------------
    def test_or_and_not(self):
        either = self._search_ids(['|', ('name', '=', 'FOO'), ('num', '>', 6)])
        self.assertEqual(set(either), {self.r1, self.r2})

        both = self._search_ids(['&', ('num', '>', 1), ('num', '<', 6)])
        self.assertEqual(set(both), {self.r3})

        negated = self._search_ids(['!', ('name', 'ilike', '%A%')])
        self.assertEqual(set(negated), {self.r1})

    # --------------------------------------------------------------
    # Dates and datetimes
    # --------------------------------------------------------------
    def test_date_range_implicit_time(self):
        # < '2025-05-06' -> 2025-05-06 23:59:59 (includes r2)
        # This is a rare case. With
        # https://github.com/gisce/mongodb_backend/pull/49 the bare date
        # 2025-05-06 is treated as 2025-05-06 23:59:59, so the record at
        # 2025-05-06 20:00:00 is included in the search result (a strict
        # reading would give only r1 and r3).
        before = self._search_ids([('moment', '<', '2025-05-06')])
        self.assertEqual(set(before), {self.r1, self.r2, self.r3})

        # >= '2025-05-06' -> 2025-05-06 00:00:00 (only r2)
        since = self._search_ids([('moment', '>=', '2025-05-06')])
        self.assertEqual(set(since), {self.r2})

    # --------------------------------------------------------------
    # Nested sub-domain combined through a top-level OR
    # --------------------------------------------------------------
    def test_nested_combination(self):
        nested = ['&', ('num', '>', 4), ('flag', '=', True)]
        found = self._search_ids(['|', ('name', '=', 'FOO'), nested])
        self.assertEqual(set(found), {self.r1, self.r2})