Skip to content

Commit ca5ff71

Browse files
committed
Support json query filter
Related bug: kennknowles#8
1 parent a039cf8 commit ca5ff71

File tree

5 files changed

+173
-28
lines changed

5 files changed

+173
-28
lines changed

jsonpath_rw/jsonpath.py

+111-23
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from six.moves import xrange
55
from itertools import *
66
import functools
7+
import operator
78

89
logger = logging.getLogger(__name__)
910

@@ -14,7 +15,7 @@
1415
class JSONPath(object):
1516
"""
1617
The base class for JSONPath abstract syntax; those
17-
methods stubbed here are the interface to supported
18+
methods stubbed here are the interface to supported
1819
JSONPath semantics.
1920
"""
2021

@@ -53,8 +54,8 @@ class DatumInContext(object):
5354
"""
5455
Represents a datum along a path from a context.
5556
56-
Essentially a zipper but with a structure represented by JsonPath,
57-
and where the context is more of a parent pointer than a proper
57+
Essentially a zipper but with a structure represented by JsonPath,
58+
and where the context is more of a parent pointer than a proper
5859
representation of the context.
5960
6061
For quick-and-dirty work, this proxies any non-special attributes
@@ -115,17 +116,17 @@ class AutoIdForDatum(DatumInContext):
115116
"""
116117
This behaves like a DatumInContext, but the value is
117118
always the path leading up to it, not including the "id",
118-
and with any "id" fields along the way replacing the prior
119+
and with any "id" fields along the way replacing the prior
119120
segment of the path
120121
121122
For example, it will make "foo.bar.id" return a datum
122123
that behaves like DatumInContext(value="foo.bar", path="foo.bar.id").
123124
124125
This is disabled by default; it can be turned on by
125126
setting the `auto_id_field` global to a value other
126-
than `None`.
127+
than `None`.
127128
"""
128-
129+
129130
def __init__(self, datum, id_field=None):
130131
"""
131132
Invariant is that datum.path is the path from context to datum. The auto id
@@ -212,7 +213,7 @@ class Child(JSONPath):
212213
JSONPath that first matches the left, then the right.
213214
Concrete syntax is <left> '.' <right>
214215
"""
215-
216+
216217
def __init__(self, left, right):
217218
self.left = left
218219
self.right = right
@@ -222,7 +223,7 @@ def find(self, datum):
222223
Extra special case: auto ids do not have children,
223224
so cut it off right now rather than auto id the auto id
224225
"""
225-
226+
226227
return [submatch
227228
for subdata in self.left.find(datum)
228229
if not isinstance(subdata, AutoIdForDatum)
@@ -256,7 +257,7 @@ def __str__(self):
256257

257258
def __repr__(self):
258259
return 'Parent()'
259-
260+
260261

261262
class Where(JSONPath):
262263
"""
@@ -267,7 +268,7 @@ class Where(JSONPath):
267268
WARNING: Subject to change. May want to have "contains"
268269
or some other better word for it.
269270
"""
270-
271+
271272
def __init__(self, left, right):
272273
self.left = left
273274
self.right = right
@@ -286,7 +287,7 @@ class Descendants(JSONPath):
286287
JSONPath that matches first the left expression then any descendant
287288
of it which matches the right expression.
288289
"""
289-
290+
290291
def __init__(self, left, right):
291292
self.left = left
292293
self.right = right
@@ -295,7 +296,7 @@ def find(self, datum):
295296
# <left> .. <right> ==> <left> . (<right> | *..<right> | [*]..<right>)
296297
#
297298
# With a wonky caveat that since Slice() has funky coercions
298-
# we cannot just delegate to that equivalence or we'll hit an
299+
# we cannot just delegate to that equivalence or we'll hit an
299300
# infinite loop. So right here we implement the coercion-free version.
300301

301302
# Get all left matches into a list
@@ -321,12 +322,12 @@ def match_recursively(datum):
321322
recursive_matches = []
322323

323324
return right_matches + list(recursive_matches)
324-
325+
325326
# TODO: repeatable iterator instead of list?
326327
return [submatch
327328
for left_match in left_matches
328329
for submatch in match_recursively(left_match)]
329-
330+
330331
def is_singular():
331332
return False
332333

@@ -385,7 +386,7 @@ class Fields(JSONPath):
385386
WARNING: If '*' is any of the field names, then they will
386387
all be returned.
387388
"""
388-
389+
389390
def __init__(self, *fields):
390391
self.fields = fields
391392

@@ -411,7 +412,7 @@ def reified_fields(self, datum):
411412

412413
def find(self, datum):
413414
datum = DatumInContext.wrap(datum)
414-
415+
415416
return [field_datum
416417
for field_datum in [self.get_field_datum(datum, field) for field in self.reified_fields(datum)]
417418
if field_datum is not None]
@@ -429,7 +430,7 @@ def __eq__(self, other):
429430
class Index(JSONPath):
430431
"""
431432
JSONPath that matches indices of the current datum, or none if not large enough.
432-
Concrete syntax is brackets.
433+
Concrete syntax is brackets.
433434
434435
WARNING: If the datum is not long enough, it will not crash but will not match anything.
435436
NOTE: For the concrete syntax of `[*]`, the abstract syntax is a Slice() with no parameters (equiv to `[:]`
@@ -440,7 +441,7 @@ def __init__(self, index):
440441

441442
def find(self, datum):
442443
datum = DatumInContext.wrap(datum)
443-
444+
444445
if len(datum.value) > self.index:
445446
return [DatumInContext(datum.value[self.index], path=self, context=datum)]
446447
else:
@@ -454,15 +455,15 @@ def __str__(self):
454455

455456
class Slice(JSONPath):
456457
"""
457-
JSONPath matching a slice of an array.
458+
JSONPath matching a slice of an array.
458459
459460
Because of a mismatch between JSON and XML when schema-unaware,
460461
this always returns an iterable; if the incoming data
461462
was not a list, then it returns a one element list _containing_ that
462463
data.
463464
464465
Consider these two docs, and their schema-unaware translation to JSON:
465-
466+
466467
<a><b>hello</b></a> ==> {"a": {"b": "hello"}}
467468
<a><b>hello</b><b>goodbye</b></a> ==> {"a": {"b": ["hello", "goodbye"]}}
468469
@@ -480,10 +481,10 @@ def __init__(self, start=None, end=None, step=None):
480481
self.start = start
481482
self.end = end
482483
self.step = step
483-
484+
484485
def find(self, datum):
485486
datum = DatumInContext.wrap(datum)
486-
487+
487488
# Here's the hack. If it is a dictionary or some kind of constant,
488489
# put it in a single-element list
489490
if (isinstance(datum.value, dict) or isinstance(datum.value, six.integer_types) or isinstance(datum.value, six.string_types)):
@@ -500,7 +501,7 @@ def __str__(self):
500501
if self.start == None and self.end == None and self.step == None:
501502
return '[*]'
502503
else:
503-
return '[%s%s%s]' % (self.start or '',
504+
return '[%s%s%s]' % (self.start or '',
504505
':%d'%self.end if self.end else '',
505506
':%d'%self.step if self.step else '')
506507

@@ -559,3 +560,90 @@ def __repr__(self):
559560

560561
def __str__(self):
561562
return '[?%s]' % self.expressions
563+
564+
565+
OPERATOR_MAP = {
566+
'!=': operator.ne,
567+
'==': operator.eq,
568+
'=': operator.eq,
569+
'<=': operator.le,
570+
'<': operator.lt,
571+
'>=': operator.ge,
572+
'>': operator.gt,
573+
}
574+
575+
576+
class Filter(JSONPath):
    """
    The JSONQuery filter.

    Keeps the elements of a list for which every filter expression matches.
    Concrete syntax is <left> '[' '?' <expressions> ']', where several
    expressions may be combined with '&'.
    """

    def __init__(self, expressions):
        # List of expression objects; each must expose find(datum) and
        # return a non-empty result when the element satisfies it.
        self.expressions = expressions

    def find(self, datum):
        """
        Return one DatumInContext per list element that satisfies all
        expressions, each with path Index(i) and the wrapped datum as context.

        Returns an empty list when there are no expressions or when the
        datum's value is not a list.
        """
        if not self.expressions:
            return []

        datum = DatumInContext.wrap(datum)

        # Only lists can be filtered element by element. Without this guard
        # a scalar raises TypeError on len() and a dict raises KeyError when
        # indexed with 0.
        if not isinstance(datum.value, list):
            return []

        return [DatumInContext(element, path=Index(i), context=datum)
                for i, element in enumerate(datum.value)
                if all(expression.find(element)
                       for expression in self.expressions)]

    def __repr__(self):
        return '%s(%r)' % (self.__class__.__name__, self.expressions)

    def __str__(self):
        return '[?%s]' % self.expressions
600+
601+
602+
class FilterExpression(JSONPath):
    """
    The JSONQuery expression.

    A single condition inside a filter: either a bare <target> (matches when
    the target path finds anything) or <target> <op> <value>, where <op> is
    one of the keys of OPERATOR_MAP.
    """

    def __init__(self, target, op, value):
        self.target = target  # JSONPath evaluated against each candidate
        self.op = op          # operator string, or None for an existence test
        self.value = value    # literal the matched values are compared with

    def find(self, datum):
        """
        Return the data matched by the target that satisfy the comparison.

        With no operator, every match of the target is returned. Raises
        KeyError if the operator is not a key of OPERATOR_MAP.
        """
        matches = self.target.find(DatumInContext.wrap(datum))

        if not matches:
            return []
        if self.op is None:
            return matches

        compare = OPERATOR_MAP[self.op]

        found = []
        for match in matches:
            value = match.value
            # When comparing against an integer literal, coerce the candidate
            # (e.g. the string "8") to int. Floats are left alone: int() would
            # truncate them and make `5.9 > 5` false and `5.5 = 5` true.
            if isinstance(self.value, int) and not isinstance(value, float):
                try:
                    value = int(value)
                except (TypeError, ValueError):
                    # Not numeric (e.g. "neigh", None, a list or dict):
                    # it cannot satisfy a numeric comparison.
                    continue

            if compare(value, self.value):
                found.append(match)

        return found

    def __eq__(self, other):
        # Compare against the same class; checking for Filter here made two
        # identical expressions always compare unequal.
        return (isinstance(other, FilterExpression) and
                self.target == other.target and
                self.op == other.op and
                self.value == other.value)

    def __repr__(self):
        if self.op is None:
            return '%s(%r)' % (self.__class__.__name__, self.target)
        else:
            return '%s(%r %s %r)' % (self.__class__.__name__,
                                     self.target, self.op, self.value)

    def __str__(self):
        if self.op is None:
            return '%s' % self.target
        else:
            return '%s %s %s' % (self.target, self.op, self.value)

jsonpath_rw/lexer.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -46,11 +46,12 @@ def tokenize(self, string):
4646
#
4747
# Anyhow, it is pythonic to give some rope to hang oneself with :-)
4848

49-
literals = ['*', '.', '[', ']', '(', ')', '$', ',', ':', '|', '&']
49+
literals = ['*', '.', '[', ']', '(', ')', '$', ',', ':', '|', '&', '@', '?']
5050

5151
reserved_words = { 'where': 'WHERE' }
5252

53-
tokens = ['DOUBLEDOT', 'NUMBER', 'ID', 'NAMED_OPERATOR', 'SORT_DIRECTION'] + list(reserved_words.values())
53+
tokens = ['DOUBLEDOT', 'NUMBER', 'ID', 'NAMED_OPERATOR', 'SORT_DIRECTION',
54+
'FILTER_OP'] + list(reserved_words.values())
5455

5556
states = [ ('singlequote', 'exclusive'),
5657
('doublequote', 'exclusive'),
@@ -59,9 +60,10 @@ def tokenize(self, string):
5960
# Normal lexing, rather easy
6061
t_DOUBLEDOT = r'\.\.'
6162
t_ignore = ' \t'
63+
t_FILTER_OP = r'(==?|<=|>=|!=|<|>)'
6264

6365
def t_ID(self, t):
64-
r'[a-zA-Z_@][a-zA-Z0-9_@\-]*'
66+
r'@?[a-zA-Z_][a-zA-Z0-9_@\-]*'
6567
t.type = self.reserved_words.get(t.value, 'ID')
6668
return t
6769

jsonpath_rw/parser.py

+35
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,41 @@ def p_jsonpath_sort(self, p):
187187
sort = SortedThis(p[3])
188188
p[0] = Child(p[1], sort)
189189

190+
def p_jsonpath_this(self, p):
191+
"jsonpath : '@'"
192+
p[0] = This()
193+
194+
def p_expression(self, p):
195+
"""expression : jsonpath
196+
| jsonpath FILTER_OP ID
197+
| jsonpath FILTER_OP NUMBER
198+
"""
199+
if len(p) == 2:
200+
left, op, right = p[1], None, None
201+
else:
202+
__, left, op, right = p
203+
p[0] = FilterExpression(left, op, right)
204+
205+
def p_expressions_expression(self, p):
206+
"expressions : expression"
207+
p[0] = [p[1]]
208+
209+
def p_expressions_and(self, p):
210+
"expressions : expressions '&' expressions"
211+
p[0] = p[1] + p[3]
212+
213+
def p_expressions_parens(self, p):
214+
"expressions : '(' expressions ')'"
215+
p[0] = p[2]
216+
217+
def p_filter(self, p):
218+
"filter : '?' expressions "
219+
p[0] = Filter(p[2])
220+
221+
def p_jsonpath_filter(self, p):
222+
"jsonpath : jsonpath '[' filter ']'"
223+
p[0] = Child(p[1], p[3])
224+
190225

191226
class IteratorToTokenStream(object):
192227
def __init__(self, iterator):

tests/test_jsonpath.py

+13
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,19 @@ def test_sort_value(self):
127127
[{'cat': {'dog': 2, 'cow': 1}}, {'cat': {'dog': 2, 'bow': 2}}, {'cat': {'dog': 3, 'cow': 2}}, {'cat': {'dog': 1, 'bow': 3}}]),
128128
])
129129

130+
def test_filter_value(self):
131+
jsonpath.auto_id_field = None
132+
self.check_cases([
133+
('objects[?cow]', {'objects': [{'cow': 'moo'}, {'cat': 'neigh'}]}, [{'cow': 'moo'}]),
134+
('objects[?@.cow]', {'objects': [{'cow': 'moo'}, {'cat': 'neigh'}]}, [{'cow': 'moo'}]),
135+
('objects[?(@.cow)]', {'objects': [{'cow': 'moo'}, {'cat': 'neigh'}]}, [{'cow': 'moo'}]),
136+
('objects[?(@."cow!?cat")]', {'objects': [{'cow!?cat': 'moo'}, {'cat': 'neigh'}]}, [{'cow!?cat': 'moo'}]),
137+
('objects[?cow="moo"]', {'objects': [{'cow': 'moo'}, {'cow': 'neigh'}, {'cat': 'neigh'}]}, [{'cow': 'moo'}]),
138+
('objects[?(@.["cow"]="moo")]', {'objects': [{'cow': 'moo'}, {'cow': 'neigh'}, {'cat': 'neigh'}]}, [{'cow': 'moo'}]),
139+
('objects[?cow=="moo"]', {'objects': [{'cow': 'moo'}, {'cow': 'neigh'}, {'cat': 'neigh'}]}, [{'cow': 'moo'}]),
140+
('objects[?cow>5]', {'objects': [{'cow': 8}, {'cow': 7}, {'cow': 5}, {'cow': 'neigh'}]}, [{'cow': 8}, {'cow': 7}]),
141+
('objects[?cow>5&cat=2]', {'objects': [{'cow': 8, 'cat': 2}, {'cow': 7, 'cat': 2}, {'cow': 2, 'cat': 2}, {'cow': 5, 'cat': 3}, {'cow': 8, 'cat': 3}]}, [{'cow': 8, 'cat': 2}, {'cow': 7, 'cat': 2}]),
142+
])
130143

131144
def test_root_value(self):
132145
jsonpath.auto_id_field = None

0 commit comments

Comments
 (0)