2
2
import avro .schema
3
3
import sys
4
4
import urlparse
5
- from typing import Any
5
+ from typing import Any , Union
6
6
7
7
class ValidationException(Exception):
    """Raised when a datum does not conform to the schema it is checked against."""
    pass
9
9
10
class ClassValidationException(ValidationException):
    """Validation failure of a record whose ``class`` field was present and valid.

    ``validate_ex`` raises this instead of a plain ``ValidationException``
    when a record's ``class`` field validated but other fields produced
    errors. Its union branch re-raises it immediately rather than adding it
    to the list of per-alternative error messages.
    """
    pass
12
+
10
13
def validate(expected_schema, datum, identifiers=set(), strict=False, foreign_properties=set()):
    # type: (avro.schema.Schema, Any, Set[unicode], bool, Set[unicode]) -> bool
    """Return whether ``datum`` is an instance of ``expected_schema``.

    Boolean-returning wrapper around ``validate_ex``: every argument is
    forwarded unchanged and ``raise_ex=False`` is passed, so a mismatch is
    reported as ``False`` rather than as a raised ``ValidationException``.
    """
    # NOTE(review): the ``set()`` defaults are created once and shared across
    # calls. They are only forwarded here; confirm nothing downstream mutates
    # them before relying on that.
    return validate_ex(expected_schema, datum, identifiers, strict=strict,
                       foreign_properties=foreign_properties, raise_ex=False)
16
16
17
17
# Inclusive bounds checked by validate_ex for the 'int' schema type
# (signed 32-bit range).
INT_MIN_VALUE = -(1 << 31)
INT_MAX_VALUE = (1 << 31) - 1
# Inclusive bounds checked by validate_ex for the 'long' schema type
# (signed 64-bit range).
LONG_MIN_VALUE = -(1 << 63)
LONG_MAX_VALUE = (1 << 63) - 1
21
21
22
def indent(v, nolead=False):  # type: (Union[str, unicode], bool) -> unicode
    """Prefix the lines of ``v`` with the indentation string.

    With ``nolead`` false, every line is prefixed and the lines are re-joined
    with newlines. With ``nolead`` true, the first line is left unprefixed.

    An empty ``v`` yields an empty string in both modes; indexing the first
    line unguarded would raise ``IndexError`` when ``nolead`` is true.
    """
    lines = v.splitlines()
    if nolead:
        if not lines:
            # Nothing to keep as the leading line; avoid lines[0] IndexError.
            return u""
        # NOTE(review): the first line is concatenated directly onto the
        # joined remainder with no newline in between, so "a\nb" becomes
        # "a b". Kept as-is because callers outside this view may depend on
        # it -- confirm whether a separator was intended.
        return lines[0] + u"\n".join([u" " + l for l in lines[1:]])
    return u"\n".join([u" " + l for l in lines])
27
27
28
28
def friendly (v ): # type: (Any) -> Any
29
29
if isinstance (v , avro .schema .NamedSchema ):
@@ -37,11 +37,11 @@ def friendly(v): # type: (Any) -> Any
37
37
else :
38
38
return v
39
39
40
def multi(v, q=""):  # type: (Union[str, unicode], Union[str, unicode]) -> unicode
    """Wrap ``v`` in the quote string ``q`` on both sides.

    If ``v`` contains a newline, a trailing newline is appended after the
    closing quote; otherwise the quoted text is returned as-is.
    """
    if '\n' in v:
        return u"%s%s%s\n" % (q, v, q)
    return u"%s%s%s" % (q, v, q)
45
45
46
46
def vpformat (datum ): # type: (Any) -> str
47
47
a = pprint .pformat (datum )
@@ -50,8 +50,8 @@ def vpformat(datum): # type: (Any) -> str
50
50
return a
51
51
52
52
def validate_ex (expected_schema , datum , identifiers = None , strict = False ,
53
- foreign_properties = None ):
54
- # type: (avro.schema.Schema, Any, Set[unicode], bool, Set[unicode]) -> bool
53
+ foreign_properties = None , raise_ex = True ):
54
+ # type: (avro.schema.Schema, Any, Set[unicode], bool, Set[unicode], bool ) -> bool
55
55
"""Determine if a python datum is an instance of a schema."""
56
56
57
57
if not identifiers :
@@ -66,93 +66,154 @@ def validate_ex(expected_schema, datum, identifiers=None, strict=False,
66
66
if datum is None :
67
67
return True
68
68
else :
69
- raise ValidationException ("the value `%s` is not null" % vpformat (datum ))
69
+ if raise_ex :
70
+ raise ValidationException (u"the value `%s` is not null" % vpformat (datum ))
71
+ else :
72
+ return False
70
73
elif schema_type == 'boolean' :
71
74
if isinstance (datum , bool ):
72
75
return True
73
76
else :
74
- raise ValidationException ("the value `%s` is not boolean" % vpformat (datum ))
77
+ if raise_ex :
78
+ raise ValidationException (u"the value `%s` is not boolean" % vpformat (datum ))
79
+ else :
80
+ return False
75
81
elif schema_type == 'string' :
76
82
if isinstance (datum , basestring ):
77
83
return True
78
84
elif isinstance (datum , bytes ):
79
- datum = datum .decode ("utf-8" )
85
+ datum = datum .decode (u "utf-8" )
80
86
return True
81
87
else :
82
- raise ValidationException ("the value `%s` is not string" % vpformat (datum ))
88
+ if raise_ex :
89
+ raise ValidationException (u"the value `%s` is not string" % vpformat (datum ))
90
+ else :
91
+ return False
83
92
elif schema_type == 'bytes' :
84
93
if isinstance (datum , str ):
85
94
return True
86
95
else :
87
- raise ValidationException ("the value `%s` is not bytes" % vpformat (datum ))
96
+ if raise_ex :
97
+ raise ValidationException (u"the value `%s` is not bytes" % vpformat (datum ))
98
+ else :
99
+ return False
88
100
elif schema_type == 'int' :
89
101
if ((isinstance (datum , int ) or isinstance (datum , long ))
90
102
and INT_MIN_VALUE <= datum <= INT_MAX_VALUE ):
91
103
return True
92
104
else :
93
- raise ValidationException ("`%s` is not int" % vpformat (datum ))
105
+ if raise_ex :
106
+ raise ValidationException (u"`%s` is not int" % vpformat (datum ))
107
+ else :
108
+ return False
94
109
elif schema_type == 'long' :
95
110
if ((isinstance (datum , int ) or isinstance (datum , long ))
96
111
and LONG_MIN_VALUE <= datum <= LONG_MAX_VALUE ):
97
112
return True
98
113
else :
99
- raise ValidationException ("the value `%s` is not long" % vpformat (datum ))
114
+ if raise_ex :
115
+ raise ValidationException (u"the value `%s` is not long" % vpformat (datum ))
116
+ else :
117
+ return False
100
118
elif schema_type in ['float' , 'double' ]:
101
119
if (isinstance (datum , int ) or isinstance (datum , long )
102
120
or isinstance (datum , float )):
103
121
return True
104
122
else :
105
- raise ValidationException ("the value `%s` is not float or double" % vpformat (datum ))
123
+ if raise_ex :
124
+ raise ValidationException (u"the value `%s` is not float or double" % vpformat (datum ))
125
+ else :
126
+ return False
106
127
elif isinstance (expected_schema , avro .schema .FixedSchema ):
107
128
if isinstance (datum , str ) and len (datum ) == expected_schema .size :
108
129
return True
109
130
else :
110
- raise ValidationException ("the value `%s` is not fixed" % vpformat (datum ))
131
+ if raise_ex :
132
+ raise ValidationException (u"the value `%s` is not fixed" % vpformat (datum ))
133
+ else :
134
+ return False
111
135
elif isinstance (expected_schema , avro .schema .EnumSchema ):
112
136
if expected_schema .name == "Any" :
113
137
if datum is not None :
114
138
return True
115
139
else :
116
- raise ValidationException ("Any type must be non-null" )
140
+ if raise_ex :
141
+ raise ValidationException (u"'Any' type must be non-null" )
142
+ else :
143
+ return False
117
144
if datum in expected_schema .symbols :
118
145
return True
119
146
else :
120
- raise ValidationException ("the value `%s`\n is not a valid symbol in enum %s, expected one of %s" % (vpformat (datum ), expected_schema .name , "'" + "', '" .join (expected_schema .symbols ) + "'" ))
147
+ if raise_ex :
148
+ raise ValidationException (u"the value `%s`\n is not a valid symbol in enum %s, expected one of %s" % (vpformat (datum ), expected_schema .name , "'" + "', '" .join (expected_schema .symbols ) + "'" ))
149
+ else :
150
+ return False
121
151
elif isinstance (expected_schema , avro .schema .ArraySchema ):
122
152
if isinstance (datum , list ):
123
153
for i , d in enumerate (datum ):
124
154
try :
125
- validate_ex (expected_schema .items , d , identifiers , strict = strict , foreign_properties = foreign_properties )
155
+ if not validate_ex (expected_schema .items , d , identifiers , strict = strict , foreign_properties = foreign_properties , raise_ex = raise_ex ):
156
+ return False
126
157
except ValidationException as v :
127
- raise ValidationException ("At position %i\n %s" % (i , indent (str (v ))))
128
- return True
129
- else :
130
- raise ValidationException ("the value `%s` is not a list, expected list of %s" % (vpformat (datum ), friendly (expected_schema .items )))
131
- elif isinstance (expected_schema , avro .schema .MapSchema ):
132
- if (isinstance (datum , dict ) and
133
- False not in [isinstance (k , basestring ) for k in datum .keys ()] and
134
- False not in [validate (expected_schema .values , v , strict = strict ) for v in datum .values ()]):
158
+ if raise_ex :
159
+ raise ValidationException (u"At position %i\n %s" % (i , indent (str (v ))))
160
+ else :
161
+ return False
135
162
return True
136
163
else :
137
- raise ValidationException ("`%s` is not a valid map value, expected\n %s" % (vpformat (datum ), vpformat (expected_schema .values )))
164
+ if raise_ex :
165
+ raise ValidationException (u"the value `%s` is not a list, expected list of %s" % (vpformat (datum ), friendly (expected_schema .items )))
166
+ else :
167
+ return False
138
168
elif isinstance (expected_schema , avro .schema .UnionSchema ):
139
- if True in [validate (s , datum , identifiers , strict = strict ) for s in expected_schema .schemas ]:
140
- return True
141
- else :
142
- errors = []
143
- for s in expected_schema .schemas :
144
- try :
145
- validate_ex (s , datum , identifiers , strict = strict , foreign_properties = foreign_properties )
146
- except ValidationException as e :
147
- errors .append (str (e ))
148
- raise ValidationException ("the value %s is not a valid type in the union, expected one of:\n %s" % (multi (vpformat (datum ), '`' ), "\n " .join (["- %s, but\n %s" % (friendly (expected_schema .schemas [i ]), indent (multi (errors [i ]))) for i in range (0 , len (expected_schema .schemas ))])))
169
+ for s in expected_schema .schemas :
170
+ if validate_ex (s , datum , identifiers , strict = strict , raise_ex = False ):
171
+ return True
172
+
173
+ if not raise_ex :
174
+ return False
175
+
176
+ errors = [] # type: List[unicode]
177
+ for s in expected_schema .schemas :
178
+ try :
179
+ validate_ex (s , datum , identifiers , strict = strict , foreign_properties = foreign_properties , raise_ex = True )
180
+ except ClassValidationException as e :
181
+ raise
182
+ except ValidationException as e :
183
+ errors .append (unicode (e ))
184
+
185
+ raise ValidationException (u"the value %s is not a valid type in the union, expected one of:\n %s" % (
186
+ multi (vpformat (datum ), '`' ), u"\n " .join ([
187
+ u"- %s, but\n %s" % (
188
+ friendly (expected_schema .schemas [i ]), indent (multi (errors [i ])))
189
+ for i in range (0 , len (expected_schema .schemas ))])))
149
190
150
191
elif isinstance (expected_schema , avro .schema .RecordSchema ):
151
192
if not isinstance (datum , dict ):
152
- raise ValidationException ("`%s`\n is not a dict" % vpformat (datum ))
193
+ if raise_ex :
194
+ raise ValidationException (u"`%s`\n is not a dict" % vpformat (datum ))
195
+ else :
196
+ return False
197
+
198
+ classmatch = None
199
+ for f in expected_schema .fields :
200
+ if f .name == "class" :
201
+ d = datum .get ("class" )
202
+ if not d :
203
+ if raise_ex :
204
+ raise ValidationException (u"Missing 'class' field" )
205
+ else :
206
+ return False
207
+ if not validate_ex (f .type , d , identifiers , strict = strict , foreign_properties = foreign_properties , raise_ex = raise_ex ):
208
+ return False
209
+ classmatch = d
210
+ break
153
211
154
212
errors = []
155
213
for f in expected_schema .fields :
214
+ if f .name == "class" :
215
+ continue
216
+
156
217
if f .name in datum :
157
218
fieldval = datum [f .name ]
158
219
else :
@@ -162,12 +223,14 @@ def validate_ex(expected_schema, datum, identifiers=None, strict=False,
162
223
fieldval = None
163
224
164
225
try :
165
- validate_ex (f .type , fieldval , identifiers , strict = strict , foreign_properties = foreign_properties )
226
+ if not validate_ex (f .type , fieldval , identifiers , strict = strict , foreign_properties = foreign_properties , raise_ex = raise_ex ):
227
+ return False
166
228
except ValidationException as v :
167
229
if f .name not in datum :
168
- errors .append ("missing required field `%s`" % f .name )
230
+ errors .append (u "missing required field `%s`" % f .name )
169
231
else :
170
- errors .append ("could not validate field `%s` because\n %s" % (f .name , multi (indent (str (v )))))
232
+ errors .append (u"could not validate field `%s` because\n %s" % (f .name , multi (indent (str (v )))))
233
+
171
234
if strict :
172
235
for d in datum :
173
236
found = False
@@ -176,14 +239,25 @@ def validate_ex(expected_schema, datum, identifiers=None, strict=False,
176
239
found = True
177
240
if not found :
178
241
if d not in identifiers and d not in foreign_properties and d [0 ] not in ("@" , "$" ):
242
+ if not raise_ex :
243
+ return False
179
244
split = urlparse .urlsplit (d )
180
245
if split .scheme :
181
- errors .append ("could not validate extension field `%s` because it is not recognized and strict is True. Did you include a $schemas section?" % (d ))
246
+ errors .append (u "could not validate extension field `%s` because it is not recognized and strict is True. Did you include a $schemas section?" % (d ))
182
247
else :
183
- errors .append ("could not validate field `%s` because it is not recognized and strict is True, valid fields are: %s" % (d , ", " .join (fn .name for fn in expected_schema .fields )))
248
+ errors .append (u "could not validate field `%s` because it is not recognized and strict is True, valid fields are: %s" % (d , ", " .join (fn .name for fn in expected_schema .fields )))
184
249
185
250
if errors :
186
- raise ValidationException ("\n " .join (errors ))
251
+ if raise_ex :
252
+ if classmatch :
253
+ raise ClassValidationException (u"%s record %s" % (classmatch , "\n " .join (errors )))
254
+ else :
255
+ raise ValidationException (u"\n " .join (errors ))
256
+ else :
257
+ return False
187
258
else :
188
259
return True
189
- raise ValidationException ("Unrecognized schema_type %s" % schema_type )
260
+ if raise_ex :
261
+ raise ValidationException (u"Unrecognized schema_type %s" % schema_type )
262
+ else :
263
+ return False
0 commit comments