|
13 | 13 | build routines, and which it does not use.
|
14 | 14 |
|
15 | 15 | """
|
| 16 | +import re |
| 17 | +from typing import List |
16 | 18 | import warnings
|
17 | 19 |
|
18 | 20 | import cf_units
|
|
28 | 30 | import iris.exceptions
|
29 | 31 | import iris.fileformats.cf as cf
|
30 | 32 | import iris.fileformats.netcdf
|
31 |
| -from iris.fileformats.netcdf import ( |
32 |
| - UnknownCellMethodWarning, |
33 |
| - parse_cell_methods, |
34 |
| -) |
35 | 33 | from iris.fileformats.netcdf.loader import _get_cf_var_data
|
36 | 34 | import iris.std_names
|
37 | 35 | import iris.util
|
|
184 | 182 | CF_VALUE_STD_NAME_PROJ_Y = "projection_y_coordinate"
|
185 | 183 |
|
186 | 184 |
|
################################################################################
# Handling of cell-methods.

# Keys of the per-cell-method dictionary built while parsing.  "name",
# "method" and "extra" are also the named groups of ``_CM_PARSE``.
_CM_COMMENT = "comment"
_CM_EXTRA = "extra"
_CM_INTERVAL = "interval"
_CM_METHOD = "method"
_CM_NAME = "name"
# One or more consecutive "word:" tokens, each followed by whitespace.
_CM_PARSE_NAME = re.compile(r"([\w_]+\s*?:\s+)+")
# A single cell method: its name(s), the method text, and an optional
# bracketed "extra" section.
_CM_PARSE = re.compile(
    r"""
       (?P<name>([\w_]+\s*?:\s+)+)
       (?P<method>[\w_\s]+(?![\w_]*\s*?:))\s*
       (?:
           \(\s*
           (?P<extra>.+)
           \)\s*
       )?
    """,
    re.VERBOSE,
)

# Cell methods.
_CM_KNOWN_METHODS = [
    "point",
    "sum",
    "mean",
    "maximum",
    "minimum",
    "mid_range",
    "standard_deviation",
    "variance",
    "mode",
    "median",
]


def _split_cell_methods(nc_cell_methods: str) -> List[re.Match]:
    """
    Split a CF cell_methods attribute string into a list of zero or more cell
    methods, each of which is then parsed with a regex to return a list of match
    objects.

    Args:

    * nc_cell_methods: The value of the cell methods attribute to be split.

    Returns:

    * nc_cell_methods_matches: A list of the re.Match objects associated with
      each parsed cell method

    Splitting is done based on words followed by colons outside of any brackets.
    Validation of anything other than being laid out in the expected format is
    left to the calling function.

    A UserWarning is issued when a closing bracket has no matching opening
    bracket, and when a piece of the string does not match the expected
    layout.  A string containing no usable "name:" token yields an empty list.
    """

    # Find name candidates.
    candidate_starts = [
        m.start() for m in _CM_PARSE_NAME.finditer(nc_cell_methods)
    ]
    candidate_set = set(candidate_starts)

    # Work out which candidates fall inside brackets.
    bracketed = set()
    bracket_depth = 0
    for ind, cha in enumerate(nc_cell_methods):
        if cha == "(":
            bracket_depth += 1
        elif cha == ")":
            bracket_depth -= 1
            if bracket_depth < 0:
                msg = (
                    "Cell methods may be incorrectly parsed due to mismatched "
                    "brackets"
                )
                warnings.warn(msg, UserWarning, stacklevel=2)
        if bracket_depth > 0 and ind in candidate_set:
            bracketed.add(ind)
    name_start_inds = [
        ind for ind in candidate_starts if ind not in bracketed
    ]

    # With no top-level name there is nothing to split on.  Report any text
    # that is being dropped rather than failing on an empty index list.
    if not name_start_inds:
        if nc_cell_methods.strip():
            msg = (
                f"Failed to fully parse cell method string: {nc_cell_methods}"
            )
            warnings.warn(msg, UserWarning, stacklevel=2)
        return []

    # Each cell method runs from its own name to the start of the next name,
    # the last one running to the end of the string.
    end_inds = name_start_inds[1:] + [len(nc_cell_methods)]

    # Index the string and match against each substring.
    nc_cell_methods_matches = []
    for start_ind, end_ind in zip(name_start_inds, end_inds):
        nc_cell_method_str = nc_cell_methods[start_ind:end_ind]
        nc_cell_method_match = _CM_PARSE.match(nc_cell_method_str.strip())
        if not nc_cell_method_match:
            msg = (
                f"Failed to fully parse cell method string: {nc_cell_methods}"
            )
            warnings.warn(msg, UserWarning, stacklevel=2)
            continue
        nc_cell_methods_matches.append(nc_cell_method_match)

    return nc_cell_methods_matches
| 283 | + |
| 284 | + |
class UnknownCellMethodWarning(Warning):
    """Warning category issued for a cell method name that is not recognised."""
| 287 | + |
| 288 | + |
def _parse_cell_method_extra(extra):
    """
    Sort the bracketed text of one cell method into intervals and comments.

    Args:

    * extra (str):
        The text found between the brackets of a single cell method.

    Returns:

    * (interval, comment)
        Two lists of strings.  Text following an "interval:" keyword goes to
        the first list, text following a "comment:" keyword (or preceding any
        keyword) goes to the second.  When no keyword is present the whole
        text is returned, unstripped, as a single comment.

    """
    interval = []
    comment = []
    #
    # tokenise the key words and field colon marker
    #
    fields = (
        extra.replace("comment:", "<<comment>><<:>>")
        .replace("interval:", "<<interval>><<:>>")
        .split("<<:>>")
    )
    if len(fields) == 1:
        comment.extend(fields)
        return interval, comment

    # Each field holds the value for the keyword that ended the previous
    # field, followed by the keyword marker (if any) for the next field.
    next_field_type = comment
    for field in fields:
        field_type = next_field_type
        index = field.rfind("<<interval>>")
        if index == 0:
            # Nothing but the marker: no value to record for this field.
            next_field_type = interval
            continue
        elif index > 0:
            next_field_type = interval
        else:
            index = field.rfind("<<comment>>")
            if index == 0:
                next_field_type = comment
                continue
            elif index > 0:
                next_field_type = comment
        if index != -1:
            # Drop the trailing marker, keeping only this field's value.
            field = field[:index]
        field_type.append(field.strip())
    return interval, comment


def parse_cell_methods(nc_cell_methods):
    """
    Parse a CF cell_methods attribute string into a tuple of zero or
    more CellMethod instances.

    Args:

    * nc_cell_methods (str):
        The value of the cell methods attribute to be parsed.
        May be None, in which case an empty tuple is returned.

    Returns:

    * cell_methods
        An iterable of :class:`iris.coords.CellMethod`.

    Multiple coordinates, intervals and comments are supported.
    If a method has a non-standard name a warning will be issued, but the
    results are not affected.
    A cell method whose method text is empty is skipped with a warning.

    """
    if nc_cell_methods is None:
        return ()

    cell_methods = []
    for m in _split_cell_methods(nc_cell_methods):
        d = m.groupdict()
        method = d[_CM_METHOD].strip()
        # Check validity of method, allowing for multi-part methods
        # e.g. mean over years.
        method_words = method.split()
        if not method_words:
            # The pattern can capture a whitespace-only method, which cannot
            # be turned into a meaningful cell method.
            msg = (
                f"Failed to fully parse cell method string: {nc_cell_methods}"
            )
            warnings.warn(msg, UserWarning)
            continue
        if method_words[0].lower() not in _CM_KNOWN_METHODS:
            msg = "NetCDF variable contains unknown cell method {!r}"
            warnings.warn(
                msg.format(method_words[0]),
                UnknownCellMethodWarning,
            )

        # Remove every kind of whitespace (the pattern accepts tabs and
        # newlines as well as spaces) before splitting the names on colons.
        name = "".join(d[_CM_NAME].split()).rstrip(":")
        names = tuple(name.split(":"))

        interval = []
        comment = []
        if d[_CM_EXTRA] is not None:
            interval, comment = _parse_cell_method_extra(d[_CM_EXTRA])
        #
        # cater for a shared interval over multiple axes
        #
        if len(interval) == 1 and len(names) != 1:
            interval = interval * len(names)
        #
        # cater for a shared comment over multiple axes
        #
        if len(comment) == 1 and len(names) != 1:
            comment = comment * len(names)

        cell_method = iris.coords.CellMethod(
            method,
            coords=names,
            intervals=tuple(interval),
            comments=tuple(comment),
        )
        cell_methods.append(cell_method)
    return tuple(cell_methods)
| 387 | + |
| 388 | + |
187 | 389 | ################################################################################
|
188 | 390 | def build_cube_metadata(engine):
|
189 | 391 | """Add the standard meta data to the cube."""
|
|
0 commit comments