|
| 1 | +""" |
| 2 | +select - Select data table subsets based on multiple spatial criteria. |
| 3 | +""" |
| 4 | +import pandas as pd |
| 5 | +from pygmt.clib import Session |
| 6 | +from pygmt.helpers import ( |
| 7 | + GMTTempFile, |
| 8 | + build_arg_string, |
| 9 | + fmt_docstring, |
| 10 | + kwargs_to_strings, |
| 11 | + use_alias, |
| 12 | +) |
| 13 | + |
| 14 | + |
@fmt_docstring
@use_alias(
    I="reverse",
    J="projection",
    R="region",
    V="verbose",
    Z="z_subregion",
    b="binary",
    d="nodata",
    e="find",
    f="coltypes",
    g="gap",
    h="header",
    i="incols",
    o="outcols",
    r="registration",
    s="skiprows",
    w="wrap",
)
@kwargs_to_strings(R="sequence")
def select(table=None, outfile=None, **kwargs):
    r"""
    Select data table subsets based on multiple spatial criteria.

    This is a filter that reads (x, y) or (longitude, latitude) positions from
    the first 2 columns of *table* and uses a combination of 1-7 criteria to
    pass or reject the records. Records can be selected based on whether or not
    they are:

    1. inside a rectangular region (**region** [and **projection**])
    2. within *dist* km of any point in *pointfile*
    3. within *dist* km of any line in *linefile*
    4. inside one of the polygons in the *polygonfile*
    5. inside geographical features (based on coastlines)
    6. has z-values within a given range, or
    7. inside bins of a grid mask whose nodes are non-zero

    The sense of the tests can be reversed for each of these 7 criteria by
    using the **reverse** option.

    Full option list at :gmt-docs:`gmtselect.html`

    {aliases}

    Parameters
    ----------
    table : str or {table-like}
        Pass in either a file name to an ASCII data table, a 2D
        {table-classes}.
    outfile : str
        The file name for the output ASCII file.
    reverse : str
        [**cfglrsz**].
        Reverses the sense of the test for each of the criteria specified:

        - **c** select records NOT inside any point's circle of influence.
        - **f** select records NOT inside any of the polygons.
        - **g** will pass records inside the cells with z equal zero of the
          grid mask in **-G**.
        - **l** select records NOT within the specified distance of any line.
        - **r** select records NOT inside the specified rectangular region.
        - **s** select records NOT considered inside as specified by **-N**
          (and **-A**, **-D**).
        - **z** select records NOT within the range specified by
          **z_subregion**.
    z_subregion : str
        *min*\ [/*max*]\ [**+a**]\ [**+c**\ *col*]\ [**+i**].
        Pass all records whose 3rd column (*z*; *col* = 2) lies within the
        given range or is NaN (use **skiprows** to skip NaN records). If *max*
        is omitted then we test if *z* equals *min* instead. This means
        equality within 5 ULPs (unit of least precision;
        http://en.wikipedia.org/wiki/Unit_in_the_last_place). Input file must
        have at least three columns. To indicate no limit on min or max,
        specify a hyphen (-). If your 3rd column is absolute time then remember
        to supply ``coltypes="2T"``. To specify another column, append
        **+c**\ *col*, and to specify several tests just repeat the
        **z_subregion** option as many times as you have columns to test.
        **Note**: When more than one **z_subregion** option is given then the
        ``reverse="z"`` option cannot be used. In the case of multiple tests
        you may use these modifiers as well: **+a** passes any record that
        passes at least one of your *z* tests [Default is all tests must pass],
        and **+i** reverses the tests to pass record with *z* value NOT in the
        given range. Finally, if **+c** is not used then it is automatically
        incremented for each new **z_subregion** option, starting with 2.
    {J}
    {R}
    {V}
    {b}
    {d}
    {e}
    {f}
    {g}
    {h}
    {i}
    {o}
    {r}
    {s}
    {w}

    Returns
    -------
    output : pandas.DataFrame or None
        Return type depends on whether the ``outfile`` parameter is set:

        - :class:`pandas.DataFrame` table if ``outfile`` is not set.
        - None if ``outfile`` is set (filtered output will be stored in file
          set by ``outfile``).
    """
    # Remember up front whether the caller wants a DataFrame back; this avoids
    # comparing file names later to work out which output path was taken.
    return_dataframe = outfile is None

    # All reads of the temporary file must happen inside this ``with`` block.
    with GMTTempFile(suffix=".csv") as tmpfile:
        if return_dataframe:
            outfile = tmpfile.name

        with Session() as lib:
            # Choose how data will be passed into the module
            table_context = lib.virtualfile_from_data(check_kind="vector", data=table)
            with table_context as infile:
                arg_str = " ".join([infile, build_arg_string(kwargs), "->" + outfile])
                lib.call_module(module="gmtselect", args=arg_str)

        if not return_dataframe:
            # Filtered output already lives in the user-supplied ``outfile``.
            return None

        # Only the attribute lookup is guarded: an AttributeError raised while
        # parsing the file must not be mistaken for "table has no columns".
        try:
            column_names = table.columns.to_list()
        except AttributeError:  # e.g. 'str' object has no attribute 'columns'
            return pd.read_csv(tmpfile.name, sep="\t", header=None, comment=">")

        # NOTE(review): unlike the fallback above, this read does not pass
        # ``comment=">"`` -- confirm whether ">" lines can appear in the output
        # for table inputs that expose ``columns``.
        return pd.read_csv(tmpfile.name, sep="\t", names=column_names)
0 commit comments