Skip to content

Commit 53ef119

Browse files
committed
Wrap gmtselect
Initial commit for wrapping the gmtselect function (see #1427), which selects data table subsets based on multiple spatial criteria. The original GMT `gmtselect` documentation is at https://docs.generic-mapping-tools.org/6.2/gmtselect.html. This commit aliases the non-common optional parameters reverse (I) and z_subregion (Z).
1 parent 85d78d6 commit 53ef119

File tree

5 files changed

+211
-0
lines changed

5 files changed

+211
-0
lines changed

doc/api/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ Operations on tabular data:
8181

8282
blockmean
8383
blockmedian
84+
select
8485
surface
8586

8687
Operations on grids:

pygmt/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
grdtrack,
4545
info,
4646
makecpt,
47+
select,
4748
surface,
4849
which,
4950
x2sys_cross,

pygmt/src/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
from pygmt.src.plot import plot
3434
from pygmt.src.plot3d import plot3d
3535
from pygmt.src.rose import rose
36+
from pygmt.src.select import select
3637
from pygmt.src.solar import solar
3738
from pygmt.src.subplot import set_panel, subplot
3839
from pygmt.src.surface import surface

pygmt/src/select.py

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
"""
2+
select - Select data table subsets based on multiple spatial criteria.
3+
"""
4+
import pandas as pd
5+
from pygmt.clib import Session
6+
from pygmt.helpers import (
7+
GMTTempFile,
8+
build_arg_string,
9+
fmt_docstring,
10+
kwargs_to_strings,
11+
use_alias,
12+
)
13+
14+
15+
@fmt_docstring
@use_alias(
    I="reverse",
    J="projection",
    R="region",
    V="verbose",
    Z="z_subregion",
    b="binary",
    d="nodata",
    e="find",
    f="coltypes",
    g="gap",
    h="header",
    i="incols",
    o="outcols",
    r="registration",
    s="skiprows",
    w="wrap",
)
@kwargs_to_strings(R="sequence")
def select(table=None, outfile=None, **kwargs):
    r"""
    Select data table subsets based on multiple spatial criteria.

    This is a filter that reads (x, y) or (longitude, latitude) positions from
    the first 2 columns of *table* and uses a combination of 1-7 criteria to
    pass or reject the records. Records can be selected based on whether or not
    they are:

    1. inside a rectangular region (**region** [and **projection**])
    2. within *dist* km of any point in *pointfile*
    3. within *dist* km of any line in *linefile*
    4. inside one of the polygons in the *polygonfile*
    5. inside geographical features (based on coastlines)
    6. has z-values within a given range, or
    7. inside bins of a grid mask whose nodes are non-zero

    The sense of the tests can be reversed for each of these 7 criteria by
    using the **reverse** option.

    Full option list at :gmt-docs:`gmtselect.html`

    {aliases}

    Parameters
    ----------
    table : str or {table-like}
        Pass in either a file name to an ASCII data table, a 2D
        {table-classes}.
    outfile : str
        The file name for the output ASCII file.
    reverse : str
        [**cfglrsz**].
        Reverses the sense of the test for each of the criteria specified:

        - **c** select records NOT inside any point's circle of influence.
        - **f** select records NOT inside any of the polygons.
        - **g** will pass records inside the cells with z equal zero of the
          grid mask in **-G**.
        - **l** select records NOT within the specified distance of any line.
        - **r** select records NOT inside the specified rectangular region.
        - **s** select records NOT considered inside as specified by **-N**
          (and **-A**, **-D**).
        - **z** select records NOT within the range specified by
          **z_subregion**.
    z_subregion : str
        *min*\ [/*max*]\ [**+a**]\ [**+c**\ *col*]\ [**+i**].
        Pass all records whose 3rd column (*z*; *col* = 2) lies within the
        given range or is NaN (use **skiprows** to skip NaN records). If *max*
        is omitted then we test if *z* equals *min* instead. This means
        equality within 5 ULPs (unit of least precision;
        http://en.wikipedia.org/wiki/Unit_in_the_last_place). Input file must
        have at least three columns. To indicate no limit on min or max,
        specify a hyphen (-). If your 3rd column is absolute time then remember
        to supply ``coltypes="2T"``. To specify another column, append
        **+c**\ *col*, and to specify several tests just repeat the
        **z_subregion** option as many times as you have columns to test.
        **Note**: When more than one **z_subregion** option is given then the
        ``reverse="z"`` option cannot be used. In the case of multiple tests
        you may use these modifiers as well: **+a** passes any record that
        passes at least one of your *z* tests [Default is all tests must pass],
        and **+i** reverses the tests to pass record with *z* value NOT in the
        given range. Finally, if **+c** is not used then it is automatically
        incremented for each new **z_subregion** option, starting with 2.
    {J}
    {R}
    {V}
    {b}
    {d}
    {e}
    {f}
    {g}
    {h}
    {i}
    {o}
    {r}
    {s}
    {w}

    Returns
    -------
    output : pandas.DataFrame or None
        Return type depends on whether the ``outfile`` parameter is set:

        - :class:`pandas.DataFrame` table if ``outfile`` is not set.
        - None if ``outfile`` is set (filtered output will be stored in file
          set by ``outfile``).
    """

    with GMTTempFile(suffix=".csv") as tmpfile:
        # Send the output to the temporary file only when the caller did not
        # ask for a file of their own; ``outfile`` itself is left untouched so
        # it can still tell the two cases apart afterwards.
        output_path = tmpfile.name if outfile is None else outfile

        with Session() as lib:
            # Choose how data will be passed into the module
            table_context = lib.virtualfile_from_data(check_kind="vector", data=table)
            with table_context as infile:
                arg_str = " ".join(
                    [infile, build_arg_string(kwargs), "->" + output_path]
                )
                lib.call_module(module="gmtselect", args=arg_str)

        # Output went to the user's file: there is nothing to load or return.
        result = None
        if outfile is None:
            # Read the temporary output while the temporary file still exists.
            # Only the attribute lookup is guarded, so that an AttributeError
            # raised for any other reason is not mistaken for "no column names".
            try:
                column_names = table.columns.to_list()
            except AttributeError:  # e.g. 'str' object has no attribute 'columns'
                result = pd.read_csv(tmpfile.name, sep="\t", header=None, comment=">")
            else:
                result = pd.read_csv(tmpfile.name, sep="\t", names=column_names)

    return result

pygmt/tests/test_select.py

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
"""
2+
Tests for select.
3+
"""
4+
import os
5+
6+
import numpy.testing as npt
7+
import pandas as pd
8+
import pytest
9+
from pygmt import select
10+
from pygmt.datasets import load_sample_bathymetry
11+
from pygmt.helpers import GMTTempFile
12+
13+
14+
@pytest.fixture(scope="module", name="dataframe")
def fixture_dataframe():
    """
    Provide the sample bathymetry table, exposed to tests as ``dataframe``.
    """
    bathymetry = load_sample_bathymetry()
    return bathymetry
20+
21+
22+
def test_select_input_dataframe(dataframe):
    """
    Check select when the input table is a pandas.DataFrame.
    """
    region = [250, 251, 26, 27]
    expected_median = [250.31464, 26.33893, -270.0]

    selected = select(table=dataframe, region=region)

    assert isinstance(selected, pd.DataFrame)
    # Column names of the input frame must be carried over to the output
    assert all(dataframe.columns == selected.columns)
    assert selected.shape == (65, 3)
    npt.assert_allclose(selected.median(), expected_median)
31+
32+
33+
def test_select_input_table_matrix(dataframe):
    """
    Check select when the input table is a plain matrix rather than a
    pandas.DataFrame.

    The reverse (I) alias is exercised at the same time.
    """
    matrix = dataframe.values
    region = [245.5, 254.5, 20.5, 29.5]
    expected_median = [247.235, 20.48624, -3241.0]

    selected = select(table=matrix, region=region, reverse="r")

    assert isinstance(selected, pd.DataFrame)
    assert selected.shape == (9177, 3)
    npt.assert_allclose(selected.median(), expected_median)
45+
46+
47+
def test_select_input_filename():
    """
    Check select when the input table is given as an ASCII text file name and
    the result is written to ``outfile``.

    The z_subregion (Z) alias is exercised at the same time.
    """
    expected_median = [250.12149, 26.04296, -674.0]

    with GMTTempFile() as tmpfile:
        returned = select(
            table="@tut_ship.xyz",
            region=[250, 251, 26, 27],
            z_subregion=["-/-630", "-120/0+a"],
            outfile=tmpfile.name,
        )
        # With outfile set, nothing is returned; the data lives in the file
        assert returned is None
        assert os.path.exists(path=tmpfile.name)

        written = pd.read_csv(tmpfile.name, sep="\t", header=None)
        assert written.shape == (5, 3)
        npt.assert_allclose(written.median(), expected_median)

0 commit comments

Comments
 (0)