|
6 | 6 | :suppress:
|
7 | 7 |
|
8 | 8 | import numpy as np
|
| 9 | + import random |
9 | 10 | np.random.seed(123456)
|
10 | 11 | from pandas import *
|
11 | 12 | randn = np.random.randn
|
| 13 | + randint = np.random.randint |
12 | 14 | np.set_printoptions(precision=4, suppress=True)
|
13 | 15 |
|
14 | 16 | ***************************
|
@@ -202,7 +204,7 @@ select out rows where one or more columns have values you want:
|
202 | 204 |
|
203 | 205 | df2 = DataFrame({'a' : ['one', 'one', 'two', 'three', 'two', 'one', 'six'],
|
204 | 206 | 'b' : ['x', 'y', 'y', 'x', 'y', 'x', 'x'],
|
205 |
| - 'c' : np.random.randn(7)}) |
| 207 | + 'c' : randn(7)}) |
206 | 208 | df2[df2['a'].isin(['one', 'two'])]
|
207 | 209 |
|
208 | 210 | Note, with the :ref:`advanced indexing <indexing.advanced>` ``ix`` method, you
|
@@ -232,7 +234,66 @@ Take Methods
|
232 | 234 |
|
233 | 235 | .. _indexing.take:
|
234 | 236 |
|
235 |
| -TODO: Fill Me In |
| 237 | +Similar to numpy ndarrays, pandas Index, Series, and DataFrame also provide |
| 238 | +the ``take`` method that retrieves elements along a given axis at the given |
| 239 | +indices. The given indices must be either a list or an ndarray of integer |
| 240 | +index positions. |
| 241 | + |
| 242 | +.. ipython:: python |
| 243 | +
|
| 244 | + index = Index(randint(0, 1000, 10)) |
| 245 | + index |
| 246 | +
|
| 247 | + positions = [0, 9, 3] |
| 248 | +
|
| 249 | + index.ix[positions] |
| 250 | + index.take(positions) |
| 251 | +
|
| 252 | + ser = Series(randn(10)) |
| 253 | + ser |
| 254 | +
|
| 255 | + ser.ix[positions] |
| 256 | + ser.take(positions) |
| 257 | +
|
| 258 | +For DataFrames, the given indices should be a 1d list or ndarray that specifies |
| 259 | +row or column positions. |
| 260 | + |
| 261 | +.. ipython:: python |
| 262 | +
|
| 263 | + df = DataFrame(randn(5, 3)) |
| 264 | + df |
| 265 | +
|
| 266 | + df.take([0, 2]) |
| 267 | +
|
| 268 | + df.take([1, 4, 6], axis=1) |
| 269 | +
|
| 270 | +Like ndarray, the ``take`` method on pandas objects is not intended |
| 271 | +to work on boolean indices and may return unexpected results. |
| 272 | + |
| 273 | +.. ipython:: python |
| 274 | +
|
| 275 | + arr = randn(10) |
| 276 | + arr |
| 277 | + arr.take([False, True]) |
| 278 | + arr[[0, 1]] |
| 279 | +
|
| 280 | + ser = Series(randn(10)) |
| 281 | + ser |
| 282 | + ser.take([False, True]) |
| 283 | + ser.ix[[0, 1]] |
| 284 | +
|
| 285 | +Finally, as a small note on performance, because the ``take`` method handles |
| 286 | +a narrower range of inputs, it is more optimized internally in numpy |
| 287 | +and thus offers performance that is a good deal faster than indexing. |
| 288 | + |
| 289 | +.. ipython:: |
| 290 | + |
| 291 | + arr = randn(10000, 5) |
| 292 | + indexer = np.arange(10000) |
| 293 | + random.shuffle(indexer) |
| 294 | + |
| 295 | + timeit arr[indexer] |
| 296 | + timeit arr.take(indexer, axis=0) |
236 | 297 |
|
237 | 298 | Duplicate Data
|
238 | 299 | ~~~~~~~~~~~~~~
|
|
0 commit comments