5
5
import numpy as np
6
6
7
7
from pandas .compat import range , PY3
8
- import pandas .io .formats .printing as printing
9
8
10
- import pandas as pd
9
+ from pandas import (DataFrame , Series , date_range , timedelta_range ,
10
+ Timedelta , NaT )
11
11
import pandas .util .testing as tm
12
12
13
13
from pandas .tests .frame .common import _check_mixed_float , _check_mixed_int
18
18
19
19
class TestFrameComparisons (object ):
20
20
def test_flex_comparison_nat (self ):
21
- # GH# 15697, GH# 22163 df.eq(pd. NaT) should behave like df == pd. NaT,
21
+ # GH 15697, GH 22163 df.eq(NaT) should behave like df == NaT,
22
22
# and _definitely_ not be NaN
23
- df = pd . DataFrame ([pd . NaT ])
23
+ df = DataFrame ([NaT ])
24
24
25
- result = df == pd . NaT
25
+ result = df == NaT
26
26
# result.iloc[0, 0] is a np.bool_ object
27
27
assert result .iloc [0 , 0 ].item () is False
28
28
29
- result = df .eq (pd . NaT )
29
+ result = df .eq (NaT )
30
30
assert result .iloc [0 , 0 ].item () is False
31
31
32
- result = df != pd . NaT
32
+ result = df != NaT
33
33
assert result .iloc [0 , 0 ].item () is True
34
34
35
- result = df .ne (pd . NaT )
35
+ result = df .ne (NaT )
36
36
assert result .iloc [0 , 0 ].item () is True
37
37
38
38
def test_mixed_comparison (self ):
39
- # GH# 13128, GH# 22163 != datetime64 vs non-dt64 should be False,
39
+ # GH 13128, GH 22163 != datetime64 vs non-dt64 should be False,
40
40
# not raise TypeError
41
41
# (this appears to be fixed before #22163, not sure when)
42
- df = pd . DataFrame ([['1989-08-01' , 1 ], ['1989-08-01' , 2 ]])
43
- other = pd . DataFrame ([['a' , 'b' ], ['c' , 'd' ]])
42
+ df = DataFrame ([['1989-08-01' , 1 ], ['1989-08-01' , 2 ]])
43
+ other = DataFrame ([['a' , 'b' ], ['c' , 'd' ]])
44
44
45
45
result = df == other
46
46
assert not result .any ().any ()
@@ -49,9 +49,9 @@ def test_mixed_comparison(self):
49
49
assert result .all ().all ()
50
50
51
51
def test_df_boolean_comparison_error (self ):
52
- # GH# 4576
52
+ # GH 4576
53
53
# boolean comparisons with a tuple/list give unexpected results
54
- df = pd . DataFrame (np .arange (6 ).reshape ((3 , 2 )))
54
+ df = DataFrame (np .arange (6 ).reshape ((3 , 2 )))
55
55
56
56
# not shape compatible
57
57
with pytest .raises (ValueError ):
@@ -60,14 +60,14 @@ def test_df_boolean_comparison_error(self):
60
60
df == [2 , 2 ]
61
61
62
62
def test_df_float_none_comparison (self ):
63
- df = pd . DataFrame (np .random .randn (8 , 3 ), index = range (8 ),
64
- columns = ['A' , 'B' , 'C' ])
63
+ df = DataFrame (np .random .randn (8 , 3 ), index = range (8 ),
64
+ columns = ['A' , 'B' , 'C' ])
65
65
66
66
result = df .__eq__ (None )
67
67
assert not result .any ().any ()
68
68
69
69
def test_df_string_comparison (self ):
70
- df = pd . DataFrame ([{"a" : 1 , "b" : "foo" }, {"a" : 2 , "b" : "bar" }])
70
+ df = DataFrame ([{"a" : 1 , "b" : "foo" }, {"a" : 2 , "b" : "bar" }])
71
71
mask_a = df .a > 1
72
72
tm .assert_frame_equal (df [mask_a ], df .loc [1 :1 , :])
73
73
tm .assert_frame_equal (df [- mask_a ], df .loc [0 :0 , :])
@@ -78,181 +78,145 @@ def test_df_string_comparison(self):
78
78
79
79
@pytest.mark.parametrize('opname', ['eq', 'ne', 'gt', 'lt', 'ge', 'le'])
def test_df_flex_cmp_constant_return_types(self, opname):
    """Flex comparison against a scalar gives bool columns (GH 15077).

    This is the non-empty DataFrame case.
    """
    frame = DataFrame({'x': [1, 2, 3], 'y': [1., 2., 3.]})
    flex_cmp = getattr(frame, opname)

    dtype_counts = flex_cmp(2).get_dtype_counts()
    # both the int and the float column must come back as bool
    tm.assert_series_equal(dtype_counts, Series([2], ['bool']))
87
87
88
88
@pytest.mark.parametrize('opname', ['eq', 'ne', 'gt', 'lt', 'ge', 'le'])
def test_df_flex_cmp_constant_return_types_empty(self, opname):
    """Flex comparison on a zero-row frame gives bool columns (GH 15077).

    This is the empty DataFrame case.
    """
    frame = DataFrame({'x': [1, 2, 3], 'y': [1., 2., 3.]})
    # keep the columns but drop every row
    no_rows = frame.iloc[:0]
    flex_cmp = getattr(no_rows, opname)

    dtype_counts = flex_cmp(2).get_dtype_counts()
    tm.assert_series_equal(dtype_counts, Series([2], ['bool']))
97
97
98
98
99
99
# -------------------------------------------------------------------
100
100
# Arithmetic
101
101
102
102
class TestFrameFlexArithmetic (object ):
103
103
def test_df_add_td64_columnwise (self ):
104
- # GH# 22534 Check that column-wise addition broadcasts correctly
105
- dti = pd . date_range ('2016-01-01' , periods = 10 )
106
- tdi = pd . timedelta_range ('1' , periods = 10 )
107
- tser = pd . Series (tdi )
108
- df = pd . DataFrame ({0 : dti , 1 : tdi })
104
+ # GH 22534 Check that column-wise addition broadcasts correctly
105
+ dti = date_range ('2016-01-01' , periods = 10 )
106
+ tdi = timedelta_range ('1' , periods = 10 )
107
+ tser = Series (tdi )
108
+ df = DataFrame ({0 : dti , 1 : tdi })
109
109
110
110
result = df .add (tser , axis = 0 )
111
- expected = pd .DataFrame ({0 : dti + tdi ,
112
- 1 : tdi + tdi })
111
+ expected = DataFrame ({0 : dti + tdi , 1 : tdi + tdi })
113
112
tm .assert_frame_equal (result , expected )
114
113
115
114
def test_df_add_flex_filled_mixed_dtypes (self ):
116
- # GH# 19611
117
- dti = pd . date_range ('2016-01-01' , periods = 3 )
118
- ser = pd . Series (['1 Day' , 'NaT' , '2 Days' ], dtype = 'timedelta64[ns]' )
119
- df = pd . DataFrame ({'A' : dti , 'B' : ser })
120
- other = pd . DataFrame ({'A' : ser , 'B' : ser })
121
- fill = pd . Timedelta (days = 1 ).to_timedelta64 ()
115
+ # GH 19611
116
+ dti = date_range ('2016-01-01' , periods = 3 )
117
+ ser = Series (['1 Day' , 'NaT' , '2 Days' ], dtype = 'timedelta64[ns]' )
118
+ df = DataFrame ({'A' : dti , 'B' : ser })
119
+ other = DataFrame ({'A' : ser , 'B' : ser })
120
+ fill = Timedelta (days = 1 ).to_timedelta64 ()
122
121
result = df .add (other , fill_value = fill )
123
122
124
- expected = pd . DataFrame (
125
- {'A' : pd . Series (['2016-01-02' , '2016-01-03' , '2016-01-05' ],
126
- dtype = 'datetime64[ns]' ),
123
+ expected = DataFrame (
124
+ {'A' : Series (['2016-01-02' , '2016-01-03' , '2016-01-05' ],
125
+ dtype = 'datetime64[ns]' ),
127
126
'B' : ser * 2 })
128
127
tm .assert_frame_equal (result , expected )
129
128
130
@pytest.mark.parametrize('op', ['add', 'sub', 'mul', 'div', 'truediv',
                                'pow', 'floordiv', 'mod'])
def test_arith_flex_frame(self, op, int_frame, mixed_int_frame,
                          float_frame, mixed_float_frame):
    """Flex arithmetic methods must agree with the matching operators.

    ``op`` names the DataFrame flex method under test.  The remaining
    arguments are the frame fixtures the checks are run against.
    """
    # ``operator.div`` only exists on Python 2; on Python 3 the ``div``
    # flex method is compared against ``operator.truediv`` instead.
    alias = 'truediv' if PY3 and op == 'div' else op
    f = getattr(operator, alias)

    def r_f(x, y):
        # reference result for a reflected method: operands swapped
        return f(y, x)

    result = getattr(float_frame, op)(2 * float_frame)
    exp = f(float_frame, 2 * float_frame)
    tm.assert_frame_equal(result, exp)

    # vs mix float
    result = getattr(mixed_float_frame, op)(2 * mixed_float_frame)
    exp = f(mixed_float_frame, 2 * mixed_float_frame)
    tm.assert_frame_equal(result, exp)
    _check_mixed_float(result, dtype=dict(C=None))

    # vs mix int
    if op in ['add', 'sub', 'mul']:
        result = getattr(mixed_int_frame, op)(2 + mixed_int_frame)
        exp = f(mixed_int_frame, 2 + mixed_int_frame)

        # no overflow in the uint
        if op == 'sub':
            dtype = dict(B='uint64', C=None)
        else:
            dtype = dict(C=None)
        tm.assert_frame_equal(result, exp)
        _check_mixed_int(result, dtype=dtype)

    # rops
    result = getattr(float_frame, 'r' + op)(2 * float_frame)
    exp = r_f(float_frame, 2 * float_frame)
    tm.assert_frame_equal(result, exp)

    # rops vs mix float: call the reflected method and compare with the
    # swapped-operand reference, so this is not a repeat of the forward
    # mixed-float check above
    result = getattr(mixed_float_frame, 'r' + op)(2 * mixed_float_frame)
    exp = r_f(mixed_float_frame, 2 * mixed_float_frame)
    tm.assert_frame_equal(result, exp)
    _check_mixed_float(result, dtype=dict(C=None))

    # vs plain int (forward op)
    result = getattr(int_frame, op)(2 * int_frame)
    exp = f(int_frame, 2 * int_frame)
    tm.assert_frame_equal(result, exp)

    # ndim >= 3
    ndim_5 = np.ones(float_frame.shape + (3, 4, 5))
    msg = "Unable to coerce to Series/DataFrame"
    with tm.assert_raises_regex(ValueError, msg):
        f(float_frame, ndim_5)

    with tm.assert_raises_regex(ValueError, msg):
        getattr(float_frame, op)(ndim_5)

    const_add = float_frame.add(1)
    tm.assert_frame_equal(const_add, float_frame + 1)

    # corner cases
    result = float_frame.add(float_frame[:0])
    tm.assert_frame_equal(result, float_frame * np.nan)

    result = float_frame[:0].add(float_frame)
    tm.assert_frame_equal(result, float_frame * np.nan)

    with tm.assert_raises_regex(NotImplementedError, 'fill_value'):
        float_frame.add(float_frame.iloc[0], fill_value=3)

    with tm.assert_raises_regex(NotImplementedError, 'fill_value'):
        float_frame.add(float_frame.iloc[0], axis='index', fill_value=3)
249
217
250
- def test_arith_flex_series (self ):
251
- arr = np .array ([[1. , 2. , 3. ],
252
- [4. , 5. , 6. ],
253
- [7. , 8. , 9. ]])
254
- df = pd .DataFrame (arr , columns = ['one' , 'two' , 'three' ],
255
- index = ['a' , 'b' , 'c' ])
218
+ def test_arith_flex_series (self , simple_frame ):
219
+ df = simple_frame
256
220
257
221
row = df .xs ('a' )
258
222
col = df ['two' ]
@@ -271,23 +235,23 @@ def test_arith_flex_series(self):
271
235
tm .assert_frame_equal (df .div (row ), df / row )
272
236
tm .assert_frame_equal (df .div (col , axis = 0 ), (df .T / col ).T )
273
237
274
- # broadcasting issue in GH# 7325
275
- df = pd . DataFrame (np .arange (3 * 2 ).reshape ((3 , 2 )), dtype = 'int64' )
276
- expected = pd . DataFrame ([[np .nan , np .inf ], [1.0 , 1.5 ], [1.0 , 1.25 ]])
238
+ # broadcasting issue in GH 7325
239
+ df = DataFrame (np .arange (3 * 2 ).reshape ((3 , 2 )), dtype = 'int64' )
240
+ expected = DataFrame ([[np .nan , np .inf ], [1.0 , 1.5 ], [1.0 , 1.25 ]])
277
241
result = df .div (df [0 ], axis = 'index' )
278
242
tm .assert_frame_equal (result , expected )
279
243
280
- df = pd . DataFrame (np .arange (3 * 2 ).reshape ((3 , 2 )), dtype = 'float64' )
281
- expected = pd . DataFrame ([[np .nan , np .inf ], [1.0 , 1.5 ], [1.0 , 1.25 ]])
244
+ df = DataFrame (np .arange (3 * 2 ).reshape ((3 , 2 )), dtype = 'float64' )
245
+ expected = DataFrame ([[np .nan , np .inf ], [1.0 , 1.5 ], [1.0 , 1.25 ]])
282
246
result = df .div (df [0 ], axis = 'index' )
283
247
tm .assert_frame_equal (result , expected )
284
248
285
249
def test_arith_flex_zero_len_raises (self ):
286
- # GH# 19522 passing fill_value to frame flex arith methods should
250
+ # GH 19522 passing fill_value to frame flex arith methods should
287
251
# raise even in the zero-length special cases
288
- ser_len0 = pd . Series ([])
289
- df_len0 = pd . DataFrame ([], columns = ['A' , 'B' ])
290
- df = pd . DataFrame ([[1 , 2 ], [3 , 4 ]], columns = ['A' , 'B' ])
252
+ ser_len0 = Series ([])
253
+ df_len0 = DataFrame ([], columns = ['A' , 'B' ])
254
+ df = DataFrame ([[1 , 2 ], [3 , 4 ]], columns = ['A' , 'B' ])
291
255
292
256
with tm .assert_raises_regex (NotImplementedError , 'fill_value' ):
293
257
df .add (ser_len0 , fill_value = 'E' )
@@ -298,9 +262,9 @@ def test_arith_flex_zero_len_raises(self):
298
262
299
263
class TestFrameArithmetic (object ):
300
264
def test_df_bool_mul_int (self ):
301
- # GH# 22047, GH# 22163 multiplication by 1 should result in int dtype,
265
+ # GH 22047, GH 22163 multiplication by 1 should result in int dtype,
302
266
# not object dtype
303
- df = pd . DataFrame ([[False , True ], [False , False ]])
267
+ df = DataFrame ([[False , True ], [False , False ]])
304
268
result = df * 1
305
269
306
270
# On appveyor this comes back as np.int32 instead of np.int64,
0 commit comments