Skip to content

Commit b0430cb

Browse files
committed
BUG: fix read_csv to parse timezone correctly
- Use box=True for to_datetime(), and adjust downstream processing accordingly.
1 parent 8bb2cc1 commit b0430cb

File tree

3 files changed

+23
-5
lines changed

3 files changed

+23
-5
lines changed

doc/source/whatsnew/v0.24.0.txt

+1
Original file line number | Diff line number | Diff line change
@@ -674,6 +674,7 @@ I/O
674674

675675
- :func:`read_html()` no longer ignores all-whitespace ``<tr>`` within ``<thead>`` when considering the ``skiprows`` and ``header`` arguments. Previously, users had to decrease their ``header`` and ``skiprows`` values on such tables to work around the issue. (:issue:`21641`)
676676
- :func:`read_excel()` will correctly show the deprecation warning for previously deprecated ``sheetname`` (:issue:`17994`)
677+
- :func:`read_csv()` will correctly parse timezone-aware datetimes. (:issue:`22256`)
677678
-
678679

679680
Plotting

pandas/io/parsers.py

+6 -5
Original file line number | Diff line number | Diff line change
@@ -1638,15 +1638,16 @@ def _infer_types(self, values, na_values, try_num_bool=True):
16381638
except Exception:
16391639
result = values
16401640
if values.dtype == np.object_:
1641-
na_count = parsers.sanitize_objects(result, na_values,
1642-
False)
1641+
na_count = parsers.sanitize_objects(np.asarray(result),
1642+
na_values, False)
16431643
else:
16441644
result = values
16451645
if values.dtype == np.object_:
1646-
na_count = parsers.sanitize_objects(values, na_values, False)
1646+
na_count = parsers.sanitize_objects(np.asarray(values),
1647+
na_values, False)
16471648

16481649
if result.dtype == np.object_ and try_num_bool:
1649-
result = libops.maybe_convert_bool(values,
1650+
result = libops.maybe_convert_bool(np.asarray(values),
16501651
true_values=self.true_values,
16511652
false_values=self.false_values)
16521653

@@ -3033,7 +3034,7 @@ def converter(*date_cols):
30333034
return tools.to_datetime(
30343035
ensure_object(strs),
30353036
utc=None,
3036-
box=False,
3037+
box=True,
30373038
dayfirst=dayfirst,
30383039
errors='ignore',
30393040
infer_datetime_format=infer_datetime_format

pandas/tests/io/parser/parse_dates.py

+16
Original file line number | Diff line number | Diff line change
@@ -674,3 +674,19 @@ def test_parse_date_float(self, data, expected, parse_dates):
674674
# (i.e. float precision should remain unchanged).
675675
result = self.read_csv(StringIO(data), parse_dates=parse_dates)
676676
tm.assert_frame_equal(result, expected)
677+
678+
def test_parse_timezone(self):
679+
import pytz
680+
data = """dt,val
681+
2018-01-04 09:01:00+09:00,23350
682+
2018-01-04 09:02:00+09:00,23400
683+
2018-01-04 09:03:00+09:00,23400
684+
2018-01-04 09:04:00+09:00,23400
685+
2018-01-04 09:05:00+09:00,23400"""
686+
parsed = self.read_csv(StringIO(data), parse_dates=['dt'])
687+
dti = pd.DatetimeIndex(start='2018-01-04 09:01:00',
688+
end='2018-01-04 09:05:00', freq='1min',
689+
tz=pytz.FixedOffset(540))
690+
expected_data = {'dt': dti, 'val': [23350, 23400, 23400, 23400, 23400]}
691+
expected = DataFrame(expected_data)
692+
tm.assert_frame_equal(parsed, expected)

0 commit comments

Comments
 (0)