From 242a48bc8be345efbd3e4bfb8efb2070d500c115 Mon Sep 17 00:00:00 2001 From: Fabian Gebhart Date: Sat, 14 Nov 2020 17:28:28 +0000 Subject: [PATCH 1/5] adding test case for #13044 --- pandas/tests/tools/test_to_datetime.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 10bda16655586..8ba497f0ccaf9 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -2443,3 +2443,23 @@ def test_na_to_datetime(nulls_fixture, klass): result = pd.to_datetime(klass([nulls_fixture])) assert result[0] is pd.NaT + + +def test_empty_string_datetime_coerce(): + # GH13044 + td = pd.Series(['May 04', 'Jun 02', '']) + format = "%b %y" + + # coerce empty string to pd.NaT + result = pd.to_datetime(td, format=format, errors="coerce") + expected = pd.Series(["2004-05-01", "2002-06-01", pd.NaT], dtype="datetime64[ns]") + pd.testing.assert_series_equal(expected, result) + + # raise an exception in case a format is given + with pytest.raises(ValueError, match="does not match format"): + result = pd.to_datetime(td, format=format, errors='raise') + + # don't raise an expection in case no format is given + result = pd.to_datetime([1, ""], unit="s", errors="raise") + expected = pd.DatetimeIndex(["1970-01-01 00:00:01", pd.NaT], dtype="datetime64[ns]") + pd.testing.assert_index_equal(result, expected) From 409a84d2c62767173aa449366a2f46f1efcda5e1 Mon Sep 17 00:00:00 2001 From: Fabian Gebhart Date: Sat, 14 Nov 2020 17:55:53 +0000 Subject: [PATCH 2/5] run pre-commit to fix ci --- pandas/tests/tools/test_to_datetime.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 8ba497f0ccaf9..178f3009bd7c4 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -2447,19 +2447,19 @@ def 
test_na_to_datetime(nulls_fixture, klass): def test_empty_string_datetime_coerce(): # GH13044 - td = pd.Series(['May 04', 'Jun 02', '']) + td = Series(["May 04", "Jun 02", ""]) format = "%b %y" # coerce empty string to pd.NaT result = pd.to_datetime(td, format=format, errors="coerce") - expected = pd.Series(["2004-05-01", "2002-06-01", pd.NaT], dtype="datetime64[ns]") + expected = Series(["2004-05-01", "2002-06-01", pd.NaT], dtype="datetime64[ns]") pd.testing.assert_series_equal(expected, result) # raise an exception in case a format is given with pytest.raises(ValueError, match="does not match format"): - result = pd.to_datetime(td, format=format, errors='raise') + result = pd.to_datetime(td, format=format, errors="raise") # don't raise an expection in case no format is given result = pd.to_datetime([1, ""], unit="s", errors="raise") - expected = pd.DatetimeIndex(["1970-01-01 00:00:01", pd.NaT], dtype="datetime64[ns]") + expected = DatetimeIndex(["1970-01-01 00:00:01", pd.NaT], dtype="datetime64[ns]") pd.testing.assert_index_equal(result, expected) From 0c58d58d1159b0942a40b0e6fb535fd7a4f5b078 Mon Sep 17 00:00:00 2001 From: Fabian Gebhart Date: Sat, 14 Nov 2020 18:55:03 +0000 Subject: [PATCH 3/5] modify input data to streamline tests --- pandas/tests/tools/test_to_datetime.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 178f3009bd7c4..14e683c9e35fd 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -2447,12 +2447,12 @@ def test_na_to_datetime(nulls_fixture, klass): def test_empty_string_datetime_coerce(): # GH13044 - td = Series(["May 04", "Jun 02", ""]) - format = "%b %y" + td = Series(["03/24/2016", "03/25/2016", ""]) + format = "%m/%d/%Y" # coerce empty string to pd.NaT result = pd.to_datetime(td, format=format, errors="coerce") - expected = Series(["2004-05-01", "2002-06-01", pd.NaT], 
dtype="datetime64[ns]") + expected = Series(["2016-03-24", "2016-03-25", pd.NaT], dtype="datetime64[ns]") pd.testing.assert_series_equal(expected, result) # raise an exception in case a format is given @@ -2460,6 +2460,5 @@ def test_empty_string_datetime_coerce(): result = pd.to_datetime(td, format=format, errors="raise") # don't raise an expection in case no format is given - result = pd.to_datetime([1, ""], unit="s", errors="raise") - expected = DatetimeIndex(["1970-01-01 00:00:01", pd.NaT], dtype="datetime64[ns]") - pd.testing.assert_index_equal(result, expected) + result = pd.to_datetime(td, errors="raise") + pd.testing.assert_series_equal(result, expected) From 95d3075feaf360a3d773a9ca152a72479908463d Mon Sep 17 00:00:00 2001 From: Fabian Gebhart Date: Sun, 15 Nov 2020 09:07:11 +0000 Subject: [PATCH 4/5] add test for unit cases --- pandas/tests/tools/test_to_datetime.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 14e683c9e35fd..24cd21cd364db 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -2445,7 +2445,7 @@ def test_na_to_datetime(nulls_fixture, klass): assert result[0] is pd.NaT -def test_empty_string_datetime_coerce(): +def test_empty_string_datetime_coerce__format(): # GH13044 td = Series(["03/24/2016", "03/25/2016", ""]) format = "%m/%d/%Y" @@ -2462,3 +2462,15 @@ def test_empty_string_datetime_coerce(): # don't raise an expection in case no format is given result = pd.to_datetime(td, errors="raise") pd.testing.assert_series_equal(result, expected) + + +def test_empty_string_datetime_coerce__unit(): + # GH13044 + # coerce empty string to pd.NaT + result = pd.to_datetime([1, ""], unit="s", errors="coerce") + expected = DatetimeIndex(["1970-01-01 00:00:01", "NaT"], dtype="datetime64[ns]") + pd.testing.assert_index_equal(expected, result) + + # verify that no exception is raised even when 
errors='raise' is set + result = pd.to_datetime([1, ""], unit="s", errors="raise") + pd.testing.assert_index_equal(expected, result) From 39fdc4d1d731a77d94fdd7c31bcb551bb2bbc33e Mon Sep 17 00:00:00 2001 From: Fabian Gebhart Date: Thu, 19 Nov 2020 15:53:12 +0000 Subject: [PATCH 5/5] use tm for pandas assertions --- pandas/tests/tools/test_to_datetime.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 24cd21cd364db..278a315a479bd 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -2453,7 +2453,7 @@ def test_empty_string_datetime_coerce__format(): # coerce empty string to pd.NaT result = pd.to_datetime(td, format=format, errors="coerce") expected = Series(["2016-03-24", "2016-03-25", pd.NaT], dtype="datetime64[ns]") - pd.testing.assert_series_equal(expected, result) + tm.assert_series_equal(expected, result) # raise an exception in case a format is given with pytest.raises(ValueError, match="does not match format"): @@ -2461,7 +2461,7 @@ def test_empty_string_datetime_coerce__format(): # don't raise an expection in case no format is given result = pd.to_datetime(td, errors="raise") - pd.testing.assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) def test_empty_string_datetime_coerce__unit(): @@ -2469,8 +2469,8 @@ def test_empty_string_datetime_coerce__unit(): # coerce empty string to pd.NaT result = pd.to_datetime([1, ""], unit="s", errors="coerce") expected = DatetimeIndex(["1970-01-01 00:00:01", "NaT"], dtype="datetime64[ns]") - pd.testing.assert_index_equal(expected, result) + tm.assert_index_equal(expected, result) # verify that no exception is raised even when errors='raise' is set result = pd.to_datetime([1, ""], unit="s", errors="raise") - pd.testing.assert_index_equal(expected, result) + tm.assert_index_equal(expected, result)