Skip to content

Commit 421d492

Browse files
committed
BUG: fix segfault introduced by prior #2981 fix
1 parent 0c0231f commit 421d492

File tree

2 files changed

+38
-21
lines changed

2 files changed

+38
-21
lines changed

pandas/io/tests/test_parsers.py

Lines changed: 30 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1548,26 +1548,6 @@ def test_int64_min_issues(self):
15481548

15491549
tm.assert_frame_equal(result, expected)
15501550

1551-
def test_parse_ragged_csv(self):
1552-
data = """1,2,3
1553-
1,2,3,4
1554-
1,2,3,4,5
1555-
1,2
1556-
1,2,3,4"""
1557-
1558-
nice_data = """1,2,3,,
1559-
1,2,3,4,
1560-
1,2,3,4,5
1561-
1,2,,,
1562-
1,2,3,4,"""
1563-
result = self.read_csv(StringIO(data), header=None,
1564-
names=['a', 'b', 'c', 'd', 'e'])
1565-
1566-
expected = self.read_csv(StringIO(nice_data), header=None,
1567-
names=['a', 'b', 'c', 'd', 'e'])
1568-
1569-
tm.assert_frame_equal(result, expected)
1570-
15711551

15721552
class TestPythonParser(ParserTests, unittest.TestCase):
15731553

@@ -2078,6 +2058,36 @@ def test_na_trailing_columns(self):
20782058
self.assertEquals(result['Date'][1], '2012-05-12')
20792059
self.assertTrue(result['UnitPrice'].isnull().all())
20802060

2061+
def test_parse_ragged_csv(self):
2062+
data = """1,2,3
2063+
1,2,3,4
2064+
1,2,3,4,5
2065+
1,2
2066+
1,2,3,4"""
2067+
2068+
nice_data = """1,2,3,,
2069+
1,2,3,4,
2070+
1,2,3,4,5
2071+
1,2,,,
2072+
1,2,3,4,"""
2073+
result = self.read_csv(StringIO(data), header=None,
2074+
names=['a', 'b', 'c', 'd', 'e'])
2075+
2076+
expected = self.read_csv(StringIO(nice_data), header=None,
2077+
names=['a', 'b', 'c', 'd', 'e'])
2078+
2079+
tm.assert_frame_equal(result, expected)
2080+
2081+
# far more column names than fields in the data; caused a segfault if not careful
2082+
data = "1,2\n3,4,5"
2083+
2084+
result = self.read_csv(StringIO(data), header=None,
2085+
names=range(50))
2086+
expected = self.read_csv(StringIO(data), header=None,
2087+
names=range(3)).reindex(columns=range(50))
2088+
2089+
tm.assert_frame_equal(result, expected)
2090+
20812091

20822092
class TestParseSQL(unittest.TestCase):
20832093

pandas/src/parser/tokenizer.c

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -498,7 +498,14 @@ static int end_line(parser_t *self) {
498498
}
499499
else {
500500
/* missing trailing delimiters */
501-
if (self->lines >= self->header + 1) {
501+
if ((self->lines >= self->header + 1) && fields < ex_fields) {
502+
503+
/* Might overrun the buffer when closing fields */
504+
if (make_stream_space(self, ex_fields - fields) < 0) {
505+
self->error_msg = "out of memory";
506+
return -1;
507+
}
508+
502509
while (fields < ex_fields){
503510
end_field(self);
504511
/* printf("Prior word: %s\n", self->words[self->words_len - 2]); */

0 commit comments

Comments
 (0)