From 434f1e0728497eb3d460ce2e076af9377d1c7efb Mon Sep 17 00:00:00 2001 From: Ivan Nazarov Date: Mon, 25 Jul 2016 20:00:34 +0300 Subject: [PATCH 01/12] FIX: 'parser_trim_buffers' properly initializes word pointers --- pandas/src/parser/tokenizer.c | 44 ++++++++++++++++++++++++----------- 1 file changed, 30 insertions(+), 14 deletions(-) diff --git a/pandas/src/parser/tokenizer.c b/pandas/src/parser/tokenizer.c index 6091c79e2b4fc..7562b088efb8c 100644 --- a/pandas/src/parser/tokenizer.c +++ b/pandas/src/parser/tokenizer.c @@ -1221,20 +1221,7 @@ int parser_trim_buffers(parser_t *self) { size_t new_cap; void *newptr; - /* trim stream */ - new_cap = _next_pow2(self->stream_len) + 1; - TRACE(("parser_trim_buffers: new_cap = %zu, stream_cap = %zu, lines_cap = %zu\n", - new_cap, self->stream_cap, self->lines_cap)); - if (new_cap < self->stream_cap) { - TRACE(("parser_trim_buffers: new_cap < self->stream_cap, calling safe_realloc\n")); - newptr = safe_realloc((void*) self->stream, new_cap); - if (newptr == NULL) { - return PARSER_OUT_OF_MEMORY; - } else { - self->stream = newptr; - self->stream_cap = new_cap; - } - } + int i; /* trim words, word_starts */ new_cap = _next_pow2(self->words_len) + 1; @@ -1255,6 +1242,35 @@ int parser_trim_buffers(parser_t *self) { } } + /* trim stream */ + new_cap = _next_pow2(self->stream_len) + 1; + TRACE(("parser_trim_buffers: new_cap = %zu, stream_cap = %zu, lines_cap = %zu\n", + new_cap, self->stream_cap, self->lines_cap)); + if (new_cap < self->stream_cap) { + TRACE(("parser_trim_buffers: new_cap < self->stream_cap, calling safe_realloc\n")); + newptr = safe_realloc((void*) self->stream, new_cap); + if (newptr == NULL) { + return PARSER_OUT_OF_MEMORY; + } else { + // realloc sets errno when moving buffer? 
+ if (self->stream != newptr) { + // uff + /* TRACE(("Moving word pointers\n")) */ + + self->pword_start = newptr + self->word_start; + + for (i = 0; i < self->words_len; ++i) + { + self->words[i] = newptr + self->word_starts[i]; + } + } + + self->stream = newptr; + self->stream_cap = new_cap; + + } + } + /* trim line_start, line_fields */ new_cap = _next_pow2(self->lines) + 1; if (new_cap < self->lines_cap) { From 21207198958738f6f583028c804551da6369ab25 Mon Sep 17 00:00:00 2001 From: Ivan Nazarov Date: Tue, 26 Jul 2016 11:57:32 +0300 Subject: [PATCH 02/12] A memory 'stress' test of parser.pyx to cause corruption or segfault --- pandas/tests/test_parser.py | 42 +++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 pandas/tests/test_parser.py diff --git a/pandas/tests/test_parser.py b/pandas/tests/test_parser.py new file mode 100644 index 0000000000000..1e8139569e96c --- /dev/null +++ b/pandas/tests/test_parser.py @@ -0,0 +1,42 @@ +import os +import subprocess + +import pandas.util.testing as tm + +class TestParser(tm.TestCase): + _multiprocess_can_split_ = True + + def test_parse_trim_buffers(self): + code_ = """\n +import pandas as pd +from cStringIO import StringIO +record_ = 
"9999-9,99:99,,,,ZZ,ZZ,,,ZZZ-ZZZZ,.Z-ZZZZ,-9.99,,,9.99,ZZZZZ,,-99,9,ZZZ-ZZZZ,ZZ-ZZZZ,,9.99,ZZZ-ZZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,999,ZZZ-ZZZZ,,ZZ-ZZZZ,,,,,ZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZ,,,9,9,9,9,99,99,999,999,ZZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZ,9,ZZ-ZZZZ,9.99,ZZ-ZZZZ,ZZ-ZZZZ,,,,ZZZZ,,,ZZ,ZZ,,,,,,,,,,,,,9,,,999.99,999.99,,,ZZZZZ,,,Z9,,,,,,,ZZZ,ZZZ,,,,,,,,,,,ZZZZZ,ZZZZZ,ZZZ-ZZZZZZ,ZZZ-ZZZZZZ,ZZ-ZZZZ,ZZ-ZZZZ,ZZ-ZZZZ,ZZ-ZZZZ,,,999999,999999,ZZZ,ZZZ,,,ZZZ,ZZZ,999.99,999.99,,,,ZZZ-ZZZ,ZZZ-ZZZ,-9.99,-9.99,9,9,,99,,9.99,9.99,9,9,9.99,9.99,,,,9.99,9.99,,99,,99,9.99,9.99,,,ZZZ,ZZZ,,999.99,,999.99,ZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,,,ZZZZZ,ZZZZZ,ZZZ,ZZZ,9,9,,,,,,ZZZ-ZZZZ,ZZZ999Z,,,999.99,,999.99,ZZZ-ZZZZ,,,9.999,9.999,9.999,9.999,-9.999,-9.999,-9.999,-9.999,9.999,9.999,9.999,9.999,9.999,9.999,9.999,9.999,99999,ZZZ-ZZZZ,,9.99,ZZZ,,,,,,,,ZZZ,,,,,9,,,,9,,,,,,,,,,ZZZ-ZZZZ,ZZZ-ZZZZ,,ZZZZZ,ZZZZZ,ZZZZZ,ZZZZZ,,,9.99,,ZZ-ZZZZ,ZZ-ZZZZ,ZZ,999,,,,ZZ-ZZZZ,ZZZ,ZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,,,99.99,99.99,,,9.99,9.99,9.99,9.99,ZZZ-ZZZZ,,,ZZZ-ZZZZZ,,,,,-9.99,-9.99,-9.99,-9.99,,,,,,,,,ZZZ-ZZZZ,,9,9.99,9.99,99ZZ,,-9.99,-9.99,ZZZ-ZZZZ,,,,,,,ZZZ-ZZZZ,9.99,9.99,9999,,,,,,,,,,-9.9,Z/Z-ZZZZ,999.99,9.99,,999.99,ZZ-ZZZZ,ZZ-ZZZZ,9.99,9.99,9.99,9.99,9.99,9.99,,ZZZ-ZZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZZ,ZZZ,ZZZ,ZZZ,ZZZ,9.99,,,-9.99,ZZ-ZZZZ,-999.99,,-9999,,999.99,,,,999.99,99.99,,,ZZ-ZZZZZZZZ,ZZ-ZZZZ-ZZZZZZZ,,,,ZZ-ZZ-ZZZZZZZZ,ZZZZZZZZ,ZZZ-ZZZZ,9999,999.99,ZZZ-ZZZZ,-9.99,-9.99,ZZZ-ZZZZ,99:99:99,,99,99,,9.99,,-99.99,,,,,,9.99,ZZZ-ZZZZ,-9.99,-9.99,9.99,9.99,,ZZZ,,,,,,,ZZZ,ZZZ,,,,," +csv_data = "\\n".join([record_]*173) + "\\n" +for n_lines in range(82, 90): + iterator_ = pd.read_csv(StringIO(csv_data), header=None, engine="c", + dtype=object, chunksize=n_lines, na_filter=True) + for chunk_ in iterator_: + print n_lines, chunk_.iloc[0, 0], chunk_.iloc[-1, 0] +exit(0) +""" + expected_ = "".join("%d 9999-9 9999-9\n"%(n_lines,) + for n_lines in range(82, 90) + for _ in range((173 + n_lines - 1) // n_lines)) 
+ + # Run the faulty code via ang explicit argumnet to python + proc_ = subprocess.Popen(("python", "-c", code_), stdout=subprocess.PIPE, stderr=subprocess.PIPE) + + # Wait until the subprocess finishes and then collect the output + stdout_, stderr_ = proc_.communicate() + exit_code = proc_.poll() + + # Check whether a segfault or memory corruption occurred + # self.assertTrue(exit_code == -11 or (exit_code == 0 and stdout_ != expected_)) + + # Check for correct exit code and output + self.assertTrue(exit_code == 0 and stdout_ == expected_, msg="success") + +if __name__ == '__main__': + import nose + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], + exit=False) From 07b4647a28a65a884a596939b3a79fa3b795ebc4 Mon Sep 17 00:00:00 2001 From: Ivan Nazarov Date: Tue, 26 Jul 2016 12:29:34 +0300 Subject: [PATCH 03/12] praser_trim_fix: More stressful test --- pandas/tests/test_parser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/test_parser.py b/pandas/tests/test_parser.py index 1e8139569e96c..6b3593e82cd73 100644 --- a/pandas/tests/test_parser.py +++ b/pandas/tests/test_parser.py @@ -12,7 +12,7 @@ def test_parse_trim_buffers(self): from cStringIO import StringIO record_ = 
"9999-9,99:99,,,,ZZ,ZZ,,,ZZZ-ZZZZ,.Z-ZZZZ,-9.99,,,9.99,ZZZZZ,,-99,9,ZZZ-ZZZZ,ZZ-ZZZZ,,9.99,ZZZ-ZZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,999,ZZZ-ZZZZ,,ZZ-ZZZZ,,,,,ZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZ,,,9,9,9,9,99,99,999,999,ZZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZ,9,ZZ-ZZZZ,9.99,ZZ-ZZZZ,ZZ-ZZZZ,,,,ZZZZ,,,ZZ,ZZ,,,,,,,,,,,,,9,,,999.99,999.99,,,ZZZZZ,,,Z9,,,,,,,ZZZ,ZZZ,,,,,,,,,,,ZZZZZ,ZZZZZ,ZZZ-ZZZZZZ,ZZZ-ZZZZZZ,ZZ-ZZZZ,ZZ-ZZZZ,ZZ-ZZZZ,ZZ-ZZZZ,,,999999,999999,ZZZ,ZZZ,,,ZZZ,ZZZ,999.99,999.99,,,,ZZZ-ZZZ,ZZZ-ZZZ,-9.99,-9.99,9,9,,99,,9.99,9.99,9,9,9.99,9.99,,,,9.99,9.99,,99,,99,9.99,9.99,,,ZZZ,ZZZ,,999.99,,999.99,ZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,,,ZZZZZ,ZZZZZ,ZZZ,ZZZ,9,9,,,,,,ZZZ-ZZZZ,ZZZ999Z,,,999.99,,999.99,ZZZ-ZZZZ,,,9.999,9.999,9.999,9.999,-9.999,-9.999,-9.999,-9.999,9.999,9.999,9.999,9.999,9.999,9.999,9.999,9.999,99999,ZZZ-ZZZZ,,9.99,ZZZ,,,,,,,,ZZZ,,,,,9,,,,9,,,,,,,,,,ZZZ-ZZZZ,ZZZ-ZZZZ,,ZZZZZ,ZZZZZ,ZZZZZ,ZZZZZ,,,9.99,,ZZ-ZZZZ,ZZ-ZZZZ,ZZ,999,,,,ZZ-ZZZZ,ZZZ,ZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,,,99.99,99.99,,,9.99,9.99,9.99,9.99,ZZZ-ZZZZ,,,ZZZ-ZZZZZ,,,,,-9.99,-9.99,-9.99,-9.99,,,,,,,,,ZZZ-ZZZZ,,9,9.99,9.99,99ZZ,,-9.99,-9.99,ZZZ-ZZZZ,,,,,,,ZZZ-ZZZZ,9.99,9.99,9999,,,,,,,,,,-9.9,Z/Z-ZZZZ,999.99,9.99,,999.99,ZZ-ZZZZ,ZZ-ZZZZ,9.99,9.99,9.99,9.99,9.99,9.99,,ZZZ-ZZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZZ,ZZZ,ZZZ,ZZZ,ZZZ,9.99,,,-9.99,ZZ-ZZZZ,-999.99,,-9999,,999.99,,,,999.99,99.99,,,ZZ-ZZZZZZZZ,ZZ-ZZZZ-ZZZZZZZ,,,,ZZ-ZZ-ZZZZZZZZ,ZZZZZZZZ,ZZZ-ZZZZ,9999,999.99,ZZZ-ZZZZ,-9.99,-9.99,ZZZ-ZZZZ,99:99:99,,99,99,,9.99,,-99.99,,,,,,9.99,ZZZ-ZZZZ,-9.99,-9.99,9.99,9.99,,ZZZ,,,,,,,ZZZ,ZZZ,,,,," csv_data = "\\n".join([record_]*173) + "\\n" -for n_lines in range(82, 90): +for n_lines in range(60, 90): iterator_ = pd.read_csv(StringIO(csv_data), header=None, engine="c", dtype=object, chunksize=n_lines, na_filter=True) for chunk_ in iterator_: @@ -20,7 +20,7 @@ def test_parse_trim_buffers(self): exit(0) """ expected_ = "".join("%d 9999-9 9999-9\n"%(n_lines,) - for n_lines in range(82, 90) + for n_lines in range(60, 90) 
for _ in range((173 + n_lines - 1) // n_lines)) # Run the faulty code via ang explicit argumnet to python From a831dbb86e65f8aae0b63ffe9e098130fa3b57f3 Mon Sep 17 00:00:00 2001 From: Ivan Nazarov Date: Tue, 26 Jul 2016 13:08:12 +0300 Subject: [PATCH 04/12] Moved 'parser_trim_buffers' test to its proper place --- pandas/io/tests/parser/common.py | 33 +++++++++++++++++++++++++ pandas/tests/test_parser.py | 42 -------------------------------- 2 files changed, 33 insertions(+), 42 deletions(-) delete mode 100644 pandas/tests/test_parser.py diff --git a/pandas/io/tests/parser/common.py b/pandas/io/tests/parser/common.py index 11eed79e03267..f8cc3c69fea5f 100644 --- a/pandas/io/tests/parser/common.py +++ b/pandas/io/tests/parser/common.py @@ -5,6 +5,8 @@ import platform import codecs +import subprocess + import re import sys from datetime import datetime @@ -1491,3 +1493,34 @@ def test_memory_map(self): out = self.read_csv(mmap_file, memory_map=True) tm.assert_frame_equal(out, expected) + + + def test_parse_trim_buffers(self): + code_ = """\n +import pandas as pd +from pandas.compat import StringIO +record_ = 
"9999-9,99:99,,,,ZZ,ZZ,,,ZZZ-ZZZZ,.Z-ZZZZ,-9.99,,,9.99,ZZZZZ,,-99,9,ZZZ-ZZZZ,ZZ-ZZZZ,,9.99,ZZZ-ZZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,999,ZZZ-ZZZZ,,ZZ-ZZZZ,,,,,ZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZ,,,9,9,9,9,99,99,999,999,ZZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZ,9,ZZ-ZZZZ,9.99,ZZ-ZZZZ,ZZ-ZZZZ,,,,ZZZZ,,,ZZ,ZZ,,,,,,,,,,,,,9,,,999.99,999.99,,,ZZZZZ,,,Z9,,,,,,,ZZZ,ZZZ,,,,,,,,,,,ZZZZZ,ZZZZZ,ZZZ-ZZZZZZ,ZZZ-ZZZZZZ,ZZ-ZZZZ,ZZ-ZZZZ,ZZ-ZZZZ,ZZ-ZZZZ,,,999999,999999,ZZZ,ZZZ,,,ZZZ,ZZZ,999.99,999.99,,,,ZZZ-ZZZ,ZZZ-ZZZ,-9.99,-9.99,9,9,,99,,9.99,9.99,9,9,9.99,9.99,,,,9.99,9.99,,99,,99,9.99,9.99,,,ZZZ,ZZZ,,999.99,,999.99,ZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,,,ZZZZZ,ZZZZZ,ZZZ,ZZZ,9,9,,,,,,ZZZ-ZZZZ,ZZZ999Z,,,999.99,,999.99,ZZZ-ZZZZ,,,9.999,9.999,9.999,9.999,-9.999,-9.999,-9.999,-9.999,9.999,9.999,9.999,9.999,9.999,9.999,9.999,9.999,99999,ZZZ-ZZZZ,,9.99,ZZZ,,,,,,,,ZZZ,,,,,9,,,,9,,,,,,,,,,ZZZ-ZZZZ,ZZZ-ZZZZ,,ZZZZZ,ZZZZZ,ZZZZZ,ZZZZZ,,,9.99,,ZZ-ZZZZ,ZZ-ZZZZ,ZZ,999,,,,ZZ-ZZZZ,ZZZ,ZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,,,99.99,99.99,,,9.99,9.99,9.99,9.99,ZZZ-ZZZZ,,,ZZZ-ZZZZZ,,,,,-9.99,-9.99,-9.99,-9.99,,,,,,,,,ZZZ-ZZZZ,,9,9.99,9.99,99ZZ,,-9.99,-9.99,ZZZ-ZZZZ,,,,,,,ZZZ-ZZZZ,9.99,9.99,9999,,,,,,,,,,-9.9,Z/Z-ZZZZ,999.99,9.99,,999.99,ZZ-ZZZZ,ZZ-ZZZZ,9.99,9.99,9.99,9.99,9.99,9.99,,ZZZ-ZZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZZ,ZZZ,ZZZ,ZZZ,ZZZ,9.99,,,-9.99,ZZ-ZZZZ,-999.99,,-9999,,999.99,,,,999.99,99.99,,,ZZ-ZZZZZZZZ,ZZ-ZZZZ-ZZZZZZZ,,,,ZZ-ZZ-ZZZZZZZZ,ZZZZZZZZ,ZZZ-ZZZZ,9999,999.99,ZZZ-ZZZZ,-9.99,-9.99,ZZZ-ZZZZ,99:99:99,,99,99,,9.99,,-99.99,,,,,,9.99,ZZZ-ZZZZ,-9.99,-9.99,9.99,9.99,,ZZZ,,,,,,,ZZZ,ZZZ,,,,," +csv_data = "\\n".join([record_]*173) + "\\n" +for n_lines in range(57, 90): + iterator_ = pd.read_csv(StringIO(csv_data), header=None, engine="c", + dtype=object, chunksize=n_lines, na_filter=True) + for chunk_ in iterator_: + print n_lines, chunk_.iloc[0, 0], chunk_.iloc[-1, 0] +exit(0) +""" + expected_ = "".join("%d 9999-9 9999-9\n"%(n_lines,) + for n_lines in range(57, 90) + for _ in range((173 + n_lines - 1) // n_lines)) 
+ + # Run the faulty code via ang explicit argumnet to python + proc_ = subprocess.Popen(("python", "-c", code_), stdout=subprocess.PIPE, stderr=subprocess.PIPE) + + # Wait until the subprocess finishes and then collect the output + stdout_, stderr_ = proc_.communicate() + exit_code = proc_.poll() + + # Check whether a segfault or memory corruption occurred + # tm.assertTrue(exit_code == -11 or (exit_code == 0 and stdout_ != expected_)) + + # Check for correct exit code and output + tm.assert_equal(exit_code == 0 and stdout_ == expected_, True) diff --git a/pandas/tests/test_parser.py b/pandas/tests/test_parser.py deleted file mode 100644 index 6b3593e82cd73..0000000000000 --- a/pandas/tests/test_parser.py +++ /dev/null @@ -1,42 +0,0 @@ -import os -import subprocess - -import pandas.util.testing as tm - -class TestParser(tm.TestCase): - _multiprocess_can_split_ = True - - def test_parse_trim_buffers(self): - code_ = """\n -import pandas as pd -from cStringIO import StringIO -record_ = "9999-9,99:99,,,,ZZ,ZZ,,,ZZZ-ZZZZ,.Z-ZZZZ,-9.99,,,9.99,ZZZZZ,,-99,9,ZZZ-ZZZZ,ZZ-ZZZZ,,9.99,ZZZ-ZZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,999,ZZZ-ZZZZ,,ZZ-ZZZZ,,,,,ZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZ,,,9,9,9,9,99,99,999,999,ZZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZ,9,ZZ-ZZZZ,9.99,ZZ-ZZZZ,ZZ-ZZZZ,,,,ZZZZ,,,ZZ,ZZ,,,,,,,,,,,,,9,,,999.99,999.99,,,ZZZZZ,,,Z9,,,,,,,ZZZ,ZZZ,,,,,,,,,,,ZZZZZ,ZZZZZ,ZZZ-ZZZZZZ,ZZZ-ZZZZZZ,ZZ-ZZZZ,ZZ-ZZZZ,ZZ-ZZZZ,ZZ-ZZZZ,,,999999,999999,ZZZ,ZZZ,,,ZZZ,ZZZ,999.99,999.99,,,,ZZZ-ZZZ,ZZZ-ZZZ,-9.99,-9.99,9,9,,99,,9.99,9.99,9,9,9.99,9.99,,,,9.99,9.99,,99,,99,9.99,9.99,,,ZZZ,ZZZ,,999.99,,999.99,ZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,,,ZZZZZ,ZZZZZ,ZZZ,ZZZ,9,9,,,,,,ZZZ-ZZZZ,ZZZ999Z,,,999.99,,999.99,ZZZ-ZZZZ,,,9.999,9.999,9.999,9.999,-9.999,-9.999,-9.999,-9.999,9.999,9.999,9.999,9.999,9.999,9.999,9.999,9.999,99999,ZZZ-ZZZZ,,9.99,ZZZ,,,,,,,,ZZZ,,,,,9,,,,9,,,,,,,,,,ZZZ-ZZZZ,ZZZ-ZZZZ,,ZZZZZ,ZZZZZ,ZZZZZ,ZZZZZ,,,9.99,,ZZ-ZZZZ,ZZ-ZZZZ,ZZ,999,,,,ZZ-ZZZZ,ZZZ,ZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,,,99.99,99.99,,,9.99,9
.99,9.99,9.99,ZZZ-ZZZZ,,,ZZZ-ZZZZZ,,,,,-9.99,-9.99,-9.99,-9.99,,,,,,,,,ZZZ-ZZZZ,,9,9.99,9.99,99ZZ,,-9.99,-9.99,ZZZ-ZZZZ,,,,,,,ZZZ-ZZZZ,9.99,9.99,9999,,,,,,,,,,-9.9,Z/Z-ZZZZ,999.99,9.99,,999.99,ZZ-ZZZZ,ZZ-ZZZZ,9.99,9.99,9.99,9.99,9.99,9.99,,ZZZ-ZZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZZ,ZZZ,ZZZ,ZZZ,ZZZ,9.99,,,-9.99,ZZ-ZZZZ,-999.99,,-9999,,999.99,,,,999.99,99.99,,,ZZ-ZZZZZZZZ,ZZ-ZZZZ-ZZZZZZZ,,,,ZZ-ZZ-ZZZZZZZZ,ZZZZZZZZ,ZZZ-ZZZZ,9999,999.99,ZZZ-ZZZZ,-9.99,-9.99,ZZZ-ZZZZ,99:99:99,,99,99,,9.99,,-99.99,,,,,,9.99,ZZZ-ZZZZ,-9.99,-9.99,9.99,9.99,,ZZZ,,,,,,,ZZZ,ZZZ,,,,," -csv_data = "\\n".join([record_]*173) + "\\n" -for n_lines in range(60, 90): - iterator_ = pd.read_csv(StringIO(csv_data), header=None, engine="c", - dtype=object, chunksize=n_lines, na_filter=True) - for chunk_ in iterator_: - print n_lines, chunk_.iloc[0, 0], chunk_.iloc[-1, 0] -exit(0) -""" - expected_ = "".join("%d 9999-9 9999-9\n"%(n_lines,) - for n_lines in range(60, 90) - for _ in range((173 + n_lines - 1) // n_lines)) - - # Run the faulty code via ang explicit argumnet to python - proc_ = subprocess.Popen(("python", "-c", code_), stdout=subprocess.PIPE, stderr=subprocess.PIPE) - - # Wait until the subprocess finishes and then collect the output - stdout_, stderr_ = proc_.communicate() - exit_code = proc_.poll() - - # Check whether a segfault or memory corruption occurred - # self.assertTrue(exit_code == -11 or (exit_code == 0 and stdout_ != expected_)) - - # Check for correct exit code and output - self.assertTrue(exit_code == 0 and stdout_ == expected_, msg="success") - -if __name__ == '__main__': - import nose - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) From 5ab36363414ef0542932d7dbc174cdba07d60ed9 Mon Sep 17 00:00:00 2001 From: Ivan Nazarov Date: Tue, 26 Jul 2016 13:40:48 +0300 Subject: [PATCH 05/12] Expanded the explanation of the patch --- pandas/src/parser/tokenizer.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git 
a/pandas/src/parser/tokenizer.c b/pandas/src/parser/tokenizer.c index 7562b088efb8c..ac909f2c8bfdb 100644 --- a/pandas/src/parser/tokenizer.c +++ b/pandas/src/parser/tokenizer.c @@ -1252,11 +1252,11 @@ int parser_trim_buffers(parser_t *self) { if (newptr == NULL) { return PARSER_OUT_OF_MEMORY; } else { - // realloc sets errno when moving buffer? + // Update the pointers in the self->words array (char **) if `safe_realloc` + // moved the `self->stream` buffer. This block mirrors a similar block in + // `make_stream_space`. if (self->stream != newptr) { - // uff /* TRACE(("Moving word pointers\n")) */ - self->pword_start = newptr + self->word_start; for (i = 0; i < self->words_len; ++i) From bdba66f2d6c84b504c03a31634aa1e02edb4a71f Mon Sep 17 00:00:00 2001 From: Ivan Nazarov Date: Tue, 26 Jul 2016 14:47:58 +0300 Subject: [PATCH 06/12] Rewritten the 'parser_trim_buffers' test --- pandas/io/tests/parser/common.py | 59 +++++++++++++++++--------------- 1 file changed, 31 insertions(+), 28 deletions(-) diff --git a/pandas/io/tests/parser/common.py b/pandas/io/tests/parser/common.py index f8cc3c69fea5f..546a9adf7e9c3 100644 --- a/pandas/io/tests/parser/common.py +++ b/pandas/io/tests/parser/common.py @@ -1496,31 +1496,34 @@ def test_memory_map(self): def test_parse_trim_buffers(self): - code_ = """\n -import pandas as pd -from pandas.compat import StringIO -record_ = 
"9999-9,99:99,,,,ZZ,ZZ,,,ZZZ-ZZZZ,.Z-ZZZZ,-9.99,,,9.99,ZZZZZ,,-99,9,ZZZ-ZZZZ,ZZ-ZZZZ,,9.99,ZZZ-ZZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,999,ZZZ-ZZZZ,,ZZ-ZZZZ,,,,,ZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZ,,,9,9,9,9,99,99,999,999,ZZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZ,9,ZZ-ZZZZ,9.99,ZZ-ZZZZ,ZZ-ZZZZ,,,,ZZZZ,,,ZZ,ZZ,,,,,,,,,,,,,9,,,999.99,999.99,,,ZZZZZ,,,Z9,,,,,,,ZZZ,ZZZ,,,,,,,,,,,ZZZZZ,ZZZZZ,ZZZ-ZZZZZZ,ZZZ-ZZZZZZ,ZZ-ZZZZ,ZZ-ZZZZ,ZZ-ZZZZ,ZZ-ZZZZ,,,999999,999999,ZZZ,ZZZ,,,ZZZ,ZZZ,999.99,999.99,,,,ZZZ-ZZZ,ZZZ-ZZZ,-9.99,-9.99,9,9,,99,,9.99,9.99,9,9,9.99,9.99,,,,9.99,9.99,,99,,99,9.99,9.99,,,ZZZ,ZZZ,,999.99,,999.99,ZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,,,ZZZZZ,ZZZZZ,ZZZ,ZZZ,9,9,,,,,,ZZZ-ZZZZ,ZZZ999Z,,,999.99,,999.99,ZZZ-ZZZZ,,,9.999,9.999,9.999,9.999,-9.999,-9.999,-9.999,-9.999,9.999,9.999,9.999,9.999,9.999,9.999,9.999,9.999,99999,ZZZ-ZZZZ,,9.99,ZZZ,,,,,,,,ZZZ,,,,,9,,,,9,,,,,,,,,,ZZZ-ZZZZ,ZZZ-ZZZZ,,ZZZZZ,ZZZZZ,ZZZZZ,ZZZZZ,,,9.99,,ZZ-ZZZZ,ZZ-ZZZZ,ZZ,999,,,,ZZ-ZZZZ,ZZZ,ZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,,,99.99,99.99,,,9.99,9.99,9.99,9.99,ZZZ-ZZZZ,,,ZZZ-ZZZZZ,,,,,-9.99,-9.99,-9.99,-9.99,,,,,,,,,ZZZ-ZZZZ,,9,9.99,9.99,99ZZ,,-9.99,-9.99,ZZZ-ZZZZ,,,,,,,ZZZ-ZZZZ,9.99,9.99,9999,,,,,,,,,,-9.9,Z/Z-ZZZZ,999.99,9.99,,999.99,ZZ-ZZZZ,ZZ-ZZZZ,9.99,9.99,9.99,9.99,9.99,9.99,,ZZZ-ZZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZZ,ZZZ,ZZZ,ZZZ,ZZZ,9.99,,,-9.99,ZZ-ZZZZ,-999.99,,-9999,,999.99,,,,999.99,99.99,,,ZZ-ZZZZZZZZ,ZZ-ZZZZ-ZZZZZZZ,,,,ZZ-ZZ-ZZZZZZZZ,ZZZZZZZZ,ZZZ-ZZZZ,9999,999.99,ZZZ-ZZZZ,-9.99,-9.99,ZZZ-ZZZZ,99:99:99,,99,99,,9.99,,-99.99,,,,,,9.99,ZZZ-ZZZZ,-9.99,-9.99,9.99,9.99,,ZZZ,,,,,,,ZZZ,ZZZ,,,,," -csv_data = "\\n".join([record_]*173) + "\\n" -for n_lines in range(57, 90): - iterator_ = pd.read_csv(StringIO(csv_data), header=None, engine="c", - dtype=object, chunksize=n_lines, na_filter=True) - for chunk_ in iterator_: - print n_lines, chunk_.iloc[0, 0], chunk_.iloc[-1, 0] -exit(0) -""" - expected_ = "".join("%d 9999-9 9999-9\n"%(n_lines,) - for n_lines in range(57, 90) - for _ in range((173 + n_lines - 1) // n_lines)) 
- - # Run the faulty code via ang explicit argumnet to python - proc_ = subprocess.Popen(("python", "-c", code_), stdout=subprocess.PIPE, stderr=subprocess.PIPE) - - # Wait until the subprocess finishes and then collect the output - stdout_, stderr_ = proc_.communicate() - exit_code = proc_.poll() - - # Check whether a segfault or memory corruption occurred - # tm.assertTrue(exit_code == -11 or (exit_code == 0 and stdout_ != expected_)) - - # Check for correct exit code and output - tm.assert_equal(exit_code == 0 and stdout_ == expected_, True) + # This test is designed to cause a `segfault` with unpatched `tokenizer.c`, + # Sometimes the test fails on `segfault`, other times it fails due to memory + # corruption, which causes the loaded DataFrame to differ from the expected + # one. + n_lines, chunksizes = 173, range(57, 90) + + # Create the expected output + expected_ = [(chunksize_, "9999-9", "9999-9") + for chunksize_ in chunksizes + for _ in range((n_lines + chunksize_ - 1) // chunksize_)] + expected = pd.DataFrame(expected_, columns=None, index=None) + + # Generate a large mixed-type CSV file on-the-fly (approx 272 KiB) + record_ = 
"9999-9,99:99,,,,ZZ,ZZ,,,ZZZ-ZZZZ,.Z-ZZZZ,-9.99,,,9.99,ZZZZZ,,-99,9,ZZZ-ZZZZ,ZZ-ZZZZ,,9.99,ZZZ-ZZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,999,ZZZ-ZZZZ,,ZZ-ZZZZ,,,,,ZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZ,,,9,9,9,9,99,99,999,999,ZZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZ,9,ZZ-ZZZZ,9.99,ZZ-ZZZZ,ZZ-ZZZZ,,,,ZZZZ,,,ZZ,ZZ,,,,,,,,,,,,,9,,,999.99,999.99,,,ZZZZZ,,,Z9,,,,,,,ZZZ,ZZZ,,,,,,,,,,,ZZZZZ,ZZZZZ,ZZZ-ZZZZZZ,ZZZ-ZZZZZZ,ZZ-ZZZZ,ZZ-ZZZZ,ZZ-ZZZZ,ZZ-ZZZZ,,,999999,999999,ZZZ,ZZZ,,,ZZZ,ZZZ,999.99,999.99,,,,ZZZ-ZZZ,ZZZ-ZZZ,-9.99,-9.99,9,9,,99,,9.99,9.99,9,9,9.99,9.99,,,,9.99,9.99,,99,,99,9.99,9.99,,,ZZZ,ZZZ,,999.99,,999.99,ZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,,,ZZZZZ,ZZZZZ,ZZZ,ZZZ,9,9,,,,,,ZZZ-ZZZZ,ZZZ999Z,,,999.99,,999.99,ZZZ-ZZZZ,,,9.999,9.999,9.999,9.999,-9.999,-9.999,-9.999,-9.999,9.999,9.999,9.999,9.999,9.999,9.999,9.999,9.999,99999,ZZZ-ZZZZ,,9.99,ZZZ,,,,,,,,ZZZ,,,,,9,,,,9,,,,,,,,,,ZZZ-ZZZZ,ZZZ-ZZZZ,,ZZZZZ,ZZZZZ,ZZZZZ,ZZZZZ,,,9.99,,ZZ-ZZZZ,ZZ-ZZZZ,ZZ,999,,,,ZZ-ZZZZ,ZZZ,ZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,,,99.99,99.99,,,9.99,9.99,9.99,9.99,ZZZ-ZZZZ,,,ZZZ-ZZZZZ,,,,,-9.99,-9.99,-9.99,-9.99,,,,,,,,,ZZZ-ZZZZ,,9,9.99,9.99,99ZZ,,-9.99,-9.99,ZZZ-ZZZZ,,,,,,,ZZZ-ZZZZ,9.99,9.99,9999,,,,,,,,,,-9.9,Z/Z-ZZZZ,999.99,9.99,,999.99,ZZ-ZZZZ,ZZ-ZZZZ,9.99,9.99,9.99,9.99,9.99,9.99,,ZZZ-ZZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZZ,ZZZ,ZZZ,ZZZ,ZZZ,9.99,,,-9.99,ZZ-ZZZZ,-999.99,,-9999,,999.99,,,,999.99,99.99,,,ZZ-ZZZZZZZZ,ZZ-ZZZZ-ZZZZZZZ,,,,ZZ-ZZ-ZZZZZZZZ,ZZZZZZZZ,ZZZ-ZZZZ,9999,999.99,ZZZ-ZZZZ,-9.99,-9.99,ZZZ-ZZZZ,99:99:99,,99,99,,9.99,,-99.99,,,,,,9.99,ZZZ-ZZZZ,-9.99,-9.99,9.99,9.99,,ZZZ,,,,,,,ZZZ,ZZZ,,,,," + csv_data = "\n".join([record_] * n_lines) + "\n" + + output_ = list() + for chunksize_ in chunksizes: + try: + iterator_ = self.read_csv(StringIO(csv_data), header=None, dtype=object, + chunksize=chunksize_, na_filter=True) + except ValueError, e: + # Ignore unsuported dtype=object by engine=python + pass + + for chunk_ in iterator_: + output_.append((chunksize_, chunk_.iloc[0, 0], chunk_.iloc[-1, 0])) + + df = 
pd.DataFrame(output_, columns=None, index=None) + + tm.assert_frame_equal(df, expected) \ No newline at end of file From 020d706259c45c98e04288970ce607f754341d9d Mon Sep 17 00:00:00 2001 From: Ivan Nazarov Date: Tue, 26 Jul 2016 15:00:49 +0300 Subject: [PATCH 07/12] Updated WHATSNEW with the bug fix information --- doc/source/whatsnew/v0.19.0.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index 06625e09d70a1..cc69a293d5945 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -673,6 +673,7 @@ Performance Improvements Bug Fixes ~~~~~~~~~ +- Bug in ``pandas.parser.parser_trim_buffers()``, which did not update the word vectors (``parser->words``) when stream buffer was shrunk (:issue:`13788`, :issue:`13703`) - Bug in ``io.json.json_normalize()``, where non-ascii keys raised an exception (:issue:`13213`) - Bug in ``SparseSeries`` with ``MultiIndex`` ``[]`` indexing may raise ``IndexError`` (:issue:`13144`) - Bug in ``SparseSeries`` with ``MultiIndex`` ``[]`` indexing result may have normal ``Index`` (:issue:`13144`) From e0b4c8320bfd48bdd0ba67fd19c5d2fcd044b3a4 Mon Sep 17 00:00:00 2001 From: Ivan Nazarov Date: Tue, 26 Jul 2016 15:44:26 +0300 Subject: [PATCH 08/12] flake8 style test correction --- pandas/io/tests/parser/common.py | 63 ++++++++++++++++++++++++-------- 1 file changed, 48 insertions(+), 15 deletions(-) diff --git a/pandas/io/tests/parser/common.py b/pandas/io/tests/parser/common.py index 546a9adf7e9c3..a3497d0df0ff2 100644 --- a/pandas/io/tests/parser/common.py +++ b/pandas/io/tests/parser/common.py @@ -5,8 +5,6 @@ import platform import codecs -import subprocess - import re import sys from datetime import datetime @@ -1494,36 +1492,71 @@ def test_memory_map(self): out = self.read_csv(mmap_file, memory_map=True) tm.assert_frame_equal(out, expected) - def test_parse_trim_buffers(self): - # This test is designed to cause a `segfault` with unpatched 
`tokenizer.c`, - # Sometimes the test fails on `segfault`, other times it fails due to memory - # corruption, which causes the loaded DataFrame to differ from the expected - # one. + # This test is designed to cause a `segfault` with unpatched + # `tokenizer.c`, Sometimes the test fails on `segfault`, other + # times it fails due to memory corruption, which causes the + # loaded DataFrame to differ from the expected one. n_lines, chunksizes = 173, range(57, 90) # Create the expected output expected_ = [(chunksize_, "9999-9", "9999-9") - for chunksize_ in chunksizes - for _ in range((n_lines + chunksize_ - 1) // chunksize_)] + for chunksize_ in chunksizes + for _ in range((n_lines + chunksize_ - 1) // chunksize_)] expected = pd.DataFrame(expected_, columns=None, index=None) # Generate a large mixed-type CSV file on-the-fly (approx 272 KiB) - record_ = "9999-9,99:99,,,,ZZ,ZZ,,,ZZZ-ZZZZ,.Z-ZZZZ,-9.99,,,9.99,ZZZZZ,,-99,9,ZZZ-ZZZZ,ZZ-ZZZZ,,9.99,ZZZ-ZZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,999,ZZZ-ZZZZ,,ZZ-ZZZZ,,,,,ZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZ,,,9,9,9,9,99,99,999,999,ZZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZ,9,ZZ-ZZZZ,9.99,ZZ-ZZZZ,ZZ-ZZZZ,,,,ZZZZ,,,ZZ,ZZ,,,,,,,,,,,,,9,,,999.99,999.99,,,ZZZZZ,,,Z9,,,,,,,ZZZ,ZZZ,,,,,,,,,,,ZZZZZ,ZZZZZ,ZZZ-ZZZZZZ,ZZZ-ZZZZZZ,ZZ-ZZZZ,ZZ-ZZZZ,ZZ-ZZZZ,ZZ-ZZZZ,,,999999,999999,ZZZ,ZZZ,,,ZZZ,ZZZ,999.99,999.99,,,,ZZZ-ZZZ,ZZZ-ZZZ,-9.99,-9.99,9,9,,99,,9.99,9.99,9,9,9.99,9.99,,,,9.99,9.99,,99,,99,9.99,9.99,,,ZZZ,ZZZ,,999.99,,999.99,ZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,,,ZZZZZ,ZZZZZ,ZZZ,ZZZ,9,9,,,,,,ZZZ-ZZZZ,ZZZ999Z,,,999.99,,999.99,ZZZ-ZZZZ,,,9.999,9.999,9.999,9.999,-9.999,-9.999,-9.999,-9.999,9.999,9.999,9.999,9.999,9.999,9.999,9.999,9.999,99999,ZZZ-ZZZZ,,9.99,ZZZ,,,,,,,,ZZZ,,,,,9,,,,9,,,,,,,,,,ZZZ-ZZZZ,ZZZ-ZZZZ,,ZZZZZ,ZZZZZ,ZZZZZ,ZZZZZ,,,9.99,,ZZ-ZZZZ,ZZ-ZZZZ,ZZ,999,,,,ZZ-ZZZZ,ZZZ,ZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,,,99.99,99.99,,,9.99,9.99,9.99,9.99,ZZZ-ZZZZ,,,ZZZ-ZZZZZ,,,,,-9.99,-9.99,-9.99,-9.99,,,,,,,,,ZZZ-ZZZZ,,9,9.99,9.99,99ZZ,,-9.99,-9.99,ZZZ-ZZZZ,,,,,,,ZZZ-ZZZZ,9.
99,9.99,9999,,,,,,,,,,-9.9,Z/Z-ZZZZ,999.99,9.99,,999.99,ZZ-ZZZZ,ZZ-ZZZZ,9.99,9.99,9.99,9.99,9.99,9.99,,ZZZ-ZZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZZ,ZZZ,ZZZ,ZZZ,ZZZ,9.99,,,-9.99,ZZ-ZZZZ,-999.99,,-9999,,999.99,,,,999.99,99.99,,,ZZ-ZZZZZZZZ,ZZ-ZZZZ-ZZZZZZZ,,,,ZZ-ZZ-ZZZZZZZZ,ZZZZZZZZ,ZZZ-ZZZZ,9999,999.99,ZZZ-ZZZZ,-9.99,-9.99,ZZZ-ZZZZ,99:99:99,,99,99,,9.99,,-99.99,,,,,,9.99,ZZZ-ZZZZ,-9.99,-9.99,9.99,9.99,,ZZZ,,,,,,,ZZZ,ZZZ,,,,," + record_ = \ + """9999-9,99:99,,,,ZZ,ZZ,,,ZZZ-ZZZZ,.Z-ZZZZ,-9.99,,,9.""" \ + """99,ZZZZZ,,-99,9,ZZZ-ZZZZ,ZZ-ZZZZ,,9.99,ZZZ-ZZZZZ,ZZZ-""" \ + """ZZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-""" \ + """ZZZZ,ZZZ-ZZZZ,999,ZZZ-ZZZZ,,ZZ-ZZZZ,,,,,ZZZZ,ZZZ-""" \ + """ZZZZZ,ZZZ-ZZZZ,,,9,9,9,9,99,99,999,999,ZZZZZ,ZZZ-""" \ + """ZZZZZ,ZZZ-ZZZZ,9,ZZ-ZZZZ,9.99,ZZ-ZZZZ,ZZ-""" \ + """ZZZZ,,,,ZZZZ,,,ZZ,ZZ,,,,,,,,,,,,,9,,,999.99,999.99,,,""" \ + """ZZZZZ,,,Z9,,,,,,,ZZZ,ZZZ,,,,,,,,,,,ZZZZZ,ZZZZZ,ZZZ-""" \ + """ZZZZZZ,ZZZ-ZZZZZZ,ZZ-ZZZZ,ZZ-ZZZZ,ZZ-ZZZZ,ZZ-""" \ + """ZZZZ,,,999999,999999,ZZZ,ZZZ,,,ZZZ,ZZZ,999.99,999.""" \ + """99,,,,ZZZ-ZZZ,ZZZ-ZZZ,-9.99,-9.99,9,9,,99,,9.99,9.""" \ + """99,9,9,9.99,9.99,,,,9.99,9.99,,99,,99,9.99,9.""" \ + """99,,,ZZZ,ZZZ,,999.99,,999.99,ZZZ,ZZZ-ZZZZ,ZZZ-""" \ + """ZZZZ,,,ZZZZZ,ZZZZZ,ZZZ,ZZZ,9,9,,,,,,ZZZ-""" \ + """ZZZZ,ZZZ999Z,,,999.99,,999.99,ZZZ-ZZZZ,,,9.999,9.""" \ + """999,9.999,9.999,-9.999,-9.999,-9.999,-9.999,9.999,9.""" \ + """999,9.999,9.999,9.999,9.999,9.999,9.999,99999,ZZZ-""" \ + """ZZZZ,,9.99,ZZZ,,,,,,,,ZZZ,,,,,9,,,,9,,,,,,,,,,ZZZ-""" \ + """ZZZZ,ZZZ-ZZZZ,,ZZZZZ,ZZZZZ,ZZZZZ,ZZZZZ,,,9.99,,ZZ-""" \ + """ZZZZ,ZZ-ZZZZ,ZZ,999,,,,ZZ-ZZZZ,ZZZ,ZZZ,ZZZ-ZZZZ,ZZZ-""" \ + """ZZZZ,,,99.99,99.99,,,9.99,9.99,9.99,9.99,ZZZ-""" \ + """ZZZZ,,,ZZZ-ZZZZZ,,,,,-9.99,-9.99,-9.99,-9.""" \ + """99,,,,,,,,,ZZZ-ZZZZ,,9,9.99,9.99,99ZZ,,-9.99,-9.""" \ + """99,ZZZ-ZZZZ,,,,,,,ZZZ-ZZZZ,9.99,9.99,9999,,,,,,,,,,-9""" \ + """.9,Z/Z-ZZZZ,999.99,9.99,,999.99,ZZ-ZZZZ,ZZ-ZZZZ,9.""" \ + """99,9.99,9.99,9.99,9.99,9.99,,ZZZ-ZZZZZ,ZZZ-ZZZZZ,ZZZ-""" \ + 
"""ZZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZZ,ZZZ,ZZZ,ZZZ,ZZZ,9.99,,,-9.""" \ + """99,ZZ-ZZZZ,-999.99,,-9999,,999.99,,,,999.99,99.""" \ + """99,,,ZZ-ZZZZZZZZ,ZZ-ZZZZ-ZZZZZZZ,,,,ZZ-ZZ-""" \ + """ZZZZZZZZ,ZZZZZZZZ,ZZZ-ZZZZ,9999,999.99,ZZZ-ZZZZ,-9.""" \ + """99,-9.99,ZZZ-ZZZZ,99:99:99,,99,99,,9.99,,-99.""" \ + """99,,,,,,9.99,ZZZ-ZZZZ,-9.99,-9.99,9.99,9.""" \ + """99,,ZZZ,,,,,,,ZZZ,ZZZ,,,,,""" csv_data = "\n".join([record_] * n_lines) + "\n" output_ = list() for chunksize_ in chunksizes: try: - iterator_ = self.read_csv(StringIO(csv_data), header=None, dtype=object, - chunksize=chunksize_, na_filter=True) - except ValueError, e: + iterator_ = self.read_csv(StringIO(csv_data), header=None, + dtype=object, chunksize=chunksize_, + na_filter=True) + except ValueError: # Ignore unsuported dtype=object by engine=python pass for chunk_ in iterator_: - output_.append((chunksize_, chunk_.iloc[0, 0], chunk_.iloc[-1, 0])) + output_.append((chunksize_, + chunk_.iloc[0, 0], + chunk_.iloc[-1, 0])) df = pd.DataFrame(output_, columns=None, index=None) - tm.assert_frame_equal(df, expected) \ No newline at end of file + tm.assert_frame_equal(df, expected) From 834c851007f4fe79c153d3ae9b6b3a414cda07da Mon Sep 17 00:00:00 2001 From: Ivan Nazarov Date: Tue, 26 Jul 2016 15:59:46 +0300 Subject: [PATCH 09/12] Improved readability of bugfix description; minor style fixes of the test --- doc/source/whatsnew/v0.19.0.txt | 2 +- pandas/io/tests/parser/common.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index cc69a293d5945..c24ab1f5dcb08 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -673,7 +673,7 @@ Performance Improvements Bug Fixes ~~~~~~~~~ -- Bug in ``pandas.parser.parser_trim_buffers()``, which did not update the word vectors (``parser->words``) when stream buffer was shrunk (:issue:`13788`, :issue:`13703`) +- Bug in ``pd.read_csv()`` causing a segfault when iterating over a 
large file in chunks (:issue:`13703`) - Bug in ``io.json.json_normalize()``, where non-ascii keys raised an exception (:issue:`13213`) - Bug in ``SparseSeries`` with ``MultiIndex`` ``[]`` indexing may raise ``IndexError`` (:issue:`13144`) - Bug in ``SparseSeries`` with ``MultiIndex`` ``[]`` indexing result may have normal ``Index`` (:issue:`13144`) diff --git a/pandas/io/tests/parser/common.py b/pandas/io/tests/parser/common.py index a3497d0df0ff2..af2ef38171844 100644 --- a/pandas/io/tests/parser/common.py +++ b/pandas/io/tests/parser/common.py @@ -1542,7 +1542,7 @@ def test_parse_trim_buffers(self): """99,,ZZZ,,,,,,,ZZZ,ZZZ,,,,,""" csv_data = "\n".join([record_] * n_lines) + "\n" - output_ = list() + output_ = [] for chunksize_ in chunksizes: try: iterator_ = self.read_csv(StringIO(csv_data), header=None, From 629198d2c8058db10283fd6991e92602a64379ad Mon Sep 17 00:00:00 2001 From: Ivan Nazarov Date: Tue, 26 Jul 2016 17:44:25 +0300 Subject: [PATCH 10/12] Referenced issue in the test, rewrote the bugfix description --- doc/source/whatsnew/v0.19.0.txt | 2 +- pandas/io/tests/parser/common.py | 35 ++++++++++++++++++-------------- 2 files changed, 21 insertions(+), 16 deletions(-) diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index c24ab1f5dcb08..326e124c2f03e 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -673,7 +673,7 @@ Performance Improvements Bug Fixes ~~~~~~~~~ -- Bug in ``pd.read_csv()`` causing a segfault when iterating over a large file in chunks (:issue:`13703`) +- Bug in ``pd.read_csv()``, which may cause a segfault or corruption when iterating in large chunks over a stream/file under rare circumstances (:issue:`13703`) - Bug in ``io.json.json_normalize()``, where non-ascii keys raised an exception (:issue:`13213`) - Bug in ``SparseSeries`` with ``MultiIndex`` ``[]`` indexing may raise ``IndexError`` (:issue:`13144`) - Bug in ``SparseSeries`` with ``MultiIndex`` ``[]`` indexing result 
may have normal ``Index`` (:issue:`13144`) diff --git a/pandas/io/tests/parser/common.py b/pandas/io/tests/parser/common.py index af2ef38171844..969b407529aca 100644 --- a/pandas/io/tests/parser/common.py +++ b/pandas/io/tests/parser/common.py @@ -1493,8 +1493,13 @@ def test_memory_map(self): tm.assert_frame_equal(out, expected) def test_parse_trim_buffers(self): + # This test is part of a bugfix for issue #13703. It attmepts to + # to stress the system memory allocator, to cause it to move the + # stream buffer and either let the OS reclaim the region, or let + # other memory requests of parser otherwise modify the contents + # of memory space, where it was formely located. # This test is designed to cause a `segfault` with unpatched - # `tokenizer.c`, Sometimes the test fails on `segfault`, other + # `tokenizer.c`. Sometimes the test fails on `segfault`, other # times it fails due to memory corruption, which causes the # loaded DataFrame to differ from the expected one. n_lines, chunksizes = 173, range(57, 90) @@ -1543,20 +1548,20 @@ def test_parse_trim_buffers(self): csv_data = "\n".join([record_] * n_lines) + "\n" output_ = [] - for chunksize_ in chunksizes: - try: + try: + for chunksize_ in chunksizes: iterator_ = self.read_csv(StringIO(csv_data), header=None, dtype=object, chunksize=chunksize_, na_filter=True) - except ValueError: - # Ignore unsuported dtype=object by engine=python - pass - - for chunk_ in iterator_: - output_.append((chunksize_, - chunk_.iloc[0, 0], - chunk_.iloc[-1, 0])) - - df = pd.DataFrame(output_, columns=None, index=None) - - tm.assert_frame_equal(df, expected) + for chunk_ in iterator_: + output_.append((chunksize_, + chunk_.iloc[0, 0], + chunk_.iloc[-1, 0])) + except ValueError: + # Ignore unsuported dtype=object by engine=python + # in this case output_ list is empty + pass + + if output_: + df = pd.DataFrame(output_, columns=None, index=None) + tm.assert_frame_equal(df, expected) From 9b521f6eb4a73047bf026a5b0b5c6bde6136cd31 Mon 
Sep 17 00:00:00 2001 From: Ivan Nazarov Date: Tue, 26 Jul 2016 19:32:01 +0300 Subject: [PATCH 11/12] Improved the clarity and logic of the test --- pandas/io/tests/parser/common.py | 106 ++++++++++++++++--------------- 1 file changed, 56 insertions(+), 50 deletions(-) diff --git a/pandas/io/tests/parser/common.py b/pandas/io/tests/parser/common.py index 969b407529aca..c17ca0f1700fa 100644 --- a/pandas/io/tests/parser/common.py +++ b/pandas/io/tests/parser/common.py @@ -1502,66 +1502,72 @@ def test_parse_trim_buffers(self): # `tokenizer.c`. Sometimes the test fails on `segfault`, other # times it fails due to memory corruption, which causes the # loaded DataFrame to differ from the expected one. - n_lines, chunksizes = 173, range(57, 90) - # Create the expected output - expected_ = [(chunksize_, "9999-9", "9999-9") - for chunksize_ in chunksizes - for _ in range((n_lines + chunksize_ - 1) // chunksize_)] - expected = pd.DataFrame(expected_, columns=None, index=None) - - # Generate a large mixed-type CSV file on-the-fly (approx 272 KiB) + # Generate a large mixed-type CSV file on-the-fly (one record is + # approx 1.5KiB). 
record_ = \ - """9999-9,99:99,,,,ZZ,ZZ,,,ZZZ-ZZZZ,.Z-ZZZZ,-9.99,,,9.""" \ - """99,ZZZZZ,,-99,9,ZZZ-ZZZZ,ZZ-ZZZZ,,9.99,ZZZ-ZZZZZ,ZZZ-""" \ - """ZZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-""" \ - """ZZZZ,ZZZ-ZZZZ,999,ZZZ-ZZZZ,,ZZ-ZZZZ,,,,,ZZZZ,ZZZ-""" \ - """ZZZZZ,ZZZ-ZZZZ,,,9,9,9,9,99,99,999,999,ZZZZZ,ZZZ-""" \ - """ZZZZZ,ZZZ-ZZZZ,9,ZZ-ZZZZ,9.99,ZZ-ZZZZ,ZZ-""" \ - """ZZZZ,,,,ZZZZ,,,ZZ,ZZ,,,,,,,,,,,,,9,,,999.99,999.99,,,""" \ - """ZZZZZ,,,Z9,,,,,,,ZZZ,ZZZ,,,,,,,,,,,ZZZZZ,ZZZZZ,ZZZ-""" \ - """ZZZZZZ,ZZZ-ZZZZZZ,ZZ-ZZZZ,ZZ-ZZZZ,ZZ-ZZZZ,ZZ-""" \ - """ZZZZ,,,999999,999999,ZZZ,ZZZ,,,ZZZ,ZZZ,999.99,999.""" \ - """99,,,,ZZZ-ZZZ,ZZZ-ZZZ,-9.99,-9.99,9,9,,99,,9.99,9.""" \ - """99,9,9,9.99,9.99,,,,9.99,9.99,,99,,99,9.99,9.""" \ - """99,,,ZZZ,ZZZ,,999.99,,999.99,ZZZ,ZZZ-ZZZZ,ZZZ-""" \ - """ZZZZ,,,ZZZZZ,ZZZZZ,ZZZ,ZZZ,9,9,,,,,,ZZZ-""" \ - """ZZZZ,ZZZ999Z,,,999.99,,999.99,ZZZ-ZZZZ,,,9.999,9.""" \ - """999,9.999,9.999,-9.999,-9.999,-9.999,-9.999,9.999,9.""" \ - """999,9.999,9.999,9.999,9.999,9.999,9.999,99999,ZZZ-""" \ - """ZZZZ,,9.99,ZZZ,,,,,,,,ZZZ,,,,,9,,,,9,,,,,,,,,,ZZZ-""" \ - """ZZZZ,ZZZ-ZZZZ,,ZZZZZ,ZZZZZ,ZZZZZ,ZZZZZ,,,9.99,,ZZ-""" \ - """ZZZZ,ZZ-ZZZZ,ZZ,999,,,,ZZ-ZZZZ,ZZZ,ZZZ,ZZZ-ZZZZ,ZZZ-""" \ - """ZZZZ,,,99.99,99.99,,,9.99,9.99,9.99,9.99,ZZZ-""" \ - """ZZZZ,,,ZZZ-ZZZZZ,,,,,-9.99,-9.99,-9.99,-9.""" \ - """99,,,,,,,,,ZZZ-ZZZZ,,9,9.99,9.99,99ZZ,,-9.99,-9.""" \ - """99,ZZZ-ZZZZ,,,,,,,ZZZ-ZZZZ,9.99,9.99,9999,,,,,,,,,,-9""" \ - """.9,Z/Z-ZZZZ,999.99,9.99,,999.99,ZZ-ZZZZ,ZZ-ZZZZ,9.""" \ - """99,9.99,9.99,9.99,9.99,9.99,,ZZZ-ZZZZZ,ZZZ-ZZZZZ,ZZZ-""" \ - """ZZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZZ,ZZZ,ZZZ,ZZZ,ZZZ,9.99,,,-9.""" \ - """99,ZZ-ZZZZ,-999.99,,-9999,,999.99,,,,999.99,99.""" \ - """99,,,ZZ-ZZZZZZZZ,ZZ-ZZZZ-ZZZZZZZ,,,,ZZ-ZZ-""" \ - """ZZZZZZZZ,ZZZZZZZZ,ZZZ-ZZZZ,9999,999.99,ZZZ-ZZZZ,-9.""" \ - """99,-9.99,ZZZ-ZZZZ,99:99:99,,99,99,,9.99,,-99.""" \ - """99,,,,,,9.99,ZZZ-ZZZZ,-9.99,-9.99,9.99,9.""" \ - """99,,ZZZ,,,,,,,ZZZ,ZZZ,,,,,""" + 
"""9999-9,99:99,,,,ZZ,ZZ,,,ZZZ-ZZZZ,.Z-ZZZZ,-9.99,,,9.99,Z""" \ + """ZZZZ,,-99,9,ZZZ-ZZZZ,ZZ-ZZZZ,,9.99,ZZZ-ZZZZZ,ZZZ-ZZZZZ,""" \ + """ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,9""" \ + """99,ZZZ-ZZZZ,,ZZ-ZZZZ,,,,,ZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZ,,,9,9,""" \ + """9,9,99,99,999,999,ZZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZ,9,ZZ-ZZZZ,9.""" \ + """99,ZZ-ZZZZ,ZZ-ZZZZ,,,,ZZZZ,,,ZZ,ZZ,,,,,,,,,,,,,9,,,999.""" \ + """99,999.99,,,ZZZZZ,,,Z9,,,,,,,ZZZ,ZZZ,,,,,,,,,,,ZZZZZ,ZZ""" \ + """ZZZ,ZZZ-ZZZZZZ,ZZZ-ZZZZZZ,ZZ-ZZZZ,ZZ-ZZZZ,ZZ-ZZZZ,ZZ-ZZ""" \ + """ZZ,,,999999,999999,ZZZ,ZZZ,,,ZZZ,ZZZ,999.99,999.99,,,,Z""" \ + """ZZ-ZZZ,ZZZ-ZZZ,-9.99,-9.99,9,9,,99,,9.99,9.99,9,9,9.99,""" \ + """9.99,,,,9.99,9.99,,99,,99,9.99,9.99,,,ZZZ,ZZZ,,999.99,,""" \ + """999.99,ZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,,,ZZZZZ,ZZZZZ,ZZZ,ZZZ,9,9,""" \ + """,,,,,ZZZ-ZZZZ,ZZZ999Z,,,999.99,,999.99,ZZZ-ZZZZ,,,9.999""" \ + """,9.999,9.999,9.999,-9.999,-9.999,-9.999,-9.999,9.999,9.""" \ + """999,9.999,9.999,9.999,9.999,9.999,9.999,99999,ZZZ-ZZZZ,""" \ + """,9.99,ZZZ,,,,,,,,ZZZ,,,,,9,,,,9,,,,,,,,,,ZZZ-ZZZZ,ZZZ-Z""" \ + """ZZZ,,ZZZZZ,ZZZZZ,ZZZZZ,ZZZZZ,,,9.99,,ZZ-ZZZZ,ZZ-ZZZZ,ZZ""" \ + """,999,,,,ZZ-ZZZZ,ZZZ,ZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,,,99.99,99.99""" \ + """,,,9.99,9.99,9.99,9.99,ZZZ-ZZZZ,,,ZZZ-ZZZZZ,,,,,-9.99,-""" \ + """9.99,-9.99,-9.99,,,,,,,,,ZZZ-ZZZZ,,9,9.99,9.99,99ZZ,,-9""" \ + """.99,-9.99,ZZZ-ZZZZ,,,,,,,ZZZ-ZZZZ,9.99,9.99,9999,,,,,,,""" \ + """,,,-9.9,Z/Z-ZZZZ,999.99,9.99,,999.99,ZZ-ZZZZ,ZZ-ZZZZ,9.""" \ + """99,9.99,9.99,9.99,9.99,9.99,,ZZZ-ZZZZZ,ZZZ-ZZZZZ,ZZZ-ZZ""" \ + """ZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZZ,ZZZ,ZZZ,ZZZ,ZZZ,9.99,,,-9.99,ZZ""" \ + """-ZZZZ,-999.99,,-9999,,999.99,,,,999.99,99.99,,,ZZ-ZZZZZ""" \ + """ZZZ,ZZ-ZZZZ-ZZZZZZZ,,,,ZZ-ZZ-ZZZZZZZZ,ZZZZZZZZ,ZZZ-ZZZZ""" \ + """,9999,999.99,ZZZ-ZZZZ,-9.99,-9.99,ZZZ-ZZZZ,99:99:99,,99""" \ + """,99,,9.99,,-99.99,,,,,,9.99,ZZZ-ZZZZ,-9.99,-9.99,9.99,9""" \ + """.99,,ZZZ,,,,,,,ZZZ,ZZZ,,,,,""" + + # Set the number of line so that a call to `parser_trim_buffers` + # is trgiggered: a couple of full chunks and a 
relatively small + # 'residual' chunk. + chunksize, n_lines = 128, 2 * 128 + 15 csv_data = "\n".join([record_] * n_lines) + "\n" + # We will use StringIO to load the CSV from this text buffer. + # pd.read_csv() will iterate over the file in chunks and will + # finally read a residual chunk of really small size. + + # Create the expected output: maually create the dataframe + # by splitting by comma and repeating the `n_lines` number + # of times. + row = tuple(val_ if val_ else float("nan") + for val_ in record_.split(",")) + expected_ = [row for _ in range(n_lines)] + expected = pd.DataFrame(expected_, dtype=object, + columns=None, index=None) + + # Iterate over the CSV file in chunks of `chunksize` lines output_ = [] try: - for chunksize_ in chunksizes: - iterator_ = self.read_csv(StringIO(csv_data), header=None, - dtype=object, chunksize=chunksize_, - na_filter=True) - for chunk_ in iterator_: - output_.append((chunksize_, - chunk_.iloc[0, 0], - chunk_.iloc[-1, 0])) + iterator_ = self.read_csv(StringIO(csv_data), header=None, + dtype=object, chunksize=chunksize) + for chunk_ in iterator_: + output_.append(chunk_) except ValueError: # Ignore unsuported dtype=object by engine=python # in this case output_ list is empty pass + # Check for data corruption if there is any output. 
if output_: - df = pd.DataFrame(output_, columns=None, index=None) + df = pd.concat(output_, axis=0, ignore_index=True) tm.assert_frame_equal(df, expected) From d59624eddf063c9da12f1f95227d8e9c9b00cf6e Mon Sep 17 00:00:00 2001 From: Ivan Nazarov Date: Wed, 27 Jul 2016 02:24:48 +0300 Subject: [PATCH 12/12] Moved the test to 'c_parser_only' --- pandas/io/tests/parser/c_parser_only.py | 70 ++++++++++++++++++++++ pandas/io/tests/parser/common.py | 80 ------------------------- 2 files changed, 70 insertions(+), 80 deletions(-) diff --git a/pandas/io/tests/parser/c_parser_only.py b/pandas/io/tests/parser/c_parser_only.py index b6048051edc4d..103c9fa2b7ce8 100644 --- a/pandas/io/tests/parser/c_parser_only.py +++ b/pandas/io/tests/parser/c_parser_only.py @@ -381,3 +381,73 @@ def test_empty_header_read(count): for count in range(1, 101): test_empty_header_read(count) + + def test_parse_trim_buffers(self): + # This test is part of a bugfix for issue #13703. It attempts + # to stress the system memory allocator, to cause it to move the + # stream buffer and either let the OS reclaim the region, or let + # other memory requests of parser otherwise modify the contents + # of memory space, where it was formerly located. + # This test is designed to cause a `segfault` with unpatched + # `tokenizer.c`. Sometimes the test fails on `segfault`, other + # times it fails due to memory corruption, which causes the + # loaded DataFrame to differ from the expected one. + + # Generate a large mixed-type CSV file on-the-fly (one record is + # approx 1.5KiB). 
+ record_ = \ + """9999-9,99:99,,,,ZZ,ZZ,,,ZZZ-ZZZZ,.Z-ZZZZ,-9.99,,,9.99,Z""" \ + """ZZZZ,,-99,9,ZZZ-ZZZZ,ZZ-ZZZZ,,9.99,ZZZ-ZZZZZ,ZZZ-ZZZZZ,""" \ + """ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,9""" \ + """99,ZZZ-ZZZZ,,ZZ-ZZZZ,,,,,ZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZ,,,9,9,""" \ + """9,9,99,99,999,999,ZZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZ,9,ZZ-ZZZZ,9.""" \ + """99,ZZ-ZZZZ,ZZ-ZZZZ,,,,ZZZZ,,,ZZ,ZZ,,,,,,,,,,,,,9,,,999.""" \ + """99,999.99,,,ZZZZZ,,,Z9,,,,,,,ZZZ,ZZZ,,,,,,,,,,,ZZZZZ,ZZ""" \ + """ZZZ,ZZZ-ZZZZZZ,ZZZ-ZZZZZZ,ZZ-ZZZZ,ZZ-ZZZZ,ZZ-ZZZZ,ZZ-ZZ""" \ + """ZZ,,,999999,999999,ZZZ,ZZZ,,,ZZZ,ZZZ,999.99,999.99,,,,Z""" \ + """ZZ-ZZZ,ZZZ-ZZZ,-9.99,-9.99,9,9,,99,,9.99,9.99,9,9,9.99,""" \ + """9.99,,,,9.99,9.99,,99,,99,9.99,9.99,,,ZZZ,ZZZ,,999.99,,""" \ + """999.99,ZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,,,ZZZZZ,ZZZZZ,ZZZ,ZZZ,9,9,""" \ + """,,,,,ZZZ-ZZZZ,ZZZ999Z,,,999.99,,999.99,ZZZ-ZZZZ,,,9.999""" \ + """,9.999,9.999,9.999,-9.999,-9.999,-9.999,-9.999,9.999,9.""" \ + """999,9.999,9.999,9.999,9.999,9.999,9.999,99999,ZZZ-ZZZZ,""" \ + """,9.99,ZZZ,,,,,,,,ZZZ,,,,,9,,,,9,,,,,,,,,,ZZZ-ZZZZ,ZZZ-Z""" \ + """ZZZ,,ZZZZZ,ZZZZZ,ZZZZZ,ZZZZZ,,,9.99,,ZZ-ZZZZ,ZZ-ZZZZ,ZZ""" \ + """,999,,,,ZZ-ZZZZ,ZZZ,ZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,,,99.99,99.99""" \ + """,,,9.99,9.99,9.99,9.99,ZZZ-ZZZZ,,,ZZZ-ZZZZZ,,,,,-9.99,-""" \ + """9.99,-9.99,-9.99,,,,,,,,,ZZZ-ZZZZ,,9,9.99,9.99,99ZZ,,-9""" \ + """.99,-9.99,ZZZ-ZZZZ,,,,,,,ZZZ-ZZZZ,9.99,9.99,9999,,,,,,,""" \ + """,,,-9.9,Z/Z-ZZZZ,999.99,9.99,,999.99,ZZ-ZZZZ,ZZ-ZZZZ,9.""" \ + """99,9.99,9.99,9.99,9.99,9.99,,ZZZ-ZZZZZ,ZZZ-ZZZZZ,ZZZ-ZZ""" \ + """ZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZZ,ZZZ,ZZZ,ZZZ,ZZZ,9.99,,,-9.99,ZZ""" \ + """-ZZZZ,-999.99,,-9999,,999.99,,,,999.99,99.99,,,ZZ-ZZZZZ""" \ + """ZZZ,ZZ-ZZZZ-ZZZZZZZ,,,,ZZ-ZZ-ZZZZZZZZ,ZZZZZZZZ,ZZZ-ZZZZ""" \ + """,9999,999.99,ZZZ-ZZZZ,-9.99,-9.99,ZZZ-ZZZZ,99:99:99,,99""" \ + """,99,,9.99,,-99.99,,,,,,9.99,ZZZ-ZZZZ,-9.99,-9.99,9.99,9""" \ + """.99,,ZZZ,,,,,,,ZZZ,ZZZ,,,,,""" + + # Set the number of lines so that a call to `parser_trim_buffers` + # is triggered: after a 
couple of full chunks are consumed a + # relatively small 'residual' chunk would cause reallocation + # within the parser. + chunksize, n_lines = 128, 2 * 128 + 15 + csv_data = "\n".join([record_] * n_lines) + "\n" + + # We will use StringIO to load the CSV from this text buffer. + # pd.read_csv() will iterate over the file in chunks and will + # finally read a residual chunk of really small size. + + # Generate the expected output: manually create the dataframe + # by splitting by comma and repeating the `n_lines` times. + row = tuple(val_ if val_ else float("nan") + for val_ in record_.split(",")) + expected = pd.DataFrame([row for _ in range(n_lines)], + dtype=object, columns=None, index=None) + + # Iterate over the CSV file in chunks of `chunksize` lines + chunks_ = self.read_csv(StringIO(csv_data), header=None, + dtype=object, chunksize=chunksize) + result = pd.concat(chunks_, axis=0, ignore_index=True) + + # Check for data corruption if there was no segfault + tm.assert_frame_equal(result, expected) diff --git a/pandas/io/tests/parser/common.py b/pandas/io/tests/parser/common.py index c17ca0f1700fa..11eed79e03267 100644 --- a/pandas/io/tests/parser/common.py +++ b/pandas/io/tests/parser/common.py @@ -1491,83 +1491,3 @@ def test_memory_map(self): out = self.read_csv(mmap_file, memory_map=True) tm.assert_frame_equal(out, expected) - - def test_parse_trim_buffers(self): - # This test is part of a bugfix for issue #13703. It attmepts to - # to stress the system memory allocator, to cause it to move the - # stream buffer and either let the OS reclaim the region, or let - # other memory requests of parser otherwise modify the contents - # of memory space, where it was formely located. - # This test is designed to cause a `segfault` with unpatched - # `tokenizer.c`. Sometimes the test fails on `segfault`, other - # times it fails due to memory corruption, which causes the - # loaded DataFrame to differ from the expected one. 
- - # Generate a large mixed-type CSV file on-the-fly (one record is - # approx 1.5KiB). - record_ = \ - """9999-9,99:99,,,,ZZ,ZZ,,,ZZZ-ZZZZ,.Z-ZZZZ,-9.99,,,9.99,Z""" \ - """ZZZZ,,-99,9,ZZZ-ZZZZ,ZZ-ZZZZ,,9.99,ZZZ-ZZZZZ,ZZZ-ZZZZZ,""" \ - """ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,9""" \ - """99,ZZZ-ZZZZ,,ZZ-ZZZZ,,,,,ZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZ,,,9,9,""" \ - """9,9,99,99,999,999,ZZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZ,9,ZZ-ZZZZ,9.""" \ - """99,ZZ-ZZZZ,ZZ-ZZZZ,,,,ZZZZ,,,ZZ,ZZ,,,,,,,,,,,,,9,,,999.""" \ - """99,999.99,,,ZZZZZ,,,Z9,,,,,,,ZZZ,ZZZ,,,,,,,,,,,ZZZZZ,ZZ""" \ - """ZZZ,ZZZ-ZZZZZZ,ZZZ-ZZZZZZ,ZZ-ZZZZ,ZZ-ZZZZ,ZZ-ZZZZ,ZZ-ZZ""" \ - """ZZ,,,999999,999999,ZZZ,ZZZ,,,ZZZ,ZZZ,999.99,999.99,,,,Z""" \ - """ZZ-ZZZ,ZZZ-ZZZ,-9.99,-9.99,9,9,,99,,9.99,9.99,9,9,9.99,""" \ - """9.99,,,,9.99,9.99,,99,,99,9.99,9.99,,,ZZZ,ZZZ,,999.99,,""" \ - """999.99,ZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,,,ZZZZZ,ZZZZZ,ZZZ,ZZZ,9,9,""" \ - """,,,,,ZZZ-ZZZZ,ZZZ999Z,,,999.99,,999.99,ZZZ-ZZZZ,,,9.999""" \ - """,9.999,9.999,9.999,-9.999,-9.999,-9.999,-9.999,9.999,9.""" \ - """999,9.999,9.999,9.999,9.999,9.999,9.999,99999,ZZZ-ZZZZ,""" \ - """,9.99,ZZZ,,,,,,,,ZZZ,,,,,9,,,,9,,,,,,,,,,ZZZ-ZZZZ,ZZZ-Z""" \ - """ZZZ,,ZZZZZ,ZZZZZ,ZZZZZ,ZZZZZ,,,9.99,,ZZ-ZZZZ,ZZ-ZZZZ,ZZ""" \ - """,999,,,,ZZ-ZZZZ,ZZZ,ZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,,,99.99,99.99""" \ - """,,,9.99,9.99,9.99,9.99,ZZZ-ZZZZ,,,ZZZ-ZZZZZ,,,,,-9.99,-""" \ - """9.99,-9.99,-9.99,,,,,,,,,ZZZ-ZZZZ,,9,9.99,9.99,99ZZ,,-9""" \ - """.99,-9.99,ZZZ-ZZZZ,,,,,,,ZZZ-ZZZZ,9.99,9.99,9999,,,,,,,""" \ - """,,,-9.9,Z/Z-ZZZZ,999.99,9.99,,999.99,ZZ-ZZZZ,ZZ-ZZZZ,9.""" \ - """99,9.99,9.99,9.99,9.99,9.99,,ZZZ-ZZZZZ,ZZZ-ZZZZZ,ZZZ-ZZ""" \ - """ZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZZ,ZZZ,ZZZ,ZZZ,ZZZ,9.99,,,-9.99,ZZ""" \ - """-ZZZZ,-999.99,,-9999,,999.99,,,,999.99,99.99,,,ZZ-ZZZZZ""" \ - """ZZZ,ZZ-ZZZZ-ZZZZZZZ,,,,ZZ-ZZ-ZZZZZZZZ,ZZZZZZZZ,ZZZ-ZZZZ""" \ - """,9999,999.99,ZZZ-ZZZZ,-9.99,-9.99,ZZZ-ZZZZ,99:99:99,,99""" \ - """,99,,9.99,,-99.99,,,,,,9.99,ZZZ-ZZZZ,-9.99,-9.99,9.99,9""" \ - """.99,,ZZZ,,,,,,,ZZZ,ZZZ,,,,,""" - - # Set 
the number of line so that a call to `parser_trim_buffers` - # is trgiggered: a couple of full chunks and a relatively small - # 'residual' chunk. - chunksize, n_lines = 128, 2 * 128 + 15 - csv_data = "\n".join([record_] * n_lines) + "\n" - - # We will use StringIO to load the CSV from this text buffer. - # pd.read_csv() will iterate over the file in chunks and will - # finally read a residual chunk of really small size. - - # Create the expected output: maually create the dataframe - # by splitting by comma and repeating the `n_lines` number - # of times. - row = tuple(val_ if val_ else float("nan") - for val_ in record_.split(",")) - expected_ = [row for _ in range(n_lines)] - expected = pd.DataFrame(expected_, dtype=object, - columns=None, index=None) - - # Iterate over the CSV file in chunks of `chunksize` lines - output_ = [] - try: - iterator_ = self.read_csv(StringIO(csv_data), header=None, - dtype=object, chunksize=chunksize) - for chunk_ in iterator_: - output_.append(chunk_) - except ValueError: - # Ignore unsuported dtype=object by engine=python - # in this case output_ list is empty - pass - - # Check for data corruption if there is any output. - if output_: - df = pd.concat(output_, axis=0, ignore_index=True) - tm.assert_frame_equal(df, expected)