From e693c3a196a96e65accbd4ec93c93185141baf6d Mon Sep 17 00:00:00 2001
From: Jeff Blackburne <jblackburne@gmail.com>
Date: Wed, 29 Apr 2015 12:31:06 -0700
Subject: [PATCH] Changed a condition in tokenize_delimited to account for data
 chunks that start with newline.

Changed a condition in tokenize_delim_customterm to account for data chunks that start with the line terminator.

Added a unit test that fails in master and passes in this branch.

Moved new unit test in order to test all parser engines. Added GH issue number.

Added release note.
---
 doc/source/whatsnew/v0.16.1.txt | 1 +
 pandas/io/tests/test_parsers.py | 6 ++++++
 pandas/src/parser/tokenizer.c   | 4 ++--
 3 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v0.16.1.txt b/doc/source/whatsnew/v0.16.1.txt
index 2ddf77d99d51d..16aab41cc3e88 100755
--- a/doc/source/whatsnew/v0.16.1.txt
+++ b/doc/source/whatsnew/v0.16.1.txt
@@ -216,6 +216,7 @@ Bug Fixes
 - Bug ``GroupBy.size`` doesn't attach index name properly if grouped by ``TimeGrouper`` (:issue:`9925`)
 - Bug causing an exception in slice assignments because ``length_of_indexer`` returns wrong results (:issue:`9995`)
 - Bug in csv parser causing lines with initial whitespace plus one non-space character to be skipped. (:issue:`9710`)
+- Bug in C csv parser causing spurious NaNs when data started with a newline followed by whitespace. (:issue:`10022`)
 
 
 
diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py
index 59fb3f14de8d2..7d52c6ad4cb3b 100755
--- a/pandas/io/tests/test_parsers.py
+++ b/pandas/io/tests/test_parsers.py
@@ -2287,6 +2287,12 @@ def test_single_char_leading_whitespace(self):
         result = self.read_csv(StringIO(data), skipinitialspace=True)
         tm.assert_frame_equal(result, expected)
 
+    def test_chunk_begins_with_newline_whitespace(self):
+        # GH 10022: data starting with a newline, then whitespace, should parse to exactly two rows
+        data = '\n hello\nworld\n'
+        result = self.read_csv(StringIO(data), header=None)
+        self.assertEqual(len(result), 2)
+
 
 class TestPythonParser(ParserTests, tm.TestCase):
     def test_negative_skipfooter_raises(self):
diff --git a/pandas/src/parser/tokenizer.c b/pandas/src/parser/tokenizer.c
index e7b5db9c5e361..3be17f17d6afa 100644
--- a/pandas/src/parser/tokenizer.c
+++ b/pandas/src/parser/tokenizer.c
@@ -854,7 +854,7 @@ int tokenize_delimited(parser_t *self, size_t line_limit)
                     --i;
                 } while (i + 1 > self->datapos && *buf != '\n');
 
-                if (i + 1 > self->datapos) // reached a newline rather than the beginning
+                if (*buf == '\n') // reached a newline rather than the beginning
                 {
                     ++buf; // move pointer to first char after newline
                     ++i;
@@ -1172,7 +1172,7 @@ int tokenize_delim_customterm(parser_t *self, size_t line_limit)
                     --i;
                 } while (i + 1 > self->datapos && *buf != self->lineterminator);
 
-                if (i + 1 > self->datapos) // reached a newline rather than the beginning
+                if (*buf == self->lineterminator) // reached a newline rather than the beginning
                 {
                     ++buf; // move pointer to first char after newline
                     ++i;