From d2b85ffe794d44d9cb7f62c6024a4b9ec1ac490e Mon Sep 17 00:00:00 2001
From: Laurent Gautier <lgautier@gmail.com>
Date: Sun, 21 Jul 2013 22:10:16 +0200
Subject: [PATCH 1/8] Typo in error message

---
 pandas/core/frame.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 401a7746953cb..6376b94d1ae99 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5705,7 +5705,7 @@ def extract_index(data):
                 raw_lengths.append(len(v))
 
         if not indexes and not raw_lengths:
-            raise ValueError('If using all scalar values, you must must pass'
+            raise ValueError('If using all scalar values, you must pass'
                              ' an index')
 
         if have_series or have_dicts:

From e6c3cd6b796a8df0c545d7862c482d5464c1e8c9 Mon Sep 17 00:00:00 2001
From: Laurent Gautier <lgautier@gmail.com>
Date: Sun, 21 Jul 2013 23:12:03 +0200
Subject: [PATCH 2/8] Tentative fix for #4297.

The problem finally seem to be an odd restriction: acceptable sequences
were list, tuple, or numpy.ndarray.

A guess is that strings in Python wanted to be considered scalars,
but the above restriction was implemented rather than take anything
with a __len__ except str (or unicode for Python 2.x) in.

Considerations about the use of the buffer interface are not needed
for now.

note: I cannot run tests on Python 2.7.

```
nosetests pandas
```
ends with:
```
    from . import hashtable, tslib, lib
ImportError: cannot import name hashtable
```
---
 pandas/core/frame.py | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 6376b94d1ae99..24b4e8cabac2d 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5700,9 +5700,20 @@ def extract_index(data):
             elif isinstance(v, dict):
                 have_dicts = True
                 indexes.append(v.keys())
-            elif isinstance(v, (list, tuple, np.ndarray)):
-                have_raw_arrays = True
-                raw_lengths.append(len(v))
+            elif not (isinstance(v, str) or \
+                          isinstance(v, unicode)):
+                # Although strings have a __len__,
+                # they are likely to be considered scalars.
+                try:
+                    l = len(v)
+                except TypeError:
+                    continue
+                # Anything else with a __len__ is considered
+                # a sequence (to be safe, we check
+                # that there is __getitem__)
+                if hasattr(v, '__getitem__'):
+                    have_raw_arrays = True
+                    raw_lengths.append(l)
 
         if not indexes and not raw_lengths:
             raise ValueError('If using all scalar values, you must pass'

From 209c01c393e7be9ab8064386d37896422d1a45af Mon Sep 17 00:00:00 2001
From: Laurent Gautier <lgautier@gmail.com>
Date: Mon, 22 Jul 2013 13:10:04 +0200
Subject: [PATCH 3/8] - Comment the source - Upper case for the first letter in
 exception messages

---
 pandas/core/frame.py | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 24b4e8cabac2d..0f19c7f518163 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5680,6 +5680,9 @@ def _arrays_to_mgr(arrays, arr_names, index, columns, dtype=None):
     return create_block_manager_from_arrays(arrays, arr_names, axes)
 
 def extract_index(data):
+    # Slightly misleading name.
+    # Indexes are only extracted for elements in the iterable
+    # `data` inheriting from Series.
     from pandas.core.index import _union_indexes
 
     index = None
@@ -5693,6 +5696,8 @@ def extract_index(data):
         have_series = False
         have_dicts = False
 
+        # Loop over the element, such as vectors `v` corresponding
+        # to columns in the DataFrame  
         for v in data:
             if isinstance(v, Series):
                 have_series = True
@@ -5707,6 +5712,11 @@ def extract_index(data):
                 try:
                     l = len(v)
                 except TypeError:
+                    # Item v silently ignored (to conserve
+                    # the original behaviour - see also
+                    # test of __getitem__ below).
+                    # This behaviour is kept, but I think
+                    # that an exception (TypeError) should be raised instead.
                     continue
                 # Anything else with a __len__ is considered
                 # a sequence (to be safe, we check
@@ -5714,6 +5724,10 @@ def extract_index(data):
                 if hasattr(v, '__getitem__'):
                     have_raw_arrays = True
                     raw_lengths.append(l)
+                else:
+                    # The original code skipped silently invalid
+                    # objects (see note at TypeError above).
+                    pass
 
         if not indexes and not raw_lengths:
             raise ValueError('If using all scalar values, you must pass'
@@ -5725,7 +5739,7 @@ def extract_index(data):
         if have_raw_arrays:
             lengths = list(set(raw_lengths))
             if len(lengths) > 1:
-                raise ValueError('arrays must all be same length')
+                raise ValueError('Arrays must all be same length')
 
             if have_dicts:
                 raise ValueError('Mixing dicts with non-Series may lead to '
@@ -5733,7 +5747,7 @@ def extract_index(data):
 
             if have_series:
                 if lengths[0] != len(index):
-                    msg = ('array length %d does not match index length %d'
+                    msg = ('Array length %d does not match index length %d'
                            % (lengths[0], len(index)))
                     raise ValueError(msg)
             else:

From 04732e026d3b39d3688a435c4641e9956305b804 Mon Sep 17 00:00:00 2001
From: Laurent Gautier <lgautier@gmail.com>
Date: Mon, 22 Jul 2013 16:33:54 +0200
Subject: [PATCH 4/8] - Fixed unit test for commit d2b85ff - Added unit test
 for new capabilities introduced with commit e6c3cd6

(ran only the unit tests for test_frame.py - `nosetests pandas` aborts
with an import error)
---
 pandas/tests/test_frame.py | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
index a9df56a498f63..7853af0964cfd 100644
--- a/pandas/tests/test_frame.py
+++ b/pandas/tests/test_frame.py
@@ -2236,7 +2236,7 @@ def testit():
 
         def testit():
             DataFrame({'a': False, 'b': True})
-        assertRaisesRegexp(ValueError, 'If using all scalar values, you must must pass an index', testit)
+        assertRaisesRegexp(ValueError, 'If using all scalar values, you must pass an index', testit)
 
     def test_insert_error_msmgs(self):
 
@@ -2774,6 +2774,17 @@ def test_constructor_from_items(self):
         # pass some columns
         recons = DataFrame.from_items(items, columns=['C', 'B', 'A'])
         assert_frame_equal(recons, self.frame.ix[:, ['C', 'B', 'A']])
+        # not any column either a dict, a list, a tuple, or a numpy.ndarray
+        import array
+        recons_ar = DataFrame.from_items([('A', array.array('i', range(10)))])
+        recons_rg = DataFrame.from_items([('A', range(10))])
+        recons_np = DataFrame.from_items([('A', np.array(range(10)))])
+        self.assertEquals(tuple(recons_ar['A']),
+                          tuple(recons_rg['A']))
+        self.assertEquals(tuple(recons_ar['A']),
+                          tuple(recons_np['A']))
+
+        
 
         # orient='index'
 

From 9172436fcb05bb56d7e91420b63e21e813c659ff Mon Sep 17 00:00:00 2001
From: Laurent Gautier <lgautier@gmail.com>
Date: Mon, 22 Jul 2013 20:51:54 +0200
Subject: [PATCH 5/8] refactored to use core.common._is_sequence

---
 pandas/core/frame.py | 34 ++++++++++++----------------------
 1 file changed, 12 insertions(+), 22 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 0f19c7f518163..2c640960eceb4 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5705,29 +5705,19 @@ def extract_index(data):
             elif isinstance(v, dict):
                 have_dicts = True
                 indexes.append(v.keys())
-            elif not (isinstance(v, str) or \
-                          isinstance(v, unicode)):
+            elif com._is_sequence(v):
+                # This is a sequence-but-not-a-string
                 # Although strings have a __len__,
-                # they are likely to be considered scalars.
-                try:
-                    l = len(v)
-                except TypeError:
-                    # Item v silently ignored (to conserve
-                    # the original behaviour - see also
-                    # test of __getitem__ below).
-                    # This behaviour is kept, but I think
-                    # that an exception (TypeError) should be raised instead.
-                    continue
-                # Anything else with a __len__ is considered
-                # a sequence (to be safe, we check
-                # that there is __getitem__)
-                if hasattr(v, '__getitem__'):
-                    have_raw_arrays = True
-                    raw_lengths.append(l)
-                else:
-                    # The original code skipped silently invalid
-                    # objects (see note at TypeError above).
-                    pass
+                # they will be considered scalar.
+                have_raw_arrays = True
+                raw_lengths.append(len(v))
+            else:
+                # Item v silently ignored (to conserve
+                # the original behaviour - see also
+                # test of __getitem__ below).
+                # This behaviour is kept, but I think
+                # that an exception (TypeError) should be raised instead.
+                pass
 
         if not indexes and not raw_lengths:
             raise ValueError('If using all scalar values, you must pass'

From 4f71e9888d43808fd3b2b9e0c103292ab1de257f Mon Sep 17 00:00:00 2001
From: Laurent Gautier <lgautier@gmail.com>
Date: Tue, 23 Jul 2013 13:10:14 +0200
Subject: [PATCH 6/8] Corrected a comment in the code that was suggesting
 incorrectly that an exception should be raised

---
 pandas/core/frame.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 2c640960eceb4..d80402d776ac6 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5712,11 +5712,9 @@ def extract_index(data):
                 have_raw_arrays = True
                 raw_lengths.append(len(v))
             else:
-                # Item v silently ignored (to conserve
-                # the original behaviour - see also
-                # test of __getitem__ below).
-                # This behaviour is kept, but I think
-                # that an exception (TypeError) should be raised instead.
+                # Item v is silently ignored,
+                # as it is not anything an index can be inferred
+                # from.
                 pass
 
         if not indexes and not raw_lengths:

From f131b719db0386f3c05408703729e4fc6b7084d7 Mon Sep 17 00:00:00 2001
From: Laurent Gautier <lgautier@gmail.com>
Date: Tue, 23 Jul 2013 15:05:02 +0200
Subject: [PATCH 7/8] removed unnecessary test

---
 pandas/core/frame.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index d80402d776ac6..6ec28f9141c05 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5688,7 +5688,7 @@ def extract_index(data):
     index = None
     if len(data) == 0:
         index = Index([])
-    elif len(data) > 0 and index is None:
+    elif len(data) > 0:
         raw_lengths = []
         indexes = []
 

From 8787ef5b9bc175fbac3e07888299408b41e47ee3 Mon Sep 17 00:00:00 2001
From: Laurent Gautier <lgautier@gmail.com>
Date: Wed, 24 Jul 2013 17:14:35 +0200
Subject: [PATCH 8/8] Fixed behaviour for mapping vs sequence The use of
 _is_sequence() is convenient but not optimal (not basestring implicitly
 tested twice)

---
 pandas/core/frame.py | 41 +++++++++++++++++++++++++----------------
 1 file changed, 25 insertions(+), 16 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 6ec28f9141c05..26877752017e3 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5694,7 +5694,7 @@ def extract_index(data):
 
         have_raw_arrays = False
         have_series = False
-        have_dicts = False
+        have_mappings = False
 
         # Loop over the element, such as vectors `v` corresponding
         # to columns in the DataFrame  
@@ -5702,26 +5702,35 @@ def extract_index(data):
             if isinstance(v, Series):
                 have_series = True
                 indexes.append(v.index)
-            elif isinstance(v, dict):
-                have_dicts = True
-                indexes.append(v.keys())
-            elif com._is_sequence(v):
-                # This is a sequence-but-not-a-string
-                # Although strings have a __len__,
-                # they will be considered scalar.
-                have_raw_arrays = True
-                raw_lengths.append(len(v))
             else:
-                # Item v is silently ignored,
-                # as it is not anything an index can be inferred
-                # from.
-                pass
+                # When an OrderedDict, the mapping aspect
+                # is given priority, although there is a warning when
+                # mixture of sequences and mapping. The unit tests
+                # show that this is the desired behaviour.
+                # Also, shouldn't a `bytes` object be considered a scalar ?
+                is_mapping = isinstance(v, collections.Mapping)
+                is_sequence = (isinstance(v, collections.Sequence) or \
+                    _is_sequence(v)) and not isinstance(v, basestring)
+                if is_mapping:
+                    have_mappings = True
+                    indexes.append(v.keys())
+                elif is_sequence:
+                    # This is a sequence-but-not-a-string
+                    # Although strings have a __len__,
+                    # they will be considered scalar.
+                    have_raw_arrays = True
+                    raw_lengths.append(len(v))
+                else:
+                    # Item v is silently ignored,
+                    # as it is not anything an index can be inferred
+                    # from.
+                    pass
 
         if not indexes and not raw_lengths:
             raise ValueError('If using all scalar values, you must pass'
                              ' an index')
 
-        if have_series or have_dicts:
+        if have_series or have_mappings:
             index = _union_indexes(indexes)
 
         if have_raw_arrays:
@@ -5729,7 +5738,7 @@ def extract_index(data):
             if len(lengths) > 1:
                 raise ValueError('Arrays must all be same length')
 
-            if have_dicts:
+            if have_mappings:
                 raise ValueError('Mixing dicts with non-Series may lead to '
                                  'ambiguous ordering.')