@@ -482,8 +482,12 @@ def run_query(self, query, **kwargs):
482
482
self .process_http_error (ex )
483
483
484
484
schema_fields = [field .to_api_repr () for field in rows_iter .schema ]
485
- dtypes = _bqschema_to_dtypes (schema_fields )
486
- df = rows_iter .to_dataframe (dtypes = dtypes )
485
+ nullsafe_dtypes = _bqschema_to_nullsafe_dtypes (schema_fields )
486
+ df = rows_iter .to_dataframe (dtypes = nullsafe_dtypes )
487
+
488
+ if df .empty :
489
+ df = _cast_empty_df_dtypes (schema_fields , df )
490
+
487
491
logger .debug ("Got {} rows.\n " .format (rows_iter .total_rows ))
488
492
return df
489
493
@@ -633,11 +637,11 @@ def delete_and_recreate_table(self, dataset_id, table_id, table_schema):
633
637
table .create (table_id , table_schema )
634
638
635
639
636
- def _bqschema_to_dtypes (schema_fields ):
640
+ def _bqschema_to_nullsafe_dtypes (schema_fields ):
637
641
# Only specify dtype when the dtype allows nulls. Otherwise, use pandas's
638
642
# default dtype choice.
639
643
#
640
- # see :
644
+ # See :
641
645
# http://pandas.pydata.org/pandas-docs/dev/missing_data.html
642
646
# #missing-data-casting-rules-and-indexing
643
647
dtype_map = {
@@ -661,6 +665,37 @@ def _bqschema_to_dtypes(schema_fields):
661
665
return dtypes
662
666
663
667
668
def _cast_empty_df_dtypes(schema_fields, df):
    """Cast columns of an empty DataFrame to the dtype implied by the schema.

    In an empty dataframe, pandas cannot choose a dtype unless one is
    explicitly provided. The _bqschema_to_nullsafe_dtypes() function only
    provides dtypes when the dtype safely handles null values. This means
    that empty int64 and boolean columns are incorrectly classified as
    ``object``.

    Parameters
    ----------
    schema_fields : iterable of dict
        Schema field descriptions. Each must have ``"name"`` and ``"type"``
        keys; ``"mode"`` is optional and is treated as ``NULLABLE`` when
        missing or empty.
    df : pandas.DataFrame
        An empty dataframe containing a column for every non-repeated
        BOOLEAN / INTEGER field in ``schema_fields``.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with the affected columns re-assigned in
        place.

    Raises
    ------
    ValueError
        If ``df`` is not empty. Casting populated columns could fail or
        silently change values when nulls are present.
    """
    if not df.empty:
        raise ValueError(
            "DataFrame must be empty in order to cast non-nullsafe dtypes"
        )

    # Only the types that are *not* null-safe need fixing up here.
    dtype_map = {
        "BOOLEAN": bool,
        "INTEGER": np.int64,
    }

    for field in schema_fields:
        column = str(field["name"])

        # "mode" is optional in a schema field description; an absent mode
        # means NULLABLE, so it must not raise KeyError.
        mode = field.get("mode") or "NULLABLE"
        if mode.upper() == "REPEATED":
            # Repeated fields are skipped; their columns keep whatever dtype
            # they already have.
            continue

        dtype = dtype_map.get(field["type"].upper())
        if dtype is not None:
            df[column] = df[column].astype(dtype)

    return df
697
+
698
+
664
699
def read_gbq (
665
700
query ,
666
701
project_id = None ,
0 commit comments