@@ -239,15 +239,19 @@ impl StandardTableProvider {
239
239
. await ?;
240
240
execution_plans. push ( arrow_exec) ;
241
241
242
- // Partititon parquet files on disk among the available CPUs
243
- let target_partition = num_cpus:: get ( ) ;
244
- let mut partitioned_files = Vec :: from_iter ( ( 0 ..target_partition) . map ( |_| Vec :: new ( ) ) ) ;
245
- for ( index, file_path) in staging. parquet_files ( ) . into_iter ( ) . enumerate ( ) {
242
+ // Get a list of parquet files still in staging, order by filename
243
+ let mut parquet_files = staging. parquet_files ( ) ;
244
+ parquet_files. sort_by ( |a, b| a. cmp ( b) . reverse ( ) ) ;
245
+
246
+ // NOTE: We don't partition among CPUs to ensure consistent results.
247
+ // i.e. We were seeing in-consistent ordering when querying over parquets in staging.
248
+ let mut partitioned_files = Vec :: with_capacity ( parquet_files. len ( ) ) ;
249
+ for file_path in parquet_files {
246
250
let Ok ( file_meta) = file_path. metadata ( ) else {
247
251
continue ;
248
252
} ;
249
253
let file = PartitionedFile :: new ( file_path. display ( ) . to_string ( ) , file_meta. len ( ) ) ;
250
- partitioned_files[ index % target_partition ] . push ( file)
254
+ partitioned_files. push ( file)
251
255
}
252
256
253
257
// NOTE: There is the possibility of a parquet file being pushed to object store
@@ -256,7 +260,7 @@ impl StandardTableProvider {
256
260
self . create_parquet_physical_plan (
257
261
execution_plans,
258
262
ObjectStoreUrl :: parse ( "file:///" ) . unwrap ( ) ,
259
- partitioned_files,
263
+ vec ! [ partitioned_files] ,
260
264
Statistics :: new_unknown ( & self . schema ) ,
261
265
projection,
262
266
filters,
0 commit comments