Skip to content

Commit b161625

Browse files
committed
fix: avoid converting special characters to escape characters on Windows machines
1 parent b65d2a3 commit b161625

File tree

3 files changed

+37
-46
lines changed

3 files changed

+37
-46
lines changed

.gitignore

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -64,9 +64,6 @@ datafusion/sqllogictests/test_files/tpch/data/*
6464
# Scratch temp dir for sqllogictests
6565
datafusion/sqllogictest/test_files/scratch*
6666

67-
# temp file for core
68-
datafusion/core/*.parquet
69-
7067
# rat
7168
filtered_rat.txt
7269
rat.txt

datafusion/core/src/datasource/listing/mod.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,9 @@ impl PartitionedFile {
9090
pub fn new(path: impl Into<String>, size: u64) -> Self {
9191
Self {
9292
object_meta: ObjectMeta {
93-
location: Path::from(path.into()),
93+
// Windows paths may contain special characters that would be percent-encoded (e.g. '~' => '%7E'),
94+
// so we need to use Path::parse
95+
location: Path::parse(path.into()).unwrap(),
9496
last_modified: chrono::Utc.timestamp_nanos(0),
9597
size: size as usize,
9698
e_tag: None,

datafusion/core/tests/parquet/external_access_plan.rs

Lines changed: 34 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,6 @@ use datafusion_physical_plan::ExecutionPlan;
3333
use parquet::arrow::arrow_reader::{RowSelection, RowSelector};
3434
use parquet::arrow::ArrowWriter;
3535
use parquet::file::properties::WriterProperties;
36-
use std::path::Path;
3736
use std::sync::{Arc, OnceLock};
3837
use tempfile::NamedTempFile;
3938

@@ -315,19 +314,12 @@ impl TestFull {
315314

316315
let TestData {
317316
_temp_file: _,
318-
ref schema,
319-
ref file_name,
320-
ref file_size,
317+
schema,
318+
file_name,
319+
file_size,
321320
} = get_test_data();
322321

323-
let new_file_name = if cfg!(target_os = "windows") {
324-
// Windows path separator is different from Unix
325-
file_name.replace("\\", "/")
326-
} else {
327-
file_name.clone()
328-
};
329-
330-
let mut partitioned_file = PartitionedFile::new(new_file_name, *file_size);
322+
let mut partitioned_file = PartitionedFile::new(file_name, *file_size);
331323

332324
// add the access plan, if any, as an extension
333325
if let Some(access_plan) = access_plan {
@@ -363,8 +355,6 @@ impl TestFull {
363355
pretty_format_batches(&results).unwrap()
364356
);
365357

366-
std::fs::remove_file(file_name).unwrap();
367-
368358
Ok(MetricsFinder::find_metrics(plan.as_ref()).unwrap())
369359
}
370360
}
@@ -379,43 +369,45 @@ struct TestData {
379369
file_size: u64,
380370
}
381371

382-
static _TEST_DATA: OnceLock<TestData> = OnceLock::new();
372+
static TEST_DATA: OnceLock<TestData> = OnceLock::new();
383373

384374
/// Return a parquet file with 2 row groups each with 5 rows
385-
fn get_test_data() -> TestData {
386-
let scenario = Scenario::UTF8;
387-
let row_per_group = 5;
375+
fn get_test_data() -> &'static TestData {
376+
TEST_DATA.get_or_init(|| {
377+
let scenario = Scenario::UTF8;
378+
let row_per_group = 5;
388379

389-
let mut temp_file = tempfile::Builder::new()
390-
.prefix("user_access_plan")
391-
.suffix(".parquet")
392-
.tempfile_in(Path::new(""))
393-
.expect("tempfile creation");
380+
let mut temp_file = tempfile::Builder::new()
381+
.prefix("user_access_plan")
382+
.suffix(".parquet")
383+
.tempfile()
384+
.expect("tempfile creation");
394385

395-
let props = WriterProperties::builder()
396-
.set_max_row_group_size(row_per_group)
397-
.build();
386+
let props = WriterProperties::builder()
387+
.set_max_row_group_size(row_per_group)
388+
.build();
398389

399-
let batches = create_data_batch(scenario);
400-
let schema = batches[0].schema();
390+
let batches = create_data_batch(scenario);
391+
let schema = batches[0].schema();
401392

402-
let mut writer =
403-
ArrowWriter::try_new(&mut temp_file, schema.clone(), Some(props)).unwrap();
393+
let mut writer =
394+
ArrowWriter::try_new(&mut temp_file, schema.clone(), Some(props)).unwrap();
404395

405-
for batch in batches {
406-
writer.write(&batch).expect("writing batch");
407-
}
408-
writer.close().unwrap();
396+
for batch in batches {
397+
writer.write(&batch).expect("writing batch");
398+
}
399+
writer.close().unwrap();
409400

410-
let file_name = temp_file.path().to_string_lossy().to_string();
411-
let file_size = temp_file.path().metadata().unwrap().len();
401+
let file_name = temp_file.path().to_string_lossy().to_string();
402+
let file_size = temp_file.path().metadata().unwrap().len();
412403

413-
TestData {
414-
_temp_file: temp_file,
415-
schema,
416-
file_name,
417-
file_size,
418-
}
404+
TestData {
405+
_temp_file: temp_file,
406+
schema,
407+
file_name,
408+
file_size,
409+
}
410+
})
419411
}
420412

421413
/// Return the total value of the specified metric name

0 commit comments

Comments
 (0)