Skip to content

Commit 38df664

Browse files
committed
Remove ObjectStore from FileScanConfig and ListingTableConfig
1 parent 8f514f9 commit 38df664

File tree

19 files changed

+168
-362
lines changed

19 files changed

+168
-362
lines changed

benchmarks/src/bin/tpch.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,6 @@ use datafusion::{
4444
};
4545
use datafusion::{
4646
arrow::util::pretty,
47-
datafusion_data_access::object_store::local::LocalFileSystem,
4847
datasource::listing::{ListingOptions, ListingTable, ListingTableConfig},
4948
};
5049

@@ -427,7 +426,7 @@ fn get_table(
427426
};
428427

429428
let table_path = ListingTableUrl::parse(path)?;
430-
let config = ListingTableConfig::new(Arc::new(LocalFileSystem {}), table_path)
429+
let config = ListingTableConfig::new(table_path)
431430
.with_listing_options(options)
432431
.with_schema(schema);
433432

datafusion-examples/examples/flight_server.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ use std::pin::Pin;
1919
use std::sync::Arc;
2020

2121
use arrow_flight::SchemaAsIpc;
22-
use datafusion::datafusion_data_access::object_store::local::LocalFileSystem;
2322
use datafusion::datasource::file_format::parquet::ParquetFormat;
2423
use datafusion::datasource::listing::{ListingOptions, ListingTableUrl};
2524
use futures::Stream;
@@ -71,8 +70,9 @@ impl FlightService for FlightServiceImpl {
7170
let table_path =
7271
ListingTableUrl::parse(&request.path[0]).map_err(to_tonic_err)?;
7372

73+
let ctx = SessionContext::new();
7474
let schema = listing_options
75-
.infer_schema(Arc::new(LocalFileSystem {}), &table_path)
75+
.infer_schema(&ctx.state(), &table_path)
7676
.await
7777
.unwrap();
7878

datafusion/core/benches/sort_limit_query_sql.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
#[macro_use]
1919
extern crate criterion;
2020
use criterion::Criterion;
21-
use datafusion::datafusion_data_access::object_store::local::LocalFileSystem;
2221
use datafusion::datasource::file_format::csv::CsvFormat;
2322
use datafusion::datasource::listing::{
2423
ListingOptions, ListingTable, ListingTableConfig, ListingTableUrl,
@@ -71,7 +70,7 @@ fn create_context() -> Arc<Mutex<SessionContext>> {
7170
// create CSV data source
7271
let listing_options = ListingOptions::new(Arc::new(CsvFormat::default()));
7372

74-
let config = ListingTableConfig::new(Arc::new(LocalFileSystem {}), table_path)
73+
let config = ListingTableConfig::new(table_path)
7574
.with_listing_options(listing_options)
7675
.with_schema(schema);
7776

datafusion/core/src/catalog/schema.rs

Lines changed: 20 additions & 191 deletions
Original file line numberDiff line numberDiff line change
@@ -18,16 +18,13 @@
1818
//! Describes the interface and built-in implementations of schemas,
1919
//! representing collections of named tables.
2020
21-
use parking_lot::{Mutex, RwLock};
21+
use parking_lot::RwLock;
2222
use std::any::Any;
2323
use std::collections::HashMap;
2424
use std::sync::Arc;
2525

26-
use crate::datasource::listing::{ListingTable, ListingTableConfig, ListingTableUrl};
27-
use crate::datasource::object_store::ObjectStoreRegistry;
2826
use crate::datasource::TableProvider;
2927
use crate::error::{DataFusionError, Result};
30-
use datafusion_data_access::object_store::ObjectStore;
3128

3229
/// Represents a schema, comprising a number of named tables.
3330
pub trait SchemaProvider: Sync + Send {
@@ -130,135 +127,19 @@ impl SchemaProvider for MemorySchemaProvider {
130127
}
131128
}
132129

133-
/// `ObjectStore` implementation of `SchemaProvider` to enable registering a `ListingTable`
134-
pub struct ObjectStoreSchemaProvider {
135-
tables: RwLock<HashMap<String, Arc<dyn TableProvider>>>,
136-
object_store_registry: Arc<Mutex<ObjectStoreRegistry>>,
137-
}
138-
139-
impl ObjectStoreSchemaProvider {
140-
/// Instantiates a new `ObjectStoreSchemaProvider` with an empty collection of tables.
141-
pub fn new() -> Self {
142-
Self {
143-
tables: RwLock::new(HashMap::new()),
144-
object_store_registry: Arc::new(Mutex::new(ObjectStoreRegistry::new())),
145-
}
146-
}
147-
148-
/// Assign an `ObjectStore` which enables calling `register_listing_table`.
149-
pub fn register_object_store(
150-
&self,
151-
scheme: impl Into<String>,
152-
object_store: Arc<dyn ObjectStore>,
153-
) -> Option<Arc<dyn ObjectStore>> {
154-
self.object_store_registry
155-
.lock()
156-
.register_store(scheme.into(), object_store)
157-
}
158-
159-
/// Retrieves a `ObjectStore` instance for a given Url
160-
pub fn object_store(
161-
&self,
162-
url: impl AsRef<url::Url>,
163-
) -> Result<Arc<dyn ObjectStore>> {
164-
self.object_store_registry
165-
.lock()
166-
.get_by_url(url)
167-
.map_err(DataFusionError::from)
168-
}
169-
170-
/// If supported by the implementation, adds a new table to this schema by creating a
171-
/// `ListingTable` from the provided `url` and a previously registered `ObjectStore`.
172-
/// If a table of the same name existed before, it returns "Table already exists" error.
173-
pub async fn register_listing_table(
174-
&self,
175-
name: &str,
176-
table_path: ListingTableUrl,
177-
config: Option<ListingTableConfig>,
178-
) -> Result<()> {
179-
let config = match config {
180-
Some(cfg) => cfg,
181-
None => {
182-
let object_store = self.object_store(&table_path)?;
183-
ListingTableConfig::new(object_store, table_path)
184-
.infer()
185-
.await?
186-
}
187-
};
188-
189-
let table = Arc::new(ListingTable::try_new(config)?);
190-
self.register_table(name.into(), table)?;
191-
Ok(())
192-
}
193-
}
194-
195-
impl Default for ObjectStoreSchemaProvider {
196-
fn default() -> Self {
197-
Self::new()
198-
}
199-
}
200-
201-
impl SchemaProvider for ObjectStoreSchemaProvider {
202-
fn as_any(&self) -> &dyn Any {
203-
self
204-
}
205-
206-
fn table_names(&self) -> Vec<String> {
207-
let tables = self.tables.read();
208-
tables.keys().cloned().collect()
209-
}
210-
211-
fn table(&self, name: &str) -> Option<Arc<dyn TableProvider>> {
212-
let tables = self.tables.read();
213-
tables.get(name).cloned()
214-
}
215-
216-
fn register_table(
217-
&self,
218-
name: String,
219-
table: Arc<dyn TableProvider>,
220-
) -> Result<Option<Arc<dyn TableProvider>>> {
221-
if self.table_exist(name.as_str()) {
222-
return Err(DataFusionError::Execution(format!(
223-
"The table {} already exists",
224-
name
225-
)));
226-
}
227-
let mut tables = self.tables.write();
228-
Ok(tables.insert(name, table))
229-
}
230-
231-
fn deregister_table(&self, name: &str) -> Result<Option<Arc<dyn TableProvider>>> {
232-
let mut tables = self.tables.write();
233-
Ok(tables.remove(name))
234-
}
235-
236-
fn table_exist(&self, name: &str) -> bool {
237-
let tables = self.tables.read();
238-
tables.contains_key(name)
239-
}
240-
}
241-
242130
#[cfg(test)]
243131
mod tests {
244-
use std::ffi::OsStr;
245-
use std::path::Path;
246132
use std::sync::Arc;
247133

248134
use arrow::datatypes::Schema;
135+
use datafusion_data_access::object_store::local::LocalFileSystem;
249136

250137
use crate::assert_batches_eq;
251-
use crate::catalog::catalog::CatalogProvider;
252-
use crate::catalog::catalog::MemoryCatalogProvider;
253-
use crate::catalog::schema::{
254-
MemorySchemaProvider, ObjectStoreSchemaProvider, SchemaProvider,
255-
};
256-
use crate::datafusion_data_access::object_store::local::LocalFileSystem;
138+
use crate::catalog::catalog::{CatalogProvider, MemoryCatalogProvider};
139+
use crate::catalog::schema::{MemorySchemaProvider, SchemaProvider};
257140
use crate::datasource::empty::EmptyTable;
258-
use crate::execution::context::SessionContext;
259-
260-
use crate::datasource::listing::ListingTableUrl;
261-
use futures::StreamExt;
141+
use crate::datasource::listing::{ListingTable, ListingTableConfig, ListingTableUrl};
142+
use crate::prelude::SessionContext;
262143

263144
#[tokio::test]
264145
async fn test_mem_provider() {
@@ -282,22 +163,27 @@ mod tests {
282163
#[tokio::test]
283164
async fn test_schema_register_listing_table() {
284165
let testdata = crate::test_util::parquet_test_data();
285-
let filename = format!("{}/{}", testdata, "alltypes_plain.parquet");
166+
let filename = format!("test:///{}/{}", testdata, "alltypes_plain.parquet");
286167
let table_path = ListingTableUrl::parse(filename).unwrap();
287168

288-
let schema = ObjectStoreSchemaProvider::new();
289-
let _store = schema.register_object_store("test", Arc::new(LocalFileSystem {}));
169+
let catalog = MemoryCatalogProvider::new();
170+
let schema = MemorySchemaProvider::new();
290171

291-
schema
292-
.register_listing_table("alltypes_plain", table_path, None)
172+
let ctx = SessionContext::new();
173+
let store = Arc::new(LocalFileSystem {});
174+
ctx.runtime_env().register_object_store("test", store);
175+
176+
let config = ListingTableConfig::new(table_path)
177+
.infer(&ctx.state())
293178
.await
294179
.unwrap();
180+
let table = ListingTable::try_new(config).unwrap();
295181

296-
let catalog = MemoryCatalogProvider::new();
297-
catalog.register_schema("active", Arc::new(schema)).unwrap();
298-
299-
let ctx = SessionContext::new();
182+
schema
183+
.register_table("alltypes_plain".to_string(), Arc::new(table))
184+
.unwrap();
300185

186+
catalog.register_schema("active", Arc::new(schema)).unwrap();
301187
ctx.register_catalog("cat", Arc::new(catalog));
302188

303189
let df = ctx
@@ -323,61 +209,4 @@ mod tests {
323209
];
324210
assert_batches_eq!(expected, &actual);
325211
}
326-
327-
#[tokio::test]
328-
async fn test_schema_register_listing_tables() {
329-
let testdata = crate::test_util::parquet_test_data();
330-
331-
let schema = ObjectStoreSchemaProvider::new();
332-
let store = schema
333-
.register_object_store("file", Arc::new(LocalFileSystem {}))
334-
.unwrap();
335-
336-
let mut files = store.list_file(&testdata).await.unwrap();
337-
while let Some(file) = files.next().await {
338-
let sized_file = file.unwrap().sized_file;
339-
let path = Path::new(&sized_file.path);
340-
let file = path.file_name().unwrap();
341-
if file == OsStr::new("alltypes_dictionary.parquet")
342-
|| file == OsStr::new("alltypes_plain.parquet")
343-
{
344-
let name = path.file_stem().unwrap().to_str().unwrap();
345-
let path = ListingTableUrl::parse(&sized_file.path).unwrap();
346-
schema
347-
.register_listing_table(name, path, None)
348-
.await
349-
.unwrap();
350-
}
351-
}
352-
353-
let tables = vec![
354-
String::from("alltypes_dictionary"),
355-
String::from("alltypes_plain"),
356-
];
357-
358-
let mut schema_tables = schema.table_names();
359-
schema_tables.sort();
360-
assert_eq!(schema_tables, tables);
361-
}
362-
363-
#[tokio::test]
364-
#[should_panic(expected = "already exists")]
365-
async fn test_schema_register_same_listing_table() {
366-
let testdata = crate::test_util::parquet_test_data();
367-
let filename = format!("{}/{}", testdata, "alltypes_plain.parquet");
368-
let table_path = ListingTableUrl::parse(filename).unwrap();
369-
370-
let schema = ObjectStoreSchemaProvider::new();
371-
let _store = schema.register_object_store("test", Arc::new(LocalFileSystem {}));
372-
373-
schema
374-
.register_listing_table("alltypes_plain", table_path.clone(), None)
375-
.await
376-
.unwrap();
377-
378-
schema
379-
.register_listing_table("alltypes_plain", table_path, None)
380-
.await
381-
.unwrap();
382-
}
383212
}

datafusion/core/src/datasource/file_format/mod.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,6 @@ pub(crate) mod test_util {
115115
let exec = format
116116
.create_physical_plan(
117117
FileScanConfig {
118-
object_store: store,
119118
object_store_url: ObjectStoreUrl::local_filesystem(),
120119
file_schema,
121120
file_groups,

0 commit comments

Comments
 (0)