@@ -244,6 +244,54 @@ def test_pyarrow_limit(catalog: Catalog) -> None:
     full_result = table_test_limit.scan(selected_fields=("idx",), limit=999).to_arrow()
     assert len(full_result) == 10
 
+    # test `to_arrow_batch_reader`
+    limited_result = table_test_limit.scan(selected_fields=("idx",), limit=1).to_arrow_batch_reader().read_all()
+    assert len(limited_result) == 1
+
+    empty_result = table_test_limit.scan(selected_fields=("idx",), limit=0).to_arrow_batch_reader().read_all()
+    assert len(empty_result) == 0
+
+    full_result = table_test_limit.scan(selected_fields=("idx",), limit=999).to_arrow_batch_reader().read_all()
+    assert len(full_result) == 10
+
+
+@pytest.mark.integration
+@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")])
+def test_pyarrow_limit_with_multiple_files(catalog: Catalog) -> None:
+    table_name = "default.test_pyarrow_limit_with_multiple_files"
+    try:
+        catalog.drop_table(table_name)
+    except NoSuchTableError:
+        pass
+    reference_table = catalog.load_table("default.test_limit")
+    data = reference_table.scan().to_arrow()
+    table_test_limit = catalog.create_table(table_name, schema=reference_table.schema())
+
+    n_files = 2
+    for _ in range(n_files):
+        table_test_limit.append(data)
+    assert len(table_test_limit.inspect.files()) == n_files
+
+    # test with multiple files
+    limited_result = table_test_limit.scan(selected_fields=("idx",), limit=1).to_arrow()
+    assert len(limited_result) == 1
+
+    empty_result = table_test_limit.scan(selected_fields=("idx",), limit=0).to_arrow()
+    assert len(empty_result) == 0
+
+    full_result = table_test_limit.scan(selected_fields=("idx",), limit=999).to_arrow()
+    assert len(full_result) == 10 * n_files
+
+    # test `to_arrow_batch_reader`
+    limited_result = table_test_limit.scan(selected_fields=("idx",), limit=1).to_arrow_batch_reader().read_all()
+    assert len(limited_result) == 1
+
+    empty_result = table_test_limit.scan(selected_fields=("idx",), limit=0).to_arrow_batch_reader().read_all()
+    assert len(empty_result) == 0
+
+    full_result = table_test_limit.scan(selected_fields=("idx",), limit=999).to_arrow_batch_reader().read_all()
+    assert len(full_result) == 10 * n_files
+
+
 @pytest.mark.integration
 @pytest.mark.filterwarnings("ignore")