@@ -41,6 +41,7 @@ use parquet::{
 };
 use rand::distributions::DistString;
 use relative_path::RelativePathBuf;
+use tokio::task::JoinSet;
 use tracing::{error, info, trace, warn};
 
 use crate::{
@@ -651,6 +652,13 @@ impl Stream {
     pub fn get_stream_type(&self) -> StreamType {
         self.metadata.read().expect(LOCK_EXPECT).stream_type
     }
+
+    /// First flushes arrows onto disk and then converts the arrow into parquet
+    pub fn flush_and_convert(&self, shutdown_signal: bool) -> Result<(), StagingError> {
+        self.flush();
+
+        self.prepare_parquet(shutdown_signal)
+    }
 }
 
 #[derive(Deref, DerefMut, Default)]
@@ -717,21 +725,22 @@ impl Streams {
             .collect()
     }
 
-    /// Convert arrow files into parquet, preparing it for upload
-    pub fn prepare_parquet(&self, shutdown_signal: bool) -> Result<(), StagingError> {
+    /// Asynchronously flushes arrows and compacts into parquet data on all streams in staging,
+    /// so that it is ready to be pushed onto objectstore.
+    pub fn flush_and_convert(
+        &self,
+        joinset: &mut JoinSet<Result<(), StagingError>>,
+        shutdown_signal: bool,
+    ) {
         let streams: Vec<Arc<Stream>> = self
             .read()
             .expect(LOCK_EXPECT)
             .values()
             .map(Arc::clone)
             .collect();
         for stream in streams {
-            stream
-                .prepare_parquet(shutdown_signal)
-                .inspect_err(|err| error!("Failed to run conversion task {err:?}"))?;
+            joinset.spawn(async move { stream.flush_and_convert(shutdown_signal) });
         }
-
-        Ok(())
     }
 }
 
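For context, a minimal sketch of how a caller might drive the new `Streams::flush_and_convert` API: spawn the per-stream conversions onto a `JoinSet`, then await each task and log failures much like the old synchronous loop did. The `convert_all` wrapper below is hypothetical and not part of this change; `Streams`, `Stream`, and `StagingError` are assumed to be in scope from the staging module.

```rust
// Hypothetical caller (not part of this diff).
use tokio::task::JoinSet;
use tracing::error;

async fn convert_all(streams: &Streams, shutdown_signal: bool) {
    let mut joinset: JoinSet<Result<(), StagingError>> = JoinSet::new();

    // Spawns one conversion task per stream onto the JoinSet.
    streams.flush_and_convert(&mut joinset, shutdown_signal);

    // Await tasks as they finish. The outer Result is tokio's JoinError
    // (panic/cancellation); the inner one is the StagingError returned by
    // `Stream::flush_and_convert`.
    while let Some(res) = joinset.join_next().await {
        match res {
            Ok(Ok(())) => {}
            Ok(Err(err)) => error!("Failed to run conversion task {err:?}"),
            Err(err) => error!("Conversion task panicked or was cancelled: {err:?}"),
        }
    }
}
```

Note that `Stream::flush_and_convert` itself is synchronous, so each spawned future does its work inline on the runtime's worker threads; offloading via a blocking-task mechanism would be a further refinement, not something this diff does.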