@@ -32,7 +32,6 @@ use serde_json::to_vec;
32
32
use std:: cmp:: min;
33
33
use std:: collections:: HashMap ;
34
34
use std:: str:: FromStr ;
35
-
36
35
/// A manifest contains metadata and a list of entries.
37
36
#[ derive( Debug , PartialEq , Eq , Clone ) ]
38
37
pub struct Manifest {
@@ -851,7 +850,11 @@ impl TryFrom<i32> for ManifestStatus {
851
850
}
852
851
853
852
/// Data file carries data file path, partition tuple, metrics, …
854
- #[ derive( Debug , PartialEq , Clone , Eq ) ]
853
+ #[ derive( Debug , PartialEq , Clone , Eq , Builder ) ]
854
+ /// For an optional field, we use `#[builder(default)]` or `#[builder(setter(strip_option), default)]` so that the field
855
+ /// will be set to its `Default` value when it is not set.
856
+ /// For a required field, the build will fail if it is not set.
857
+ #[ builder( name = "DataFileBuilder" , setter( prefix = "with" ) ) ]
855
858
pub struct DataFile {
856
859
/// field id: 134
857
860
///
@@ -886,25 +889,29 @@ pub struct DataFile {
886
889
/// Map from column id to the total size on disk of all regions that
887
890
/// store the column. Does not include bytes necessary to read other
888
891
/// columns, like footers. Leave null for row-oriented formats (Avro)
892
+ #[ builder( default ) ]
889
893
column_sizes : HashMap < i32 , u64 > ,
890
894
/// field id: 109
891
895
/// key field id: 119
892
896
/// value field id: 120
893
897
///
894
898
/// Map from column id to number of values in the column (including null
895
899
/// and NaN values)
900
+ #[ builder( default ) ]
896
901
value_counts : HashMap < i32 , u64 > ,
897
902
/// field id: 110
898
903
/// key field id: 121
899
904
/// value field id: 122
900
905
///
901
906
/// Map from column id to number of null values in the column
907
+ #[ builder( default ) ]
902
908
null_value_counts : HashMap < i32 , u64 > ,
903
909
/// field id: 137
904
910
/// key field id: 138
905
911
/// value field id: 139
906
912
///
907
913
/// Map from column id to number of NaN values in the column
914
+ #[ builder( default ) ]
908
915
nan_value_counts : HashMap < i32 , u64 > ,
909
916
/// field id: 125
910
917
/// key field id: 126
@@ -917,6 +924,7 @@ pub struct DataFile {
917
924
/// Reference:
918
925
///
919
926
/// - [Binary single-value serialization](https://iceberg.apache.org/spec/#binary-single-value-serialization)
927
+ #[ builder( default ) ]
920
928
lower_bounds : HashMap < i32 , Literal > ,
921
929
/// field id: 128
922
930
/// key field id: 129
@@ -929,16 +937,19 @@ pub struct DataFile {
929
937
/// Reference:
930
938
///
931
939
/// - [Binary single-value serialization](https://iceberg.apache.org/spec/#binary-single-value-serialization)
940
+ #[ builder( default ) ]
932
941
upper_bounds : HashMap < i32 , Literal > ,
933
942
/// field id: 131
934
943
///
935
944
/// Implementation-specific key metadata for encryption
945
+ #[ builder( default ) ]
936
946
key_metadata : Vec < u8 > ,
937
947
/// field id: 132
938
948
/// element field id: 133
939
949
///
940
950
/// Split offsets for the data file. For example, all row group offsets
941
951
/// in a Parquet file. Must be sorted ascending
952
+ #[ builder( default ) ]
942
953
split_offsets : Vec < i64 > ,
943
954
/// field id: 135
944
955
/// element field id: 136
@@ -947,6 +958,7 @@ pub struct DataFile {
947
958
/// Required when content is EqualityDeletes and should be null
948
959
/// otherwise. Fields with ids listed in this column must be present
949
960
/// in the delete file
961
+ #[ builder( default ) ]
950
962
equality_ids : Vec < i32 > ,
951
963
/// field id: 140
952
964
///
@@ -958,6 +970,7 @@ pub struct DataFile {
958
970
/// sorted by file and position, not a table order, and should set sort
959
971
/// order id to null. Readers must ignore sort order id for position
960
972
/// delete files.
973
+ #[ builder( setter( strip_option) , default ) ]
961
974
sort_order_id : Option < i32 > ,
962
975
}
963
976
0 commit comments