diff --git a/crates/catalog/glue/src/schema.rs b/crates/catalog/glue/src/schema.rs index c349219f6..bb676e36e 100644 --- a/crates/catalog/glue/src/schema.rs +++ b/crates/catalog/glue/src/schema.rs @@ -164,6 +164,8 @@ impl SchemaVisitor for GlueSchemaBuilder { PrimitiveType::Double => "double".to_string(), PrimitiveType::Date => "date".to_string(), PrimitiveType::Timestamp => "timestamp".to_string(), + PrimitiveType::TimestampNs => "timestamp_ns".to_string(), + PrimitiveType::TimestamptzNs => "timestamptz_ns".to_string(), PrimitiveType::Time | PrimitiveType::String | PrimitiveType::Uuid => { "string".to_string() } diff --git a/crates/catalog/hms/src/schema.rs b/crates/catalog/hms/src/schema.rs index fa7819d62..4012098c2 100644 --- a/crates/catalog/hms/src/schema.rs +++ b/crates/catalog/hms/src/schema.rs @@ -121,6 +121,8 @@ impl SchemaVisitor for HiveSchemaBuilder { PrimitiveType::Double => "double".to_string(), PrimitiveType::Date => "date".to_string(), PrimitiveType::Timestamp => "timestamp".to_string(), + PrimitiveType::TimestampNs => "timestamp_ns".to_string(), + PrimitiveType::TimestamptzNs => "timestamptz_ns".to_string(), PrimitiveType::Time | PrimitiveType::String | PrimitiveType::Uuid => { "string".to_string() } diff --git a/crates/iceberg/src/arrow/schema.rs b/crates/iceberg/src/arrow/schema.rs index 43875bf67..a41243756 100644 --- a/crates/iceberg/src/arrow/schema.rs +++ b/crates/iceberg/src/arrow/schema.rs @@ -579,6 +579,13 @@ impl SchemaVisitor for ToArrowSchemaConverter { // Timestampz always stored as UTC DataType::Timestamp(TimeUnit::Microsecond, Some("+00:00".into())), )), + crate::spec::PrimitiveType::TimestampNs => Ok(ArrowSchemaOrFieldOrType::Type( + DataType::Timestamp(TimeUnit::Nanosecond, None), + )), + crate::spec::PrimitiveType::TimestamptzNs => Ok(ArrowSchemaOrFieldOrType::Type( + // Store timestamptz_ns as UTC + DataType::Timestamp(TimeUnit::Nanosecond, Some("+00:00".into())), + )), crate::spec::PrimitiveType::String => { Ok(ArrowSchemaOrFieldOrType::Type(DataType::Utf8)) } diff --git a/crates/iceberg/src/avro/schema.rs b/crates/iceberg/src/avro/schema.rs index 7f8142745..cfcf38dea 100644 --- a/crates/iceberg/src/avro/schema.rs +++ b/crates/iceberg/src/avro/schema.rs @@ -229,6 +229,8 @@ impl SchemaVisitor for SchemaToAvroSchema { PrimitiveType::Time => AvroSchema::TimeMicros, PrimitiveType::Timestamp => AvroSchema::TimestampMicros, PrimitiveType::Timestamptz => AvroSchema::TimestampMicros, + PrimitiveType::TimestampNs => AvroSchema::TimestampNanos, + PrimitiveType::TimestamptzNs => AvroSchema::TimestampNanos, PrimitiveType::String => AvroSchema::String, PrimitiveType::Uuid => avro_fixed_schema(UUID_BYTES, Some(UUID_LOGICAL_TYPE))?, PrimitiveType::Fixed(len) => avro_fixed_schema((*len) as usize, None)?, @@ -519,6 +521,7 @@ impl AvroSchemaVisitor for AvroSchemaToSchema { AvroSchema::Date => Type::Primitive(PrimitiveType::Date), AvroSchema::TimeMicros => Type::Primitive(PrimitiveType::Time), AvroSchema::TimestampMicros => Type::Primitive(PrimitiveType::Timestamp), + AvroSchema::TimestampNanos => Type::Primitive(PrimitiveType::TimestampNs), AvroSchema::Boolean => Type::Primitive(PrimitiveType::Boolean), AvroSchema::Int => Type::Primitive(PrimitiveType::Int), AvroSchema::Long => Type::Primitive(PrimitiveType::Long), diff --git a/crates/iceberg/src/spec/datatypes.rs b/crates/iceberg/src/spec/datatypes.rs index 833f17fc5..d38245960 100644 --- a/crates/iceberg/src/spec/datatypes.rs +++ b/crates/iceberg/src/spec/datatypes.rs @@ -225,6 +225,10 @@ pub enum PrimitiveType { Timestamp, /// Timestamp in microsecond precision, with timezone Timestamptz, + /// Timestamp in nanosecond precision, without timezone + TimestampNs, + /// Timestamp in nanosecond precision with timezone + TimestamptzNs, /// Arbitrary-length character sequences encoded in utf-8 String, /// Universally Unique Identifiers, should use 16-byte fixed @@ -250,6 +254,8 @@ impl PrimitiveType { | (PrimitiveType::Time, PrimitiveLiteral::Long(_)) | (PrimitiveType::Timestamp, PrimitiveLiteral::Long(_)) | (PrimitiveType::Timestamptz, PrimitiveLiteral::Long(_)) + | (PrimitiveType::TimestampNs, PrimitiveLiteral::Long(_)) + | (PrimitiveType::TimestamptzNs, PrimitiveLiteral::Long(_)) | (PrimitiveType::String, PrimitiveLiteral::String(_)) | (PrimitiveType::Uuid, PrimitiveLiteral::UInt128(_)) | (PrimitiveType::Fixed(_), PrimitiveLiteral::Binary(_)) @@ -360,6 +366,8 @@ impl fmt::Display for PrimitiveType { PrimitiveType::Time => write!(f, "time"), PrimitiveType::Timestamp => write!(f, "timestamp"), PrimitiveType::Timestamptz => write!(f, "timestamptz"), + PrimitiveType::TimestampNs => write!(f, "timestamp_ns"), + PrimitiveType::TimestamptzNs => write!(f, "timestamptz_ns"), PrimitiveType::String => write!(f, "string"), PrimitiveType::Uuid => write!(f, "uuid"), PrimitiveType::Fixed(size) => write!(f, "fixed({})", size), @@ -1152,6 +1160,8 @@ mod tests { (PrimitiveType::Time, PrimitiveLiteral::Long(1)), (PrimitiveType::Timestamptz, PrimitiveLiteral::Long(1)), (PrimitiveType::Timestamp, PrimitiveLiteral::Long(1)), + (PrimitiveType::TimestamptzNs, PrimitiveLiteral::Long(1)), + (PrimitiveType::TimestampNs, PrimitiveLiteral::Long(1)), ( PrimitiveType::Uuid, PrimitiveLiteral::UInt128(Uuid::new_v4().as_u128()), diff --git a/crates/iceberg/src/spec/manifest.rs b/crates/iceberg/src/spec/manifest.rs index 14b8a8000..b3766045f 100644 --- a/crates/iceberg/src/spec/manifest.rs +++ b/crates/iceberg/src/spec/manifest.rs @@ -1570,6 +1570,11 @@ mod tests { "v_ts_ntz", Type::Primitive(PrimitiveType::Timestamp), )), + Arc::new(NestedField::optional( + 12, + "v_ts_ns_ntz", + Type::Primitive(PrimitiveType::TimestampNs + ))), ]) .build() .unwrap(), @@ -1678,6 +1683,11 @@ mod tests { "v_ts_ntz", Type::Primitive(PrimitiveType::Timestamp), )), + Arc::new(NestedField::optional( + 12, + "v_ts_ns_ntz", + Type::Primitive(PrimitiveType::TimestampNs + ))) ]) .build() .unwrap(), diff --git a/crates/iceberg/src/spec/transform.rs b/crates/iceberg/src/spec/transform.rs index 9148844cc..6b7d03f11 100644 --- a/crates/iceberg/src/spec/transform.rs +++ b/crates/iceberg/src/spec/transform.rs @@ -159,6 +159,8 @@ impl Transform { | PrimitiveType::Time | PrimitiveType::Timestamp | PrimitiveType::Timestamptz + | PrimitiveType::TimestampNs + | PrimitiveType::TimestamptzNs | PrimitiveType::String | PrimitiveType::Uuid | PrimitiveType::Fixed(_) @@ -200,6 +202,8 @@ impl Transform { match p { PrimitiveType::Timestamp | PrimitiveType::Timestamptz + | PrimitiveType::TimestampNs + | PrimitiveType::TimestamptzNs | PrimitiveType::Date => Ok(Type::Primitive(PrimitiveType::Date)), _ => Err(Error::new( ErrorKind::DataInvalid, @@ -216,9 +220,10 @@ impl Transform { Transform::Hour => { if let Type::Primitive(p) = input_type { match p { - PrimitiveType::Timestamp | PrimitiveType::Timestamptz => { - Ok(Type::Primitive(PrimitiveType::Int)) - } + PrimitiveType::Timestamp + | PrimitiveType::Timestamptz + | PrimitiveType::TimestampNs + | PrimitiveType::TimestamptzNs => Ok(Type::Primitive(PrimitiveType::Int)), _ => Err(Error::new( ErrorKind::DataInvalid, format!("{input_type} is not a valid input type of {self} transform",), diff --git a/crates/iceberg/src/spec/values.rs b/crates/iceberg/src/spec/values.rs index 03fd1ec38..3568d3dcd 100644 --- a/crates/iceberg/src/spec/values.rs +++ b/crates/iceberg/src/spec/values.rs @@ -38,6 +38,7 @@ use serde::ser::SerializeStruct; use serde::{Deserialize, Serialize}; use serde_bytes::ByteBuf; use serde_json::{Map as JsonMap, Number, Value as JsonValue}; +use timestamp::nanoseconds_to_datetime; use uuid::Uuid; use super::datatypes::{PrimitiveType, Type}; @@ -45,7 +46,7 @@ use crate::error::Result; use crate::spec::values::date::{date_from_naive_date, days_to_date, unix_epoch}; use crate::spec::values::time::microseconds_to_time; use crate::spec::values::timestamp::microseconds_to_datetime; -use crate::spec::values::timestamptz::microseconds_to_datetimetz; +use crate::spec::values::timestamptz::{microseconds_to_datetimetz, nanoseconds_to_datetimetz}; use crate::spec::MAX_DECIMAL_PRECISION; use crate::{ensure_data_valid, Error, ErrorKind}; @@ -326,6 +327,12 @@ impl Display for Datum { (PrimitiveType::Timestamptz, PrimitiveLiteral::Long(val)) => { write!(f, "{}", microseconds_to_datetimetz(*val)) } + (PrimitiveType::TimestampNs, PrimitiveLiteral::Long(val)) => { + write!(f, "{}", nanoseconds_to_datetime(*val)) + } + (PrimitiveType::TimestamptzNs, PrimitiveLiteral::Long(val)) => { + write!(f, "{}", nanoseconds_to_datetimetz(*val)) + } (_, PrimitiveLiteral::String(val)) => write!(f, r#""{}""#, val), (PrimitiveType::Uuid, PrimitiveLiteral::UInt128(val)) => { write!(f, "{}", Uuid::from_u128(*val)) @@ -401,6 +408,12 @@ impl Datum { PrimitiveType::Timestamptz => { PrimitiveLiteral::Long(i64::from_le_bytes(bytes.try_into()?)) } + PrimitiveType::TimestampNs => { + PrimitiveLiteral::Long(i64::from_le_bytes(bytes.try_into()?)) + } + PrimitiveType::TimestamptzNs => { + PrimitiveLiteral::Long(i64::from_le_bytes(bytes.try_into()?)) + } PrimitiveType::String => { PrimitiveLiteral::String(std::str::from_utf8(bytes)?.to_string()) } @@ -734,6 +747,23 @@ impl Datum { } } + /// Creates a timestamp from unix epoch in nanoseconds. + /// + /// Example: + /// + /// ```rust + /// use iceberg::spec::Datum; + /// let t = Datum::timestamp_nanos(1000); + /// + /// assert_eq!(&format!("{t}"), "1970-01-01 00:00:00.000001"); + /// ``` + pub fn timestamp_nanos(value: i64) -> Self { + Self { + r#type: PrimitiveType::TimestampNs, + literal: PrimitiveLiteral::Long(value), + } + } + /// Creates a timestamp from [`DateTime`]. /// /// Example: @@ -792,6 +822,23 @@ impl Datum { } } + /// Creates a timestamp with timezone from unix epoch in nanoseconds. + /// + /// Example: + /// + /// ```rust + /// use iceberg::spec::Datum; + /// let t = Datum::timestamptz_nanos(1000); + /// + /// assert_eq!(&format!("{t}"), "1970-01-01 00:00:00.000001 UTC"); + /// ``` + pub fn timestamptz_nanos(value: i64) -> Self { + Self { + r#type: PrimitiveType::TimestamptzNs, + literal: PrimitiveLiteral::Long(value), + } + } + /// Creates a timestamp with timezone from [`DateTime`]. /// Example: /// @@ -1805,6 +1852,18 @@ impl Literal { .format("%Y-%m-%dT%H:%M:%S%.f+00:00") .to_string(), )), + (PrimitiveType::TimestampNs, PrimitiveLiteral::Long(val)) => Ok(JsonValue::String( + timestamp::nanoseconds_to_datetime(val) + .format("%Y-%m-%dT%H:%M:%S%.f") + .to_string(), + )), + (PrimitiveType::TimestamptzNs, PrimitiveLiteral::Long(val)) => { + Ok(JsonValue::String( + timestamptz::nanoseconds_to_datetimetz(val) + .format("%Y-%m-%dT%H:%M:%S%.f+00:00") + .to_string(), + )) + } (PrimitiveType::String, PrimitiveLiteral::String(val)) => { Ok(JsonValue::String(val.clone())) } @@ -1958,6 +2017,10 @@ mod timestamp { // This shouldn't fail until the year 262000 DateTime::from_timestamp_micros(micros).unwrap().naive_utc() } + + pub(crate) fn nanoseconds_to_datetime(nanos: i64) -> NaiveDateTime { + DateTime::from_timestamp_nanos(nanos).naive_utc() + } } mod timestamptz { @@ -1972,6 +2035,12 @@ mod timestamptz { DateTime::from_timestamp(secs, rem as u32 * 1_000).unwrap() } + + pub(crate) fn nanoseconds_to_datetimetz(nanos: i64) -> DateTime { + let (secs, rem) = (nanos / 1_000_000_000, nanos % 1_000_000_000); + + DateTime::from_timestamp(secs, rem as u32).unwrap() + } } mod _serde { diff --git a/crates/iceberg/src/transform/bucket.rs b/crates/iceberg/src/transform/bucket.rs index 83cbbd833..ce39826bb 100644 --- a/crates/iceberg/src/transform/bucket.rs +++ b/crates/iceberg/src/transform/bucket.rs @@ -256,7 +256,7 @@ mod test { use crate::expr::PredicateOperator; use crate::spec::PrimitiveType::{ Binary, Date, Decimal, Fixed, Int, Long, String as StringType, Time, Timestamp, - Timestamptz, Uuid, + TimestampNs, Timestamptz, TimestamptzNs, Uuid, }; use crate::spec::Type::{Primitive, Struct}; use crate::spec::{Datum, NestedField, PrimitiveType, StructType, Transform, Type}; @@ -297,6 +297,8 @@ mod test { (Primitive(Time), Some(Primitive(Int))), (Primitive(Timestamp), Some(Primitive(Int))), (Primitive(Timestamptz), Some(Primitive(Int))), + (Primitive(TimestampNs), Some(Primitive(Int))), + (Primitive(TimestamptzNs), Some(Primitive(Int))), ( Struct(StructType::new(vec![NestedField::optional( 1, diff --git a/crates/iceberg/src/transform/identity.rs b/crates/iceberg/src/transform/identity.rs index e23ccffa9..68e5a0b1a 100644 --- a/crates/iceberg/src/transform/identity.rs +++ b/crates/iceberg/src/transform/identity.rs @@ -38,7 +38,7 @@ impl TransformFunction for Identity { mod test { use crate::spec::PrimitiveType::{ Binary, Date, Decimal, Fixed, Int, Long, String as StringType, Time, Timestamp, - Timestamptz, Uuid, + TimestampNs, Timestamptz, TimestamptzNs, Uuid, }; use crate::spec::Type::{Primitive, Struct}; use crate::spec::{NestedField, StructType, Transform}; @@ -81,6 +81,8 @@ mod test { (Primitive(Time), Some(Primitive(Time))), (Primitive(Timestamp), Some(Primitive(Timestamp))), (Primitive(Timestamptz), Some(Primitive(Timestamptz))), + (Primitive(TimestampNs), Some(Primitive(TimestampNs))), + (Primitive(TimestamptzNs), Some(Primitive(TimestamptzNs))), ( Struct(StructType::new(vec![NestedField::optional( 1, diff --git a/crates/iceberg/src/transform/temporal.rs b/crates/iceberg/src/transform/temporal.rs index 72506a71e..f326cfed6 100644 --- a/crates/iceberg/src/transform/temporal.rs +++ b/crates/iceberg/src/transform/temporal.rs @@ -20,7 +20,9 @@ use std::sync::Arc; use arrow_arith::arity::binary; use arrow_arith::temporal::{date_part, DatePart}; use arrow_array::types::Date32Type; -use arrow_array::{Array, ArrayRef, Date32Array, Int32Array, TimestampMicrosecondArray}; +use arrow_array::{ + Array, ArrayRef, Date32Array, Int32Array, TimestampMicrosecondArray, TimestampNanosecondArray, +}; use arrow_schema::{DataType, TimeUnit}; use chrono::{DateTime, Datelike, Duration}; @@ -34,6 +36,8 @@ const HOUR_PER_SECOND: f64 = 1.0_f64 / 3600.0_f64; const UNIX_EPOCH_YEAR: i32 = 1970; /// One second in micros. const MICROS_PER_SECOND: i64 = 1_000_000; +/// One second in nanos. +const NANOS_PER_SECOND: i64 = 1_000_000_000; /// Extract a date or timestamp year, as years from 1970 #[derive(Debug)] @@ -41,7 +45,7 @@ pub struct Year; impl Year { #[inline] - fn timestamp_to_year(timestamp: i64) -> Result { + fn timestamp_to_year_micros(timestamp: i64) -> Result { Ok(DateTime::from_timestamp_micros(timestamp) .ok_or_else(|| { Error::new( @@ -52,6 +56,11 @@ impl Year { .year() - UNIX_EPOCH_YEAR) } + + #[inline] + fn timestamp_to_year_nanos(timestamp: i64) -> Result { + Ok(DateTime::from_timestamp_nanos(timestamp).year() - UNIX_EPOCH_YEAR) + } } impl TransformFunction for Year { @@ -72,8 +81,18 @@ impl TransformFunction for Year { (PrimitiveType::Date, PrimitiveLiteral::Int(v)) => { Date32Type::to_naive_date(*v).year() - UNIX_EPOCH_YEAR } - (PrimitiveType::Timestamp, PrimitiveLiteral::Long(v)) => Self::timestamp_to_year(*v)?, - (PrimitiveType::Timestamptz, PrimitiveLiteral::Long(v)) => Self::timestamp_to_year(*v)?, + (PrimitiveType::Timestamp, PrimitiveLiteral::Long(v)) => { + Self::timestamp_to_year_micros(*v)? + } + (PrimitiveType::Timestamptz, PrimitiveLiteral::Long(v)) => { + Self::timestamp_to_year_micros(*v)? + } + (PrimitiveType::TimestampNs, PrimitiveLiteral::Long(v)) => { + Self::timestamp_to_year_nanos(*v)? + } + (PrimitiveType::TimestamptzNs, PrimitiveLiteral::Long(v)) => { + Self::timestamp_to_year_nanos(*v)? + } _ => { return Err(crate::Error::new( crate::ErrorKind::FeatureUnsupported, @@ -94,7 +113,7 @@ pub struct Month; impl Month { #[inline] - fn timestamp_to_month(timestamp: i64) -> Result { + fn timestamp_to_month_micros(timestamp: i64) -> Result { // date: aaaa-aa-aa // unix epoch date: 1970-01-01 // if date > unix epoch date, delta month = (aa - 1) + 12 * (aaaa-1970) @@ -114,6 +133,22 @@ impl Month { Ok(-delta) } } + + #[inline] + fn timestamp_to_month_nanos(timestamp: i64) -> Result { + // date: aaaa-aa-aa + // unix epoch date: 1970-01-01 + // if date > unix epoch date, delta month = (aa - 1) + 12 * (aaaa-1970) + // if date < unix epoch date, delta month = (12 - (aa - 1)) + 12 * (1970-aaaa-1) + let date = DateTime::from_timestamp_nanos(timestamp); + let unix_epoch_date = DateTime::from_timestamp_nanos(0); + if date > unix_epoch_date { + Ok((date.month0() as i32) + 12 * (date.year() - UNIX_EPOCH_YEAR)) + } else { + let delta = (12 - date.month0() as i32) + 12 * (UNIX_EPOCH_YEAR - date.year() - 1); + Ok(-delta) + } + } } impl TransformFunction for Month { @@ -144,9 +179,17 @@ impl TransformFunction for Month { (Date32Type::to_naive_date(*v).year() - UNIX_EPOCH_YEAR) * 12 + Date32Type::to_naive_date(*v).month0() as i32 } - (PrimitiveType::Timestamp, PrimitiveLiteral::Long(v)) => Self::timestamp_to_month(*v)?, + (PrimitiveType::Timestamp, PrimitiveLiteral::Long(v)) => { + Self::timestamp_to_month_micros(*v)? + } (PrimitiveType::Timestamptz, PrimitiveLiteral::Long(v)) => { - Self::timestamp_to_month(*v)? + Self::timestamp_to_month_micros(*v)? + } + (PrimitiveType::TimestampNs, PrimitiveLiteral::Long(v)) => { + Self::timestamp_to_month_nanos(*v)? + } + (PrimitiveType::TimestamptzNs, PrimitiveLiteral::Long(v)) => { + Self::timestamp_to_month_nanos(*v)? } _ => { return Err(crate::Error::new( @@ -196,6 +239,35 @@ impl Day { Ok(days) } + + fn day_timestamp_nano(v: i64) -> Result { + let secs = v / NANOS_PER_SECOND; + + let (nanos, offset) = if v >= 0 { + let nanos = (v.rem_euclid(NANOS_PER_SECOND)) as u32; + let offset = 0i64; + (nanos, offset) + } else { + let v = v + 1; + let nanos = (v.rem_euclid(NANOS_PER_SECOND)) as u32; + let offset = 1i64; + (nanos, offset) + }; + + let delta = Duration::new(secs, nanos).ok_or_else(|| { + Error::new( + ErrorKind::DataInvalid, + format!( + "Failed to create 'TimeDelta' from seconds {} and nanos {}", + secs, nanos + ), + ) + })?; + + let days = (delta.num_days() - offset) as i32; + + Ok(days) + } } impl TransformFunction for Day { @@ -206,6 +278,11 @@ impl TransformFunction for Day { .downcast_ref::() .unwrap() .try_unary(|v| -> Result { Self::day_timestamp_micro(v) })?, + DataType::Timestamp(TimeUnit::Nanosecond, _) => input + .as_any() + .downcast_ref::() + .unwrap() + .try_unary(|v| -> Result { Self::day_timestamp_nano(v) })?, DataType::Date32 => input .as_any() .downcast_ref::() @@ -231,6 +308,12 @@ impl TransformFunction for Day { (PrimitiveType::Timestamptz, PrimitiveLiteral::Long(v)) => { Self::day_timestamp_micro(*v)? } + (PrimitiveType::TimestampNs, PrimitiveLiteral::Long(v)) => { + Self::day_timestamp_nano(*v)? + } + (PrimitiveType::TimestamptzNs, PrimitiveLiteral::Long(v)) => { + Self::day_timestamp_nano(*v)? + } _ => { return Err(crate::Error::new( crate::ErrorKind::FeatureUnsupported, @@ -254,6 +337,11 @@ impl Hour { fn hour_timestamp_micro(v: i64) -> i32 { (v as f64 / 1000.0 / 1000.0 * HOUR_PER_SECOND) as i32 } + + #[inline] + fn hour_timestamp_nano(v: i64) -> i32 { + (v as f64 / 1_000_000.0 / 1000.0 * HOUR_PER_SECOND) as i32 + } } impl TransformFunction for Hour { @@ -283,6 +371,12 @@ impl TransformFunction for Hour { (PrimitiveType::Timestamptz, PrimitiveLiteral::Long(v)) => { Self::hour_timestamp_micro(*v) } + (PrimitiveType::TimestampNs, PrimitiveLiteral::Long(v)) => { + Self::hour_timestamp_nano(*v) + } + (PrimitiveType::TimestamptzNs, PrimitiveLiteral::Long(v)) => { + Self::hour_timestamp_nano(*v) + } _ => { return Err(crate::Error::new( crate::ErrorKind::FeatureUnsupported, @@ -307,7 +401,7 @@ mod test { use crate::expr::PredicateOperator; use crate::spec::PrimitiveType::{ Binary, Date, Decimal, Fixed, Int, Long, String as StringType, Time, Timestamp, - Timestamptz, Uuid, + TimestampNs, Timestamptz, TimestamptzNs, Uuid, }; use crate::spec::Type::{Primitive, Struct}; use crate::spec::{Datum, NestedField, PrimitiveType, StructType, Transform, Type}; @@ -350,6 +444,8 @@ mod test { (Primitive(Time), None), (Primitive(Timestamp), Some(Primitive(Date))), (Primitive(Timestamptz), Some(Primitive(Date))), + (Primitive(TimestampNs), Some(Primitive(Date))), + (Primitive(TimestamptzNs), Some(Primitive(Date))), ( Struct(StructType::new(vec![NestedField::optional( 1, @@ -400,6 +496,8 @@ mod test { (Primitive(Time), None), (Primitive(Timestamp), Some(Primitive(Date))), (Primitive(Timestamptz), Some(Primitive(Date))), + (Primitive(TimestampNs), Some(Primitive(Date))), + (Primitive(TimestamptzNs), Some(Primitive(Date))), ( Struct(StructType::new(vec![NestedField::optional( 1, @@ -450,6 +548,8 @@ mod test { (Primitive(Time), None), (Primitive(Timestamp), Some(Primitive(Date))), (Primitive(Timestamptz), Some(Primitive(Date))), + (Primitive(TimestampNs), Some(Primitive(Date))), + (Primitive(TimestamptzNs), Some(Primitive(Date))), ( Struct(StructType::new(vec![NestedField::optional( 1, @@ -500,6 +600,8 @@ mod test { (Primitive(Time), None), (Primitive(Timestamp), Some(Primitive(Int))), (Primitive(Timestamptz), Some(Primitive(Int))), + (Primitive(TimestampNs), Some(Primitive(Int))), + (Primitive(TimestamptzNs), Some(Primitive(Int))), ( Struct(StructType::new(vec![NestedField::optional( 1, @@ -2323,6 +2425,50 @@ mod test { assert_eq!(res, expect); } + fn test_timestamp_ns_and_tz_transform( + time: &str, + transform: &BoxedTransformFunction, + expect: Datum, + ) { + let timestamp_ns = Datum::timestamp_nanos( + NaiveDateTime::parse_from_str(time, "%Y-%m-%d %H:%M:%S.%f") + .unwrap() + .and_utc() + .timestamp_nanos_opt() + .unwrap(), + ); + let timestamptz_ns = Datum::timestamptz_nanos( + NaiveDateTime::parse_from_str(time, "%Y-%m-%d %H:%M:%S.%f") + .unwrap() + .and_utc() + .timestamp_nanos_opt() + .unwrap(), + ); + let res = transform.transform_literal(×tamp_ns).unwrap().unwrap(); + assert_eq!(res, expect); + let res = transform + .transform_literal(×tamptz_ns) + .unwrap() + .unwrap(); + assert_eq!(res, expect); + } + + fn test_timestamp_ns_and_tz_transform_using_i64( + time: i64, + transform: &BoxedTransformFunction, + expect: Datum, + ) { + let timestamp_ns = Datum::timestamp_nanos(time); + let timestamptz_ns = Datum::timestamptz_nanos(time); + let res = transform.transform_literal(×tamp_ns).unwrap().unwrap(); + assert_eq!(res, expect); + let res = transform + .transform_literal(×tamptz_ns) + .unwrap() + .unwrap(); + assert_eq!(res, expect); + } + #[test] fn test_transform_year_literal() { let year = Box::new(super::Year) as BoxedTransformFunction; @@ -2338,6 +2484,14 @@ mod test { Datum::int(1970 - super::UNIX_EPOCH_YEAR), ); test_timestamp_and_tz_transform("1969-01-01 00:00:00.00", &year, Datum::int(-1)); + + // Test TimestampNanosecond + test_timestamp_ns_and_tz_transform_using_i64( + 186280000000, + &year, + Datum::int(1970 - super::UNIX_EPOCH_YEAR), + ); + test_timestamp_ns_and_tz_transform("1969-01-01 00:00:00.00", &year, Datum::int(-1)); } #[test] @@ -2432,6 +2586,17 @@ mod test { test_timestamp_and_tz_transform("2017-12-01 00:00:00.00", &month, Datum::int(575)); test_timestamp_and_tz_transform("1970-01-01 00:00:00.00", &month, Datum::int(0)); test_timestamp_and_tz_transform("1969-12-31 00:00:00.00", &month, Datum::int(-1)); + + // Test TimestampNanosecond + test_timestamp_ns_and_tz_transform_using_i64( + 186280000000, + &month, + Datum::int((1970 - super::UNIX_EPOCH_YEAR) * 12), + ); + test_timestamp_ns_and_tz_transform("1969-12-01 23:00:00.00", &month, Datum::int(-1)); + test_timestamp_ns_and_tz_transform("2017-12-01 00:00:00.00", &month, Datum::int(575)); + test_timestamp_ns_and_tz_transform("1970-01-01 00:00:00.00", &month, Datum::int(0)); + test_timestamp_ns_and_tz_transform("1969-12-31 00:00:00.00", &month, Datum::int(-1)); } #[test] @@ -2523,6 +2688,11 @@ mod test { test_timestamp_and_tz_transform_using_i64(1512151975038194, &day, Datum::int(17501)); test_timestamp_and_tz_transform_using_i64(-115200000000, &day, Datum::int(-2)); test_timestamp_and_tz_transform("2017-12-01 10:30:42.123", &day, Datum::int(17501)); + + // Test TimestampNanosecond + test_timestamp_ns_and_tz_transform_using_i64(1512151975038194, &day, Datum::int(17)); + test_timestamp_ns_and_tz_transform_using_i64(-115200000000, &day, Datum::int(-1)); + test_timestamp_ns_and_tz_transform("2017-12-01 10:30:42.123", &day, Datum::int(17501)); } #[test] @@ -2591,5 +2761,9 @@ mod test { // Test TimestampMicrosecond test_timestamp_and_tz_transform("2017-12-01 18:00:00.00", &hour, Datum::int(420042)); test_timestamp_and_tz_transform("1969-12-31 23:00:00.00", &hour, Datum::int(-1)); + + // Test TimestampNanosecond + test_timestamp_ns_and_tz_transform("2017-12-01 18:00:00.00", &hour, Datum::int(420042)); + test_timestamp_ns_and_tz_transform("1969-12-31 23:00:00.00", &hour, Datum::int(-1)); } } diff --git a/crates/iceberg/src/transform/truncate.rs b/crates/iceberg/src/transform/truncate.rs index cba5409bb..83f769e27 100644 --- a/crates/iceberg/src/transform/truncate.rs +++ b/crates/iceberg/src/transform/truncate.rs @@ -174,7 +174,7 @@ mod test { use crate::expr::PredicateOperator; use crate::spec::PrimitiveType::{ Binary, Date, Decimal, Fixed, Int, Long, String as StringType, Time, Timestamp, - Timestamptz, Uuid, + TimestampNs, Timestamptz, TimestamptzNs, Uuid, }; use crate::spec::Type::{Primitive, Struct}; use crate::spec::{Datum, NestedField, PrimitiveType, StructType, Transform, Type}; @@ -219,6 +219,8 @@ mod test { (Primitive(Time), None), (Primitive(Timestamp), None), (Primitive(Timestamptz), None), + (Primitive(TimestampNs), None), + (Primitive(TimestamptzNs), None), ( Struct(StructType::new(vec![NestedField::optional( 1, diff --git a/crates/iceberg/src/transform/void.rs b/crates/iceberg/src/transform/void.rs index 92d76b141..5d429a593 100644 --- a/crates/iceberg/src/transform/void.rs +++ b/crates/iceberg/src/transform/void.rs @@ -37,7 +37,7 @@ impl TransformFunction for Void { mod test { use crate::spec::PrimitiveType::{ Binary, Date, Decimal, Fixed, Int, Long, String as StringType, Time, Timestamp, - Timestamptz, Uuid, + TimestampNs, Timestamptz, TimestamptzNs, Uuid, }; use crate::spec::Type::{Primitive, Struct}; use crate::spec::{NestedField, StructType, Transform}; @@ -81,6 +81,8 @@ mod test { (Primitive(Time), Some(Primitive(Time))), (Primitive(Timestamp), Some(Primitive(Timestamp))), (Primitive(Timestamptz), Some(Primitive(Timestamptz))), + (Primitive(TimestampNs), Some(Primitive(TimestampNs))), + (Primitive(TimestamptzNs), Some(Primitive(TimestamptzNs))), ( Struct(StructType::new(vec![NestedField::optional( 1, diff --git a/crates/iceberg/src/writer/file_writer/parquet_writer.rs b/crates/iceberg/src/writer/file_writer/parquet_writer.rs index d41714b9e..11ba04f6a 100644 --- a/crates/iceberg/src/writer/file_writer/parquet_writer.rs +++ b/crates/iceberg/src/writer/file_writer/parquet_writer.rs @@ -337,6 +337,14 @@ impl MinMaxColAggregator { let convert_func = |v: i64| Result::::Ok(Datum::timestamptz_micros(v)); self.update_state::(field_id, &stat, convert_func) } + (PrimitiveType::TimestampNs, Statistics::Int64(stat)) => { + let convert_func = |v: i64| Result::::Ok(Datum::timestamp_nanos(v)); + self.update_state::(field_id, &stat, convert_func) + } + (PrimitiveType::TimestamptzNs, Statistics::Int64(stat)) => { + let convert_func = |v: i64| Result::::Ok(Datum::timestamptz_nanos(v)); + self.update_state::(field_id, &stat, convert_func) + } ( PrimitiveType::Decimal { precision: _, @@ -595,7 +603,7 @@ mod tests { use anyhow::Result; use arrow_array::types::Int64Type; use arrow_array::{ - ArrayRef, BooleanArray, Int32Array, Int64Array, ListArray, RecordBatch, StructArray, + Array, ArrayRef, BooleanArray, Int32Array, Int64Array, ListArray, RecordBatch, StructArray, }; use arrow_schema::{DataType, SchemaRef as ArrowSchemaRef}; use arrow_select::concat::concat_batches; @@ -632,6 +640,18 @@ mod tests { .into(), NestedField::optional( 11, + "timestamp_ns", + Type::Primitive(PrimitiveType::TimestampNs), + ) + .into(), + NestedField::optional( + 12, + "timestamptz_ns", + Type::Primitive(PrimitiveType::TimestamptzNs), + ) + .into(), + NestedField::optional( + 13, "decimal", Type::Primitive(PrimitiveType::Decimal { precision: 10, @@ -639,8 +659,8 @@ mod tests { }), ) .into(), - NestedField::optional(12, "uuid", Type::Primitive(PrimitiveType::Uuid)).into(), - NestedField::optional(13, "fixed", Type::Primitive(PrimitiveType::Fixed(10))) + NestedField::optional(14, "uuid", Type::Primitive(PrimitiveType::Uuid)).into(), + NestedField::optional(15, "fixed", Type::Primitive(PrimitiveType::Fixed(10))) .into(), ]) .build() @@ -1091,12 +1111,22 @@ mod tests { arrow_array::TimestampMicrosecondArray::from(vec![Some(0), Some(1), None, Some(3)]) .with_timezone_utc(), ) as ArrayRef; - let col11 = Arc::new( + let col11 = Arc::new(arrow_array::TimestampNanosecondArray::from(vec![ + Some(0), + Some(1), + None, + Some(3), + ])) as ArrayRef; + let col12 = Arc::new( + arrow_array::TimestampNanosecondArray::from(vec![Some(0), Some(1), None, Some(3)]) + .with_timezone_utc(), + ) as ArrayRef; + let col13 = Arc::new( arrow_array::Decimal128Array::from(vec![Some(1), Some(2), None, Some(100)]) .with_precision_and_scale(10, 5) .unwrap(), ) as ArrayRef; - let col12 = Arc::new( + let col14 = Arc::new( arrow_array::FixedSizeBinaryArray::try_from_sparse_iter_with_size( vec![ Some(Uuid::from_u128(0).as_bytes().to_vec()), @@ -1109,7 +1139,7 @@ mod tests { ) .unwrap(), ) as ArrayRef; - let col13 = Arc::new( + let col15 = Arc::new( arrow_array::FixedSizeBinaryArray::try_from_sparse_iter_with_size( vec![ Some(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]), @@ -1124,6 +1154,7 @@ mod tests { ) as ArrayRef; let to_write = RecordBatch::try_new(arrow_schema.clone(), vec![ col0, col1, col2, col3, col4, col5, col6, col7, col8, col9, col10, col11, col12, col13, + col14, col15, ]) .unwrap(); @@ -1171,8 +1202,10 @@ mod tests { (8, Datum::time_micros(0).unwrap()), (9, Datum::timestamp_micros(0)), (10, Datum::timestamptz_micros(0)), + (11, Datum::timestamp_nanos(0)), + (12, Datum::timestamptz_nanos(0)), ( - 11, + 13, Datum::new( PrimitiveType::Decimal { precision: 10, @@ -1181,10 +1214,8 @@ mod tests { PrimitiveLiteral::Int128(1) ) ), - (12, Datum::uuid(Uuid::from_u128(0))), - (13, Datum::fixed(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10])), - (12, Datum::uuid(Uuid::from_u128(0))), - (13, Datum::fixed(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10])), + (14, Datum::uuid(Uuid::from_u128(0))), + (15, Datum::fixed(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10])), ]) ); assert_eq!( @@ -1201,8 +1232,10 @@ mod tests { (8, Datum::time_micros(3).unwrap()), (9, Datum::timestamp_micros(3)), (10, Datum::timestamptz_micros(3)), + (11, Datum::timestamp_nanos(3)), + (12, Datum::timestamptz_nanos(3)), ( - 11, + 13, Datum::new( PrimitiveType::Decimal { precision: 10, @@ -1211,9 +1244,9 @@ mod tests { PrimitiveLiteral::Int128(100) ) ), - (12, Datum::uuid(Uuid::from_u128(3))), + (14, Datum::uuid(Uuid::from_u128(3))), ( - 13, + 15, Datum::fixed(vec![21, 22, 23, 24, 25, 26, 27, 28, 29, 30]) ), ])