86
86
87
87
from dateutil import parser
88
88
from typing_extensions import Never
89
+ from whenever import Instant , LocalDateTime , ZonedDateTime
89
90
90
91
91
92
class AirbyteDateTime (datetime ):
@@ -150,8 +151,6 @@ def __str__(self) -> str:
150
151
base = self .strftime ("%Y-%m-%dT%H:%M:%S" )
151
152
if self .microsecond :
152
153
base = f"{ base } .{ self .microsecond :06d} "
153
- if aware_self .tzinfo == timezone .utc :
154
- return f"{ base } +00:00"
155
154
# Format timezone as ±HH:MM
156
155
offset = aware_self .strftime ("%z" )
157
156
return f"{ base } { offset [:3 ]} :{ offset [3 :]} "
@@ -353,7 +352,7 @@ def ab_datetime_parse(dt_str: Union[str, int]) -> AirbyteDateTime:
353
352
- ISO8601/RFC3339 format strings (with 'T' delimiter)
354
353
- Unix timestamps (as integers or strings)
355
354
- Date-only strings (YYYY-MM-DD)
356
- - Timezone-aware formats (Z for UTC, or ±HH:MM offset)
355
+ - Timezone-aware formats (+00:00 for UTC, or ±HH:MM offset)
357
356
358
357
Always returns a timezone-aware datetime (defaults to UTC if no timezone specified).
359
358
@@ -368,108 +367,68 @@ def ab_datetime_parse(dt_str: Union[str, int]) -> AirbyteDateTime:
368
367
ValueError: If the input cannot be parsed as a valid datetime.
369
368
370
369
Example:
371
- >>> ab_datetime_parse("2023-03-14T15:09:26Z ")
372
- '2023-03-14T15:09:26Z '
370
+ >>> ab_datetime_parse("2023-03-14T15:09:26+00:00 ")
371
+ '2023-03-14T15:09:26+00:00 '
373
372
>>> ab_datetime_parse(1678806000) # Unix timestamp
374
- '2023-03-14T15:00:00Z '
373
+ '2023-03-14T15:00:00+00:00 '
375
374
>>> ab_datetime_parse("2023-03-14") # Date-only
376
- '2023-03-14T00:00:00Z '
375
+ '2023-03-14T00:00:00+00:00 '
377
376
"""
378
377
try :
379
378
# Handle numeric values as Unix timestamps (UTC)
380
379
if isinstance (dt_str , int ) or (
381
380
isinstance (dt_str , str )
382
381
and (dt_str .isdigit () or (dt_str .startswith ("-" ) and dt_str [1 :].isdigit ()))
383
382
):
384
- # Always treat numeric values as Unix timestamps (UTC)
385
383
timestamp = int (dt_str )
386
384
if timestamp < 0 :
387
385
raise ValueError ("Timestamp cannot be negative" )
388
386
if len (str (abs (timestamp ))) > 10 : # More than 10 digits means beyond year 2286
389
387
raise ValueError ("Timestamp value too large" )
390
- # Use utcfromtimestamp to ensure consistent UTC handling without local timezone influence
391
- dt_obj = datetime .fromtimestamp (timestamp , timezone .utc )
392
- return AirbyteDateTime .from_datetime (dt_obj )
388
+ instant = Instant .from_timestamp (timestamp )
389
+ return AirbyteDateTime .from_datetime (instant .py_datetime ())
393
390
394
391
if not isinstance (dt_str , str ):
395
392
raise ValueError (f"Expected string or integer, got { type (dt_str )} " )
396
393
397
- # For string inputs, first check if it's a valid datetime format
398
- if isinstance (dt_str , str ):
399
- if dt_str .isdigit ():
400
- # Handle Unix timestamp as string
401
- timestamp = int (dt_str )
402
- if timestamp < 0 :
403
- raise ValueError ("Timestamp cannot be negative" )
404
- if len (str (timestamp )) > 10 : # More than 10 digits means beyond year 2286
405
- raise ValueError ("Timestamp value too large" )
406
- dt_obj = datetime .utcfromtimestamp (timestamp )
407
- return AirbyteDateTime .from_datetime (dt_obj .replace (tzinfo = timezone .utc ))
408
- # For date-only strings (YYYY-MM-DD), add time component
409
- if "T" not in dt_str :
410
- # For date-only format, validate and convert
411
- if ":" not in dt_str :
412
- # Check for wrong separators
413
- if "/" in dt_str :
414
- raise ValueError (f"Invalid date format (expected YYYY-MM-DD): { dt_str } " )
415
- parts = dt_str .split ("-" )
416
- if len (parts ) != 3 :
417
- raise ValueError (f"Invalid date format (expected YYYY-MM-DD): { dt_str } " )
394
+ # For string inputs with time component but no T delimiter, it's invalid
395
+ if ":" in dt_str and "T" not in dt_str :
396
+ raise ValueError (f"Missing T delimiter in datetime string: { dt_str } " )
397
+
398
+ # Check for wrong separators in date format
399
+ if "/" in dt_str :
400
+ raise ValueError (f"Invalid date format (expected YYYY-MM-DD): { dt_str } " )
401
+
402
+ # Check for invalid timezone formats (GMT, UTC, etc.)
403
+ if any (x in dt_str for x in ["GMT" , "UTC" ]):
404
+ raise ValueError (f"Invalid timezone format: { dt_str } " )
405
+
406
+ # Try parsing with whenever's RFC3339 parser first
407
+ try :
408
+ instant = Instant .parse_rfc3339 (dt_str )
409
+ return AirbyteDateTime .from_datetime (instant .py_datetime ())
410
+ except ValueError :
411
+ # If RFC3339 fails, try dateutil parser
412
+ try :
413
+ parsed = parser .parse (dt_str )
414
+ if parsed .tzinfo is None :
415
+ parsed = parsed .replace (tzinfo = timezone .utc )
416
+ return AirbyteDateTime .from_datetime (parsed )
417
+ except (ValueError , TypeError ):
418
+ # Finally try ISO8601 date-only format
419
+ if ":" not in dt_str and dt_str .count ("-" ) == 2 :
418
420
try :
419
- # Validate date components before adding time
420
- year , month , day = map (int , parts )
421
- if not (1 <= month <= 12 and 1 <= day <= 31 ):
422
- raise ValueError (f"Invalid date components in: { dt_str } " )
423
- # Create datetime directly instead of string manipulation
424
- return AirbyteDateTime (year , month , day , tzinfo = timezone .utc )
425
- except ValueError as e :
426
- raise ValueError (f"Invalid date format: { dt_str } " ) from e
421
+ year , month , day = map (int , dt_str .split ("-" ))
422
+ instant = Instant .from_utc (year , month , day , 0 , 0 , 0 )
423
+ return AirbyteDateTime .from_datetime (instant .py_datetime ())
424
+ except (ValueError , TypeError ):
425
+ raise ValueError (f"Invalid date format: { dt_str } " )
427
426
else :
428
- # If it has time component but no T delimiter, it's invalid
429
- raise ValueError (f"Missing T delimiter in datetime string: { dt_str } " )
430
- # For string inputs, validate timezone format
431
- if isinstance (dt_str , str ):
432
- # First check for timezone format
433
- if dt_str .endswith ("Z" ):
434
- # Remove Z, parse as UTC, then ensure we output Z format
435
- try :
436
- dt_obj = parser .parse (dt_str [:- 1 ])
437
- if dt_obj .tzinfo is not None :
438
- raise ValueError (f"Invalid timezone format (Z with offset): { dt_str } " )
439
- dt_obj = dt_obj .replace (tzinfo = timezone .utc )
440
- return AirbyteDateTime .from_datetime (dt_obj )
441
- except ValueError as e :
442
- raise ValueError (f"Invalid datetime format: { dt_str } " ) from e
443
- elif dt_str .endswith ("GMT" ):
444
- raise ValueError (f"Invalid timezone format (use Z or ±HH:MM): { dt_str } " )
445
- elif len (dt_str ) >= 6 and dt_str [- 3 ] == ":" and dt_str [- 6 ] in ("+" , "-" ):
446
- # Check for valid timezone offset format (±HH:MM)
447
- try :
448
- hours = int (dt_str [- 5 :- 3 ])
449
- minutes = int (dt_str [- 2 :])
450
- if hours >= 24 or minutes >= 60 :
451
- raise ValueError (f"Invalid timezone offset values: { dt_str } " )
452
- # Let dateutil.parser handle the actual parsing
453
- dt_obj = parser .parse (dt_str )
454
- return AirbyteDateTime .from_datetime (dt_obj )
455
- except ValueError as e :
456
- raise ValueError (f"Invalid timezone offset format: { dt_str } " ) from e
457
-
458
- # For all other formats, try parsing and validate timezone
459
- try :
460
- dt_obj = parser .parse (dt_str )
461
- # For strings without timezone, assume UTC as documented
462
- if dt_obj .tzinfo is None :
463
- dt_obj = dt_obj .replace (tzinfo = timezone .utc )
464
- return AirbyteDateTime .from_datetime (dt_obj )
465
- except ValueError as e :
466
- raise ValueError (f"Could not parse datetime string: { dt_str } " ) from e
427
+ raise ValueError (f"Invalid datetime format: { dt_str } " )
467
428
except ValueError as e :
468
429
if "Timestamp cannot be negative" in str (e ) or "Timestamp value too large" in str (e ):
469
430
raise
470
431
raise ValueError (f"Could not parse datetime string: { dt_str } " ) from e
471
- except TypeError as e :
472
- raise ValueError (f"Could not parse datetime string: { dt_str } " ) from e
473
432
474
433
475
434
def ab_datetime_format (dt : Union [datetime , AirbyteDateTime ]) -> str :
@@ -497,7 +456,13 @@ def ab_datetime_format(dt: Union[datetime, AirbyteDateTime]) -> str:
497
456
498
457
if dt .tzinfo is None :
499
458
dt = dt .replace (tzinfo = timezone .utc )
500
- return dt .isoformat ()
459
+
460
+ # Format with consistent timezone representation
461
+ base = dt .strftime ("%Y-%m-%dT%H:%M:%S" )
462
+ if dt .microsecond :
463
+ base = f"{ base } .{ dt .microsecond :06d} "
464
+ offset = dt .strftime ("%z" )
465
+ return f"{ base } { offset [:3 ]} :{ offset [3 :]} "
501
466
502
467
503
468
def ab_datetime_try_parse (dt_str : str ) -> AirbyteDateTime | None :
0 commit comments