Skip to content

Commit 507a241

Browse files
feat: change UTC timezone format to +00:00 and integrate whenever library
Co-Authored-By: Aaron <AJ> Steers <[email protected]>
1 parent af48bb6 commit 507a241

File tree

4 files changed

+2000134
-85
lines changed

4 files changed

+2000134
-85
lines changed

airbyte_cdk/utils/datetime_helpers.py

Lines changed: 47 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@
8686

8787
from dateutil import parser
8888
from typing_extensions import Never
89+
from whenever import Instant, LocalDateTime, ZonedDateTime
8990

9091

9192
class AirbyteDateTime(datetime):
@@ -150,8 +151,6 @@ def __str__(self) -> str:
150151
base = self.strftime("%Y-%m-%dT%H:%M:%S")
151152
if self.microsecond:
152153
base = f"{base}.{self.microsecond:06d}"
153-
if aware_self.tzinfo == timezone.utc:
154-
return f"{base}+00:00"
155154
# Format timezone as ±HH:MM
156155
offset = aware_self.strftime("%z")
157156
return f"{base}{offset[:3]}:{offset[3:]}"
@@ -353,7 +352,7 @@ def ab_datetime_parse(dt_str: Union[str, int]) -> AirbyteDateTime:
353352
- ISO8601/RFC3339 format strings (with 'T' delimiter)
354353
- Unix timestamps (as integers or strings)
355354
- Date-only strings (YYYY-MM-DD)
356-
- Timezone-aware formats (Z for UTC, or ±HH:MM offset)
355+
- Timezone-aware formats (+00:00 for UTC, or ±HH:MM offset)
357356
358357
Always returns a timezone-aware datetime (defaults to UTC if no timezone specified).
359358
@@ -368,108 +367,68 @@ def ab_datetime_parse(dt_str: Union[str, int]) -> AirbyteDateTime:
368367
ValueError: If the input cannot be parsed as a valid datetime.
369368
370369
Example:
371-
>>> ab_datetime_parse("2023-03-14T15:09:26Z")
372-
'2023-03-14T15:09:26Z'
370+
>>> ab_datetime_parse("2023-03-14T15:09:26+00:00")
371+
'2023-03-14T15:09:26+00:00'
373372
>>> ab_datetime_parse(1678806000) # Unix timestamp
374-
'2023-03-14T15:00:00Z'
373+
'2023-03-14T15:00:00+00:00'
375374
>>> ab_datetime_parse("2023-03-14") # Date-only
376-
'2023-03-14T00:00:00Z'
375+
'2023-03-14T00:00:00+00:00'
377376
"""
378377
try:
379378
# Handle numeric values as Unix timestamps (UTC)
380379
if isinstance(dt_str, int) or (
381380
isinstance(dt_str, str)
382381
and (dt_str.isdigit() or (dt_str.startswith("-") and dt_str[1:].isdigit()))
383382
):
384-
# Always treat numeric values as Unix timestamps (UTC)
385383
timestamp = int(dt_str)
386384
if timestamp < 0:
387385
raise ValueError("Timestamp cannot be negative")
388386
if len(str(abs(timestamp))) > 10: # More than 10 digits means beyond year 2286
389387
raise ValueError("Timestamp value too large")
390-
# Use utcfromtimestamp to ensure consistent UTC handling without local timezone influence
391-
dt_obj = datetime.fromtimestamp(timestamp, timezone.utc)
392-
return AirbyteDateTime.from_datetime(dt_obj)
388+
instant = Instant.from_timestamp(timestamp)
389+
return AirbyteDateTime.from_datetime(instant.py_datetime())
393390

394391
if not isinstance(dt_str, str):
395392
raise ValueError(f"Expected string or integer, got {type(dt_str)}")
396393

397-
# For string inputs, first check if it's a valid datetime format
398-
if isinstance(dt_str, str):
399-
if dt_str.isdigit():
400-
# Handle Unix timestamp as string
401-
timestamp = int(dt_str)
402-
if timestamp < 0:
403-
raise ValueError("Timestamp cannot be negative")
404-
if len(str(timestamp)) > 10: # More than 10 digits means beyond year 2286
405-
raise ValueError("Timestamp value too large")
406-
dt_obj = datetime.utcfromtimestamp(timestamp)
407-
return AirbyteDateTime.from_datetime(dt_obj.replace(tzinfo=timezone.utc))
408-
# For date-only strings (YYYY-MM-DD), add time component
409-
if "T" not in dt_str:
410-
# For date-only format, validate and convert
411-
if ":" not in dt_str:
412-
# Check for wrong separators
413-
if "/" in dt_str:
414-
raise ValueError(f"Invalid date format (expected YYYY-MM-DD): {dt_str}")
415-
parts = dt_str.split("-")
416-
if len(parts) != 3:
417-
raise ValueError(f"Invalid date format (expected YYYY-MM-DD): {dt_str}")
394+
# For string inputs with time component but no T delimiter, it's invalid
395+
if ":" in dt_str and "T" not in dt_str:
396+
raise ValueError(f"Missing T delimiter in datetime string: {dt_str}")
397+
398+
# Check for wrong separators in date format
399+
if "/" in dt_str:
400+
raise ValueError(f"Invalid date format (expected YYYY-MM-DD): {dt_str}")
401+
402+
# Check for invalid timezone formats (GMT, UTC, etc.)
403+
if any(x in dt_str for x in ["GMT", "UTC"]):
404+
raise ValueError(f"Invalid timezone format: {dt_str}")
405+
406+
# Try parsing with whenever's RFC3339 parser first
407+
try:
408+
instant = Instant.parse_rfc3339(dt_str)
409+
return AirbyteDateTime.from_datetime(instant.py_datetime())
410+
except ValueError:
411+
# If RFC3339 fails, try dateutil parser
412+
try:
413+
parsed = parser.parse(dt_str)
414+
if parsed.tzinfo is None:
415+
parsed = parsed.replace(tzinfo=timezone.utc)
416+
return AirbyteDateTime.from_datetime(parsed)
417+
except (ValueError, TypeError):
418+
# Finally try ISO8601 date-only format
419+
if ":" not in dt_str and dt_str.count("-") == 2:
418420
try:
419-
# Validate date components before adding time
420-
year, month, day = map(int, parts)
421-
if not (1 <= month <= 12 and 1 <= day <= 31):
422-
raise ValueError(f"Invalid date components in: {dt_str}")
423-
# Create datetime directly instead of string manipulation
424-
return AirbyteDateTime(year, month, day, tzinfo=timezone.utc)
425-
except ValueError as e:
426-
raise ValueError(f"Invalid date format: {dt_str}") from e
421+
year, month, day = map(int, dt_str.split("-"))
422+
instant = Instant.from_utc(year, month, day, 0, 0, 0)
423+
return AirbyteDateTime.from_datetime(instant.py_datetime())
424+
except (ValueError, TypeError):
425+
raise ValueError(f"Invalid date format: {dt_str}")
427426
else:
428-
# If it has time component but no T delimiter, it's invalid
429-
raise ValueError(f"Missing T delimiter in datetime string: {dt_str}")
430-
# For string inputs, validate timezone format
431-
if isinstance(dt_str, str):
432-
# First check for timezone format
433-
if dt_str.endswith("Z"):
434-
# Remove Z, parse as UTC, then ensure we output Z format
435-
try:
436-
dt_obj = parser.parse(dt_str[:-1])
437-
if dt_obj.tzinfo is not None:
438-
raise ValueError(f"Invalid timezone format (Z with offset): {dt_str}")
439-
dt_obj = dt_obj.replace(tzinfo=timezone.utc)
440-
return AirbyteDateTime.from_datetime(dt_obj)
441-
except ValueError as e:
442-
raise ValueError(f"Invalid datetime format: {dt_str}") from e
443-
elif dt_str.endswith("GMT"):
444-
raise ValueError(f"Invalid timezone format (use Z or ±HH:MM): {dt_str}")
445-
elif len(dt_str) >= 6 and dt_str[-3] == ":" and dt_str[-6] in ("+", "-"):
446-
# Check for valid timezone offset format (±HH:MM)
447-
try:
448-
hours = int(dt_str[-5:-3])
449-
minutes = int(dt_str[-2:])
450-
if hours >= 24 or minutes >= 60:
451-
raise ValueError(f"Invalid timezone offset values: {dt_str}")
452-
# Let dateutil.parser handle the actual parsing
453-
dt_obj = parser.parse(dt_str)
454-
return AirbyteDateTime.from_datetime(dt_obj)
455-
except ValueError as e:
456-
raise ValueError(f"Invalid timezone offset format: {dt_str}") from e
457-
458-
# For all other formats, try parsing and validate timezone
459-
try:
460-
dt_obj = parser.parse(dt_str)
461-
# For strings without timezone, assume UTC as documented
462-
if dt_obj.tzinfo is None:
463-
dt_obj = dt_obj.replace(tzinfo=timezone.utc)
464-
return AirbyteDateTime.from_datetime(dt_obj)
465-
except ValueError as e:
466-
raise ValueError(f"Could not parse datetime string: {dt_str}") from e
427+
raise ValueError(f"Invalid datetime format: {dt_str}")
467428
except ValueError as e:
468429
if "Timestamp cannot be negative" in str(e) or "Timestamp value too large" in str(e):
469430
raise
470431
raise ValueError(f"Could not parse datetime string: {dt_str}") from e
471-
except TypeError as e:
472-
raise ValueError(f"Could not parse datetime string: {dt_str}") from e
473432

474433

475434
def ab_datetime_format(dt: Union[datetime, AirbyteDateTime]) -> str:
@@ -497,7 +456,13 @@ def ab_datetime_format(dt: Union[datetime, AirbyteDateTime]) -> str:
497456

498457
if dt.tzinfo is None:
499458
dt = dt.replace(tzinfo=timezone.utc)
500-
return dt.isoformat()
459+
460+
# Format with consistent timezone representation
461+
base = dt.strftime("%Y-%m-%dT%H:%M:%S")
462+
if dt.microsecond:
463+
base = f"{base}.{dt.microsecond:06d}"
464+
offset = dt.strftime("%z")
465+
return f"{base}{offset[:3]}:{offset[3:]}"
501466

502467

503468
def ab_datetime_try_parse(dt_str: str) -> AirbyteDateTime | None:

0 commit comments

Comments
 (0)