Commit adef1e8

feat(low-code cdk): add KeyToSnakeCase transformation (#178)
1 parent 216cd43 commit adef1e8

File tree

7 files changed: +548 −384 lines

airbyte_cdk/sources/declarative/declarative_component_schema.yaml

Lines changed: 17 additions & 1 deletion
@@ -1234,6 +1234,7 @@ definitions:
             - "$ref": "#/definitions/CustomTransformation"
             - "$ref": "#/definitions/RemoveFields"
             - "$ref": "#/definitions/KeysToLower"
+            - "$ref": "#/definitions/KeysToSnakeCase"
       state_migrations:
         title: State Migrations
         description: Array of state migrations to be applied on the input state
@@ -1838,6 +1839,19 @@ definitions:
       $parameters:
         type: object
         additionalProperties: true
+  KeysToSnakeCase:
+    title: Key to Snake Case
+    description: A transformation that renames all keys to snake case.
+    type: object
+    required:
+      - type
+    properties:
+      type:
+        type: string
+        enum: [KeysToSnakeCase]
+      $parameters:
+        type: object
+        additionalProperties: true
   IterableDecoder:
     title: Iterable Decoder
     description: Use this if the response consists of strings separated by new lines (`\n`). The Decoder will wrap each row into a JSON object with the `record` key.
@@ -2160,7 +2174,9 @@ definitions:
           description: |-
             The DeclarativeOAuth Specific optional headers to inject while exchanging the `auth_code` to `access_token` during `completeOAuthFlow` step.
           examples:
-            - {"Authorization": "Basic {base64Encoder:{client_id}:{client_secret}}"}
+            - {
+                "Authorization": "Basic {base64Encoder:{client_id}:{client_secret}}",
+              }
         access_token_params:
           title: Access Token Query Params (Json Encoded)
           type: object
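
For illustration, a stream that opts into the new component would list it under transformations. The following is a hypothetical fragment, written here as a Python dict rather than manifest YAML; per the definition above, only type is required and $parameters stays optional.

# Hypothetical declarative stream fragment using the new transformation
# (illustrative only; field names follow the schema entry added above).
stream_fragment = {
    "transformations": [
        {"type": "KeysToSnakeCase"},
    ],
}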

airbyte_cdk/sources/declarative/models/declarative_component_schema.py

Lines changed: 14 additions & 1 deletion
@@ -710,6 +710,11 @@ class KeysToLower(BaseModel):
     parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
 
 
+class KeysToSnakeCase(BaseModel):
+    type: Literal["KeysToSnakeCase"]
+    parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
+
+
 class IterableDecoder(BaseModel):
     type: Literal["IterableDecoder"]

@@ -1654,7 +1659,15 @@ class Config:
         title="Schema Loader",
     )
     transformations: Optional[
-        List[Union[AddFields, CustomTransformation, RemoveFields, KeysToLower]]
+        List[
+            Union[
+                AddFields,
+                CustomTransformation,
+                RemoveFields,
+                KeysToLower,
+                KeysToSnakeCase,
+            ]
+        ]
     ] = Field(
         None,
         description="A list of transformations to be applied to each output record.",
Lines changed: 68 additions & 0 deletions
@@ -0,0 +1,68 @@
#
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
#

import re
from dataclasses import dataclass
from typing import Any, Dict, List, Optional

import unidecode

from airbyte_cdk.sources.declarative.transformations import RecordTransformation
from airbyte_cdk.sources.types import Config, StreamSlice, StreamState


@dataclass
class KeysToSnakeCaseTransformation(RecordTransformation):
    token_pattern: re.Pattern[str] = re.compile(
        r"[A-Z]+[a-z]*|[a-z]+|\d+|(?P<NoToken>[^a-zA-Z\d]+)"
    )

    def transform(
        self,
        record: Dict[str, Any],
        config: Optional[Config] = None,
        stream_state: Optional[StreamState] = None,
        stream_slice: Optional[StreamSlice] = None,
    ) -> None:
        transformed_record = self._transform_record(record)
        record.clear()
        record.update(transformed_record)

    def _transform_record(self, record: Dict[str, Any]) -> Dict[str, Any]:
        transformed_record = {}
        for key, value in record.items():
            transformed_key = self.process_key(key)
            transformed_value = value

            if isinstance(value, dict):
                transformed_value = self._transform_record(value)

            transformed_record[transformed_key] = transformed_value
        return transformed_record

    def process_key(self, key: str) -> str:
        key = self.normalize_key(key)
        tokens = self.tokenize_key(key)
        tokens = self.filter_tokens(tokens)
        return self.tokens_to_snake_case(tokens)

    def normalize_key(self, key: str) -> str:
        return unidecode.unidecode(key)

    def tokenize_key(self, key: str) -> List[str]:
        tokens = []
        for match in self.token_pattern.finditer(key):
            token = match.group(0) if match.group("NoToken") is None else ""
            tokens.append(token)
        return tokens

    def filter_tokens(self, tokens: List[str]) -> List[str]:
        if len(tokens) >= 3:
            tokens = tokens[:1] + [t for t in tokens[1:-1] if t] + tokens[-1:]
        if tokens and tokens[0].isdigit():
            tokens.insert(0, "")
        return tokens

    def tokens_to_snake_case(self, tokens: List[str]) -> str:
        return "_".join(token.lower() for token in tokens)
