Skip to content

Commit 77555f6

Browse files
committed
improve slice hasher
1 parent eb00f8b commit 77555f6

File tree

1 file changed

+13
-5
lines changed

1 file changed

+13
-5
lines changed

airbyte_cdk/utils/slice_hasher.py

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
1+
import hashlib
12
import json
2-
from typing import Any, Mapping, Optional
3+
from typing import Any, Mapping, Optional, Final
34

45

56
class SliceEncoder(json.JSONEncoder):
@@ -12,11 +13,18 @@ def default(self, obj: Any) -> Any:
1213

1314

1415
class SliceHasher:
    """Compute integer hashes identifying a stream name plus an optional slice.

    The hash is derived from a SHA-256 digest rather than the built-in
    ``hash()``, so it does not depend on Python's per-process string hash
    randomization.
    """

    # Text encoding applied to the hash input before it is digested.
    _ENCODING: Final = "utf-8"

    @classmethod
    def hash(cls, stream_name: str, stream_slice: Optional[Mapping[str, Any]] = None) -> int:
        """Return a non-negative 64-bit integer hash for a stream and slice.

        Args:
            stream_name: Name of the stream.
            stream_slice: Optional slice mapping. A falsy value (``None`` or
                an empty mapping) is treated as "no slice", so only the
                stream name is hashed.

        Returns:
            The last 8 bytes of the SHA-256 digest of the hash input,
            interpreted as a big-endian unsigned integer.

        Raises:
            ValueError: If ``json.dumps`` raises ``TypeError`` while
                serializing ``stream_slice``. The original ``TypeError`` is
                attached as ``__cause__``.
        """
        if stream_slice:
            try:
                # sort_keys makes the serialized form independent of the
                # mapping's key insertion order.
                serialized = json.dumps(stream_slice, sort_keys=True, cls=SliceEncoder)
            except TypeError as e:
                # Chain the cause so the original traceback is preserved.
                raise ValueError(f"Failed to serialize stream slice: {e}") from e
            hash_input = f"{stream_name}:{serialized}".encode(cls._ENCODING)
        else:
            hash_input = stream_name.encode(cls._ENCODING)

        # Keep only the last 8 digest bytes so the result fits in an
        # unsigned 64-bit integer.
        return int.from_bytes(hashlib.sha256(hash_input).digest()[-8:], "big")

0 commit comments

Comments
 (0)