Skip to content

Commit 025e4e7

Browse files
authored
Add KvDiff - Represents a difference for a specific key, with properties (#104)
1 parent 7191438 commit 025e4e7

File tree

3 files changed

+476
-1
lines changed

3 files changed

+476
-1
lines changed

python/prollytree/prollytree.pyi

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -318,6 +318,42 @@ class ConflictResolution:
318318
TakeSource: "ConflictResolution"
319319
TakeDestination: "ConflictResolution"
320320

321+
class DiffOperation:
    """The change recorded for one key: "Added", "Removed", or "Modified".

    Which value properties are meaningful depends on ``operation_type``:
    ``value`` goes with Added/Removed, while ``old_value`` and ``new_value``
    go with Modified.
    """

    @property
    def operation_type(self) -> str:
        """Kind of change: 'Added', 'Removed', or 'Modified'."""
        ...

    @property
    def value(self) -> Optional[bytes]:
        """The value involved in an Added or Removed operation.

        NOTE(review): presumably None for Modified operations -- confirm
        against the native implementation.
        """
        ...

    @property
    def old_value(self) -> Optional[bytes]:
        """The previous value of a Modified operation.

        NOTE(review): presumably None for Added/Removed -- confirm against
        the native implementation.
        """
        ...

    @property
    def new_value(self) -> Optional[bytes]:
        """The replacement value of a Modified operation.

        NOTE(review): presumably None for Added/Removed -- confirm against
        the native implementation.
        """
        ...
343+
344+
class KvDiff:
    """One entry of a diff between two references: a key plus what happened to it."""

    @property
    def key(self) -> bytes:
        """The key that changed."""
        ...

    @property
    def operation(self) -> DiffOperation:
        """The operation (added, removed, or modified) recorded for this key."""
        ...
356+
321357
class VersionedKvStore:
322358
"""A versioned key-value store backed by Git and ProllyTree"""
323359

@@ -483,6 +519,7 @@ class VersionedKvStore:
483519
"""
484520
...
485521

522+
486523
def get_commit_history(self) -> List[Dict[str, Union[str, int]]]:
487524
"""
488525
Get the commit history for the repository.
@@ -595,3 +632,39 @@ class VersionedKvStore:
595632
pairs = store.get_keys_at_ref("HEAD~1")
596633
"""
597634
...
635+
636+
def diff(self, from_ref: str, to_ref: str) -> List[KvDiff]:
    """
    Compare two commits or branches and return every key that was added, updated, or deleted.

    Args:
        from_ref: Reference (branch or commit) to compare from
        to_ref: Reference (branch or commit) to compare to

    Returns:
        List of KvDiff objects, one per key that differs between the two references

    Example:
        # Compare two commits
        diffs = store.diff("abc123", "def456")

        # Compare two branches
        diffs = store.diff("main", "feature-branch")

        # Check what changed in the last commit
        diffs = store.diff("HEAD~1", "HEAD")
    """
    ...
658+
659+
def current_commit(self) -> str:
    """
    Get the object ID of the current commit.

    Returns:
        The hexadecimal string representation of the current commit ID

    Example:
        commit_id = store.current_commit()
        print(f"Current commit: {commit_id}")
    """
    ...

python/tests/test_diff.py

Lines changed: 247 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,247 @@
1+
#!/usr/bin/env python3
2+
# Licensed under the Apache License, Version 2.0 (the "License");
3+
# you may not use this file except in compliance with the License.
4+
# You may obtain a copy of the License at
5+
#
6+
# http://www.apache.org/licenses/LICENSE-2.0
7+
#
8+
# Unless required by applicable law or agreed to in writing, software
9+
# distributed under the License is distributed on an "AS IS" BASIS,
10+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
# See the License for the specific language governing permissions and
12+
# limitations under the License.
13+
14+
"""Tests for the diff and current_commit functionality in VersionedKvStore."""
15+
16+
import tempfile
17+
import shutil
18+
import subprocess
19+
import os
20+
import pytest
21+
from pathlib import Path
22+
23+
import prollytree
24+
25+
26+
class TestDiffFunctionality:
    """Tests for ``diff``, ``current_commit`` and ``get_commits_for_key``.

    Every test runs against a fresh git repository created in a temporary
    directory, with the store rooted in a ``data`` subdirectory of it.
    """

    def setup_method(self):
        """Create a throwaway git repository and chdir into its store directory."""
        self.temp_dir = tempfile.mkdtemp()

        # Initialize the repository. HEAD is pointed at "main" explicitly
        # because the branch tests below refer to "main" by name, while a
        # plain `git init` names the first branch after the machine's
        # init.defaultBranch setting.
        git_setup_commands = (
            ["git", "init"],
            ["git", "symbolic-ref", "HEAD", "refs/heads/main"],
            ["git", "config", "user.name", "Test User"],
            ["git", "config", "user.email", "test@example.com"],
        )
        for command in git_setup_commands:
            subprocess.run(command, cwd=self.temp_dir, check=True, capture_output=True)

        # The store lives in a subdirectory, not in the git root.
        self.store_path = Path(self.temp_dir) / "data"
        self.store_path.mkdir(parents=True, exist_ok=True)

        # Git operations are run from the store path; remember where we came
        # from so teardown can restore it.
        self.original_cwd = os.getcwd()
        os.chdir(str(self.store_path))

    def teardown_method(self):
        """Restore the working directory and delete the temporary repository."""
        # Leave the temp directory before removing it.
        os.chdir(self.original_cwd)
        shutil.rmtree(self.temp_dir, ignore_errors=True)

    def test_diff_between_commits(self):
        """Diff of two commit ids reports the added, modified and removed keys."""
        store = prollytree.VersionedKvStore(str(self.store_path))

        # First commit: two keys.
        store.insert(b"key1", b"value1")
        store.insert(b"key2", b"value2")
        commit1 = store.commit("Initial commit")

        # Second commit: one change of each kind.
        store.insert(b"key3", b"value3")  # Added
        store.update(b"key1", b"value1_modified")  # Modified
        store.delete(b"key2")  # Removed
        commit2 = store.commit("Second commit")

        diffs = store.diff(commit1, commit2)
        diff_map = {diff.key: diff.operation for diff in diffs}

        # Exactly the three touched keys are reported.
        assert len(diffs) == 3
        assert b"key1" in diff_map
        assert b"key2" in diff_map
        assert b"key3" in diff_map

        # Each key carries the right operation type and values.
        key1_op = diff_map[b"key1"]
        assert key1_op.operation_type == "Modified"
        assert key1_op.old_value == b"value1"
        assert key1_op.new_value == b"value1_modified"

        key2_op = diff_map[b"key2"]
        assert key2_op.operation_type == "Removed"
        assert key2_op.value == b"value2"

        key3_op = diff_map[b"key3"]
        assert key3_op.operation_type == "Added"
        assert key3_op.value == b"value3"

    def test_diff_between_branches(self):
        """Diff of two branch names reports what the feature branch changed."""
        store = prollytree.VersionedKvStore(str(self.store_path))

        # Initial data on the main branch.
        store.insert(b"shared", b"initial")
        store.insert(b"main_only", b"main_value")
        store.commit("Initial commit on main")

        # Create the feature branch and commit one change of each kind.
        # NOTE(review): this relies on create_branch() also switching to the
        # new branch -- confirm against the store implementation.
        store.create_branch("feature")
        store.update(b"shared", b"feature_value")
        store.insert(b"feature_only", b"feature_data")
        store.delete(b"main_only")
        store.commit("Changes on feature branch")

        diffs = store.diff("main", "feature")
        assert len(diffs) == 3

        diff_map = {diff.key: diff.operation for diff in diffs}

        # The shared key was modified.
        shared_op = diff_map[b"shared"]
        assert shared_op.operation_type == "Modified"
        assert shared_op.old_value == b"initial"
        assert shared_op.new_value == b"feature_value"

        # main_only was removed.
        main_only_op = diff_map[b"main_only"]
        assert main_only_op.operation_type == "Removed"

        # feature_only was added.
        feature_only_op = diff_map[b"feature_only"]
        assert feature_only_op.operation_type == "Added"

    def test_current_commit(self):
        """current_commit() tracks new commits and branch checkouts."""
        store = prollytree.VersionedKvStore(str(self.store_path))

        # After the first commit, the current commit is the id commit() returned.
        store.insert(b"key1", b"value1")
        commit1 = store.commit("First commit")
        assert store.current_commit() == commit1

        # A second commit moves the current commit forward.
        store.insert(b"key2", b"value2")
        commit2 = store.commit("Second commit")
        assert store.current_commit() == commit2

        # Committing on a new branch moves it again.
        store.create_branch("test-branch")
        store.insert(b"key3", b"value3")
        commit3 = store.commit("Third commit on branch")
        assert store.current_commit() == commit3

        # Checking out main returns to main's last commit.
        store.checkout("main")
        assert store.current_commit() == commit2

    def test_diff_with_no_changes(self):
        """Diffing a commit against itself yields an empty list."""
        store = prollytree.VersionedKvStore(str(self.store_path))

        store.insert(b"key1", b"value1")
        commit1 = store.commit("First commit")

        diffs = store.diff(commit1, commit1)

        assert len(diffs) == 0

    def test_diff_representation(self):
        """repr() of KvDiff and DiffOperation mentions the key and the change."""
        store = prollytree.VersionedKvStore(str(self.store_path))

        # Two commits that differ in a single modified key.
        store.insert(b"key1", b"value1")
        commit1 = store.commit("First")

        store.update(b"key1", b"value2")
        commit2 = store.commit("Second")

        diffs = store.diff(commit1, commit2)
        assert len(diffs) == 1
        diff = diffs[0]

        # repr of the KvDiff names the key and the operation type.
        repr_str = repr(diff)
        assert "key1" in repr_str
        assert "Modified" in repr_str

        # repr of the operation names its type and both size fields.
        op_repr = repr(diff.operation)
        assert "Modified" in op_repr
        assert "old_size" in op_repr
        assert "new_size" in op_repr

    def test_get_commits_for_key_functionality(self):
        """get_commits_for_key() lists only commits that touched the key, newest first."""
        store = prollytree.VersionedKvStore(str(self.store_path))

        # Commit 1 creates the tracked key, commit 2 changes it, and
        # commit 3 leaves it alone.
        store.insert(b"tracked_key", b"value1")
        store.insert(b"other_key", b"other_value")
        commit1 = store.commit("First commit")

        store.update(b"tracked_key", b"value2")
        commit2 = store.commit("Second commit - tracked_key changed")

        store.insert(b"another_key", b"another_value")
        commit3 = store.commit("Third commit - no tracked_key change")

        commits_for_key = store.get_commits_for_key(b"tracked_key")

        # Only the two commits that modified tracked_key are returned.
        assert len(commits_for_key) == 2

        commit_ids = [commit['id'] for commit in commits_for_key]
        assert commit2 in commit_ids  # Most recent change
        assert commit1 in commit_ids  # First commit with this key
        assert commit3 not in commit_ids  # Third commit didn't touch tracked_key

        # Commits come back in reverse chronological order (newest first).
        assert commits_for_key[0]['id'] == commit2
244+
245+
246+
if __name__ == "__main__":
    # pytest.main() returns the run's exit status; propagate it so that
    # executing this file directly exits non-zero when a test fails.
    raise SystemExit(pytest.main([__file__, "-v"]))

0 commit comments

Comments
 (0)