Skip to content

proper tree entry sorting #849

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
May 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 17 additions & 12 deletions gix-object/src/tree/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ pub struct EntryRef<'a> {
pub filename: &'a BStr,
/// The id of the object representing the entry.
// TODO: figure out how these should be called. id or oid? It's inconsistent around the codebase.
// Answer: make it 'id', as in `git2`
// Answer: make it 'id', as in `git2`
#[cfg_attr(feature = "serde", serde(borrow))]
pub oid: &'a gix_hash::oid,
}
Expand All @@ -84,11 +84,14 @@ impl<'a> PartialOrd for EntryRef<'a> {
}

impl<'a> Ord for EntryRef<'a> {
/// Entries compare by the common portion of the filename. This is critical for proper functioning of algorithms working on trees.
/// Doing it like this is needed for compatibility with older, potentially broken(?) trees.
fn cmp(&self, other: &Self) -> Ordering {
let len = self.filename.len().min(other.filename.len());
self.filename[..len].cmp(&other.filename[..len])
fn cmp(&self, b: &Self) -> Ordering {
let a = self;
let common = a.filename.len().min(b.filename.len());
a.filename[..common].cmp(&b.filename[..common]).then_with(|| {
let a = a.filename.get(common).or_else(|| a.mode.is_tree().then_some(&b'/'));
let b = b.filename.get(common).or_else(|| b.mode.is_tree().then_some(&b'/'));
a.cmp(&b)
})
}
}

Expand All @@ -111,12 +114,14 @@ impl PartialOrd for Entry {
}

impl Ord for Entry {
/// Entries compare by the common portion of the filename. This is critical for proper functioning of algorithms working on trees.
fn cmp(&self, other: &Self) -> Ordering {
let common_len = self.filename.len().min(other.filename.len());
self.filename[..common_len]
.cmp(&other.filename[..common_len])
.then_with(|| self.filename.len().cmp(&other.filename.len()))
fn cmp(&self, b: &Self) -> Ordering {
let a = self;
let common = a.filename.len().min(b.filename.len());
a.filename[..common].cmp(&b.filename[..common]).then_with(|| {
let a = a.filename.get(common).or_else(|| a.mode.is_tree().then_some(&b'/'));
let b = b.filename.get(common).or_else(|| b.mode.is_tree().then_some(&b'/'));
a.cmp(&b)
})
}
}

Expand Down
16 changes: 8 additions & 8 deletions gix-object/src/tree/write.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,16 +53,16 @@ impl crate::WriteTo for Tree {
Ok(())
}

fn kind(&self) -> Kind {
Kind::Tree
}

fn size(&self) -> usize {
self.entries
.iter()
.map(|Entry { mode, filename, oid }| mode.as_bytes().len() + 1 + filename.len() + 1 + oid.as_bytes().len())
.sum()
}

fn kind(&self) -> Kind {
Kind::Tree
}
}

/// Serialization
Expand Down Expand Up @@ -96,6 +96,10 @@ impl<'a> crate::WriteTo for TreeRef<'a> {
Ok(())
}

fn kind(&self) -> Kind {
Kind::Tree
}

fn size(&self) -> usize {
self.entries
.iter()
Expand All @@ -104,8 +108,4 @@ impl<'a> crate::WriteTo for TreeRef<'a> {
})
.sum()
}

fn kind(&self) -> Kind {
Kind::Tree
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,15 @@ use gix_actor::{Sign, SignatureRef, Time};
use gix_object::{bstr::ByteSlice, commit::message::body::TrailerRef, CommitRef};
use smallvec::SmallVec;

use crate::immutable::{
use crate::{
commit::{LONG_MESSAGE, MERGE_TAG, SIGNATURE},
fixture_bytes, linus_signature, signature,
fixture_name, linus_signature, signature,
};

#[test]
fn unsigned() -> crate::Result {
assert_eq!(
CommitRef::from_bytes(&fixture_bytes("commit", "unsigned.txt"))?,
CommitRef::from_bytes(&fixture_name("commit", "unsigned.txt"))?,
CommitRef {
tree: b"1b2dfb4ac5e42080b682fc676e9738c94ce6d54d".as_bstr(),
parents: SmallVec::default(),
Expand All @@ -27,7 +27,7 @@ fn unsigned() -> crate::Result {
#[test]
fn whitespace() -> crate::Result {
assert_eq!(
CommitRef::from_bytes(&fixture_bytes("commit", "whitespace.txt"))?,
CommitRef::from_bytes(&fixture_name("commit", "whitespace.txt"))?,
CommitRef {
tree: b"9bed6275068a0575243ba8409253e61af81ab2ff".as_bstr(),
parents: SmallVec::from(vec![b"26b4df046d1776c123ac69d918f5aec247b58cc6".as_bstr()]),
Expand All @@ -44,7 +44,7 @@ fn whitespace() -> crate::Result {
#[test]
fn signed_singleline() -> crate::Result {
assert_eq!(
CommitRef::from_bytes(&fixture_bytes("commit", "signed-singleline.txt"))?,
CommitRef::from_bytes(&fixture_name("commit", "signed-singleline.txt"))?,
CommitRef {
tree: b"00fc39317701176e326974ce44f5bd545a32ec0b".as_bstr(),
parents: SmallVec::from(vec![b"09d8d3a12e161a7f6afb522dbe8900a9c09bce06".as_bstr()]),
Expand All @@ -60,7 +60,7 @@ fn signed_singleline() -> crate::Result {

#[test]
fn mergetag() -> crate::Result {
let fixture = fixture_bytes("commit", "mergetag.txt");
let fixture = fixture_name("commit", "mergetag.txt");
let commit = CommitRef {
tree: b"1c61918031bf2c7fab9e17dde3c52a6a9884fcb5".as_bstr(),
parents: SmallVec::from(vec![
Expand All @@ -85,7 +85,7 @@ fn mergetag() -> crate::Result {
#[test]
fn signed() -> crate::Result {
assert_eq!(
CommitRef::from_bytes(&fixture_bytes("commit", "signed.txt"))?,
CommitRef::from_bytes(&fixture_name("commit", "signed.txt"))?,
CommitRef {
tree: b"00fc39317701176e326974ce44f5bd545a32ec0b".as_bstr(),
parents: SmallVec::from(vec![b"09d8d3a12e161a7f6afb522dbe8900a9c09bce06".as_bstr()]),
Expand All @@ -102,7 +102,7 @@ fn signed() -> crate::Result {
#[test]
fn signed_with_encoding() -> crate::Result {
assert_eq!(
CommitRef::from_bytes(&fixture_bytes("commit", "signed-with-encoding.txt"))?,
CommitRef::from_bytes(&fixture_name("commit", "signed-with-encoding.txt"))?,
CommitRef {
tree: b"1973afa74d87b2bb73fa884aaaa8752aec43ea88".as_bstr(),
parents: SmallVec::from(vec![b"79c51cc86923e2b8ca0ee5c4eb75e48027133f9a".as_bstr()]),
Expand All @@ -119,7 +119,7 @@ fn signed_with_encoding() -> crate::Result {
#[test]
fn with_encoding() -> crate::Result {
assert_eq!(
CommitRef::from_bytes(&fixture_bytes("commit", "with-encoding.txt"))?,
CommitRef::from_bytes(&fixture_name("commit", "with-encoding.txt"))?,
CommitRef {
tree: b"4a1c03029e7407c0afe9fc0320b3258e188b115e".as_bstr(),
parents: SmallVec::from(vec![b"7ca98aad461a5c302cb4c9e3acaaa6053cc67a62".as_bstr()]),
Expand All @@ -144,7 +144,7 @@ fn with_trailer() -> crate::Result {
sign: Sign::Plus,
},
};
let backing = fixture_bytes("commit", "message-with-footer.txt");
let backing = fixture_name("commit", "message-with-footer.txt");
let commit = CommitRef::from_bytes(&backing)?;
assert_eq!(
commit,
Expand Down Expand Up @@ -224,7 +224,7 @@ instead of depending directly on the lower-level crates.
#[test]
fn merge() -> crate::Result {
assert_eq!(
CommitRef::from_bytes(&fixture_bytes("commit", "merge.txt"))?,
CommitRef::from_bytes(&fixture_name("commit", "merge.txt"))?,
CommitRef {
tree: b"0cf16ce8e229b59a761198975f0c0263229faf82".as_bstr(),
parents: SmallVec::from(vec![
Expand Down Expand Up @@ -255,7 +255,7 @@ iyBBl69jASy41Ug/BlFJbw4+ItkShpXwkJKuBBV/JExChmvbxYWaS7QnyYC9UO0=

#[test]
fn newline_right_after_signature_multiline_header() -> crate::Result {
let fixture = fixture_bytes("commit", "signed-whitespace.txt");
let fixture = fixture_name("commit", "signed-whitespace.txt");
let commit = CommitRef::from_bytes(&fixture)?;
let pgp_sig = OTHER_SIGNATURE.as_bstr();
assert_eq!(commit.extra_headers[0].1.as_ref(), pgp_sig);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,16 +1,13 @@
use gix_object::{bstr::ByteSlice, commit::ref_iter::Token, CommitRefIter};

use crate::{
hex_to_id,
immutable::{
commit::{LONG_MESSAGE, MERGE_TAG, SIGNATURE},
fixture_bytes, linus_signature, signature,
},
commit::{LONG_MESSAGE, MERGE_TAG, SIGNATURE},
fixture_name, hex_to_id, linus_signature, signature,
};

#[test]
fn newline_right_after_signature_multiline_header() -> crate::Result {
let data = fixture_bytes("commit", "signed-whitespace.txt");
let data = fixture_name("commit", "signed-whitespace.txt");
let tokens = CommitRefIter::from_bytes(&data).collect::<Result<Vec<_>, _>>()?;
assert_eq!(tokens.len(), 7, "mainly a parsing exercise");
match tokens.last().expect("there are tokens") {
Expand All @@ -24,7 +21,7 @@ fn newline_right_after_signature_multiline_header() -> crate::Result {

#[test]
fn signed_with_encoding() -> crate::Result {
let input = fixture_bytes("commit", "signed-with-encoding.txt");
let input = fixture_name("commit", "signed-with-encoding.txt");
let iter = CommitRefIter::from_bytes(&input);
assert_eq!(
iter.collect::<Result<Vec<_>, _>>()?,
Expand Down Expand Up @@ -55,7 +52,7 @@ fn signed_with_encoding() -> crate::Result {
#[test]
fn whitespace() -> crate::Result {
assert_eq!(
CommitRefIter::from_bytes(&fixture_bytes("commit", "whitespace.txt")).collect::<Result<Vec<_>, _>>()?,
CommitRefIter::from_bytes(&fixture_name("commit", "whitespace.txt")).collect::<Result<Vec<_>, _>>()?,
vec![
Token::Tree {
id: hex_to_id("9bed6275068a0575243ba8409253e61af81ab2ff")
Expand All @@ -78,7 +75,7 @@ fn whitespace() -> crate::Result {
#[test]
fn unsigned() -> crate::Result {
assert_eq!(
CommitRefIter::from_bytes(&fixture_bytes("commit", "unsigned.txt")).collect::<Result<Vec<_>, _>>()?,
CommitRefIter::from_bytes(&fixture_name("commit", "unsigned.txt")).collect::<Result<Vec<_>, _>>()?,
vec![
Token::Tree {
id: hex_to_id("1b2dfb4ac5e42080b682fc676e9738c94ce6d54d")
Expand All @@ -98,7 +95,7 @@ fn unsigned() -> crate::Result {
#[test]
fn signed_singleline() -> crate::Result {
assert_eq!(
CommitRefIter::from_bytes(&fixture_bytes("commit", "signed-singleline.txt")).collect::<Result<Vec<_>, _>>()?,
CommitRefIter::from_bytes(&fixture_name("commit", "signed-singleline.txt")).collect::<Result<Vec<_>, _>>()?,
vec![
Token::Tree {
id: hex_to_id("00fc39317701176e326974ce44f5bd545a32ec0b")
Expand All @@ -117,7 +114,7 @@ fn signed_singleline() -> crate::Result {
]
);
assert_eq!(
CommitRefIter::from_bytes(&fixture_bytes("commit", "signed-singleline.txt"))
CommitRefIter::from_bytes(&fixture_name("commit", "signed-singleline.txt"))
.parent_ids()
.collect::<Vec<_>>(),
vec![hex_to_id("09d8d3a12e161a7f6afb522dbe8900a9c09bce06")]
Expand All @@ -127,7 +124,7 @@ fn signed_singleline() -> crate::Result {

#[test]
fn error_handling() -> crate::Result {
let data = fixture_bytes("commit", "unsigned.txt");
let data = fixture_name("commit", "unsigned.txt");
let iter = CommitRefIter::from_bytes(&data[..data.len() / 2]);
let tokens = iter.collect::<Vec<_>>();
assert!(
Expand All @@ -139,7 +136,7 @@ fn error_handling() -> crate::Result {

#[test]
fn mergetag() -> crate::Result {
let input = fixture_bytes("commit", "mergetag.txt");
let input = fixture_name("commit", "mergetag.txt");
let iter = CommitRefIter::from_bytes(&input);
assert_eq!(
iter.collect::<Result<Vec<_>, _>>()?,
Expand Down Expand Up @@ -177,14 +174,11 @@ fn mergetag() -> crate::Result {
mod method {
use gix_object::CommitRefIter;

use crate::{
hex_to_id,
immutable::{fixture_bytes, signature},
};
use crate::{fixture_name, hex_to_id, signature};

#[test]
fn tree_id() -> crate::Result {
let input = fixture_bytes("commit", "unsigned.txt");
let input = fixture_name("commit", "unsigned.txt");
let iter = CommitRefIter::from_bytes(&input);
assert_eq!(
iter.clone().tree_id().ok(),
Expand All @@ -200,7 +194,7 @@ mod method {

#[test]
fn signatures() -> crate::Result {
let input = fixture_bytes("commit", "unsigned.txt");
let input = fixture_name("commit", "unsigned.txt");
let iter = CommitRefIter::from_bytes(&input);
assert_eq!(
iter.signatures().collect::<Vec<_>>(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -134,11 +134,11 @@ mod method {
use gix_object::CommitRef;
use pretty_assertions::assert_eq;

use crate::{hex_to_id, immutable::fixture_bytes};
use crate::{fixture_name, hex_to_id};

#[test]
fn tree() -> crate::Result {
let fixture = fixture_bytes("commit", "unsigned.txt");
let fixture = fixture_name("commit", "unsigned.txt");
let commit = CommitRef::from_bytes(&fixture)?;
assert_eq!(commit.tree(), hex_to_id("1b2dfb4ac5e42080b682fc676e9738c94ce6d54d"));
assert_eq!(commit.tree, "1b2dfb4ac5e42080b682fc676e9738c94ce6d54d");
Expand Down
23 changes: 23 additions & 0 deletions gix-object/tests/encode/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,3 +95,26 @@ mod blob {
// It doesn't matter which data we use - it's not interpreted.
round_trip!(gix_object::Blob, gix_object::BlobRef, "tree/everything.tree");
}

mod loose_header {
use bstr::ByteSlice;
use gix_object::{decode, encode, Kind};

#[test]
fn round_trip() -> Result<(), Box<dyn std::error::Error>> {
for (kind, size, expected) in &[
(Kind::Tree, 1234, "tree 1234\0".as_bytes()),
(Kind::Blob, 0, b"blob 0\0"),
(Kind::Commit, 24241, b"commit 24241\0"),
(Kind::Tag, 9999999999, b"tag 9999999999\0"),
] {
let buf = encode::loose_header(*kind, *size);
assert_eq!(buf.as_bstr(), expected.as_bstr());
let (actual_kind, actual_size, actual_read) = decode::loose_header(&buf)?;
assert_eq!(actual_kind, *kind);
assert_eq!(actual_size, *size);
assert_eq!(actual_read, buf.len());
}
Ok(())
}
}
Git LFS file not shown
19 changes: 19 additions & 0 deletions gix-object/tests/fixtures/make_trees.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!/bin/bash
set -eu -o pipefail

function baseline() {
local revspec=${1:?First argument is the revspec of the object to create a baseline for}
local basename=${2:?Second argument is the name of the baseline file}
local baseline_file="$basename.baseline"
git rev-parse "$revspec" | git cat-file --batch | tail -n +2 > "$baseline_file"
truncate -s "$(($(stat -c '%s' "$baseline_file")-1))" "$baseline_file"
}

git init -q
mkdir file
touch bin bin.d file.to file.toml file.toml.bin file0 file/a

git add .
git commit -m "c1"

baseline @^{tree} tree
Loading