From a20b3d053b43d4613127e36994f181868cafb730 Mon Sep 17 00:00:00 2001 From: Pierre Chevalier Date: Fri, 4 Apr 2025 15:59:12 +0100 Subject: [PATCH 1/4] fix: Make email with spaces roundtrip. We see this situation in commits in the wild. --- gix-actor/src/signature/decode.rs | 22 +++++++++++++--------- gix-actor/tests/identity/mod.rs | 7 +++---- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/gix-actor/src/signature/decode.rs b/gix-actor/src/signature/decode.rs index dc07accbfdb..fecdfaf773b 100644 --- a/gix-actor/src/signature/decode.rs +++ b/gix-actor/src/signature/decode.rs @@ -79,11 +79,7 @@ pub(crate) mod function { StrContext::Label("Closing '>' not found"), )))?; let i_name_and_email = &i[..right_delim_idx]; - let skip_from_right = i_name_and_email - .iter() - .rev() - .take_while(|b| b.is_ascii_whitespace() || **b == b'>') - .count(); + let skip_from_right = i_name_and_email.iter().rev().take_while(|b| **b == b'>').count(); let left_delim_idx = i_name_and_email .find_byte(b'<') .ok_or(ErrMode::Cut(E::from_input(i).add_context( @@ -91,10 +87,7 @@ pub(crate) mod function { &start, StrContext::Label("Opening '<' not found"), )))?; - let skip_from_left = i[left_delim_idx..] - .iter() - .take_while(|b| b.is_ascii_whitespace() || **b == b'<') - .count(); + let skip_from_left = i[left_delim_idx..].iter().take_while(|b| **b == b'<').count(); let mut name = i[..left_delim_idx].as_bstr(); name = name.strip_suffix(b" ").unwrap_or(name).as_bstr(); @@ -164,6 +157,17 @@ mod tests { ); } + #[test] + fn email_with_space() { + assert_eq!( + decode + .parse_peek(b"Sebastian Thiel <\tbyronimo@gmail.com > 1528473343 +0230") + .expect("parse to work") + .1, + signature("Sebastian Thiel", "\tbyronimo@gmail.com ", 1528473343, Sign::Plus, 9000) + ); + } + #[test] fn negative_offset_0000() { assert_eq!( diff --git a/gix-actor/tests/identity/mod.rs b/gix-actor/tests/identity/mod.rs index 72d9421bd84..2dd82f722e6 100644 --- a/gix-actor/tests/identity/mod.rs +++ b/gix-actor/tests/identity/mod.rs @@ -5,6 +5,9 @@ use gix_actor::Identity; fn round_trip() -> gix_testtools::Result { static DEFAULTS: &[&[u8]] = &[ b"Sebastian Thiel ", + b"Sebastian Thiel < byronimo@gmail.com>", + b"Sebastian Thiel ", + b"Sebastian Thiel <\tbyronimo@gmail.com \t >", ".. ☺️Sebastian 王知明 Thiel🙌 .. ".as_bytes(), b".. whitespace \t is explicitly allowed - unicode aware trimming must be done elsewhere " ]; @@ -25,10 +28,6 @@ fn lenient_parsing() -> gix_testtools::Result { ] { let identity = gix_actor::IdentityRef::from_bytes::<()>(input.as_bytes()).unwrap(); assert_eq!(identity.name, "First Last"); - assert_eq!( - identity.email, "fl Date: Fri, 4 Apr 2025 16:02:41 +0100 Subject: [PATCH 2/4] adapt to changes in gix-actor. Stop trimming the email at parsing time. We see poorly formatted emails with a space at the end in real Git repos and we wish for the gitoxide representation for such commits to roundtrip. Propagate the fix from `gix-actor` and add a test to show we now roundtrip. --- gix-object/tests/fixtures/commit/email-with-space.txt | 5 +++++ gix-object/tests/object/encode/mod.rs | 1 + 2 files changed, 6 insertions(+) create mode 100644 gix-object/tests/fixtures/commit/email-with-space.txt diff --git a/gix-object/tests/fixtures/commit/email-with-space.txt b/gix-object/tests/fixtures/commit/email-with-space.txt new file mode 100644 index 00000000000..d61b3e6a4ac --- /dev/null +++ b/gix-object/tests/fixtures/commit/email-with-space.txt @@ -0,0 +1,5 @@ +tree 1b2dfb4ac5e42080b682fc676e9738c94ce6d54d +author Sebastian Thiel 1592437401 +0800 +committer Sebastian Thiel 1592437401 +0800 + +In the author line, the email is followed by a space diff --git a/gix-object/tests/object/encode/mod.rs b/gix-object/tests/object/encode/mod.rs index d7a028b0c7a..81cb9c73160 100644 --- a/gix-object/tests/object/encode/mod.rs +++ b/gix-object/tests/object/encode/mod.rs @@ -87,6 +87,7 @@ mod commit { round_trip!( gix_object::Commit, gix_object::CommitRef, + "commit/email-with-space.txt", "commit/signed-whitespace.txt", "commit/two-multiline-headers.txt", "commit/mergetag.txt", From b2bccbc4d2ebb085a7958a0d077d65946369210d Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sat, 5 Apr 2025 10:36:45 +0800 Subject: [PATCH 3/4] doc: inform about untrimmed name and email --- gix-actor/src/lib.rs | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/gix-actor/src/lib.rs b/gix-actor/src/lib.rs index e582541cb1a..ec182e96d1d 100644 --- a/gix-actor/src/lib.rs +++ b/gix-actor/src/lib.rs @@ -28,9 +28,13 @@ pub mod signature; #[derive(Default, PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct Identity { - /// The actors name. + /// The actors name, potentially with whitespace as parsed. + /// + /// Use [IdentityRef::trim()] or trim manually to be able to clean it up. pub name: BString, - /// The actor's email. + /// The actor's email, potentially with whitespace and garbage as parsed. + /// + /// Use [IdentityRef::trim()] or trim manually to be able to clean it up. pub email: BString, } @@ -38,10 +42,14 @@ pub struct Identity { #[derive(Default, PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct IdentityRef<'a> { - /// The actors name. + /// The actors name, potentially with whitespace as parsed. + /// + /// Use [IdentityRef::trim()] or trim manually to be able to clean it up. #[cfg_attr(feature = "serde", serde(borrow))] pub name: &'a BStr, - /// The actor's email. + /// The actor's email, potentially with whitespace and garbage as parsed. + /// + /// Use [IdentityRef::trim()] or trim manually to be able to clean it up. pub email: &'a BStr, } @@ -51,9 +59,13 @@ pub struct IdentityRef<'a> { #[derive(Default, PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct Signature { - /// The actors name. + /// The actors name, potentially with whitespace as parsed. + /// + /// Use [SignatureRef::trim()] or trim manually to be able to clean it up. pub name: BString, - /// The actor's email. + /// The actor's email, potentially with whitespace and garbage as parsed. + /// + /// Use [SignatureRef::trim()] or trim manually to be able to clean it up. pub email: BString, /// The time stamp at which the signature is performed. pub time: Time, @@ -65,10 +77,14 @@ pub struct Signature { #[derive(Default, PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct SignatureRef<'a> { - /// The actor's name. + /// The actors name, potentially with whitespace as parsed. + /// + /// Use [SignatureRef::trim()] or trim manually to be able to clean it up. #[cfg_attr(feature = "serde", serde(borrow))] pub name: &'a BStr, - /// The actor's email. + /// The actor's email, potentially with whitespace and garbage as parsed. + /// + /// Use [SignatureRef::trim()] or trim manually to be able to clean it up. pub email: &'a BStr, /// The time stamp at which the signature was performed. pub time: gix_date::Time, From 39e35a30453f8860bb115a254c25a83b85cfd820 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sat, 5 Apr 2025 10:42:24 +0800 Subject: [PATCH 4/4] Bring back test-case to show how trailing slashes are handled (#1438) --- gix-actor/tests/identity/mod.rs | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/gix-actor/tests/identity/mod.rs b/gix-actor/tests/identity/mod.rs index 2dd82f722e6..d3787d9a732 100644 --- a/gix-actor/tests/identity/mod.rs +++ b/gix-actor/tests/identity/mod.rs @@ -22,12 +22,22 @@ fn round_trip() -> gix_testtools::Result { #[test] fn lenient_parsing() -> gix_testtools::Result { - for input in [ - "First Last<> >", - "First Last>\n", + for (input, expected_email) in [ + ( + "First Last<> >", + "fl > ", + ), + ( + "First Last>\n", + "fl (input.as_bytes()).unwrap(); assert_eq!(identity.name, "First Last"); + assert_eq!( + identity.email, expected_email, + "emails are parsed but left as is for round-tripping" + ); let signature: Identity = identity.into(); let mut output = Vec::new(); let err = signature.write_to(&mut output).unwrap_err();