Radish alpha
h
rad:z3gqcJUoA1n9HaHKufZs5FCSGazv5
Radicle Heartwood Protocol & Stack
Radicle
Git
git-metadata: Add parsing of `CommitData`
Fintan Halpenny committed 1 month ago
commit 01c60388db1d109198fbe3922a7b4020f94b25ed
parent 5862414
7 files changed +722 -1
modified crates/radicle-git-metadata/src/commit.rs
@@ -1,8 +1,11 @@
pub mod headers;
pub mod trailers;

+
mod parse;
+
pub use parse::ParseError;
+

use core::fmt;
-
use std::str;
+
use std::str::{self, FromStr};

use headers::{Headers, Signature};
use trailers::{OwnedTrailer, Trailer};
@@ -157,6 +160,44 @@ impl<Tree, Parent> CommitData<Tree, Parent> {
    }
}

+
impl<Tree, Parent> CommitData<Tree, Parent>
+
where
+
    Tree: str::FromStr,
+
    Parent: str::FromStr,
+
    Tree::Err: std::error::Error + Send + Sync + 'static,
+
    Parent::Err: std::error::Error + Send + Sync + 'static,
+
{
+
    /// Parse a [`CommitData`] from its raw git object bytes.
+
    ///
+
    /// This is the inverse of the [`fmt::Display`] implementation. The bytes
+
    /// are expected to be valid UTF-8 and in the standard git commit object
+
    /// format produced by `git cat-file -p <commit>`.
+
    ///
+
    /// Trailers are detected by scanning the last paragraph of the message
+
    /// body (the section after the final blank line). If every non-empty line
+
    /// in that paragraph is a valid `Token: value` pair, those lines are
+
    /// parsed as trailers and stored separately; otherwise the whole body is
+
    /// kept as the message with no trailers.
+
    pub fn from_bytes(bytes: &[u8]) -> Result<Self, ParseError> {
+
        let s = str::from_utf8(bytes).map_err(ParseError::Utf8)?;
+
        parse::parse(s)
+
    }
+
}
+

+
impl<Tree, Parent> FromStr for CommitData<Tree, Parent>
+
where
+
    Tree: str::FromStr,
+
    Parent: str::FromStr,
+
    Tree::Err: std::error::Error + Send + Sync + 'static,
+
    Parent::Err: std::error::Error + Send + Sync + 'static,
+
{
+
    type Err = ParseError;
+

+
    fn from_str(s: &str) -> Result<Self, Self::Err> {
+
        parse::parse(s)
+
    }
+
}
+

impl<Tree: fmt::Display, Parent: fmt::Display> fmt::Display for CommitData<Tree, Parent> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        writeln!(f, "tree {}", self.tree)?;
added crates/radicle-git-metadata/src/commit/headers/parse.rs
@@ -0,0 +1,98 @@
+
use super::Headers;
+

+
#[derive(Debug, thiserror::Error)]
+
pub enum ParseError {
+
    #[error("invalid utf-8")]
+
    Utf8(#[source] std::str::Utf8Error),
+
    #[error("missing tree")]
+
    MissingTree,
+
    #[error("invalid tree")]
+
    InvalidTree,
+
    #[error("invalid format")]
+
    InvalidFormat,
+
    #[error("invalid parent")]
+
    InvalidParent,
+
    #[error("invalid header")]
+
    InvalidHeader,
+
    #[error("invalid author")]
+
    InvalidAuthor,
+
    #[error("missing author")]
+
    MissingAuthor,
+
    #[error("invalid committer")]
+
    InvalidCommitter,
+
    #[error("missing committer")]
+
    MissingCommitter,
+
}
+

+
pub fn parse_commit_header<
+
    Tree: std::str::FromStr,
+
    Parent: std::str::FromStr,
+
    Signature: std::str::FromStr,
+
>(
+
    header: &str,
+
) -> Result<(Tree, Vec<Parent>, Signature, Signature, Headers), ParseError> {
+
    let mut lines = header.lines();
+

+
    let tree = match lines.next() {
+
        Some(tree) => tree
+
            .strip_prefix("tree ")
+
            .map(Tree::from_str)
+
            .transpose()
+
            .map_err(|_| ParseError::InvalidTree)?
+
            .ok_or(ParseError::MissingTree)?,
+
        None => return Err(ParseError::MissingTree),
+
    };
+

+
    let mut parents = Vec::new();
+
    let mut author: Option<Signature> = None;
+
    let mut committer: Option<Signature> = None;
+
    let mut headers = Headers::new();
+

+
    for line in lines {
+
        // Check if a signature is still being parsed
+
        if let Some(rest) = line.strip_prefix(' ') {
+
            let value: &mut String = headers
+
                .0
+
                .last_mut()
+
                .map(|(_, v)| v)
+
                .ok_or(ParseError::InvalidFormat)?;
+
            value.push('\n');
+
            value.push_str(rest);
+
            continue;
+
        }
+

+
        if let Some((name, value)) = line.split_once(' ') {
+
            match name {
+
                "parent" => parents.push(
+
                    value
+
                        .parse::<Parent>()
+
                        .map_err(|_| ParseError::InvalidParent)?,
+
                ),
+
                "author" => {
+
                    author = Some(
+
                        value
+
                            .parse::<Signature>()
+
                            .map_err(|_| ParseError::InvalidAuthor)?,
+
                    )
+
                }
+
                "committer" => {
+
                    committer = Some(
+
                        value
+
                            .parse::<Signature>()
+
                            .map_err(|_| ParseError::InvalidCommitter)?,
+
                    )
+
                }
+
                _ => headers.push(name, value),
+
            }
+
            continue;
+
        }
+
    }
+

+
    Ok((
+
        tree,
+
        parents,
+
        author.ok_or(ParseError::MissingAuthor)?,
+
        committer.ok_or(ParseError::MissingCommitter)?,
+
        headers,
+
    ))
+
}
added crates/radicle-git-metadata/src/commit/parse.rs
@@ -0,0 +1,189 @@
+
#[cfg(test)]
+
mod test;
+

+
use std::borrow::Cow;
+

+
use crate::author::Author;
+

+
use super::{
+
    headers::Headers,
+
    trailers::{OwnedTrailer, Token, Trailer},
+
    CommitData,
+
};
+

+
#[derive(Debug, thiserror::Error)]
+
pub enum ParseError {
+
    #[error("the provided commit data contained invalid UTF-8")]
+
    Utf8(#[source] std::str::Utf8Error),
+
    #[error("the commit header is missing the 'tree' entry")]
+
    MissingTree,
+
    #[error("failed to parse 'tree' value: {0}")]
+
    InvalidTree(#[source] Box<dyn std::error::Error + Send + Sync + 'static>),
+
    #[error("invalid format: {reason}")]
+
    InvalidFormat { reason: &'static str },
+
    #[error("failed to parse 'parent' value: {0}")]
+
    InvalidParent(#[source] Box<dyn std::error::Error + Send + Sync + 'static>),
+
    #[error("invalid header")]
+
    InvalidHeader,
+
    #[error("failed to parse 'author' value: {0}")]
+
    InvalidAuthor(#[source] Box<dyn std::error::Error + Send + Sync + 'static>),
+
    #[error("the commit header is missing the 'author' entry")]
+
    MissingAuthor,
+
    #[error("failed to parse 'committer' value: {0}")]
+
    InvalidCommitter(#[source] Box<dyn std::error::Error + Send + Sync + 'static>),
+
    #[error("the commit header is missing the 'committer' entry")]
+
    MissingCommitter,
+
}
+

+
pub(super) fn parse<Tree: std::str::FromStr, Parent: std::str::FromStr>(
+
    commit: &str,
+
) -> Result<CommitData<Tree, Parent>, ParseError>
+
where
+
    Tree::Err: std::error::Error + Send + Sync + 'static,
+
    Parent::Err: std::error::Error + Send + Sync + 'static,
+
{
+
    // The header and body are separated by the first blank line.
+
    let (header, body) = commit.split_once("\n\n").ok_or(ParseError::InvalidFormat {
+
        reason: "commit headers and body must be separated by a blank line",
+
    })?;
+

+
    let (tree, parents, author, committer, headers) =
+
        parse_headers::<Tree, Parent, Author>(header)?;
+

+
    let (message, trailers) = parse_body(body);
+

+
    Ok(CommitData {
+
        tree,
+
        parents,
+
        author,
+
        committer,
+
        headers,
+
        message,
+
        trailers,
+
    })
+
}
+

+
fn parse_headers<Tree: std::str::FromStr, Parent: std::str::FromStr, Signature: std::str::FromStr>(
+
    header: &str,
+
) -> Result<(Tree, Vec<Parent>, Signature, Signature, Headers), ParseError>
+
where
+
    Tree::Err: std::error::Error + Send + Sync + 'static,
+
    Parent::Err: std::error::Error + Send + Sync + 'static,
+
    Signature::Err: std::error::Error + Send + Sync + 'static,
+
{
+
    let mut lines = header.lines();
+

+
    let tree = lines
+
        .next()
+
        .ok_or(ParseError::MissingTree)?
+
        .strip_prefix("tree ")
+
        .map(Tree::from_str)
+
        .transpose()
+
        .map_err(|err| ParseError::InvalidTree(Box::new(err)))?
+
        .ok_or(ParseError::MissingTree)?;
+

+
    let mut parents = Vec::new();
+
    let mut author: Option<Signature> = None;
+
    let mut committer: Option<Signature> = None;
+
    let mut headers = Headers::new();
+

+
    for line in lines {
+
        // Check if a signature is still being parsed
+
        if let Some(rest) = line.strip_prefix(' ') {
+
            let value: &mut String =
+
                headers
+
                    .0
+
                    .last_mut()
+
                    .map(|(_, v)| v)
+
                    .ok_or(ParseError::InvalidFormat {
+
                        reason: "failed to parse extra header",
+
                    })?;
+
            value.push('\n');
+
            value.push_str(rest);
+
            continue;
+
        }
+

+
        if let Some((name, value)) = line.split_once(' ') {
+
            match name {
+
                "parent" => parents.push(
+
                    value
+
                        .parse::<Parent>()
+
                        .map_err(|err| ParseError::InvalidParent(Box::new(err)))?,
+
                ),
+
                "author" => {
+
                    author = Some(
+
                        value
+
                            .parse::<Signature>()
+
                            .map_err(|err| ParseError::InvalidAuthor(Box::new(err)))?,
+
                    )
+
                }
+
                "committer" => {
+
                    committer = Some(
+
                        value
+
                            .parse::<Signature>()
+
                            .map_err(|err| ParseError::InvalidCommitter(Box::new(err)))?,
+
                    )
+
                }
+
                _ => headers.push(name, value),
+
            }
+
            continue;
+
        }
+
    }
+

+
    Ok((
+
        tree,
+
        parents,
+
        author.ok_or(ParseError::MissingAuthor)?,
+
        committer.ok_or(ParseError::MissingCommitter)?,
+
        headers,
+
    ))
+
}
+

+
/// Split the commit body (the portion after the first `\n\n` in the object)
+
/// into a message string and a list of trailers.
+
///
+
/// Trailers are only separated out when the last paragraph of the body
+
/// consists entirely of valid `Token: value` lines. If parsing the last
+
/// paragraph as trailers fails for any line, the whole body is returned as
+
/// the message with an empty trailer list.
+
fn parse_body(body: &str) -> (String, Vec<OwnedTrailer>) {
+
    // Strip the single trailing newline that Display always writes after the
+
    // message, so that rfind("\n\n") reliably finds the trailer separator
+
    // rather than a spurious match at the very end.
+
    let body = body.trim_end_matches('\n');
+

+
    if let Some(split) = body.rfind("\n\n") {
+
        let candidate = &body[split + 2..];
+
        // Only treat non-empty paragraphs as trailers.
+
        if !candidate.trim().is_empty() {
+
            if let Some(trailers) = try_parse_trailers(candidate) {
+
                return (body[..split].to_string(), trailers);
+
            }
+
        }
+
    }
+

+
    (body.to_string(), Vec::new())
+
}
+

+
/// Attempt to parse every non-empty line in `s` as a `Token: value` trailer.
+
///
+
/// Returns `None` if any line is not a valid trailer, so that the caller can
+
/// fall back to treating the whole paragraph as part of the message.
+
fn try_parse_trailers(s: &str) -> Option<Vec<OwnedTrailer>> {
+
    s.lines()
+
        .filter(|l| !l.is_empty())
+
        .map(|line| {
+
            let (token_str, value) = line.split_once(": ")?;
+
            let token = Token::try_from(token_str).ok()?;
+
            // Round-trip through Trailer so that OwnedToken construction
+
            // stays inside the trailers module where the private field lives.
+
            Some(
+
                Trailer {
+
                    token,
+
                    value: Cow::Borrowed(value),
+
                }
+
                .to_owned(),
+
            )
+
        })
+
        .collect()
+
}
added crates/radicle-git-metadata/src/commit/parse/test.rs
@@ -0,0 +1,3 @@
+
mod error;
+
mod success;
+
mod unit;
added crates/radicle-git-metadata/src/commit/parse/test/error.rs
@@ -0,0 +1,154 @@
+
use crate::commit::parse::{parse, ParseError};
+

+
/// Test double whose `FromStr` implementation rejects every input.
///
/// `String::from_str` can never fail, so it cannot be used to reach
/// `ParseError::InvalidTree` or `ParseError::InvalidParent`. This type
/// triggers those variants without depending on a concrete OID type.
#[derive(Debug)]
struct AlwaysFails;

/// Error returned by every attempt to parse an [`AlwaysFails`].
#[derive(Debug)]
struct AlwaysFailsErr;

impl std::fmt::Display for AlwaysFailsErr {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("always fails to parse")
    }
}

impl std::error::Error for AlwaysFailsErr {}

impl std::str::FromStr for AlwaysFails {
    type Err = AlwaysFailsErr;

    /// Ignores the input and reports failure unconditionally.
    fn from_str(_input: &str) -> Result<Self, AlwaysFailsErr> {
        Err(AlwaysFailsErr)
    }
}
+

+
#[test]
fn missing_header_body_separator() {
    // The input never contains a blank line, so headers and body cannot be
    // told apart.
    let raw = "tree abc123\nauthor Alice Liddell <alice@example.com> 1700000000 +0000\ncommitter Alice Liddell <alice@example.com> 1700000000 +0000\nno blank line follows";

    match parse::<String, String>(raw).unwrap_err() {
        ParseError::InvalidFormat { .. } => {}
        err => panic!("unexpected error: {err}"),
    }
}
+

+
#[test]
fn missing_tree_empty_header() {
    // The input starts with the separator, leaving an empty header section.
    let raw = "\n\nMessage";

    match parse::<String, String>(raw).unwrap_err() {
        ParseError::MissingTree => {}
        err => panic!("unexpected error: {err}"),
    }
}
+

+
#[test]
fn missing_tree_wrong_first_line() {
    // Headers are present, but the first one is not "tree <oid>".
    let raw = "author Alice Liddell <alice@example.com> 1700000000 +0000\ncommitter Alice Liddell <alice@example.com> 1700000000 +0000\n\nMessage";

    match parse::<String, String>(raw).unwrap_err() {
        ParseError::MissingTree => {}
        err => panic!("unexpected error: {err}"),
    }
}
+

+
#[test]
fn invalid_tree() {
    // The tree line exists, but the Tree type refuses to parse its value.
    let raw = "tree abc123\nauthor Alice Liddell <alice@example.com> 1700000000 +0000\ncommitter Alice Liddell <alice@example.com> 1700000000 +0000\n\nMessage";

    match parse::<AlwaysFails, String>(raw).unwrap_err() {
        ParseError::InvalidTree(_) => {}
        err => panic!("unexpected error: {err}"),
    }
}
+

+
#[test]
fn invalid_parent() {
    // The parent line exists, but the Parent type refuses to parse its value.
    let raw = "tree abc123\nparent bad-oid\nauthor Alice Liddell <alice@example.com> 1700000000 +0000\ncommitter Alice Liddell <alice@example.com> 1700000000 +0000\n\nMessage";

    match parse::<String, AlwaysFails>(raw).unwrap_err() {
        ParseError::InvalidParent(_) => {}
        err => panic!("unexpected error: {err}"),
    }
}
+

+
#[test]
fn invalid_format_continuation_without_preceding_header() {
    // Continuation lines (leading space) are folded into the most recent
    // entry of the extra-headers vec. Author and committer are not stored
    // there, so a continuation line right after them finds the vec empty
    // and must be rejected with InvalidFormat.
    let raw = "tree abc123\nauthor Alice Liddell <alice@example.com> 1700000000 +0000\ncommitter Alice Liddell <alice@example.com> 1700000000 +0000\n spurious continuation\n\nMessage";

    match parse::<String, String>(raw).unwrap_err() {
        ParseError::InvalidFormat { .. } => {}
        err => panic!("unexpected error: {err}"),
    }
}
+

+
#[test]
fn missing_author() {
    // Only tree and committer are given; the author header is absent.
    let raw =
        "tree abc123\ncommitter Alice Liddell <alice@example.com> 1700000000 +0000\n\nMessage";

    match parse::<String, String>(raw).unwrap_err() {
        ParseError::MissingAuthor => {}
        err => panic!("unexpected error: {err}"),
    }
}
+

+
#[test]
fn invalid_author() {
    // The author header exists, but its value lacks the email bracket,
    // timestamp and timezone offset, so it must be rejected.
    let raw = "tree abc123\nauthor not-a-valid-author\ncommitter Alice Liddell <alice@example.com> 1700000000 +0000\n\nMessage";

    match parse::<String, String>(raw).unwrap_err() {
        ParseError::InvalidAuthor(_) => {}
        err => panic!("unexpected error: {err}"),
    }
}
+

+
#[test]
fn missing_committer() {
    // Only tree and author are given; the committer header is absent.
    let raw = "tree abc123\nauthor Alice Liddell <alice@example.com> 1700000000 +0000\n\nMessage";

    match parse::<String, String>(raw).unwrap_err() {
        ParseError::MissingCommitter => {}
        err => panic!("unexpected error: {err}"),
    }
}
+

+
#[test]
fn invalid_committer() {
    // The committer header exists, but its value must be rejected.
    let raw = "tree abc123\nauthor Alice Liddell <alice@example.com> 1700000000 +0000\ncommitter not-a-valid-committer\n\nMessage";

    match parse::<String, String>(raw).unwrap_err() {
        ParseError::InvalidCommitter(_) => {}
        err => panic!("unexpected error: {err}"),
    }
}
added crates/radicle-git-metadata/src/commit/parse/test/success.rs
@@ -0,0 +1,198 @@
+
use crate::commit::parse::parse;
+

+
#[test]
fn root_commit() {
    // A commit without parents, extra headers or trailers.
    let raw = r#"tree abc123
author Alice Liddell <alice@example.com> 1700000000 +0000
committer Alice Liddell <alice@example.com> 1700000000 +0000

Initial commit"#;

    let parsed = parse::<String, String>(raw).unwrap();
    let author = parsed.author();

    assert_eq!(parsed.tree(), "abc123");
    assert_eq!(parsed.parents().count(), 0);
    assert_eq!(author.name, "Alice Liddell");
    assert_eq!(author.email, "alice@example.com");
    assert_eq!(author.time.seconds(), 1700000000);
    assert_eq!(author.time.offset(), 0);
    assert_eq!(parsed.committer().name, "Alice Liddell");
    assert_eq!(parsed.message(), "Initial commit");
    assert_eq!(parsed.trailers().count(), 0);
    assert_eq!(parsed.headers().count(), 0);
}
+

+
#[test]
fn commit_with_single_parent() {
    // One `parent` header must end up as the only entry in the parent list.
    let raw = r#"tree def456
parent abc123
author Alice Liddell <alice@example.com> 1700000000 +0000
committer Alice Liddell <alice@example.com> 1700000000 +0000

Second commit"#;

    let parsed = parse::<String, String>(raw).unwrap();
    let parents = parsed.parents().collect::<Vec<_>>();

    assert_eq!(parsed.tree(), "def456");
    assert_eq!(parents, ["abc123"]);
    assert_eq!(parsed.message(), "Second commit");
}
+

+
#[test]
fn merge_commit() {
    // Two `parent` headers must be kept in the order they appear.
    let raw = r#"tree ghi789
parent abc123
parent def456
author Alice Liddell <alice@example.com> 1700000000 +0000
committer Alice Liddell <alice@example.com> 1700000000 +0000

Merge branch 'feature'"#;

    let parsed = parse::<String, String>(raw).unwrap();
    let parents = parsed.parents().collect::<Vec<_>>();

    assert_eq!(parents, ["abc123", "def456"]);
}
+

+
#[test]
fn commit_with_multiline_gpgsig() {
    // In the raw object every continuation line of `gpgsig` starts with one
    // space; the parser joins them into a single header value with embedded
    // newlines.
    let raw = r#"tree abc123
author Alice Liddell <alice@example.com> 1700000000 +0000
committer Alice Liddell <alice@example.com> 1700000000 +0000
gpgsig -----BEGIN SSH SIGNATURE-----
 AAAAB3NzaC1yc2EAAAADAQAB
 AAAA==
 -----END SSH SIGNATURE-----

Signed commit"#;

    let parsed = parse::<String, String>(raw).unwrap();

    assert_eq!(parsed.signatures().count(), 1);
    // The signature is the only extra header in this commit.
    assert_eq!(parsed.headers().count(), 1);
    assert_eq!(parsed.message(), "Signed commit");
}
+

+
#[test]
fn commit_gpgsig_is_preserved_and_strip_removes_it() {
    // The parsed commit still carries its gpgsig, so callers can read it
    // before calling strip_signatures.
    let raw = r#"tree abc123
author Alice Liddell <alice@example.com> 1700000000 +0000
committer Alice Liddell <alice@example.com> 1700000000 +0000
gpgsig -----BEGIN SSH SIGNATURE-----
 AAAA==
 -----END SSH SIGNATURE-----

Signed commit"#;

    let signed = parse::<String, String>(raw).unwrap();
    assert_eq!(signed.signatures().count(), 1);

    let unsigned = signed.strip_signatures();
    assert_eq!(unsigned.signatures().count(), 0);
    assert_eq!(unsigned.headers().count(), 0);
    assert_eq!(unsigned.message(), "Signed commit");
}
+

+
#[test]
fn commit_with_trailers() {
    // Every line of the final paragraph is a valid `Token: value` pair, so
    // the paragraph is moved into the trailers and dropped from the message.
    let raw = r#"tree abc123
author Alice Liddell <alice@example.com> 1700000000 +0000
committer Bob Bobson <bob@example.com> 1700000001 +0100

Add a new feature

This commit adds a new feature to the library.

Signed-off-by: Alice Liddell <alice@example.com>
Co-authored-by: Bob Bobson <bob@example.com>"#;

    let parsed = parse::<String, String>(raw).unwrap();

    assert_eq!(
        parsed.message(),
        "Add a new feature\n\nThis commit adds a new feature to the library."
    );

    let found: Vec<_> = parsed.trailers().collect();
    assert_eq!(found.len(), 2);
    assert_eq!(&*found[0].token, "Signed-off-by");
    assert_eq!(found[0].value, "Alice Liddell <alice@example.com>");
    assert_eq!(&*found[1].token, "Co-authored-by");
    assert_eq!(found[1].value, "Bob Bobson <bob@example.com>");
}
+

+
#[test]
fn commit_last_paragraph_kept_in_message_when_not_all_trailers() {
    // One non-trailer line in the final paragraph is enough to keep the
    // whole paragraph in the message and to extract no trailers at all.
    let raw = r#"tree abc123
author Alice Liddell <alice@example.com> 1700000000 +0000
committer Alice Liddell <alice@example.com> 1700000000 +0000

Add feature

Signed-off-by: Alice Liddell <alice@example.com>
This line is not a valid trailer."#;

    let parsed = parse::<String, String>(raw).unwrap();
    let message = parsed.message();

    assert_eq!(parsed.trailers().count(), 0);
    assert!(message.contains("Signed-off-by"));
    assert!(message.contains("This line is not a valid trailer."));
}
+

+
#[test]
fn commit_with_extra_headers() {
    // Headers other than tree/parent/author/committer are collected, in
    // order, as extra headers.
    let raw = r#"tree abc123
author Alice Liddell <alice@example.com> 1700000000 +0000
committer Alice Liddell <alice@example.com> 1700000000 +0000
encoding UTF-8
mergetag some-value

Commit with extra headers"#;

    let parsed = parse::<String, String>(raw).unwrap();
    let extra: Vec<_> = parsed.headers().collect();

    assert_eq!(extra.len(), 2);
    assert_eq!(extra[0], ("encoding", "UTF-8"));
    assert_eq!(extra[1], ("mergetag", "some-value"));
}
+

+
#[test]
fn roundtrip() {
    // Displaying a parsed commit and parsing that output again must give a
    // CommitData whose compared fields all match the first parse. This
    // checks in particular that Display and parse_body agree.
    let raw = r#"tree abc123
parent def456
author Alice Liddell <alice@example.com> 1700000000 +0000
committer Bob Bobson <bob@example.com> 1700000001 +0100

Add something useful

Signed-off-by: Alice Liddell <alice@example.com>"#;

    let first = parse::<String, String>(raw).unwrap();
    let rendered = first.to_string();
    let second = parse::<String, String>(&rendered).unwrap();

    assert_eq!(first.tree(), second.tree());
    assert_eq!(
        first.parents().collect::<Vec<_>>(),
        second.parents().collect::<Vec<_>>()
    );
    assert_eq!(first.author(), second.author());
    assert_eq!(first.committer(), second.committer());
    assert_eq!(first.message(), second.message());
    assert_eq!(
        first.trailers().collect::<Vec<_>>(),
        second.trailers().collect::<Vec<_>>()
    );
}
added crates/radicle-git-metadata/src/commit/parse/test/unit.rs
@@ -0,0 +1,38 @@
+
use crate::commit::parse::{parse_body, try_parse_trailers};
+

+
#[test]
fn body_no_paragraph_separator_means_no_trailers() {
    // Without a blank line there is no final paragraph to treat as trailers.
    let parsed = parse_body("Just a message with no blank line");

    assert_eq!(parsed.0, "Just a message with no blank line");
    assert!(parsed.1.is_empty());
}
+

+
#[test]
fn body_last_paragraph_not_trailers_stays_in_message() {
    // The final paragraph is plain prose, so the body must come back intact.
    let body = "Short description\n\nThis paragraph has no Token: value lines.";

    let parsed = parse_body(body);

    assert_eq!(parsed.0, body);
    assert!(parsed.1.is_empty());
}
+

+
#[test]
fn trailers_rejects_line_without_separator() {
    // Without a ": " separator the line cannot be split into token and value.
    let outcome = try_parse_trailers("NotATrailerLine");

    assert!(outcome.is_none());
}
+

+
#[test]
fn trailers_rejects_invalid_token_chars() {
    // Tokens may only contain alphanumerics and '-', so the space in
    // "Invalid Token" disqualifies the line.
    let outcome = try_parse_trailers("Invalid Token: value");

    assert!(outcome.is_none());
}
+

+
#[test]
fn trailers_accepts_empty_input() {
    // With no lines to reject, the helper yields an empty list instead of
    // None. parse_body never passes it an empty paragraph because of its
    // own is_empty() guard.
    let outcome = try_parse_trailers("");

    assert_eq!(outcome, Some(Vec::new()));
}