Radish alpha
h
Radicle Heartwood Protocol & Stack
Radicle
Git (anonymous pull)
Log in to clone via SSH
git-metadata: New crate extraction from radicle-git-ext
Lorenz Leutgeb committed 7 months ago
commit d01e2afacf6eec621a3cc040f35929e268af3525
parent cc2ea18ddcb1a7c045cabd1901c5ea1e29e560ce
7 files changed +621 -0
modified Cargo.lock
@@ -2915,6 +2915,13 @@ dependencies = [
]

[[package]]
+
name = "radicle-git-metadata"
+
version = "0.1.0"
+
dependencies = [
+
 "thiserror 1.0.69",
+
]
+

+
[[package]]
name = "radicle-git-ref-format"
version = "0.1.0"
dependencies = [
added crates/radicle-git-metadata/Cargo.toml
@@ -0,0 +1,13 @@
+
[package]
+
name = "radicle-git-metadata"
+
description = "Radicle structs that carry Git commit metadata"
+
homepage.workspace = true
+
repository.workspace = true
+
version = "0.1.0"
+
edition.workspace = true
+
license.workspace = true
+
keywords = ["radicle", "git", "metadata"]
+
rust-version.workspace = true
+

+
[dependencies]
+
thiserror = { workspace = true }

\ No newline at end of file
added crates/radicle-git-metadata/src/author.rs
@@ -0,0 +1,125 @@
+
use std::{
+
    fmt,
+
    num::ParseIntError,
+
    str::{self, FromStr},
+
};
+

+
use thiserror::Error;
+

+
/// The data for indicating authorship of an action within
+
/// [`crate::commit::CommitData`].
///
/// The `Display` form is `name <email> time`. The `<`/`>` restriction noted
/// on the fields below is not checked by this type; all fields are public.
+
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
+
pub struct Author {
+
    /// Name corresponding to `user.name` in the git config.
+
    ///
+
    /// Note: this must not contain `<` or `>`.
+
    pub name: String,
+
    /// Email corresponding to `user.email` in the git config.
+
    ///
+
    /// Note: this must not contain `<` or `>`.
+
    pub email: String,
+
    /// The time of this author's action.
+
    pub time: Time,
+
}
+

+
/// The time of a [`Author`]'s action.
///
/// The `Display` form is `<seconds> <sign><hh><mm>`.
+
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
+
pub struct Time {
+
    /// Seconds since the epoch.
    seconds: i64,
+
    /// Timezone offset in minutes (a textual `+0200` is stored as `120`).
    offset: i32,
+
}
+

+
impl Time {
+
    pub fn new(seconds: i64, offset: i32) -> Self {
+
        Self { seconds, offset }
+
    }
+

+
    /// Return the time, in seconds, since the epoch.
+
    pub fn seconds(&self) -> i64 {
+
        self.seconds
+
    }
+

+
    /// Return the timezone offset, in minutes.
+
    pub fn offset(&self) -> i32 {
+
        self.offset
+
    }
+

+
    fn from_components<'a>(cs: &mut impl Iterator<Item = &'a str>) -> Result<Self, ParseError> {
+
        let offset = match cs.next() {
+
            None => Err(ParseError::Missing("offset")),
+
            Some(offset) => Self::parse_offset(offset).map_err(ParseError::Offset),
+
        }?;
+
        let time = match cs.next() {
+
            None => return Err(ParseError::Missing("time")),
+
            Some(time) => time.parse::<i64>().map_err(ParseError::Time)?,
+
        };
+
        Ok(Self::new(time, offset))
+
    }
+

+
    fn parse_offset(offset: &str) -> Result<i32, ParseIntError> {
+
        // The offset is in the form of timezone offset,
+
        // e.g. +0200, -0100.  This needs to be converted into
+
        // minutes. The first two digits in the offset are the
+
        // number of hours in the offset, while the latter two
+
        // digits are the number of minutes in the offset.
+
        let tz_offset = offset.parse::<i32>()?;
+
        let hours = tz_offset / 100;
+
        let minutes = tz_offset % 100;
+
        Ok(hours * 60 + minutes)
+
    }
+
}
+

+
impl FromStr for Time {
+
    type Err = ParseError;
+

+
    fn from_str(s: &str) -> Result<Self, Self::Err> {
+
        Self::from_components(&mut s.split(' ').rev())
+
    }
+
}
+

+
impl fmt::Display for Time {
+
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+
        let sign = if self.offset.is_negative() { '-' } else { '+' };
+
        let hours = self.offset.abs() / 60;
+
        let minutes = self.offset.abs() % 60;
+
        write!(f, "{} {}{:0>2}{:0>2}", self.seconds, sign, hours, minutes)
+
    }
+
}
+

+
impl fmt::Display for Author {
+
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+
        write!(f, "{} <{}> {}", self.name, self.email, self.time,)
+
    }
+
}
+

+
/// Errors produced while parsing an [`Author`] or a [`Time`] from text.
#[derive(Debug, Error)]
+
pub enum ParseError {
+
    /// A required component was absent; the payload names it
    /// (`"offset"`, `"time"`, `"email"` or `"name"`).
    #[error("missing '{0}' while parsing person signature")]
+
    Missing(&'static str),
+
    /// The timezone offset could not be parsed as an integer.
    #[error("offset was incorrect format while parsing person signature")]
+
    Offset(#[source] ParseIntError),
+
    /// The seconds value could not be parsed as an integer.
    #[error("time was incorrect format while parsing person signature")]
+
    Time(#[source] ParseIntError),
+
}
+

+
impl FromStr for Author {
+
    type Err = ParseError;
+

+
    fn from_str(s: &str) -> Result<Self, Self::Err> {
+
        // Splitting the string in 4 subcomponents is expected to give back the
+
        // following iterator entries: timezone offset, time, email, and name
+
        let mut components = s.rsplitn(4, ' ');
+
        let time = Time::from_components(&mut components)?;
+
        let email = components
+
            .next()
+
            .ok_or(ParseError::Missing("email"))?
+
            .trim_matches(|c| c == '<' || c == '>')
+
            .to_owned();
+
        let name = components.next().ok_or(ParseError::Missing("name"))?;
+
        Ok(Self {
+
            name: name.to_owned(),
+
            email: email.to_owned(),
+
            time,
+
        })
+
    }
+
}
added crates/radicle-git-metadata/src/commit.rs
@@ -0,0 +1,179 @@
+
pub mod headers;
+
pub mod trailers;
+

+
use core::fmt;
+
use std::str;
+

+
use headers::{Headers, Signature};
+
use trailers::{OwnedTrailer, Trailer};
+

+
use crate::author::Author;
+

+
/// A git commit in its object description form, i.e. the output of
+
/// `git cat-file` for a commit object.
///
/// `Tree` and `Parent` are left generic; the `map_tree` and `map_parents`
/// methods convert them to other types.
+
#[derive(Debug)]
+
pub struct CommitData<Tree, Parent> {
+
    // The tree this commit points to.
    tree: Tree,
+
    // The parents of this commit, in the order they were supplied.
    parents: Vec<Parent>,
+
    // The `author` header.
    author: Author,
+
    // The `committer` header.
    committer: Author,
+
    // Remaining headers; excludes `tree`, `parent`, `author`, `committer`.
    headers: Headers,
+
    // The message body.
    message: String,
+
    // Trailers, written after the message by the `Display` impl.
    trailers: Vec<OwnedTrailer>,
+
}
+

+
impl<Tree, Parent> CommitData<Tree, Parent> {
+
    pub fn new<P, I, T>(
+
        tree: Tree,
+
        parents: P,
+
        author: Author,
+
        committer: Author,
+
        headers: Headers,
+
        message: String,
+
        trailers: I,
+
    ) -> Self
+
    where
+
        P: IntoIterator<Item = Parent>,
+
        I: IntoIterator<Item = T>,
+
        OwnedTrailer: From<T>,
+
    {
+
        let trailers = trailers.into_iter().map(OwnedTrailer::from).collect();
+
        let parents = parents.into_iter().collect();
+
        Self {
+
            tree,
+
            parents,
+
            author,
+
            committer,
+
            headers,
+
            message,
+
            trailers,
+
        }
+
    }
+

+
    /// The tree this commit points to.
+
    pub fn tree(&self) -> &Tree {
+
        &self.tree
+
    }
+

+
    /// The parents of this commit.
+
    pub fn parents(&self) -> impl Iterator<Item = Parent> + '_
+
    where
+
        Parent: Clone,
+
    {
+
        self.parents.iter().cloned()
+
    }
+

+
    /// The author of this commit, i.e. the header corresponding to `author`.
+
    pub fn author(&self) -> &Author {
+
        &self.author
+
    }
+

+
    /// The committer of this commit, i.e. the header corresponding to
+
    /// `committer`.
+
    pub fn committer(&self) -> &Author {
+
        &self.committer
+
    }
+

+
    /// The message body of this commit.
+
    pub fn message(&self) -> &str {
+
        &self.message
+
    }
+

+
    /// The [`Signature`]s found in this commit, i.e. the headers corresponding
+
    /// to `gpgsig`.
+
    pub fn signatures(&self) -> impl Iterator<Item = Signature> + '_ {
+
        self.headers.signatures()
+
    }
+

+
    /// The [`Headers`] found in this commit.
+
    ///
+
    /// Note: these do not include `tree`, `parent`, `author`, and `committer`.
+
    pub fn headers(&self) -> impl Iterator<Item = (&str, &str)> {
+
        self.headers.iter()
+
    }
+

+
    /// Iterate over the [`Headers`] values that match the provided `name`.
+
    pub fn values<'a>(&'a self, name: &'a str) -> impl Iterator<Item = &'a str> + 'a {
+
        self.headers.values(name)
+
    }
+

+
    /// Push a header to the end of the headers section.
+
    pub fn push_header(&mut self, name: &str, value: &str) {
+
        self.headers.push(name, value.trim());
+
    }
+

+
    pub fn trailers(&self) -> impl Iterator<Item = &OwnedTrailer> {
+
        self.trailers.iter()
+
    }
+

+
    /// Convert the `CommitData::tree` into a value of type `U`. The
+
    /// conversion function `f` can be fallible.
+
    ///
+
    /// For example, `map_tree` can be used to turn raw tree data into
+
    /// an `Oid` by writing it to a repository.
+
    pub fn map_tree<U, E, F>(self, f: F) -> Result<CommitData<U, Parent>, E>
+
    where
+
        F: FnOnce(Tree) -> Result<U, E>,
+
    {
+
        Ok(CommitData {
+
            tree: f(self.tree)?,
+
            parents: self.parents,
+
            author: self.author,
+
            committer: self.committer,
+
            headers: self.headers,
+
            message: self.message,
+
            trailers: self.trailers,
+
        })
+
    }
+

+
    /// Convert the [`CommitData::parents`] into a vector containing
+
    /// values of type `U`. The conversion function `f` can be
+
    /// fallible.
+
    ///
+
    /// For example, this can be used to resolve the object identifiers
+
    /// to their respective full commits.
+
    pub fn map_parents<U, E, F>(self, f: F) -> Result<CommitData<Tree, U>, E>
+
    where
+
        F: FnMut(Parent) -> Result<U, E>,
+
    {
+
        Ok(CommitData {
+
            tree: self.tree,
+
            parents: self
+
                .parents
+
                .into_iter()
+
                .map(f)
+
                .collect::<Result<Vec<_>, _>>()?,
+
            author: self.author,
+
            committer: self.committer,
+
            headers: self.headers,
+
            message: self.message,
+
            trailers: self.trailers,
+
        })
+
    }
+
}
+

+
impl<Tree: fmt::Display, Parent: fmt::Display> fmt::Display for CommitData<Tree, Parent> {
+
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+
        writeln!(f, "tree {}", self.tree)?;
+
        for parent in self.parents.iter() {
+
            writeln!(f, "parent {parent}")?;
+
        }
+
        writeln!(f, "author {}", self.author)?;
+
        writeln!(f, "committer {}", self.committer)?;
+

+
        for (name, value) in self.headers.iter() {
+
            writeln!(f, "{name} {}", value.replace('\n', "\n "))?;
+
        }
+
        writeln!(f)?;
+
        write!(f, "{}", self.message.trim())?;
+
        writeln!(f)?;
+

+
        if !self.trailers.is_empty() {
+
            writeln!(f)?;
+
        }
+
        for trailer in self.trailers.iter() {
+
            writeln!(f, "{}", Trailer::from(trailer).display(": "))?;
+
        }
+
        Ok(())
+
    }
+
}
added crates/radicle-git-metadata/src/commit/headers.rs
@@ -0,0 +1,168 @@
+
use core::fmt;
+
use std::borrow::Cow;
+

+
// Prefix (including the trailing newline) that marks an SSH signature.
const BEGIN_SSH: &str = "-----BEGIN SSH SIGNATURE-----\n";
+
// Prefix (including the trailing newline) that marks a PGP signature.
const BEGIN_PGP: &str = "-----BEGIN PGP SIGNATURE-----\n";
+

+
/// A collection of headers stored in a [`crate::commit::CommitData`].
+
///
+
/// Note: these do not include `tree`, `parent`, `author`, and `committer`.
///
/// Entries are `(name, value)` pairs kept in insertion order; the same name
/// may appear more than once.
+
#[derive(Clone, Debug, Default)]
+
pub struct Headers(pub(super) Vec<(String, String)>);
+

+
/// A `gpgsig` signature stored in a [`crate::commit::CommitData`].
///
/// Each variant holds the complete armored text, including its
/// `-----BEGIN ...-----` line.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Signature<'a> {
    /// A PGP signature, i.e. starts with `-----BEGIN PGP SIGNATURE-----`.
    Pgp(Cow<'a, str>),
    /// A SSH signature, i.e. starts with `-----BEGIN SSH SIGNATURE-----`.
    Ssh(Cow<'a, str>),
}
+

+
impl<'a> Signature<'a> {
+
    fn from_str(s: &'a str) -> Result<Self, UnknownScheme> {
+
        if s.starts_with(BEGIN_SSH) {
+
            Ok(Signature::Ssh(Cow::Borrowed(s)))
+
        } else if s.starts_with(BEGIN_PGP) {
+
            Ok(Signature::Pgp(Cow::Borrowed(s)))
+
        } else {
+
            Err(UnknownScheme)
+
        }
+
    }
+
}
+

+
impl fmt::Display for Signature<'_> {
+
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+
        match self {
+
            Signature::Pgp(pgp) => f.write_str(pgp.as_ref()),
+
            Signature::Ssh(ssh) => f.write_str(ssh.as_ref()),
+
        }
+
    }
+
}
+

+
/// Error returned when a signature starts with neither the SSH nor the PGP
/// armor marker.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct UnknownScheme;
+

+
impl Headers {
+
    pub fn new() -> Self {
+
        Headers(Vec::new())
+
    }
+

+
    pub fn iter(&self) -> impl Iterator<Item = (&str, &str)> {
+
        self.0.iter().map(|(k, v)| (k.as_str(), v.as_str()))
+
    }
+

+
    pub fn values<'a>(&'a self, name: &'a str) -> impl Iterator<Item = &'a str> + 'a {
+
        self.iter()
+
            .filter_map(move |(k, v)| (k == name).then_some(v))
+
    }
+

+
    pub fn signatures(&self) -> impl Iterator<Item = Signature> + '_ {
+
        self.0.iter().filter_map(|(k, v)| {
+
            if k == "gpgsig" {
+
                Signature::from_str(v).ok()
+
            } else {
+
                None
+
            }
+
        })
+
    }
+

+
    /// Push a header to the end of the headers section.
+
    pub fn push(&mut self, name: &str, value: &str) {
+
        self.0.push((name.to_owned(), value.trim().to_owned()));
+
    }
+
}
+

+
/// Errors returned by [`parse_commit_header`].
#[derive(Debug, thiserror::Error)]
+
pub enum ParseError {
+
    /// The input is empty or its first line does not start with `tree `.
    #[error("missing tree")]
+
    MissingTree,
+
    /// The `tree` value could not be parsed into the requested type.
    #[error("invalid tree")]
+
    InvalidTree,
+
    /// A continuation line (leading space) appeared before any extra header.
    #[error("invalid format")]
+
    InvalidFormat,
+
    /// A `parent` value could not be parsed into the requested type.
    #[error("invalid parent")]
+
    InvalidParent,
+
    // NOTE(review): not produced by `parse_commit_header` as written in this
    // file — confirm whether other callers construct it.
    #[error("invalid header")]
+
    InvalidHeader,
+
    /// The `author` value could not be parsed into the requested type.
    #[error("invalid author")]
+
    InvalidAuthor,
+
    /// No `author` line was found.
    #[error("missing author")]
+
    MissingAuthor,
+
    /// The `committer` value could not be parsed into the requested type.
    #[error("invalid committer")]
+
    InvalidCommitter,
+
    /// No `committer` line was found.
    #[error("missing committer")]
+
    MissingCommitter,
+
}
+

+
pub fn parse_commit_header<
+
    Tree: std::str::FromStr,
+
    Parent: std::str::FromStr,
+
    Signature: std::str::FromStr,
+
>(
+
    header: &str,
+
) -> Result<(Tree, Vec<Parent>, Signature, Signature, Headers), ParseError> {
+
    let mut lines = header.lines();
+

+
    let tree = match lines.next() {
+
        Some(tree) => tree
+
            .strip_prefix("tree ")
+
            .map(Tree::from_str)
+
            .transpose()
+
            .map_err(|_| ParseError::InvalidTree)?
+
            .ok_or(ParseError::MissingTree)?,
+
        None => return Err(ParseError::MissingTree),
+
    };
+

+
    let mut parents = Vec::new();
+
    let mut author: Option<Signature> = None;
+
    let mut committer: Option<Signature> = None;
+
    let mut headers = Headers::new();
+

+
    for line in lines {
+
        // Check if a signature is still being parsed
+
        if let Some(rest) = line.strip_prefix(' ') {
+
            let value: &mut String = headers
+
                .0
+
                .last_mut()
+
                .map(|(_, v)| v)
+
                .ok_or(ParseError::InvalidFormat)?;
+
            value.push('\n');
+
            value.push_str(rest);
+
            continue;
+
        }
+

+
        if let Some((name, value)) = line.split_once(' ') {
+
            match name {
+
                "parent" => parents.push(
+
                    value
+
                        .parse::<Parent>()
+
                        .map_err(|_| ParseError::InvalidParent)?,
+
                ),
+
                "author" => {
+
                    author = Some(
+
                        value
+
                            .parse::<Signature>()
+
                            .map_err(|_| ParseError::InvalidAuthor)?,
+
                    )
+
                }
+
                "committer" => {
+
                    committer = Some(
+
                        value
+
                            .parse::<Signature>()
+
                            .map_err(|_| ParseError::InvalidCommitter)?,
+
                    )
+
                }
+
                _ => headers.push(name, value),
+
            }
+
            continue;
+
        }
+
    }
+

+
    Ok((
+
        tree,
+
        parents,
+
        author.ok_or(ParseError::MissingAuthor)?,
+
        committer.ok_or(ParseError::MissingCommitter)?,
+
        headers,
+
    ))
+
}
added crates/radicle-git-metadata/src/commit/trailers.rs
@@ -0,0 +1,127 @@
+
use std::{borrow::Cow, fmt, ops::Deref};
+

+
/// Chooses the separator string to use for a given trailer [`Token`].
pub trait Separator<'a> {
+
    /// Return the separator to use for `token`.
    fn sep_for(&self, token: &Token) -> &'a str;
+
}
+

+
/// A plain string is used as the separator for every token.
impl<'a> Separator<'a> for &'a str {
+
    fn sep_for(&self, _: &Token) -> &'a str {
+
        self
+
    }
+
}
+

+
/// A function or closure picks the separator per token.
impl<'a, F> Separator<'a> for F
+
where
+
    F: Fn(&Token) -> &'a str,
+
{
+
    fn sep_for(&self, token: &Token) -> &'a str {
+
        self(token)
+
    }
+
}
+

+
/// The key of a [`Trailer`], borrowed from the underlying text.
///
/// The `TryFrom<&str>` conversion only accepts strings made of alphanumeric
/// characters and `-`.
#[derive(Debug, Clone, Eq, PartialEq)]
+
pub struct Token<'a>(&'a str);
+

+
/// Gives read access to the token text as a `str`.
impl Deref for Token<'_> {
+
    type Target = str;
+

+
    fn deref(&self) -> &Self::Target {
+
        self.0
+
    }
+
}
+

+
impl<'a> TryFrom<&'a str> for Token<'a> {
+
    type Error = &'static str;
+

+
    fn try_from(s: &'a str) -> Result<Self, Self::Error> {
+
        let is_token = s.chars().all(|c| c.is_alphanumeric() || c == '-');
+
        if is_token {
+
            Ok(Token(s))
+
        } else {
+
            Err("token contains invalid characters")
+
        }
+
    }
+
}
+

+
/// Formatting adapter that writes a [`Trailer`] as its token, a separator
/// and its value. Created by [`Trailer::display`].
pub struct Display<'a> {
+
    // The trailer to render.
    trailer: &'a Trailer<'a>,
+
    // Text written between the token and the value.
    separator: &'a str,
+
}
+

+
impl fmt::Display for Display<'_> {
+
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+
        write!(
+
            f,
+
            "{}{}{}",
+
            self.trailer.token.deref(),
+
            self.separator,
+
            self.trailer.value,
+
        )
+
    }
+
}
+

+
/// A trailer is a key/value pair found in the last paragraph of a Git
+
/// commit message, not including any patches or conflicts that may be
+
/// present.
///
/// This form borrows its token; see [`OwnedTrailer`] for the owning form.
+
#[derive(Debug, Clone, Eq, PartialEq)]
+
pub struct Trailer<'a> {
+
    /// The key of the trailer.
    pub token: Token<'a>,
+
    /// The value of the trailer, borrowed or owned.
    pub value: Cow<'a, str>,
+
}
+

+
impl<'a> Trailer<'a> {
+
    pub fn display(&'a self, separator: &'a str) -> Display<'a> {
+
        Display {
+
            trailer: self,
+
            separator,
+
        }
+
    }
+

+
    pub fn to_owned(&self) -> OwnedTrailer {
+
        OwnedTrailer::from(self)
+
    }
+
}
+

+
/// A version of the [`Trailer`] which owns its token and
/// value. Useful for when you need to carry trailers around in a long
/// lived data structure.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct OwnedTrailer {
    /// The key of the trailer.
    pub token: OwnedToken,
    /// The value of the trailer.
    pub value: String,
}

/// The owning counterpart of [`Token`].
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct OwnedToken(String);
+

+
impl Deref for OwnedToken {
+
    type Target = str;
+

+
    fn deref(&self) -> &Self::Target {
+
        &self.0
+
    }
+
}
+

+
impl<'a> From<&Trailer<'a>> for OwnedTrailer {
+
    fn from(t: &Trailer<'a>) -> Self {
+
        OwnedTrailer {
+
            token: OwnedToken(t.token.0.to_string()),
+
            value: t.value.to_string(),
+
        }
+
    }
+
}
+

+
impl<'a> From<Trailer<'a>> for OwnedTrailer {
+
    fn from(t: Trailer<'a>) -> Self {
+
        (&t).into()
+
    }
+
}
+

+
impl<'a> From<&'a OwnedTrailer> for Trailer<'a> {
+
    fn from(t: &'a OwnedTrailer) -> Self {
+
        Trailer {
+
            token: Token(t.token.0.as_str()),
+
            value: Cow::from(&t.value),
+
        }
+
    }
+
}
added crates/radicle-git-metadata/src/lib.rs
@@ -0,0 +1,2 @@
+
pub mod author;
+
pub mod commit;