// Radish alpha
// r
// rad:z6cFWeWpnZNHh9rUW8phgA3b5yGt
// Git libraries for Radicle
// Radicle
// Git
// radicle-git radicle-surf src fs.rs
//! Definition for a file system consisting of `Directory` and `File`.
//!
//! A `Directory` is expected to be a non-empty tree of directories and files.
//! See [`Directory`] for more information.

use std::{
    cmp::Ordering,
    collections::BTreeMap,
    convert::{Infallible, Into as _},
    path::{Path, PathBuf},
};

use git2::Blob;
use radicle_git_ext::{is_not_found_err, Oid};
use radicle_std_ext::result::ResultExt as _;
use url::Url;

use crate::{Repository, Revision};

/// Error types returned by the file system operations of this module.
pub mod error {
    use std::path::PathBuf;

    use thiserror::Error;

    /// Errors that can occur while resolving a directory or one of its
    /// entries.
    #[derive(Debug, Error, PartialEq)]
    pub enum Directory {
        /// An error reported by `git2`.
        #[error(transparent)]
        Git(#[from] git2::Error),
        /// An error that occurred while working with a file.
        #[error(transparent)]
        File(#[from] File),
        /// The given path cannot be used for a lookup, e.g. because it
        /// has no parent component.
        #[error("the path {0} is not valid")]
        InvalidPath(PathBuf),
        /// The entry at the given path is not of the expected type. The
        /// second field names the type that was expected.
        #[error("the entry at '{0}' must be of type {1}")]
        InvalidType(PathBuf, &'static str),
        /// The name of a tree entry was not valid UTF-8.
        #[error("the entry name was not valid UTF-8")]
        Utf8Error,
        /// No entry exists at the given path.
        #[error("the path {0} not found")]
        PathNotFound(PathBuf),
        /// An error that occurred while constructing a submodule entry.
        #[error(transparent)]
        Submodule(#[from] Submodule),
    }

    /// Errors that can occur while resolving a file.
    #[derive(Debug, Error, PartialEq)]
    pub enum File {
        /// An error reported by `git2`.
        #[error(transparent)]
        Git(#[from] git2::Error),
    }

    /// Errors that can occur while constructing a submodule entry.
    #[derive(Debug, Error, PartialEq)]
    pub enum Submodule {
        /// The URL of the submodule `name` was not valid UTF-8.
        #[error("URL is invalid utf-8 for submodule '{name}': {err}")]
        Utf8 {
            /// The name of the offending submodule.
            name: String,
            /// The underlying UTF-8 decoding error.
            #[source]
            err: std::str::Utf8Error,
        },
        /// The URL of the submodule `name` could not be parsed.
        #[error("failed to parse URL '{url}' for submodule '{name}': {err}")]
        ParseUrl {
            /// The name of the offending submodule.
            name: String,
            /// The URL text that failed to parse.
            url: String,
            /// The underlying URL parsing error.
            #[source]
            err: url::ParseError,
        },
    }
}

/// A `File` in a git repository.
///
/// The representation is lightweight: it only stores the name, the
/// location, and the [`Oid`] of the git blob that holds the file's
/// contents. The contents themselves are not loaded.
///
/// The name of a file can be retrieved via [`File::name`].
///
/// The [`FileContent`] of a file can be retrieved via
/// [`File::content`].
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct File {
    /// The name of the file.
    name: String,
    /// The relative path of the file, not including the `name`,
    /// in respect to the root of the git repository.
    prefix: PathBuf,
    /// The object identifier of the git blob of this file.
    id: Oid,
}

impl File {
    /// Construct a new `File`.
    ///
    /// The `path` must be the prefix location of the directory, and
    /// so should not end in `name`.
    ///
    /// The `id` must point to a git blob.
    pub(crate) fn new(name: String, prefix: PathBuf, id: Oid) -> Self {
        debug_assert!(
            !prefix.ends_with(&name),
            "prefix = {prefix:?}, name = {name}",
        );
        Self { name, prefix, id }
    }

    /// The name of this `File`.
    pub fn name(&self) -> &str {
        self.name.as_str()
    }

    /// The object identifier of this `File`.
    pub fn id(&self) -> Oid {
        self.id
    }

    /// Return the exact path for this `File`, including the `name` of
    /// the directory itself.
    ///
    /// The path is relative to the git repository root.
    pub fn path(&self) -> PathBuf {
        self.prefix.join(&self.name)
    }

    /// Return the [`Path`] where this `File` is located, relative to the
    /// git repository root.
    pub fn location(&self) -> &Path {
        &self.prefix
    }

    /// Get the [`FileContent`] for this `File`.
    ///
    /// # Errors
    ///
    /// This function will fail if it could not find the `git` blob
    /// for the `Oid` of this `File`.
    pub fn content<'a>(&self, repo: &'a Repository) -> Result<FileContent<'a>, error::File> {
        let blob = repo.find_blob(self.id)?;
        Ok(FileContent { blob })
    }
}

/// The contents of a [`File`].
///
/// To construct a `FileContent` use [`File::content`].
pub struct FileContent<'a> {
    /// The git blob that holds the file's bytes.
    blob: Blob<'a>,
}

impl<'a> FileContent<'a> {
    /// Return the file contents as a byte slice.
    pub fn as_bytes(&self) -> &[u8] {
        self.blob.content()
    }

    /// Return the size of the file contents.
    pub fn size(&self) -> usize {
        self.blob.size()
    }

    /// Creates a `FileContent` using a blob.
    pub(crate) fn new(blob: Blob<'a>) -> Self {
        Self { blob }
    }
}

/// A representation of a [`Directory`]'s entries.
///
/// The entries are keyed, and therefore ordered, by their name.
pub struct Entries {
    /// Every entry of the directory, keyed by the entry's name.
    listing: BTreeMap<String, Entry>,
}

impl Entries {
    /// Iterate over the name of each [`Entry`], in ascending name order.
    pub fn names(&self) -> impl Iterator<Item = &String> {
        self.iter().map(|(name, _)| name)
    }

    /// Iterate over each [`Entry`], in ascending name order.
    pub fn entries(&self) -> impl Iterator<Item = &Entry> {
        self.iter().map(|(_, entry)| entry)
    }

    /// Iterate over each name together with its [`Entry`], in ascending
    /// name order.
    pub fn iter(&self) -> impl Iterator<Item = (&String, &Entry)> {
        self.listing.iter()
    }
}

impl Iterator for Entries {
    type Item = Entry;

    /// Remove and return the remaining entry with the smallest name, or
    /// `None` once every entry has been yielded.
    fn next(&mut self) -> Option<Self::Item> {
        // `BTreeMap::pop_first` (stable since Rust 1.66) would do this in a
        // single step. TODO: confirm the crate's minimum supported Rust
        // version before switching to it.
        let first = self.listing.keys().next()?.clone();
        self.listing.remove(&first)
    }
}

/// An `Entry` is either a [`File`] entry, a [`Directory`] entry, or a
/// [`Submodule`] entry.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Entry {
    /// A file entry within a [`Directory`].
    File(File),
    /// A sub-directory of a [`Directory`].
    Directory(Directory),
    /// An entry that points to a submodule.
    Submodule(Submodule),
}

impl PartialOrd for Entry {
    /// Entries are totally ordered, so this always returns `Some` with
    /// the result of [`Ord::cmp`].
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(Ord::cmp(self, other))
    }
}

impl Ord for Entry {
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        match (self, other) {
            (Entry::File(x), Entry::File(y)) => x.name().cmp(y.name()),
            (Entry::File(_), Entry::Directory(_)) => Ordering::Less,
            (Entry::File(_), Entry::Submodule(_)) => Ordering::Less,
            (Entry::Directory(_), Entry::File(_)) => Ordering::Greater,
            (Entry::Submodule(_), Entry::File(_)) => Ordering::Less,
            (Entry::Directory(x), Entry::Directory(y)) => x.name().cmp(y.name()),
            (Entry::Directory(x), Entry::Submodule(y)) => x.name().cmp(y.name()),
            (Entry::Submodule(x), Entry::Directory(y)) => x.name().cmp(y.name()),
            (Entry::Submodule(x), Entry::Submodule(y)) => x.name().cmp(y.name()),
        }
    }
}

impl Entry {
    /// Get a label for the `Entriess`, either the name of the [`File`],
    /// the name of the [`Directory`], or the name of the [`Submodule`].
    pub fn name(&self) -> &String {
        match self {
            Entry::File(file) => &file.name,
            Entry::Directory(directory) => directory.name(),
            Entry::Submodule(submodule) => submodule.name(),
        }
    }

    pub fn path(&self) -> PathBuf {
        match self {
            Entry::File(file) => file.path(),
            Entry::Directory(directory) => directory.path(),
            Entry::Submodule(submodule) => submodule.path(),
        }
    }

    pub fn location(&self) -> &Path {
        match self {
            Entry::File(file) => file.location(),
            Entry::Directory(directory) => directory.location(),
            Entry::Submodule(submodule) => submodule.location(),
        }
    }

    /// Returns `true` if the `Entry` is a file.
    pub fn is_file(&self) -> bool {
        matches!(self, Entry::File(_))
    }

    /// Returns `true` if the `Entry` is a directory.
    pub fn is_directory(&self) -> bool {
        matches!(self, Entry::Directory(_))
    }

    pub(crate) fn from_entry(
        entry: &git2::TreeEntry,
        path: PathBuf,
        repo: &Repository,
    ) -> Result<Self, error::Directory> {
        let name = entry.name().ok_or(error::Directory::Utf8Error)?.to_string();
        let id = entry.id().into();

        match entry.kind() {
            Some(git2::ObjectType::Tree) => Ok(Self::Directory(Directory::new(name, path, id))),
            Some(git2::ObjectType::Blob) => Ok(Self::File(File::new(name, path, id))),
            Some(git2::ObjectType::Commit) => {
                let submodule = (!repo.is_bare())
                    .then(|| repo.find_submodule(&name))
                    .transpose()?;
                Ok(Self::Submodule(Submodule::new(name, path, submodule, id)?))
            }
            _ => Err(error::Directory::InvalidType(path, "tree or blob")),
        }
    }
}

/// A `Directory` is the representation of a file system directory, for a given
/// [`git` tree][git-tree].
///
/// The name of a directory can be retrieved via [`Directory::name`].
///
/// The [`Entries`] of a directory can be retrieved via
/// [`Directory::entries`].
///
/// [git-tree]: https://git-scm.com/book/en/v2/Git-Internals-Git-Objects
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Directory {
    /// The name of the directory.
    name: String,
    /// The relative path of the directory, not including the `name`,
    /// in respect to the root of the git repository.
    prefix: PathBuf,
    /// The object identifier of the git tree of this directory.
    id: Oid,
}

/// The name of the root directory of a repository: the empty string.
const ROOT_DIR: &str = "";

impl Directory {
    /// Creates a directory given its `tree_id`.
    ///
    /// The `name` and `prefix` are both set to be empty.
    pub(crate) fn root(id: Oid) -> Self {
        Self::new(ROOT_DIR.to_string(), PathBuf::new(), id)
    }

    /// Creates a directory given its `name` and `id`.
    ///
    /// The `path` must be the prefix location of the directory, and
    /// so should not end in `name`.
    ///
    /// The `id` must point to a `git` tree.
    pub(crate) fn new(name: String, prefix: PathBuf, id: Oid) -> Self {
        debug_assert!(
            name.is_empty() || !prefix.ends_with(&name),
            "prefix = {prefix:?}, name = {name}",
        );
        Self { name, prefix, id }
    }

    /// Get the name of the current `Directory`.
    pub fn name(&self) -> &String {
        &self.name
    }

    /// The object identifier of this `[Directory]`.
    pub fn id(&self) -> Oid {
        self.id
    }

    /// Return the exact path for this `Directory`, including the `name` of the
    /// directory itself.
    ///
    /// The path is relative to the git repository root.
    pub fn path(&self) -> PathBuf {
        self.prefix.join(&self.name)
    }

    /// Return the [`Path`] where this `Directory` is located, relative to the
    /// git repository root.
    pub fn location(&self) -> &Path {
        &self.prefix
    }

    /// Return the [`Entries`] for this `Directory`'s `Oid`.
    ///
    /// The resulting `Entries` will only resolve to this
    /// `Directory`'s entries. Any sub-directories will need to be
    /// resolved independently.
    ///
    /// # Errors
    ///
    /// This function will fail if it could not find the `git` tree
    /// for the `Oid`.
    pub fn entries(&self, repo: &Repository) -> Result<Entries, error::Directory> {
        let tree = repo.find_tree(self.id)?;

        let mut entries = BTreeMap::new();
        let mut error = None;
        let path = self.path();

        // Walks only the first level of entries. And `_entry_path` is always
        // empty for the first level.
        tree.walk(git2::TreeWalkMode::PreOrder, |_entry_path, entry| {
            match Entry::from_entry(entry, path.clone(), repo) {
                Ok(entry) => match entry {
                    Entry::File(_) => {
                        entries.insert(entry.name().clone(), entry);
                        git2::TreeWalkResult::Ok
                    }
                    Entry::Directory(_) => {
                        entries.insert(entry.name().clone(), entry);
                        // Skip nested directories
                        git2::TreeWalkResult::Skip
                    }
                    Entry::Submodule(_) => {
                        entries.insert(entry.name().clone(), entry);
                        git2::TreeWalkResult::Ok
                    }
                },
                Err(err) => {
                    error = Some(err);
                    git2::TreeWalkResult::Abort
                }
            }
        })?;

        match error {
            Some(err) => Err(err),
            None => Ok(Entries { listing: entries }),
        }
    }

    /// Find the [`Entry`] found at a non-empty `path`, if it exists.
    pub fn find_entry<P>(&self, path: &P, repo: &Repository) -> Result<Entry, error::Directory>
    where
        P: AsRef<Path>,
    {
        // Search the path in git2 tree.
        let path = path.as_ref();
        let git2_tree = repo.find_tree(self.id)?;
        let entry = git2_tree
            .get_path(path)
            .or_matches::<error::Directory, _, _>(is_not_found_err, || {
                Err(error::Directory::PathNotFound(path.to_path_buf()))
            })?;
        let parent = path
            .parent()
            .ok_or_else(|| error::Directory::InvalidPath(path.to_path_buf()))?;
        let root_path = self.path().join(parent);

        Entry::from_entry(&entry, root_path, repo)
    }

    /// Find the `Oid`, for a [`File`], found at `path`, if it exists.
    pub fn find_file<P>(&self, path: &P, repo: &Repository) -> Result<File, error::Directory>
    where
        P: AsRef<Path>,
    {
        match self.find_entry(path, repo)? {
            Entry::File(file) => Ok(file),
            _ => Err(error::Directory::InvalidType(
                path.as_ref().to_path_buf(),
                "file",
            )),
        }
    }

    /// Find the `Directory` found at `path`, if it exists.
    ///
    /// If `path` is `ROOT_DIR` (i.e. an empty path), returns self.
    pub fn find_directory<P>(&self, path: &P, repo: &Repository) -> Result<Self, error::Directory>
    where
        P: AsRef<Path>,
    {
        if path.as_ref() == Path::new(ROOT_DIR) {
            return Ok(self.clone());
        }

        match self.find_entry(path, repo)? {
            Entry::Directory(d) => Ok(d),
            _ => Err(error::Directory::InvalidType(
                path.as_ref().to_path_buf(),
                "directory",
            )),
        }
    }

    // TODO(fintan): This is going to be a bit trickier so going to leave it out for
    // now
    #[allow(dead_code)]
    fn fuzzy_find(_label: &Path) -> Vec<Self> {
        unimplemented!()
    }

    /// Get the total size, in bytes, of a `Directory`. The size is
    /// the sum of all files that can be reached from this `Directory`.
    pub fn size(&self, repo: &Repository) -> Result<usize, error::Directory> {
        self.traverse(repo, 0, &mut |size, entry| match entry {
            Entry::File(file) => Ok(size + file.content(repo)?.size()),
            Entry::Directory(dir) => Ok(size + dir.size(repo)?),
            Entry::Submodule(_) => Ok(size),
        })
    }

    /// Traverse the entire `Directory` using the `initial`
    /// accumulator and the function `f`.
    ///
    /// For each [`Entry::Directory`] this will recursively call
    /// [`Directory::traverse`] and obtain its [`Entries`].
    ///
    /// `Error` is the error type of the fallible function.
    /// `B` is the type of the accumulator.
    /// `F` is the fallible function that takes the accumulator and
    /// the next [`Entry`], possibly providing the next accumulator
    /// value.
    pub fn traverse<Error, B, F>(
        &self,
        repo: &Repository,
        initial: B,
        f: &mut F,
    ) -> Result<B, Error>
    where
        Error: From<error::Directory>,
        F: FnMut(B, &Entry) -> Result<B, Error>,
    {
        self.entries(repo)?
            .entries()
            .try_fold(initial, |acc, entry| match entry {
                Entry::File(_) => f(acc, entry),
                Entry::Directory(directory) => {
                    let acc = directory.traverse(repo, acc, f)?;
                    f(acc, entry)
                }
                Entry::Submodule(_) => f(acc, entry),
            })
    }
}

impl Revision for Directory {
    type Error = Infallible;

    /// A `Directory` already carries its object identifier, so this
    /// never fails and does not consult the repository.
    fn object_id(&self, _repo: &Repository) -> Result<Oid, Self::Error> {
        Ok(self.id())
    }
}

/// A representation of a Git [submodule] when encountered in a Git
/// repository.
///
/// [submodule]: https://git-scm.com/book/en/v2/Git-Tools-Submodules
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Submodule {
    /// The name of the submodule.
    name: String,
    /// The relative path of the submodule, not including the `name`,
    /// in respect to the root of the git repository.
    prefix: PathBuf,
    /// The commit pointer that Git provides when listing the submodule.
    id: Oid,
    /// The URL of the submodule, if it is defined.
    url: Option<Url>,
}

impl Submodule {
    /// Construct a new `Submodule`.
    ///
    /// The `path` must be the prefix location of the directory, and
    /// so should not end in `name`.
    ///
    /// The `id` is the commit pointer that Git provides when listing
    /// a submodule.
    pub fn new(
        name: String,
        prefix: PathBuf,
        submodule: Option<git2::Submodule>,
        id: Oid,
    ) -> Result<Self, error::Submodule> {
        let url = submodule
            .and_then(|module| {
                module
                    .opt_url_bytes()
                    .map(|bs| std::str::from_utf8(bs).map(|url| url.to_string()))
            })
            .transpose()
            .map_err(|err| error::Submodule::Utf8 {
                name: name.clone(),
                err,
            })?;
        let url = url
            .map(|url| {
                Url::parse(&url).map_err(|err| error::Submodule::ParseUrl {
                    name: name.clone(),
                    url,
                    err,
                })
            })
            .transpose()?;
        Ok(Self {
            name,
            prefix,
            id,
            url,
        })
    }

    /// The name of this `Submodule`.
    pub fn name(&self) -> &String {
        &self.name
    }

    /// Return the [`Path`] where this `Submodule` is located, relative to the
    /// git repository root.
    pub fn location(&self) -> &Path {
        &self.prefix
    }

    /// Return the exact path for this `Submodule`, including the
    /// `name` of the submodule itself.
    ///
    /// The path is relative to the git repository root.
    pub fn path(&self) -> PathBuf {
        self.prefix.join(&self.name)
    }

    /// The object identifier of this `Submodule`.
    ///
    /// Note that this does not exist in the parent `Repository`. A
    /// new `Repository` should be opened for the submodule.
    pub fn id(&self) -> Oid {
        self.id
    }

    /// The URL for the submodule, if it is defined.
    pub fn url(&self) -> &Option<Url> {
        &self.url
    }
}