Radish alpha
h
rad:z3gqcJUoA1n9HaHKufZs5FCSGazv5
Radicle Heartwood Protocol & Stack
Radicle
Git
Perform `git gc` on an interval basis
Status: Open · opened by fintohaps 2 months ago

Reworked from 9f920f5d4639168302ffd9f2eef2c9e14bc5ea8d.

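Instead of running `git gc` after every fetch, run it for a given repository only when at least the configured minimum interval (`gcInterval`, 10 minutes by default) has elapsed since the last run. Each worker tracks the last run time per repository in memory.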

4 files changed +51 -5 1cab036c 19328a86
modified crates/radicle-cli/examples/rad-config.md
@@ -47,6 +47,7 @@ $ rad config
        "inbound": 128,
        "outbound": 16
      },
+
      "gcInterval": 600,
      "fetchPackReceive": "500.0 MiB"
    },
    "workers": 8,
modified crates/radicle-node/src/runtime.rs
@@ -242,6 +242,7 @@ impl Runtime {
            limit: FetchLimit::default(),
            local: nid,
            expiry: worker::garbage::Expiry::default(),
+
            gc_interval: config.limits.gc_interval.into(),
        };
        let pool = worker::Pool::with(
            worker_recv,
modified crates/radicle-node/src/worker.rs
@@ -5,6 +5,7 @@ mod upload_pack;
pub mod fetch;
pub mod garbage;

+
use std::collections::HashMap;
use std::path::PathBuf;

use crossbeam_channel as chan;
@@ -25,6 +26,7 @@ pub use radicle_protocol::worker::{

use crate::runtime::{thread, Handle};
use crate::wire::StreamId;
+
use crate::{LocalDuration, LocalTime};

pub use channels::{ChannelEvent, Channels, ChannelsConfig};

@@ -67,6 +69,8 @@ pub struct FetchConfig {
    /// Configuration for `git gc` garbage collection. Defaults to `1
    /// hour ago`.
    pub expiry: garbage::Expiry,
+
    /// Minimum interval between `git gc` runs per repository.
+
    pub gc_interval: LocalDuration,
}

/// A worker that replicates git objects.
@@ -80,6 +84,7 @@ struct Worker {
    notifications: notifications::StoreWriter,
    cache: cob::cache::StoreWriter,
    db: radicle::node::Database,
+
    last_gc: HashMap<RepoId, LocalTime>,
}

impl Worker {
@@ -210,6 +215,7 @@ impl Worker {
            limit,
            local,
            expiry,
+
            gc_interval,
        } = &self.fetch_config;
        // N.b. if the `rid` is blocked this will return an error, so
        // we won't continue with any further set up of the fetch.
@@ -236,11 +242,22 @@ impl Worker {
            refs_at,
        )?;

-
        if let Err(e) = garbage::collect(&self.storage, rid, *expiry) {
-
            // N.b. ensure that `git gc` works in debug mode.
-
            debug_assert!(false, "`git gc` failed: {e}");
-

-
            log::debug!(target: "worker", "Failed to run `git gc`: {e}");
+
        let now = LocalTime::now();
+
        let should_gc = self
+
            .last_gc
+
            .get(&rid)
+
            .map(|last| now - *last >= *gc_interval)
+
            .unwrap_or(true);
+
        if should_gc {
+
            if let Err(e) = garbage::collect(&self.storage, rid, *expiry) {
+
                // N.b. ensure that `git gc` works in debug mode.
+
                debug_assert!(false, "`git gc` failed: {e}");
+

+
                log::debug!(target: "worker", "Failed to run `git gc`: {e}");
+
            }
+
            self.last_gc.insert(rid, now);
+
        } else {
+
            log::debug!(target: "worker", "Skipping `git gc` for {rid}; recently ran");
        }
        Ok(result)
    }
@@ -276,6 +293,7 @@ impl Pool {
                notifications: notifications.clone(),
                cache: cache.clone(),
                db: db.clone(),
+
                last_gc: HashMap::new(),
            };
            let thread = thread::spawn(&nid, format!("worker#{i}"), || worker.run());

modified crates/radicle/src/node/config.rs
@@ -137,6 +137,9 @@ pub struct Limits {
    /// Connection limits.
    pub connection: ConnectionLimits,

+
    /// Minimum interval between `git gc` runs per repository.
+
    pub gc_interval: LimitGcInterval,
+

    /// Channel limits.
    pub fetch_pack_receive: FetchPackSizeLimit,
}
@@ -585,6 +588,29 @@ impl From<LimitGossipMaxAge> for LocalDuration {
    }
}

+
#[derive(Clone, Copy, Debug, Deserialize, Serialize, Eq, PartialEq)]
+
#[serde(transparent)]
+
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
+
pub struct LimitGcInterval(localtime::LocalDuration);
+

+
impl Default for LimitGcInterval {
+
    fn default() -> Self {
+
        Self(localtime::LocalDuration::from_mins(10))
+
    }
+
}
+

+
impl From<LimitGcInterval> for LocalDuration {
+
    fn from(value: LimitGcInterval) -> Self {
+
        value.0
+
    }
+
}
+

+
impl From<LocalDuration> for LimitGcInterval {
+
    fn from(value: LocalDuration) -> Self {
+
        Self(value)
+
    }
+
}
+

/// Create a new type (`$name`) around a given type (`$type`), with a provided
/// default (`$default`).
///