This patch adds functionality to the CI broker to set a time limit on the adapter — radicle-ci-broker

The CI broker can now limit how long the adapter is allowed to run. The limit can be set in the configuration file, for example: max_run_time: 12765h. See https://docs.rs/duration-str/0.11.2/duration_str/ for the supported formats. The default is one hour.

In addition to limiting the run time, this patch changes how the adapter’s stdout and stderr output is captured. This should make the CI broker not get stuck if the adapter produces a lot of output to either stream.

Additionally, there is a limit to how much output the CI broker accepts from the adapter. If there is more output, the adapter process is terminated with extreme prejudice, and the CI run fails. The limit is not currently configurable, and is hard coded to 10 MiB. I can make this, too, configurable, if someone explains why they need it to be configurable.

All the interesting code is in the new module src/timeoutcmd.rs. The code is quite intricate, because it does several things concurrently: spawning a sub-process, feeding it input via stdin, capturing anything it writes to stdout or stderr, and terminating the sub-process if it runs for too long, produces too much output, or the main thread requests termination. Further, the output capturing is done in a way that allows the main thread to process the output in real time: this is needed for the CI broker to react to the adapter’s messages as they are received, without waiting until the adapter process ends. The module also does this without getting stuck or crashing in any of the scenarios I have been able to come up with. The end of the timeoutcmd.rs module has a number of tests that verify the code works in those scenarios, to make sure the module keeps working.

 checksum = "7d902e3d592a523def97af8f317b08ce16b7ab854c1985a0c671e6f15cebc236"
 [[package]]
 name = "arrayvec"
 version = "0.7.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50"
 [[package]]
 name = "as-slice"
 version = "0.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"

 ]
 [[package]]
 name = "chrono"
 version = "0.4.38"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401"
 dependencies = [
  "num-traits",
 ]
 [[package]]
 name = "cipher"
 version = "0.4.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"

 checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10"
 [[package]]
 name = "duration-str"
 version = "0.11.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "709d653e7c92498eb29fb86a2a6f0f3502b97530f33aedb32ef848d4d28b31a3"
 dependencies = [
  "chrono",
  "rust_decimal",
  "serde",
  "thiserror",
  "time",
  "winnow",
 ]
 [[package]]
 name = "ec25519"
 version = "0.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"

  "clap",
  "ctor",
  "culpa",
  "duration-str",
  "html-page",
  "radicle",
  "radicle-git-ext",

 ]
 [[package]]
 name = "rust_decimal"
 version = "1.36.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b082d80e3e3cc52b2ed634388d436fe1f4de6af5786cc2de9ba9737527bdf555"
 dependencies = [
  "arrayvec",
  "num-traits",
 ]
 [[package]]
 name = "rustix"
 version = "0.38.37"
 source = "registry+https://github.com/rust-lang/crates.io-index"

 checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
 [[package]]
 name = "winnow"
 version = "0.6.20"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "36c1fec1a2bb5866f07c25f68c26e565c4c200aebb96d7e55710c19d3e8ac49b"
 dependencies = [
  "memchr",
 ]
 [[package]]
 name = "xattr"
 version = "1.3.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"

 [dependencies]
 anyhow = "1.0.86"
 clap = { version = "4.5.11", features = ["derive", "wrap_help"] }
 duration-str = "0.11.2"
 html-page = "0.4.0"
 radicle-git-ext = "0.8.0"
 radicle-surf = { version = "0.22.0", default-features = false, features = ["serde"] }

 use std::{
     collections::HashMap,
     ffi::OsStr,
     io::{BufRead, BufReader, Read},
     path::{Path, PathBuf},
     process::{Command, Stdio},
     process::Command,
     time::Duration,
 };
 use crate::{

     msg::{MessageError, Request, Response},
     notif::NotificationSender,
     run::{Run, RunState},
     timeoutcmd::{TimeoutCommand, TimeoutError},
 };
 const NOT_EXITED: i32 = 999;

         run: &mut Run,
         db: &Db,
         run_notification: &NotificationSender,
         max_run_time: Duration,
     ) -> Result<(), AdapterError> {
         run.set_state(RunState::Triggered);
         db.update_run(run).map_err(AdapterError::UpdateRun)?;
         let x = self.run_helper(trigger, run, db, run_notification);
         let x = self.run_helper(trigger, run, db, run_notification, max_run_time);
         run.set_state(RunState::Finished);
         db.update_run(run).map_err(AdapterError::UpdateRun)?;

         run: &mut Run,
         db: &Db,
         run_notification: &NotificationSender,
         max_run_time: Duration,
     ) -> Result<(), AdapterError> {
         assert!(matches!(trigger, Request::Trigger { .. }));
         // Spawn the adapter sub-process.
         let mut child = Command::new(&self.bin)
             .stdin(Stdio::piped())
             .stdout(Stdio::piped())
             .stderr(Stdio::piped())
             .envs(self.envs())
             .spawn()
             .map_err(|e| AdapterError::SpawnAdapter(self.bin.clone(), e))?;
         let mut cmd = Command::new(&self.bin);
         cmd.envs(self.envs());
         let mut child = TimeoutCommand::new(max_run_time);
         child.feed_stdin(trigger.to_string().as_bytes());
         let child = child.spawn(cmd).map_err(|err| match err {
             TimeoutError::Spawn(_, err) => AdapterError::SpawnAdapter(self.bin.clone(), err),
             _ => AdapterError::TimeoutCommand(err),
         })?;
         run_notification.notify()?;
         // Write the request to trigger a run to the child's stdin.
         // Then close the pipe to prevent the child from trying to
         // read another message that will never be sent.
         {
             let stdin = child.stdin.take().ok_or(AdapterError::StdinHandle)?;
             trigger
                 .to_writer(stdin)
                 .map_err(AdapterError::RequestWrite)?;
         }
         // Get the child's stdout into a BufReader so that we can loop
         // over lines.
         let stdout = child.stdout.take().ok_or(AdapterError::StdoutHandle)?;
         let stdout = BufReader::new(stdout);
         let mut lines = stdout.lines();
         let stdout = child.stdout();
         if let Some(line) = lines.next() {
             let line = line.map_err(AdapterError::ReadLine)?;
         if let Some(line) = stdout.line() {
             let resp = Response::from_str(&line).map_err(AdapterError::ParseResponse)?;
             run_notification.notify()?;
             match resp {

                     run.set_result(result);
                     db.update_run(run).map_err(AdapterError::UpdateRun)?;
                 }
                 _ => return Err(AdapterError::NotFinished(resp)),
                 _ => {
                     child.kill().ok();
                     return Err(AdapterError::NotFinished(resp));
                 }
             }
         } else {
             logger::adapter_no_second_response();
             child.kill().ok();
             return Err(AdapterError::NoSecondMessage);
         }
         if let Some(line) = lines.next() {
             let line = line.map_err(AdapterError::ReadLine)?;
         if let Some(line) = stdout.line() {
             let resp = Response::from_str(&line).map_err(AdapterError::ParseResponse)?;
             logger::adapter_too_many_responses();
             child.kill().ok();
             return Err(AdapterError::TooMany(resp));
         }
         let wait = child.wait().map_err(AdapterError::Wait)?;
         let stderr = child.stderr();
         while let Some(line) = stderr.line() {
             logger::adapter_stderr_line(&line);
         }
         let result = child.wait().expect("FIXME");
         let mut stderr = child.stderr.take().ok_or(AdapterError::StderrHandle)?;
         let mut buf = vec![];
         stderr
             .read_to_end(&mut buf)
             .map_err(AdapterError::ReadStderr)?;
         let stderr = String::from_utf8_lossy(&buf);
         logger::adapter_result(wait.code(), &stderr);
         logger::debug2(format!(
             "wait result? {result:?} status.code: {:?}",
             result.status().code()
         ));
         if let Some(exit) = wait.code() {
         if result.timed_out() {
             logger::adapter_did_not_exit_voluntarily();
             return Err(AdapterError::Failed(NOT_EXITED));
         } else if let Some(exit) = result.status().code() {
             logger::adapter_result(exit);
             if exit != 0 {
                 return Err(AdapterError::Failed(exit));
             }
         } else {
             logger::adapter_did_not_exit_voluntarily();
             return Err(AdapterError::Failed(NOT_EXITED));
             logger::adapter_did_not_exit();
             return Err(AdapterError::Signal);
         }
         Ok(())

 #[derive(Debug, thiserror::Error)]
 pub enum AdapterError {
     /// Error from [`TimeoutCommand`] or [`RunningProcess`].
     #[error(transparent)]
     TimeoutCommand(#[from] crate::timeoutcmd::TimeoutError),
     /// Error from spawning a sub-process.
     #[error("failed to spawn a CI adapter sub-process: {0}")]
     SpawnAdapter(PathBuf, #[source] std::io::Error),
     /// Error creating Response from a string.
     #[error("failed to create a Response message from adapter output")]
     ParseResponse(#[source] MessageError),

     #[error("failed to write request to adapter stdin")]
     RequestWrite(#[source] MessageError),
     /// Error from spawning a sub-process.
     #[error("failed to spawn a CI adapter sub-process: {0}")]
     SpawnAdapter(PathBuf, #[source] std::io::Error),
     /// Error getting the file handle for the adapter's stdin.
     #[error("failed to get handle for adapter's stdin")]
     StdinHandle,

     #[error("child process failed with wait status {0}")]
     Failed(i32),
     /// Child process was killed.
     #[error("child process terminated by signal")]
     Signal,
     /// First message is not `Response::Triggered`
     #[error("adapter's first message is not 'triggered', but {0:?}")]
     NotTriggered(Response),
     /// There was no first response from adapter.
     #[error("adapter did not sent its first message")]
     NoFirstMessage,
     /// There was no second response from adapter.
     #[error("adapter did not sent its second message")]
     NoSecondMessage,
     /// Second message is not `Response::Finished`
     #[error("adapter's second message is not 'finished', but {0:?}")]
     NotFinished(Response),

 #[cfg(test)]
 mod test {
     use std::{fs::write, io::ErrorKind};
     use std::{fs::write, io::ErrorKind, time::Duration};
     use tempfile::{tempdir, NamedTempFile};

         test::{mock_adapter, trigger_request, TestResult},
     };
     const MAX: Duration = Duration::from_secs(10);
     fn db() -> anyhow::Result<Db> {
         let tmp = NamedTempFile::new()?;
         let db = Db::new(tmp.path())?;

         let mut run = run()?;
         let mut channel = NotificationChannel::new_run();
         let sender = channel.tx()?;
         Adapter::new(&bin).run(&trigger_request()?, &mut run, &db, &sender)?;
         Adapter::new(&bin).run(&trigger_request()?, &mut run, &db, &sender, MAX)?;
         assert_eq!(run.result(), Some(&RunResult::Success));
         Ok(())

         let mut run = run()?;
         let mut channel = NotificationChannel::new_run();
         let sender = channel.tx()?;
         let x = Adapter::new(&bin).run(&trigger_request()?, &mut run, &db, &sender);
         let x = Adapter::new(&bin).run(&trigger_request()?, &mut run, &db, &sender, MAX);
         match x {
             Ok(_) => (),

         let mut run = run()?;
         let mut channel = NotificationChannel::new_run();
         let sender = channel.tx()?;
         let x = Adapter::new(&bin).run(&trigger_request()?, &mut run, &db, &sender);
         let x = Adapter::new(&bin).run(&trigger_request()?, &mut run, &db, &sender, MAX);
         eprintln!("{x:#?}");
         assert!(matches!(x, Err(AdapterError::NoSecondMessage)));
         Ok(())
     }
     // The mock adapter below reads its input, prints only a "triggered"
     // response, and then exits without sending a second message.
     #[test]
     fn adapter_ends_ok_before_second_message() -> TestResult<()> {
         const ADAPTER: &str = r#"#!/bin/bash
 read
 echo '{"response":"triggered","run_id":{"id":"xyzzy"}}'
 "#;
         let tmp = tempdir()?;
         let bin = tmp.path().join("adapter.sh");
         mock_adapter(&bin, ADAPTER)?;
         let db = db()?;
         let mut run = run()?;
         let mut channel = NotificationChannel::new_run();
         let sender = channel.tx()?;
         let x = Adapter::new(&bin).run(&trigger_request()?, &mut run, &db, &sender, MAX);
         eprintln!("{x:#?}");
         // NOTE(review): the two assertions below are mutually exclusive: `x`
         // cannot match both `Failed(_)` and `NoSecondMessage`, so the test
         // cannot pass as written. One of them looks like a leftover; confirm
         // which error is expected for a missing second message and drop the
         // other.
         assert!(matches!(x, Err(AdapterError::Failed(_))));
         assert!(matches!(x, Err(AdapterError::NoSecondMessage)));
         Ok(())
     }

     fn run(&self, args: &Args, config: &Config) -> Result<(), CibError> {
         let profile = Profile::load().map_err(CibError::profile)?;
         let mut broker = Broker::new(config.db()).map_err(CibError::new_broker)?;
         let mut broker =
             Broker::new(config.db(), config.max_run_time()).map_err(CibError::new_broker)?;
         let spec =
             config
                 .adapter(&config.default_adapter)

     Run(RunCmd),
     Report(cibtoolcmd::ReportCmd),
     Trigger(cibtoolcmd::TriggerCmd),
     #[clap(hide = true)]
     Timeout(cibtoolcmd::TimeoutCmd),
 }
 impl Subcommand for Cmd {

             Self::Run(x) => x.run(args),
             Self::Report(x) => x.run(args),
             Self::Trigger(x) => x.run(args),
             Self::Timeout(x) => x.run(args),
         }
     }
 }

     #[error("programming error: failed to set up inter-thread notification channel")]
     Notification(#[source] NotificationError),
     #[error(transparent)]
     Timeout(#[from] radicle_ci_broker::timeoutcmd::TimeoutError),
 }

 mod trigger;
 pub use trigger::*;
 mod timeout;
 pub use timeout::*;

 use std::{
     process::Command,
     thread::sleep,
     time::{Duration, Instant},
 };
 use radicle_ci_broker::timeoutcmd::TimeoutCommand;
 use super::*;
 /// Run a Bash script as a sub-process under a time limit.
 ///
 /// This is meant for developer experimentation.
 #[derive(Parser)]
 pub struct TimeoutCmd {
     /// A Bash script to run. Should start with "exec", or time out won't work.
     #[clap(long)]
     script: String,
     /// Text to be fed to script via stdin.
     #[clap(long, default_value = "")]
     stdin: String,
     /// Generate at least this much data to feed to script via stdin.
     #[clap(long)]
     generate: Option<usize>,
     /// Terminate script after this many seconds.
     #[clap(long)]
     timeout: u64,
     /// Verbose output: show stdout and stderr output lines.
     #[clap(short, long)]
     verbose: bool,
     /// Don't empty stdout and stderr buffers, let them fill up.
     #[clap(long)]
     fill_buffers: bool,
     /// Kill script after this many seconds, unconditionally.
     #[clap(long)]
     kill_after: Option<u64>,
 }
 impl Leaf for TimeoutCmd {
     /// Run the script with `bash -c` under a [`TimeoutCommand`], feed it
     /// stdin, optionally drain its stdout and stderr, and print statistics
     /// about the run.
     fn run(&self, _args: &Args) -> Result<(), CibToolError> {
         let mut cmd = Command::new("bash");
         cmd.arg("-c").arg(&self.script);

         let limit = Duration::from_secs(self.timeout);
         let mut to = TimeoutCommand::new(limit);

         // Either synthesise at least `generate` bytes of input, or use the
         // text given on the command line.
         match self.generate {
             Some(bytes) => {
                 const CHUNK: &[u8] = b"hello, world\n";
                 let mut stdin: Vec<u8> = Vec::new();
                 while stdin.len() < bytes {
                     stdin.extend_from_slice(CHUNK);
                 }
                 to.feed_stdin(stdin.as_slice());
                 println!("generated stdin has {} bytes", stdin.len());
             }
             None => {
                 to.feed_stdin(self.stdin.as_bytes());
             }
         }

         let started = Instant::now();
         println!("spawn child");
         let running = to.spawn(cmd)?;

         // Optional unconditional kill, independent of the time limit.
         if let Some(secs) = self.kill_after {
             let delay = Duration::from_secs(secs);
             sleep(delay);
             running.kill().unwrap();
         }

         // Unless asked to let the buffers fill up, drain stdout first and
         // then stderr, counting the bytes seen on stdout.
         let mut stdout_bytes = 0;
         if !self.fill_buffers {
             let stdout = running.stdout();
             while let Some(line) = stdout.line() {
                 stdout_bytes += line.as_bytes().len();
                 if self.verbose {
                     println!("stdout: {line:?}");
                 }
             }
             println!("finished reading stdout");

             let stderr = running.stderr();
             while let Some(line) = stderr.line() {
                 if self.verbose {
                     println!("stderr: {line:?}");
                 }
             }
             println!("finished reading stderr");
         }

         let tor = running.wait()?;
         let elapsed = started.elapsed();
         let seconds = elapsed.as_secs_f64();
         let speed = (stdout_bytes as f64) / seconds;

         println!("stdout bytes: {stdout_bytes}");
         println!("duration: {} ms", elapsed.as_millis());
         println!("speed: {:.0} B/s", speed);
         println!("exit: {}", tor.status());
         println!("timed out? {}", tor.timed_out());
         Ok(())
     }
 }

 pub struct Broker {
     /// Default adapter, if any; `Broker::new` starts with `None`.
     default_adapter: Option<Adapter>,
     /// Adapters keyed by repository ID; empty after `Broker::new`.
     adapters: HashMap<RepoId, Adapter>,
     /// Time limit passed to `Adapter::run` when an adapter is run.
     max_run_time: Duration,
     /// Database opened from the filename given to `Broker::new`.
     db: Db,
 }
 impl Broker {
     #[allow(clippy::result_large_err)]
     pub fn new(db_filename: &Path) -> Result<Self, BrokerError> {
     pub fn new(db_filename: &Path, max_run_time: Duration) -> Result<Self, BrokerError> {
         logger::broker_db(db_filename);
         Ok(Self {
             default_adapter: None,
             adapters: HashMap::new(),
             max_run_time,
             db: Db::new(db_filename)?,
         })
     }

                     // We run the adapter, but if that fails, we just
                     // log the error. The `Run` value records the
                     // result of the run.
                     if let Err(e) = adapter.run(trigger, &mut run, &self.db, run_notification) {
                     if let Err(e) = adapter.run(
                         trigger,
                         &mut run,
                         &self.db,
                         run_notification,
                         self.max_run_time,
                     ) {
                         logger::error("failed to run adapter or it failed to run CI", &e);
                     }

 #[cfg(test)]
 mod test {
     use std::path::Path;
     use std::{path::Path, time::Duration};
     use tempfile::tempdir;
     use super::{Adapter, Broker, RepoId};

     };
     /// Build a `Broker` backed by the database at `filename`, with a
     /// one-second maximum run time.
     fn broker(filename: &Path) -> anyhow::Result<Broker> {
         Ok(Broker::new(filename, Duration::from_secs(1))?)
     }
     fn rid() -> anyhow::Result<RepoId> {

     collections::HashMap,
     fmt,
     path::{Path, PathBuf},
     time::Duration,
 };
 use duration_str::deserialize_duration;
 use serde::{Deserialize, Serialize};
 use crate::filter::EventFilter;
 const DEFAULT_MAX_RUN_TIME: Duration = Duration::from_secs(3600);
 const DEFAULT_STATUS_PAGE_UPDATE_INTERVAL: u64 = 10;
 #[derive(Debug, Serialize, Deserialize)]
 pub struct Config {
     /// Name of the default adapter; presumably a key into `adapters`
     /// (verify against `Config::adapter`).
     pub default_adapter: String,
     /// Adapter definitions keyed by a string, presumably the adapter name.
     pub adapters: HashMap<String, Adapter>,
     /// Maximum run time, parsed from a duration string such as `1min` by
     /// `duration_str::deserialize_duration`. When the field is missing,
     /// `default_max_run_time` supplies `DEFAULT_MAX_RUN_TIME`.
     // NOTE(review): only deserialization is customised here. With
     // `Serialize` derived, this field presumably serialises in serde's
     // default `Duration` form rather than as a duration string; confirm
     // that serialised configuration can be loaded again.
     #[serde(deserialize_with = "deserialize_duration")]
     #[serde(default = "default_max_run_time")]
     pub max_run_time: Duration,
     /// Event filters.
     pub filters: Vec<EventFilter>,
     /// Optional report directory.
     pub report_dir: Option<PathBuf>,
     /// Optional status update interval in seconds; presumably falls back to
     /// `DEFAULT_STATUS_PAGE_UPDATE_INTERVAL` when unset (TODO confirm).
     pub status_update_interval_seconds: Option<u64>,
     /// Path of the database file, as returned by `Config::db`.
     pub db: PathBuf,
 }
 /// Default for `Config::max_run_time`, used by serde when the field is
 /// missing. The `#[serde(default = "...")]` attribute takes a function
 /// path, so the constant is wrapped in a function.
 fn default_max_run_time() -> Duration {
     DEFAULT_MAX_RUN_TIME
 }
 impl Config {
     pub fn load(filename: &Path) -> Result<Self, ConfigError> {
         let config =

             .unwrap_or(DEFAULT_STATUS_PAGE_UPDATE_INTERVAL)
     }
     /// Return the configured maximum run time.
     pub fn max_run_time(&self) -> Duration {
         self.max_run_time
     }
     /// Return the path of the database file.
     pub fn db(&self) -> &Path {
         &self.db
     }

     #[error("failed to convert configuration into JSON")]
     ToJson(#[source] serde_json::Error),
 }
 #[cfg(test)]
 mod test {
     use super::*;
     #[test]
     #[allow(clippy::unwrap_used)]
     fn parse_config_yaml() {
         // An explicit `max_run_time` of "1min" must parse to 60 seconds.
         let yaml = r#"---
 default_adapter: foo
 adapters: {}
 filters: []
 db: "foo.db"
 max_run_time: 1min
 ...
 "#;
         let parsed = serde_yml::from_str::<Config>(yaml).unwrap();
         let expected = Duration::from_secs(60);
         assert_eq!(parsed.max_run_time(), expected);
     }
     #[test]
     #[allow(clippy::unwrap_used)]
     fn parse_config_yaml_without_max_run_time() {
         // With no `max_run_time` key, the default must be used.
         let yaml = r#"---
 default_adapter: foo
 adapters: {}
 filters: []
 db: "foo.db"
 ...
 "#;
         let parsed = serde_yml::from_str::<Config>(yaml).unwrap();
         let actual = parsed.max_run_time();
         assert_eq!(actual, DEFAULT_MAX_RUN_TIME);
     }
 }

 pub mod run;
 #[cfg(test)]
 pub mod test;
 pub mod timeoutcmd;
 pub mod util;

     error!(slog_scope::logger(), "too many response messages");
 }
 pub fn adapter_result(exit: Option<i32>, stderr: &str) {
     if let Some(exit) = exit {
         debug!(slog_scope::logger(), "adapter exit code"; "exit_code" => exit);
     } else {
         debug!(slog_scope::logger(), "adapter was terminated by signal");
     }
     for line in stderr.lines() {
         debug!(slog_scope::logger(), "adapter stderr"; "stderr" => line);
     }
 /// Log one line of adapter stderr output at debug level, under the
 /// `stderr` key.
 pub fn adapter_stderr_line(line: &str) {
     debug!(slog_scope::logger(), "adapter stderr"; "stderr" => line);
 }
 /// Log the adapter's exit code at debug level, under the `exit_code` key.
 pub fn adapter_result(exit: i32) {
     debug!(slog_scope::logger(), "adapter exit code"; "exit_code" => exit);
 }
 /// Warn that the adapter did not exit on its own.
 pub fn adapter_did_not_exit_voluntarily() {
     // NOTE(review): two warnings are emitted for one event. The first looks
     // like a leftover of the second, more specific message; confirm and
     // keep only one.
     warn!(slog_scope::logger(), "adapter did not exit voluntarily");
     warn!(
         slog_scope::logger(),
         "adapter did not exit voluntarily: terminated for taking too long"
     );
 }
 /// Warn that the adapter did not exit, with the hint that it was probably
 /// killed by a signal.
 pub fn adapter_did_not_exit() {
     warn!(
         slog_scope::logger(),
         "adapter did not exit: probably killed by signal"
     );
 }
 pub fn debug(msg: &str) {