Radish alpha
H
rad:z3QDZAW2FAfuLvihrhiyDC9fAD8G9
HardenedBSD Package Manager
Radicle
Git
HardenedBSD-pkg libpkg pkg_jobs_schedule.c
/*-
 * Copyright (c) 2024 The FreeBSD Foundation
 *
 * This software was developed by Isaac Freund <ifreund@freebsdfoundation.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <assert.h>

#include "pkg.h"
#include "pkg/vec.h"
#include "private/event.h"
#include "private/pkg.h"
#include "private/pkg_jobs.h"

#define dbg(x, ...) pkg_dbg(PKG_DBG_SCHEDULER, x, __VA_ARGS__)

extern struct pkg_ctx ctx;

static const char *
pkg_jobs_schedule_job_type_string(struct pkg_solved *job)
{
	switch (job->type) {
	case PKG_SOLVED_INSTALL:
		return "install";
	case PKG_SOLVED_DELETE:
		return "delete";
	case PKG_SOLVED_UPGRADE:
		return "upgrade";
	case PKG_SOLVED_UPGRADE_INSTALL:
		return "split upgrade install";
	case PKG_SOLVED_UPGRADE_REMOVE:
		return "split upgrade delete";
	default:
		assert(false);
	}
}

/*
 * Returns true if pkg a directly depends on pkg b.
 *
 * Checking only direct dependencies is sufficient to define the edges in a
 * graph that models indirect dependencies as well as long as all of the
 * intermediate dependencies are also nodes in the graph.
 */
static bool pkg_jobs_schedule_direct_depends(struct pkg *a, struct pkg *b)
{
	struct pkg_dep *dep = NULL;
	while (pkg_deps(a, &dep) == EPKG_OK) {
		if (STREQ(b->uid, dep->uid)) {
			return (true);
		}
	}
	return (false);
}

enum pkg_jobs_schedule_graph_edge_type {
	PKG_SCHEDULE_EDGE_NONE,
	PKG_SCHEDULE_EDGE_NEW_DEP_NEW,
	PKG_SCHEDULE_EDGE_OLD_DEP_OLD,
	PKG_SCHEDULE_EDGE_OLD_CONFLICT_NEW,
	PKG_SCHEDULE_EDGE_SPLIT_UPGRADE,
};

/*
 * Jobs are nodes in a directed graph. Edges represent job scheduling order
 * requirements. The existence of an edge from node A to node B indicates
 * that job A must be executed before job B.
 *
 * There is a directed edge from node A to node B if and only if
 * one of the following conditions holds:
 *
 * 1. B's new package depends on A's new package
 * 2. A's old package depends on B's old package
 * 3. A's old package conflicts with B's new package
 * 4. A and B are the two halves of a split upgrade job
 *    and A is the delete half.
 *
 * There is one exception made to rule 2 in order to avoid splitting in
 * the common case of upgrading packages X and Y where Xold depends on Yold
 * and Xnew depends on Ynew. In this case, rule 2 is ignored and there is no
 * edge from X to Y.
 */
static enum pkg_jobs_schedule_graph_edge_type
pkg_jobs_schedule_graph_edge(struct pkg_solved *a, struct pkg_solved *b)
{
	if (a == b) {
		return (PKG_SCHEDULE_EDGE_NONE);
	}

	if (a->xlink == b || b->xlink == a) {
		assert(a->xlink == b && b->xlink == a);
		assert(a->type == PKG_SOLVED_UPGRADE_INSTALL ||
		       a->type == PKG_SOLVED_UPGRADE_REMOVE);
		assert(b->type == PKG_SOLVED_UPGRADE_INSTALL ||
		       b->type == PKG_SOLVED_UPGRADE_REMOVE);
		assert(a->type != b->type);
		if (a->type == PKG_SOLVED_UPGRADE_REMOVE) {
			return (PKG_SCHEDULE_EDGE_SPLIT_UPGRADE);
		}
		return (PKG_SCHEDULE_EDGE_NONE);
	}

	/* TODO: These switches would be unnecessary if delete jobs used
	 * items[1] rather than items[0]. I suspect other cleanups could
	 * be made as well. */
	struct pkg *a_new = NULL;
	struct pkg *a_old = NULL;
	switch (a->type) {
	case PKG_SOLVED_INSTALL:
	case PKG_SOLVED_UPGRADE_INSTALL:
		a_new = a->items[0]->pkg;
		break;
	case PKG_SOLVED_DELETE:
	case PKG_SOLVED_UPGRADE_REMOVE:
		a_old = a->items[0]->pkg;
		break;
	case PKG_SOLVED_UPGRADE:
		a_new = a->items[0]->pkg;
		a_old = a->items[1]->pkg;
		break;
	default:
		assert(false);
	}

	struct pkg *b_new = NULL;
	struct pkg *b_old = NULL;
	switch (b->type) {
	case PKG_SOLVED_INSTALL:
	case PKG_SOLVED_UPGRADE_INSTALL:
		b_new = b->items[0]->pkg;
		break;
	case PKG_SOLVED_DELETE:
	case PKG_SOLVED_UPGRADE_REMOVE:
		b_old = b->items[0]->pkg;
		break;
	case PKG_SOLVED_UPGRADE:
		b_new = b->items[0]->pkg;
		b_old = b->items[1]->pkg;
		break;
	default:
		assert(false);
	}

	if (a_new != NULL && b_new != NULL &&
	    pkg_jobs_schedule_direct_depends(b_new, a_new)) {
		return (PKG_SCHEDULE_EDGE_NEW_DEP_NEW);
	}
	if (a_old != NULL && b_new != NULL) {
		struct pkg_conflict *conflict = NULL;
		while (pkg_conflicts(a_old, &conflict) == EPKG_OK) {
			if (STREQ(b_new->uid, conflict->uid)) {
				return (PKG_SCHEDULE_EDGE_OLD_CONFLICT_NEW);
			}
		}
	}
	if (a_old != NULL && b_old != NULL &&
	    pkg_jobs_schedule_direct_depends(a_old, b_old)) {
		if (!(a_new != NULL && b_new != NULL &&
		    pkg_jobs_schedule_direct_depends(a_new, b_new))) {
			return (PKG_SCHEDULE_EDGE_OLD_DEP_OLD);
		}
	}

	return (PKG_SCHEDULE_EDGE_NONE);
}

static void
pkg_jobs_schedule_dbg_job(pkg_solved_list *jobs, struct pkg_solved *job)
{
	if (ctx.debug_level < 4) {
		return;
	}

	dbg(4, "job: %s %s", pkg_jobs_schedule_job_type_string(job),
	    job->items[0]->pkg->uid);

	vec_foreach(*jobs, i) {
		struct pkg_solved *other = jobs->d[i];
		if (jobs->d[i] == NULL)
			continue;
		switch (pkg_jobs_schedule_graph_edge(job, other)) {
		case PKG_SCHEDULE_EDGE_NONE:
			break;
		case PKG_SCHEDULE_EDGE_NEW_DEP_NEW:
			dbg(4, "  edge to %s %s, new depends on new",
			    pkg_jobs_schedule_job_type_string(other),
			    other->items[0]->pkg->uid);
			break;
		case PKG_SCHEDULE_EDGE_OLD_DEP_OLD:
			dbg(4, "  edge to %s %s, old depends on old",
			    pkg_jobs_schedule_job_type_string(other),
			    other->items[0]->pkg->uid);
			break;
		case PKG_SCHEDULE_EDGE_OLD_CONFLICT_NEW:
			dbg(4, "  edge to %s %s, old conflicts with new",
			    pkg_jobs_schedule_job_type_string(other),
			    other->items[0]->pkg->uid);
			break;
		case PKG_SCHEDULE_EDGE_SPLIT_UPGRADE:
			dbg(4, "  edge to %s %s, split upgrade",
			    pkg_jobs_schedule_job_type_string(other),
			    other->items[0]->pkg->uid);
			break;
		default:
			assert(false);
		}
	}
}

static void
pkg_job_schedule_fprint_node_id(FILE *file, struct pkg_solved *node)
{
	fprintf(file, "\"%s %s\"", pkg_jobs_schedule_job_type_string(node),
		node->items[0]->pkg->uid);
}

static void
pkg_jobs_schedule_debug_dot_file(pkg_solved_list *nodes)
{
	const char *path = pkg_object_string(pkg_config_get("DEBUG_SCHEDULER_DOT_FILE"));
	if (path == NULL) {
		return;
	}
	FILE *file = fopen(path, "we");
	if (file == NULL) {
		pkg_emit_errno("fopen", path);
		return;
	}
	fprintf(file, "digraph {\n");
	for (size_t i = 0; i < vec_len(nodes); i++) {
		for (size_t j = 0; j < vec_len(nodes); j++) {
			enum pkg_jobs_schedule_graph_edge_type edge =
				pkg_jobs_schedule_graph_edge(nodes->d[i], nodes->d[j]);
			if (edge == PKG_SCHEDULE_EDGE_NONE) {
				continue;
			}
			pkg_job_schedule_fprint_node_id(file, nodes->d[i]);
			fprintf(file, " -> ");
			pkg_job_schedule_fprint_node_id(file, nodes->d[j]);
			fprintf(file, "\n");
			switch (edge) {
			case PKG_SCHEDULE_EDGE_NEW_DEP_NEW:
				fprintf(file, " [label=\"new dep new\"]\n");
				break;
			case PKG_SCHEDULE_EDGE_OLD_DEP_OLD:
				fprintf(file, " [label=\"old dep old\"]\n");
				break;
			case PKG_SCHEDULE_EDGE_OLD_CONFLICT_NEW:
				fprintf(file, " [label=\"old conflict new\"]\n");
				break;
			case PKG_SCHEDULE_EDGE_SPLIT_UPGRADE:
				fprintf(file, " [label=\"split upgrade\"]\n");
				break;
			case PKG_SCHEDULE_EDGE_NONE:
			default:
				assert(false);
			}
		}
	}
	fprintf(file, "}\n");
	fclose(file);
}


static bool
pkg_jobs_schedule_has_incoming_edge(pkg_solved_list *nodes,
    struct pkg_solved *node)
{
	vec_foreach(*nodes, i) {
		if (pkg_jobs_schedule_graph_edge(nodes->d[i], node) != PKG_SCHEDULE_EDGE_NONE) {
			return (true);
		}
	}
	return (false);
}

/*
 * Prioritizing the install jobs and deprioritizing the delete jobs of split
 * upgrades reduces the distance between the two halves of the split job in the
 * final execution order.
 */
static int
pkg_jobs_schedule_priority(struct pkg_solved *node)
{
	switch (node->type) {
	case PKG_SOLVED_UPGRADE_INSTALL:
		return 1;
	case PKG_SOLVED_UPGRADE_REMOVE:
		return -1;
	default:
		return 0;
	}
}

/* This comparison function is used as a tiebreaker in the topological sort. */
static int
pkg_jobs_schedule_cmp_available(struct pkg_solved *a, struct pkg_solved *b)
{
	int ret = pkg_jobs_schedule_priority(a) - pkg_jobs_schedule_priority(b);
	if (ret == 0) {
		/* Falling back to lexicographical ordering ensures that job execution
		 * order is always consistent and makes testing easier. */
		return strcmp(b->items[0]->pkg->uid, a->items[0]->pkg->uid);
	} else {
		return ret;
	}
}

/*
 * Move job nodes with no incoming edges from unavailable to available.
 * If changed is non-NULL, only check jobs with an incoming edge from changed. */
static void
pkg_jobs_schedule_update_available(pkg_solved_list *unavailable,
    pkg_solved_list *available, struct pkg_solved *changed)
{
	for (size_t i = 0; i < unavailable->len;) {
		struct pkg_solved *job = unavailable->d[i];
		if ((changed == NULL ||
		    pkg_jobs_schedule_graph_edge(changed, job) != PKG_SCHEDULE_EDGE_NONE) &&
		    !pkg_jobs_schedule_has_incoming_edge(unavailable, job) &&
		    !pkg_jobs_schedule_has_incoming_edge(available, job)) {
			vec_push(available, job);
			vec_swap_remove(unavailable, i);
		} else {
			i++;
		}
	}
}

/* Topological sort based on Kahn's algorithm with a tiebreaker */
static void
pkg_jobs_schedule_topological_sort(pkg_solved_list *jobs)
{
	pkg_solved_list unavailable = *jobs;
	pkg_solved_list available = vec_init();
	*jobs = (pkg_solved_list)vec_init();

	pkg_jobs_schedule_update_available(&unavailable, &available, NULL);

	while (available.len > 0) {
		/* Add the highest priority job from the set of available jobs
		 * to the sorted list */
		size_t max = 0;
		for (size_t i = 1; i < available.len; i++) {
			if (pkg_jobs_schedule_cmp_available(available.d[i], available.d[max]) > 0) {
				max = i;
			}
		}
		struct pkg_solved *node = available.d[max];
		vec_push(jobs, node);
		vec_swap_remove(&available, max);

		/* Again, place all job nodes with no incoming edges in the set
		 * of available jobs, ignoring any incoming edges from job nodes
		 * already added to the sorted list */
		pkg_jobs_schedule_update_available(&unavailable, &available, node);
	}

	/* The unavailable set will only be non-empty at this point if there is a
	 * cycle in the graph and all cycles must be eliminated by splitting
	 * upgrade jobs before calling this function. */
	assert(unavailable.len == 0);

	vec_free(&unavailable);
	vec_free(&available);
}

/*
 * This is a depth-first search that keeps track of the path taken to the
 * current node in the graph. If a node on this path is encountered a
 * second time a cycle has been found.
 */
static struct pkg_solved *
pkg_jobs_schedule_find_cycle(pkg_solved_list *jobs,
    struct pkg_solved **path, struct pkg_solved *node)
{
	/* Push node to path */
	assert(node->mark == PKG_SOLVED_CYCLE_MARK_NONE);
	node->mark = PKG_SOLVED_CYCLE_MARK_PATH;
	assert(node->path_prev == NULL);
	node->path_prev = *path;
	*path = node;

	vec_foreach(*jobs, i) {
		if (pkg_jobs_schedule_graph_edge(node, jobs->d[i]) != PKG_SCHEDULE_EDGE_NONE) {
			switch (jobs->d[i]->mark){
			case PKG_SOLVED_CYCLE_MARK_NONE:;
				struct pkg_solved *cycle =
				    pkg_jobs_schedule_find_cycle(jobs, path, jobs->d[i]);
				if (cycle != NULL) {
					return (cycle);
				}
				break;
			case PKG_SOLVED_CYCLE_MARK_DONE:
				break;
			case PKG_SOLVED_CYCLE_MARK_PATH:
				return (jobs->d[i]); /* Found a cycle */
			default:
				assert(false);
			}
		}
	}

	/* Pop node from path */
	assert(node->mark == PKG_SOLVED_CYCLE_MARK_PATH);
	node->mark = PKG_SOLVED_CYCLE_MARK_DONE;
	*path = node->path_prev;
	node->path_prev = NULL;

	return (NULL);
}

int pkg_jobs_schedule(struct pkg_jobs *j)
{
	pkg_jobs_schedule_debug_dot_file(&j->jobs);

	while (true) {
		dbg(3, "checking job scheduling graph for cycles...");

		vec_foreach(j->jobs, i) {
			j->jobs.d[i]->mark = PKG_SOLVED_CYCLE_MARK_NONE;
			j->jobs.d[i]->path_prev = NULL;

			pkg_jobs_schedule_dbg_job(&j->jobs, j->jobs.d[i]);
		}

		/* The graph may not be connected, in which case it is necessary to
		 * run multiple searches for cycles from different start nodes. */
		struct pkg_solved *path = NULL;
		struct pkg_solved *cycle = NULL;
		vec_foreach(j->jobs, i) {
			switch (j->jobs.d[i]->mark) {
			case PKG_SOLVED_CYCLE_MARK_NONE:
				cycle = pkg_jobs_schedule_find_cycle(&j->jobs, &path, j->jobs.d[i]);
				break;
			case PKG_SOLVED_CYCLE_MARK_DONE:
				break;
			case PKG_SOLVED_CYCLE_MARK_PATH:
			default:
				assert(false);
			}
			if (cycle != NULL) {
				break;
			}
		}

		if (cycle == NULL) {
			dbg(3, "no job scheduling graph cycles found");
			assert(path == NULL);
			break;
		}

		dbg(3, "job scheduling graph cycle found");
		assert(path != NULL);
		assert(path->path_prev != NULL);
		assert(path != cycle);

		/* Close the path into a cycle to eliminate some edge cases in the loop. */
		cycle->path_prev = path;

		/* Not all upgrade jobs would break the cycle if split.
		 * It is helpful to think of each upgrade job as two separate
		 * nodes, the remove half and the install half. If a cycle only
		 * involved one half of the upgrade job, splitting the upgrade
		 * job would not break the cycle or even change its length.
		 *
		 * Furthermore, since there is an edge from the remove half of
		 * an upgrade job to the install half, the install half must
		 * come *before* the remove half in the cycle. Otherwise
		 * splitting the upgrade job will only make the cycle one node
		 * longer.
		 */
		enum pkg_jobs_schedule_graph_edge_type out =
		    pkg_jobs_schedule_graph_edge(path, cycle);
		assert(out != PKG_SCHEDULE_EDGE_NONE);
		enum pkg_jobs_schedule_graph_edge_type in =
		    pkg_jobs_schedule_graph_edge(path->path_prev, path);
		assert(in != PKG_SCHEDULE_EDGE_NONE);
		while (true) {
			if (path->type == PKG_SOLVED_UPGRADE) {
				assert(out != PKG_SCHEDULE_EDGE_SPLIT_UPGRADE);
				assert(in != PKG_SCHEDULE_EDGE_SPLIT_UPGRADE);
				if ((out == PKG_SCHEDULE_EDGE_OLD_DEP_OLD ||
				     out == PKG_SCHEDULE_EDGE_OLD_CONFLICT_NEW) &&
				    (in == PKG_SCHEDULE_EDGE_NEW_DEP_NEW ||
				     in == PKG_SCHEDULE_EDGE_OLD_CONFLICT_NEW)) {
					/* Found an upgrade job that would break
					 * the cycle if split. */
					break;
				}
			}
			if (path == cycle) {
				pkg_emit_error("failed to break job scheduling cycle");
			 	return (EPKG_FATAL);
			}
			path = path->path_prev;
			assert(path != NULL);
			out = in;
			in = pkg_jobs_schedule_graph_edge(path->path_prev, path);
			assert(in != PKG_SCHEDULE_EDGE_NONE);
		}

		/* path is now the upgrade job chosen to be split */
		dbg(2, "splitting upgrade %s job", path->items[0]->pkg->uid);

		struct pkg_solved *new = xcalloc(1, sizeof(struct pkg_solved));
		new->type = PKG_SOLVED_UPGRADE_REMOVE;
		new->items[0] = path->items[1];
		new->xlink = path;
		path->type = PKG_SOLVED_UPGRADE_INSTALL;
		path->items[1] = NULL;
		path->xlink = new;
		vec_push(&j->jobs, new);
	}

	pkg_jobs_schedule_topological_sort(&j->jobs);

	dbg(3, "finished job scheduling");

	return (EPKG_OK);
}