Radish alpha
H
rad:z3QDZAW2FAfuLvihrhiyDC9fAD8G9
HardenedBSD Package Manager
Radicle
Git
Speed up vulnerability audit process
Eygene Ryabinkin committed 13 years ago
commit eed892dd5ac171d05b72daad47c54859e00c8edc
parent 8247aec
1 file changed +195 -15
modified pkg/audit.c
@@ -65,6 +65,52 @@ struct audit_entry {

SLIST_HEAD(audit_head, audit_entry);

+
/*
+
 * The _sorted stuff.
+
 *
+
 * We are using the optimized search based on the following observations:
+
 *
+
 * - number of VuXML entries is more likely to be far greater than
+
 *   the number of installed ports; thus we should try to optimize
+
 *   the walk through all entries for a given port;
+
 *
+
 * - fnmatch() is good and fast, but if we will compare the audit entry
+
 *   name prefix without globbing characters to the prefix of port name
+
 *   of the same length and they are different, there is no point to
+
 *   check the rest;
+
 *
+
 * - (most important bit): if parsed VuXML entries are lexicographically
+
 *   sorted per the largest prefix with no globbing characters and we
+
 *   know how many succeeding entries have the same prefix we can
+
 *
+
 *   a. skip the rest of the entries once the non-globbing prefix is
+
 *      lexicographically larger than the port name prefix of the
+
 *      same length: all successive prefixes will be larger as well;
+
 *
+
 *   b. if we have non-globbing prefix that is lexicographically smaller
+
 *      than port name prefix, we can skip all succeeding entries with
+
 *      the same prefix; and as some port names tend to repeat due to
+
 *      multiple vulnerabilities, it could be a large win.
+
 */
+
struct audit_entry_sorted {
+
	struct audit_entry *e;	/* Entry itself */
+
	size_t noglob_len;	/* Prefix without glob characters */
+
	size_t next_pfx_incr;	/* Index increment for the entry with
+
				   different prefix */
+
};
+

+
/*
+
 * Another small optimization to skip the beginning of the
+
 * VuXML entry array, if possible.
+
 *
+
 * audit_entry_first_byte_idx[ch] represents the index
+
 * of the first VuXML entry in the sorted array that has
+
 * its non-globbing prefix that is started with the character
+
 * 'ch'.  It allows to skip entries from the beginning of the
+
 * VuXML array that aren't relevant for the checked port name.
+
 */
+
static size_t audit_entry_first_byte_idx[256];
+

void
usage_audit(void)
{
@@ -251,6 +297,119 @@ parse_db(const char *path, struct audit_head *h)
	return EPKG_OK;
}

+
/*
+
 * Returns the length of the largest prefix without globbing
+
 * characters, as per fnmatch().
+
 */
+
static size_t
+
str_noglob_len(const char *s)
+
{
+
	size_t n;
+

+
	for (n = 0; s[n] && s[n] != '*' && s[n] != '?' &&
+
	    s[n] != '[' && s[n] != '{' && s[n] != '\\'; n++);
+

+
	return n;
+
}
+

+
/*
+
 * Helper for quicksort that lexicographically orders prefixes.
+
 */
+
static int
+
audit_entry_compare(const void *a, const void *b)
+
{
+
	struct audit_entry_sorted *e1, *e2;
+
	size_t min_len;
+
	int result;
+

+
	e1 = (struct audit_entry_sorted *)a;
+
	e2 = (struct audit_entry_sorted *)b;
+

+
	min_len = (e1->noglob_len < e2->noglob_len ?
+
	    e1->noglob_len : e2->noglob_len);
+
	result = strncmp(e1->e->pkgname, e2->e->pkgname, min_len);
+
	/*
+
	 * Additional check to see if some word is a prefix of an
+
	 * another one and, thus, should go before the former.
+
	 */
+
	if (result == 0) {
+
		if (e1->noglob_len < e2->noglob_len)
+
			result = -1;
+
		else if (e1->noglob_len > e2->noglob_len)
+
			result = 1;
+
	}
+

+
	return (result);
+
}
+

+
/*
+
 * Sorts VuXML entries and calculates increments to jump to the
+
 * next distinct prefix.
+
 */
+
static struct audit_entry_sorted *
+
preprocess_db(struct audit_head *h)
+
{
+
	struct audit_entry *e;
+
	struct audit_entry_sorted *ret;
+
	size_t i, n, tofill;
+

+
	n = 0;
+
	SLIST_FOREACH(e, h, next)
+
		n++;
+

+
	ret = (struct audit_entry_sorted *)calloc(n + 1, sizeof(ret[0]));
+
	if (ret == NULL)
+
		err(1, "calloc(audit_entry_sorted*)");
+
	bzero((void *)ret, (n + 1) * sizeof(ret[0]));
+

+
	n = 0;
+
	SLIST_FOREACH(e, h, next) {
+
		ret[n].e = e;
+
		ret[n].noglob_len = str_noglob_len(e->pkgname);
+
		ret[n].next_pfx_incr = 1;
+
		n++;
+
	}
+

+
	qsort(ret, n, sizeof(*ret), audit_entry_compare);
+

+
	/*
+
	 * Determining jump indexes to the next different prefix.
+
	 * Only non-1 increments are calculated there.
+
	 *
+
	 * Due to the current usage that picks only increment for the
+
	 * first of the non-unique prefixes in a row, we could
+
	 * calculate only that one and skip calculations for the
+
	 * succeeding, but for the uniformity and clarity we're
+
	 * calculating 'em all.
+
	 */
+
	for (n = 1, tofill = 0; ret[n].e; n++) {
+
		if (ret[n - 1].noglob_len != ret[n].noglob_len) {
+
			struct audit_entry_sorted *base;
+

+
			base = ret + n - tofill;
+
			for (i = 0; tofill > 1; i++, tofill--)
+
				base[i].next_pfx_incr = tofill;
+
			tofill = 1;
+
		} else if (strcmp(ret[n - 1].e->pkgname,
+
		    ret[n].e->pkgname) == 0) {
+
			tofill++;
+
		} else {
+
			tofill = 1;
+
		}
+
	}
+

+
	/* Calculate jump indexes for the first byte of the package name */
+
	bzero(audit_entry_first_byte_idx, sizeof(audit_entry_first_byte_idx));
+
	for (n = 1, i = 0; n < 256; n++) {
+
		while (ret[i].e != NULL &&
+
		    (size_t)(ret[i].e->pkgname[0]) < n)
+
			i++;
+
		audit_entry_first_byte_idx[n] = i;
+
	}
+

+
	return (ret);
+
}
+

static bool
match_version(const char *pkgversion, struct version_entry *v)
{
@@ -281,7 +440,7 @@ match_version(const char *pkgversion, struct version_entry *v)
}

static bool
-
is_vulnerable(struct audit_head *h, struct pkg *pkg)
+
is_vulnerable(struct audit_entry_sorted *a, struct pkg *pkg)
{
	struct audit_entry *e;
	const char *pkgname;
@@ -293,20 +452,38 @@ is_vulnerable(struct audit_head *h, struct pkg *pkg)
		PKG_VERSION, &pkgversion
	);

-
	SLIST_FOREACH(e, h, next) {
-
		if (fnmatch(e->pkgname, pkgname, 0) != 0)
+
	a += audit_entry_first_byte_idx[(size_t)pkgname[0]];
+
	for (; (e = a->e) != NULL; a += a->next_pfx_incr) {
+
		int cmp;
+
		size_t i;
+

+
		/*
+
		 * Audit entries are sorted, so if we had found one
+
		 * that is lexicographically greater than our name,
+
		 * it and the rest won't match our name.
+
		 */
+
		cmp = strncmp(pkgname, e->pkgname, a->noglob_len);
+
		if (cmp > 0)
			continue;
+
		else if (cmp < 0)
+
			break;

-
		res1 = match_version(pkgversion, &e->v1);
-
		res2 = match_version(pkgversion, &e->v2);
-
		if (res1 && res2) {
-
			res = true;
-
			if (quiet) {
-
				printf("%s-%s\n", pkgname, pkgversion);
-
			} else {
-
				printf("%s-%s is vulnerable:\n", pkgname, pkgversion);
-
				printf("%s\n", e->desc);
-
				printf("WWW: %s\n\n", e->url);
+
		for (i = 0; i < a->next_pfx_incr; i++) {
+
			e = a[i].e;
+
			if (fnmatch(e->pkgname, pkgname, 0) != 0)
+
				continue;
+

+
			res1 = match_version(pkgversion, &e->v1);
+
			res2 = match_version(pkgversion, &e->v2);
+
			if (res1 && res2) {
+
				res = true;
+
				if (quiet) {
+
					printf("%s-%s\n", pkgname, pkgversion);
+
				} else {
+
					printf("%s-%s is vulnerable:\n", pkgname, pkgversion);
+
					printf("%s\n", e->desc);
+
					printf("WWW: %s\n\n", e->url);
+
				}
			}
		}
	}
@@ -333,6 +510,7 @@ int
exec_audit(int argc, char **argv)
{
	struct audit_head h = SLIST_HEAD_INITIALIZER();
+
	struct audit_entry_sorted *cooked_audit_entries = NULL;
	struct pkgdb *db = NULL;
	struct pkgdb_it *it = NULL;
	struct pkg *pkg = NULL;
@@ -403,7 +581,8 @@ exec_audit(int argc, char **argv)
			ret = EX_DATAERR;
			goto cleanup;
		}
-
		is_vulnerable(&h, pkg);
+
		cooked_audit_entries = preprocess_db(&h);
+
		is_vulnerable(cooked_audit_entries, pkg);
		goto cleanup;
	}

@@ -441,9 +620,10 @@ exec_audit(int argc, char **argv)
		ret = EX_DATAERR;
		goto cleanup;
	}
+
	cooked_audit_entries = preprocess_db(&h);

	while ((ret = pkgdb_it_next(it, &pkg, PKG_LOAD_BASIC)) == EPKG_OK)
-
		if (is_vulnerable(&h, pkg))
+
		if (is_vulnerable(cooked_audit_entries, pkg))
			vuln++;

	if (ret == EPKG_END && vuln == 0)