eed892d Speed up vulnerability audit process — HardenedBSD-pkg

HardenedBSD Package Manager

Speed up vulnerability audit process

Eygene Ryabinkin committed 13 years ago

commit eed892dd5ac171d05b72daad47c54859e00c8edc
parent 8247aec8cbd99e7332446d3d2f2cc89e36bb65c3

1 file changed +195 -15

modified pkg/audit.c

@@ -65,6 +65,52 @@ struct audit_entry {

 SLIST_HEAD(audit_head, audit_entry);
 /*
  * The _sorted stuff.
  *
  * We are using the optimized search based on the following observations:
  *
  * - number of VuXML entries is more likely to be far greater than
  *   the number of installed ports; thus we should try to optimize
  *   the walk through all entries for a given port;
  *
  * - fnmatch() is good and fast, but if we compare the audit entry
  *   name prefix without globbing characters to the prefix of the port
  *   name of the same length and they differ, there is no point in
  *   checking the rest;
  *
  * - (most important bit): if parsed VuXML entries are lexicographically
  *   sorted per the largest prefix with no globbing characters and we
  *   know how many succeeding entries have the same prefix, we can:
  *
  *   a. skip the rest of the entries once the non-globbing prefix is
  *      lexicographically larger than the port name prefix of the
  *      same length: all successive prefixes will be larger as well;
  *
  *   b. if we have non-globbing prefix that is lexicographically smaller
  *      than port name prefix, we can skip all succeeding entries with
  *      the same prefix; and as some port names tend to repeat due to
  *      multiple vulnerabilities, it could be a large win.
  */
 struct audit_entry_sorted {
 	struct audit_entry *e;	/* Parsed entry this element refers to;
 				   NULL marks the end of the array */
 	size_t noglob_len;	/* Length of the leading part of e->pkgname
 				   that has no glob characters */
 	size_t next_pfx_incr;	/* Index increment that leads to the entry
 				   with a different prefix */
 };
 /*
  * Another small optimization that lets a lookup skip the beginning
  * of the sorted VuXML entry array, if possible.
  *
  * audit_entry_first_byte_idx[ch] is the index of the first entry in
  * the sorted array whose package name pattern starts with a byte that
  * is not smaller than 'ch'; slot 0 is always 0.  Entries before that
  * index start with a smaller byte and thus aren't relevant for a port
  * name that begins with 'ch'.
  *
  * The table is filled in by preprocess_db().
  */
 static size_t audit_entry_first_byte_idx[256];
 void
 usage_audit(void)
 {

@@ -251,6 +297,119 @@ parse_db(const char *path, struct audit_head *h)

 	return EPKG_OK;
 }
 /*
  * Measures the leading part of 's' that is free of globbing
  * characters.
  *
  * The scan stops at the first '*', '?', '[', '{' or '\\', or at the
  * end of the string when none of them is present; the number of
  * bytes in front of that position is returned.
  */
 static size_t
 str_noglob_len(const char *s)
 {
 	static const char glob_chars[] = "*?[{\\";

 	return (strcspn(s, glob_chars));
 }
 /*
  * Helper for quicksort that lexicographically orders prefixes.
  */
 static int
 audit_entry_compare(const void *a, const void *b)
 {
 	struct audit_entry_sorted *e1, *e2;
 	size_t min_len;
 	int result;
 	e1 = (struct audit_entry_sorted *)a;
 	e2 = (struct audit_entry_sorted *)b;
 	min_len = (e1->noglob_len < e2->noglob_len ?
 	    e1->noglob_len : e2->noglob_len);
 	result = strncmp(e1->e->pkgname, e2->e->pkgname, min_len);
 	/*
 	 * Additional check to see if some word is a prefix of an
 	 * another one and, thus, should go before the former.
 	 */
 	if (result == 0) {
 		if (e1->noglob_len < e2->noglob_len)
 			result = -1;
 		else if (e1->noglob_len > e2->noglob_len)
 			result = 1;
 	}
 	return (result);
 }
 /*
  * Sorts VuXML entries and calculates increments to jump to the
  * next distinct entry name.
  *
  * The returned array has one element per entry of the list 'h',
  * ordered by audit_entry_compare(), and is followed by a zero-filled
  * terminator whose 'e' member is NULL.  An empty list yields an array
  * that consists of the terminator only.
  *
  * As a side effect audit_entry_first_byte_idx[] is recomputed for the
  * returned array.
  *
  * The process is terminated via err(3) when the array can't be
  * allocated.  This function never frees the array it returns.
  */
 static struct audit_entry_sorted *
 preprocess_db(struct audit_head *h)
 {
 	struct audit_entry *e;
 	struct audit_entry_sorted *ret;
 	size_t ch, i, n, run, start;

 	n = 0;
 	SLIST_FOREACH(e, h, next)
 		n++;

 	/* calloc() zero-fills, so the extra element is the terminator. */
 	ret = calloc(n + 1, sizeof(ret[0]));
 	if (ret == NULL)
 		err(1, "calloc(audit_entry_sorted*)");

 	n = 0;
 	SLIST_FOREACH(e, h, next) {
 		ret[n].e = e;
 		ret[n].noglob_len = str_noglob_len(e->pkgname);
 		ret[n].next_pfx_incr = 1;
 		n++;
 	}
 	qsort(ret, n, sizeof(*ret), audit_entry_compare);

 	/*
 	 * Determining jump indexes to the next different name.
 	 *
 	 * Adjacent entries with the very same name form a run; each
 	 * member gets the number of entries that are left in the run,
 	 * itself included, so adding the increment to the index of any
 	 * member lands on the first entry after the run.
 	 *
 	 * The walk is bounded by the entry count, thus it never touches
 	 * anything past the terminator, even for an empty list.
 	 */
 	for (start = 0; start < n; start += run) {
 		run = 1;
 		/* Cheap length check first, full comparison second. */
 		while (start + run < n &&
 		    ret[start].noglob_len == ret[start + run].noglob_len &&
 		    strcmp(ret[start].e->pkgname,
 		    ret[start + run].e->pkgname) == 0)
 			run++;
 		for (i = 0; i < run; i++)
 			ret[start + i].next_pfx_incr = run - i;
 	}

 	/*
 	 * Calculate jump indexes for the first byte of the package name.
 	 *
 	 * The byte is read as unsigned char to agree with the order
 	 * that strncmp() has produced during the sort.
 	 *
 	 * NOTE(review): entries whose name starts with a glob character
 	 * are sorted to the front (their prefix is empty), yet they are
 	 * indexed here by that glob character, so a lookup that starts
 	 * from this table may step over them -- confirm it is intended.
 	 */
 	memset(audit_entry_first_byte_idx, 0,
 	    sizeof(audit_entry_first_byte_idx));
 	for (ch = 1, i = 0; ch < 256; ch++) {
 		while (ret[i].e != NULL &&
 		    (size_t)(unsigned char)ret[i].e->pkgname[0] < ch)
 			i++;
 		audit_entry_first_byte_idx[ch] = i;
 	}

 	return (ret);
 }
 static bool
 match_version(const char *pkgversion, struct version_entry *v)
 {

@@ -281,7 +440,7 @@ match_version(const char *pkgversion, struct version_entry *v)

 }
 static bool
 is_vulnerable(struct audit_head *h, struct pkg *pkg)
 is_vulnerable(struct audit_entry_sorted *a, struct pkg *pkg)
 {
 	struct audit_entry *e;
 	const char *pkgname;

@@ -293,20 +452,38 @@ is_vulnerable(struct audit_head *h, struct pkg *pkg)

 		PKG_VERSION, &pkgversion
 	);
 	SLIST_FOREACH(e, h, next) {
 		if (fnmatch(e->pkgname, pkgname, 0) != 0)
 	a += audit_entry_first_byte_idx[(size_t)pkgname[0]];
 	for (; (e = a->e) != NULL; a += a->next_pfx_incr) {
 		int cmp;
 		size_t i;
 		/*
 		 * Audit entries are sorted, so if we had found one
 		 * that is lexicographically greater than our name,
 		 * it and the rest won't match our name.
 		 */
 		cmp = strncmp(pkgname, e->pkgname, a->noglob_len);
 		if (cmp > 0)
 			continue;
 		else if (cmp < 0)
 			break;
 		res1 = match_version(pkgversion, &e->v1);
 		res2 = match_version(pkgversion, &e->v2);
 		if (res1 && res2) {
 			res = true;
 			if (quiet) {
 				printf("%s-%s\n", pkgname, pkgversion);
 			} else {
 				printf("%s-%s is vulnerable:\n", pkgname, pkgversion);
 				printf("%s\n", e->desc);
 				printf("WWW: %s\n\n", e->url);
 		for (i = 0; i < a->next_pfx_incr; i++) {
 			e = a[i].e;
 			if (fnmatch(e->pkgname, pkgname, 0) != 0)
 				continue;
 			res1 = match_version(pkgversion, &e->v1);
 			res2 = match_version(pkgversion, &e->v2);
 			if (res1 && res2) {
 				res = true;
 				if (quiet) {
 					printf("%s-%s\n", pkgname, pkgversion);
 				} else {
 					printf("%s-%s is vulnerable:\n", pkgname, pkgversion);
 					printf("%s\n", e->desc);
 					printf("WWW: %s\n\n", e->url);
 				}
 			}
 		}
 	}

@@ -333,6 +510,7 @@ int

 exec_audit(int argc, char **argv)
 {
 	struct audit_head h = SLIST_HEAD_INITIALIZER();
 	struct audit_entry_sorted *cooked_audit_entries = NULL;
 	struct pkgdb *db = NULL;
 	struct pkgdb_it *it = NULL;
 	struct pkg *pkg = NULL;

@@ -403,7 +581,8 @@ exec_audit(int argc, char **argv)

 			ret = EX_DATAERR;
 			goto cleanup;
 		}
 		is_vulnerable(&h, pkg);
 		cooked_audit_entries = preprocess_db(&h);
 		is_vulnerable(cooked_audit_entries, pkg);
 		goto cleanup;
 	}

@@ -441,9 +620,10 @@ exec_audit(int argc, char **argv)

 		ret = EX_DATAERR;
 		goto cleanup;
 	}
 	cooked_audit_entries = preprocess_db(&h);
 	while ((ret = pkgdb_it_next(it, &pkg, PKG_LOAD_BASIC)) == EPKG_OK)
 		if (is_vulnerable(&h, pkg))
 		if (is_vulnerable(cooked_audit_entries, pkg))
 			vuln++;
 	if (ret == EPKG_END && vuln == 0)