Radish alpha
H
rad:z3QDZAW2FAfuLvihrhiyDC9fAD8G9
HardenedBSD Package Manager
Radicle
Git
Move the tokenizer code into libpkg
Baptiste Daroussin committed 11 years ago
commit 437faef71bbb45272fd4de7e24c5f3db684dd597
parent 2d876fa
4 files changed +105 -100
modified libpkg/libpkg.ver
@@ -188,6 +188,8 @@ global:
	pkg_user_name;
	pkg_user_uidstr;
	pkg_users;
+
	pkg_utils_count_spaces;
+
	pkg_utils_tokenize;
	pkg_vasprintf;
	pkg_vdprintf;
	pkg_version_change;
modified libpkg/pkg.h.in
@@ -1812,5 +1812,7 @@ bool pkg_audit_is_vulnerable(struct pkg_audit *audit, struct pkg *pkg,
		bool quiet, struct sbuf **result);
		
void pkg_audit_free (struct pkg_audit *audit);
+
char *pkg_utils_tokenize(char **);
+
int pkg_utils_count_spaces(const char *);

#endif
modified libpkg/utils.c
@@ -994,3 +994,102 @@ pkg_symlink_cksumat(int fd, const char *path, const char *root, char *cksum)

	return (pkg_symlink_cksum_readlink(linkbuf, linklen, root, cksum));
}
+

+
/*
 * pkg_utils_tokenize() -- split the next argument off a command string.
 *
 * A bit like strsep(), except that it understands "double" and 'single'
 * quotes.  The buffer *args points into is modified in place: the
 * character that ends the token (a whitespace character or the closing
 * quote) is overwritten with '\0'.
 *
 * Returns a pointer to the next token, stripped of leading whitespace
 * and of its enclosing quotes.  *args is advanced to the character after
 * the token, or set to NULL once nothing but whitespace (or nothing at
 * all) remains, so callers can loop with `while (args != NULL)`.
 *
 * Quoted strings run from the first quote mark to the next one of the
 * same type, or to the terminating NUL if the quote is never closed.
 * A quoted string may contain the /other/ type of quote mark, which
 * then has no special meaning.  There is no escape character.
 *
 * Edge cases:
 *  - Input that is empty or consists only of whitespace yields a single
 *    empty token ("") and sets *args to NULL.
 *  - An opening quote immediately followed by the end of the string
 *    yields an empty token.
 *  - A closing quote ends the token even when no whitespace follows, so
 *    "a""b" produces two tokens; the number of tokens is therefore not
 *    bounded by the amount of whitespace in the input.
 *
 * *args must not be NULL on entry (checked with assert()).
 */

/* States of the tokenizer's scanner. */
enum parse_states {
	START,			/* skipping whitespace before the token */
	ORDINARY_TEXT,		/* inside an unquoted word */
	OPEN_SINGLE_QUOTES,	/* previous character was an opening ' */
	IN_SINGLE_QUOTES,	/* inside '...', past its first character */
	OPEN_DOUBLE_QUOTES,	/* previous character was an opening " */
	IN_DOUBLE_QUOTES,	/* inside "...", past its first character */
};

char *
pkg_utils_tokenize(char **args)
{
	char			*p, *p_start, *rest;
	enum parse_states	 parse_state = START;

	assert(*args != NULL);

	for (p = p_start = *args; *p != '\0'; p++) {
		switch (parse_state) {
		case START:
			/*
			 * The <ctype.h> classifiers require a value
			 * representable as unsigned char (or EOF); a plain
			 * char holding a byte >= 0x80 may be negative.
			 */
			if (isspace((unsigned char)*p))
				break;
			if (*p == '"')
				parse_state = OPEN_DOUBLE_QUOTES;
			else if (*p == '\'')
				parse_state = OPEN_SINGLE_QUOTES;
			else {
				parse_state = ORDINARY_TEXT;
				p_start = p;
			}
			break;
		case ORDINARY_TEXT:
			if (isspace((unsigned char)*p))
				goto finish;
			break;
		case OPEN_SINGLE_QUOTES:
			/* The token begins after the opening quote. */
			p_start = p;
			if (*p == '\'')
				goto finish;	/* empty string: '' */
			parse_state = IN_SINGLE_QUOTES;
			break;
		case IN_SINGLE_QUOTES:
			if (*p == '\'')
				goto finish;
			break;
		case OPEN_DOUBLE_QUOTES:
			/* The token begins after the opening quote. */
			p_start = p;
			if (*p == '"')
				goto finish;	/* empty string: "" */
			parse_state = IN_DOUBLE_QUOTES;
			break;
		case IN_DOUBLE_QUOTES:
			if (*p == '"')
				goto finish;
			break;
		}
	}

	/*
	 * Ran into the terminating NUL.  If no token character was ever
	 * seen (only whitespace, or a quote opened at the very end), the
	 * token is the empty string rather than the skipped-over text.
	 */
	if (parse_state == START ||
	    parse_state == OPEN_SINGLE_QUOTES ||
	    parse_state == OPEN_DOUBLE_QUOTES)
		p_start = p;
	*args = NULL;	/* All done */
	return (p_start);

finish:
	/* Only reached from inside the loop, so *p is a real delimiter. */
	*p = '\0';
	p++;

	/*
	 * Look ahead: if only whitespace is left, report the input as
	 * fully consumed so the caller does not receive a bogus trailing
	 * token.  Otherwise resume right after the delimiter.
	 */
	for (rest = p; isspace((unsigned char)*rest); rest++)
		continue;
	if (*rest == '\0')
		*args = NULL;	/* whitespace or nothing left */
	else
		*args = p;

	return (p_start);
}
+

+
/*
 * pkg_utils_count_spaces() -- count the whitespace characters in args.
 *
 * Every character for which isspace() is true is counted individually;
 * runs of whitespace are not collapsed.  args must be a NUL-terminated
 * string.  Returns the count (0 for an empty string).
 */
int
pkg_utils_count_spaces(const char *args)
{
	int		spaces;
	const char	*p;

	for (spaces = 0, p = args; *p != '\0'; p++) {
		/*
		 * isspace() is only defined for values representable as
		 * unsigned char (or EOF); a plain char holding a byte
		 * >= 0x80 may be negative, hence the cast.
		 */
		if (isspace((unsigned char)*p))
			spaces++;
	}

	return (spaces);
}
+

+

modified src/main.c
@@ -479,104 +479,6 @@ start_process_worker(char *const *save_argv)
	/* NOTREACHED */
}

-
/* A bit like strsep(), except it accounts for "double" and 'single'
-
   quotes.  Unlike strsep(), returns the next arg string, trimmed of
-
   whitespace or enclosing quotes, and updates **args to point at the
-
   character after that.  Sets *args to NULL when it has been
-
   completely consumed.  Quoted strings run from the first encountered
-
   quotemark to the next one of the same type or the terminating NULL.
-
   Quoted strings can contain the /other/ type of quote mark, which
-
   loses any special significance.  There isn't an escape
-
   character. */
-

-
enum parse_states {
-
	START,
-
	ORDINARY_TEXT,
-
	OPEN_SINGLE_QUOTES,
-
	IN_SINGLE_QUOTES,
-
	OPEN_DOUBLE_QUOTES,
-
	IN_DOUBLE_QUOTES,
-
};
-

-
static char *
-
tokenize(char **args)
-
{
-
	char			*p, *p_start;
-
	enum parse_states	 parse_state = START;
-

-
	assert(*args != NULL);
-

-
	for (p = p_start = *args; *p != '\0'; p++) {
-
		switch (parse_state) {
-
		case START:
-
			if (!isspace(*p)) {
-
				if (*p == '"')
-
					parse_state = OPEN_DOUBLE_QUOTES;
-
				else if (*p == '\'')
-
					parse_state = OPEN_SINGLE_QUOTES;
-
				else {
-
					parse_state = ORDINARY_TEXT;
-
					p_start = p;
-
				}				
-
			} else 
-
				p_start = p;
-
			break;
-
		case ORDINARY_TEXT:
-
			if (isspace(*p))
-
				goto finish;
-
			break;
-
		case OPEN_SINGLE_QUOTES:
-
			p_start = p;
-
			if (*p == '\'')
-
				goto finish;
-

-
			parse_state = IN_SINGLE_QUOTES;
-
			break;
-
		case IN_SINGLE_QUOTES:
-
			if (*p == '\'')
-
				goto finish;
-
			break;
-
		case OPEN_DOUBLE_QUOTES:
-
			p_start = p;
-
			if (*p == '"')
-
				goto finish;
-
			parse_state = IN_DOUBLE_QUOTES;
-
			break;
-
		case IN_DOUBLE_QUOTES:
-
			if (*p == '"')
-
				goto finish;
-
			break;
-
		}
-
	}
-

-
finish:
-
	if (*p == '\0')
-
		*args = NULL;	/* All done */
-
	else {
-
		*p = '\0';
-
		p++;
-
		if (*p == '\0' || parse_state == START)
-
			*args = NULL; /* whitespace or nothing left */
-
		else
-
			*args = p;
-
	}
-
	return (p_start);
-
}
-

-
static int
-
count_spaces(const char *args)
-
{
-
	int		spaces;
-
	const char	*p;
-

-
	for (spaces = 0, p = args; *p != '\0'; p++) 
-
		if (isspace(*p))
-
			spaces++;
-

-
	return (spaces);
-
}
-

-

static int
expand_aliases(int argc, char ***argv)
{
@@ -613,7 +515,7 @@ expand_aliases(int argc, char ***argv)
	 * consuming one of the original argv, so that balances
	 * out. */ 

-
	spaces = count_spaces(alias_value);
+
	spaces = pkg_utils_count_spaces(alias_value);
	arglen = strlen(alias_value) + 1;
	veclen = sizeof(char *) * (spaces + argc + 1);
	buf = malloc(veclen + arglen);
@@ -626,7 +528,7 @@ expand_aliases(int argc, char ***argv)

	newargc = 0;
	while(args != NULL) {
-
		newargv[newargc++] = tokenize(&args);
+
		newargv[newargc++] = pkg_utils_tokenize(&args);
	}
	for (i = 1; i < argc; i++) {
		newargv[newargc++] = oldargv[i];