xref: /illumos-gate/usr/src/cmd/logadm/glob.c (revision 581cede61ac9c14d8d4ea452562a567189eead78)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  *
25  * logadm/glob.c -- globbing routines
26  *
27  * these routines support two kinds of globs.  first, the
28  * usual kind of filename globbing, like:
29  *
30  * 	*.c
31  * 	/var/log/syslog.?
32  * 	log[0-9]*file
33  * 	/var/apache/logs/x*{access,error}_log
34  *
35  * this is basically the same syntax that csh supports for globs and
36  * is provided by the routine glob_glob() which takes a filename and
37  * returns a list of filenames that match the glob.
38  *
39  * the second type is something called a "reglob" which is a pathname
40  * where the components are regular expressions as described in regex(3c).
41  * some examples:
42  *
43  * 	.*\.c
44  * 	/var/log/syslog\..
45  * 	log[0-9].*file
46  * 	/var/log/syslog\.([0-9]+)$0
47  *
48  * the last example uses the ()$n form to assign a numeric extension
49  * on a filename to the "n" value kept by the fn routines with each
50  * filename (see fn_setn() in fn.c).  logadm uses this mechanism to
51  * correctly sort lognames when templates containing $n are used.
52  *
53  * the routine glob_reglob() is used to expand reglobs.  glob_glob()
54  * is implemented by expanding the curly braces, converting the globs
55  * to reglobs, and then passing the work to glob_reglob().
56  *
57  * finally, since expanding globs and reglobs requires doing a stat(2)
58  * on the files, we store the resulting stat information in the filename
59  * struct (see fn_setstat() in fn.c).
60  *
61  * the glob(3c) routines are not used here since they don't support
62  * braces, and don't support the more powerful reglobs required by logadm.
63  */
64 
65 #pragma ident	"%Z%%M%	%I%	%E% SMI"
66 
67 #include <stdio.h>
68 #include <libintl.h>
69 #include <stdlib.h>
70 #include <libgen.h>
71 #include <strings.h>
72 #include <sys/types.h>
73 #include <sys/param.h>
74 #include <sys/stat.h>
75 #include <dirent.h>
76 #include "err.h"
77 #include "fn.h"
78 #include "glob.h"
79 
80 /* forward declarations for functions used internally by this module */
81 static struct fn_list *glob_debrace(struct fn *fnp);
82 static struct fn_list *glob_reglob_list(struct fn_list *fnlp);
83 static boolean_t glob_magic(struct fn *fnp);
84 
85 /* expand curly braces (like file{one,two,three}name) */
86 static struct fn_list *
87 glob_debrace(struct fn *fnp)
88 {
89 	struct fn_list *ret = fn_list_new(NULL);
90 	struct fn_list *newret;
91 	char *sp = fn_s(fnp);
92 	char *left;
93 	char *right;
94 	char *comma;
95 
96 	/* start with an empty string in the list */
97 	fn_list_adds(ret, "");
98 
99 	/* while braces remain... */
100 	while (sp != NULL && (left = strchr(sp, '{')) != NULL)
101 		if ((right = strchr(left, '}')) == NULL) {
102 			err(EF_FILE|EF_JMP, "Missing }");
103 			fn_list_free(ret);
104 			return (NULL);
105 		} else {
106 			/* stuff before "left" is finished */
107 			fn_list_appendrange(ret, sp, left);
108 
109 			/* stuff after "right" still need processing */
110 			sp = right + 1;
111 
112 			if (left + 1 == right)
113 				continue;	/* just an empty {} */
114 
115 			/* stuff between "left" and "right" is comma-sep list */
116 			left++;
117 			newret = fn_list_new(NULL);
118 			while ((comma = strchr(left, ',')) != NULL) {
119 				struct fn_list *dup = fn_list_dup(ret);
120 
121 				/* stuff from left to comma is one variant */
122 				fn_list_appendrange(dup, left, comma);
123 				fn_list_addfn_list(newret, dup);
124 				left = comma + 1;
125 			}
126 			/* what's left is the last item in the list */
127 			fn_list_appendrange(ret, left, right);
128 			fn_list_addfn_list(newret, ret);
129 			ret = newret;
130 		}
131 
132 	/* anything remaining in "s" is finished */
133 	fn_list_appendrange(ret, sp, &sp[strlen(sp)]);
134 	return (ret);
135 }
136 
137 /* return true if filename contains any "magic" characters (*,?,[) */
138 static boolean_t
139 glob_magic(struct fn *fnp)
140 {
141 	char *s = fn_s(fnp);
142 
143 	for (; s != NULL && *s; s++)
144 		if (*s == '*' ||
145 		    *s == '?' ||
146 		    *s == '[')
147 			return (B_TRUE);
148 
149 	return (B_FALSE);
150 }
151 
/*
 * glob_glob -- given a filename glob, return the list of matching filenames
 *
 * the braces are expanded first.  if none of the resulting names contains
 * a glob metacharacter, that debraced list is returned as is, without
 * being passed through glob_reglob().  otherwise every name is converted
 * to a reglob and the whole list is handed to glob_reglob_list(), whose
 * result is returned.
 *
 * returns NULL if brace expansion failed.
 */
struct fn_list *
glob_glob(struct fn *fnp)
{
	struct fn_list *braced;
	struct fn_list *reglobs;
	struct fn_list *matches;
	struct fn *cur;

	braced = glob_debrace(fnp);
	if (braced == NULL)
		return (NULL);	/* debracing produced no list */

	/* stop at the first name that has magic characters, if any */
	fn_list_rewind(braced);
	while ((cur = fn_list_next(braced)) != NULL && !glob_magic(cur))
		;

	/* ran off the end of the list: no globs to expand */
	if (cur == NULL)
		return (braced);

	/* turn each debraced glob into the equivalent reglob */
	fn_list_rewind(braced);
	reglobs = fn_list_new(NULL);
	for (cur = fn_list_next(braced); cur != NULL;
	    cur = fn_list_next(braced))
		fn_list_addfn(reglobs, glob_to_reglob(cur));
	fn_list_free(braced);

	/* expand the reglobs against the filesystem */
	matches = glob_reglob_list(reglobs);
	fn_list_free(reglobs);

	return (matches);
}
196 
/*
 * glob_glob_list -- given a list of filename globs, return all matches
 *
 * each entry of fnlp is expanded with glob_glob() and the results are
 * gathered, in order, into one newly created list which is returned.
 * fnlp is rewound before it is walked.
 */
struct fn_list *
glob_glob_list(struct fn_list *fnlp)
{
	struct fn_list *ret = fn_list_new(NULL);
	struct fn_list *matches;
	struct fn *fnp;

	fn_list_rewind(fnlp);
	while ((fnp = fn_list_next(fnlp)) != NULL) {
		/*
		 * glob_glob() hands back NULL when brace expansion
		 * fails; there is nothing to merge in that case, so
		 * don't pass the NULL list along.
		 */
		if ((matches = glob_glob(fnp)) != NULL)
			fn_list_addfn_list(ret, matches);
	}
	return (ret);
}
211 
212 /*
213  * glob_reglob -- given a filename reglob, return a list of matching filenames
214  *
215  * this routine does all the hard work in this module.
216  */
217 struct fn_list *
218 glob_reglob(struct fn *fnp)
219 {
220 	struct fn_list *ret = fn_list_new(NULL);
221 	struct fn_list *newret;
222 	struct fn *nextfnp;
223 	char *mys = STRDUP(fn_s(fnp));
224 	char *sp = mys;
225 	char *slash;
226 	int skipdotfiles;
227 	char *re;
228 	char ret0[MAXPATHLEN];
229 
230 
231 	/* start with the initial directory in the list */
232 	if (*sp == '/') {
233 		fn_list_adds(ret, "/");
234 		while (*sp == '/')
235 			sp++;
236 	} else
237 		fn_list_adds(ret, "./");
238 
239 	/* while components remain... */
240 	do {
241 		if ((slash = strchr(sp, '/')) != NULL) {
242 			*slash++ = '\0';
243 			/* skip superfluous slashes */
244 			while (*slash == '/')
245 				slash++;
246 		}
247 
248 		/* dot files are skipped unless a dot was specifically given */
249 		if (sp[0] == '\\' && sp[1] == '.')
250 			skipdotfiles = 0;
251 		else
252 			skipdotfiles = 1;
253 
254 		/* compile the regex */
255 		if ((re = regcmp("^", sp, "$", (char *)0)) == NULL)
256 			err(EF_FILE|EF_JMP, "regcmp failed on <%s>", sp);
257 
258 		/* apply regex to every filename we've matched so far */
259 		newret = fn_list_new(NULL);
260 		fn_list_rewind(ret);
261 		while ((nextfnp = fn_list_next(ret)) != NULL) {
262 			DIR *dirp;
263 			struct dirent *dp;
264 
265 			/* go through directory looking for matches */
266 			if ((dirp = opendir(fn_s(nextfnp))) == NULL)
267 				continue;
268 
269 			while ((dp = readdir(dirp)) != NULL) {
270 				if (skipdotfiles && dp->d_name[0] == '.')
271 					continue;
272 				*ret0 = '\0';
273 				if (regex(re, dp->d_name, ret0)) {
274 					struct fn *matchfnp = fn_dup(nextfnp);
275 					struct stat stbuf;
276 					int n;
277 
278 					fn_puts(matchfnp, dp->d_name);
279 
280 					if (stat(fn_s(matchfnp), &stbuf) < 0) {
281 						fn_free(matchfnp);
282 						continue;
283 					}
284 
285 					/* skip non-dirs if more components */
286 					if (slash &&
287 					    (stbuf.st_mode & S_IFMT) !=
288 					    S_IFDIR) {
289 						fn_free(matchfnp);
290 						continue;
291 					}
292 
293 					/*
294 					 * component matched, fill in "n"
295 					 * value, stat information, and
296 					 * append component to directory
297 					 * name just searched.
298 					 */
299 
300 					if (*ret0)
301 						n = atoi(ret0);
302 					else
303 						n = -1;
304 					fn_setn(matchfnp, n);
305 					fn_setstat(matchfnp, &stbuf);
306 
307 					if (slash)
308 						fn_putc(matchfnp, '/');
309 
310 					fn_list_addfn(newret, matchfnp);
311 				}
312 			}
313 			(void) closedir(dirp);
314 		}
315 		fn_list_free(ret);
316 		ret = newret;
317 		sp = slash;
318 	} while (slash);
319 
320 	FREE(mys);
321 
322 	return (ret);
323 }
324 
/*
 * glob_reglob_list -- reglob a list of filenames
 *
 * runs glob_reglob() on every entry of fnlp (after rewinding it) and
 * merges the per-entry results, in order, into one new list.
 */
static struct fn_list *
glob_reglob_list(struct fn_list *fnlp)
{
	struct fn_list *all = fn_list_new(NULL);
	struct fn *pattern;

	for (fn_list_rewind(fnlp); (pattern = fn_list_next(fnlp)) != NULL; ) {
		struct fn_list *found = glob_reglob(pattern);

		fn_list_addfn_list(all, found);
	}

	return (all);
}
337 
/*
 * glob_to_reglob -- convert a glob (*, ?, etc) to a reglob (.*, ., etc.)
 *
 * reads fnp from its beginning one character at a time and builds a new
 * fn holding the translation:
 *
 *	*			becomes	.*
 *	?			becomes	.
 *	. ( ) ^ + { } $		get a backslash in front
 *	anything else		is copied unchanged
 *
 * the newly created fn is returned.
 */
struct fn *
glob_to_reglob(struct fn *fnp)
{
	struct fn *reglob = fn_new(NULL);
	int ch;

	for (fn_rewind(fnp); (ch = fn_getc(fnp)) != '\0'; ) {
		if (ch == '*') {
			/* any run of characters */
			fn_putc(reglob, '.');
			fn_putc(reglob, '*');
		} else if (ch == '?') {
			/* exactly one character */
			fn_putc(reglob, '.');
		} else {
			/* characters special to the regex need backslash */
			if (ch == '.' || ch == '(' || ch == ')' ||
			    ch == '^' || ch == '+' || ch == '{' ||
			    ch == '}' || ch == '$')
				fn_putc(reglob, '\\');
			fn_putc(reglob, ch);
		}
	}

	return (reglob);
}
377 
#ifdef	TESTMODULE

/*
 * stand-alone test driver for the glob module, only built when
 * TESTMODULE is defined.
 *
 * usage: a.out [-r] [pattern...]
 *	-r means the patterns that follow are reglobs instead of globs
 *
 * for every pattern the matches are printed, followed by their total
 * size, and then the matches are printed again as they are popped
 * oldest-first off the list.
 */
int
main(int argc, char *argv[])
{
	struct fn *patfnp = fn_new(NULL);
	struct fn_list *matches;
	struct fn *cur;
	int use_reglob = 0;
	int argi;

	err_init(argv[0]);
	setbuf(stdout, NULL);

	for (argi = 1; argi < argc; argi++) {
		if (strcmp(argv[argi], "-r") == 0) {
			use_reglob = 1;
			continue;
		}

		/* non-zero here means an error jumped back to this point */
		if (SETJMP) {
			printf("    skipped due to errors\n");
			continue;
		}

		printf("<%s>:\n", argv[argi]);
		fn_renew(patfnp, argv[argi]);
		matches = use_reglob ?
		    glob_reglob(patfnp) : glob_glob(patfnp);

		/* list everything that matched */
		for (fn_list_rewind(matches);
		    (cur = fn_list_next(matches)) != NULL; )
			printf("    <%s>\n", fn_s(cur));

		printf("total size: %lld\n", fn_list_totalsize(matches));

		/* drain the list, oldest entry first */
		for (cur = fn_list_popoldest(matches); cur != NULL;
		    cur = fn_list_popoldest(matches)) {
			printf("    oldest <%s>\n", fn_s(cur));
			fn_free(cur);
		}

		fn_list_free(matches);
	}
	fn_free(patfnp);

	err_done(0);
	/* NOTREACHED */
	return (0);
}

#endif	/* TESTMODULE */
435