xref: /illumos-gate/usr/src/cmd/csplit/csplit.c (revision 581cede61ac9c14d8d4ea452562a567189eead78)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
23 /*	  All Rights Reserved  	*/
24 
25 
26 /*
27  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
28  * Use is subject to license terms.
29  */
30 
31 #pragma ident	"%Z%%M%	%I%	%E% SMI"
32 
33 /*
34  * csplit - Context or line file splitter
35  * Compile: cc -O -s -o csplit csplit.c
36  */
37 
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <unistd.h>
41 #include <string.h>
42 #include <ctype.h>
43 #include <errno.h>
44 #include <limits.h>
45 #include <regexpr.h>
46 #include <signal.h>
47 #include <locale.h>
48 #include <libintl.h>
49 
50 #define	LAST	0LL
51 #define	ERR	-1
52 #define	FALSE	0
53 #define	TRUE	1
54 #define	EXPMODE	2
55 #define	LINMODE	3
56 #define	LINSIZ	LINE_MAX	/* POSIX.2 - read lines LINE_MAX long */
57 
58 	/* Globals */
59 
60 char linbuf[LINSIZ];		/* Input line buffer */
61 char *expbuf;
62 char tmpbuf[BUFSIZ];		/* Temporary buffer for stdin */
63 char file[8192] = "xx";		/* File name buffer */
64 char *targ;			/* Arg ptr for error messages */
65 char *sptr;
66 FILE *infile, *outfile;		/* I/O file streams */
67 int silent, keep, create;	/* Flags: -s(ilent), -k(eep), (create) */
68 int errflg;
69 int fiwidth = 2;		/* file index width (output file names) */
70 extern int optind;
71 extern char *optarg;
72 offset_t offset;		/* Regular expression offset value */
73 offset_t curline;		/* Current line in input file */
74 
75 /*
76  * These defines are needed for regexp handling(see regexp(7))
77  */
78 #define	PERROR(x)	fatal("%s: Illegal Regular Expression\n", targ);
79 
80 static int asc_to_ll(char *, long long *);
81 static void closefile(void);
82 static void fatal(char *, char *);
83 static offset_t findline(char *, offset_t);
84 static void flush(void);
85 static FILE *getfile(void);
86 static char *getline(int);
87 static void line_arg(char *);
88 static void num_arg(char *, int);
89 static void re_arg(char *);
90 static void sig(int);
91 static void to_line(offset_t);
92 static void usage(void);
93 
94 int
95 main(int argc, char **argv)
96 {
97 	int ch, mode;
98 	char *ptr;
99 
100 	(void) setlocale(LC_ALL, "");
101 #if !defined(TEXT_DOMAIN)		/* Should be defined by cc -D */
102 #define	TEXT_DOMAIN	"SYS_TEST"	/* Use this only if it weren't */
103 #endif
104 	(void) textdomain(TEXT_DOMAIN);
105 
106 	while ((ch = getopt(argc, argv, "skf:n:")) != EOF) {
107 		switch (ch) {
108 			case 'f':
109 				(void) strcpy(file, optarg);
110 				if ((ptr = strrchr(optarg, '/')) == NULL)
111 					ptr = optarg;
112 				else
113 					ptr++;
114 
115 				break;
116 			case 'n':		/* POSIX.2 */
117 				for (ptr = optarg; *ptr != NULL; ptr++)
118 					if (!isdigit((int)*ptr))
119 						fatal("-n num\n", NULL);
120 				fiwidth = atoi(optarg);
121 				break;
122 			case 'k':
123 				keep++;
124 				break;
125 			case 's':
126 				silent++;
127 				break;
128 			case '?':
129 				errflg++;
130 		}
131 	}
132 
133 	argv = &argv[optind];
134 	argc -= optind;
135 	if (argc <= 1 || errflg)
136 		usage();
137 
138 	if (strcmp(*argv, "-") == 0) {
139 		infile = tmpfile();
140 
141 		while (fread(tmpbuf, 1, BUFSIZ, stdin) != 0) {
142 			if (fwrite(tmpbuf, 1, BUFSIZ, infile) == 0)
143 				if (errno == ENOSPC) {
144 					(void) fprintf(stderr, "csplit: ");
145 					(void) fprintf(stderr, gettext(
146 						"No space left on device\n"));
147 					exit(1);
148 				} else {
149 					(void) fprintf(stderr, "csplit: ");
150 					(void) fprintf(stderr, gettext(
151 						"Bad write to temporary "
152 							"file\n"));
153 					exit(1);
154 				}
155 
156 	/* clear the buffer to get correct size when writing buffer */
157 
158 			(void) memset(tmpbuf, '\0', sizeof (tmpbuf));
159 		}
160 		rewind(infile);
161 	} else if ((infile = fopen(*argv, "r")) == NULL)
162 		fatal("Cannot open %s\n", *argv);
163 	++argv;
164 	curline = (offset_t)1;
165 	(void) signal(SIGINT, sig);
166 
167 	/*
168 	 * The following for loop handles the different argument types.
169 	 * A switch is performed on the first character of the argument
170 	 * and each case calls the appropriate argument handling routine.
171 	 */
172 
173 	for (; *argv; ++argv) {
174 		targ = *argv;
175 		switch (**argv) {
176 		case '/':
177 			mode = EXPMODE;
178 			create = TRUE;
179 			re_arg(*argv);
180 			break;
181 		case '%':
182 			mode = EXPMODE;
183 			create = FALSE;
184 			re_arg(*argv);
185 			break;
186 		case '{':
187 			num_arg(*argv, mode);
188 			mode = FALSE;
189 			break;
190 		default:
191 			mode = LINMODE;
192 			create = TRUE;
193 			line_arg(*argv);
194 			break;
195 		}
196 	}
197 	create = TRUE;
198 	to_line(LAST);
199 	return (0);
200 }
201 
/*
 * asc_to_ll converts the decimal string "str" into a long long that is
 * stored through "plc".  Leading blanks and tabs are skipped and one
 * optional '+' or '-' sign is accepted; every character after that
 * must be a decimal digit.  A string with no digits converts to 0,
 * which is how a regular expression operand with no offset is read.
 *
 * Returns TRUE on success.  Returns ERR, with *plc left at 0, when a
 * non-digit is found or when the value does not fit in a long long.
 * The result is reported through "plc" rather than the return value
 * because every long long value is a legal answer.
 */

static int
asc_to_ll(char *str, long long *plc)
{
	long long val = 0;
	int negative = 0;
	int digit;

	*plc = 0;

	while (*str == ' ' || *str == '\t')
		str++;
	if (*str == '-' || *str == '+') {
		negative = (*str == '-');
		str++;
	}

	for (; *str != '\0'; str++) {
		if (*str < '0' || *str > '9')
			return (ERR);
		digit = *str - '0';
		/* refuse the digit that would overflow val * 10 + digit */
		if (val > (LLONG_MAX - digit) / 10)
			return (ERR);
		val = val * 10 + digit;
	}

	*plc = negative ? -val : val;
	return (TRUE);	/* not error */
}
237 
238 /*
239  * Closefile prints the byte count of the file created,(via fseeko
240  * and ftello), if the create flag is on and the silent flag is not on.
241  * If the create flag is on closefile then closes the file(fclose).
242  */
243 
244 static void
245 closefile()
246 {
247 	if (!silent && create) {
248 		(void) fseeko(outfile, (offset_t)0, SEEK_END);
249 		(void) fprintf(stdout, "%lld\n", (offset_t)ftello(outfile));
250 	}
251 	if (create)
252 		(void) fclose(outfile);
253 }
254 
255 /*
256  * Fatal handles error messages and cleanup.
257  * Because "arg" can be the global file, and the cleanup processing
258  * uses the global file, the error message is printed first.  If the
259  * "keep" flag is not set, fatal unlinks all created files.  If the
260  * "keep" flag is set, fatal closes the current file(if there is one).
261  * Fatal exits with a value of 1.
262  */
263 
264 static void
265 fatal(char *string, char *arg)
266 {
267 	char *fls;
268 	int num;
269 
270 	(void) fprintf(stderr, "csplit: ");
271 
272 	/* gettext dynamically replaces string */
273 
274 	(void) fprintf(stderr, gettext(string), arg);
275 	if (!keep) {
276 		if (outfile) {
277 			(void) fclose(outfile);
278 			for (fls = file; *fls != '\0'; fls++)
279 				continue;
280 			fls -= fiwidth;
281 			for (num = atoi(fls); num >= 0; num--) {
282 				(void) sprintf(fls, "%.*d", fiwidth, num);
283 				(void) unlink(file);
284 			}
285 		}
286 	} else
287 		if (outfile)
288 			closefile();
289 	exit(1);
290 }
291 
292 /*
293  * Findline returns the line number referenced by the current argument.
294  * Its arguments are a pointer to the compiled regular expression(expr),
295  * and an offset(oset).  The variable lncnt is used to count the number
296  * of lines searched.  First the current stream location is saved via
297  * ftello(), and getline is called so that R.E. searching starts at the
298  * line after the previously referenced line.  The while loop checks
299  * that there are more lines(error if none), bumps the line count, and
300  * checks for the R.E. on each line.  If the R.E. matches on one of the
301  * lines the old stream location is restored, and the line number
302  * referenced by the R.E. and the offset is returned.
303  */
304 
305 static offset_t
306 findline(char *expr, offset_t oset)
307 {
308 	static int benhere = 0;
309 	offset_t lncnt = 0, saveloc;
310 
311 	saveloc = ftello(infile);
312 	if (curline != (offset_t)1 || benhere)	/* If first line, first time, */
313 		(void) getline(FALSE);		/* then don't skip */
314 	else
315 		lncnt--;
316 	benhere = 1;
317 	while (getline(FALSE) != NULL) {
318 		lncnt++;
319 		if ((sptr = strrchr(linbuf, '\n')) != NULL)
320 			*sptr = '\0';
321 		if (step(linbuf, expr)) {
322 			(void) fseeko(infile, (offset_t)saveloc, SEEK_SET);
323 			return (curline+lncnt+oset);
324 		}
325 	}
326 	(void) fseeko(infile, (offset_t)saveloc, SEEK_SET);
327 	return (curline+lncnt+oset+2);
328 }
329 
330 /*
331  * Flush uses fputs to put lines on the output file stream(outfile)
332  * Since fputs does its own buffering, flush doesn't need to.
333  * Flush does nothing if the create flag is not set.
334  */
335 
336 static void
337 flush()
338 {
339 	if (create)
340 		(void) fputs(linbuf, outfile);
341 }
342 
343 /*
344  * Getfile does nothing if the create flag is not set.  If the create
345  * flag is set, getfile positions the file pointer(fptr) at the end of
346  * the file name prefix on the first call(fptr=0).  The file counter is
347  * stored in the file name and incremented.  If the subsequent fopen
348  * fails, the file name is copied to tfile for the error message, the
349  * previous file name is restored for cleanup, and fatal is called.  If
350  * the fopen succeeds, the stream(opfil) is returned.
351  */
352 
353 FILE *
354 getfile()
355 {
356 	static char *fptr;
357 	static int ctr;
358 	FILE *opfil;
359 	char tfile[15];
360 	char *delim;
361 	char savedelim;
362 
363 	if (create) {
364 		if (fptr == 0)
365 			for (fptr = file; *fptr != NULL; fptr++);
366 		(void) sprintf(fptr, "%.*d", fiwidth, ctr++);
367 
368 		/* check for suffix length overflow */
369 		if (strlen(fptr) > fiwidth) {
370 			fatal("Suffix longer than %ld chars; increase -n\n",
371 			    (char *)fiwidth);
372 		}
373 
374 		/* check for filename length overflow */
375 
376 		delim = strrchr(file, '/');
377 		if (delim == (char *)NULL) {
378 			if (strlen(file) > pathconf(".", _PC_NAME_MAX)) {
379 				fatal("Name too long: %s\n", file);
380 			}
381 		} else {
382 			/* truncate file at pathname delim to do pathconf */
383 			savedelim = *delim;
384 			*delim = '\0';
385 			/*
386 			 * file: pppppppp\0fffff\0
387 			 * ..... ^ file
388 			 * ............. ^ delim
389 			 */
390 			if (strlen(delim + 1) > pathconf(file, _PC_NAME_MAX)) {
391 				fatal("Name too long: %s\n", delim + 1);
392 			}
393 			*delim = savedelim;
394 		}
395 
396 		if ((opfil = fopen(file, "w")) == NULL) {
397 			(void) strcpy(tfile, file);
398 			(void) sprintf(fptr, "%.*d", fiwidth, (ctr-2));
399 			fatal("Cannot create %s\n", tfile);
400 		}
401 		return (opfil);
402 	}
403 	return (NULL);
404 }
405 
406 /*
407  * Getline gets a line via fgets from the input stream "infile".
408  * The line is put into linbuf and may not be larger than LINSIZ.
409  * If getline is called with a non-zero value, the current line
410  * is bumped, otherwise it is not(for R.E. searching).
411  */
412 
413 static char *
414 getline(int bumpcur)
415 {
416 	char *ret;
417 	if (bumpcur)
418 		curline++;
419 	ret = fgets(linbuf, LINSIZ, infile);
420 	return (ret);
421 }
422 
423 /*
424  * Line_arg handles line number arguments.
425  * line_arg takes as its argument a pointer to a character string
426  * (assumed to be a line number).  If that character string can be
427  * converted to a number(long long), to_line is called with that number,
428  * otherwise error.
429  */
430 
431 static void
432 line_arg(char *line)
433 {
434 	long long to;
435 
436 	if (asc_to_ll(line, &to) == ERR)
437 		fatal("%s: bad line number\n", line);
438 	to_line(to);
439 }
440 
441 /*
442  * Num_arg handles repeat arguments.
443  * Num_arg copies the numeric argument to "rep" (error if number is
444  * larger than 20 characters or } is left off).  Num_arg then converts
445  * the number and checks for validity.  Next num_arg checks the mode
446  * of the previous argument, and applys the argument the correct number
447  * of times. If the mode is not set properly its an error.
448  */
449 
450 static void
451 num_arg(char *arg, int md)
452 {
453 	offset_t repeat, toline;
454 	char rep[21];
455 	char *ptr;
456 	int		len;
457 
458 	ptr = rep;
459 	for (++arg; *arg != '}'; arg += len) {
460 		if (*arg == NULL)
461 			fatal("%s: missing '}'\n", targ);
462 		if ((len = mblen(arg, MB_LEN_MAX)) <= 0)
463 			len = 1;
464 		if ((ptr + len) >= &rep[20])
465 			fatal("%s: Repeat count too large\n", targ);
466 		(void) memcpy(ptr, arg, len);
467 		ptr += len;
468 	}
469 	*ptr = NULL;
470 	if ((asc_to_ll(rep, &repeat) == ERR) || repeat < 0L)
471 		fatal("Illegal repeat count: %s\n", targ);
472 	if (md == LINMODE) {
473 		toline = offset = curline;
474 		for (; repeat > 0LL; repeat--) {
475 			toline += offset;
476 			to_line(toline);
477 		}
478 	} else	if (md == EXPMODE)
479 			for (; repeat > 0LL; repeat--)
480 				to_line(findline(expbuf, offset));
481 		else
482 			fatal("No operation for %s\n", targ);
483 }
484 
485 /*
486  * Re_arg handles regular expression arguments.
487  * Re_arg takes a csplit regular expression argument.  It checks for
488  * delimiter balance, computes any offset, and compiles the regular
489  * expression.  Findline is called with the compiled expression and
490  * offset, and returns the corresponding line number, which is used
491  * as input to the to_line function.
492  */
493 
494 static void
495 re_arg(char *string)
496 {
497 	char *ptr;
498 	char ch;
499 	int		len;
500 
501 	ch = *string;
502 	ptr = string;
503 	ptr++;
504 	while (*ptr != ch) {
505 		if (*ptr == '\\')
506 			++ptr;
507 
508 		if (*ptr == NULL)
509 			fatal("%s: missing delimiter\n", targ);
510 
511 		if ((len = mblen(ptr, MB_LEN_MAX)) <= 0)
512 			len = 1;
513 		ptr += len;
514 	}
515 
516 	/*
517 	 * The line below was added because compile no longer supports
518 	 * the fourth argument being passed.  The fourth argument used
519 	 * to be '/' or '%'.
520 	 */
521 
522 	*ptr = NULL;
523 	if (asc_to_ll(++ptr, &offset) == ERR)
524 		fatal("%s: illegal offset\n", string);
525 
526 	/*
527 	 * The line below was added because INIT which did this for us
528 	 * was removed from compile in regexp.h
529 	 */
530 
531 	string++;
532 	expbuf = compile(string, (char *)0, (char *)0);
533 	if (regerrno)
534 		PERROR(regerrno);
535 	to_line(findline(expbuf, offset));
536 }
537 
538 /*
539  * Sig handles breaks.  When a break occurs the signal is reset,
540  * and fatal is called to clean up and print the argument which
541  * was being processed at the time the interrupt occured.
542  */
543 
544 /* ARGSUSED */
545 static void
546 sig(int s)
547 {
548 	(void) signal(SIGINT, sig);
549 	fatal("Interrupt - program aborted at arg '%s'\n", targ);
550 }
551 
552 /*
553  * To_line creates split files.
554  * To_line gets as its argument the line which the current argument
555  * referenced.  To_line calls getfile for a new output stream, which
556  * does nothing if create is False.  If to_line's argument is not LAST
557  * it checks that the current line is not greater than its argument.
558  * While the current line is less than the desired line to_line gets
559  * lines and flushes(error if EOF is reached).
560  * If to_line's argument is LAST, it checks for more lines, and gets
561  * and flushes lines till the end of file.
562  * Finally, to_line calls closefile to close the output stream.
563  */
564 
565 static void
566 to_line(offset_t ln)
567 {
568 	outfile = getfile();
569 	if (ln != LAST) {
570 		if (curline > ln)
571 			fatal("%s - out of range\n", targ);
572 		while (curline < ln) {
573 			if (getline(TRUE) == NULL)
574 				fatal("%s - out of range\n", targ);
575 			flush();
576 		}
577 	} else		/* last file */
578 		if (getline(TRUE) != NULL) {
579 			flush();
580 			for (;;) {
581 				if (getline(TRUE) == NULL)
582 					break;
583 				flush();
584 			}
585 		} else
586 			fatal("%s - out of range\n", targ);
587 	closefile();
588 }
589 
/*
 * Usage prints the (translated) command synopsis on standard error
 * and exits with a value of 1.
 */
static void
usage(void)
{
	char *synopsis;

	synopsis = gettext("usage: csplit [-ks] [-f prefix] [-n number] "
	    "file arg1 ...argn\n");
	(void) fprintf(stderr, synopsis);
	exit(1);
}
598