xref: /illumos-gate/usr/src/cmd/printf/printf.c (revision b6805bf78d2bbbeeaea8909a05623587b42d58b3)
1 /*
2  * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
3  * Copyright (c) 1989, 1993
4  *	The Regents of the University of California.  All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 4. Neither the name of the University nor the names of its contributors
15  *    may be used to endorse or promote products derived from this software
16  *    without specific prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 
31 #include <sys/types.h>
32 
33 #include <err.h>
34 #include <errno.h>
35 #include <inttypes.h>
36 #include <limits.h>
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <unistd.h>
41 #include <locale.h>
42 #include <note.h>
43 
/*
 * Fixed-arity wrappers around warnx().  Each takes three arguments and
 * forwards only the first one, two or three of them; callers pass NULL
 * for the slots that are dropped.
 */
#define	warnx1(a, b, c)		warnx(a)
#define	warnx2(a, b, c)		warnx(a, b)
#define	warnx3(a, b, c)		warnx(a, b, c)

/* Distance in bytes from y to x, computed on the integer pointer values. */
#define	PTRDIFF(x, y)	((uintptr_t)(x) - (uintptr_t)(y))

/* Shorthand for translating a message with gettext(). */
#define	_(x)	gettext(x)
51 
/*
 * PF(f, func): format one argument and write the result to stdout.
 *
 * f is a writable, NUL-terminated conversion specification and func is the
 * value to be converted.  The macro expects the locals havewidth, haveprec,
 * fieldwidth and precision to be in scope at the point of use; they select
 * whether a '*' width and/or precision is passed to asprintf().
 *
 * When the specification came from an "n$" index, f points at the '$'
 * instead of a '%'.  The '$' is overwritten with '%' for the duration of
 * the call and put back afterwards.
 *
 * The text is only written when asprintf() reports success; a failed
 * asprintf() produces no output and its buffer is never touched.
 */
#define	PF(f, func) do {						\
	char *b = NULL;							\
	int dollar = 0;							\
	int pf_rc;							\
	if (*(f) == '$') {						\
		dollar++;						\
		*(f) = '%';						\
	}								\
	if (havewidth) {						\
		if (haveprec)						\
			pf_rc = asprintf(&b, f, fieldwidth, precision, func); \
		else							\
			pf_rc = asprintf(&b, f, fieldwidth, func);	\
	} else if (haveprec) {						\
		pf_rc = asprintf(&b, f, precision, func);		\
	} else {							\
		pf_rc = asprintf(&b, f, func);				\
	}								\
	if (pf_rc >= 0 && b != NULL) {					\
		(void) fputs(b, stdout);				\
		free(b);						\
	}								\
	if (dollar)							\
		*(f) = '$';						\
_NOTE(CONSTCOND) } while (0)
75 
/* Forward declarations for the file-local helpers defined below. */
static int	 asciicode(void);
static char	*doformat(char *, int *);
static int	 escape(char *, int, size_t *);
static int	 getchr(void);
static int	 getfloating(long double *, int);
static int	 getint(int *);
static int	 getnum(intmax_t *, uintmax_t *, int);
static const char
		*getstr(void);
static char	*mknum(char *, char);
static void	 usage(void);

/*
 * Argument cursor shared by main(), doformat() and the get*() helpers.
 * myargv/myargc describe the arguments that were still unconsumed when the
 * current pass over the format string began (used to resolve "n$"), and
 * gargv points at the next argument to be consumed.
 */
static int  myargc;
static char **myargv;
static char **gargv;
91 
92 int
93 main(int argc, char *argv[])
94 {
95 	size_t len;
96 	int chopped, end, rval;
97 	char *format, *fmt, *start;
98 
99 	(void) setlocale(LC_ALL, "");
100 
101 	argv++;
102 	argc--;
103 
104 	/*
105 	 * POSIX says: Standard utilities that do not accept options,
106 	 * but that do accept operands, shall recognize "--" as a
107 	 * first argument to be discarded.
108 	 */
109 	if (argc && strcmp(argv[0], "--") == 0) {
110 		argc--;
111 		argv++;
112 	}
113 
114 	if (argc < 1) {
115 		usage();
116 		return (1);
117 	}
118 
119 	/*
120 	 * Basic algorithm is to scan the format string for conversion
121 	 * specifications -- once one is found, find out if the field
122 	 * width or precision is a '*'; if it is, gather up value.  Note,
123 	 * format strings are reused as necessary to use up the provided
124 	 * arguments, arguments of zero/null string are provided to use
125 	 * up the format string.
126 	 */
127 	fmt = format = *argv;
128 	chopped = escape(fmt, 1, &len);		/* backslash interpretation */
129 	rval = end = 0;
130 	gargv = ++argv;
131 
132 	for (;;) {
133 		char **maxargv = gargv;
134 
135 		myargv = gargv;
136 		for (myargc = 0; gargv[myargc]; myargc++)
137 			/* nop */;
138 		start = fmt;
139 		while (fmt < format + len) {
140 			if (fmt[0] == '%') {
141 				(void) fwrite(start, 1, PTRDIFF(fmt, start),
142 				    stdout);
143 				if (fmt[1] == '%') {
144 					/* %% prints a % */
145 					(void) putchar('%');
146 					fmt += 2;
147 				} else {
148 					fmt = doformat(fmt, &rval);
149 					if (fmt == NULL)
150 						return (1);
151 					end = 0;
152 				}
153 				start = fmt;
154 			} else
155 				fmt++;
156 			if (gargv > maxargv)
157 				maxargv = gargv;
158 		}
159 		gargv = maxargv;
160 
161 		if (end == 1) {
162 			warnx1(_("missing format character"), NULL, NULL);
163 			return (1);
164 		}
165 		(void) fwrite(start, 1, PTRDIFF(fmt, start), stdout);
166 		if (chopped || !*gargv)
167 			return (rval);
168 		/* Restart at the beginning of the format string. */
169 		fmt = format;
170 		end = 1;
171 	}
172 	/* NOTREACHED */
173 }
174 
175 
176 static char *
177 doformat(char *start, int *rval)
178 {
179 	static const char skip1[] = "#'-+ 0";
180 	static const char skip2[] = "0123456789";
181 	char *fmt;
182 	int fieldwidth, haveprec, havewidth, mod_ldbl, precision;
183 	char convch, nextch;
184 
185 	fmt = start + 1;
186 
187 	/* look for "n$" field index specifier */
188 	fmt += strspn(fmt, skip2);
189 	if ((*fmt == '$') && (fmt != (start + 1))) {
190 		int idx = atoi(start + 1);
191 		if (idx <= myargc) {
192 			gargv = &myargv[idx - 1];
193 		} else {
194 			gargv = &myargv[myargc];
195 		}
196 		start = fmt;
197 		fmt++;
198 	} else {
199 		fmt = start + 1;
200 	}
201 
202 	/* skip to field width */
203 	fmt += strspn(fmt, skip1);
204 	if (*fmt == '*') {
205 		if (getint(&fieldwidth))
206 			return (NULL);
207 		havewidth = 1;
208 		++fmt;
209 	} else {
210 		havewidth = 0;
211 
212 		/* skip to possible '.', get following precision */
213 		fmt += strspn(fmt, skip2);
214 	}
215 	if (*fmt == '.') {
216 		/* precision present? */
217 		++fmt;
218 		if (*fmt == '*') {
219 			if (getint(&precision))
220 				return (NULL);
221 			haveprec = 1;
222 			++fmt;
223 		} else {
224 			haveprec = 0;
225 
226 			/* skip to conversion char */
227 			fmt += strspn(fmt, skip2);
228 		}
229 	} else
230 		haveprec = 0;
231 	if (!*fmt) {
232 		warnx1(_("missing format character"), NULL, NULL);
233 		return (NULL);
234 	}
235 
236 	/*
237 	 * Look for a length modifier.  POSIX doesn't have these, so
238 	 * we only support them for floating-point conversions, which
239 	 * are extensions.  This is useful because the L modifier can
240 	 * be used to gain extra range and precision, while omitting
241 	 * it is more likely to produce consistent results on different
242 	 * architectures.  This is not so important for integers
243 	 * because overflow is the only bad thing that can happen to
244 	 * them, but consider the command  printf %a 1.1
245 	 */
246 	if (*fmt == 'L') {
247 		mod_ldbl = 1;
248 		fmt++;
249 		if (!strchr("aAeEfFgG", *fmt)) {
250 			warnx2(_("bad modifier L for %%%c"), *fmt, NULL);
251 			return (NULL);
252 		}
253 	} else {
254 		mod_ldbl = 0;
255 	}
256 
257 	convch = *fmt;
258 	nextch = *++fmt;
259 	*fmt = '\0';
260 	switch (convch) {
261 	case 'b': {
262 		size_t len;
263 		char *p;
264 		int getout;
265 
266 		p = strdup(getstr());
267 		if (p == NULL) {
268 			warnx2("%s", strerror(ENOMEM), NULL);
269 			return (NULL);
270 		}
271 		getout = escape(p, 0, &len);
272 		*(fmt - 1) = 's';
273 		PF(start, p);
274 		*(fmt - 1) = 'b';
275 		free(p);
276 
277 		if (getout)
278 			return (fmt);
279 		break;
280 	}
281 	case 'c': {
282 		char p;
283 
284 		p = getchr();
285 		PF(start, p);
286 		break;
287 	}
288 	case 's': {
289 		const char *p;
290 
291 		p = getstr();
292 		PF(start, p);
293 		break;
294 	}
295 	case 'd': case 'i': case 'o': case 'u': case 'x': case 'X': {
296 		char *f;
297 		intmax_t val;
298 		uintmax_t uval;
299 		int signedconv;
300 
301 		signedconv = (convch == 'd' || convch == 'i');
302 		if ((f = mknum(start, convch)) == NULL)
303 			return (NULL);
304 		if (getnum(&val, &uval, signedconv))
305 			*rval = 1;
306 		if (signedconv)
307 			PF(f, val);
308 		else
309 			PF(f, uval);
310 		break;
311 	}
312 	case 'e': case 'E':
313 	case 'f': case 'F':
314 	case 'g': case 'G':
315 	case 'a': case 'A': {
316 		long double p;
317 
318 		if (getfloating(&p, mod_ldbl))
319 			*rval = 1;
320 		if (mod_ldbl)
321 			PF(start, p);
322 		else
323 			PF(start, (double)p);
324 		break;
325 	}
326 	default:
327 		warnx2(_("illegal format character %c"), convch, NULL);
328 		return (NULL);
329 	}
330 	*fmt = nextch;
331 	return (fmt);
332 }
333 
/*
 * Build an integer conversion specification that takes an
 * intmax_t/uintmax_t argument.
 *
 * str is a specification whose last character is the conversion
 * character; the result is that text with a 'j' length modifier inserted
 * in front of ch.  The result lives in a buffer owned by this function
 * that is reused (and grown in 1K steps) by later calls.  Returns NULL
 * after a diagnostic if the buffer cannot be grown.
 */
static char *
mknum(char *str, char ch)
{
	static char *buf;
	static size_t bufsize;
	size_t srclen, need;

	srclen = strlen(str);
	need = srclen + 2;		/* room for 'j' and the NUL */

	if (need > bufsize) {
		/* Round the request up to a multiple of 1024 bytes. */
		size_t rounded = ((need + 1023) / 1024) * 1024;
		char *grown = realloc(buf, rounded);

		if (grown == NULL) {
			warnx("%s", strerror(ENOMEM));
			return (NULL);
		}
		buf = grown;
		bufsize = rounded;
	}

	/* Everything but the conversion character, then 'j' and ch. */
	(void) memmove(buf, str, srclen - 1);
	buf[srclen - 1] = 'j';
	buf[srclen] = ch;
	buf[srclen + 1] = '\0';
	return (buf);
}
359 
/*
 * Expand backslash escape sequences in fmt, in place.
 *
 * percent is non-zero when fmt is a format string: octal escapes then take
 * at most three digits and an escape that yields '%' is stored as "%%".
 * When percent is zero (a %b argument) an octal escape starting with '0'
 * may take up to four digits.
 *
 * The length of the expanded text, not counting the terminating NUL, is
 * stored in *len.  Returns 1 if the text was cut short by \c and 0
 * otherwise.
 */
static int
escape(char *fmt, int percent, size_t *len)
{
	char *const base = fmt;
	char *in = fmt;
	char *out = fmt;

	while (*in != '\0') {
		char ch = *in++;
		int digits, value;

		/* Ordinary characters are copied through unchanged. */
		if (ch != '\\') {
			*out++ = ch;
			continue;
		}

		ch = *in;		/* character after the backslash */

		if (ch == '\0') {
			/* EOS, user error: keep the lone backslash */
			out[0] = '\\';
			out[1] = '\0';
			*len = (size_t)(out + 1 - base);
			return (0);
		}

		if (ch == 'c') {
			/* \c: drop everything from here on */
			*out = '\0';
			*len = (size_t)(out - base);
			return (1);
		}

		if (ch >= '0' && ch <= '7') {
			/* octal constant */
			digits = (!percent && ch == '0') ? 4 : 3;
			value = 0;
			while (digits-- > 0 && *in >= '0' && *in <= '7') {
				value = (value << 3) + (*in - '0');
				in++;
			}
			if (percent && value == '%') {
				/* keep it from starting a conversion */
				*out++ = '%';
				*out++ = '%';
			} else {
				*out++ = (char)value;
			}
			continue;
		}

		in++;
		switch (ch) {
		case 'a':		/* bell/alert */
			*out++ = '\a';
			break;
		case 'b':		/* backspace */
			*out++ = '\b';
			break;
		case 'f':		/* form-feed */
			*out++ = '\f';
			break;
		case 'n':		/* newline */
			*out++ = '\n';
			break;
		case 'r':		/* carriage-return */
			*out++ = '\r';
			break;
		case 't':		/* horizontal tab */
			*out++ = '\t';
			break;
		case 'v':		/* vertical tab */
			*out++ = '\v';
			break;
		default:
			/* backslash, single quote and anything unknown */
			*out++ = ch;
			break;
		}
	}

	*out = '\0';
	*len = (size_t)(out - base);
	return (0);
}
431 
432 static int
433 getchr(void)
434 {
435 	if (!*gargv)
436 		return ('\0');
437 	return ((int)**gargv++);
438 }
439 
440 static const char *
441 getstr(void)
442 {
443 	if (!*gargv)
444 		return ("");
445 	return (*gargv++);
446 }
447 
448 static int
449 getint(int *ip)
450 {
451 	intmax_t val;
452 	uintmax_t uval;
453 	int rval;
454 
455 	if (getnum(&val, &uval, 1))
456 		return (1);
457 	rval = 0;
458 	if (val < INT_MIN || val > INT_MAX) {
459 		warnx3("%s: %s", *gargv, strerror(ERANGE));
460 		rval = 1;
461 	}
462 	*ip = (int)val;
463 	return (rval);
464 }
465 
466 static int
467 getnum(intmax_t *ip, uintmax_t *uip, int signedconv)
468 {
469 	char *ep;
470 	int rval;
471 
472 	if (!*gargv) {
473 		*ip = 0;
474 		return (0);
475 	}
476 	if (**gargv == '"' || **gargv == '\'') {
477 		if (signedconv)
478 			*ip = asciicode();
479 		else
480 			*uip = asciicode();
481 		return (0);
482 	}
483 	rval = 0;
484 	errno = 0;
485 	if (signedconv)
486 		*ip = strtoimax(*gargv, &ep, 0);
487 	else
488 		*uip = strtoumax(*gargv, &ep, 0);
489 	if (ep == *gargv) {
490 		warnx2(_("%s: expected numeric value"), *gargv, NULL);
491 		rval = 1;
492 	} else if (*ep != '\0') {
493 		warnx2(_("%s: not completely converted"), *gargv, NULL);
494 		rval = 1;
495 	}
496 	if (errno == ERANGE) {
497 		warnx3("%s: %s", *gargv, strerror(ERANGE));
498 		rval = 1;
499 	}
500 	++gargv;
501 	return (rval);
502 }
503 
504 static int
505 getfloating(long double *dp, int mod_ldbl)
506 {
507 	char *ep;
508 	int rval;
509 
510 	if (!*gargv) {
511 		*dp = 0.0;
512 		return (0);
513 	}
514 	if (**gargv == '"' || **gargv == '\'') {
515 		*dp = asciicode();
516 		return (0);
517 	}
518 	rval = 0;
519 	errno = 0;
520 	if (mod_ldbl)
521 		*dp = strtold(*gargv, &ep);
522 	else
523 		*dp = strtod(*gargv, &ep);
524 	if (ep == *gargv) {
525 		warnx2(_("%s: expected numeric value"), *gargv, NULL);
526 		rval = 1;
527 	} else if (*ep != '\0') {
528 		warnx2(_("%s: not completely converted"), *gargv, NULL);
529 		rval = 1;
530 	}
531 	if (errno == ERANGE) {
532 		warnx3("%s: %s", *gargv, strerror(ERANGE));
533 		rval = 1;
534 	}
535 	++gargv;
536 	return (rval);
537 }
538 
539 static int
540 asciicode(void)
541 {
542 	int ch;
543 
544 	ch = **gargv;
545 	if (ch == '\'' || ch == '"')
546 		ch = (*gargv)[1];
547 	++gargv;
548 	return (ch);
549 }
550 
/*
 * Print the usage message on stderr.
 *
 * The translated text is written with fputs() rather than being used as a
 * printf format, so a '%' in a message catalog entry cannot be interpreted
 * as a conversion specification.
 */
static void
usage(void)
{
	(void) fputs(_("usage: printf format [arguments ...]\n"), stderr);
}
556