xref: /illumos-gate/usr/src/cmd/awk_xpg4/awk4.c (revision 581cede61ac9c14d8d4ea452562a567189eead78)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * awk -- functions
24  *
25  * Copyright (c) 1995, 1996 by Sun Microsystems, Inc.
26  * All rights reserved.
27  *
28  * Copyright 1986, 1994 by Mortice Kern Systems Inc.  All rights reserved.
29  *
30  * Based on MKS awk(1) ported to be /usr/xpg4/bin/awk with POSIX/XCU4 changes
31  */
32 
33 #pragma ident	"%Z%%M%	%I%	%E% SMI"
34 
35 #include "awk.h"
36 #include "y.tab.h"
37 #include <time.h>
38 #include <sys/wait.h>
39 
40 static uint	nargs(NODE *np);
41 static NODE	*dosub(NODE *np, int glob);
42 static NODE	*docasetr(NODE *np, int upper);
43 static int	asortcmp(const void *npp1, const void *npp2);
44 
45 static char	nargerr[] = "wrong number of arguments to function \"%s\"";
46 static NODE	*asortfunc;		/* Function call for asort() */
47 static NODE	*asnp1, *asnp2;		/* index1, index2 nodes */
48 static int	asarraylen;		/* strlen(array)+1 for asort */
49 
50 /*
51  * Return the value of exp(x).
52  * Usage:	y = exp(x)
53  *		y = exp()
54  */
55 NODE *
56 f_exp(NODE *np)
57 {
58 	register uint na;
59 
60 	if ((na = nargs(np)) > 1)
61 		awkerr(nargerr, s_exp);
62 	return (realnode(exp(exprreal(na==0 ? field0 : getlist(&np)))));
63 }
64 
65 /*
66  * Return the integer part of the argument.
67  * Usage:	i = int(r)
68  *		i = int()
69  */
70 NODE *
71 f_int(NODE *np)
72 {
73 	register uint na;
74 
75 	if ((na = nargs(np)) > 1)
76 		awkerr(nargerr, s_int);
77 	return (intnode(exprint(na==0 ? field0 : getlist(&np))));
78 }
79 
80 /*
81  * Logarithm function.
82  * Usage:	y = log(x)
83  *		y = log()
84  */
85 NODE *
86 f_log(NODE *np)
87 {
88 	register uint na;
89 
90 	if ((na = nargs(np)) > 1)
91 		awkerr(nargerr, s_log);
92 	return (realnode(log(exprreal(na==0 ? field0 : getlist(&np)))));
93 }
94 
95 /*
96  * Square root function.
97  * Usage:	y = sqrt(x)
98  *		y = sqrt()
99  */
100 NODE *
101 f_sqrt(NODE *np)
102 {
103 	register uint na;
104 
105 	if ((na = nargs(np)) > 1)
106 		awkerr(nargerr, s_sqrt);
107 	return (realnode(sqrt(exprreal(na==0 ? field0 : getlist(&np)))));
108 }
109 
110 /*
111  * Trigonometric sine function.
112  * Usage:	y = sin(x)
113  */
114 NODE *
115 f_sin(NODE *np)
116 {
117 	if (nargs(np) != 1)
118 		awkerr(nargerr, s_sin);
119 	return (realnode(sin(exprreal(getlist(&np)))));
120 }
121 
122 /*
123  * Trigonometric cosine function.
124  * Usage:	y = cos(x)
125  */
126 NODE *
127 f_cos(NODE *np)
128 {
129 	if (nargs(np) != 1)
130 		awkerr(nargerr, s_cos);
131 	return (realnode(cos(exprreal(getlist(&np)))));
132 }
133 
134 /*
135  * Arctangent of y/x.
136  * Usage:	z = atan2(y, x)
137  */
138 NODE *
139 f_atan2(NODE *np)
140 {
141 	double y, x;
142 
143 	if (nargs(np) != 2)
144 		awkerr(nargerr, s_atan2);
145 	y = (double)exprreal(getlist(&np));
146 	x = (double)exprreal(getlist(&np));
147 	return (realnode(atan2(y, x)));
148 }
149 
150 /*
151  * Set the seed for the random number generator function -- rand.
152  * Usage:	srand(x)
153  *		srand()
154  */
155 NODE *
156 f_srand(NODE *np)
157 {
158 	register uint na;
159 	register uint seed;
160 	static uint oldseed = 0;
161 
162 	if ((na = nargs(np)) > 1)
163 		awkerr(nargerr, s_srand);
164 	if (na == 0)
165 		seed = (uint)time((time_t *)0); else
166 		seed = (uint)exprint(getlist(&np));
167 	srand(seed);
168 	na = oldseed;
169 	oldseed = seed;
170 	return (intnode((INT)na));
171 }
172 
173 /*
174  * Generate a random number.
175  * Usage:	x = rand()
176  */
177 NODE *
178 f_rand(NODE *np)
179 {
180 	double result;
181 	int expon;
182 	ushort rint;
183 
184 	if (nargs(np) != 0)
185 		awkerr(nargerr, s_rand);
186 	rint = rand() & SHRT_MAX;
187 	result = frexp((double)rint, &expon);
188 	return (realnode((REAL)ldexp(result, expon-15)));
189 }
190 
191 /*
192  * Substitute function.
193  * Usage:	n = sub(regex, replace, target)
194  *		n = sub(regex, replace)
195  */
196 NODE *
197 f_sub(NODE *np)
198 {
199 	return (dosub(np, 1));
200 }
201 
202 /*
203  * Global substitution function.
204  * Usage:	n = gsub(regex, replace, target)
205  *		n = gsub(regex, replace)
206  */
207 NODE *
208 f_gsub(NODE *np)
209 {
210 	return (dosub(np, 0));
211 }
212 
213 /*
214  * Do actual substitutions.
215  * `glob' is the number to substitute, 0 for all.
216  */
217 static NODE *
218 dosub(NODE *np, int glob)
219 {
220 	wchar_t *text;
221 	register wchar_t *sub;
222 	register uint n;
223 	register uint na;
224 	register REGEXP rp;
225 	NODE *left;
226 	static wchar_t *buf;
227 
228 	if ((na = nargs(np)) != 2 && na != 3)
229 		awkerr(nargerr, glob==0 ? s_gsub : s_sub);
230 	rp = getregexp(getlist(&np));
231 	sub = exprstring(getlist(&np));
232 	if (na == 3) {
233 		left = getlist(&np);
234 		text = exprstring(left);
235 	} else {
236 		left = field0;
237 		text = linebuf;
238 	}
239 	switch (REGWDOSUBA(rp, sub, text, &buf, 256, &glob)) {
240 	case REG_OK:
241 	case REG_NOMATCH:
242 		n = glob;
243 		break;
244 	case REG_ESPACE:
245 		if (buf != NULL)
246 			free(buf);
247 		awkerr(nomem);
248 	default:
249 		awkerr(gettext("regular expression error"));
250 	}
251 	(void)assign(left, stringnode(buf, FNOALLOC, wcslen(buf)));
252 	return (intnode((INT)n));
253 }
254 
255 /*
256  * Match function.  Return position (origin 1) or 0 for regular
257  * expression match in string.  Set new variables RSTART and RLENGTH
258  * as well.
259  * Usage:	pos = match(string, re)
260  */
261 NODE *
262 f_match(NODE *np)
263 {
264 	register wchar_t *text;
265 	register REGEXP rp;
266 	register int pos, length;
267 	REGWMATCH_T match[10];
268 
269 	if (nargs(np) != 2)
270 		awkerr(nargerr, s_match);
271 	text = exprstring(getlist(&np));
272 	rp = getregexp(getlist(&np));
273 	if (REGWEXEC(rp, text, 10, match, 0) == REG_OK) {
274 		pos = match[0].rm_sp-text+1;
275 		length = match[0].rm_ep - match[0].rm_sp;
276 	} else {
277 		pos = 0;
278 		length = -1;
279 	}
280 	constant->n_int = length;
281 	(void)assign(vlook(M_MB_L("RLENGTH")), constant);
282 	return (assign(vlook(M_MB_L("RSTART")), intnode((INT)pos)));
283 }
284 
285 /*
286  * Call shell or command interpreter.
287  * Usage:	status = system(command)
288  */
289 NODE *
290 f_system(NODE *np)
291 {
292 	int retcode;
293 
294 	if (nargs(np) != 1)
295 		awkerr(nargerr, s_system);
296 	(void) fflush(stdout);
297 	retcode = system(mbunconvert(exprstring(getlist(&np))));
298 	return (intnode((INT)WEXITSTATUS(retcode)));
299 }
300 
301 /*
302  * Search for string within string.
303  * Usage:	pos = index(string1, string2)
304  */
305 NODE *
306 f_index(NODE *np)
307 {
308 	register wchar_t *s1, *s2;
309 	register int l1, l2;
310 	register int result;
311 
312 	if (nargs(np) != 2)
313 		awkerr(nargerr, s_index);
314 	s1 = (wchar_t *)exprstring(getlist(&np));
315 	s2 = (wchar_t *)exprstring(getlist(&np));
316 	l1 = wcslen(s1);
317 	l2 = wcslen(s2);
318 	result = 1;
319 	while (l2 <= l1) {
320 		if (memcmp(s1, s2, l2 * sizeof(wchar_t)) == 0)
321 			break;
322 		result++;
323 		s1++;
324 		l1--;
325 	}
326 	if (l2 > l1)
327 		result = 0;
328 	return (intnode((INT)result));
329 }
330 
331 /*
332  * Return length of argument or $0
333  * Usage:	n = length(string)
334  *		n = length()
335  *		n = length
336  */
337 NODE *
338 f_length(NODE *np)
339 {
340 	register uint na;
341 
342 	if ((na = nargs(np)) > 1)
343 		awkerr(nargerr, s_length);
344 	if (na == 0)
345 		na = lbuflen; else
346 		na = wcslen((wchar_t *)exprstring(getlist(&np)));
347 	return (intnode((INT)na));
348 }
349 
350 /*
351  * Split string into fields.
352  * Usage: nfields = split(string, array [, separator]);
353  */
354 NODE *
355 f_split(NODE *np)
356 {
357 	register wchar_t *cp;
358 	wchar_t *ep, *saved = 0;
359 	register NODE *tnp, *snp, *otnp;
360 	register NODE *sep;
361 	REGEXP old_resep = 0;
362 	size_t seplen;
363 	uint n;
364 	wint_t c;
365 	wchar_t savesep[20];
366 	wchar_t  *(*old_awkfield)(wchar_t **) = 0;
367 
368 	if ((n = nargs(np))<2 || n>3)
369 		awkerr(nargerr, s_split);
370 	ep = exprstring(snp = getlist(&np));
371 	tnp = getlist(&np);
372 	if (snp->n_type == INDEX && snp->n_left == tnp)
373 		ep = saved = wsdup(ep);
374 	if (n == 3) {
375 		sep = getlist(&np);
376 	} else
377 		sep = NNULL;
378 	switch (tnp->n_type) {
379 	case ARRAY:
380 		delarray(tnp);
381 		break;
382 
383 	case PARM:
384 		break;
385 
386 	case VAR:
387 		if (isstring(tnp->n_flags) && tnp->n_string==_null)
388 			break;
389 	default:
390 		awkerr(gettext(
391 			"second parameter to \"split\" must be an array"));
392 		/*NOTREACHED*/
393 	}
394 	/*
395 	 * If an argument has been passed in to be used as the
396 	 * field separator check to see if it is a constant regular
397 	 * expression. If so, use it directly otherwise reduce the
398 	 * expression, convert the result into a string and assign it
399 	 * to "FS" (after saving the old value for FS.)
400 	 */
401 	if (sep != NNULL) {
402 		if (sep->n_type == PARM)
403 			sep = sep->n_next;
404 		if (sep->n_type == RE) {
405 			old_resep = resep;
406 			resep = sep->n_regexp;
407 			old_awkfield = awkfield;
408 			awkfield = refield;
409 		} else {
410 			sep = exprreduce(sep);
411 			seplen = wcslen(cp = (wchar_t *)exprstring(varFS));
412 			(void) memcpy(savesep, cp,
413 				(seplen+1) * sizeof(wchar_t));
414 			(void) assign(varFS, sep);
415 		}
416 	}
417 	/*
418 	 * Iterate over the record, extracting each field and assigning it to
419 	 * the corresponding element in the array.
420 	 */
421 	otnp = tnp;	/* save tnp for possible promotion */
422 	tnp = node(INDEX, tnp, constant);
423 	fcount = 0;
424 	for (;;) {
425 		if ((cp = (*awkfield)(&ep)) == NULL) {
426 			if (fcount == 0) {
427 				if (otnp->n_type == PARM)
428 					otnp = otnp->n_next;
429 				promote(otnp);
430 			}
431 			break;
432 		}
433 		c = *ep;
434 		*ep = '\0';
435 		constant->n_int = ++fcount;
436 		(void)assign(tnp, stringnode(cp,FALLOC|FSENSE,(size_t)(ep-cp)));
437 		*ep = c;
438 	}
439 	/*
440 	 * Restore the old record separator/and or regular expression.
441 	 */
442 	if (sep != NNULL) {
443 		if (old_awkfield != 0) {
444 			resep = old_resep;
445 			awkfield = old_awkfield;
446 		} else {
447 			(void)assign(varFS,
448 				stringnode(savesep, FSTATIC, seplen));
449 		}
450 	}
451 	if (saved)
452 		free(saved);
453 	return (intnode((INT)fcount));
454 }
455 
456 /*
457  * Sprintf function.
458  * Usage:	string = sprintf(format, arg, ...)
459  */
460 NODE *
461 f_sprintf(NODE *np)
462 {
463         wchar_t *cp;
464         size_t length;
465 
466         if (nargs(np) == 0)
467                 awkerr(nargerr, s_sprintf);
468         length = xprintf(np, (FILE *)NULL, &cp);
469         np = stringnode(cp, FNOALLOC, length);
470         return (np);
471 }
472 
473 /*
474  * Substring.
475  * newstring = substr(string, start, [length])
476  */
477 NODE *
478 f_substr(NODE *np)
479 {
480 	register STRING str;
481 	register size_t n;
482 	register int start;
483 	register size_t len;
484 
485 	if ((n = nargs(np))<2 || n>3)
486 		awkerr(nargerr, s_substr);
487 	str = exprstring(getlist(&np));
488 	if ((start = (int)exprint(getlist(&np))-1) < 0)
489 		start = 0;
490 	if (n == 3) {
491 		int x;
492 		x = (int)exprint(getlist(&np));
493 		if (x < 0)
494 			len = 0;
495 		else
496 			len = (size_t)x;
497 	} else
498 		len = LARGE;
499 	n = wcslen((wchar_t *)str);
500 	if (start > n)
501 		start = n;
502 	n -= start;
503 	if (len > n)
504 		len = n;
505 	str += start;
506 	n = str[len];
507 	str[len] = '\0';
508 	np = stringnode(str, FALLOC, len);
509 	str[len] = n;
510 	return (np);
511 }
512 
513 /*
514  * Close an output or input file stream.
515  */
516 NODE *
517 f_close(NODE *np)
518 {
519 	register OFILE *op;
520 	register char *name;
521 
522 	if (nargs(np) != 1)
523 		awkerr(nargerr, s_close);
524 	name = mbunconvert(exprstring(getlist(&np)));
525 	for (op = &ofiles[0]; op < &ofiles[NIOSTREAM]; op++)
526 		if (op->f_fp!=FNULL && strcmp(name, op->f_name)==0) {
527 			awkclose(op);
528 			break;
529 		}
530 	if (op >= &ofiles[NIOSTREAM])
531 		return (const1);
532 	return (const0);
533 }
534 
535 /*
536  * Return the integer value of the first character of a string.
537  * Usage:	char = ord(string)
538  */
539 NODE *
540 f_ord(NODE *np)
541 {
542 	if (nargs(np) != 1)
543 		awkerr(nargerr, s_ord);
544 	return (intnode((INT)*exprstring(getlist(&np))));
545 }
546 
547 /*
548  * Return the argument string in lower case:
549  * Usage:
550  *	lower = tolower(upper)
551  */
552 NODE *
553 f_tolower(NODE *np)
554 {
555 	return (docasetr(np, 0));
556 }
557 
558 /*
559  * Return the argument string in upper case:
560  * Usage:
561  *	upper = toupper(lower)
562  */
563 NODE *
564 f_toupper(NODE *np)
565 {
566 	return (docasetr(np, 1));
567 }
568 
569 /*
570  * Sort the array into traversal order by the next "for (i in array)" loop.
571  * Usage:
572  *	asort(array, "cmpfunc")
573  * 	cmpfunc(array, index1, index2)
574  *		returns:
575  *		<0		if 	array[index1] <  array[index2]
576  *		 0		if	array[index1] == array[index2]
577  *		>0		if	array[index1] >  array[index2]
578  */
579 NODE *
580 f_asort(NODE *np)
581 {
582 	NODE *array;
583 	STRING funcname;
584 	register size_t nel;
585 	register NODE *tnp;
586 	register NODE *funcnp;
587 	register NODE **alist, **npp;
588 
589 	if (nargs(np) != 2)
590 		awkerr(nargerr, s_asort);
591 	array = getlist(&np);
592 	if (array->n_type == PARM)
593 		array = array->n_next;
594 	if (array->n_type != ARRAY)
595 		awkerr(gettext("%s function requires an array"),
596 			s_asort);
597 	funcname = exprstring(getlist(&np));
598 	if ((funcnp = vlookup(funcname, 1)) == NNULL
599 	 || funcnp->n_type != UFUNC)
600 		awkerr(gettext("%s: %s is not a function\n"),
601 		    s_asort, funcname);
602 	/*
603 	 * Count size of array, allowing one extra for NULL at end
604 	 */
605 	nel = 1;
606 	for (tnp = array->n_alink; tnp != NNULL; tnp = tnp->n_alink)
607 		++nel;
608 	/*
609 	 * Create UFUNC node that points at the funcnp on left and the
610 	 * list of three variables on right (array, index1, index2)
611 	 *				UFUNC
612 	 *				/    \
613 	 *			   funcnp    COMMA
614 	 *				      /   \
615 	 *				array	  COMMA
616 	 *					  /    \
617 	 *					index1 index2
618 	 */
619 	if (asortfunc == NNULL) {
620 		running = 0;
621 		asortfunc = node(CALLUFUNC, NNULL,
622 				    node(COMMA, NNULL,
623 				    node(COMMA,
624 					asnp1=stringnode(_null, FSTATIC, 0),
625 					asnp2=stringnode(_null, FSTATIC, 0))));
626 		running = 1;
627 	}
628 	asortfunc->n_left = funcnp;
629 	asortfunc->n_right->n_left = array;
630 	asarraylen = wcslen(array->n_name)+1;
631 	alist = (NODE **) emalloc(nel*sizeof(NODE *));
632 	/*
633 	 * Copy array into alist.
634 	 */
635 	npp = alist;
636 	for (tnp = array->n_alink; tnp != NNULL; tnp = tnp->n_alink)
637 		*npp++ = tnp;
638 	*npp = NNULL;
639 	/*
640 	 * Re-order array to this list
641 	 */
642 	qsort((wchar_t *)alist, nel-1, sizeof (NODE *), asortcmp);
643 	tnp = array;
644 	npp = alist;
645 	do {
646 		tnp = tnp->n_alink = *npp;
647 	} while (*npp++ != NNULL);
648 	free((wchar_t *)alist);
649 	return (constundef);
650 }
651 
652 /*
653  * Return the number of arguments of a function.
654  */
655 static uint
656 nargs(NODE *np)
657 {
658 	register int n;
659 
660 	if (np == NNULL)
661 		return (0);
662 	n = 1;
663 	while (np!=NNULL && np->n_type==COMMA) {
664 		np = np->n_right;
665 		n++;
666 	}
667 	return (n);
668 }
669 
670 /*
671  * Do case translation.
672  */
673 static NODE *
674 docasetr(NODE *np, int upper)
675 {
676 	register int c;
677 	register wchar_t *cp;
678 	register wchar_t *str;
679 	register uint na;
680 
681 	if ((na = nargs(np)) > 1)
682 		awkerr(nargerr, upper ? s_toupper : s_tolower);
683 	str = strsave(na==0 ? linebuf : exprstring(getlist(&np)));
684 	cp = str;
685 	if (upper) {
686 		while ((c = *cp++) != '\0')
687 			cp[-1] = towupper(c);
688 	} else {
689 		while ((c = *cp++) != '\0')
690 			cp[-1] = towlower(c);
691 	}
692 	return (stringnode((STRING)str, FNOALLOC, (size_t)(cp-str-1)));
693 }
694 
695 /*
696  * The comparison routine used by qsort inside f_asort()
697  */
698 static int
699 asortcmp(const void *npp1, const void *npp2)
700 {
701 	asnp1->n_strlen =
702 	    wcslen(asnp1->n_string = (*(NODE **)npp1)->n_name+asarraylen);
703 	asnp2->n_strlen =
704 	    wcslen(asnp2->n_string = (*(NODE **)npp2)->n_name+asarraylen);
705 	return ((int)exprint(asortfunc));
706 }
707 
708 #if M_MATHERR
709 #if !defined(__BORLANDC__)&&defined(__TURBOC__)&&__COMPACT__&&__EMULATE__
710 /* So it won't optimize registers our FP is using */
711 #define	flushesbx()	(_BX = 0, _ES = _BX)
712 #else
713 #define	flushesbx()	(0)
714 #endif
715 
716 /*
717  * Math error for awk.
718  */
719 int
720 matherr(struct exception *ep)
721 {
722 	register uint type;
723 	static char msgs[7][256];
724 	static int first_time = 1;
725 
726 	if (first_time) {
727 		msgs[0] = gettext("Unknown FP error"),
728 		msgs[1] = gettext("Domain"),
729 		msgs[2] = gettext("Singularity"),
730 		msgs[3] = gettext("Overflow"),
731 		msgs[4] = gettext("Underflow"),
732 		msgs[5] = gettext("Total loss of precision"),
733 		msgs[6] = gettext("Partial loss of precision")
734 		first_time = 0;
735 	}
736 
737 	if ((type = ep->type) > (uint)PLOSS)
738 		type = 0;
739 	(void)fprintf(stderr, "awk: %s", strmsg(msgs[type]));
740 	(void)fprintf(stderr, gettext(
741 		" error in function %s(%g) at NR=%lld\n"),
742 		((void) flushesbx(), ep->name), ep->arg1, (INT)exprint(varNR));
743 	return (1);
744 }
745 #endif	/*M_MATHERR*/
746