xref: /illumos-gate/usr/src/cmd/awk/awk.g.y (revision 7c478bd95313f5f23a4c958a745db2134aa03244)
1 %{
2 /*
3  * CDDL HEADER START
4  *
5  * The contents of this file are subject to the terms of the
6  * Common Development and Distribution License, Version 1.0 only
7  * (the "License").  You may not use this file except in compliance
8  * with the License.
9  *
10  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
11  * or http://www.opensolaris.org/os/licensing.
12  * See the License for the specific language governing permissions
13  * and limitations under the License.
14  *
15  * When distributing Covered Code, include this CDDL HEADER in each
16  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
17  * If applicable, add the following below this CDDL HEADER, with the
18  * fields enclosed by brackets "[]" replaced with your own identifying
19  * information: Portions Copyright [yyyy] [name of copyright owner]
20  *
21  * CDDL HEADER END
22  */
23 %}
24 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
25 /*	  All Rights Reserved  	*/
26 
27 
28 %{
29 #ident	"%Z%%M%	%I%	%E% SMI"	/* SVr4.0 2.10	*/
30 %}
31 
32 %{
/*
 * C prologue copied into the generated parser: project header, the
 * yywrap() hook, and the parser-wide state shared by the rule actions
 * and the helper functions at the end of this file.
 */
33 #include "awk.h"
/* yywrap always returns 1.  NOTE(review): presumably tells the lexer there
 * is no further input at end of file -- confirm against the lexer. */
34 yywrap() { return(1); }
/* Without DEBUG, PUTS(x) expands to nothing. */
35 #ifndef	DEBUG
36 #	define	PUTS(x)
37 #endif
/* Statement lists accumulated by the XBEGIN and XEND alternatives of
 * pa_stat; handed to stat3(PROGRAM, ...) in the program rule. */
38 Node	*beginloc = 0, *endloc = 0;
39 int	infunc	= 0;	/* = 1 if in arglist or body of func */
/* Name of the function being defined: set by setfname(), reset to 0 at the
 * end of the FUNC alternative of pa_stat. */
40 uchar	*curfname = 0;
41 Node	*arglist = 0;	/* list of args for current function */
/* Forward declarations for helpers defined after the second %%. */
42 uchar	*strnode();
43 Node	*notnull();
44 %}
45 
/*
 * Semantic value carried by tokens and nonterminals.  The <tag> on each
 * %token / %type declaration below selects which member is used:
 *   p  - Node pointer,  cp - Cell pointer,
 *   i  - int,           s  - uchar string pointer.
 */
46 %union {
47 	Node	*p;
48 	Cell	*cp;
49 	int	i;
50 	uchar	*s;
51 }
52 
/*
 * Token declarations.  The tag names the %union member holding the
 * token's value.  FIRSTTOKEN here and LASTTOKEN (declared after the
 * precedence table) bracket the list, as their own comments require.
 * Several names declared here (PROGRAM, PASTAT, ARRAY, INTEST, CONDEXPR,
 * POSTINCR, ...) appear in this file only as node-type arguments to the
 * stat*() and op*() constructors, not on the right-hand side of any rule.
 */
53 %token	<i>	FIRSTTOKEN	/* must be first */
54 %token	<p>	PROGRAM PASTAT PASTAT2 XBEGIN XEND
55 %token	<i>	NL ',' '{' '(' '|' ';' '/' ')' '}' '[' ']'
56 %token	<i>	ARRAY
57 %token	<i>	MATCH NOTMATCH MATCHOP
58 %token	<i>	FINAL DOT ALL CCL NCCL CHAR OR STAR QUEST PLUS
59 %token	<i>	AND BOR APPEND EQ GE GT LE LT NE IN
60 %token	<i>	ARG BLTIN BREAK CLOSE CONTINUE DELETE DO EXIT FOR FUNC
61 %token	<i>	SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT
62 %token	<i>	ADD MINUS MULT DIVIDE MOD
63 %token	<i>	ASSIGN ASGNOP ADDEQ SUBEQ MULTEQ DIVEQ MODEQ POWEQ
64 %token	<i>	PRINT PRINTF SPRINTF
65 %token	<p>	ELSE INTEST CONDEXPR
66 %token	<i>	POSTINCR PREINCR POSTDECR PREDECR
67 %token	<cp>	VAR IVAR VARNF CALL NUMBER STRING FIELD
68 %token	<s>	REGEXPR
69 
/*
 * Value types of the nonterminals.  Nonterminals not listed here
 * (program, do, else, rbrace, st) have no declared value type.
 */
70 %type	<p>	pas pattern ppattern plist pplist patlist prarg term
71 %type	<p>	pa_pat pa_stat pa_stats
72 %type	<s>	reg_expr
73 %type	<p>	simple_stmt opt_simple_stmt stmt stmtlist
74 %type	<p>	var varname funcname varlist
75 %type	<p>	for if while
76 %type	<i>	pst opt_pst lbrace rparen comma nl opt_nl and bor
77 %type	<i>	subop print
78 
/*
 * Precedence and associativity table.  Yacc assigns increasing precedence
 * to each successive line, so ASGNOP binds loosest and INDIRECT tightest.
 * Rules below borrow levels from this table with %prec ('?', BOR, AND,
 * CAT, UMINUS).  The order of these lines is significant; do not reorder.
 */
79 %right	ASGNOP
80 %right	'?'
81 %right	':'
82 %left	BOR
83 %left	AND
84 %left	GETLINE
85 %nonassoc APPEND EQ GE GT LE LT NE MATCHOP IN '|'
86 %left	ARG BLTIN BREAK CALL CLOSE CONTINUE DELETE DO EXIT FOR FIELD FUNC
87 %left	GSUB IF INDEX LSUBSTR MATCHFCN NEXT NUMBER
88 %left	PRINT PRINTF RETURN SPLIT SPRINTF STRING SUB SUBSTR
89 %left	REGEXPR VAR VARNF IVAR WHILE '('
90 %left	CAT
91 %left	'+' '-'
92 %left	'*' '/' '%'
93 %left	NOT UMINUS
94 %right	POWER
95 %right	DECR INCR
96 %left	INDIRECT
97 %token	LASTTOKEN	/* must be last */
98 
99 %%
100 
/*
 * Start symbol (first rule in the file).  When errorflag is 0, store in
 * winner a PROGRAM node built from the BEGIN list, the pattern-action list
 * and the END list.  On a syntax error, discard the lookahead token, call
 * bracecheck() and report "bailing out".
 */
101 program:
102 	  pas	{ if (errorflag==0)
103 			winner = (Node *)stat3(PROGRAM, beginloc, $1, endloc); }
104 	| error	{ yyclearin; bracecheck(); ERROR "bailing out" SYNTAX; }
105 	;
106 
/* An AND token followed by any number of NL tokens. */
107 and:
108 	  AND | and NL
109 	;
110 
/* A BOR token followed by any number of NL tokens. */
111 bor:
112 	  BOR | bor NL
113 	;
114 
/* A ',' followed by any number of NL tokens. */
115 comma:
116 	  ',' | comma NL
117 	;
118 
/* A DO token followed by any number of NL tokens. */
119 do:
120 	  DO | do NL
121 	;
122 
/* An ELSE token followed by any number of NL tokens. */
123 else:
124 	  ELSE | else NL
125 	;
126 
/*
 * The three for-loop forms:
 *   for (init; cond; step) stmt  - FOR node; cond is passed through notnull()
 *   for (init; ; step) stmt      - FOR node with NIL in the condition slot
 *   for (name in name) stmt      - IN node; the second name goes through
 *                                  makearr()
 */
127 for:
128 	  FOR '(' opt_simple_stmt ';' pattern ';' opt_simple_stmt rparen stmt
129 		{ $$ = stat4(FOR, $3, notnull($5), $7, $9); }
130 	| FOR '(' opt_simple_stmt ';'  ';' opt_simple_stmt rparen stmt
131 		{ $$ = stat4(FOR, $3, NIL, $6, $8); }
132 	| FOR '(' varname IN varname rparen stmt
133 		{ $$ = stat3(IN, $3, makearr($5), $7); }
134 	;
135 
/*
 * Name in a function definition: a VAR or CALL token.  The token's Cell is
 * passed to setfname(), which reports misuse and records the name in
 * curfname.  Neither action assigns $$, so yacc's default $$ = $1 applies;
 * pa_stat casts the value back to Cell * before calling defn().
 */
136 funcname:
137 	  VAR	{ setfname($1); }
138 	| CALL	{ setfname($1); }
139 	;
140 
/* Head of an if statement; its value is the condition passed through
 * notnull().  The stmt rule combines it with the branch statements. */
141 if:
142 	  IF '(' pattern rparen		{ $$ = notnull($3); }
143 	;
144 
/* A '{' followed by any number of NL tokens. */
145 lbrace:
146 	  '{' | lbrace NL
147 	;
148 
/* One or more NL tokens. */
149 nl:
150 	  NL | nl NL
151 	;
152 
/* Zero or more NL tokens; the empty alternative yields 0. */
153 opt_nl:
154 	  /* empty */	{ $$ = 0; }
155 	| nl
156 	;
157 
/* Optional run of pattern-statement terminators (see pst); the empty
 * alternative yields 0. */
158 opt_pst:
159 	  /* empty */	{ $$ = 0; }
160 	| pst
161 	;
162 
163 
/* Optional simple_stmt, used for the init and step slots of the for rule;
 * the empty alternative yields 0. */
164 opt_simple_stmt:
165 	  /* empty */			{ $$ = 0; }
166 	| simple_stmt
167 	;
168 
/*
 * The whole pattern-action section of a program: either nothing but
 * terminators (value 0) or a pa_stats list with optional leading and
 * trailing terminators.
 */
169 pas:
170 	  opt_pst			{ $$ = 0; }
171 	| opt_pst pa_stats opt_pst	{ $$ = $2; }
172 	;
173 
/* A pattern used as the selector of a pattern-action statement; passed
 * through notnull(). */
174 pa_pat:
175 	  pattern	{ $$ = notnull($1); }
176 	;
177 
/*
 * One top-level item of the program:
 *   pattern                   - PASTAT whose action is a PRINT of rectonode()
 *   pattern { stmts }         - PASTAT with the given action
 *   pat , pat                 - pa2stat() with a PRINT of rectonode()
 *   pat , pat { stmts }       - pa2stat() with the given action
 *   { stmts }                 - PASTAT with NIL in the pattern slot
 *   XBEGIN { stmts }          - appended to beginloc; the item's value is 0
 *   XEND { stmts }            - appended to endloc; the item's value is 0
 *   FUNC name ( args ) { .. } - function definition, registered via defn();
 *                               the item's value is 0
 * In the FUNC alternative the mid-rule action {infunc++;} occupies position
 * $6, so lbrace is $7 and the body stmtlist is $8.
 */
178 pa_stat:
179 	  pa_pat			{ $$ = stat2(PASTAT, $1, stat2(PRINT, rectonode(), NIL)); }
180 	| pa_pat lbrace stmtlist '}'	{ $$ = stat2(PASTAT, $1, $3); }
181 	| pa_pat ',' pa_pat		{ $$ = pa2stat($1, $3, stat2(PRINT, rectonode(), NIL)); }
182 	| pa_pat ',' pa_pat lbrace stmtlist '}'	{ $$ = pa2stat($1, $3, $5); }
183 	| lbrace stmtlist '}'		{ $$ = stat2(PASTAT, NIL, $2); }
184 	| XBEGIN lbrace stmtlist '}'
185 		{ beginloc = linkum(beginloc, $3); $$ = 0; }
186 	| XEND lbrace stmtlist '}'
187 		{ endloc = linkum(endloc, $3); $$ = 0; }
188 	| FUNC funcname '(' varlist rparen {infunc++;} lbrace stmtlist '}'
189 		{ infunc--; curfname=0; defn((Cell *)$2, $4, $8); $$ = 0; }
190 	;
191 
/* One or more pa_stat items separated by optional terminators, joined
 * with linkum(). */
192 pa_stats:
193 	  pa_stat
194 	| pa_stats opt_pst pa_stat	{ $$ = linkum($1, $3); }
195 	;
196 
/* One or more patterns separated by commas, joined with linkum().  Used
 * for subscripts and for BLTIN / CALL / SPRINTF argument lists. */
197 patlist:
198 	  pattern
199 	| patlist comma pattern	{ $$ = linkum($1, $3); }
200 	;
201 
/*
 * Restricted form of pattern used for unparenthesized print arguments
 * (see pplist / prarg).  Compared with pattern it has no relational
 * alternatives (EQ GE GT LE LT NE) and no '|' GETLINE alternatives;
 * simple_stmt uses GT, '|' and APPEND after prarg for output redirection.
 * Its NOT alternative wraps the operand with notnull(), whereas pattern
 * builds an explicit NE comparison.
 *
 * In the MATCHOP alternatives the first op3() operand flags how the
 * right-hand side is held: NIL when it is a DFA built here by makedfa()
 * (a literal regex, or a constant per constnode()), (Node *)1 when it is
 * left as an expression node.
 */
202 ppattern:
203 	  var ASGNOP ppattern		{ $$ = op2($2, $1, $3); }
204 	| ppattern '?' ppattern ':' ppattern %prec '?'
205 	 	{ $$ = op3(CONDEXPR, notnull($1), $3, $5); }
206 	| ppattern bor ppattern %prec BOR
207 		{ $$ = op2(BOR, notnull($1), notnull($3)); }
208 	| ppattern and ppattern %prec AND
209 		{ $$ = op2(AND, notnull($1), notnull($3)); }
210 	| NOT ppattern
211 		{ $$ = op1(NOT, notnull($2)); }
212 	| ppattern MATCHOP reg_expr	{ $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); }
213 	| ppattern MATCHOP ppattern
214 		{ if (constnode($3))
215 			$$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
216 		  else
217 			$$ = op3($2, (Node *)1, $1, $3); }
218 	| ppattern IN varname		{ $$ = op2(INTEST, $1, makearr($3)); }
219 	| '(' plist ')' IN varname	{ $$ = op2(INTEST, $2, makearr($5)); }
220 	| ppattern term %prec CAT	{ $$ = op2(CAT, $1, $2); }
221 	| reg_expr
222 		{ $$ = op3(MATCH, NIL, rectonode(), (Node*)makedfa($1, 0)); }
223 	| term
224 	;
225 
/*
 * General expression.  Alternatives, in order: assignment, conditional,
 * logical or / and / not, the six relational operators, regex match,
 * array membership (single and parenthesized-list subscripts), piped
 * getline with and without a target variable, concatenation by
 * juxtaposition (precedence taken from CAT), a bare regex matched against
 * rectonode(), and term.
 *
 * NOT compares its operand with the constant found under the name
 * "$zero&null" in symtab and negates the result.
 *
 * In the MATCHOP alternatives the first op3() operand is NIL when the
 * right-hand side was turned into a DFA here (literal regex, or constant
 * per constnode()), and (Node *)1 when it is left as an expression node.
 *
 * In the '|' GETLINE alternatives $2 is the integer value of the '|'
 * token, cast to Node * and stored in the second operand slot.
 */
226 pattern:
227 	  var ASGNOP pattern		{ $$ = op2($2, $1, $3); }
228 	| pattern '?' pattern ':' pattern %prec '?'
229 	 	{ $$ = op3(CONDEXPR, notnull($1), $3, $5); }
230 	| pattern bor pattern %prec BOR
231 		{ $$ = op2(BOR, notnull($1), notnull($3)); }
232 	| pattern and pattern %prec AND
233 		{ $$ = op2(AND, notnull($1), notnull($3)); }
234 	| NOT pattern
235 		{ $$ = op1(NOT, op2(NE,$2,valtonode(lookup("$zero&null",symtab),CCON))); }
236 	| pattern EQ pattern		{ $$ = op2($2, $1, $3); }
237 	| pattern GE pattern		{ $$ = op2($2, $1, $3); }
238 	| pattern GT pattern		{ $$ = op2($2, $1, $3); }
239 	| pattern LE pattern		{ $$ = op2($2, $1, $3); }
240 	| pattern LT pattern		{ $$ = op2($2, $1, $3); }
241 	| pattern NE pattern		{ $$ = op2($2, $1, $3); }
242 	| pattern MATCHOP reg_expr	{ $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); }
243 	| pattern MATCHOP pattern
244 		{ if (constnode($3))
245 			$$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
246 		  else
247 			$$ = op3($2, (Node *)1, $1, $3); }
248 	| pattern IN varname		{ $$ = op2(INTEST, $1, makearr($3)); }
249 	| '(' plist ')' IN varname	{ $$ = op2(INTEST, $2, makearr($5)); }
250 	| pattern '|' GETLINE var	{ $$ = op3(GETLINE, $4, (Node*)$2, $1); }
251 	| pattern '|' GETLINE		{ $$ = op3(GETLINE, (Node*)0, (Node*)$2, $1); }
252 	| pattern term %prec CAT	{ $$ = op2(CAT, $1, $2); }
253 	| reg_expr
254 		{ $$ = op3(MATCH, NIL, rectonode(), (Node*)makedfa($1, 0)); }
255 	| term
256 	;
257 
/* Two or more patterns separated by commas, joined with linkum().  Used
 * inside parentheses: "(a, b) in arr" and parenthesized print arguments. */
258 plist:
259 	  pattern comma pattern		{ $$ = linkum($1, $3); }
260 	| plist comma pattern		{ $$ = linkum($1, $3); }
261 	;
262 
/* One or more ppatterns separated by commas, joined with linkum(); the
 * unparenthesized argument list of print / printf (see prarg). */
263 pplist:
264 	  ppattern
265 	| pplist comma ppattern		{ $$ = linkum($1, $3); }
266 	;
/*
 * Arguments of print / printf: nothing (value is rectonode()), an
 * unparenthesized pplist, or a parenthesized plist of two or more items.
 */
267 prarg:
268 	  /* empty */			{ $$ = rectonode(); }
269 	| pplist
270 	| '(' plist ')'			{ $$ = $2; }
271 	;
272 
/* Either output keyword; the token value is passed on and used by
 * simple_stmt as the node type given to stat3(). */
273 print:
274 	  PRINT | PRINTF
275 	;
276 
/* One or more terminators (NL or ';') between top-level pa_stat items. */
277 pst:
278 	  NL | ';' | pst NL | pst ';'
279 	;
280 
/* A '}' followed by any number of NL tokens; closes a braced stmtlist
 * in the stmt rule. */
281 rbrace:
282 	  '}' | rbrace NL
283 	;
284 
/*
 * A /regex/ literal; its value is the REGEXPR token's string.  The
 * mid-rule action calls startreg() right after the opening '/' is seen,
 * which makes REGEXPR position $3.
 * NOTE(review): startreg() presumably switches the lexer into
 * regex-scanning mode -- confirm in the lexer.
 */
285 reg_expr:
286 	  '/' {startreg();} REGEXPR '/'		{ $$ = $3; }
287 	;
288 
/* A ')' followed by any number of NL tokens. */
289 rparen:
290 	  ')' | rparen NL
291 	;
292 
/*
 * Statements that may also appear in the init / step slots of for:
 *   print or printf with output redirected through '|', APPEND or GT;
 *     the redirection token's int value ($3) is cast to Node * and stored
 *     as the third stat3() argument, the destination term as the fourth
 *   print or printf without redirection (NIL, NIL)
 *   delete name[subscripts]
 *   delete name  - reported as a syntax error; a DELETE node is still built
 *   a bare expression, converted with exptostat()
 *   error recovery - discards the lookahead and reports "illegal statement";
 *     this action does not assign $$
 */
293 simple_stmt:
294 	  print prarg '|' term		{ $$ = stat3($1, $2, (Node *) $3, $4); }
295 	| print prarg APPEND term	{ $$ = stat3($1, $2, (Node *) $3, $4); }
296 	| print prarg GT term		{ $$ = stat3($1, $2, (Node *) $3, $4); }
297 	| print prarg			{ $$ = stat3($1, $2, NIL, NIL); }
298 	| DELETE varname '[' patlist ']' { $$ = stat2(DELETE, makearr($2), $4); }
299 	| DELETE varname		{ yyclearin; ERROR "you can only delete array[element]" SYNTAX; $$ = stat1(DELETE, $2); }
300 	| pattern			{ $$ = exptostat($1); }
301 	| error				{ yyclearin; ERROR "illegal statement" SYNTAX; }
302 	;
303 
/* Statement terminator: one or more NL tokens, or a ';' followed by
 * optional NL tokens. */
304 st:
305 	  nl | ';' opt_nl
306 	;
307 
/*
 * A single statement.  Notes on the less obvious alternatives:
 *   do ... while  - the condition is passed through notnull()
 *   if / while    - the condition comes from the if / while nonterminals,
 *                   which already applied notnull()
 *   NEXT          - reports a syntax error when infunc is non-zero, but
 *                   still builds the NEXT node
 *   simple_stmt st - no action, so the value is that of simple_stmt
 *   ';' opt_nl    - empty statement, value 0
 */
308 stmt:
309 	  BREAK st		{ $$ = stat1(BREAK, NIL); }
310 	| CLOSE pattern st	{ $$ = stat1(CLOSE, $2); }
311 	| CONTINUE st		{ $$ = stat1(CONTINUE, NIL); }
312 	| do stmt WHILE '(' pattern ')' st
313 		{ $$ = stat2(DO, $2, notnull($5)); }
314 	| EXIT pattern st	{ $$ = stat1(EXIT, $2); }
315 	| EXIT st		{ $$ = stat1(EXIT, NIL); }
316 	| for
317 	| if stmt else stmt	{ $$ = stat3(IF, $1, $2, $4); }
318 	| if stmt		{ $$ = stat3(IF, $1, $2, NIL); }
319 	| lbrace stmtlist rbrace { $$ = $2; }
320 	| NEXT st	{ if (infunc)
321 				ERROR "next is illegal inside a function" SYNTAX;
322 			  $$ = stat1(NEXT, NIL); }
323 	| RETURN pattern st	{ $$ = stat1(RETURN, $2); }
324 	| RETURN st		{ $$ = stat1(RETURN, NIL); }
325 	| simple_stmt st
326 	| while stmt		{ $$ = stat2(WHILE, $1, $2); }
327 	| ';' opt_nl		{ $$ = 0; }
328 	;
329 
/* One or more statements, joined with linkum(). */
330 stmtlist:
331 	  stmt
332 	| stmtlist stmt		{ $$ = linkum($1, $2); }
333 	;
334 
/* Either substitution keyword; the token value is used by term as the
 * node type given to op4(). */
335 subop:
336 	  SUB | GSUB
337 	;
338 
/*
 * Arithmetic expressions, built-in and user function calls, getline
 * forms, constants and variables.
 *
 * Conventions visible in the actions:
 *   - unary '+' returns its operand unchanged; unary '-' builds UMINUS
 *   - BLTIN with no arguments (with or without "()") is given rectonode()
 *     as its argument
 *   - in the GETLINE ... LT forms the LT token's int value is cast to
 *     Node * and stored in the second operand slot
 *   - MATCHFCN and subop take either a /regex/ literal or a pattern; a
 *     literal, or a pattern that constnode() accepts, is compiled here
 *     with makedfa(..., 1) and flagged with NIL as the first operand;
 *     otherwise the pattern node is kept and flagged with (Node *)1
 *   - subop without a third argument is given rectonode() as its target
 *   - SPLIT stores (Node *)STRING or (Node *)REGEXPR in its last operand
 *     to tell which kind of separator was given (NIL separator = default)
 *   - INDEX with a /regex/ second argument is reported as a syntax error,
 *     though a node is still built
 */
339 term:
340 	  term '+' term			{ $$ = op2(ADD, $1, $3); }
341 	| term '-' term			{ $$ = op2(MINUS, $1, $3); }
342 	| term '*' term			{ $$ = op2(MULT, $1, $3); }
343 	| term '/' term			{ $$ = op2(DIVIDE, $1, $3); }
344 	| term '%' term			{ $$ = op2(MOD, $1, $3); }
345 	| term POWER term		{ $$ = op2(POWER, $1, $3); }
346 	| '-' term %prec UMINUS		{ $$ = op1(UMINUS, $2); }
347 	| '+' term %prec UMINUS		{ $$ = $2; }
348 	| BLTIN '(' ')'			{ $$ = op2(BLTIN, (Node *) $1, rectonode()); }
349 	| BLTIN '(' patlist ')'		{ $$ = op2(BLTIN, (Node *) $1, $3); }
350 	| BLTIN				{ $$ = op2(BLTIN, (Node *) $1, rectonode()); }
351 	| CALL '(' ')'			{ $$ = op2(CALL, valtonode($1,CVAR), NIL); }
352 	| CALL '(' patlist ')'		{ $$ = op2(CALL, valtonode($1,CVAR), $3); }
353 	| DECR var			{ $$ = op1(PREDECR, $2); }
354 	| INCR var			{ $$ = op1(PREINCR, $2); }
355 	| var DECR			{ $$ = op1(POSTDECR, $1); }
356 	| var INCR			{ $$ = op1(POSTINCR, $1); }
357 	| GETLINE var LT term		{ $$ = op3(GETLINE, $2, (Node *)$3, $4); }
358 	| GETLINE LT term		{ $$ = op3(GETLINE, NIL, (Node *)$2, $3); }
359 	| GETLINE var			{ $$ = op3(GETLINE, $2, NIL, NIL); }
360 	| GETLINE			{ $$ = op3(GETLINE, NIL, NIL, NIL); }
361 	| INDEX '(' pattern comma pattern ')'
362 		{ $$ = op2(INDEX, $3, $5); }
363 	| INDEX '(' pattern comma reg_expr ')'
364 		{ ERROR "index() doesn't permit regular expressions" SYNTAX;
365 		  $$ = op2(INDEX, $3, (Node*)$5); }
366 	| '(' pattern ')'		{ $$ = $2; }
367 	| MATCHFCN '(' pattern comma reg_expr ')'
368 		{ $$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa($5, 1)); }
369 	| MATCHFCN '(' pattern comma pattern ')'
370 		{ if (constnode($5))
371 			$$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa(strnode($5), 1));
372 		  else
373 			$$ = op3(MATCHFCN, (Node *)1, $3, $5); }
374 	| NUMBER			{ $$ = valtonode($1, CCON); }
375 	| SPLIT '(' pattern comma varname comma pattern ')'     /* string */
376 		{ $$ = op4(SPLIT, $3, makearr($5), $7, (Node*)STRING); }
377 	| SPLIT '(' pattern comma varname comma reg_expr ')'    /* const /regexp/ */
378 		{ $$ = op4(SPLIT, $3, makearr($5), (Node*)makedfa($7, 1), (Node *)REGEXPR); }
379 	| SPLIT '(' pattern comma varname ')'
380 		{ $$ = op4(SPLIT, $3, makearr($5), NIL, (Node*)STRING); }  /* default */
381 	| SPRINTF '(' patlist ')'	{ $$ = op1($1, $3); }
382 	| STRING	 		{ $$ = valtonode($1, CCON); }
383 	| subop '(' reg_expr comma pattern ')'
384 		{ $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, rectonode()); }
385 	| subop '(' pattern comma pattern ')'
386 		{ if (constnode($3))
387 			$$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, rectonode());
388 		  else
389 			$$ = op4($1, (Node *)1, $3, $5, rectonode()); }
390 	| subop '(' reg_expr comma pattern comma var ')'
391 		{ $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, $7); }
392 	| subop '(' pattern comma pattern comma var ')'
393 		{ if (constnode($3))
394 			$$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, $7);
395 		  else
396 			$$ = op4($1, (Node *)1, $3, $5, $7); }
397 	| SUBSTR '(' pattern comma pattern comma pattern ')'
398 		{ $$ = op3(SUBSTR, $3, $5, $7); }
399 	| SUBSTR '(' pattern comma pattern ')'
400 		{ $$ = op3(SUBSTR, $3, $5, NIL); }
401 	| var
402 	;
403 
/*
 * Anything that can be assigned to or incremented: a plain name, a
 * subscripted name (ARRAY node over makearr() of the name), a FIELD
 * token, an IVAR token (wrapped in an INDIRECT node), or INDIRECT
 * applied to a term.
 */
404 var:
405 	  varname
406 	| varname '[' patlist ']'	{ $$ = op2(ARRAY, makearr($1), $3); }
407 	| FIELD				{ $$ = valtonode($1, CFLD); }
408 	| IVAR				{ $$ = op1(INDIRECT, valtonode($1, CVAR)); }
409 	| INDIRECT term	 		{ $$ = op1(INDIRECT, $2); }
410 	;
411 
/*
 * Formal parameter list of a function definition: empty, or VAR tokens
 * separated by commas.  Every alternative also stores the list built so
 * far in the global arglist.
 */
412 varlist:
413 	  /* nothing */		{ arglist = $$ = 0; }
414 	| VAR			{ arglist = $$ = valtonode($1,CVAR); }
415 	| varlist comma VAR	{ arglist = $$ = linkum($1,valtonode($3,CVAR)); }
416 	;
417 
/*
 * A bare name: VAR becomes a CVAR value node; ARG and VARNF become
 * op1 nodes of the same type carrying the token value cast to Node *.
 */
418 varname:
419 	  VAR			{ $$ = valtonode($1, CVAR); }
420 	| ARG 			{ $$ = op1(ARG, (Node *) $1); }
421 	| VARNF			{ $$ = op1(VARNF, (Node *) $1); }
422 	;
423 
424 
/* Head of a while statement; its value is the condition passed through
 * notnull().  The stmt rule pairs it with the loop body. */
425 while:
426 	  WHILE '(' pattern rparen	{ $$ = notnull($3); }
427 	;
428 
429 %%
430 
431 setfname(p)
432 	Cell *p;
433 {
434 	if (isarr(p))
435 		ERROR "%s is an array, not a function", p->nval SYNTAX;
436 	else if (isfunc(p))
437 		ERROR "you can't define function %s more than once", p->nval SYNTAX;
438 	curfname = p->nval;
439 }
440 
441 constnode(p)
442 	Node *p;
443 {
444 	return p->ntype == NVALUE && ((Cell *) (p->narg[0]))->csub == CCON;
445 }
446 
447 uchar *strnode(p)
448 	Node *p;
449 {
450 	return ((Cell *)(p->narg[0]))->sval;
451 }
452 
453 Node *notnull(n)
454 	Node *n;
455 {
456 	switch (n->nobj) {
457 	case LE: case LT: case EQ: case NE: case GT: case GE:
458 	case BOR: case AND: case NOT:
459 		return n;
460 	default:
461 		return op2(NE, n, nullnode);
462 	}
463 }
464