xref: /illumos-gate/usr/src/cmd/mandoc/html.c (revision e153cda9f9660e385e8f468253f80e59f5d454d7)
1 /*	$Id: html.c,v 1.238 2018/06/25 16:54:59 schwarze Exp $ */
2 /*
3  * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2011-2015, 2017, 2018 Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include "config.h"
19 
20 #include <sys/types.h>
21 
22 #include <assert.h>
23 #include <ctype.h>
24 #include <stdarg.h>
25 #include <stddef.h>
26 #include <stdio.h>
27 #include <stdint.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <unistd.h>
31 
32 #include "mandoc_aux.h"
33 #include "mandoc_ohash.h"
34 #include "mandoc.h"
35 #include "roff.h"
36 #include "out.h"
37 #include "html.h"
38 #include "manconf.h"
39 #include "main.h"
40 
/*
 * Layout flags for one HTML element, stored in htmldata.flags and
 * consulted by print_otag() and print_ctag().
 */
#define	HTML_NOSTACK	 0x01	/* opening does not push a struct tag */
#define	HTML_AUTOCLOSE	 0x02	/* write '/' right before the final '>' */
#define	HTML_NLBEFORE	 0x04	/* end the output line before <tag> */
#define	HTML_NLBEGIN	 0x08	/* end the output line after <tag> */
#define	HTML_NLEND	 0x10	/* end the output line before </tag> */
#define	HTML_NLAFTER	 0x20	/* end the output line after </tag> */
#define	HTML_INDENT	 0x40	/* content is indented one level deeper */
#define	HTML_NOINDENT	 0x80	/* indentation is switched off inside */

/* Frequently used combinations of the line break flags. */
#define	HTML_NLAROUND	 (HTML_NLBEFORE | HTML_NLAFTER)
#define	HTML_NLINSIDE	 (HTML_NLBEGIN | HTML_NLEND)
#define	HTML_NLALL	 (HTML_NLAROUND | HTML_NLINSIDE)

/*
 * Static description of one HTML element:
 * the element name as written in the markup
 * and a bitwise OR of the HTML_* layout flags above.
 */
struct	htmldata {
	const char	 *name;
	int		  flags;
};
56 
57 static	const struct htmldata htmltags[TAG_MAX] = {
58 	{"html",	HTML_NLALL},
59 	{"head",	HTML_NLALL | HTML_INDENT},
60 	{"body",	HTML_NLALL},
61 	{"meta",	HTML_NOSTACK | HTML_AUTOCLOSE | HTML_NLALL},
62 	{"title",	HTML_NLAROUND},
63 	{"div",		HTML_NLAROUND},
64 	{"div",		0},
65 	{"h1",		HTML_NLAROUND},
66 	{"h2",		HTML_NLAROUND},
67 	{"span",	0},
68 	{"link",	HTML_NOSTACK | HTML_AUTOCLOSE | HTML_NLALL},
69 	{"br",		HTML_NOSTACK | HTML_AUTOCLOSE | HTML_NLALL},
70 	{"a",		0},
71 	{"table",	HTML_NLALL | HTML_INDENT},
72 	{"tr",		HTML_NLALL | HTML_INDENT},
73 	{"td",		HTML_NLAROUND},
74 	{"li",		HTML_NLAROUND | HTML_INDENT},
75 	{"ul",		HTML_NLALL | HTML_INDENT},
76 	{"ol",		HTML_NLALL | HTML_INDENT},
77 	{"dl",		HTML_NLALL | HTML_INDENT},
78 	{"dt",		HTML_NLAROUND},
79 	{"dd",		HTML_NLAROUND | HTML_INDENT},
80 	{"pre",		HTML_NLALL | HTML_NOINDENT},
81 	{"var",		0},
82 	{"cite",	0},
83 	{"b",		0},
84 	{"i",		0},
85 	{"code",	0},
86 	{"small",	0},
87 	{"style",	HTML_NLALL | HTML_INDENT},
88 	{"math",	HTML_NLALL | HTML_INDENT},
89 	{"mrow",	0},
90 	{"mi",		0},
91 	{"mn",		0},
92 	{"mo",		0},
93 	{"msup",	0},
94 	{"msub",	0},
95 	{"msubsup",	0},
96 	{"mfrac",	0},
97 	{"msqrt",	0},
98 	{"mfenced",	0},
99 	{"mtable",	0},
100 	{"mtr",		0},
101 	{"mtd",		0},
102 	{"munderover",	0},
103 	{"munder",	0},
104 	{"mover",	0},
105 };
106 
107 /* Avoid duplicate HTML id= attributes. */
108 static	struct ohash	 id_unique;
109 
110 static	void	 print_byte(struct html *, char);
111 static	void	 print_endword(struct html *);
112 static	void	 print_indent(struct html *);
113 static	void	 print_word(struct html *, const char *);
114 
115 static	void	 print_ctag(struct html *, struct tag *);
116 static	int	 print_escape(struct html *, char);
117 static	int	 print_encode(struct html *, const char *, const char *, int);
118 static	void	 print_href(struct html *, const char *, const char *, int);
119 static	void	 print_metaf(struct html *, enum mandoc_esc);
120 
121 
122 void *
123 html_alloc(const struct manoutput *outopts)
124 {
125 	struct html	*h;
126 
127 	h = mandoc_calloc(1, sizeof(struct html));
128 
129 	h->tag = NULL;
130 	h->style = outopts->style;
131 	h->base_man = outopts->man;
132 	h->base_includes = outopts->includes;
133 	if (outopts->fragment)
134 		h->oflags |= HTML_FRAGMENT;
135 
136 	mandoc_ohash_init(&id_unique, 4, 0);
137 
138 	return h;
139 }
140 
141 void
142 html_free(void *p)
143 {
144 	struct tag	*tag;
145 	struct html	*h;
146 	char		*cp;
147 	unsigned int	 slot;
148 
149 	h = (struct html *)p;
150 	while ((tag = h->tag) != NULL) {
151 		h->tag = tag->next;
152 		free(tag);
153 	}
154 	free(h);
155 
156 	cp = ohash_first(&id_unique, &slot);
157 	while (cp != NULL) {
158 		free(cp);
159 		cp = ohash_next(&id_unique, &slot);
160 	}
161 	ohash_delete(&id_unique);
162 }
163 
164 void
165 print_gen_head(struct html *h)
166 {
167 	struct tag	*t;
168 
169 	print_otag(h, TAG_META, "?", "charset", "utf-8");
170 	if (h->style != NULL) {
171 		print_otag(h, TAG_LINK, "?h??", "rel", "stylesheet",
172 		    h->style, "type", "text/css", "media", "all");
173 		return;
174 	}
175 
176 	/*
177 	 * Print a minimal embedded style sheet.
178 	 */
179 
180 	t = print_otag(h, TAG_STYLE, "");
181 	print_text(h, "table.head, table.foot { width: 100%; }");
182 	print_endline(h);
183 	print_text(h, "td.head-rtitle, td.foot-os { text-align: right; }");
184 	print_endline(h);
185 	print_text(h, "td.head-vol { text-align: center; }");
186 	print_endline(h);
187 	print_text(h, "div.Pp { margin: 1ex 0ex; }");
188 	print_endline(h);
189 	print_text(h, "div.Nd, div.Bf, div.Op { display: inline; }");
190 	print_endline(h);
191 	print_text(h, "span.Pa, span.Ad { font-style: italic; }");
192 	print_endline(h);
193 	print_text(h, "span.Ms { font-weight: bold; }");
194 	print_endline(h);
195 	print_text(h, "dl.Bl-diag ");
196 	print_byte(h, '>');
197 	print_text(h, " dt { font-weight: bold; }");
198 	print_endline(h);
199 	print_text(h, "code.Nm, code.Fl, code.Cm, code.Ic, "
200 	    "code.In, code.Fd, code.Fn,");
201 	print_endline(h);
202 	print_text(h, "code.Cd { font-weight: bold; "
203 	    "font-family: inherit; }");
204 	print_tagq(h, t);
205 }
206 
207 static void
208 print_metaf(struct html *h, enum mandoc_esc deco)
209 {
210 	enum htmlfont	 font;
211 
212 	switch (deco) {
213 	case ESCAPE_FONTPREV:
214 		font = h->metal;
215 		break;
216 	case ESCAPE_FONTITALIC:
217 		font = HTMLFONT_ITALIC;
218 		break;
219 	case ESCAPE_FONTBOLD:
220 		font = HTMLFONT_BOLD;
221 		break;
222 	case ESCAPE_FONTBI:
223 		font = HTMLFONT_BI;
224 		break;
225 	case ESCAPE_FONT:
226 	case ESCAPE_FONTROMAN:
227 		font = HTMLFONT_NONE;
228 		break;
229 	default:
230 		abort();
231 	}
232 
233 	if (h->metaf) {
234 		print_tagq(h, h->metaf);
235 		h->metaf = NULL;
236 	}
237 
238 	h->metal = h->metac;
239 	h->metac = font;
240 
241 	switch (font) {
242 	case HTMLFONT_ITALIC:
243 		h->metaf = print_otag(h, TAG_I, "");
244 		break;
245 	case HTMLFONT_BOLD:
246 		h->metaf = print_otag(h, TAG_B, "");
247 		break;
248 	case HTMLFONT_BI:
249 		h->metaf = print_otag(h, TAG_B, "");
250 		print_otag(h, TAG_I, "");
251 		break;
252 	default:
253 		break;
254 	}
255 }
256 
257 char *
258 html_make_id(const struct roff_node *n, int unique)
259 {
260 	const struct roff_node	*nch;
261 	char			*buf, *bufs, *cp;
262 	unsigned int		 slot;
263 	int			 suffix;
264 
265 	for (nch = n->child; nch != NULL; nch = nch->next)
266 		if (nch->type != ROFFT_TEXT)
267 			return NULL;
268 
269 	buf = NULL;
270 	deroff(&buf, n);
271 	if (buf == NULL)
272 		return NULL;
273 
274 	/*
275 	 * In ID attributes, only use ASCII characters that are
276 	 * permitted in URL-fragment strings according to the
277 	 * explicit list at:
278 	 * https://url.spec.whatwg.org/#url-fragment-string
279 	 */
280 
281 	for (cp = buf; *cp != '\0'; cp++)
282 		if (isalnum((unsigned char)*cp) == 0 &&
283 		    strchr("!$&'()*+,-./:;=?@_~", *cp) == NULL)
284 			*cp = '_';
285 
286 	if (unique == 0)
287 		return buf;
288 
289 	/* Avoid duplicate HTML id= attributes. */
290 
291 	bufs = NULL;
292 	suffix = 1;
293 	slot = ohash_qlookup(&id_unique, buf);
294 	cp = ohash_find(&id_unique, slot);
295 	if (cp != NULL) {
296 		while (cp != NULL) {
297 			free(bufs);
298 			if (++suffix > 127) {
299 				free(buf);
300 				return NULL;
301 			}
302 			mandoc_asprintf(&bufs, "%s_%d", buf, suffix);
303 			slot = ohash_qlookup(&id_unique, bufs);
304 			cp = ohash_find(&id_unique, slot);
305 		}
306 		free(buf);
307 		buf = bufs;
308 	}
309 	ohash_insert(&id_unique, slot, buf);
310 	return buf;
311 }
312 
313 static int
314 print_escape(struct html *h, char c)
315 {
316 
317 	switch (c) {
318 	case '<':
319 		print_word(h, "&lt;");
320 		break;
321 	case '>':
322 		print_word(h, "&gt;");
323 		break;
324 	case '&':
325 		print_word(h, "&amp;");
326 		break;
327 	case '"':
328 		print_word(h, "&quot;");
329 		break;
330 	case ASCII_NBRSP:
331 		print_word(h, "&nbsp;");
332 		break;
333 	case ASCII_HYPH:
334 		print_byte(h, '-');
335 		break;
336 	case ASCII_BREAK:
337 		break;
338 	default:
339 		return 0;
340 	}
341 	return 1;
342 }
343 
344 static int
345 print_encode(struct html *h, const char *p, const char *pend, int norecurse)
346 {
347 	char		 numbuf[16];
348 	struct tag	*t;
349 	const char	*seq;
350 	size_t		 sz;
351 	int		 c, len, breakline, nospace;
352 	enum mandoc_esc	 esc;
353 	static const char rejs[10] = { ' ', '\\', '<', '>', '&', '"',
354 		ASCII_NBRSP, ASCII_HYPH, ASCII_BREAK, '\0' };
355 
356 	if (pend == NULL)
357 		pend = strchr(p, '\0');
358 
359 	breakline = 0;
360 	nospace = 0;
361 
362 	while (p < pend) {
363 		if (HTML_SKIPCHAR & h->flags && '\\' != *p) {
364 			h->flags &= ~HTML_SKIPCHAR;
365 			p++;
366 			continue;
367 		}
368 
369 		for (sz = strcspn(p, rejs); sz-- && p < pend; p++)
370 			print_byte(h, *p);
371 
372 		if (breakline &&
373 		    (p >= pend || *p == ' ' || *p == ASCII_NBRSP)) {
374 			t = print_otag(h, TAG_DIV, "");
375 			print_text(h, "\\~");
376 			print_tagq(h, t);
377 			breakline = 0;
378 			while (p < pend && (*p == ' ' || *p == ASCII_NBRSP))
379 				p++;
380 			continue;
381 		}
382 
383 		if (p >= pend)
384 			break;
385 
386 		if (*p == ' ') {
387 			print_endword(h);
388 			p++;
389 			continue;
390 		}
391 
392 		if (print_escape(h, *p++))
393 			continue;
394 
395 		esc = mandoc_escape(&p, &seq, &len);
396 		if (ESCAPE_ERROR == esc)
397 			break;
398 
399 		switch (esc) {
400 		case ESCAPE_FONT:
401 		case ESCAPE_FONTPREV:
402 		case ESCAPE_FONTBOLD:
403 		case ESCAPE_FONTITALIC:
404 		case ESCAPE_FONTBI:
405 		case ESCAPE_FONTROMAN:
406 			if (0 == norecurse)
407 				print_metaf(h, esc);
408 			continue;
409 		case ESCAPE_SKIPCHAR:
410 			h->flags |= HTML_SKIPCHAR;
411 			continue;
412 		default:
413 			break;
414 		}
415 
416 		if (h->flags & HTML_SKIPCHAR) {
417 			h->flags &= ~HTML_SKIPCHAR;
418 			continue;
419 		}
420 
421 		switch (esc) {
422 		case ESCAPE_UNICODE:
423 			/* Skip past "u" header. */
424 			c = mchars_num2uc(seq + 1, len - 1);
425 			break;
426 		case ESCAPE_NUMBERED:
427 			c = mchars_num2char(seq, len);
428 			if (c < 0)
429 				continue;
430 			break;
431 		case ESCAPE_SPECIAL:
432 			c = mchars_spec2cp(seq, len);
433 			if (c <= 0)
434 				continue;
435 			break;
436 		case ESCAPE_BREAK:
437 			breakline = 1;
438 			continue;
439 		case ESCAPE_NOSPACE:
440 			if ('\0' == *p)
441 				nospace = 1;
442 			continue;
443 		case ESCAPE_OVERSTRIKE:
444 			if (len == 0)
445 				continue;
446 			c = seq[len - 1];
447 			break;
448 		default:
449 			continue;
450 		}
451 		if ((c < 0x20 && c != 0x09) ||
452 		    (c > 0x7E && c < 0xA0))
453 			c = 0xFFFD;
454 		if (c > 0x7E) {
455 			(void)snprintf(numbuf, sizeof(numbuf), "&#x%.4X;", c);
456 			print_word(h, numbuf);
457 		} else if (print_escape(h, c) == 0)
458 			print_byte(h, c);
459 	}
460 
461 	return nospace;
462 }
463 
464 static void
465 print_href(struct html *h, const char *name, const char *sec, int man)
466 {
467 	const char	*p, *pp;
468 
469 	pp = man ? h->base_man : h->base_includes;
470 	while ((p = strchr(pp, '%')) != NULL) {
471 		print_encode(h, pp, p, 1);
472 		if (man && p[1] == 'S') {
473 			if (sec == NULL)
474 				print_byte(h, '1');
475 			else
476 				print_encode(h, sec, NULL, 1);
477 		} else if ((man && p[1] == 'N') ||
478 		    (man == 0 && p[1] == 'I'))
479 			print_encode(h, name, NULL, 1);
480 		else
481 			print_encode(h, p, p + 2, 1);
482 		pp = p + 2;
483 	}
484 	if (*pp != '\0')
485 		print_encode(h, pp, NULL, 1);
486 }
487 
488 struct tag *
489 print_otag(struct html *h, enum htmltag tag, const char *fmt, ...)
490 {
491 	va_list		 ap;
492 	struct tag	*t;
493 	const char	*attr;
494 	char		*arg1, *arg2;
495 	int		 tflags;
496 
497 	tflags = htmltags[tag].flags;
498 
499 	/* Push this tag onto the stack of open scopes. */
500 
501 	if ((tflags & HTML_NOSTACK) == 0) {
502 		t = mandoc_malloc(sizeof(struct tag));
503 		t->tag = tag;
504 		t->next = h->tag;
505 		h->tag = t;
506 	} else
507 		t = NULL;
508 
509 	if (tflags & HTML_NLBEFORE)
510 		print_endline(h);
511 	if (h->col == 0)
512 		print_indent(h);
513 	else if ((h->flags & HTML_NOSPACE) == 0) {
514 		if (h->flags & HTML_KEEP)
515 			print_word(h, "&#x00A0;");
516 		else {
517 			if (h->flags & HTML_PREKEEP)
518 				h->flags |= HTML_KEEP;
519 			print_endword(h);
520 		}
521 	}
522 
523 	if ( ! (h->flags & HTML_NONOSPACE))
524 		h->flags &= ~HTML_NOSPACE;
525 	else
526 		h->flags |= HTML_NOSPACE;
527 
528 	/* Print out the tag name and attributes. */
529 
530 	print_byte(h, '<');
531 	print_word(h, htmltags[tag].name);
532 
533 	va_start(ap, fmt);
534 
535 	while (*fmt != '\0') {
536 
537 		/* Parse attributes and arguments. */
538 
539 		arg1 = va_arg(ap, char *);
540 		arg2 = NULL;
541 		switch (*fmt++) {
542 		case 'c':
543 			attr = "class";
544 			break;
545 		case 'h':
546 			attr = "href";
547 			break;
548 		case 'i':
549 			attr = "id";
550 			break;
551 		case 's':
552 			attr = "style";
553 			arg2 = va_arg(ap, char *);
554 			break;
555 		case '?':
556 			attr = arg1;
557 			arg1 = va_arg(ap, char *);
558 			break;
559 		default:
560 			abort();
561 		}
562 		if (*fmt == 'M')
563 			arg2 = va_arg(ap, char *);
564 		if (arg1 == NULL)
565 			continue;
566 
567 		/* Print the attributes. */
568 
569 		print_byte(h, ' ');
570 		print_word(h, attr);
571 		print_byte(h, '=');
572 		print_byte(h, '"');
573 		switch (*fmt) {
574 		case 'I':
575 			print_href(h, arg1, NULL, 0);
576 			fmt++;
577 			break;
578 		case 'M':
579 			print_href(h, arg1, arg2, 1);
580 			fmt++;
581 			break;
582 		case 'R':
583 			print_byte(h, '#');
584 			print_encode(h, arg1, NULL, 1);
585 			fmt++;
586 			break;
587 		case 'T':
588 			print_encode(h, arg1, NULL, 1);
589 			print_word(h, "\" title=\"");
590 			print_encode(h, arg1, NULL, 1);
591 			fmt++;
592 			break;
593 		default:
594 			if (arg2 == NULL)
595 				print_encode(h, arg1, NULL, 1);
596 			else {
597 				print_word(h, arg1);
598 				print_byte(h, ':');
599 				print_byte(h, ' ');
600 				print_word(h, arg2);
601 				print_byte(h, ';');
602 			}
603 			break;
604 		}
605 		print_byte(h, '"');
606 	}
607 	va_end(ap);
608 
609 	/* Accommodate for "well-formed" singleton escaping. */
610 
611 	if (HTML_AUTOCLOSE & htmltags[tag].flags)
612 		print_byte(h, '/');
613 
614 	print_byte(h, '>');
615 
616 	if (tflags & HTML_NLBEGIN)
617 		print_endline(h);
618 	else
619 		h->flags |= HTML_NOSPACE;
620 
621 	if (tflags & HTML_INDENT)
622 		h->indent++;
623 	if (tflags & HTML_NOINDENT)
624 		h->noindent++;
625 
626 	return t;
627 }
628 
629 static void
630 print_ctag(struct html *h, struct tag *tag)
631 {
632 	int	 tflags;
633 
634 	/*
635 	 * Remember to close out and nullify the current
636 	 * meta-font and table, if applicable.
637 	 */
638 	if (tag == h->metaf)
639 		h->metaf = NULL;
640 	if (tag == h->tblt)
641 		h->tblt = NULL;
642 
643 	tflags = htmltags[tag->tag].flags;
644 
645 	if (tflags & HTML_INDENT)
646 		h->indent--;
647 	if (tflags & HTML_NOINDENT)
648 		h->noindent--;
649 	if (tflags & HTML_NLEND)
650 		print_endline(h);
651 	print_indent(h);
652 	print_byte(h, '<');
653 	print_byte(h, '/');
654 	print_word(h, htmltags[tag->tag].name);
655 	print_byte(h, '>');
656 	if (tflags & HTML_NLAFTER)
657 		print_endline(h);
658 
659 	h->tag = tag->next;
660 	free(tag);
661 }
662 
/*
 * Print the document type declaration on an output line of its own.
 */
void
print_gen_decls(struct html *h)
{
	static const char	 doctype[] = "<!DOCTYPE html>";

	print_word(h, doctype);
	print_endline(h);
}
669 
670 void
671 print_gen_comment(struct html *h, struct roff_node *n)
672 {
673 	int	 wantblank;
674 
675 	print_word(h, "<!-- This is an automatically generated file."
676 	    "  Do not edit.");
677 	h->indent = 1;
678 	wantblank = 0;
679 	while (n != NULL && n->type == ROFFT_COMMENT) {
680 		if (strstr(n->string, "-->") == NULL &&
681 		    (wantblank || *n->string != '\0')) {
682 			print_endline(h);
683 			print_indent(h);
684 			print_word(h, n->string);
685 			wantblank = *n->string != '\0';
686 		}
687 		n = n->next;
688 	}
689 	if (wantblank)
690 		print_endline(h);
691 	print_word(h, " -->");
692 	print_endline(h);
693 	h->indent = 0;
694 }
695 
696 void
697 print_text(struct html *h, const char *word)
698 {
699 	if (h->col && (h->flags & HTML_NOSPACE) == 0) {
700 		if ( ! (HTML_KEEP & h->flags)) {
701 			if (HTML_PREKEEP & h->flags)
702 				h->flags |= HTML_KEEP;
703 			print_endword(h);
704 		} else
705 			print_word(h, "&#x00A0;");
706 	}
707 
708 	assert(NULL == h->metaf);
709 	switch (h->metac) {
710 	case HTMLFONT_ITALIC:
711 		h->metaf = print_otag(h, TAG_I, "");
712 		break;
713 	case HTMLFONT_BOLD:
714 		h->metaf = print_otag(h, TAG_B, "");
715 		break;
716 	case HTMLFONT_BI:
717 		h->metaf = print_otag(h, TAG_B, "");
718 		print_otag(h, TAG_I, "");
719 		break;
720 	default:
721 		print_indent(h);
722 		break;
723 	}
724 
725 	assert(word);
726 	if ( ! print_encode(h, word, NULL, 0)) {
727 		if ( ! (h->flags & HTML_NONOSPACE))
728 			h->flags &= ~HTML_NOSPACE;
729 		h->flags &= ~HTML_NONEWLINE;
730 	} else
731 		h->flags |= HTML_NOSPACE | HTML_NONEWLINE;
732 
733 	if (h->metaf) {
734 		print_tagq(h, h->metaf);
735 		h->metaf = NULL;
736 	}
737 
738 	h->flags &= ~HTML_IGNDELIM;
739 }
740 
741 void
742 print_tagq(struct html *h, const struct tag *until)
743 {
744 	struct tag	*tag;
745 
746 	while ((tag = h->tag) != NULL) {
747 		print_ctag(h, tag);
748 		if (until && tag == until)
749 			return;
750 	}
751 }
752 
753 void
754 print_stagq(struct html *h, const struct tag *suntil)
755 {
756 	struct tag	*tag;
757 
758 	while ((tag = h->tag) != NULL) {
759 		if (suntil && tag == suntil)
760 			return;
761 		print_ctag(h, tag);
762 	}
763 }
764 
765 void
766 print_paragraph(struct html *h)
767 {
768 	struct tag	*t;
769 
770 	t = print_otag(h, TAG_DIV, "c", "Pp");
771 	print_tagq(h, t);
772 }
773 
774 
775 /***********************************************************************
776  * Low level output functions.
777  * They implement line breaking using a short static buffer.
778  ***********************************************************************/
779 
780 /*
781  * Buffer one HTML output byte.
782  * If the buffer is full, flush and deactivate it and start a new line.
783  * If the buffer is inactive, print directly.
784  */
785 static void
786 print_byte(struct html *h, char c)
787 {
788 	if ((h->flags & HTML_BUFFER) == 0) {
789 		putchar(c);
790 		h->col++;
791 		return;
792 	}
793 
794 	if (h->col + h->bufcol < sizeof(h->buf)) {
795 		h->buf[h->bufcol++] = c;
796 		return;
797 	}
798 
799 	putchar('\n');
800 	h->col = 0;
801 	print_indent(h);
802 	putchar(' ');
803 	putchar(' ');
804 	fwrite(h->buf, h->bufcol, 1, stdout);
805 	putchar(c);
806 	h->col = (h->indent + 1) * 2 + h->bufcol + 1;
807 	h->bufcol = 0;
808 	h->flags &= ~HTML_BUFFER;
809 }
810 
811 /*
812  * If something was printed on the current output line, end it.
813  * Not to be called right after print_indent().
814  */
815 void
816 print_endline(struct html *h)
817 {
818 	if (h->col == 0)
819 		return;
820 
821 	if (h->bufcol) {
822 		putchar(' ');
823 		fwrite(h->buf, h->bufcol, 1, stdout);
824 		h->bufcol = 0;
825 	}
826 	putchar('\n');
827 	h->col = 0;
828 	h->flags |= HTML_NOSPACE;
829 	h->flags &= ~HTML_BUFFER;
830 }
831 
832 /*
833  * Flush the HTML output buffer.
834  * If it is inactive, activate it.
835  */
836 static void
837 print_endword(struct html *h)
838 {
839 	if (h->noindent) {
840 		print_byte(h, ' ');
841 		return;
842 	}
843 
844 	if ((h->flags & HTML_BUFFER) == 0) {
845 		h->col++;
846 		h->flags |= HTML_BUFFER;
847 	} else if (h->bufcol) {
848 		putchar(' ');
849 		fwrite(h->buf, h->bufcol, 1, stdout);
850 		h->col += h->bufcol + 1;
851 	}
852 	h->bufcol = 0;
853 }
854 
855 /*
856  * If at the beginning of a new output line,
857  * perform indentation and mark the line as containing output.
858  * Make sure to really produce some output right afterwards,
859  * but do not use print_otag() for producing it.
860  */
861 static void
862 print_indent(struct html *h)
863 {
864 	size_t	 i;
865 
866 	if (h->col)
867 		return;
868 
869 	if (h->noindent == 0) {
870 		h->col = h->indent * 2;
871 		for (i = 0; i < h->col; i++)
872 			putchar(' ');
873 	}
874 	h->flags &= ~HTML_NOSPACE;
875 }
876 
/*
 * Pass a string to print_byte() one byte at a time, such that it
 * is printed or buffered depending on the state of the word buffer.
 * No escaping is done here.
 */
static void
print_word(struct html *h, const char *cp)
{
	for (; *cp != '\0'; cp++)
		print_byte(h, *cp);
}
887