xref: /illumos-gate/usr/src/cmd/mandoc/html.c (revision 5f82aa32fbc5dc2c59bca6ff315f44a4c4c9ea86)
1 /*	$Id: html.c,v 1.219 2017/07/15 17:57:51 schwarze Exp $ */
2 /*
3  * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2011-2015, 2017 Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include "config.h"
19 
20 #include <sys/types.h>
21 
22 #include <assert.h>
23 #include <ctype.h>
24 #include <stdarg.h>
25 #include <stdio.h>
26 #include <stdint.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <unistd.h>
30 
31 #include "mandoc_aux.h"
32 #include "mandoc.h"
33 #include "roff.h"
34 #include "out.h"
35 #include "html.h"
36 #include "manconf.h"
37 #include "main.h"
38 
/*
 * Static description of one HTML element: the element name as it is
 * written to the output and a bit mask of formatting flags that
 * print_otag() and print_ctag() consult.
 */
struct	htmldata {
	const char	 *name;		/* element name printed after '<' */
	int		  flags;	/* bitwise OR of the HTML_* bits below */
#define	HTML_NOSTACK	 (1 << 0)	/* print_otag() pushes no struct tag */
#define	HTML_AUTOCLOSE	 (1 << 1)	/* print_otag() prints '/' before '>' */
#define	HTML_NLBEFORE	 (1 << 2)	/* end the line before the opening tag */
#define	HTML_NLBEGIN	 (1 << 3)	/* end the line after the opening tag */
#define	HTML_NLEND	 (1 << 4)	/* end the line before the closing tag */
#define	HTML_NLAFTER	 (1 << 5)	/* end the line after the closing tag */
#define	HTML_NLAROUND	 (HTML_NLBEFORE | HTML_NLAFTER)
#define	HTML_NLINSIDE	 (HTML_NLBEGIN | HTML_NLEND)
#define	HTML_NLALL	 (HTML_NLAROUND | HTML_NLINSIDE)
#define	HTML_INDENT	 (1 << 6)	/* h->indent is raised while open */
#define	HTML_NOINDENT	 (1 << 7)	/* h->noindent is raised while open */
};
54 
/*
 * Element table, indexed by enum htmltag (see the htmltags[tag]
 * lookups in print_otag() and print_ctag()).
 * NOTE(review): the entries must stay in the same order as the enum
 * constants, which are declared outside this file - confirm against
 * the enum before adding or moving an entry.
 */
static	const struct htmldata htmltags[TAG_MAX] = {
	{"html",	HTML_NLALL},
	{"head",	HTML_NLALL | HTML_INDENT},
	{"body",	HTML_NLALL},
	{"meta",	HTML_NOSTACK | HTML_AUTOCLOSE | HTML_NLALL},
	{"title",	HTML_NLAROUND},
	{"div",		HTML_NLAROUND},
	{"h1",		HTML_NLAROUND},
	{"h2",		HTML_NLAROUND},
	{"span",	0},
	{"link",	HTML_NOSTACK | HTML_AUTOCLOSE | HTML_NLALL},
	{"br",		HTML_NOSTACK | HTML_AUTOCLOSE | HTML_NLALL},
	{"a",		0},
	{"table",	HTML_NLALL | HTML_INDENT},
	{"colgroup",	HTML_NLALL | HTML_INDENT},
	{"col",		HTML_NOSTACK | HTML_AUTOCLOSE | HTML_NLALL},
	{"tr",		HTML_NLALL | HTML_INDENT},
	{"td",		HTML_NLAROUND},
	{"li",		HTML_NLAROUND | HTML_INDENT},
	{"ul",		HTML_NLALL | HTML_INDENT},
	{"ol",		HTML_NLALL | HTML_INDENT},
	{"dl",		HTML_NLALL | HTML_INDENT},
	{"dt",		HTML_NLAROUND},
	{"dd",		HTML_NLAROUND | HTML_INDENT},
	{"pre",		HTML_NLALL | HTML_NOINDENT},
	{"var",		0},
	{"cite",	0},
	{"b",		0},
	{"i",		0},
	{"code",	0},
	{"small",	0},
	{"style",	HTML_NLALL | HTML_INDENT},
	{"math",	HTML_NLALL | HTML_INDENT},
	{"mrow",	0},
	{"mi",		0},
	{"mn",		0},
	{"mo",		0},
	{"msup",	0},
	{"msub",	0},
	{"msubsup",	0},
	{"mfrac",	0},
	{"msqrt",	0},
	{"mfenced",	0},
	{"mtable",	0},
	{"mtr",		0},
	{"mtd",		0},
	{"munderover",	0},
	{"munder",	0},
	{"mover",	0},
};
105 
/*
 * CSS unit suffix printed for each roff scaling unit; print_otag()
 * indexes this table with su->unit.  Several roff units share a CSS
 * suffix: for SCALE_MM and SCALE_BU print_otag() rescales the number
 * (divides by 100 and by 24, respectively) before printing it.
 */
static	const char	*const roffscales[SCALE_MAX] = {
	"cm", /* SCALE_CM */
	"in", /* SCALE_IN */
	"pc", /* SCALE_PC */
	"pt", /* SCALE_PT */
	"em", /* SCALE_EM */
	"em", /* SCALE_MM */
	"ex", /* SCALE_EN */
	"ex", /* SCALE_BU */
	"em", /* SCALE_VS */
	"ex", /* SCALE_FS */
};
118 
/* Width parsing and the low level, buffered output primitives. */
static	void	 a2width(const char *, struct roffsu *);
static	void	 print_byte(struct html *, char);
static	void	 print_endword(struct html *);
static	void	 print_indent(struct html *);
static	void	 print_word(struct html *, const char *);

/* Closing tags, character escaping, and font escape handling. */
static	void	 print_ctag(struct html *, struct tag *);
static	int	 print_escape(struct html *, char);
static	int	 print_encode(struct html *, const char *, const char *, int);
static	void	 print_href(struct html *, const char *, const char *, int);
static	void	 print_metaf(struct html *, enum mandoc_esc);
130 
131 
132 void *
133 html_alloc(const struct manoutput *outopts)
134 {
135 	struct html	*h;
136 
137 	h = mandoc_calloc(1, sizeof(struct html));
138 
139 	h->tag = NULL;
140 	h->style = outopts->style;
141 	h->base_man = outopts->man;
142 	h->base_includes = outopts->includes;
143 	if (outopts->fragment)
144 		h->oflags |= HTML_FRAGMENT;
145 
146 	return h;
147 }
148 
149 void
150 html_free(void *p)
151 {
152 	struct tag	*tag;
153 	struct html	*h;
154 
155 	h = (struct html *)p;
156 
157 	while ((tag = h->tag) != NULL) {
158 		h->tag = tag->next;
159 		free(tag);
160 	}
161 
162 	free(h);
163 }
164 
165 void
166 print_gen_head(struct html *h)
167 {
168 	struct tag	*t;
169 
170 	print_otag(h, TAG_META, "?", "charset", "utf-8");
171 
172 	/*
173 	 * Print a default style-sheet.
174 	 */
175 
176 	t = print_otag(h, TAG_STYLE, "");
177 	print_text(h, "table.head, table.foot { width: 100%; }");
178 	print_endline(h);
179 	print_text(h, "td.head-rtitle, td.foot-os { text-align: right; }");
180 	print_endline(h);
181 	print_text(h, "td.head-vol { text-align: center; }");
182 	print_endline(h);
183 	print_text(h, "div.Pp { margin: 1ex 0ex; }");
184 	print_tagq(h, t);
185 
186 	if (h->style)
187 		print_otag(h, TAG_LINK, "?h??", "rel", "stylesheet",
188 		    h->style, "type", "text/css", "media", "all");
189 }
190 
191 static void
192 print_metaf(struct html *h, enum mandoc_esc deco)
193 {
194 	enum htmlfont	 font;
195 
196 	switch (deco) {
197 	case ESCAPE_FONTPREV:
198 		font = h->metal;
199 		break;
200 	case ESCAPE_FONTITALIC:
201 		font = HTMLFONT_ITALIC;
202 		break;
203 	case ESCAPE_FONTBOLD:
204 		font = HTMLFONT_BOLD;
205 		break;
206 	case ESCAPE_FONTBI:
207 		font = HTMLFONT_BI;
208 		break;
209 	case ESCAPE_FONT:
210 	case ESCAPE_FONTROMAN:
211 		font = HTMLFONT_NONE;
212 		break;
213 	default:
214 		abort();
215 	}
216 
217 	if (h->metaf) {
218 		print_tagq(h, h->metaf);
219 		h->metaf = NULL;
220 	}
221 
222 	h->metal = h->metac;
223 	h->metac = font;
224 
225 	switch (font) {
226 	case HTMLFONT_ITALIC:
227 		h->metaf = print_otag(h, TAG_I, "");
228 		break;
229 	case HTMLFONT_BOLD:
230 		h->metaf = print_otag(h, TAG_B, "");
231 		break;
232 	case HTMLFONT_BI:
233 		h->metaf = print_otag(h, TAG_B, "");
234 		print_otag(h, TAG_I, "");
235 		break;
236 	default:
237 		break;
238 	}
239 }
240 
241 char *
242 html_make_id(const struct roff_node *n)
243 {
244 	const struct roff_node	*nch;
245 	char			*buf, *cp;
246 
247 	for (nch = n->child; nch != NULL; nch = nch->next)
248 		if (nch->type != ROFFT_TEXT)
249 			return NULL;
250 
251 	buf = NULL;
252 	deroff(&buf, n);
253 
254 	/* http://www.w3.org/TR/html5/dom.html#the-id-attribute */
255 
256 	for (cp = buf; *cp != '\0'; cp++)
257 		if (*cp == ' ')
258 			*cp = '_';
259 
260 	return buf;
261 }
262 
263 int
264 html_strlen(const char *cp)
265 {
266 	size_t		 rsz;
267 	int		 skip, sz;
268 
269 	/*
270 	 * Account for escaped sequences within string length
271 	 * calculations.  This follows the logic in term_strlen() as we
272 	 * must calculate the width of produced strings.
273 	 * Assume that characters are always width of "1".  This is
274 	 * hacky, but it gets the job done for approximation of widths.
275 	 */
276 
277 	sz = 0;
278 	skip = 0;
279 	while (1) {
280 		rsz = strcspn(cp, "\\");
281 		if (rsz) {
282 			cp += rsz;
283 			if (skip) {
284 				skip = 0;
285 				rsz--;
286 			}
287 			sz += rsz;
288 		}
289 		if ('\0' == *cp)
290 			break;
291 		cp++;
292 		switch (mandoc_escape(&cp, NULL, NULL)) {
293 		case ESCAPE_ERROR:
294 			return sz;
295 		case ESCAPE_UNICODE:
296 		case ESCAPE_NUMBERED:
297 		case ESCAPE_SPECIAL:
298 		case ESCAPE_OVERSTRIKE:
299 			if (skip)
300 				skip = 0;
301 			else
302 				sz++;
303 			break;
304 		case ESCAPE_SKIPCHAR:
305 			skip = 1;
306 			break;
307 		default:
308 			break;
309 		}
310 	}
311 	return sz;
312 }
313 
314 static int
315 print_escape(struct html *h, char c)
316 {
317 
318 	switch (c) {
319 	case '<':
320 		print_word(h, "&lt;");
321 		break;
322 	case '>':
323 		print_word(h, "&gt;");
324 		break;
325 	case '&':
326 		print_word(h, "&amp;");
327 		break;
328 	case '"':
329 		print_word(h, "&quot;");
330 		break;
331 	case ASCII_NBRSP:
332 		print_word(h, "&nbsp;");
333 		break;
334 	case ASCII_HYPH:
335 		print_byte(h, '-');
336 		break;
337 	case ASCII_BREAK:
338 		break;
339 	default:
340 		return 0;
341 	}
342 	return 1;
343 }
344 
/*
 * Encode and print the bytes from p up to, but not including, pend;
 * a NULL pend means "up to the terminating NUL byte".
 *
 * Ordinary bytes are passed to print_byte(), blanks end the current
 * output word, characters known to print_escape() are replaced, and
 * backslash sequences are parsed with mandoc_escape().  Font escapes
 * are acted upon only if norecurse is zero.  Processing stops at the
 * first escape that mandoc_escape() rejects.
 *
 * Returns non-zero if an ESCAPE_NOSPACE escape was immediately
 * followed by the end of the string, zero otherwise.
 */
static int
print_encode(struct html *h, const char *p, const char *pend, int norecurse)
{
	char		 numbuf[16];
	struct tag	*t;
	const char	*seq;
	size_t		 sz;
	int		 c, len, breakline, nospace;
	enum mandoc_esc	 esc;
	/* Bytes that end a literal run; the trailing NUL terminates the set. */
	static const char rejs[10] = { ' ', '\\', '<', '>', '&', '"',
		ASCII_NBRSP, ASCII_HYPH, ASCII_BREAK, '\0' };

	if (pend == NULL)
		pend = strchr(p, '\0');

	breakline = 0;
	nospace = 0;

	while (p < pend) {
		/* A pending skip request swallows one ordinary byte. */
		if (HTML_SKIPCHAR & h->flags && '\\' != *p) {
			h->flags &= ~HTML_SKIPCHAR;
			p++;
			continue;
		}

		/* Copy the literal run up to the next special byte. */
		for (sz = strcspn(p, rejs); sz-- && p < pend; p++)
			print_byte(h, *p);

		/*
		 * A break escape seen earlier takes effect at the next
		 * blank or at the end of the input: print a div holding
		 * the "\~" escape and skip the blanks that follow.
		 */
		if (breakline &&
		    (p >= pend || *p == ' ' || *p == ASCII_NBRSP)) {
			t = print_otag(h, TAG_DIV, "");
			print_text(h, "\\~");
			print_tagq(h, t);
			breakline = 0;
			while (p < pend && (*p == ' ' || *p == ASCII_NBRSP))
				p++;
			continue;
		}

		if (p >= pend)
			break;

		if (*p == ' ') {
			print_endword(h);
			p++;
			continue;
		}

		/* Anything print_escape() declines is a backslash here. */
		if (print_escape(h, *p++))
			continue;

		esc = mandoc_escape(&p, &seq, &len);
		if (ESCAPE_ERROR == esc)
			break;

		/* Escapes that change state without printing a character. */
		switch (esc) {
		case ESCAPE_FONT:
		case ESCAPE_FONTPREV:
		case ESCAPE_FONTBOLD:
		case ESCAPE_FONTITALIC:
		case ESCAPE_FONTBI:
		case ESCAPE_FONTROMAN:
			if (0 == norecurse)
				print_metaf(h, esc);
			continue;
		case ESCAPE_SKIPCHAR:
			h->flags |= HTML_SKIPCHAR;
			continue;
		default:
			break;
		}

		/* A pending skip request swallows one whole escape. */
		if (h->flags & HTML_SKIPCHAR) {
			h->flags &= ~HTML_SKIPCHAR;
			continue;
		}

		/* Translate the escape into a single code point c. */
		switch (esc) {
		case ESCAPE_UNICODE:
			/* Skip past "u" header. */
			c = mchars_num2uc(seq + 1, len - 1);
			break;
		case ESCAPE_NUMBERED:
			c = mchars_num2char(seq, len);
			if (c < 0)
				continue;
			break;
		case ESCAPE_SPECIAL:
			c = mchars_spec2cp(seq, len);
			if (c <= 0)
				continue;
			break;
		case ESCAPE_BREAK:
			breakline = 1;
			continue;
		case ESCAPE_NOSPACE:
			if ('\0' == *p)
				nospace = 1;
			continue;
		case ESCAPE_OVERSTRIKE:
			/* Only the last character of the sequence is printed. */
			if (len == 0)
				continue;
			c = seq[len - 1];
			break;
		default:
			continue;
		}
		/*
		 * Control characters other than tab, and the range
		 * 0x7F-0x9F, are replaced by U+FFFD.  Code points above
		 * 0x7E are printed as numeric character references.
		 */
		if ((c < 0x20 && c != 0x09) ||
		    (c > 0x7E && c < 0xA0))
			c = 0xFFFD;
		if (c > 0x7E) {
			(void)snprintf(numbuf, sizeof(numbuf), "&#x%.4X;", c);
			print_word(h, numbuf);
		} else if (print_escape(h, c) == 0)
			print_byte(h, c);
	}

	return nospace;
}
464 
465 static void
466 print_href(struct html *h, const char *name, const char *sec, int man)
467 {
468 	const char	*p, *pp;
469 
470 	pp = man ? h->base_man : h->base_includes;
471 	while ((p = strchr(pp, '%')) != NULL) {
472 		print_encode(h, pp, p, 1);
473 		if (man && p[1] == 'S') {
474 			if (sec == NULL)
475 				print_byte(h, '1');
476 			else
477 				print_encode(h, sec, NULL, 1);
478 		} else if ((man && p[1] == 'N') ||
479 		    (man == 0 && p[1] == 'I'))
480 			print_encode(h, name, NULL, 1);
481 		else
482 			print_encode(h, p, p + 2, 1);
483 		pp = p + 2;
484 	}
485 	if (*pp != '\0')
486 		print_encode(h, pp, NULL, 1);
487 }
488 
489 struct tag *
490 print_otag(struct html *h, enum htmltag tag, const char *fmt, ...)
491 {
492 	va_list		 ap;
493 	struct roffsu	 mysu, *su;
494 	char		 numbuf[16];
495 	struct tag	*t;
496 	const char	*attr;
497 	char		*arg1, *arg2;
498 	double		 v;
499 	int		 i, have_style, tflags;
500 
501 	tflags = htmltags[tag].flags;
502 
503 	/* Push this tag onto the stack of open scopes. */
504 
505 	if ((tflags & HTML_NOSTACK) == 0) {
506 		t = mandoc_malloc(sizeof(struct tag));
507 		t->tag = tag;
508 		t->next = h->tag;
509 		h->tag = t;
510 	} else
511 		t = NULL;
512 
513 	if (tflags & HTML_NLBEFORE)
514 		print_endline(h);
515 	if (h->col == 0)
516 		print_indent(h);
517 	else if ((h->flags & HTML_NOSPACE) == 0) {
518 		if (h->flags & HTML_KEEP)
519 			print_word(h, "&#x00A0;");
520 		else {
521 			if (h->flags & HTML_PREKEEP)
522 				h->flags |= HTML_KEEP;
523 			print_endword(h);
524 		}
525 	}
526 
527 	if ( ! (h->flags & HTML_NONOSPACE))
528 		h->flags &= ~HTML_NOSPACE;
529 	else
530 		h->flags |= HTML_NOSPACE;
531 
532 	/* Print out the tag name and attributes. */
533 
534 	print_byte(h, '<');
535 	print_word(h, htmltags[tag].name);
536 
537 	va_start(ap, fmt);
538 
539 	have_style = 0;
540 	while (*fmt != '\0') {
541 		if (*fmt == 's') {
542 			have_style = 1;
543 			fmt++;
544 			break;
545 		}
546 
547 		/* Parse a non-style attribute and its arguments. */
548 
549 		arg1 = va_arg(ap, char *);
550 		switch (*fmt++) {
551 		case 'c':
552 			attr = "class";
553 			break;
554 		case 'h':
555 			attr = "href";
556 			break;
557 		case 'i':
558 			attr = "id";
559 			break;
560 		case '?':
561 			attr = arg1;
562 			arg1 = va_arg(ap, char *);
563 			break;
564 		default:
565 			abort();
566 		}
567 		arg2 = NULL;
568 		if (*fmt == 'M')
569 			arg2 = va_arg(ap, char *);
570 		if (arg1 == NULL)
571 			continue;
572 
573 		/* Print the non-style attributes. */
574 
575 		print_byte(h, ' ');
576 		print_word(h, attr);
577 		print_byte(h, '=');
578 		print_byte(h, '"');
579 		switch (*fmt) {
580 		case 'I':
581 			print_href(h, arg1, NULL, 0);
582 			fmt++;
583 			break;
584 		case 'M':
585 			print_href(h, arg1, arg2, 1);
586 			fmt++;
587 			break;
588 		case 'R':
589 			print_byte(h, '#');
590 			print_encode(h, arg1, NULL, 1);
591 			fmt++;
592 			break;
593 		case 'T':
594 			print_encode(h, arg1, NULL, 1);
595 			print_word(h, "\" title=\"");
596 			print_encode(h, arg1, NULL, 1);
597 			fmt++;
598 			break;
599 		default:
600 			print_encode(h, arg1, NULL, 1);
601 			break;
602 		}
603 		print_byte(h, '"');
604 	}
605 
606 	/* Print out styles. */
607 
608 	while (*fmt != '\0') {
609 		arg1 = NULL;
610 		su = NULL;
611 
612 		/* First letter: input argument type. */
613 
614 		switch (*fmt++) {
615 		case 'h':
616 			i = va_arg(ap, int);
617 			su = &mysu;
618 			SCALE_HS_INIT(su, i);
619 			break;
620 		case 's':
621 			arg1 = va_arg(ap, char *);
622 			break;
623 		case 'u':
624 			su = va_arg(ap, struct roffsu *);
625 			break;
626 		case 'w':
627 			if ((arg2 = va_arg(ap, char *)) != NULL) {
628 				su = &mysu;
629 				a2width(arg2, su);
630 			}
631 			if (*fmt == '*') {
632 				if (su != NULL && su->unit == SCALE_EN &&
633 				    su->scale > 5.9 && su->scale < 6.1)
634 					su = NULL;
635 				fmt++;
636 			}
637 			if (*fmt == '+') {
638 				if (su != NULL) {
639 					/* Make even bold text fit. */
640 					su->scale *= 1.2;
641 					/* Add padding. */
642 					su->scale += 3.0;
643 				}
644 				fmt++;
645 			}
646 			if (*fmt == '-') {
647 				if (su != NULL)
648 					su->scale *= -1.0;
649 				fmt++;
650 			}
651 			break;
652 		default:
653 			abort();
654 		}
655 
656 		/* Second letter: style name. */
657 
658 		switch (*fmt++) {
659 		case 'h':
660 			attr = "height";
661 			break;
662 		case 'i':
663 			attr = "text-indent";
664 			break;
665 		case 'l':
666 			attr = "margin-left";
667 			break;
668 		case 'w':
669 			attr = "width";
670 			break;
671 		case 'W':
672 			attr = "min-width";
673 			break;
674 		case '?':
675 			attr = arg1;
676 			arg1 = va_arg(ap, char *);
677 			break;
678 		default:
679 			abort();
680 		}
681 		if (su == NULL && arg1 == NULL)
682 			continue;
683 
684 		if (have_style == 1)
685 			print_word(h, " style=\"");
686 		else
687 			print_byte(h, ' ');
688 		print_word(h, attr);
689 		print_byte(h, ':');
690 		print_byte(h, ' ');
691 		if (su != NULL) {
692 			v = su->scale;
693 			if (su->unit == SCALE_MM && (v /= 100.0) == 0.0)
694 				v = 1.0;
695 			else if (su->unit == SCALE_BU)
696 				v /= 24.0;
697 			(void)snprintf(numbuf, sizeof(numbuf), "%.2f", v);
698 			print_word(h, numbuf);
699 			print_word(h, roffscales[su->unit]);
700 		} else
701 			print_word(h, arg1);
702 		print_byte(h, ';');
703 		have_style = 2;
704 	}
705 	if (have_style == 2)
706 		print_byte(h, '"');
707 
708 	va_end(ap);
709 
710 	/* Accommodate for "well-formed" singleton escaping. */
711 
712 	if (HTML_AUTOCLOSE & htmltags[tag].flags)
713 		print_byte(h, '/');
714 
715 	print_byte(h, '>');
716 
717 	if (tflags & HTML_NLBEGIN)
718 		print_endline(h);
719 	else
720 		h->flags |= HTML_NOSPACE;
721 
722 	if (tflags & HTML_INDENT)
723 		h->indent++;
724 	if (tflags & HTML_NOINDENT)
725 		h->noindent++;
726 
727 	return t;
728 }
729 
730 static void
731 print_ctag(struct html *h, struct tag *tag)
732 {
733 	int	 tflags;
734 
735 	/*
736 	 * Remember to close out and nullify the current
737 	 * meta-font and table, if applicable.
738 	 */
739 	if (tag == h->metaf)
740 		h->metaf = NULL;
741 	if (tag == h->tblt)
742 		h->tblt = NULL;
743 
744 	tflags = htmltags[tag->tag].flags;
745 
746 	if (tflags & HTML_INDENT)
747 		h->indent--;
748 	if (tflags & HTML_NOINDENT)
749 		h->noindent--;
750 	if (tflags & HTML_NLEND)
751 		print_endline(h);
752 	print_indent(h);
753 	print_byte(h, '<');
754 	print_byte(h, '/');
755 	print_word(h, htmltags[tag->tag].name);
756 	print_byte(h, '>');
757 	if (tflags & HTML_NLAFTER)
758 		print_endline(h);
759 
760 	h->tag = tag->next;
761 	free(tag);
762 }
763 
/*
 * Print the document type declaration on a line of its own.
 */
void
print_gen_decls(struct html *h)
{
	static const char	 doctype[] = "<!DOCTYPE html>";

	print_word(h, doctype);
	print_endline(h);
}
770 
771 void
772 print_text(struct html *h, const char *word)
773 {
774 	if (h->col && (h->flags & HTML_NOSPACE) == 0) {
775 		if ( ! (HTML_KEEP & h->flags)) {
776 			if (HTML_PREKEEP & h->flags)
777 				h->flags |= HTML_KEEP;
778 			print_endword(h);
779 		} else
780 			print_word(h, "&#x00A0;");
781 	}
782 
783 	assert(NULL == h->metaf);
784 	switch (h->metac) {
785 	case HTMLFONT_ITALIC:
786 		h->metaf = print_otag(h, TAG_I, "");
787 		break;
788 	case HTMLFONT_BOLD:
789 		h->metaf = print_otag(h, TAG_B, "");
790 		break;
791 	case HTMLFONT_BI:
792 		h->metaf = print_otag(h, TAG_B, "");
793 		print_otag(h, TAG_I, "");
794 		break;
795 	default:
796 		print_indent(h);
797 		break;
798 	}
799 
800 	assert(word);
801 	if ( ! print_encode(h, word, NULL, 0)) {
802 		if ( ! (h->flags & HTML_NONOSPACE))
803 			h->flags &= ~HTML_NOSPACE;
804 		h->flags &= ~HTML_NONEWLINE;
805 	} else
806 		h->flags |= HTML_NOSPACE | HTML_NONEWLINE;
807 
808 	if (h->metaf) {
809 		print_tagq(h, h->metaf);
810 		h->metaf = NULL;
811 	}
812 
813 	h->flags &= ~HTML_IGNDELIM;
814 }
815 
816 void
817 print_tagq(struct html *h, const struct tag *until)
818 {
819 	struct tag	*tag;
820 
821 	while ((tag = h->tag) != NULL) {
822 		print_ctag(h, tag);
823 		if (until && tag == until)
824 			return;
825 	}
826 }
827 
828 void
829 print_stagq(struct html *h, const struct tag *suntil)
830 {
831 	struct tag	*tag;
832 
833 	while ((tag = h->tag) != NULL) {
834 		if (suntil && tag == suntil)
835 			return;
836 		print_ctag(h, tag);
837 	}
838 }
839 
840 void
841 print_paragraph(struct html *h)
842 {
843 	struct tag	*t;
844 
845 	t = print_otag(h, TAG_DIV, "c", "Pp");
846 	print_tagq(h, t);
847 }
848 
849 
850 /***********************************************************************
851  * Low level output functions.
852  * They implement line breaking using a short static buffer.
853  ***********************************************************************/
854 
855 /*
856  * Buffer one HTML output byte.
857  * If the buffer is full, flush and deactivate it and start a new line.
858  * If the buffer is inactive, print directly.
859  */
860 static void
861 print_byte(struct html *h, char c)
862 {
863 	if ((h->flags & HTML_BUFFER) == 0) {
864 		putchar(c);
865 		h->col++;
866 		return;
867 	}
868 
869 	if (h->col + h->bufcol < sizeof(h->buf)) {
870 		h->buf[h->bufcol++] = c;
871 		return;
872 	}
873 
874 	putchar('\n');
875 	h->col = 0;
876 	print_indent(h);
877 	putchar(' ');
878 	putchar(' ');
879 	fwrite(h->buf, h->bufcol, 1, stdout);
880 	putchar(c);
881 	h->col = (h->indent + 1) * 2 + h->bufcol + 1;
882 	h->bufcol = 0;
883 	h->flags &= ~HTML_BUFFER;
884 }
885 
886 /*
887  * If something was printed on the current output line, end it.
888  * Not to be called right after print_indent().
889  */
890 void
891 print_endline(struct html *h)
892 {
893 	if (h->col == 0)
894 		return;
895 
896 	if (h->bufcol) {
897 		putchar(' ');
898 		fwrite(h->buf, h->bufcol, 1, stdout);
899 		h->bufcol = 0;
900 	}
901 	putchar('\n');
902 	h->col = 0;
903 	h->flags |= HTML_NOSPACE;
904 	h->flags &= ~HTML_BUFFER;
905 }
906 
907 /*
908  * Flush the HTML output buffer.
909  * If it is inactive, activate it.
910  */
911 static void
912 print_endword(struct html *h)
913 {
914 	if (h->noindent) {
915 		print_byte(h, ' ');
916 		return;
917 	}
918 
919 	if ((h->flags & HTML_BUFFER) == 0) {
920 		h->col++;
921 		h->flags |= HTML_BUFFER;
922 	} else if (h->bufcol) {
923 		putchar(' ');
924 		fwrite(h->buf, h->bufcol, 1, stdout);
925 		h->col += h->bufcol + 1;
926 	}
927 	h->bufcol = 0;
928 }
929 
930 /*
931  * If at the beginning of a new output line,
932  * perform indentation and mark the line as containing output.
933  * Make sure to really produce some output right afterwards,
934  * but do not use print_otag() for producing it.
935  */
936 static void
937 print_indent(struct html *h)
938 {
939 	size_t	 i;
940 
941 	if (h->col)
942 		return;
943 
944 	if (h->noindent == 0) {
945 		h->col = h->indent * 2;
946 		for (i = 0; i < h->col; i++)
947 			putchar(' ');
948 	}
949 	h->flags &= ~HTML_NOSPACE;
950 }
951 
/*
 * Pass a NUL-terminated string to print_byte() one byte at a time,
 * so it is printed or buffered according to the buffer state.
 */
static void
print_word(struct html *h, const char *cp)
{
	for (; *cp != '\0'; cp++)
		print_byte(h, *cp);
}
962 
963 /*
964  * Calculate the scaling unit passed in a `-width' argument.  This uses
965  * either a native scaling unit (e.g., 1i, 2m) or the string length of
966  * the value.
967  */
968 static void
969 a2width(const char *p, struct roffsu *su)
970 {
971 	const char	*end;
972 
973 	end = a2roffsu(p, su, SCALE_MAX);
974 	if (end == NULL || *end != '\0') {
975 		su->unit = SCALE_EN;
976 		su->scale = html_strlen(p);
977 	} else if (su->scale < 0.0)
978 		su->scale = 0.0;
979 }
980