xref: /illumos-gate/usr/src/lib/libldap5/sources/ldap/util/line64.c (revision 7c478bd95313f5f23a4c958a745db2134aa03244)
1 /*
2  * Copyright 2001-2002 Sun Microsystems, Inc.  All rights reserved.
3  * Use is subject to license terms.
4  */
5 
6 
7 #pragma ident	"%Z%%M%	%I%	%E% SMI"
8 
9 /*
10  * The contents of this file are subject to the Netscape Public
11  * License Version 1.1 (the "License"); you may not use this file
12  * except in compliance with the License. You may obtain a copy of
13  * the License at http://www.mozilla.org/NPL/
14  *
15  * Software distributed under the License is distributed on an "AS
16  * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
17  * implied. See the License for the specific language governing
18  * rights and limitations under the License.
19  *
20  * The Original Code is Mozilla Communicator client code, released
21  * March 31, 1998.
22  *
23  * The Initial Developer of the Original Code is Netscape
24  * Communications Corporation. Portions created by Netscape are
25  * Copyright (C) 1998-1999 Netscape Communications Corporation. All
26  * Rights Reserved.
27  *
28  * Contributor(s):
29  */
30 
31 /* line64.c - routines for dealing with the slapd line format */
32 
33 #include <stdio.h>
34 #include <string.h>
35 #include <stdlib.h>
36 #include <ctype.h>
37 #ifndef macintosh
38 #include <sys/types.h>
39 #endif
40 #ifdef _WIN32
41 #include <windows.h>
42 #elif !defined( macintosh )
43 #include <sys/socket.h>
44 #endif
45 #include "ldaplog.h"
46 #include "ldif.h"
47 
48 #ifndef isascii
49 #define isascii( c )	(!((c) & ~0177))
50 #endif
51 
52 #define RIGHT2			0x03
53 #define RIGHT4			0x0f
54 #define CONTINUED_LINE_MARKER	'\001'
55 
56 #define ISBLANK(c) ((c) == ' ' || (c) == '\t' || (c) == '\n') /* not "\r\v\f" */
57 
58 #define LDIF_OPT_ISSET( value, opt )	(((value) & (opt)) != 0 )
59 
/*
 * nib2b64 - BASE64 alphabet: maps a 6-bit value (0x00-0x3f) to the
 * character that encodes it.  The array holds exactly 0x40 characters
 * and therefore has no terminating zero byte.
 */
static char nib2b64[0x40] =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/*
 * b642nib - inverse of nib2b64: maps a 7-bit character code to the 6-bit
 * value it encodes.  Characters that are not part of the BASE64 alphabet
 * (including the '=' padding character) map to 0xff.
 */
static unsigned char b642nib[0x80] = {
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
	0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
	0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
	0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
	0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
	0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
	0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
	0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
	0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff
};
81 
82 static int ldif_base64_encode_internal( unsigned char *src, char *dst, int srclen,
83 	int lenused, int wraplen );
84 
85 /*
86  * str_parse_line - takes a line of the form "type:[:] value" and splits it
87  * into components "type" and "value".  if a double colon separates type from
88  * value, then value is encoded in base 64, and parse_line un-decodes it
89  * (in place) before returning.
90  */
91 
92 int
93 str_parse_line(
94     char	*line,
95     char	**type,
96     char	**value,
97     int		*vlen
98 )
99 {
100 	char	*p, *s, *d;
101 	int	b64;
102 
103 	/* skip any leading space */
104 	while ( ISBLANK( *line ) ) {
105 		line++;
106 	}
107 	*type = line;
108 
109 	for ( s = line; *s && *s != ':'; s++ )
110 		;	/* NULL */
111 	if ( *s == '\0' ) {
112 
113 		/* Comment-out while we address calling libldif from ns-back-ldbm
114 			on NT. 1 of 3 */
115 #if defined( _WIN32 )
116 		/*
117 #endif
118 		 LDAPDebug( LDAP_DEBUG_PARSE, "str_parse_line: missing ':' "
119 			"on line \"%s\"\n", line, 0, 0 );
120 #if defined( _WIN32 )
121 		*/
122 #endif
123 		return( -1 );
124 	}
125 
126 	/* trim any space between type and : */
127 	for ( p = s - 1; p > line && ISBLANK( *p ); p-- ) {
128 		*p = '\0';
129 	}
130 	*s++ = '\0';
131 
132 	/* check for double : - indicates base 64 encoded value */
133 	if ( *s == ':' ) {
134 		s++;
135 		b64 = 1;
136 
137 	/* single : - normally encoded value */
138 	} else {
139 		b64 = 0;
140 	}
141 
142 	/* skip space between : and value */
143 	while ( ISBLANK( *s ) ) {
144 		s++;
145 	}
146 
147 	/*
148 	 * If no value is present, return a zero-length string for
149 	 * *value, with *vlen set to zero.
150 	 */
151 	if ( *s == '\0' ) {
152 		*value = s;
153 		*vlen = 0;
154 		return( 0 );
155 	}
156 
157 	/* check for continued line markers that should be deleted */
158 	for ( p = s, d = s; *p; p++ ) {
159 		if ( *p != CONTINUED_LINE_MARKER )
160 			*d++ = *p;
161 	}
162 	*d = '\0';
163 
164 	*value = s;
165 	if ( b64 ) {
166 		if (( *vlen = ldif_base64_decode( s, (unsigned char *)s ))
167 		    < 0 ) {
168 			/* Comment-out while we address calling libldif from ns-back-ldbm
169 				on NT. 3 of 3 */
170 #if defined( _WIN32 )
171 		/*
172 #endif
173 			 LDAPDebug( LDAP_DEBUG_ANY,
174 			    "str_parse_line: invalid base 64 char on line \"%s\"\n",
175 			    line, 0, 0 );
176 #if defined( _WIN32 )
177 		*/
178 #endif
179 			return( -1 );
180 		}
181 		s[ *vlen ] = '\0';
182 	} else {
183 		*vlen = (int) (d - s);
184 	}
185 
186 	return( 0 );
187 }
188 
189 
190 /*
191  * ldif_base64_decode - take the BASE64-encoded characters in "src"
192  * (a zero-terminated string) and decode them into the the buffer "dst".
193  * "src" and "dst" can be the same if in-place decoding is desired.
194  * "dst" must be large enough to hold the decoded octets.  No more than
195  *	3 * strlen( src ) / 4 bytes will be produced.
196  * "dst" may contain zero octets anywhere within it, but it is not
197  *	zero-terminated by this function.
198  *
199  * The number of bytes copied to "dst" is returned if all goes well.
200  * -1 is returned if the BASE64 encoding in "src" is invalid.
201  */
202 
203 int
204 ldif_base64_decode( char *src, unsigned char *dst )
205 {
206 	char		*p, *stop;
207 	unsigned char	nib, *byte;
208 	int		i, len;
209 
210 	stop = strchr( src, '\0' );
211 	byte = dst;
212 	for ( p = src, len = 0; p < stop; p += 4, len += 3 ) {
213 		for ( i = 0; i < 4; i++ ) {
214 			if ( p[i] != '=' && (p[i] & 0x80 ||
215 			    b642nib[ p[i] & 0x7f ] > 0x3f) ) {
216 				return( -1 );
217 			}
218 		}
219 
220 		/* first digit */
221 		nib = b642nib[ p[0] & 0x7f ];
222 		byte[0] = nib << 2;
223 
224 		/* second digit */
225 		nib = b642nib[ p[1] & 0x7f ];
226 		byte[0] |= nib >> 4;
227 
228 		/* third digit */
229 		if ( p[2] == '=' ) {
230 			len += 1;
231 			break;
232 		}
233 		byte[1] = (nib & RIGHT4) << 4;
234 		nib = b642nib[ p[2] & 0x7f ];
235 		byte[1] |= nib >> 2;
236 
237 		/* fourth digit */
238 		if ( p[3] == '=' ) {
239 			len += 2;
240 			break;
241 		}
242 		byte[2] = (nib & RIGHT2) << 6;
243 		nib = b642nib[ p[3] & 0x7f ];
244 		byte[2] |= nib;
245 
246 		byte += 3;
247 	}
248 
249 	return( len );
250 }
251 
252 /*
253  * str_getline - return the next "line" (minus newline) of input from a
254  * string buffer of lines separated by newlines, terminated by \n\n
255  * or \0.  this routine handles continued lines, bundling them into
256  * a single big line before returning.  if a line begins with a white
257  * space character, it is a continuation of the previous line. the white
258  * space character (nb: only one char), and preceeding newline are changed
259  * into CONTINUED_LINE_MARKER chars, to be deleted later by the
260  * str_parse_line() routine above.
261  *
262  * it takes a pointer to a pointer to the buffer on the first call,
263  * which it updates and must be supplied on subsequent calls.
264  *
265  * XXX need to update this function to also support <CR><LF> as EOL.
266  * XXX supports <CR><LF> as of 07/29/1998 (richm)
267  */
268 
269 char *
270 str_getline( char **next )
271 {
272 	char	*l;
273 	char	c;
274 	char	*p;
275 
276 	if ( *next == NULL || **next == '\n' || **next == '\0' ) {
277 		return( NULL );
278 	}
279 
280 	while ( **next == '#' ) {	/* skip comment lines */
281 		if (( *next = strchr( *next, '\n' )) == NULL ) {
282 			return( NULL );
283 		}
284 		(*next)++;
285 	}
286 
287 	l = *next;
288 	while ( (*next = strchr( *next, '\n' )) != NULL ) {
289 		p = *next - 1; /* pointer to character previous to the newline */
290 		c = *(*next + 1); /* character after the newline */
291 		if ( ISBLANK( c ) && c != '\n' ) {
292 			/* DOS EOL is \r\n, so if the character before */
293 			/* the \n is \r, continue it too */
294 			if (*p == '\r')
295 				*p = CONTINUED_LINE_MARKER;
296 			**next = CONTINUED_LINE_MARKER;
297 			*(*next+1) = CONTINUED_LINE_MARKER;
298 		} else {
299 			/* DOS EOL is \r\n, so if the character before */
300 			/* the \n is \r, null it too */
301 			if (*p == '\r')
302 				*p = '\0';
303 			*(*next)++ = '\0';
304 			break;
305 		}
306 		(*next)++;
307 	}
308 
309 	return( l );
310 }
311 
312 
313 #define LDIF_SAFE_CHAR( c )		( (c) != '\r' && (c) != '\n' )
314 #define LDIF_CONSERVATIVE_CHAR( c )	( LDIF_SAFE_CHAR(c) && isascii((c)) \
315 					 && ( isprint((c)) || (c) == '\t' ))
316 #define LDIF_SAFE_INITCHAR( c )		( LDIF_SAFE_CHAR(c) && (c) != ':' \
317 					 && (c) != ' ' && (c) != '<' )
318 #define LDIF_CONSERVATIVE_INITCHAR( c ) ( LDIF_SAFE_INITCHAR( c ) && \
319 					 ! ( isascii((c)) && isspace((c))))
320 #define LDIF_CONSERVATIVE_FINALCHAR( c ) ( (c) != ' ' )
321 
322 
323 void
324 ldif_put_type_and_value_with_options( char **out, char *t, char *val,
325 	int vlen, unsigned long options )
326 {
327 	unsigned char	*p, *byte, *stop;
328 	char		*save;
329 	int		b64, len, savelen, wraplen;
330 	len = 0;
331 
332 	if ( LDIF_OPT_ISSET( options, LDIF_OPT_NOWRAP )) {
333 		wraplen = -1;
334 	} else {
335 		wraplen = LDIF_MAX_LINE_WIDTH;
336 	}
337 
338 	/* put the type + ": " */
339 	for ( p = (unsigned char *) t; *p; p++, len++ ) {
340 		*(*out)++ = *p;
341 	}
342 	*(*out)++ = ':';
343 	len++;
344 	if ( LDIF_OPT_ISSET( options, LDIF_OPT_VALUE_IS_URL )) {
345 		*(*out)++ = '<';	/* add '<' for URLs */
346 		len++;
347 	}
348 	save = *out;
349 	savelen = len;
350 	b64 = 0;
351 
352 	stop = (unsigned char *)val;
353 	if ( val && vlen > 0 ) {
354 		*(*out)++ = ' ';
355 		stop = (unsigned char *) (val + vlen);
356 		if ( LDIF_OPT_ISSET( options, LDIF_OPT_MINIMAL_ENCODING )) {
357 			if ( !LDIF_SAFE_INITCHAR( val[0] )) {
358 				b64 = 1;
359 			}
360 		} else {
361 			if ( !LDIF_CONSERVATIVE_INITCHAR( val[0] ) ||
362 				 !LDIF_CONSERVATIVE_FINALCHAR( val[vlen-1] )) {
363 				b64 = 1;
364 			}
365 		}
366 	}
367 
368 	if ( !b64 ) {
369 		for ( byte = (unsigned char *) val; byte < stop;
370 		    byte++, len++ ) {
371 			if ( LDIF_OPT_ISSET( options,
372 			    LDIF_OPT_MINIMAL_ENCODING )) {
373 				if ( !LDIF_SAFE_CHAR( *byte )) {
374 					b64 = 1;
375 					break;
376 				}
377 			} else if ( !LDIF_CONSERVATIVE_CHAR( *byte )) {
378 				b64 = 1;
379 				break;
380 			}
381 
382 			if ( wraplen != -1 && len > wraplen ) {
383 				*(*out)++ = '\n';
384 				*(*out)++ = ' ';
385 				len = 1;
386 			}
387 			*(*out)++ = *byte;
388 		}
389 	}
390 
391 	if ( b64 ) {
392 		*out = save;
393 		*(*out)++ = ':';
394 		*(*out)++ = ' ';
395 		len = ldif_base64_encode_internal( (unsigned char *)val, *out, vlen,
396 		    savelen + 2, wraplen );
397 		*out += len;
398 	}
399 
400 	*(*out)++ = '\n';
401 }
402 
/*
 * ldif_put_type_and_value - write the LDIF line for type "t" and the
 * "vlen"-byte value "val" at *out by calling
 * ldif_put_type_and_value_with_options() with no options set (0).
 */
void
ldif_put_type_and_value( char **out, char *t, char *val, int vlen )
{
    ldif_put_type_and_value_with_options( out, t, val, vlen, 0 );
}
408 
/*
 * ldif_put_type_and_value_nowrap - same as ldif_put_type_and_value(), but
 * passes LDIF_OPT_NOWRAP to ldif_put_type_and_value_with_options().
 */
void
ldif_put_type_and_value_nowrap( char **out, char *t, char *val, int vlen )
{
    ldif_put_type_and_value_with_options( out, t, val, vlen, LDIF_OPT_NOWRAP );
}
414 
415 /*
416  * ldif_base64_encode_internal - encode "srclen" bytes in "src", place BASE64
417  * encoded bytes in "dst" and return the length of the BASE64
418  * encoded string.  "dst" is also zero-terminated by this function.
419  *
420  * If "lenused" >= 0, newlines will be included in "dst" and "lenused" if
421  * appropriate.  "lenused" should be a count of characters already used
422  * on the current line.  The LDIF lines we create will contain at most
423  * "wraplen" characters on each line, unless "wraplen" is -1, in which
424  * case output line length is unlimited.
425  *
426  * If "lenused" < 0, no newlines will be included, and the LDIF_BASE64_LEN()
427  * macro can be used to determine how many bytes will be placed in "dst."
428  */
429 
430 static int
431 ldif_base64_encode_internal( unsigned char *src, char *dst, int srclen, int lenused, int wraplen )
432 {
433 	unsigned char	*byte, *stop;
434 	unsigned char	buf[3];
435 	char		*out;
436 	unsigned long	bits;
437 	int		i, pad, len;
438 
439 	len = 0;
440 	out = dst;
441 	stop = src + srclen;
442 
443 	/* convert to base 64 (3 bytes => 4 base 64 digits) */
444 	for ( byte = src; byte < stop - 2; byte += 3 ) {
445 		bits = (byte[0] & 0xff) << 16;
446 		bits |= (byte[1] & 0xff) << 8;
447 		bits |= (byte[2] & 0xff);
448 
449 		for ( i = 0; i < 4; i++, bits <<= 6 ) {
450 			if ( wraplen != -1 &&  lenused >= 0 && lenused++ > wraplen ) {
451 				*out++ = '\n';
452 				*out++ = ' ';
453 				lenused = 2;
454 			}
455 
456 			/* get b64 digit from high order 6 bits */
457 			*out++ = nib2b64[ (bits & 0xfc0000L) >> 18 ];
458 		}
459 	}
460 
461 	/* add padding if necessary */
462 	if ( byte < stop ) {
463 		for ( i = 0; byte + i < stop; i++ ) {
464 			buf[i] = byte[i];
465 		}
466 		for ( pad = 0; i < 3; i++, pad++ ) {
467 			buf[i] = '\0';
468 		}
469 		byte = buf;
470 		bits = (byte[0] & 0xff) << 16;
471 		bits |= (byte[1] & 0xff) << 8;
472 		bits |= (byte[2] & 0xff);
473 
474 		for ( i = 0; i < 4; i++, bits <<= 6 ) {
475 			if ( wraplen != -1 && lenused >= 0 && lenused++ > wraplen ) {
476 				*out++ = '\n';
477 				*out++ = ' ';
478 				lenused = 2;
479 			}
480 
481 			if (( i == 3 && pad > 0 ) || ( i == 2 && pad == 2 )) {
482 				/* Pad as appropriate */
483 				*out++ = '=';
484 			} else {
485 				/* get b64 digit from low order 6 bits */
486 				*out++ = nib2b64[ (bits & 0xfc0000L) >> 18 ];
487 			}
488 		}
489 	}
490 
491 	*out = '\0';
492 
493 	return( out - dst );
494 }
495 
/*
 * ldif_base64_encode - BASE64 encode "srclen" bytes of "src" into "dst"
 * by calling ldif_base64_encode_internal() with a wrap length of
 * LDIF_MAX_LINE_WIDTH.  Returns whatever that function returns.
 */
int
ldif_base64_encode( unsigned char *src, char *dst, int srclen, int lenused )
{
    return ldif_base64_encode_internal( src, dst, srclen, lenused, LDIF_MAX_LINE_WIDTH );
}
501 
/*
 * ldif_base64_encode_nowrap - same as ldif_base64_encode(), but passes a
 * wrap length of -1 (no line wrapping) to ldif_base64_encode_internal().
 */
int
ldif_base64_encode_nowrap( unsigned char *src, char *dst, int srclen, int lenused )
{
    return ldif_base64_encode_internal( src, dst, srclen, lenused, -1 );
}
507 
508 
/*
 * ldif_type_and_value_with_options - build the LDIF line for "type" and
 * the "vlen"-byte value "val" in a freshly malloc'd buffer, formatted by
 * ldif_put_type_and_value_with_options() according to "options".
 *
 * Returns the zero-terminated line, or NULL if the buffer could not be
 * allocated.
 */
char *
ldif_type_and_value_with_options( char *type, char *val, int vlen,
	unsigned long options )
{
    char	*line, *cursor;
    int		typelen;

    typelen = strlen( type );

    /* one extra byte for the terminating zero added below */
    line = (char *)malloc( LDIF_SIZE_NEEDED( typelen, vlen ) + 1 );
    if ( line == NULL ) {
	return( NULL );
    }

    cursor = line;
    ldif_put_type_and_value_with_options( &cursor, type, val, vlen, options );
    *cursor = '\0';

    return( line );
}
529 
/*
 * ldif_type_and_value - return the result of
 * ldif_type_and_value_with_options() called with no options set (0).
 */
char *
ldif_type_and_value( char *type, char *val, int vlen )
{
    return ldif_type_and_value_with_options( type, val, vlen, 0 );
}
535 
/*
 * ldif_type_and_value_nowrap - same as ldif_type_and_value(), but passes
 * LDIF_OPT_NOWRAP to ldif_type_and_value_with_options().
 */
char *
ldif_type_and_value_nowrap( char *type, char *val, int vlen )
{
    return ldif_type_and_value_with_options( type, val, vlen, LDIF_OPT_NOWRAP );
}
541 
/*
 * ldif_get_entry - read the next ldif entry from the FILE referenced
 * by fp. return a pointer to a malloc'd, null-terminated buffer that
 * holds the lines of the entry, each still ending in its newline.
 * blank lines before the entry and comment lines (starting with '#')
 * are skipped; the entry ends at the first blank line after it or at
 * end of file.  unless XP_WIN32 is defined, <CR><LF> line endings are
 * turned into <LF>.
 *
 * if lineno is not NULL, *lineno is incremented once for every input
 * line read, so that it holds the number of the last line read if it
 * was initialized to 0 before the first call.
 *
 * NULL is returned if no entry could be read (end of file) or if
 * memory could not be allocated; in the latter case anything gathered
 * so far is freed.
 *
 * input lines may be of any length: a line that does not fit in the
 * read buffer is picked up in several pieces, and only the first piece
 * is counted or examined for being a blank line or a comment.
 */
char *
ldif_get_entry( FILE *fp, int *lineno )
{
	char	line[BUFSIZ];
	char	*buf, *tmp;
	int	max, cur, len, gotsome;
	int	at_line_start, starts_line, in_comment;

	buf = NULL;
	max = cur = gotsome = 0;
	at_line_start = 1;	/* next piece read begins a new input line */
	in_comment = 0;		/* pieces read belong to a comment line */
	while ( fgets( line, sizeof(line), fp ) != NULL ) {
		len = strlen( line );
		starts_line = at_line_start;
		/* fgets() stopped short of a newline: more pieces follow */
		at_line_start = ( len == 0 || line[len-1] == '\n' );

		if ( starts_line ) {
			if ( lineno != NULL ) {
				(*lineno)++;
			}
			/* ldif entries are terminated by a \n on a line by itself */
			if ( line[0] == '\0' || line[0] == '\n'
#if !defined( XP_WIN32 )
			     || ( line[0] == '\r' && line[1] == '\n' ) /* DOS format */
#endif
			   ) {
				if ( gotsome ) {
					break;
				} else {
					continue;
				}
			}
			in_comment = ( line[0] == '#' );
		}
		if ( in_comment ) {
			continue;	/* drop every piece of a comment line */
		}
		gotsome = 1;
#if !defined( XP_WIN32 )
		/* DOS format */
		if ( len > 0 && line[len-1] == '\r' ) {
			--len;
			line[len] = '\0';
		} else if ( len > 1 && line[len-2] == '\r' && line[len-1] == '\n' ) {
			/* move the \n over the \r */
			--len;
			line[len-1] = line[len];
			line[len] = '\0';
		}
#endif
		/* make room for this piece and its terminating zero */
		while ( cur + (len + 1) > max ) {
			max = ( buf == NULL ) ? max + BUFSIZ : max * 2;
			/* realloc( NULL, n ) behaves like malloc( n ) */
			tmp = (char *) realloc( buf, max );
			if ( tmp == NULL ) {
				/* buf is still allocated: do not leak it */
				free( buf );
				return( NULL );
			}
			buf = tmp;
		}

		/* the zero is copied too, and overwritten by the next piece */
		memcpy( buf + cur, line, len + 1 );
		cur += len;
	}

	return( buf );
}
606