xref: /illumos-gate/usr/src/lib/libc/port/i18n/wstod.c (revision 581cede61ac9c14d8d4ea452562a567189eead78)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*	Copyright (c) 1988 AT&T	*/
28 /*	  All Rights Reserved  	*/
29 
30 #pragma ident	"%Z%%M%	%I%	%E% SMI"
31 
32 /*
33  * This file is based on /usr/src/lib/libc/port/gen/strtod.c and
34  * /usr/src/lib/libc/sparc/fp/string_decim.c
35  */
36 
37 #pragma weak _wcstod = wcstod
38 #pragma weak _wstod = wstod
39 
40 #include "lint.h"
41 #include <errno.h>
42 #include <stdio.h>
43 #include <values.h>
44 #include <floatingpoint.h>
45 #include <stddef.h>
46 #include <wctype.h>
47 #include "base_conversion.h"	/* from usr/src/lib/libc/inc */
48 #include <locale.h>
49 #include "libc.h"
50 #include "xpg6.h"
51 
52 static void wstring_to_decimal(const wchar_t **, int, decimal_record *, int *);
53 
54 double
55 wcstod(const wchar_t *cp, wchar_t **ptr)
56 {
57 	double		x;
58 	decimal_mode	mr;
59 	decimal_record	dr;
60 	fp_exception_field_type fs;
61 	int 		form;
62 
63 	wstring_to_decimal(&cp, __xpg6 & _C99SUSv3_recognize_hexfp, &dr, &form);
64 	if (ptr != NULL)
65 		*ptr = (wchar_t *)cp;
66 	if (form == 0)
67 		return (0.0);	/* Shameful kluge for SVID's sake. */
68 #if defined(__i386) || defined(__amd64)
69 	mr.rd = __xgetRD();
70 #elif defined(__sparc)
71 	mr.rd = _QgetRD();
72 #else
73 #error Unknown architecture!
74 #endif
75 	if (form < 0)
76 		__hex_to_double(&dr, mr.rd, &x, &fs);
77 	else
78 		decimal_to_double(&x, &mr, &dr, &fs);
79 	if (fs & ((1 << fp_overflow) | (1 << fp_underflow)))
80 		errno = ERANGE;
81 	return (x);
82 }
83 
84 float
85 wcstof(const wchar_t *cp, wchar_t **ptr)
86 {
87 	float		x;
88 	decimal_mode	mr;
89 	decimal_record	dr;
90 	fp_exception_field_type fs;
91 	int		form;
92 
93 	wstring_to_decimal(&cp, 1, &dr, &form);
94 	if (ptr != NULL)
95 		*ptr = (wchar_t *)cp;
96 	if (form == 0)
97 		return (0.0f);
98 #if defined(__i386) || defined(__amd64)
99 	mr.rd = __xgetRD();
100 #elif defined(__sparc)
101 	mr.rd = _QgetRD();
102 #else
103 #error Unknown architecture!
104 #endif
105 	if (form < 0)
106 		__hex_to_single(&dr, mr.rd, &x, &fs);
107 	else
108 		decimal_to_single(&x, &mr, &dr, &fs);
109 	if (fs & ((1 << fp_overflow) | (1 << fp_underflow)))
110 		errno = ERANGE;
111 	return (x);
112 }
113 
114 long double
115 wcstold(const wchar_t *cp, wchar_t **ptr)
116 {
117 	long double	x;
118 	decimal_mode	mr;
119 	decimal_record	dr;
120 	fp_exception_field_type fs;
121 	int		form;
122 
123 	wstring_to_decimal(&cp, 1, &dr, &form);
124 	if (ptr != NULL)
125 		*ptr = (wchar_t *)cp;
126 	if (form == 0)
127 		return (0.0L);
128 #if defined(__i386) || defined(__amd64)
129 	mr.rd = __xgetRD();
130 	if (form < 0)
131 		__hex_to_extended(&dr, mr.rd, (extended *)&x, &fs);
132 	else
133 		decimal_to_extended((extended *)&x, &mr, &dr, &fs);
134 #elif defined(__sparc)
135 	mr.rd = _QgetRD();
136 	if (form < 0)
137 		__hex_to_quadruple(&dr, mr.rd, &x, &fs);
138 	else
139 		decimal_to_quadruple(&x, &mr, &dr, &fs);
140 #else
141 #error Unknown architecture!
142 #endif
143 	if (fs & ((1 << fp_overflow) | (1 << fp_underflow)))
144 		errno = ERANGE;
145 	return (x);
146 }
147 
/*
 * wstod: alternate entry point that simply forwards to wcstod with
 * the same arguments and returns its result.
 */
double
wstod(const wchar_t *cp, wchar_t **ptr)
{
	double	value = wcstod(cp, ptr);

	return (value);
}
153 
/*
 * Upper-case spellings of the special tokens.  The parser matches
 * input against these one character at a time, starting at index 1
 * (the first character has already been matched by then).
 */
static const char *infstring = "INFINITY";
static const char *nanstring = "NAN";

/*
 * UCASE maps an ASCII lower-case wide character to its upper-case
 * counterpart and yields any other value unchanged.  It is applied to
 * wchar_t arguments solely for the purpose of comparing the result
 * with one of the characters in the strings above.
 *
 * The argument is fully parenthesized in the expansion so that
 * compound expressions expand correctly, but it is evaluated more
 * than once and therefore must be free of side effects.
 */
#define	UCASE(c)	(((L'a' <= (c)) && ((c) <= L'z')) ? (c) - 32 : (c))

/*
 * NZDIGIT yields an expression that is true whenever the argument is
 * a valid nonzero digit for the form being parsed: 1-9 always, and
 * additionally a-f or A-F when a hex string is being parsed.
 *
 * The macro reads a variable named "form" from the scope in which it
 * is expanded (form < 0 selects hex).  The argument is evaluated more
 * than once and therefore must be free of side effects.
 */
#define	NZDIGIT(c)	((L'1' <= (c) && (c) <= L'9') || (form < 0 && \
			((L'a' <= (c) && (c) <= L'f') || \
			(L'A' <= (c) && (c) <= L'F'))))
170 
171 /*
172  * wstring_to_decimal is modelled on string_to_decimal, the majority
173  * of which can be found in the common file char_to_decimal.h.  The
174  * significant differences are:
175  *
176  * 1. This code recognizes only C99 (hex fp strings and restricted
177  *    characters in parentheses following "nan") vs. C90 modes, no
178  *    Fortran conventions.
179  *
180  * 2. *pform is an int rather than an enum decimal_string_form.  On
181  *    return, *pform == 0 if no valid token was found, *pform < 0
182  *    if a C99 hex fp string was found, and *pform > 0 if a decimal
183  *    string was found.
184  */
185 static void
186 wstring_to_decimal(const wchar_t **ppc, int c99, decimal_record *pd,
187     int *pform)
188 {
189 	const wchar_t	*cp = *ppc; /* last character seen */
190 	const wchar_t	*good = cp - 1;	/* last character accepted */
191 	wchar_t		current; /* always equal to *cp */
192 	int		sigfound;
193 	int		ids = 0;
194 	int		i, agree;
195 	int		nzbp = 0; /* number of zeros before point */
196 	int		nzap = 0; /* number of zeros after point */
197 	char		decpt;
198 	int		nfast, nfastlimit;
199 	char		*pfast;
200 	int		e, esign;
201 	int		expshift = 0;
202 	int		form;
203 
204 	/*
205 	 * This routine assumes that the radix point is a single
206 	 * ASCII character, so that following this assignment, the
207 	 * condition (current == decpt) will correctly detect it.
208 	 */
209 	decpt = *(localeconv()->decimal_point);
210 
211 	/* input is invalid until we find something */
212 	pd->fpclass = fp_signaling;
213 	pd->sign = 0;
214 	pd->exponent = 0;
215 	pd->ds[0] = '\0';
216 	pd->more = 0;
217 	pd->ndigits = 0;
218 	*pform = form = 0;
219 
220 	/* skip white space */
221 	current = *cp;
222 	while (iswspace((wint_t)current))
223 		current = *++cp;
224 
225 	/* look for optional leading sign */
226 	if (current == L'+') {
227 		current = *++cp;
228 	} else if (current == L'-') {
229 		pd->sign = 1;
230 		current = *++cp;
231 	}
232 
233 	sigfound = -1;		/* -1 = no digits found yet */
234 
235 	/*
236 	 * Admissible first non-white-space, non-sign characters are
237 	 * 0-9, i, I, n, N, or the radix point.
238 	 */
239 	if (L'1' <= current && current <= L'9') {
240 		pd->fpclass = fp_normal;
241 		form = 1;
242 		good = cp;
243 		sigfound = 1;	/* 1 = significant digits found */
244 		pd->ds[ids++] = (char)current;
245 		current = *++cp;
246 	} else {
247 		switch (current) {
248 		case L'0':
249 			/*
250 			 * Accept the leading zero and set pd->fpclass
251 			 * accordingly, but don't set sigfound until we
252 			 * determine that this isn't a "fake" hex string
253 			 * (i.e., 0x.p...).
254 			 */
255 			good = cp;
256 			pd->fpclass = fp_zero;
257 			if (c99) {
258 				/* look for a hex fp string */
259 				current = *++cp;
260 				if (current == L'X' || current == L'x') {
261 					/* assume hex fp form */
262 					form = -1;
263 					expshift = 2;
264 					current = *++cp;
265 					/*
266 					 * Only a digit or radix point can
267 					 * follow "0x".
268 					 */
269 					if (NZDIGIT(current)) {
270 						pd->fpclass = fp_normal;
271 						good = cp;
272 						sigfound = 1;
273 						pd->ds[ids++] = (char)current;
274 						current = *++cp;
275 						break;
276 					} else if (current == (wchar_t)decpt) {
277 						current = *++cp;
278 						goto afterpoint;
279 					} else if (current != L'0') {
280 						/* not hex fp after all */
281 						form = 1;
282 						expshift = 0;
283 						goto done;
284 					}
285 				} else {
286 					form = 1;
287 				}
288 			} else {
289 				form = 1;
290 			}
291 
292 			/* skip all leading zeros */
293 			while (current == L'0')
294 				current = *++cp;
295 			good = cp - 1;
296 			sigfound = 0;	/* 0 = only zeros found so far */
297 			break;
298 
299 		case L'i':
300 		case L'I':
301 			/* look for inf or infinity */
302 			current = *++cp;
303 			agree = 1;
304 			while (agree <= 7 &&
305 			    UCASE(current) == (wchar_t)infstring[agree]) {
306 				current = *++cp;
307 				agree++;
308 			}
309 			if (agree >= 3) {
310 				/* found valid infinity */
311 				pd->fpclass = fp_infinity;
312 				form = 1;
313 				good = (agree < 8)? cp + 2 - agree : cp - 1;
314 				__inf_read = 1;
315 			}
316 			goto done;
317 
318 		case L'n':
319 		case L'N':
320 			/* look for nan or nan(string) */
321 			current = *++cp;
322 			agree = 1;
323 			while (agree <= 2 &&
324 			    UCASE(current) == (wchar_t)nanstring[agree]) {
325 				current = *++cp;
326 				agree++;
327 			}
328 			if (agree == 3) {
329 				/* found valid NaN */
330 				pd->fpclass = fp_quiet;
331 				form = 1;
332 				good = cp - 1;
333 				__nan_read = 1;
334 				if (current == L'(') {
335 					/* accept parenthesized string */
336 					if (c99) {
337 						do {
338 							current = *++cp;
339 						} while (iswalnum(current) ||
340 						    current == L'_');
341 					} else {
342 						do {
343 							current = *++cp;
344 						} while (current &&
345 						    current != L')');
346 					}
347 					if (current == L')')
348 						good = cp;
349 				}
350 			}
351 			goto done;
352 
353 		default:
354 			if (current == (wchar_t)decpt) {
355 				/*
356 				 * Don't accept the radix point just yet;
357 				 * we need to see at least one digit.
358 				 */
359 				current = *++cp;
360 				goto afterpoint;
361 			}
362 			goto done;
363 		}
364 	}
365 
366 nextnumber:
367 	/*
368 	 * Admissible characters after the first digit are a valid
369 	 * digit, an exponent delimiter (E or e for decimal form,
370 	 * P or p for hex form), or the radix point.  (Note that we
371 	 * can't get here unless we've already found a digit.)
372 	 */
373 	if (NZDIGIT(current)) {
374 		/*
375 		 * Found another nonzero digit.  If there's enough room
376 		 * in pd->ds, store any intervening zeros we've found so far
377 		 * and then store this digit.  Otherwise, stop storing
378 		 * digits in pd->ds and set pd->more.
379 		 */
380 		if (ids + nzbp + 2 < DECIMAL_STRING_LENGTH) {
381 			for (i = 0; i < nzbp; i++)
382 				pd->ds[ids++] = '0';
383 			pd->ds[ids++] = (char)current;
384 		} else {
385 			pd->exponent += (nzbp + 1) << expshift;
386 			pd->more = 1;
387 			if (ids < DECIMAL_STRING_LENGTH) {
388 				pd->ds[ids] = '\0';
389 				pd->ndigits = ids;
390 				/* don't store any more digits */
391 				ids = DECIMAL_STRING_LENGTH;
392 			}
393 		}
394 		pd->fpclass = fp_normal;
395 		sigfound = 1;
396 		nzbp = 0;
397 		current = *++cp;
398 
399 		/*
400 		 * Use an optimized loop to grab a consecutive sequence
401 		 * of nonzero digits quickly.
402 		 */
403 		nfastlimit = DECIMAL_STRING_LENGTH - 3 - ids;
404 		for (nfast = 0, pfast = &(pd->ds[ids]);
405 		    nfast < nfastlimit && NZDIGIT(current);
406 		    nfast++) {
407 			*pfast++ = (char)current;
408 			current = *++cp;
409 		}
410 		ids += nfast;
411 		if (current == L'0')
412 			goto nextnumberzero;	/* common case */
413 		/* advance good to the last accepted digit */
414 		good = cp - 1;
415 		goto nextnumber;
416 	} else {
417 		switch (current) {
418 		case L'0':
419 nextnumberzero:
420 			/*
421 			 * Count zeros before the radix point.  Later we
422 			 * will either put these zeros into pd->ds or add
423 			 * nzbp to pd->exponent to account for them.
424 			 */
425 			while (current == L'0') {
426 				nzbp++;
427 				current = *++cp;
428 			}
429 			good = cp - 1;
430 			goto nextnumber;
431 
432 		case L'E':
433 		case L'e':
434 			if (form < 0)
435 				goto done;
436 			goto exponent;
437 
438 		case L'P':
439 		case L'p':
440 			if (form > 0)
441 				goto done;
442 			goto exponent;
443 
444 		default:
445 			if (current == decpt) {
446 				/* accept the radix point */
447 				good = cp;
448 				current = *++cp;
449 				goto afterpoint;
450 			}
451 			goto done;
452 		}
453 	}
454 
455 afterpoint:
456 	/*
457 	 * Admissible characters after the radix point are a valid digit
458 	 * or an exponent delimiter.  (Note that it is possible to get
459 	 * here even though we haven't found any digits yet.)
460 	 */
461 	if (NZDIGIT(current)) {
462 		if (form == 0)
463 			form = 1;
464 		if (sigfound < 1) {
465 			/* no significant digits found until now */
466 			pd->fpclass = fp_normal;
467 			sigfound = 1;
468 			pd->ds[ids++] = (char)current;
469 			pd->exponent = (-(nzap + 1)) << expshift;
470 		} else {
471 			/* significant digits have been found */
472 			if (ids + nzbp + nzap + 2 < DECIMAL_STRING_LENGTH) {
473 				for (i = 0; i < nzbp + nzap; i++)
474 					pd->ds[ids++] = '0';
475 				pd->ds[ids++] = (char)current;
476 				pd->exponent -= (nzap + 1) << expshift;
477 			} else {
478 				pd->exponent += nzbp << expshift;
479 				pd->more = 1;
480 				if (ids < DECIMAL_STRING_LENGTH) {
481 					pd->ds[ids] = '\0';
482 					pd->ndigits = ids;
483 					/* don't store any more digits */
484 					ids = DECIMAL_STRING_LENGTH;
485 				}
486 			}
487 		}
488 		nzbp = 0;
489 		nzap = 0;
490 		current = *++cp;
491 
492 		/*
493 		 * Use an optimized loop to grab a consecutive sequence
494 		 * of nonzero digits quickly.
495 		 */
496 		nfastlimit = DECIMAL_STRING_LENGTH - 3 - ids;
497 		for (nfast = 0, pfast = &(pd->ds[ids]);
498 		    nfast < nfastlimit && NZDIGIT(current);
499 		    nfast++) {
500 			*pfast++ = (char)current;
501 			current = *++cp;
502 		}
503 		ids += nfast;
504 		pd->exponent -= nfast << expshift;
505 		if (current == L'0')
506 			goto zeroafterpoint;
507 		/* advance good to the last accepted digit */
508 		good = cp - 1;
509 		goto afterpoint;
510 	} else {
511 		switch (current) {
512 		case L'0':
513 			if (form == 0)
514 				form = 1;
515 			if (sigfound == -1) {
516 				pd->fpclass = fp_zero;
517 				sigfound = 0;
518 			}
519 zeroafterpoint:
520 			/*
521 			 * Count zeros after the radix point.  If we find
522 			 * any more nonzero digits later, we will put these
523 			 * zeros into pd->ds and decrease pd->exponent by
524 			 * nzap.
525 			 */
526 			while (current == L'0') {
527 				nzap++;
528 				current = *++cp;
529 			}
530 			good = cp - 1;
531 			goto afterpoint;
532 
533 		case L'E':
534 		case L'e':
535 			/* don't accept exponent without preceding digits */
536 			if (sigfound == -1 || form < 0)
537 				goto done;
538 			break;
539 
540 		case L'P':
541 		case L'p':
542 			/* don't accept exponent without preceding digits */
543 			if (sigfound == -1 || form > 0)
544 				goto done;
545 			break;
546 
547 		default:
548 			goto done;
549 		}
550 	}
551 
552 exponent:
553 	e = 0;
554 	esign = 0;
555 
556 	/* look for optional exponent sign */
557 	current = *++cp;
558 	if (current == L'+') {
559 		current = *++cp;
560 	} else if (current == L'-') {
561 		esign = 1;
562 		current = *++cp;
563 	}
564 
565 	/*
566 	 * Accumulate explicit exponent.  Note that if we don't find at
567 	 * least one digit, good won't be updated and e will remain 0.
568 	 * Also, we keep e from getting too large so we don't overflow
569 	 * the range of int (but notice that the threshold is large
570 	 * enough that any larger e would cause the result to underflow
571 	 * or overflow anyway).
572 	 */
573 	while (L'0' <= current && current <= L'9') {
574 		good = cp;
575 		if (e <= 1000000)
576 			e = 10 * e + current - L'0';
577 		current = *++cp;
578 	}
579 	if (esign)
580 		pd->exponent -= e;
581 	else
582 		pd->exponent += e;
583 
584 done:
585 	/*
586 	 * If we found any zeros before the radix point that were not
587 	 * accounted for earlier, adjust the exponent.  (This is only
588 	 * relevant when pd->fpclass == fp_normal, but it's harmless
589 	 * in all other cases.)
590 	 */
591 	pd->exponent += nzbp << expshift;
592 
593 	/* terminate pd->ds if we haven't already */
594 	if (ids < DECIMAL_STRING_LENGTH) {
595 		pd->ds[ids] = '\0';
596 		pd->ndigits = ids;
597 	}
598 
599 	/*
600 	 * If we accepted any characters, advance *ppc to point to the
601 	 * first character we didn't accept; otherwise, pass back a
602 	 * signaling nan.
603 	 */
604 	if (good >= *ppc) {
605 		*ppc = good + 1;
606 	} else {
607 		pd->fpclass = fp_signaling;
608 		pd->sign = 0;
609 		form = 0;
610 	}
611 
612 	*pform = form;
613 }
614