xref: /illumos-gate/usr/src/lib/iconv_modules/utf-8/common/utf7_to_ucs.c (revision e74ff6533275aaaa8989786dfbba24b3281e3530)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 1998-1999 by Sun Microsystems, Inc.
23  * All rights reserved.
24  *
25  * This program covers UTF-7 to UTF-8, UCS-2, and, UCS-4 code conversions.
26  * UTF-7 described in RFC 2152.
27  * We don't support any other UCS formats to and from UTF-7 unless there is
28  * a significant requirement.
29  */
30 
31 
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <errno.h>
35 #include <sys/types.h>
36 #include <sys/isa_defs.h>
37 #include "utf7_to_ucs.h"
38 
39 
40 void *
41 _icv_open()
42 {
43 	utf7_state_t *cd = (utf7_state_t *)calloc(1, sizeof(utf7_state_t));
44 
45 	if (cd == (utf7_state_t *)NULL) {
46 		errno = ENOMEM;
47 		return((void *)-1);
48 	}
49 
50 #if defined(_LITTLE_ENDIAN)
51 	cd->little_endian = true;
52 #endif
53 
54 	return((void *)cd);
55 }
56 
57 
58 void
59 _icv_close(utf7_state_t *cd)
60 {
61 	if (! cd)
62 		errno = EBADF;
63 	else
64 		free((void *)cd);
65 }
66 
67 
68 size_t
69 _icv_iconv(utf7_state_t *cd, char **inbuf, size_t *inbufleft, char **outbuf,
70                 size_t *outbufleft)
71 {
72 	size_t ret_val = 0;
73 	uchar_t *ib;
74 	uchar_t *ob;
75 	uchar_t *ibtail;
76 	uchar_t *obtail;
77 
78 	if (! cd) {
79 		errno = EBADF;
80 		return((size_t)-1);
81 	}
82 
83 	if (!inbuf || !(*inbuf)) {
84 		/* We just ignore any remnant bits we so far accumulated. */
85 		cd->in_the_middle_of_utf7_sequence = false;
86 		cd->remnant = 0;
87 		cd->remnant_count = 0;
88 		cd->prevch = (uchar_t)'\0';
89 
90 		return((size_t)0);
91 	}
92 
93 	ib = (uchar_t *)*inbuf;
94 	ob = (uchar_t *)*outbuf;
95 	ibtail = ib + *inbufleft;
96 	obtail = ob + *outbufleft;
97 
98 	while (ib < ibtail) {
99 		uint_t temp_remnant;
100 		uint_t u4;
101 #if defined(UCS_2) || defined(UCS_4)
102 		signed char obsz;
103 #endif
104 
105 		u4 = ICV_U7_UCS4_OUTOFUTF16;
106 		if (cd->in_the_middle_of_utf7_sequence) {
107 			if (rmb64[*ib] >= 0) {
108 				temp_remnant = (cd->remnant << 6) | rmb64[*ib];
109 
110 				switch (cd->remnant_count) {
111 				case ICV_U7_ACTION_HARVEST1:
112 					u4 = (temp_remnant >> 2) & 0xffff;
113 					break;
114 				case ICV_U7_ACTION_HARVEST2:
115 					u4 = (temp_remnant >> 4) & 0xffff;
116 					break;
117 				case ICV_U7_ACTION_HARVEST3:
118 					u4 = temp_remnant & 0xffff;
119 					break;
120 				}
121 
122 				if (u4 != ICV_U7_UCS4_OUTOFUTF16) {
123 					if (u4 == 0x00fffe || u4 == 0x00ffff ||
124 						(u4 >= 0x00d800 &&
125 						u4 <= 0x00dfff)) {
126 						errno = EILSEQ;
127 						ret_val = (size_t)-1;
128 						break;
129 					}
130 #if defined(UCS_2)
131 					CHECK_OUTBUF_SZ_AND_WRITE_U2;
132 #elif defined(UCS_4)
133 					CHECK_OUTBUF_SZ_AND_WRITE_U4;
134 #elif defined(UTF_8)
135 					CHECK_OUTBUF_SZ_AND_WRITE_U8_OR_EILSEQ;
136 #else
137 #error	"Fatal: One of UCS_2, UCS_4, or, UTF_8 is needed."
138 #endif
139 				}
140 
141 				/* It's now safe to have the bits. */
142 				cd->remnant = temp_remnant;
143 				if (cd->remnant_count == ICV_U7_ACTION_HARVEST3)
144 					cd->remnant_count = ICV_U7_ACTION_START;
145 				else
146 					cd->remnant_count++;
147 			} else {
148 				if (*ib == (uint_t)'-') {
149 					if (cd->prevch == '+')
150 						u4 = (uint_t)'+';
151 				} else
152 					u4 = (uint_t)(*ib);
153 
154 				switch (cd->remnant_count) {
155 				case ICV_U7_ACTION_START:
156 					/* (ICV_U7_ACTION_HARVEST3+1): */
157 					/* These are normal cases. */
158 					break;
159 				case (ICV_U7_ACTION_HARVEST1+1):
160 					if (cd->remnant & 0x03) {
161 						errno = EILSEQ;
162 						ret_val = (size_t)-1;
163 						goto illegal_char_err;
164 					}
165 					break;
166 				case (ICV_U7_ACTION_HARVEST2+1):
167 					if (cd->remnant & 0x0f) {
168 						errno = EILSEQ;
169 						ret_val = (size_t)-1;
170 						goto illegal_char_err;
171 					}
172 					break;
173 				default:
174 					errno = EILSEQ;
175 					ret_val = (size_t)-1;
176 					goto illegal_char_err;
177 					break;
178 				}
179 
180 				if (u4 != ICV_U7_UCS4_OUTOFUTF16) {
181 #if defined(UCS_2)
182 					CHECK_OUTBUF_SZ_AND_WRITE_U2;
183 #elif defined(UCS_4)
184 					CHECK_OUTBUF_SZ_AND_WRITE_U4;
185 #elif defined(UTF_8)
186 					if (ob >= obtail) {
187 						errno = E2BIG;
188 						ret_val = (size_t)-1;
189 						break;
190 					}
191 					*ob++ = (uchar_t)(u4 & 0x7f);
192 #else
193 #error	"Fatal: One of UCS_2, UCS_4, or, UTF_8 is needed."
194 #endif
195 				}
196 
197 				cd->in_the_middle_of_utf7_sequence = false;
198 				cd->remnant = 0;
199 				cd->remnant_count = 0;
200 			}
201 		} else {
202 			if (*ib == '+') {
203 				cd->in_the_middle_of_utf7_sequence = true;
204 				cd->remnant = 0;
205 				cd->remnant_count = 0;
206 			} else {
207 #if defined(UCS_2)
208 				u4 = (uint_t)*ib;
209 				CHECK_OUTBUF_SZ_AND_WRITE_U2;
210 #elif defined(UCS_4)
211 				u4 = (uint_t)*ib;
212 				CHECK_OUTBUF_SZ_AND_WRITE_U4;
213 #elif defined(UTF_8)
214 				if (ob >= obtail) {
215 					errno = E2BIG;
216 					ret_val = (size_t)-1;
217 					break;
218 				}
219 				*ob++ = *ib;
220 #else
221 #error	"Fatal: One of UCS_2, UCS_4, or, UTF_8 is needed."
222 #endif
223 			}
224 		}
225 		cd->prevch = *ib++;
226 	}
227 
228 illegal_char_err:
229 	*inbuf = (char *)ib;
230 	*inbufleft = ibtail - ib;
231 	*outbuf = (char *)ob;
232 	*outbufleft = obtail - ob;
233 
234 
235 	return(ret_val);
236 }
237