xref: /illumos-gate/usr/src/lib/iconv_modules/ja/common/ISO-2022-JP_TO_UTF-8.c (revision f52943a93040563107b95bccb9db87d9971ef47d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 1997-2003 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <errno.h>
30 #include <euc.h>
31 #include "japanese.h"
32 #include "jfp_iconv_unicode.h"
33 
/* Note: the JFP_J2U_ICONV_RFC1468 macro passes hankaku katakana through. */
35 #ifdef  RFC1468_MODE
36 #define	JFP_J2U_ICONV_RFC1468
37 #else
38 #define	JFP_J2U_ICONV
39 #endif
40 #include "jfp_jis_to_ucs2.h"
41 
/*
 * struct _icv_state: conversion state kept in the descriptor so that it
 * survives from one _icv_iconv() call to the next.
 */
struct _icv_state {
	/* character set in effect when the previous call returned */
	int	_st_cset;
	/* set chosen by the last escape sequence; restored when SI is seen */
	int	_st_cset_sav;
};
49 
50 void *
51 _icv_open()
52 {
53 	struct _icv_state *st;
54 
55 	if ((st = (struct _icv_state *)malloc(sizeof (struct _icv_state)))
56 									== NULL)
57 		return ((void *)ERR_RETURN);
58 
59 	st->_st_cset_sav = st->_st_cset = CS_0;
60 
61 	return (st);
62 }
63 
/*
 * _icv_close: release a conversion descriptor.
 *
 * st is handed straight to free(); nothing else is torn down here.
 */
void
_icv_close(struct _icv_state *st)
{
	free(st);
}
69 
70 size_t
71 _icv_iconv(struct _icv_state *st, char **inbuf, size_t *inbytesleft,
72 				char **outbuf, size_t *outbytesleft)
73 {
74 	int cset, stat, ret_val;
75 	char *ip, ic;
76 	size_t ileft;
77 	size_t retval;
78 	char		*op;
79 	size_t		oleft;
80 	unsigned int index = 0;
81 
82 	/*
83 	 * If inbuf and/or *inbuf are NULL, reset conversion descriptor
84 	 * and put escape sequence if needed.
85 	 */
86 	if ((inbuf == NULL) || (*inbuf == NULL)) {
87 		st->_st_cset_sav = st->_st_cset = CS_0;
88 		return ((size_t)0);
89 	}
90 
91 	cset = st->_st_cset;
92 	stat = ST_INIT;
93 
94 	ip = *inbuf;
95 	op = *outbuf;
96 	ileft = *inbytesleft;
97 	oleft = *outbytesleft;
98 
99 	/*
100 	 * Main loop; 1 loop per 1 input byte
101 	 */
102 
103 	while ((int)ileft > 0) {
104 		GET(ic);
105 		if (stat == ST_ESC) {
106 			if (ic == MBTOG0_1) {
107 				if ((int)ileft > 0) {
108 					stat = ST_MBTOG0_1;
109 					continue;
110 				} else {
111 					UNGET();
112 					UNGET();
113 					errno = EINVAL;
114 					retval = (size_t)ERR_RETURN;
115 					goto ret;
116 				}
117 			} else if (ic == SBTOG0_1) {
118 				if ((int)ileft > 0) {
119 					stat = ST_SBTOG0;
120 					continue;
121 				} else {
122 					UNGET();
123 					UNGET();
124 					errno = EINVAL;
125 					retval = (size_t)ERR_RETURN;
126 					goto ret;
127 				}
128 			} else if (ic == X208REV_1) {
129 				if ((int)ileft > 0) {
130 					stat = ST_208REV_1;
131 					continue;
132 				} else {
133 					UNGET();
134 					UNGET();
135 					errno = EINVAL;
136 					retval = (size_t)ERR_RETURN;
137 					goto ret;
138 				}
139 			} else {
140 				UNGET();
141 				UNGET();
142 				errno = EILSEQ;
143 				retval = (size_t)ERR_RETURN;
144 				goto ret;
145 			}
146 		} else if (stat == ST_MBTOG0_1) {
147 			if ((ic == F_X0208_83_90) || (ic == F_X0208_78)) {
148 				stat = ST_INIT;
149 				st->_st_cset_sav = cset = CS_1;
150 				continue;
151 			} else if (ic == MBTOG0_2) {
152 				if ((int)ileft > 0) {
153 					stat = ST_MBTOG0_2;
154 					continue;
155 				} else {
156 					UNGET();
157 					UNGET();
158 					UNGET();
159 					errno = EINVAL;
160 					retval = (size_t)ERR_RETURN;
161 					goto ret;
162 				}
163 			} else if (ic == F_X0212_90) {
164 				stat = ST_INIT;
165 				st->_st_cset_sav = cset = CS_3;
166 				continue;
167 			} else {
168 				UNGET();
169 				UNGET();
170 				UNGET();
171 				errno = EILSEQ;
172 				retval = (size_t)ERR_RETURN;
173 				goto ret;
174 			}
175 		} else if (stat == ST_MBTOG0_2) {
176 			if ((ic == F_X0208_83_90) || (ic == F_X0208_78)) {
177 				stat = ST_INIT;
178 				st->_st_cset_sav = cset = CS_1;
179 				continue;
180 			} else if (ic == F_X0212_90) {
181 				stat = ST_INIT;
182 				st->_st_cset_sav = cset = CS_3;
183 				continue;
184 			} else {
185 				UNGET();
186 				UNGET();
187 				UNGET();
188 				UNGET();
189 				errno = EILSEQ;
190 				retval = (size_t)ERR_RETURN;
191 				goto ret;
192 			}
193 		} else if (stat == ST_SBTOG0) {
194 			if ((ic == F_ASCII) ||
195 				(ic == F_X0201_RM) ||
196 				(ic == F_ISO646)) {
197 				stat = ST_INIT;
198 				st->_st_cset_sav = cset = CS_0;
199 				continue;
200 			} else if (ic == F_X0201_KN) {
201 				stat = ST_INIT;
202 				st->_st_cset_sav = cset = CS_2;
203 				continue;
204 			} else {
205 				UNGET();
206 				UNGET();
207 				UNGET();
208 				errno = EILSEQ;
209 				retval = (size_t)ERR_RETURN;
210 				goto ret;
211 			}
212 		} else if (stat == ST_208REV_1) {
213 			if (ic == X208REV_2) {
214 				if ((int)ileft > 0) {
215 					stat = ST_208REV_2;
216 					continue;
217 				} else {
218 					UNGET();
219 					UNGET();
220 					UNGET();
221 					errno = EINVAL;
222 					retval = (size_t)ERR_RETURN;
223 					goto ret;
224 				}
225 			} else {
226 				UNGET();
227 				UNGET();
228 				UNGET();
229 				errno = EILSEQ;
230 				retval = (size_t)ERR_RETURN;
231 				goto ret;
232 			}
233 		} else if (stat == ST_208REV_2) {
234 			if (ic == ESC) {
235 				if ((int)ileft > 0) {
236 					stat = ST_REV_AFT_ESC;
237 					continue;
238 				} else {
239 					UNGET();
240 					UNGET();
241 					UNGET();
242 					UNGET();
243 					errno = EINVAL;
244 					retval = (size_t)ERR_RETURN;
245 					goto ret;
246 				}
247 			} else {
248 				UNGET();
249 				UNGET();
250 				UNGET();
251 				UNGET();
252 				errno = EILSEQ;
253 				retval = (size_t)ERR_RETURN;
254 				goto ret;
255 			}
256 		} else if (stat == ST_REV_AFT_ESC) {
257 			if (ic == MBTOG0_1) {
258 				if ((int)ileft > 0) {
259 					stat = ST_REV_AFT_MBTOG0_1;
260 					continue;
261 				} else {
262 					UNGET();
263 					UNGET();
264 					UNGET();
265 					UNGET();
266 					UNGET();
267 					errno = EINVAL;
268 					retval = (size_t)ERR_RETURN;
269 					goto ret;
270 				}
271 			} else {
272 				UNGET();
273 				UNGET();
274 				UNGET();
275 				UNGET();
276 				UNGET();
277 				errno = EILSEQ;
278 				retval = (size_t)ERR_RETURN;
279 				goto ret;
280 			}
281 		} else if (stat == ST_REV_AFT_MBTOG0_1) {
282 			if (ic == F_X0208_83_90) {
283 				stat = ST_INIT;
284 				st->_st_cset_sav = cset = CS_1;
285 				continue;
286 			} else if (ic == MBTOG0_2) {
287 				if ((int)ileft > 0) {
288 					stat = ST_REV_AFT_MBTOG0_2;
289 					continue;
290 				} else {
291 					UNGET();
292 					UNGET();
293 					UNGET();
294 					UNGET();
295 					UNGET();
296 					UNGET();
297 					errno = EINVAL;
298 					retval = (size_t)ERR_RETURN;
299 					goto ret;
300 				}
301 			} else {
302 				UNGET();
303 				UNGET();
304 				UNGET();
305 				UNGET();
306 				UNGET();
307 				UNGET();
308 				errno = EILSEQ;
309 				retval = (size_t)ERR_RETURN;
310 				goto ret;
311 			}
312 		} else if (stat == ST_REV_AFT_MBTOG0_2) {
313 			if (ic == F_X0208_83_90) {
314 				stat = ST_INIT;
315 				st->_st_cset_sav = cset = CS_1;
316 				continue;
317 			} else {
318 				UNGET();
319 				UNGET();
320 				UNGET();
321 				UNGET();
322 				UNGET();
323 				UNGET();
324 				UNGET();
325 				errno = EILSEQ;
326 				retval = (size_t)ERR_RETURN;
327 				goto ret;
328 			}
329 		}
330 		/*
331 		 * Break through chars or ESC sequence
332 		 * if (stat == ST_INIT)
333 		 */
334 		if (ic == ESC) {
335 			if ((int)ileft > 0) {
336 				stat = ST_ESC;
337 				continue;
338 			} else {
339 				UNGET();
340 				errno = EINVAL;
341 				retval = (size_t)ERR_RETURN;
342 				goto ret;
343 			}
344 		/*
345 		 * XXX- Because V3 mailtool uses SI/SO to switch
346 		 *	G0 and G1 sets while it puts "iso2022-7"
347 		 *	as its "X-Sun-Charset" tag. Though it
348 		 *	breaks ISO-2022-JP definition based on
349 		 *	UI-OSF, dtmail have handle them correctly.
350 		 *	Therefore, we have to following a few codes, UGH.
351 		 */
352 		} else if (ic == SO) {
353 			cset = CS_2;
354 			stat = ST_INIT;
355 			continue;
356 		} else if (ic == SI) {
357 			cset = st->_st_cset_sav;
358 			stat = ST_INIT;
359 			continue;
360 		} else if (!(ic & CMSB)) {
361 			if ((cset == CS_0) || (cset == CS_2)){
362 				if (cset == CS_0) {
363 					index = (int)_jfp_tbl_jisx0201roman_to_ucs2[(int)ic];
364 				} else if (cset == CS_2) {
365 					index =
366 					(int)_jfp_tbl_jisx0201kana_to_ucs2[(ic - 0x21)];
367 				}
368 				if ((ret_val = write_unicode(
369 					(unsigned int)index, &op, &oleft,
370 					B_FALSE, "writing CS_0/2"))
371 					< 0) {
372 					/* errno is set in write_unicode */
373 					UNGET();
374 					retval = (size_t)ERR_RETURN;
375 					goto ret;
376 				}
377 				stat = ST_INIT;
378 				continue;
379 			} else if ((cset == CS_1) || (cset == CS_3)) {
380 				if ((int)ileft > 0) {
381 					if ((ic < 0x21) || (ic == 0x7f)) {
382 						UNGET();
383 						errno = EILSEQ;
384 						retval = (size_t)ERR_RETURN;
385 						goto ret;
386 					} else if ((*ip < 0x21) || (*ip ==
387 					0x7f)) {
388 						UNGET();
389 						errno = EILSEQ;
390 						retval = (size_t)ERR_RETURN;
391 						goto ret;
392 					}
393 					index = ((ic - 0x21) * 94)
394 							+ (*ip - 0x21);
395 					if (cset == CS_1) {
396 #ifdef  RFC1468_MODE /* Convert VDC and UDC to GETA(DEFC_U in jis%UTF-8.h) */
397 						if ((ic == 0x2d) ||
398 						(0x75 <= ic))
399 							index = 0x3013;
400 						else
401 							index = (int)
402 							_jfp_tbl_jisx0208_to_ucs2[index];
403 #else   /* ISO-2022-JP.UIOSF */
404 						index = (int)
405 							_jfp_tbl_jisx0208_to_ucs2[index];
406 #endif  /* RFC1468_MODE */
407 					} else if (cset == CS_3) {
408 #ifdef  RFC1468_MODE /* Convert JIS X 0212 to GETA(DEFC_U in jis%UTF-8.h) */
409 						index = 0x3013;
410 #else   /* ISO-2022-JP.UIOSF */
411 						index =
412 						(int)_jfp_tbl_jisx0212_to_ucs2[index];
413 #endif  /* RFC1468_MODE */
414 					}
415 					if ((ret_val = write_unicode(
416 						(unsigned int)index,
417 						&op, &oleft,
418 						B_FALSE, "writing CS_1/3"))
419 						< 0) {
420 						/* errno is set
421 						in write_unicode */
422 						UNGET();
423 						retval =
424 						(size_t)ERR_RETURN;
425 		                                goto ret;
426 					}
427 					/* dummy GET for 2nd byte */
428 					GET(ic);
429 					stat = ST_INIT;
430 					continue;
431 				} else {
432 					UNGET();
433 					errno = EINVAL;
434 					retval = (size_t)ERR_RETURN;
435 					goto ret;
436 				}
437 			}
438 		} else {
439 			UNGET();
440 			errno = EILSEQ;
441 			retval = (size_t)ERR_RETURN;
442 			goto ret;
443 		}
444 	}
445 	retval = ileft;
446 ret:
447 	*inbuf = ip;
448 	*inbytesleft = ileft;
449 	*outbuf = (char *)op;
450 	*outbytesleft = oleft;
451 	st->_st_cset = cset;
452 
453 	return (retval);
454 }
455