xref: /illumos-gate/usr/src/lib/iconv_modules/ja/common/PCK_TO_jis.c (revision f52943a93040563107b95bccb9db87d9971ef47d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 1994-2003 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <errno.h>
29 #include <euc.h>
30 #include "japanese.h"
31 
/*
 * struct _icv_state: per-descriptor conversion state.
 *
 * Carries the currently selected output character set from one
 * _icv_iconv() call to the next.
 */
struct _icv_state {
	int	_st_cset;	/* current character set: CS_0 .. CS_3 */
};
38 
39 void *
40 _icv_open()
41 {
42 	struct _icv_state *st;
43 
44 	if ((st = (struct _icv_state *)malloc(sizeof (struct _icv_state)))
45 									== NULL)
46 		return ((void *)ERR_RETURN);
47 
48 	st->_st_cset = CS_0;
49 	return (st);
50 }
51 
/*
 * Release a conversion state; the only action taken is free(st).
 */
void
_icv_close(struct _icv_state *st)
{
	free(st);
}
57 
58 
59 
60 /*
61  * Actual conversion; called from iconv()
62  */
63 size_t
64 _icv_iconv(struct _icv_state *st, char **inbuf, size_t *inbytesleft,
65 				char **outbuf, size_t *outbytesleft)
66 {
67 	int		cset;
68 	int		stat = ST_INIT;
69 	unsigned char	*ip, ic;
70 	char			*op;
71 	size_t			ileft, oleft;
72 	size_t			retval;
73 
74 	/*
75 	 * If inbuf and/or *inbuf are NULL, reset conversion descriptor
76 	 * and put escape sequence if needed.
77 	 */
78 	if ((inbuf == NULL) || (*inbuf == NULL)) {
79 		if ((st->_st_cset == CS_1) || (st->_st_cset == CS_3)) {
80 			if ((outbuf != NULL) && (*outbuf != NULL)
81 					&& (outbytesleft != NULL)) {
82 				op = *outbuf;
83 				oleft = *outbytesleft;
84 				if (oleft < SEQ_SBTOG0) {
85 					errno = E2BIG;
86 					return ((size_t)-1);
87 				}
88 				PUT(ESC);
89 				PUT(SBTOG0_1);
90 				PUT(F_X0201_RM);
91 				*outbuf = op;
92 				*outbytesleft = oleft;
93 			}
94 			st->_st_cset = CS_0;
95 		} else if (st->_st_cset == CS_2) {
96 			if ((outbuf != NULL) && (*outbuf != NULL)
97 					&& (outbytesleft != NULL)) {
98 				op = *outbuf;
99 				oleft = *outbytesleft;
100 				if (oleft < SEQ_SOSI) {
101 					errno = E2BIG;
102 					return ((size_t)-1);
103 				}
104 				PUT(SI);
105 				*outbuf = op;
106 				*outbytesleft = oleft;
107 			}
108 			st->_st_cset = CS_0;
109 		}
110 		return ((size_t)0);
111 	}
112 
113 	cset = st->_st_cset;
114 
115 	ip = (unsigned char *)*inbuf;
116 	op = *outbuf;
117 	ileft = *inbytesleft;
118 	oleft = *outbytesleft;
119 
120 	/*
121 	 * Main loop; basically 1 loop per 1 input byte
122 	 */
123 
124 	while ((int)ileft > 0) {
125 		GET(ic);
126 		if ((stat == ST_INCS1) || (stat == ST_INCS3)) {
127 			ic = sjtojis2[ic];
128 			PUT(ic);
129 			stat = ST_INIT;
130 			continue;
131 		} else if (ISASC((int)ic)) {		/* ASCII */
132 			if ((cset == CS_1) || (cset == CS_3)) {
133 				CHECK2BIG(SEQ_SBTOG0,1);
134 				PUT(ESC);	/* to JIS X 0201 Roman */
135 				PUT(SBTOG0_1);
136 				PUT(F_X0201_RM);
137 			} else if (cset == CS_2) {
138 				CHECK2BIG(SEQ_SOSI,1);
139 				PUT(SI);	/* Shift In */
140 			}
141 			cset = CS_0;
142 			CHECK2BIG(JISW0,1);
143 			PUT(ic);
144 			continue;
145 		} else if (ISSJKANA(ic)) {		/* Kana starts */
146 			if ((cset == CS_1) || (cset == CS_3)) {
147 				CHECK2BIG(SEQ_SBTOG0,1);
148 				cset = CS_0;
149 				PUT(ESC);
150 				PUT(SBTOG0_1);
151 				PUT(F_X0201_RM);
152 			}
153 			if (cset != CS_2) {
154 				CHECK2BIG(SEQ_SOSI,1);
155 				cset = CS_2;
156 				PUT(SO);
157 			}
158 			CHECK2BIG(JISW2,1);
159 			stat = ST_INIT;
160 			PUT(ic & CMASK);
161 			continue;
162 		} else if (ISSJKANJI1(ic)) {	/* CS_1 Kanji starts */
163 			if ((int)ileft > 0) {
164 				if (ISSJKANJI2(*ip)) {
165 					if (cset == CS_2) {
166 						cset = CS_0;
167 						PUT(SI);
168 					}
169 					if (cset != CS_1) {
170 						CHECK2BIG(SEQ_MBTOG0_O,1);
171 						cset = CS_1;
172 						PUT(ESC);
173 						PUT(MBTOG0_1);
174 						PUT(F_X0208_83_90);
175 					}
176 					CHECK2BIG(JISW1,1);
177 					stat = ST_INCS1;
178 					ic = sjtojis1[(ic - 0x80)];
179 					if (*ip >= 0x9f) {
180 						ic++;
181 					}
182 					PUT(ic);
183 					continue;
184 				} else {	/* 2nd byte is illegal */
185 					UNGET();
186 					errno = EILSEQ;
187 					retval = (size_t)ERR_RETURN;
188 					goto ret;
189 				}
190 			} else {		/* input fragment of Kanji */
191 				UNGET();
192 				errno = EINVAL;
193 				retval = (size_t)ERR_RETURN;
194 				goto ret;
195 			}
196 		} else if (ISSJSUPKANJI1(ic)) { /* CS_3 Kanji starts */
197 			if ((int)ileft > 0) {
198 				if (ISSJKANJI2(*ip)) {
199 					if (cset == CS_2) {
200 						cset = CS_0;
201 						PUT(SI);
202 					}
203 					if (cset != CS_3) {
204 						CHECK2BIG(SEQ_MBTOG0,1);
205 						cset = CS_3;
206 						PUT(ESC);
207 						PUT(MBTOG0_1);
208 						PUT(MBTOG0_2);
209 						PUT(F_X0212_90);
210 					}
211 					CHECK2BIG(JISW3,1);
212 					stat = ST_INCS3;
213 					ic = sjtojis1[(ic - 0x80)];
214 					if (*ip >= 0x9f) {
215 						ic++;
216 					}
217 					PUT(ic);
218 					continue;
219 				} else {	/* 2nd byte is illegal */
220 					UNGET();
221 					errno = EILSEQ;
222 					retval = (size_t)ERR_RETURN;
223 					goto ret;
224 				}
225 			} else {		/* input fragment of Kanji */
226 				UNGET();
227 				errno = EINVAL;
228 				retval = (size_t)ERR_RETURN;
229 				goto ret;
230 			}
231 		} else if (ISSJIBM(ic) || /* Extended IBM char. area */
232 			ISSJNECIBM(ic)) { /* NEC/IBM char. area */
233 			/*
234 			 * We need a special treatment for each codes.
235 			 * By adding some offset number for them, we
236 			 * can process them as the same way of that of
237 			 * extended IBM chars.
238 			 */
239 			if ((int)ileft > 0) {
240 				if (ISSJKANJI2(*ip)) {
241 					unsigned short dest;
242 					dest = (ic << 8);
243 					GET(ic);
244 					dest += ic;
245 					if (cset == CS_2) {
246 						cset = CS_0;
247 						PUT(SI);
248 					}
249 					if ((0xed40 <= dest) &&
250 						(dest <= 0xeffc)) {
251 						REMAP_NEC(dest);
252 						if (dest == 0xffff) {
253 							goto ill_ibm;
254 						}
255 					}
256 					/*
257 					 * XXX: 0xfa54 and 0xfa5b must be mapped
258 					 *	to JIS0208 area. Therefore we
259 					 *	have to do special treatment.
260 					 */
261 					if ((cset != CS_1) &&
262 						((dest == 0xfa54) ||
263 						(dest == 0xfa5b))) {
264 						CHECK2BIG(SEQ_MBTOG0_O,2);
265 						cset = CS_1;
266 						PUT(ESC);
267 						PUT(MBTOG0_1);
268 						PUT(F_X0208_83_90);
269 						CHECK2BIG(JISW1,2);
270 						if (dest == 0xfa54) {
271 							PUT(0x22);
272 							PUT(0x4c);
273 						} else {
274 							PUT(0x22);
275 							PUT(0x68);
276 						}
277 						continue;
278 					}
279 					if (cset != CS_3) {
280 						CHECK2BIG(SEQ_MBTOG0,2);
281 						cset = CS_3;
282 						PUT(ESC);
283 						PUT(MBTOG0_1);
284 						PUT(MBTOG0_2);
285 						PUT(F_X0212_90);
286 					}
287 					CHECK2BIG(JISW3,2);
288 					dest = dest - 0xfa40 -
289 						(((dest>>8) - 0xfa) * 0x40);
290 					dest = sjtoibmext[dest];
291 					if (dest == 0xffff) {
292 						/*
293 						 * Illegal code points
294 						 * in IBM-EXT area.
295 						 */
296 ill_ibm:
297 						UNGET();
298 						UNGET();
299 						errno = EILSEQ;
300 						retval = (size_t)ERR_RETURN;
301 						goto ret;
302 					}
303 					PUT(((dest>>8) & 0x7f));
304 					PUT(dest & 0x7f);
305 					continue;
306 				} else {	/* 2nd byte is illegal */
307 					UNGET();
308 					errno = EILSEQ;
309 					retval = (size_t)ERR_RETURN;
310 					goto ret;
311 				}
312 			} else {		/* input fragment of Kanji */
313 				UNGET();
314 				errno = EINVAL;
315 				retval = (size_t)ERR_RETURN;
316 				goto ret;
317 			}
318 		} else if ((0xeb <= ic) && (ic <= 0xec)) {
319 		/*
320 		 * Based on the draft convention of OSF-JVC CDEWG,
321 		 * characters in this area will be mapped to
322 		 * "CHIKAN-MOJI." (convertible character)
323 		 * So far, we'll use (0x222e) for it.
324 		 */
325 			if ((int)ileft > 0) {
326 				if (ISSJKANJI2(*ip)) {
327 					if (cset == CS_2) {
328 						cset = CS_0;
329 						PUT(SI);
330 					}
331 					if (cset != CS_1) {
332 						CHECK2BIG(SEQ_MBTOG0_O,1);
333 						cset = CS_1;
334 						PUT(ESC);
335 						PUT(MBTOG0_1);
336 						PUT(F_X0208_83_90);
337 					}
338 					CHECK2BIG(JISW1,1);
339 					GET(ic); /* Dummy */
340 					PUT((JGETA>>8) & CMASK);
341 					PUT(JGETA & CMASK);
342 					continue;
343 				} else {	/* 2nd byte is illegal */
344 					UNGET();
345 					errno = EILSEQ;
346 					retval = (size_t)ERR_RETURN;
347 					goto ret;
348 				}
349 			} else {		/* input fragment of Kanji */
350 				UNGET();
351 				errno = EINVAL;
352 				retval = (size_t)ERR_RETURN;
353 				goto ret;
354 			}
355 		} else {			/* 1st byte is illegal */
356 			UNGET();
357 			errno = EILSEQ;
358 			retval = (size_t)ERR_RETURN;
359 			goto ret;
360 		}
361 	}
362 	retval = ileft;
363 ret:
364 	*inbuf = (char *)ip;
365 	*inbytesleft = ileft;
366 	*outbuf = op;
367 	*outbytesleft = oleft;
368 	st->_st_cset = cset;
369 
370 	return (retval);
371 }
372