/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
21*16d86563SAlexander Pyhalov
/*
 * Copyright (c) 1995, by Sun Microsystems, Inc.
 * All rights reserved.
 */
26*16d86563SAlexander Pyhalov
27*16d86563SAlexander Pyhalov #include <stdio.h>
28*16d86563SAlexander Pyhalov #include <stdlib.h>
29*16d86563SAlexander Pyhalov #include <sys/types.h>
30*16d86563SAlexander Pyhalov #include <errno.h>
31*16d86563SAlexander Pyhalov #include "unicode_cns11643_TW.h" /* UTF8 to CNS 11643 mapping table */
32*16d86563SAlexander Pyhalov #include "common_defs.h"
33*16d86563SAlexander Pyhalov
/* Bit masks and lead bytes used while building output bytes. */
#define	MSB		0x80	/* most significant bit of a byte */
#define	MBYTE		0x8e	/* lead byte of a 4 byte multi-byte character */
#define	PMASK		0xa0	/* base value added to a plane number */
#define	ONEBYTE		0xff	/* mask keeping the low-order byte */

/* ISO 2022 control characters written to the output stream. */
#define	SI		0x0f	/* shift in */
#define	SO		0x0e	/* shift out */
#define	ESC		0x1b	/* escape */

/*
 * Final byte of the "ESC $ )" designation sequence, indexed by plane number.
 * An earlier variant of this table was "0GH23456789:;<=>?".
 */
static const char plane_char[] = "0GHIJKLMNOPQRSTUV";

#define	GET_PLANEC(i)	(plane_char[(i)])

#define	NON_ID_CHAR	'?'	/* written for characters that have no mapping */
49*16d86563SAlexander Pyhalov
/*
 * Per-descriptor conversion state; allocated by _icv_open() and released by
 * _icv_close().
 */
typedef struct _icv_state {
	char	keepc[6];	/* bytes of the UTF-8 character read so far */
	short	cstate;		/* enum _CSTATE */
	short	istate;		/* enum _ISTATE */
	short	ustate;		/* enum _USTATE */
	int	_errno;		/* internal copy of the last error raised */
} _iconv_st;

/* C1 once an "ESC $ ) <plane>" designation has been written, else C0. */
enum _CSTATE {
	C0,
	C1
};

/* Shift state of the output stream: IN after SI, OUT after SO. */
enum _ISTATE {
	IN,
	OUT
};

/* Position inside the UTF-8 sequence currently being read by _icv_iconv(). */
enum _USTATE {
	U0,
	U1,
	U2,
	U3,
	U4,
	U5,
	U6,
	U7
};
61*16d86563SAlexander Pyhalov
62*16d86563SAlexander Pyhalov
63*16d86563SAlexander Pyhalov static int get_plane_no_by_utf(const char, const char, int *, unsigned long *);
64*16d86563SAlexander Pyhalov static int utf8_to_iso(int, int, unsigned long, char *, size_t);
65*16d86563SAlexander Pyhalov static int binsearch(unsigned long, utf_cns[], int);
66*16d86563SAlexander Pyhalov
67*16d86563SAlexander Pyhalov /*
68*16d86563SAlexander Pyhalov * Open; called from iconv_open()
69*16d86563SAlexander Pyhalov */
70*16d86563SAlexander Pyhalov void *
_icv_open()71*16d86563SAlexander Pyhalov _icv_open()
72*16d86563SAlexander Pyhalov {
73*16d86563SAlexander Pyhalov _iconv_st *st;
74*16d86563SAlexander Pyhalov
75*16d86563SAlexander Pyhalov if ((st = (_iconv_st *)malloc(sizeof(_iconv_st))) == NULL) {
76*16d86563SAlexander Pyhalov errno = ENOMEM;
77*16d86563SAlexander Pyhalov return ((void *) -1);
78*16d86563SAlexander Pyhalov }
79*16d86563SAlexander Pyhalov
80*16d86563SAlexander Pyhalov st->cstate = C0;
81*16d86563SAlexander Pyhalov st->istate = IN;
82*16d86563SAlexander Pyhalov st->ustate = U0;
83*16d86563SAlexander Pyhalov st->_errno = 0;
84*16d86563SAlexander Pyhalov
85*16d86563SAlexander Pyhalov #ifdef DEBUG
86*16d86563SAlexander Pyhalov fprintf(stderr, "========== iconv(): UTF2 --> ISO2022-7 ==========\n");
87*16d86563SAlexander Pyhalov #endif
88*16d86563SAlexander Pyhalov
89*16d86563SAlexander Pyhalov return ((void *) st);
90*16d86563SAlexander Pyhalov }
91*16d86563SAlexander Pyhalov
92*16d86563SAlexander Pyhalov
93*16d86563SAlexander Pyhalov /*
94*16d86563SAlexander Pyhalov * Close; called from iconv_close()
95*16d86563SAlexander Pyhalov */
96*16d86563SAlexander Pyhalov void
_icv_close(_iconv_st * st)97*16d86563SAlexander Pyhalov _icv_close(_iconv_st *st)
98*16d86563SAlexander Pyhalov {
99*16d86563SAlexander Pyhalov if (!st)
100*16d86563SAlexander Pyhalov errno = EBADF;
101*16d86563SAlexander Pyhalov else
102*16d86563SAlexander Pyhalov free(st);
103*16d86563SAlexander Pyhalov }
104*16d86563SAlexander Pyhalov
105*16d86563SAlexander Pyhalov
106*16d86563SAlexander Pyhalov /*
107*16d86563SAlexander Pyhalov * Actual conversion; called from iconv()
108*16d86563SAlexander Pyhalov */
109*16d86563SAlexander Pyhalov /*=========================================================
110*16d86563SAlexander Pyhalov *
111*16d86563SAlexander Pyhalov * State Machine for interpreting UTF8 code
112*16d86563SAlexander Pyhalov *
113*16d86563SAlexander Pyhalov *=========================================================
114*16d86563SAlexander Pyhalov * 2nd byte 3rd byte 4th byte
115*16d86563SAlexander Pyhalov * +----->------->------->U5------>U6--------->U7
116*16d86563SAlexander Pyhalov * | |
117*16d86563SAlexander Pyhalov * | 3 byte unicode |
118*16d86563SAlexander Pyhalov * +----->------->-------+ |
119*16d86563SAlexander Pyhalov * | | |
120*16d86563SAlexander Pyhalov * ^ v |
121*16d86563SAlexander Pyhalov * | 2 byte U2 ---> U3 |
122*16d86563SAlexander Pyhalov * | unicode v |
123*16d86563SAlexander Pyhalov * +------> U0 -------> U1 +-------->U4---+
124*16d86563SAlexander Pyhalov * ^ ascii | | ^ |
125*16d86563SAlexander Pyhalov * | | +-------->--------->--------+ |
126*16d86563SAlexander Pyhalov * | v v
127*16d86563SAlexander Pyhalov * +----<---+-----<------------<------------<------------+
128*16d86563SAlexander Pyhalov *
129*16d86563SAlexander Pyhalov *=========================================================*/
130*16d86563SAlexander Pyhalov size_t
_icv_iconv(_iconv_st * st,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft)131*16d86563SAlexander Pyhalov _icv_iconv(_iconv_st *st, char **inbuf, size_t *inbytesleft,
132*16d86563SAlexander Pyhalov char **outbuf, size_t *outbytesleft)
133*16d86563SAlexander Pyhalov {
134*16d86563SAlexander Pyhalov char c1 = '\0', c2 = '\0';
135*16d86563SAlexander Pyhalov int plane_no, n, unidx;
136*16d86563SAlexander Pyhalov /* pre_plane_no: need to be static when re-entry occurs on errno set */
137*16d86563SAlexander Pyhalov static int pre_plane_no = -1; /* previous plane number */
138*16d86563SAlexander Pyhalov unsigned long cnscode;
139*16d86563SAlexander Pyhalov
140*16d86563SAlexander Pyhalov if (st == NULL) {
141*16d86563SAlexander Pyhalov errno = EBADF;
142*16d86563SAlexander Pyhalov return ((size_t) -1);
143*16d86563SAlexander Pyhalov }
144*16d86563SAlexander Pyhalov
145*16d86563SAlexander Pyhalov if (inbuf == NULL || *inbuf == NULL) { /* Reset request. */
146*16d86563SAlexander Pyhalov st->cstate = C0;
147*16d86563SAlexander Pyhalov st->istate = IN;
148*16d86563SAlexander Pyhalov st->ustate = U0;
149*16d86563SAlexander Pyhalov st->_errno = 0;
150*16d86563SAlexander Pyhalov return ((size_t) 0);
151*16d86563SAlexander Pyhalov }
152*16d86563SAlexander Pyhalov
153*16d86563SAlexander Pyhalov #ifdef DEBUG
154*16d86563SAlexander Pyhalov fprintf(stderr, "=== (Re-entry) iconv(): UTF-8 --> ISO 2022-7 ===\n");
155*16d86563SAlexander Pyhalov fprintf(stderr, "st->cstate=%d\tst->istate=%d\tst->_errno=%d\tplane_no=%d\n",
156*16d86563SAlexander Pyhalov st->cstate, st->istate, st->_errno, plane_no);
157*16d86563SAlexander Pyhalov #endif
158*16d86563SAlexander Pyhalov st->_errno = 0; /* reset internal errno */
159*16d86563SAlexander Pyhalov errno = 0; /* reset external errno */
160*16d86563SAlexander Pyhalov
161*16d86563SAlexander Pyhalov /* a state machine for interpreting UTF8 code */
162*16d86563SAlexander Pyhalov while (*inbytesleft > 0 && *outbytesleft > 0) {
163*16d86563SAlexander Pyhalov
164*16d86563SAlexander Pyhalov uchar_t first_byte;
165*16d86563SAlexander Pyhalov
166*16d86563SAlexander Pyhalov switch (st->ustate) {
167*16d86563SAlexander Pyhalov case U0: /* assuming ASCII in the beginning */
168*16d86563SAlexander Pyhalov if ((**inbuf & MSB) == 0) { /* ASCII */
169*16d86563SAlexander Pyhalov if (st->istate == OUT) {
170*16d86563SAlexander Pyhalov st->cstate = C0;
171*16d86563SAlexander Pyhalov st->istate = IN;
172*16d86563SAlexander Pyhalov **outbuf = SI;
173*16d86563SAlexander Pyhalov (*outbuf)++;
174*16d86563SAlexander Pyhalov (*outbytesleft)--;
175*16d86563SAlexander Pyhalov if (*outbytesleft <= 0) {
176*16d86563SAlexander Pyhalov errno = E2BIG;
177*16d86563SAlexander Pyhalov return((size_t) -1);
178*16d86563SAlexander Pyhalov }
179*16d86563SAlexander Pyhalov }
180*16d86563SAlexander Pyhalov **outbuf = **inbuf;
181*16d86563SAlexander Pyhalov (*outbuf)++;
182*16d86563SAlexander Pyhalov (*outbytesleft)--;
183*16d86563SAlexander Pyhalov } else { /* Chinese character */
184*16d86563SAlexander Pyhalov if ((**inbuf & 0xe0) == 0xc0) { /* 2 byte unicode 0xc2..0xdf */
185*16d86563SAlexander Pyhalov
186*16d86563SAlexander Pyhalov /* invalid sequence if the first byte is either 0xc0 or 0xc1 */
187*16d86563SAlexander Pyhalov if ( number_of_bytes_in_utf8_char[((uchar_t)**inbuf)] == ICV_TYPE_ILLEGAL_CHAR )
188*16d86563SAlexander Pyhalov st->_errno = errno = EILSEQ;
189*16d86563SAlexander Pyhalov else {
190*16d86563SAlexander Pyhalov st->ustate = U1;
191*16d86563SAlexander Pyhalov st->keepc[0] = **inbuf;
192*16d86563SAlexander Pyhalov }
193*16d86563SAlexander Pyhalov } else if ((**inbuf & 0xf0) == 0xe0) { /* 3 byte 0xe0..0xef */
194*16d86563SAlexander Pyhalov st->ustate = U2;
195*16d86563SAlexander Pyhalov st->keepc[0] = **inbuf;
196*16d86563SAlexander Pyhalov } else {
197*16d86563SAlexander Pyhalov /* four bytes of UTF-8 sequences */
198*16d86563SAlexander Pyhalov if ( number_of_bytes_in_utf8_char[((uchar_t)**inbuf)] == ICV_TYPE_ILLEGAL_CHAR )
199*16d86563SAlexander Pyhalov st->_errno = errno = EILSEQ;
200*16d86563SAlexander Pyhalov else
201*16d86563SAlexander Pyhalov {
202*16d86563SAlexander Pyhalov st->ustate = U5;
203*16d86563SAlexander Pyhalov st->keepc[0] = **inbuf;
204*16d86563SAlexander Pyhalov }
205*16d86563SAlexander Pyhalov }
206*16d86563SAlexander Pyhalov }
207*16d86563SAlexander Pyhalov break;
208*16d86563SAlexander Pyhalov case U1: /* 2 byte unicode */
209*16d86563SAlexander Pyhalov if ((**inbuf & 0xc0) == 0x80) {
210*16d86563SAlexander Pyhalov st->ustate = U4;
211*16d86563SAlexander Pyhalov st->keepc[1] = **inbuf;
212*16d86563SAlexander Pyhalov c1 = (st->keepc[0]&0x1c)>>2;
213*16d86563SAlexander Pyhalov c2 = ((st->keepc[0]&0x03)<<6) | ((**inbuf)&0x3f);
214*16d86563SAlexander Pyhalov #ifdef DEBUG
215*16d86563SAlexander Pyhalov fprintf(stderr, "UTF8: %02x%02x --> ",
216*16d86563SAlexander Pyhalov st->keepc[0]&ONEBYTE, st->keepc[1]&ONEBYTE);
217*16d86563SAlexander Pyhalov #endif
218*16d86563SAlexander Pyhalov continue; /* should not advance *inbuf */
219*16d86563SAlexander Pyhalov } else {
220*16d86563SAlexander Pyhalov st->_errno = errno = EILSEQ;
221*16d86563SAlexander Pyhalov }
222*16d86563SAlexander Pyhalov break;
223*16d86563SAlexander Pyhalov case U2: /* 3 byte unicode - 2nd byte */
224*16d86563SAlexander Pyhalov
225*16d86563SAlexander Pyhalov first_byte = st->keepc[0];
226*16d86563SAlexander Pyhalov
227*16d86563SAlexander Pyhalov /* if the first byte is 0xed, it is illegal sequence if the second
228*16d86563SAlexander Pyhalov * one is between 0xa0 and 0xbf because surrogate section is ill-formed
229*16d86563SAlexander Pyhalov */
230*16d86563SAlexander Pyhalov if (((uchar_t)**inbuf) < valid_min_2nd_byte[first_byte] ||
231*16d86563SAlexander Pyhalov ((uchar_t)**inbuf) > valid_max_2nd_byte[first_byte] )
232*16d86563SAlexander Pyhalov st->_errno = errno = EILSEQ;
233*16d86563SAlexander Pyhalov else {
234*16d86563SAlexander Pyhalov st->ustate = U3;
235*16d86563SAlexander Pyhalov st->keepc[1] = **inbuf;
236*16d86563SAlexander Pyhalov }
237*16d86563SAlexander Pyhalov break;
238*16d86563SAlexander Pyhalov case U3: /* 3 byte unicode - 3rd byte */
239*16d86563SAlexander Pyhalov if ((**inbuf & 0xc0) == 0x80) {
240*16d86563SAlexander Pyhalov st->ustate = U4;
241*16d86563SAlexander Pyhalov st->keepc[2] = **inbuf;
242*16d86563SAlexander Pyhalov c1 = ((st->keepc[0]&0x0f)<<4) |
243*16d86563SAlexander Pyhalov ((st->keepc[1]&0x3c)>>2);
244*16d86563SAlexander Pyhalov c2 = ((st->keepc[1]&0x03)<<6) | ((**inbuf)&0x3f);
245*16d86563SAlexander Pyhalov #ifdef DEBUG
246*16d86563SAlexander Pyhalov fprintf(stderr, "UTF8: %02x%02x%02x --> ", st->keepc[0]&ONEBYTE,
247*16d86563SAlexander Pyhalov st->keepc[1]&ONEBYTE, **inbuf&ONEBYTE);
248*16d86563SAlexander Pyhalov #endif
249*16d86563SAlexander Pyhalov continue; /* should not advance *inbuf */
250*16d86563SAlexander Pyhalov } else {
251*16d86563SAlexander Pyhalov st->_errno = errno = EILSEQ;
252*16d86563SAlexander Pyhalov }
253*16d86563SAlexander Pyhalov break;
254*16d86563SAlexander Pyhalov case U4:
255*16d86563SAlexander Pyhalov plane_no = get_plane_no_by_utf(c1, c2, &unidx, &cnscode);
256*16d86563SAlexander Pyhalov if (plane_no == -2)
257*16d86563SAlexander Pyhalov { /* unicode is either 0xFFFE or 0xFFFF */
258*16d86563SAlexander Pyhalov st->_errno = errno = EILSEQ;
259*16d86563SAlexander Pyhalov break;
260*16d86563SAlexander Pyhalov }
261*16d86563SAlexander Pyhalov
262*16d86563SAlexander Pyhalov if (plane_no > 0) { /* legal unicode; illegal CNS */
263*16d86563SAlexander Pyhalov if ((st->istate == IN) || (pre_plane_no != plane_no)) {
264*16d86563SAlexander Pyhalov if ((st->cstate == C0) ||
265*16d86563SAlexander Pyhalov (pre_plane_no != plane_no)) {
266*16d86563SAlexander Pyhalov /* change plane # in Chinese mode */
267*16d86563SAlexander Pyhalov if (st->cstate == C1) {
268*16d86563SAlexander Pyhalov **outbuf = SI;
269*16d86563SAlexander Pyhalov (*outbuf)++;
270*16d86563SAlexander Pyhalov (*outbytesleft)--;
271*16d86563SAlexander Pyhalov }
272*16d86563SAlexander Pyhalov if (*outbytesleft < 4) {
273*16d86563SAlexander Pyhalov st->_errno = errno = E2BIG;
274*16d86563SAlexander Pyhalov return((size_t) -1);
275*16d86563SAlexander Pyhalov }
276*16d86563SAlexander Pyhalov pre_plane_no = plane_no;
277*16d86563SAlexander Pyhalov st->cstate = C1;
278*16d86563SAlexander Pyhalov **outbuf = ESC;
279*16d86563SAlexander Pyhalov *(*outbuf+1) = '$';
280*16d86563SAlexander Pyhalov *(*outbuf+2) = ')';
281*16d86563SAlexander Pyhalov *(*outbuf+3) = GET_PLANEC(plane_no);
282*16d86563SAlexander Pyhalov #ifdef DEBUG
283*16d86563SAlexander Pyhalov fprintf(stderr, "\n\t\t\t\tESC $ ) %c\t", *(*outbuf+3));
284*16d86563SAlexander Pyhalov #endif
285*16d86563SAlexander Pyhalov (*outbuf) += 4;
286*16d86563SAlexander Pyhalov (*outbytesleft) -= 4;
287*16d86563SAlexander Pyhalov if (*outbytesleft <= 0) {
288*16d86563SAlexander Pyhalov st->_errno = errno = E2BIG;
289*16d86563SAlexander Pyhalov return((size_t) -1);
290*16d86563SAlexander Pyhalov }
291*16d86563SAlexander Pyhalov }
292*16d86563SAlexander Pyhalov st->istate = OUT;
293*16d86563SAlexander Pyhalov **outbuf = SO;
294*16d86563SAlexander Pyhalov (*outbuf)++;
295*16d86563SAlexander Pyhalov (*outbytesleft)--;
296*16d86563SAlexander Pyhalov }
297*16d86563SAlexander Pyhalov }/* get_plane_no OK */
298*16d86563SAlexander Pyhalov
299*16d86563SAlexander Pyhalov n = utf8_to_iso(plane_no, unidx, cnscode,
300*16d86563SAlexander Pyhalov *outbuf, *outbytesleft);
301*16d86563SAlexander Pyhalov if (n > 0) {
302*16d86563SAlexander Pyhalov (*outbuf) += n;
303*16d86563SAlexander Pyhalov (*outbytesleft) -= n;
304*16d86563SAlexander Pyhalov } else {
305*16d86563SAlexander Pyhalov st->_errno = errno;
306*16d86563SAlexander Pyhalov return((size_t) -1);
307*16d86563SAlexander Pyhalov }
308*16d86563SAlexander Pyhalov st->ustate = U0;
309*16d86563SAlexander Pyhalov st->_errno = 0;
310*16d86563SAlexander Pyhalov break;
311*16d86563SAlexander Pyhalov case U5:
312*16d86563SAlexander Pyhalov
313*16d86563SAlexander Pyhalov first_byte = st->keepc[0];
314*16d86563SAlexander Pyhalov
315*16d86563SAlexander Pyhalov /* if the first byte is 0xed, it is illegal sequence if the second
316*16d86563SAlexander Pyhalov * one is between 0xa0 and 0xbf because surrogate section is ill-formed
317*16d86563SAlexander Pyhalov */
318*16d86563SAlexander Pyhalov if (((uchar_t)**inbuf) < valid_min_2nd_byte[first_byte] ||
319*16d86563SAlexander Pyhalov ((uchar_t)**inbuf) > valid_max_2nd_byte[first_byte] )
320*16d86563SAlexander Pyhalov st->_errno = errno = EILSEQ;
321*16d86563SAlexander Pyhalov else {
322*16d86563SAlexander Pyhalov st->ustate = U6;
323*16d86563SAlexander Pyhalov st->keepc[1] = **inbuf;
324*16d86563SAlexander Pyhalov }
325*16d86563SAlexander Pyhalov break;
326*16d86563SAlexander Pyhalov case U6:
327*16d86563SAlexander Pyhalov if ((**inbuf & 0xc0) == MSB) /* 0x80..0xbf */
328*16d86563SAlexander Pyhalov {
329*16d86563SAlexander Pyhalov st->ustate = U7;
330*16d86563SAlexander Pyhalov st->keepc[2] = **inbuf;
331*16d86563SAlexander Pyhalov }
332*16d86563SAlexander Pyhalov else
333*16d86563SAlexander Pyhalov st->_errno = errno = EILSEQ;
334*16d86563SAlexander Pyhalov break;
335*16d86563SAlexander Pyhalov case U7:
336*16d86563SAlexander Pyhalov if ((**inbuf & 0xc0) == MSB) /* 0x80..0xbf */
337*16d86563SAlexander Pyhalov { /* skip it to simplify */
338*16d86563SAlexander Pyhalov st->ustate = U0;
339*16d86563SAlexander Pyhalov st->_errno = 0;
340*16d86563SAlexander Pyhalov }
341*16d86563SAlexander Pyhalov else
342*16d86563SAlexander Pyhalov st->_errno = errno = EILSEQ;
343*16d86563SAlexander Pyhalov break;
344*16d86563SAlexander Pyhalov default: /* should never come here */
345*16d86563SAlexander Pyhalov st->_errno = errno = EILSEQ;
346*16d86563SAlexander Pyhalov st->ustate = U0; /* reset state */
347*16d86563SAlexander Pyhalov break;
348*16d86563SAlexander Pyhalov }
349*16d86563SAlexander Pyhalov
350*16d86563SAlexander Pyhalov if (st->_errno) {
351*16d86563SAlexander Pyhalov #ifdef DEBUG
352*16d86563SAlexander Pyhalov fprintf(stderr, "!!!!!\tst->_errno = %d\tst->ustate = %d\n",
353*16d86563SAlexander Pyhalov st->_errno, st->ustate);
354*16d86563SAlexander Pyhalov #endif
355*16d86563SAlexander Pyhalov break;
356*16d86563SAlexander Pyhalov }
357*16d86563SAlexander Pyhalov (*inbuf)++;
358*16d86563SAlexander Pyhalov (*inbytesleft)--;
359*16d86563SAlexander Pyhalov }
360*16d86563SAlexander Pyhalov
361*16d86563SAlexander Pyhalov if (errno)
362*16d86563SAlexander Pyhalov return((size_t) -1);
363*16d86563SAlexander Pyhalov
364*16d86563SAlexander Pyhalov if (*inbytesleft == 0 && st->ustate != U0) {
365*16d86563SAlexander Pyhalov errno = EINVAL;
366*16d86563SAlexander Pyhalov return ((size_t) -1);
367*16d86563SAlexander Pyhalov }
368*16d86563SAlexander Pyhalov
369*16d86563SAlexander Pyhalov if (*inbytesleft > 0 && *outbytesleft == 0) {
370*16d86563SAlexander Pyhalov errno = E2BIG;
371*16d86563SAlexander Pyhalov return((size_t) -1);
372*16d86563SAlexander Pyhalov }
373*16d86563SAlexander Pyhalov return (*inbytesleft);
374*16d86563SAlexander Pyhalov }
375*16d86563SAlexander Pyhalov
376*16d86563SAlexander Pyhalov
377*16d86563SAlexander Pyhalov /*
378*16d86563SAlexander Pyhalov * Get plane number by UTF8 code; i.e. plane #1 returns 1, #2 returns 2, etc.
379*16d86563SAlexander Pyhalov * Returns -1 on error conditions and return -2 due to illegal sequence
380*16d86563SAlexander Pyhalov *
381*16d86563SAlexander Pyhalov * Since binary search of the UTF8 to CNS table is necessary, might as well
382*16d86563SAlexander Pyhalov * return index and CNS code matching to the unicode.
383*16d86563SAlexander Pyhalov */
get_plane_no_by_utf(const char c1,const char c2,int * unidx,unsigned long * cnscode)384*16d86563SAlexander Pyhalov static int get_plane_no_by_utf(const char c1, const char c2,
385*16d86563SAlexander Pyhalov int *unidx, unsigned long *cnscode)
386*16d86563SAlexander Pyhalov {
387*16d86563SAlexander Pyhalov int ret;
388*16d86563SAlexander Pyhalov unsigned long unicode;
389*16d86563SAlexander Pyhalov
390*16d86563SAlexander Pyhalov unicode = (unsigned long) ((c1 & ONEBYTE) << 8) + (c2 & ONEBYTE);
391*16d86563SAlexander Pyhalov /* the 0xfffe and 0xffff should not be allowed */
392*16d86563SAlexander Pyhalov if ( unicode == 0xFFFE || unicode == 0xFFFF ) return -2;
393*16d86563SAlexander Pyhalov
394*16d86563SAlexander Pyhalov *unidx = binsearch(unicode, utf_cns_tab, MAX_UTF_NUM);
395*16d86563SAlexander Pyhalov if ((*unidx) >= 0)
396*16d86563SAlexander Pyhalov *cnscode = utf_cns_tab[*unidx].cnscode;
397*16d86563SAlexander Pyhalov else
398*16d86563SAlexander Pyhalov return(0); /* match from UTF8 to CNS not found */
399*16d86563SAlexander Pyhalov #ifdef DEBUG
400*16d86563SAlexander Pyhalov fprintf(stderr, "Unicode=%04x, idx=%5d, CNS=%x ", unicode, *unidx, *cnscode);
401*16d86563SAlexander Pyhalov #endif
402*16d86563SAlexander Pyhalov
403*16d86563SAlexander Pyhalov ret = (int) (*cnscode >> 16);
404*16d86563SAlexander Pyhalov switch (ret) {
405*16d86563SAlexander Pyhalov case 0x21: /* 0x8EA1 - G */
406*16d86563SAlexander Pyhalov case 0x22: /* 0x8EA2 - H */
407*16d86563SAlexander Pyhalov case 0x23: /* 0x8EA3 - I */
408*16d86563SAlexander Pyhalov case 0x24: /* 0x8EA4 - J */
409*16d86563SAlexander Pyhalov case 0x25: /* 0x8EA5 - K */
410*16d86563SAlexander Pyhalov case 0x26: /* 0x8EA6 - L */
411*16d86563SAlexander Pyhalov case 0x27: /* 0x8EA7 - M */
412*16d86563SAlexander Pyhalov case 0x28: /* 0x8EA8 - N */
413*16d86563SAlexander Pyhalov case 0x29: /* 0x8EA9 - O */
414*16d86563SAlexander Pyhalov case 0x2a: /* 0x8EAA - P */
415*16d86563SAlexander Pyhalov case 0x2b: /* 0x8EAB - Q */
416*16d86563SAlexander Pyhalov case 0x2c: /* 0x8EAC - R */
417*16d86563SAlexander Pyhalov case 0x2d: /* 0x8EAD - S */
418*16d86563SAlexander Pyhalov case 0x2f: /* 0x8EAF - U */
419*16d86563SAlexander Pyhalov case 0x30: /* 0x8EB0 - V */
420*16d86563SAlexander Pyhalov return (ret - 0x20); /* so that we can use GET_PLANEC() */
421*16d86563SAlexander Pyhalov case 0x2e: /* 0x8EAE - T */
422*16d86563SAlexander Pyhalov return (3); /* CNS 11643-1992 */
423*16d86563SAlexander Pyhalov default:
424*16d86563SAlexander Pyhalov return (-1);
425*16d86563SAlexander Pyhalov }
426*16d86563SAlexander Pyhalov }
427*16d86563SAlexander Pyhalov
428*16d86563SAlexander Pyhalov
429*16d86563SAlexander Pyhalov /*
430*16d86563SAlexander Pyhalov * ISO/IEC 10646 (Unicode) --> ISO 2022-7
431*16d86563SAlexander Pyhalov * Unicode --> UTF8 (FSS-UTF)
432*16d86563SAlexander Pyhalov * (File System Safe Universal Character Set Transformation Format)
433*16d86563SAlexander Pyhalov * Return: > 0 - converted with enough space in output buffer
434*16d86563SAlexander Pyhalov * = 0 - no space in outbuf
435*16d86563SAlexander Pyhalov */
utf8_to_iso(int plane_no,int unidx,unsigned long cnscode,char * buf,size_t buflen)436*16d86563SAlexander Pyhalov static int utf8_to_iso(int plane_no, int unidx, unsigned long cnscode,
437*16d86563SAlexander Pyhalov char *buf, size_t buflen)
438*16d86563SAlexander Pyhalov {
439*16d86563SAlexander Pyhalov unsigned long val; /* CNS 11643 value */
440*16d86563SAlexander Pyhalov #ifdef DEBUG
441*16d86563SAlexander Pyhalov char cns_str[5];
442*16d86563SAlexander Pyhalov #endif
443*16d86563SAlexander Pyhalov
444*16d86563SAlexander Pyhalov if (buflen < 2) {
445*16d86563SAlexander Pyhalov errno = E2BIG;
446*16d86563SAlexander Pyhalov return(0);
447*16d86563SAlexander Pyhalov }
448*16d86563SAlexander Pyhalov
449*16d86563SAlexander Pyhalov
450*16d86563SAlexander Pyhalov if (unidx < 0) { /* no match from UTF8 to CNS 11643 */
451*16d86563SAlexander Pyhalov *buf = *(buf+1) = NON_ID_CHAR;
452*16d86563SAlexander Pyhalov return(2);
453*16d86563SAlexander Pyhalov } else {
454*16d86563SAlexander Pyhalov val = cnscode & 0xffff;
455*16d86563SAlexander Pyhalov *buf = (val & 0xff00) >> 8;
456*16d86563SAlexander Pyhalov *(buf+1) = val & 0xff;
457*16d86563SAlexander Pyhalov }
458*16d86563SAlexander Pyhalov #ifdef DEBUG
459*16d86563SAlexander Pyhalov fprintf(stderr, "\t%02x%02x\t", *buf, *(buf+1));
460*16d86563SAlexander Pyhalov #endif
461*16d86563SAlexander Pyhalov
462*16d86563SAlexander Pyhalov #ifdef DEBUG
463*16d86563SAlexander Pyhalov switch (plane_no) {
464*16d86563SAlexander Pyhalov case 1:
465*16d86563SAlexander Pyhalov cns_str[0] = *buf | MSB;
466*16d86563SAlexander Pyhalov cns_str[1] = *(buf+1) | MSB;
467*16d86563SAlexander Pyhalov cns_str[2] = cns_str[3] = cns_str[4] = NULL;
468*16d86563SAlexander Pyhalov break;
469*16d86563SAlexander Pyhalov case 2:
470*16d86563SAlexander Pyhalov case 3:
471*16d86563SAlexander Pyhalov case 4:
472*16d86563SAlexander Pyhalov case 5:
473*16d86563SAlexander Pyhalov case 6:
474*16d86563SAlexander Pyhalov case 7:
475*16d86563SAlexander Pyhalov case 8:
476*16d86563SAlexander Pyhalov case 9:
477*16d86563SAlexander Pyhalov case 10:
478*16d86563SAlexander Pyhalov case 11:
479*16d86563SAlexander Pyhalov case 12:
480*16d86563SAlexander Pyhalov case 13:
481*16d86563SAlexander Pyhalov case 14:
482*16d86563SAlexander Pyhalov case 15:
483*16d86563SAlexander Pyhalov case 16:
484*16d86563SAlexander Pyhalov cns_str[0] = MBYTE;
485*16d86563SAlexander Pyhalov cns_str[1] = (char) PMASK + plane_no;
486*16d86563SAlexander Pyhalov cns_str[2] = (char) *buf | MSB;
487*16d86563SAlexander Pyhalov cns_str[3] = (char) *(buf+1) | MSB;
488*16d86563SAlexander Pyhalov cns_str[4] = NULL;
489*16d86563SAlexander Pyhalov break;
490*16d86563SAlexander Pyhalov }
491*16d86563SAlexander Pyhalov
492*16d86563SAlexander Pyhalov fprintf(stderr, "#%d ->%s<-\n", plane_no, cns_str);
493*16d86563SAlexander Pyhalov #endif
494*16d86563SAlexander Pyhalov return(2);
495*16d86563SAlexander Pyhalov }
496*16d86563SAlexander Pyhalov
497*16d86563SAlexander Pyhalov
498*16d86563SAlexander Pyhalov /* binsearch: find x in v[0] <= v[1] <= ... <= v[n-1] */
binsearch(unsigned long x,utf_cns v[],int n)499*16d86563SAlexander Pyhalov static int binsearch(unsigned long x, utf_cns v[], int n)
500*16d86563SAlexander Pyhalov {
501*16d86563SAlexander Pyhalov int low, high, mid;
502*16d86563SAlexander Pyhalov
503*16d86563SAlexander Pyhalov low = 0;
504*16d86563SAlexander Pyhalov high = n - 1;
505*16d86563SAlexander Pyhalov while (low <= high) {
506*16d86563SAlexander Pyhalov mid = (low + high) / 2;
507*16d86563SAlexander Pyhalov if (x < v[mid].unicode)
508*16d86563SAlexander Pyhalov high = mid - 1;
509*16d86563SAlexander Pyhalov else if (x > v[mid].unicode)
510*16d86563SAlexander Pyhalov low = mid + 1;
511*16d86563SAlexander Pyhalov else /* found match */
512*16d86563SAlexander Pyhalov return mid;
513*16d86563SAlexander Pyhalov }
514*16d86563SAlexander Pyhalov return (-1); /* no match */
515*16d86563SAlexander Pyhalov }
516