xref: /illumos-gate/usr/src/lib/libc/port/locale/setrunelocale.c (revision b6805bf78d2bbbeeaea8909a05623587b42d58b3)
1 /*
2  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
3  * Copyright (c) 1993
4  *	The Regents of the University of California.  All rights reserved.
5  *
6  * This code is derived from software contributed to Berkeley by
7  * Paul Borman at Krystal Technologies.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  */
33 
34 #include "lint.h"
35 #include "file64.h"
36 #include <errno.h>
37 #include <limits.h>
38 #include <string.h>
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <unistd.h>
42 #include <wchar.h>
43 #include "runetype.h"
44 #include "ldpart.h"
45 #include "mblocal.h"
46 #include "setlocale.h"
47 #include "_ctype.h"
48 #include "../i18n/_locale.h"
49 
50 extern _RuneLocale	*_Read_RuneMagi(FILE *);
51 extern unsigned char	__ctype_C[];
52 
53 static int		__setrunelocale(const char *);
54 
55 static int
56 __setrunelocale(const char *encoding)
57 {
58 	FILE *fp;
59 	char name[PATH_MAX];
60 	_RuneLocale *rl;
61 	int saverr, ret;
62 	size_t (*old__mbrtowc)(wchar_t *_RESTRICT_KYWD,
63 	    const char *_RESTRICT_KYWD, size_t, mbstate_t *_RESTRICT_KYWD);
64 	size_t (*old__wcrtomb)(char *_RESTRICT_KYWD, wchar_t,
65 	    mbstate_t *_RESTRICT_KYWD);
66 	int (*old__mbsinit)(const mbstate_t *);
67 	size_t (*old__mbsnrtowcs)(wchar_t *_RESTRICT_KYWD,
68 	    const char **_RESTRICT_KYWD, size_t, size_t,
69 	    mbstate_t *_RESTRICT_KYWD);
70 	size_t (*old__wcsnrtombs)(char *_RESTRICT_KYWD,
71 	    const wchar_t **_RESTRICT_KYWD, size_t, size_t,
72 	    mbstate_t *_RESTRICT_KYWD);
73 	static char ctype_encoding[ENCODING_LEN + 1];
74 	static _RuneLocale *CachedRuneLocale;
75 	static size_t (*Cached__mbrtowc)(wchar_t *_RESTRICT_KYWD,
76 	    const char *_RESTRICT_KYWD, size_t, mbstate_t *_RESTRICT_KYWD);
77 	static size_t (*Cached__wcrtomb)(char *_RESTRICT_KYWD, wchar_t,
78 	    mbstate_t *_RESTRICT_KYWD);
79 	static int (*Cached__mbsinit)(const mbstate_t *);
80 	static size_t (*Cached__mbsnrtowcs)(wchar_t *_RESTRICT_KYWD,
81 	    const char **_RESTRICT_KYWD, size_t, size_t,
82 	    mbstate_t *_RESTRICT_KYWD);
83 	static size_t (*Cached__wcsnrtombs)(char *_RESTRICT_KYWD,
84 	    const wchar_t **_RESTRICT_KYWD, size_t, size_t,
85 	    mbstate_t *_RESTRICT_KYWD);
86 
87 	/*
88 	 * The "C" and "POSIX" locale are always here.
89 	 */
90 	if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) {
91 		int i;
92 
93 		(void) memcpy(__ctype, __ctype_C, SZ_TOTAL);
94 
95 		for (i = 0; i < _CACHED_RUNES; i++) {
96 			__ctype_mask[i] = _DefaultRuneLocale.__runetype[i];
97 			__trans_upper[i] = _DefaultRuneLocale.__mapupper[i];
98 			__trans_lower[i] = _DefaultRuneLocale.__maplower[i];
99 		}
100 
101 		(void) _none_init(&_DefaultRuneLocale);
102 		return (0);
103 	}
104 
105 	/*
106 	 * If the locale name is the same as our cache, use the cache.
107 	 */
108 	if (CachedRuneLocale != NULL &&
109 	    strcmp(encoding, ctype_encoding) == 0) {
110 		_CurrentRuneLocale = CachedRuneLocale;
111 		__mbrtowc = Cached__mbrtowc;
112 		__mbsinit = Cached__mbsinit;
113 		__mbsnrtowcs = Cached__mbsnrtowcs;
114 		__wcrtomb = Cached__wcrtomb;
115 		__wcsnrtombs = Cached__wcsnrtombs;
116 		return (0);
117 	}
118 
119 	/*
120 	 * Slurp the locale file into the cache.
121 	 */
122 
123 	(void) snprintf(name, sizeof (name), "%s/%s/LC_CTYPE/LCL_DATA",
124 	    _PathLocale, encoding);
125 
126 	if ((fp = fopen(name, "r")) == NULL)
127 		return (errno == 0 ? ENOENT : errno);
128 
129 	if ((rl = _Read_RuneMagi(fp)) == NULL) {
130 		saverr = (errno == 0 ? EINVAL : errno);
131 		(void) fclose(fp);
132 		return (saverr);
133 	}
134 	(void) fclose(fp);
135 
136 	old__mbrtowc = __mbrtowc;
137 	old__mbsinit = __mbsinit;
138 	old__mbsnrtowcs = __mbsnrtowcs;
139 	old__wcrtomb = __wcrtomb;
140 	old__wcsnrtombs = __wcsnrtombs;
141 
142 	__mbrtowc = NULL;
143 	__mbsinit = NULL;
144 	__mbsnrtowcs = __mbsnrtowcs_std;
145 	__wcrtomb = NULL;
146 	__wcsnrtombs = __wcsnrtombs_std;
147 
148 	if (strcmp(rl->__encoding, "NONE") == 0)
149 		ret = _none_init(rl);
150 	else if (strcmp(rl->__encoding, "UTF-8") == 0)
151 		ret = _UTF8_init(rl);
152 	else if (strcmp(rl->__encoding, "EUC-CN") == 0)
153 		ret = _EUC_CN_init(rl);
154 	else if (strcmp(rl->__encoding, "EUC-JP") == 0)
155 		ret = _EUC_JP_init(rl);
156 	else if (strcmp(rl->__encoding, "EUC-KR") == 0)
157 		ret = _EUC_KR_init(rl);
158 	else if (strcmp(rl->__encoding, "EUC-TW") == 0)
159 		ret = _EUC_TW_init(rl);
160 	else if (strcmp(rl->__encoding, "GB18030") == 0)
161 		ret = _GB18030_init(rl);
162 	else if (strcmp(rl->__encoding, "GB2312") == 0)
163 		ret = _GB2312_init(rl);
164 	else if (strcmp(rl->__encoding, "GBK") == 0)
165 		ret = _GBK_init(rl);
166 	else if (strcmp(rl->__encoding, "BIG5") == 0)
167 		ret = _BIG5_init(rl);
168 	else if (strcmp(rl->__encoding, "MSKanji") == 0)
169 		ret = _MSKanji_init(rl);
170 	else
171 		ret = EINVAL;
172 
173 	if (ret == 0) {
174 		if (CachedRuneLocale != NULL) {
175 			free(CachedRuneLocale);
176 		}
177 		CachedRuneLocale = _CurrentRuneLocale;
178 		Cached__mbrtowc = __mbrtowc;
179 		Cached__mbsinit = __mbsinit;
180 		Cached__mbsnrtowcs = __mbsnrtowcs;
181 		Cached__wcrtomb = __wcrtomb;
182 		Cached__wcsnrtombs = __wcsnrtombs;
183 		(void) strcpy(ctype_encoding, encoding);
184 
185 		/*
186 		 * We need to overwrite the _ctype array.  This requires
187 		 * some finagling.  This is because references to it may
188 		 * have been baked into applications.
189 		 *
190 		 * Note that it is interesting that toupper/tolower only
191 		 * produce defined results when the input is representable
192 		 * as a byte.
193 		 */
194 
195 		/*
196 		 * The top half is the type mask array.  Because we
197 		 * want to support both legacy Solaris code (which have
198 		 * mask valeus baked in to them), and we want to be able
199 		 * to import locale files from other sources (FreeBSD)
200 		 * which probably uses different masks, we have to perform
201 		 * a conversion here.  Ugh.  Note that the _CTYPE definitions
202 		 * we use from FreeBSD are richer than the Solaris legacy.
203 		 *
204 		 * We have to cope with these limitations though, because the
205 		 * inadequate Solaris definitions were baked into binaries.
206 		 */
207 		for (int i = 0; i < _CACHED_RUNES; i++) {
208 			/* ctype can only encode the lower 8 bits. */
209 			__ctype[i+1] = rl->__runetype[i] & 0xff;
210 			__ctype_mask[i] = rl->__runetype[i];
211 		}
212 
213 		/* The bottom half is the toupper/lower array */
214 		for (int i = 0; i < _CACHED_RUNES; i++) {
215 			__ctype[258 + i] = i;
216 			if (rl->__mapupper[i] && rl->__mapupper[i] != i)
217 				__ctype[258+i] = rl->__mapupper[i];
218 			if (rl->__maplower[i] && rl->__maplower[i] != i)
219 				__ctype[258+i] = rl->__maplower[i];
220 
221 			/* Don't forget these annoyances either! */
222 			__trans_upper[i] = rl->__mapupper[i];
223 			__trans_lower[i] = rl->__maplower[i];
224 		}
225 
226 		/*
227 		 * Note that we expect the init code will have populated
228 		 * the CSWIDTH array (__ctype[514-520]) properly.
229 		 */
230 	} else {
231 		__mbrtowc = old__mbrtowc;
232 		__mbsinit = old__mbsinit;
233 		__mbsnrtowcs = old__mbsnrtowcs;
234 		__wcrtomb = old__wcrtomb;
235 		__wcsnrtombs = old__wcsnrtombs;
236 		free(rl);
237 	}
238 
239 	return (ret);
240 }
241 
242 int
243 __wrap_setrunelocale(const char *locale)
244 {
245 	int ret = __setrunelocale(locale);
246 
247 	if (ret != 0) {
248 		errno = ret;
249 		return (_LDP_ERROR);
250 	}
251 	return (_LDP_LOADED);
252 }
253