xref: /illumos-gate/usr/src/lib/libc/port/locale/runetype.h (revision b6805bf78d2bbbeeaea8909a05623587b42d58b3)
1 /*
2  * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
3  * Copyright (c) 1993
4  *	The Regents of the University of California.  All rights reserved.
5  *
6  * This code is derived from software contributed to Berkeley by
7  * Paul Borman at Krystal Technologies.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  */
33 /*
34  * Because we borrowed the __rune_t definitions from _types.h, we need
35  * this copyright notice as well:
36  *
37  * Copyright (c) 2002 Mike Barcroft <mike@FreeBSD.org>
38  * All rights reserved.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  */
61 
62 #ifndef	_RUNETYPE_H_
63 #define	_RUNETYPE_H_
64 
/*
 * _CACHED_RUNES is the number of entries in each directly indexed table
 * of _RuneLocale; it has to stay a power of 2 so that _CRMASK works.
 *
 * _CRMASK has every bit set except the low bits that index those tables,
 * so (c & _CRMASK) is zero exactly when 0 <= c < _CACHED_RUNES.
 */
#define	_CACHED_RUNES	0x100
#define	_CRMASK		(~(_CACHED_RUNES - 1))
67 
68 /*
69  * rune_t is declared to be an ``int'' instead of the more natural
70  * ``unsigned long'' or ``long''.  Two things are happening here.  It is not
71  * unsigned so that EOF (-1) can be naturally assigned to it and used.  Also,
72  * it looks like 10646 will be a 31 bit standard.  This means that if your
73  * ints cannot hold 32 bits, you will be in trouble.  The reason an int was
74  * chosen over a long is that the is*() and to*() routines take ints (says
75  * ANSI C), but they use __ct_rune_t instead of int.
76  *
77  * NOTE: rune_t is not covered by ANSI nor other standards, and should not
78  * be instantiated outside of lib/libc/locale.  Use wchar_t.  wchar_t and
79  * rune_t must be the same type.  Also, wint_t must be no narrower than
80  * wchar_t, and should be able to hold all members of the largest
81  * character set plus one extra value (WEOF), and must be at least 16 bits.
82  *
83  * For compatibility with Solaris, we want to use long in ILP32, and int in
84  * LP64.  This is due to historical Solaris legacy.  (See <wchar.h> for
85  * the definition.)
86  */
87 
/*
 * __ct_rune_t is the argument type of the ctype routines.  To stay
 * compatible with the historical Solaris definitions it is int under
 * LP64 and long under ILP32.  __rune_t is derived from it so that the
 * two can never disagree.
 */
#if	defined(_LP64)
typedef	int		__ct_rune_t;	/* arg type for ctype funcs */
#else
typedef	long		__ct_rune_t;	/* ILP32: long, as on Solaris */
#endif

typedef	__ct_rune_t	__rune_t;	/* rune_t (see above) */
95 
96 /*
97  * The lower 8 bits of runetype[] contain the digit value of the rune.
98  */
99 typedef struct {
100 	__rune_t	__min;		/* First rune of the range */
101 	__rune_t	__max;		/* Last rune (inclusive) of the range */
102 	__rune_t	__map;		/* What first maps to in maps */
103 	unsigned	*__types;	/* Array of types in range */
104 } _RuneEntry;
105 
106 typedef struct {
107 	int		__nranges;	/* Number of ranges stored */
108 	_RuneEntry	*__ranges;	/* Pointer to the ranges */
109 } _RuneRange;
110 
111 typedef struct {
112 	char		__magic[8];	/* Magic saying what version we are */
113 	char		__encoding[32];	/* ASCII name of this encoding */
114 
115 	unsigned int	__runetype[_CACHED_RUNES];
116 	__rune_t	__maplower[_CACHED_RUNES];
117 	__rune_t	__mapupper[_CACHED_RUNES];
118 
119 	/*
120 	 * The following are to deal with Runes larger than _CACHED_RUNES - 1.
121 	 * Their data is actually contiguous with this structure so as to make
122 	 * it easier to read/write from/to disk.
123 	 */
124 	_RuneRange	__runetype_ext;
125 	_RuneRange	__maplower_ext;
126 	_RuneRange	__mapupper_ext;
127 
128 	void		*__variable;	/* Data which depends on the encoding */
129 	int		__variable_len;	/* how long that data is */
130 } _RuneLocale;
131 
/*
 * Magic string indicating version 0 of RuneLocale.  It is eight
 * characters long, the same as the size of _RuneLocale.__magic.
 */
#define	_RUNE_MAGIC_1	"RuneMagi"
133 
134 extern _RuneLocale _DefaultRuneLocale;
135 extern _RuneLocale *_CurrentRuneLocale;
136 
137 #endif	/* !_RUNETYPE_H_ */
138