xref: /illumos-gate/usr/src/common/smbsrv/smb_string.c (revision b6805bf78d2bbbeeaea8909a05623587b42d58b3)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #ifdef _KERNEL
27 #include <sys/types.h>
28 #include <sys/sunddi.h>
29 #else
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <strings.h>
34 #endif
35 #include <sys/u8_textprep.h>
36 #include <smbsrv/alloc.h>
37 #include <sys/errno.h>
38 #include <smbsrv/string.h>
39 #include <smbsrv/cp_usascii.h>
40 #include <smbsrv/cp_unicode.h>
41 
/* Element count of the a_unicode table (array size / element size). */
#define	UNICODE_N_ENTRIES	(sizeof (a_unicode) / sizeof (a_unicode[0]))

/*
 * Global pointer to the current codepage: defaults to ASCII,
 * and a flag indicating whether the codepage is Unicode or ASCII.
 * Both are updated together by smb_codepage_init(); the character
 * classification routines in this file use the flag to decide how
 * many bits of a character value may be used to index the table.
 */
static smb_codepage_t *current_codepage = usascii_codepage;
static boolean_t is_unicode = B_FALSE;

/* Builds the full Unicode codepage table; defined later in this file. */
static smb_codepage_t *smb_unicode_init(void);
52 
/*
 * strsubst
 *
 * Walk the null-terminated string s and overwrite every byte that is
 * equal to orgchar with newchar.  The string is modified in place.
 *
 * Returns s.  If s is null, null is returned and nothing is touched.
 */
char *
strsubst(char *s, char orgchar, char newchar)
{
	char *cur;

	if (s == 0)
		return (0);

	for (cur = s; *cur != '\0'; cur++) {
		if (*cur == orgchar)
			*cur = newchar;
	}

	return (s);
}
75 
/*
 * strcanon
 *
 * Collapse runs of a repeated character in buf down to a single
 * occurrence, but only for characters that appear in class.  Runs of
 * characters that are not in class are left alone.  For example:
 *
 *		char *buf = strdup("/d1//d2//d3\\\\d4\\\\f1.txt");
 *		strcanon(buf, "/\\");
 *
 * leaves buf holding:
 *
 *		/d1/d2/d3\d4\f1.txt
 *
 * The result is never longer than the input, so the work is done in
 * place.  Returns buf.
 */
char *
strcanon(char *buf, const char *class)
{
	char *src = buf;
	char *dst = buf;
	char ch;

	while (*src != '\0') {
		/* Always keep the first character of any run. */
		ch = *src++;
		*dst++ = ch;

		/* For class members, skip the remainder of the run. */
		if (strchr(class, ch) != NULL) {
			while (*src == ch)
				src++;
		}
	}

	*dst = '\0';
	return (buf);
}
112 
113 void
114 smb_codepage_init(void)
115 {
116 	smb_codepage_t *cp;
117 
118 	if (is_unicode)
119 		return;
120 
121 	if ((cp = smb_unicode_init()) != NULL) {
122 		current_codepage = cp;
123 		is_unicode = B_TRUE;
124 	} else {
125 		current_codepage = usascii_codepage;
126 		is_unicode = B_FALSE;
127 	}
128 }
129 
130 /*
131  * Determine whether or not a character is an uppercase character.
132  * This function operates on the current codepage table. Returns
133  * non-zero if the character is uppercase. Otherwise returns zero.
134  */
135 int
136 smb_isupper(int c)
137 {
138 	uint16_t mask = is_unicode ? 0xffff : 0xff;
139 
140 	return (current_codepage[c & mask].ctype & CODEPAGE_ISUPPER);
141 }
142 
143 /*
144  * Determine whether or not a character is an lowercase character.
145  * This function operates on the current codepage table. Returns
146  * non-zero if the character is lowercase. Otherwise returns zero.
147  */
148 int
149 smb_islower(int c)
150 {
151 	uint16_t mask = is_unicode ? 0xffff : 0xff;
152 
153 	return (current_codepage[c & mask].ctype & CODEPAGE_ISLOWER);
154 }
155 
156 /*
157  * Convert individual characters to their uppercase equivalent value.
158  * If the specified character is lowercase, the uppercase value will
159  * be returned. Otherwise the original value will be returned.
160  */
161 int
162 smb_toupper(int c)
163 {
164 	uint16_t mask = is_unicode ? 0xffff : 0xff;
165 
166 	return (current_codepage[c & mask].upper);
167 }
168 
169 /*
170  * Convert individual characters to their lowercase equivalent value.
171  * If the specified character is uppercase, the lowercase value will
172  * be returned. Otherwise the original value will be returned.
173  */
174 int
175 smb_tolower(int c)
176 {
177 	uint16_t mask = is_unicode ? 0xffff : 0xff;
178 
179 	return (current_codepage[c & mask].lower);
180 }
181 
182 /*
183  * Convert a string to uppercase using the appropriate codepage. The
184  * string is converted in place. A pointer to the string is returned.
185  * There is an assumption here that uppercase and lowercase values
186  * always result encode to the same length.
187  */
188 char *
189 smb_strupr(char *s)
190 {
191 	smb_wchar_t c;
192 	char *p = s;
193 
194 	while (*p) {
195 		if (smb_isascii(*p)) {
196 			*p = smb_toupper(*p);
197 			p++;
198 		} else {
199 			if (smb_mbtowc(&c, p, MTS_MB_CHAR_MAX) < 0)
200 				return (0);
201 
202 			if (c == 0)
203 				break;
204 
205 			c = smb_toupper(c);
206 			p += smb_wctomb(p, c);
207 		}
208 	}
209 
210 	return (s);
211 }
212 
213 /*
214  * Convert a string to lowercase using the appropriate codepage. The
215  * string is converted in place. A pointer to the string is returned.
216  * There is an assumption here that uppercase and lowercase values
217  * always result encode to the same length.
218  */
219 char *
220 smb_strlwr(char *s)
221 {
222 	smb_wchar_t c;
223 	char *p = s;
224 
225 	while (*p) {
226 		if (smb_isascii(*p)) {
227 			*p = smb_tolower(*p);
228 			p++;
229 		} else {
230 			if (smb_mbtowc(&c, p, MTS_MB_CHAR_MAX) < 0)
231 				return (0);
232 
233 			if (c == 0)
234 				break;
235 
236 			c = smb_tolower(c);
237 			p += smb_wctomb(p, c);
238 		}
239 	}
240 
241 	return (s);
242 }
243 
244 /*
245  * Returns 1 if string contains NO uppercase chars 0 otherwise. However,
246  * -1 is returned if "s" is not a valid multi-byte string.
247  */
248 int
249 smb_isstrlwr(const char *s)
250 {
251 	smb_wchar_t c;
252 	int n;
253 	const char *p = s;
254 
255 	while (*p) {
256 		if (smb_isascii(*p) && smb_isupper(*p))
257 			return (0);
258 		else {
259 			if ((n = smb_mbtowc(&c, p, MTS_MB_CHAR_MAX)) < 0)
260 				return (-1);
261 
262 			if (c == 0)
263 				break;
264 
265 			if (smb_isupper(c))
266 				return (0);
267 
268 			p += n;
269 		}
270 	}
271 
272 	return (1);
273 }
274 
275 /*
276  * Returns 1 if string contains NO lowercase chars 0 otherwise. However,
277  * -1 is returned if "s" is not a valid multi-byte string.
278  */
279 int
280 smb_isstrupr(const char *s)
281 {
282 	smb_wchar_t c;
283 	int n;
284 	const char *p = s;
285 
286 	while (*p) {
287 		if (smb_isascii(*p) && smb_islower(*p))
288 			return (0);
289 		else {
290 			if ((n = smb_mbtowc(&c, p, MTS_MB_CHAR_MAX)) < 0)
291 				return (-1);
292 
293 			if (c == 0)
294 				break;
295 
296 			if (smb_islower(c))
297 				return (0);
298 
299 			p += n;
300 		}
301 	}
302 
303 	return (1);
304 }
305 
306 /*
307  * Compare the null-terminated strings s1 and s2 and return an integer
308  * greater than, equal to or less than 0 dependent on whether s1 is
309  * lexicographically greater than, equal to or less than s2 after
310  * translation of each character to lowercase.  The original strings
311  * are not modified.
312  *
313  * If n is non-zero, at most n bytes are compared.  Otherwise, the strings
314  * are compared until a null terminator is encountered.
315  *
316  * Out:    0 if strings are equal
317  *       < 0 if first string < second string
318  *       > 0 if first string > second string
319  */
320 int
321 smb_strcasecmp(const char *s1, const char *s2, size_t n)
322 {
323 	int	err = 0;
324 	int	rc;
325 
326 	rc = u8_strcmp(s1, s2, n, U8_STRCMP_CI_LOWER, U8_UNICODE_LATEST, &err);
327 	if (err != 0)
328 		return (-1);
329 	return (rc);
330 }
331 
332 /*
333  * First build a codepage based on cp_unicode.h.  Then build the unicode
334  * codepage from this interim codepage by copying the entries over while
335  * fixing them and filling in the gaps.
336  */
337 static smb_codepage_t *
338 smb_unicode_init(void)
339 {
340 	smb_codepage_t	*unicode;
341 	uint32_t	a = 0;
342 	uint32_t	b = 0;
343 
344 	unicode = MEM_ZALLOC("unicode", sizeof (smb_codepage_t) << 16);
345 	if (unicode == NULL)
346 		return (NULL);
347 
348 	while (b != 0xffff) {
349 		/*
350 		 * If there is a gap in the standard,
351 		 * fill in the gap with no-case entries.
352 		 */
353 		if (UNICODE_N_ENTRIES <= a || a_unicode[a].val > b) {
354 			unicode[b].ctype = CODEPAGE_ISNONE;
355 			unicode[b].upper = (smb_wchar_t)b;
356 			unicode[b].lower = (smb_wchar_t)b;
357 			b++;
358 			continue;
359 		}
360 
361 		/*
362 		 * Copy the entry and fixup as required.
363 		 */
364 		switch (a_unicode[a].ctype) {
365 		case CODEPAGE_ISNONE:
366 			/*
367 			 * Replace 0xffff in upper/lower fields with its val.
368 			 */
369 			unicode[b].ctype = CODEPAGE_ISNONE;
370 			unicode[b].upper = (smb_wchar_t)b;
371 			unicode[b].lower = (smb_wchar_t)b;
372 			break;
373 		case CODEPAGE_ISUPPER:
374 			/*
375 			 * Some characters may have case yet not have
376 			 * case conversion.  Treat them as no-case.
377 			 */
378 			if (a_unicode[a].lower == 0xffff) {
379 				unicode[b].ctype = CODEPAGE_ISNONE;
380 				unicode[b].upper = (smb_wchar_t)b;
381 				unicode[b].lower = (smb_wchar_t)b;
382 			} else {
383 				unicode[b].ctype = CODEPAGE_ISUPPER;
384 				unicode[b].upper = (smb_wchar_t)b;
385 				unicode[b].lower = a_unicode[a].lower;
386 			}
387 			break;
388 		case CODEPAGE_ISLOWER:
389 			/*
390 			 * Some characters may have case yet not have
391 			 * case conversion.  Treat them as no-case.
392 			 */
393 			if (a_unicode[a].upper == 0xffff) {
394 				unicode[b].ctype = CODEPAGE_ISNONE;
395 				unicode[b].upper = (smb_wchar_t)b;
396 				unicode[b].lower = (smb_wchar_t)b;
397 			} else {
398 				unicode[b].ctype = CODEPAGE_ISLOWER;
399 				unicode[b].upper = a_unicode[a].upper;
400 				unicode[b].lower = (smb_wchar_t)b;
401 			}
402 			break;
403 		default:
404 			MEM_FREE("unicode", unicode);
405 			return (NULL);
406 		}
407 
408 		a++;
409 		b++;
410 	};
411 
412 	return (unicode);
413 }
414 
415 /*
416  * Parse a UNC path (\\server\share\path) into its components.
417  * Although a standard UNC path starts with two '\', in DFS
418  * all UNC paths start with one '\'. So, this function only
419  * checks for one.
420  *
421  * A valid UNC must at least contain two components i.e. server
422  * and share. The path is parsed to:
423  *
424  * unc_server	server or domain name with no leading/trailing '\'
425  * unc_share	share name with no leading/trailing '\'
426  * unc_path	relative path to the share with no leading/trailing '\'
427  * 		it is valid for unc_path to be NULL.
428  *
429  * Upon successful return of this function, smb_unc_free()
430  * MUST be called when returned 'unc' is no longer needed.
431  *
432  * Returns 0 on success, otherwise returns an errno code.
433  */
434 int
435 smb_unc_init(const char *path, smb_unc_t *unc)
436 {
437 	char *p;
438 
439 	if (path == NULL || unc == NULL || (*path != '\\' && *path != '/'))
440 		return (EINVAL);
441 
442 	bzero(unc, sizeof (smb_unc_t));
443 
444 #ifdef _KERNEL
445 	unc->unc_buf = smb_mem_strdup(path);
446 #else
447 	if ((unc->unc_buf = strdup(path)) == NULL)
448 		return (ENOMEM);
449 #endif
450 
451 	(void) strsubst(unc->unc_buf, '\\', '/');
452 	(void) strcanon(unc->unc_buf, "/");
453 
454 	unc->unc_server = unc->unc_buf + 1;
455 	if (*unc->unc_server == '\0') {
456 		smb_unc_free(unc);
457 		return (EINVAL);
458 	}
459 
460 	if ((p = strchr(unc->unc_server, '/')) == NULL) {
461 		smb_unc_free(unc);
462 		return (EINVAL);
463 	}
464 
465 	*p++ = '\0';
466 	unc->unc_share = p;
467 
468 	if (*unc->unc_share == '\0') {
469 		smb_unc_free(unc);
470 		return (EINVAL);
471 	}
472 
473 	unc->unc_path = strchr(unc->unc_share, '/');
474 	if ((p = unc->unc_path) == NULL)
475 		return (0);
476 
477 	unc->unc_path++;
478 	*p = '\0';
479 
480 	/* remove the last '/' if any */
481 	if ((p = strchr(unc->unc_path, '\0')) != NULL) {
482 		if (*(--p) == '/')
483 			*p = '\0';
484 	}
485 
486 	return (0);
487 }
488 
489 void
490 smb_unc_free(smb_unc_t *unc)
491 {
492 	if (unc == NULL)
493 		return;
494 
495 #ifdef _KERNEL
496 	smb_mem_free(unc->unc_buf);
497 #else
498 	free(unc->unc_buf);
499 #endif
500 	unc->unc_buf = NULL;
501 }
502