xref: /illumos-gate/usr/src/lib/pam_modules/authtok_check/packer.c (revision 7c478bd95313f5f23a4c958a745db2134aa03244)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include "packer.h"
30 
31 /*
32  * This file steers the creation of the Crack Dictionary Database.
33  * Based on a list of source dictionaries specified by the administrator,
34  * we create the Database by sorting each dictionary (in memory, one at
35  * a time), writing the sorted result to a temporary file, and merging
36  * all the temporary files into the Database.
37  *
38  * The current implementation has a number of limitations
39  *   - each single source dictionary has to fit in memory
40  *   - each single source dictionary has to be smaller than 2GByte
41  *   - each single source dictionary can only hold up to 4GB words
42  * None of these seem real, practical, problems to me.
43  *
44  * All of this is meant to be run by one thread per host. The caller is
45  * responsible for locking things appropriately (as make_dict_database
46  * in dict.c does).
47  */
48 
49 #include <stdio.h>
50 #include <stdlib.h>
51 #include <unistd.h>
52 #include <ctype.h>
53 #include <string.h>
54 #include <errno.h>
55 #include <sys/stat.h>
56 #include <fcntl.h>
57 
58 /* Stuff used for sorting the dictionary */
59 static char	*buf;		/* used to hold the source dictionary */
60 static uint_t	*offsets;	/* array of word-offsets into "buf" */
61 static uint_t	off_idx = 0;	/* first free index in offsets array */
62 static size_t	off_size = 0;	/* offsets array size */
63 
64 /* stuff to keep track of the temporary files */
65 #define	FNAME_TEMPLATE	"/var/tmp/authtok_check.XXXXXX"
66 #define	MAXTMP		64
67 static FILE	*tmpfp[MAXTMP];	/* FILE *'s to (unlinked) temporary files */
68 static int	tmpfp_idx = 0;	/* points to first free entry in tmpfp */
69 
70 #define	MODNAME "pam_authtok_check::packer"
71 
72 /*
73  * int writeout(void)
74  *
75  * Write the sorted wordlist to disk. We create a temporary file
76  * (in /var/tmp), and immediately unlink() it. We keep an open
77  * FILE pointer to it in tmpfp[] for later use.
78  *
79  * returns 0 on success, -1 on failure (can't create file/output failure).
80  */
81 int
82 writeout(void)
83 {
84 	int i = 0;
85 	char tmpname[sizeof (FNAME_TEMPLATE)];
86 	int fd;
87 
88 	if (tmpfp_idx == MAXTMP) {
89 		syslog(LOG_ERR, MODNAME ": too many temporary "
90 		    "files (maximum %d exceeded)", MAXTMP);
91 		return (-1);
92 	}
93 
94 	(void) strcpy(tmpname, FNAME_TEMPLATE);
95 	if ((fd = mkstemp(tmpname)) == -1) {
96 		syslog(LOG_ERR, MODNAME ": mkstemp() failed: %s\n",
97 		    strerror(errno));
98 		return (-1);
99 	}
100 	(void) unlink(tmpname);
101 
102 	if ((tmpfp[tmpfp_idx] = fdopen(fd, "w+")) == NULL) {
103 		syslog(LOG_ERR, MODNAME ": fdopen failed: %s",
104 		    strerror(errno));
105 		(void) close(fd);
106 		return (-1);
107 	}
108 
109 	/* write words to file */
110 	while (i < off_idx) {
111 		if (fprintf(tmpfp[tmpfp_idx], "%s\n", &buf[offsets[i++]]) < 0) {
112 			syslog(LOG_ERR, MODNAME ": write to file failed: %s",
113 			    strerror(errno));
114 			(void) close(fd);
115 			return (-1);
116 		}
117 	}
118 
119 	/* we have one extra tmpfp */
120 	tmpfp_idx++;
121 
122 	return (0);
123 }
124 
125 /*
126  * int insert_word(int off)
127  *
128  * insert an offset into the offsets-array. If the offsets-array is out of
129  * space, we allocate additional space (in CHUNKs)
130  *
131  * returns 0 on success, -1 on failure (out of memory)
132  */
133 int
134 insert_word(int off)
135 {
136 #define	CHUNK 10000
137 
138 	if (off_idx == off_size) {
139 		uint_t *tmp;
140 		off_size += CHUNK;
141 		tmp = realloc(offsets, sizeof (uint_t) * off_size);
142 		if (tmp == NULL) {
143 			syslog(LOG_ERR, MODNAME ": out of memory");
144 			free(offsets);
145 			off_idx = off_size = 0;
146 			offsets = NULL;
147 			return (-1);
148 		}
149 		offsets = tmp;
150 	}
151 
152 	offsets[off_idx++] = off;
153 	return (0);
154 }
155 
156 /*
157  * translate(buf, size)
158  *
159  * perform "tr '[A-Z]' '[a-z]' | tr -cd '\012[a-z][0-9]'" on the
160  * words in "buf" and insert each of them into the offsets-array.
161  * We refrain from using 'isupper' and 'islower' to keep this strictly
162  * ASCII-only, as is the original Cracklib code.
163  *
164  * returns 0 on success, -1 on failure (failure of insert_word)
165  */
166 int
167 translate(char *buf, size_t size)
168 {
169 	char *p, *q, *e;
170 	char c;
171 	int wordstart;
172 
173 	e = &buf[size];
174 
175 	wordstart = 0;
176 	for (p = buf, q = buf; q < e; q++) {
177 		c = *q;
178 		if (c >= 'A' && c <= 'Z') {
179 			*(p++) = tolower(c);
180 		} else if (c == '\n') {
181 			*(p++) = '\0';
182 			/*
183 			 * make sure we only insert words consisting of
184 			 * MAXWORDLEN-1 bytes or less
185 			 */
186 			if (p-&buf[wordstart] > MAXWORDLEN)
187 				buf[wordstart+MAXWORDLEN-1] = '\0';
188 			if (insert_word(wordstart) != 0)
189 				return (-1);
190 			wordstart = p-buf;
191 		} else if ((c >= 'a' && c <= 'z') || (c >= '0' && c <= '9')) {
192 			*(p++) = c;
193 		}
194 	}
195 	return (0);
196 }
197 
198 /*
199  * int compare(a, b)
200  *
201  * helper-routine used for quicksort. we compate two words in the
202  * buffer, one start starts at index "a", and the other one that starts
203  * at index "b"
204  */
205 int
206 compare(const void *a, const void *b)
207 {
208 	int idx_a = *(uint_t *)a, idx_b = *(uint_t *)b;
209 
210 	return (strcmp(&buf[idx_a], &buf[idx_b]));
211 }
212 
213 /*
214  *
215  * int sort_file(fname)
216  *
217  * We sort the file in memory: we read the dictionary file, translate all
218  * newlines to '\0's, all uppercase ASCII characters to lowercase characters
219  * and removing all characters but '[a-z][0-9]'.
220  * We maintain an array of offsets into the buffer where each word starts
221  * and sort this array using qsort().
222  *
223  * This implements the original cracklib code that did an execl of
224  *    sh -c "/usr/bin/cat <list of files> |
225  *       /usr/bin/tr '[A-Z]' '[a-z]' | /usr/bin/tr -cd '\012[a-z][0-9]' |
226  *       sort -o tmfpfile
227  *
228  * returns 0 on success, -1 on failure.
229  */
230 int
231 sort_file(char *fname)
232 {
233 	int fd;
234 	struct stat statbuf;
235 	ssize_t n;
236 	int ret = -1;
237 
238 	if ((fd = open(fname, O_RDONLY)) == -1) {
239 		syslog(LOG_ERR, MODNAME ": failed to open %s: %s",
240 		    fname, strerror(errno));
241 		return (-1);
242 	}
243 
244 	if (fstat(fd, &statbuf) == -1) {
245 		syslog(LOG_ERR, MODNAME ": fstat() failed (%s)",
246 		    strerror(errno));
247 		(void) close(fd);
248 		return (-1);
249 	}
250 	if ((buf = malloc(statbuf.st_size + 1)) == NULL) {
251 		syslog(LOG_ERR, MODNAME ": out of memory");
252 		goto error;
253 	}
254 
255 	n = read(fd, buf, statbuf.st_size);
256 
257 	if (n == -1) {
258 		if (errno == EINVAL)
259 			syslog(LOG_ERR, MODNAME ": %s is too big. "
260 			    "Split the file into smaller files.", fname);
261 		else
262 			syslog(LOG_ERR, MODNAME ": read failed: %s",
263 			    strerror(errno));
264 		goto error;
265 	}
266 
267 	if (translate(buf, n) == 0) {
268 		qsort((void *)offsets, off_idx, sizeof (int), compare);
269 
270 		if (writeout() == 0)
271 			ret = 0;
272 	}
273 
274 error:
275 	(void) close(fd);
276 
277 	if (buf != NULL)
278 		free(buf);
279 	if (offsets != NULL)
280 		free(offsets);
281 	offsets = NULL;
282 	off_size = 0;
283 	off_idx = 0;
284 	return (ret);
285 }
286 
287 /*
288  * We merge the temporary files created by previous calls to sort_file()
289  * and insert the thus sorted words into the cracklib database
290  */
291 void
292 merge_files(PWDICT *pwp)
293 {
294 	int ti;
295 	char *words[MAXTMP];
296 	char lastword[MAXWORDLEN];
297 	int choice;
298 
299 	lastword[0] = '\0';
300 
301 	for (ti = 0; ti < tmpfp_idx; ti++)
302 		words[ti] = malloc(MAXWORDLEN);
303 	/*
304 	 * we read the first word of each of the temp-files into words[].
305 	 */
306 	for (ti = 0; ti < tmpfp_idx; ti++) {
307 		(void) fseek(tmpfp[ti], 0, SEEK_SET);
308 		(void) fgets(words[ti], MAXWORDLEN, tmpfp[ti]);
309 		words[ti][MAXWORDLEN-1] = '\0';
310 	}
311 
312 	/*
313 	 * next, we emit the word that comes first (lexicographically),
314 	 * and replace that word with a new word from the file it
315 	 * came from. If the file is exhausted, we close the fp and
316 	 * swap the fp with the last fp in tmpfp[].
317 	 * we then decrease tmpfp_idx and continue with what's left until
318 	 * we run out of open FILE pointers.
319 	 */
320 	while (tmpfp_idx != 0) {
321 		choice = 0;
322 
323 		for (ti = 1; ti < tmpfp_idx; ti++)
324 			if (strcmp(words[choice], words[ti]) > 0)
325 				choice = ti;
326 		/* Insert word in Cracklib database */
327 		(void) Chomp(words[choice]);
328 		if (words[choice][0] != '\0' &&
329 		    strcmp(lastword, words[choice]) != 0) {
330 			(void) PutPW(pwp, words[choice]);
331 			(void) strncpy(lastword, words[choice], MAXWORDLEN);
332 		}
333 
334 		if (fgets(words[choice], MAXWORDLEN, tmpfp[choice]) == NULL) {
335 			(void) fclose(tmpfp[choice]);
336 			tmpfp[choice] = tmpfp[tmpfp_idx - 1];
337 			tmpfp_idx--;
338 		} else
339 			words[choice][MAXWORDLEN-1] = '\0';
340 	}
341 }
342 
343 /*
344  * int packer(list)
345  *
346  * sort all dictionaries in "list", and feed the words into the Crack
347  * Password Database.
348  *
349  * returns 0 on sucess, -1 on failure.
350  */
351 int
352 packer(char *list, char *path)
353 {
354 	PWDICT *pwp;
355 	char *listcopy, *fname;
356 	int ret = 0;
357 
358 	if ((listcopy = strdup(list)) == NULL) {
359 		syslog(LOG_ERR, MODNAME ": out of memory");
360 		return (-1);
361 	}
362 
363 	if (!(pwp = PWOpen(path, "w")))
364 		return (-1);
365 
366 	fname = strtok(listcopy, " \t,");
367 	while (ret == 0 && fname != NULL) {
368 		if ((ret = sort_file(fname)) == 0)
369 			fname = strtok(NULL, " \t,");
370 	}
371 	free(listcopy);
372 
373 	if (ret == 0)
374 		merge_files(pwp);
375 
376 	(void) PWClose(pwp);
377 
378 	return (ret);
379 }
380