xref: /illumos-gate/usr/src/lib/libc/port/threads/thr.c (revision 48bbca816818409505a6e214d0911fda44e622e3)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright (c) 2016 by Delphix. All rights reserved.
25  */
26 /*
27  * Copyright 2016 Joyent, Inc.
28  */
29 
30 #include "lint.h"
31 #include "thr_uberdata.h"
32 #include <pthread.h>
33 #include <procfs.h>
34 #include <sys/uio.h>
35 #include <ctype.h>
36 #include "libc.h"
37 
38 /*
39  * These symbols should not be exported from libc, but
40  * /lib/libm.so.2 references _thr_main.  libm needs to be fixed.
41  * Also, some older versions of the Studio compiler/debugger
42  * components reference them.  These need to be fixed, too.
43  */
44 #pragma weak _thr_main = thr_main
45 #pragma weak _thr_create = thr_create
46 #pragma weak _thr_join = thr_join
47 #pragma weak _thr_self = thr_self
48 
49 #undef errno
50 extern int errno;
51 
/*
 * __threaded originally (Solaris 2.5 through Solaris 9) meant
 * "we are linked with libthread".  It is a consolidation private
 * symbol, but the Sun Workshop 6 update 1 compilation system (and
 * possibly others) used it anyway.  Now that libthread has been folded
 * into libc the symbol is kept for those consumers and is simply
 * always 1.
 *
 * __libc_threaded carries the new meaning, "more than one thread
 * exists".  It starts out zero and is set by the first thr_create().
 */
int __threaded = 1;
int __libc_threaded = 0;
63 
/*
 * The library itself never acts on these two values.  They only record
 * what was last passed to thr_setconcurrency() and
 * pthread_setconcurrency() so that it can be returned later.
 *
 * thr_concurrency is additionally adjusted by the THR_NEW_LWP flag to
 * thr_create(), so it is protected by link_lock.
 */
static int thr_concurrency = 1;
static int pthread_concurrency = 0;
73 
#define	HASHTBLSZ	1024	/* must be a power of two */
/*
 * Map a thread id to its bucket index in (udp)->thr_hash_table.
 * Both arguments are parenthesized so that an expression argument
 * (e.g. "a | b") is masked as a whole.
 */
#define	TIDHASH(tid, udp)	((tid) & (udp)->hash_mask)
76 
77 /* initial allocation, just enough for one lwp */
78 #pragma align 64(init_hash_table)
79 thr_hash_table_t init_hash_table[1] = {
80 	{ DEFAULTMUTEX, DEFAULTCV, NULL },
81 };
82 
83 extern const Lc_interface rtld_funcs[];
84 
85 /*
86  * The weak version is known to libc_db and mdb.
87  */
88 #pragma weak _uberdata = __uberdata
89 uberdata_t __uberdata = {
90 	{ DEFAULTMUTEX, NULL, 0 },	/* link_lock */
91 	{ RECURSIVEMUTEX, NULL, 0 },	/* ld_lock */
92 	{ RECURSIVEMUTEX, NULL, 0 },	/* fork_lock */
93 	{ RECURSIVEMUTEX, NULL, 0 },	/* atfork_lock */
94 	{ RECURSIVEMUTEX, NULL, 0 },	/* callout_lock */
95 	{ DEFAULTMUTEX, NULL, 0 },	/* tdb_hash_lock */
96 	{ 0, },				/* tdb_hash_lock_stats */
97 	{ { 0 }, },			/* siguaction[NSIG] */
98 	{{ DEFAULTMUTEX, NULL, 0 },		/* bucket[NBUCKETS] */
99 	{ DEFAULTMUTEX, NULL, 0 },
100 	{ DEFAULTMUTEX, NULL, 0 },
101 	{ DEFAULTMUTEX, NULL, 0 },
102 	{ DEFAULTMUTEX, NULL, 0 },
103 	{ DEFAULTMUTEX, NULL, 0 },
104 	{ DEFAULTMUTEX, NULL, 0 },
105 	{ DEFAULTMUTEX, NULL, 0 },
106 	{ DEFAULTMUTEX, NULL, 0 },
107 	{ DEFAULTMUTEX, NULL, 0 }},
108 	{ RECURSIVEMUTEX, NULL, NULL },		/* atexit_root */
109 	{ RECURSIVEMUTEX, NULL },		/* quickexit_root */
110 	{ DEFAULTMUTEX, 0, 0, NULL },		/* tsd_metadata */
111 	{ DEFAULTMUTEX, {0, 0}, {0, 0} },	/* tls_metadata */
112 	0,			/* primary_map */
113 	0,			/* bucket_init */
114 	0,			/* pad[0] */
115 	0,			/* pad[1] */
116 	{ 0 },			/* uberflags */
117 	NULL,			/* queue_head */
118 	init_hash_table,	/* thr_hash_table */
119 	1,			/* hash_size: size of the hash table */
120 	0,			/* hash_mask: hash_size - 1 */
121 	NULL,			/* ulwp_one */
122 	NULL,			/* all_lwps */
123 	NULL,			/* all_zombies */
124 	0,			/* nthreads */
125 	0,			/* nzombies */
126 	0,			/* ndaemons */
127 	0,			/* pid */
128 	sigacthandler,		/* sigacthandler */
129 	NULL,			/* lwp_stacks */
130 	NULL,			/* lwp_laststack */
131 	0,			/* nfreestack */
132 	10,			/* thread_stack_cache */
133 	NULL,			/* ulwp_freelist */
134 	NULL,			/* ulwp_lastfree */
135 	NULL,			/* ulwp_replace_free */
136 	NULL,			/* ulwp_replace_last */
137 	NULL,			/* atforklist */
138 	NULL,			/* robustlocks */
139 	NULL,			/* robustlist */
140 	NULL,			/* progname */
141 	NULL,			/* __tdb_bootstrap */
142 	{			/* tdb */
143 		NULL,		/* tdb_sync_addr_hash */
144 		0,		/* tdb_register_count */
145 		0,		/* tdb_hash_alloc_failed */
146 		NULL,		/* tdb_sync_addr_free */
147 		NULL,		/* tdb_sync_addr_last */
148 		0,		/* tdb_sync_alloc */
149 		{ 0, 0 },	/* tdb_ev_global_mask */
150 		tdb_events,	/* tdb_events array */
151 	},
152 };
153 
154 /*
155  * The weak version is known to libc_db and mdb.
156  */
157 #pragma weak _tdb_bootstrap = __tdb_bootstrap
158 uberdata_t **__tdb_bootstrap = NULL;
159 
/*
 * Global tunables and their default values.
 * _thrp_create() copies the per-thread equivalents (ul_queue_fifo,
 * ul_cond_wait_defer, ...) from the creating thread into each new thread.
 */
int	thread_queue_fifo = 4;
int	thread_queue_dump = 0;
int	thread_cond_wait_defer = 0;
int	thread_error_detection = 0;
int	thread_async_safe = 0;
int	thread_stack_cache = 10;	/* same default as __uberdata's */
int	thread_door_noreserve = 0;
int	thread_locks_misaligned = 0;
168 
169 static	ulwp_t	*ulwp_alloc(void);
170 static	void	ulwp_free(ulwp_t *);
171 
172 /*
173  * Insert the lwp into the hash table.
174  */
175 void
176 hash_in_unlocked(ulwp_t *ulwp, int ix, uberdata_t *udp)
177 {
178 	ulwp->ul_hash = udp->thr_hash_table[ix].hash_bucket;
179 	udp->thr_hash_table[ix].hash_bucket = ulwp;
180 	ulwp->ul_ix = ix;
181 }
182 
183 void
184 hash_in(ulwp_t *ulwp, uberdata_t *udp)
185 {
186 	int ix = TIDHASH(ulwp->ul_lwpid, udp);
187 	mutex_t *mp = &udp->thr_hash_table[ix].hash_lock;
188 
189 	lmutex_lock(mp);
190 	hash_in_unlocked(ulwp, ix, udp);
191 	lmutex_unlock(mp);
192 }
193 
194 /*
195  * Delete the lwp from the hash table.
196  */
197 void
198 hash_out_unlocked(ulwp_t *ulwp, int ix, uberdata_t *udp)
199 {
200 	ulwp_t **ulwpp;
201 
202 	for (ulwpp = &udp->thr_hash_table[ix].hash_bucket;
203 	    ulwp != *ulwpp;
204 	    ulwpp = &(*ulwpp)->ul_hash)
205 		;
206 	*ulwpp = ulwp->ul_hash;
207 	ulwp->ul_hash = NULL;
208 	ulwp->ul_ix = -1;
209 }
210 
211 void
212 hash_out(ulwp_t *ulwp, uberdata_t *udp)
213 {
214 	int ix;
215 
216 	if ((ix = ulwp->ul_ix) >= 0) {
217 		mutex_t *mp = &udp->thr_hash_table[ix].hash_lock;
218 
219 		lmutex_lock(mp);
220 		hash_out_unlocked(ulwp, ix, udp);
221 		lmutex_unlock(mp);
222 	}
223 }
224 
225 /*
226  * Retain stack information for thread structures that are being recycled for
227  * new threads.  All other members of the thread structure should be zeroed.
228  */
229 static void
230 ulwp_clean(ulwp_t *ulwp)
231 {
232 	caddr_t stk = ulwp->ul_stk;
233 	size_t mapsiz = ulwp->ul_mapsiz;
234 	size_t guardsize = ulwp->ul_guardsize;
235 	uintptr_t stktop = ulwp->ul_stktop;
236 	size_t stksiz = ulwp->ul_stksiz;
237 
238 	(void) memset(ulwp, 0, sizeof (*ulwp));
239 
240 	ulwp->ul_stk = stk;
241 	ulwp->ul_mapsiz = mapsiz;
242 	ulwp->ul_guardsize = guardsize;
243 	ulwp->ul_stktop = stktop;
244 	ulwp->ul_stksiz = stksiz;
245 }
246 
/*
 * Protection bits used when mapping thread stacks.
 * Zero means "not determined yet"; find_stack() fills it in on first use.
 */
static int stackprot = 0;
248 
249 /*
250  * Answer the question, "Is the lwp in question really dead?"
251  * We must inquire of the operating system to be really sure
252  * because the lwp may have called lwp_exit() but it has not
253  * yet completed the exit.
254  */
255 static int
256 dead_and_buried(ulwp_t *ulwp)
257 {
258 	if (ulwp->ul_lwpid == (lwpid_t)(-1))
259 		return (1);
260 	if (ulwp->ul_dead && ulwp->ul_detached &&
261 	    _lwp_kill(ulwp->ul_lwpid, 0) == ESRCH) {
262 		ulwp->ul_lwpid = (lwpid_t)(-1);
263 		return (1);
264 	}
265 	return (0);
266 }
267 
268 /*
269  * Attempt to keep the stack cache within the specified cache limit.
270  */
271 static void
272 trim_stack_cache(int cache_limit)
273 {
274 	ulwp_t *self = curthread;
275 	uberdata_t *udp = self->ul_uberdata;
276 	ulwp_t *prev = NULL;
277 	ulwp_t **ulwpp = &udp->lwp_stacks;
278 	ulwp_t *ulwp;
279 
280 	ASSERT(udp->nthreads <= 1 || MUTEX_OWNED(&udp->link_lock, self));
281 
282 	while (udp->nfreestack > cache_limit && (ulwp = *ulwpp) != NULL) {
283 		if (dead_and_buried(ulwp)) {
284 			*ulwpp = ulwp->ul_next;
285 			if (ulwp == udp->lwp_laststack)
286 				udp->lwp_laststack = prev;
287 			hash_out(ulwp, udp);
288 			udp->nfreestack--;
289 			(void) munmap(ulwp->ul_stk, ulwp->ul_mapsiz);
290 			/*
291 			 * Now put the free ulwp on the ulwp freelist.
292 			 */
293 			ulwp->ul_mapsiz = 0;
294 			ulwp->ul_next = NULL;
295 			if (udp->ulwp_freelist == NULL)
296 				udp->ulwp_freelist = udp->ulwp_lastfree = ulwp;
297 			else {
298 				udp->ulwp_lastfree->ul_next = ulwp;
299 				udp->ulwp_lastfree = ulwp;
300 			}
301 		} else {
302 			prev = ulwp;
303 			ulwpp = &ulwp->ul_next;
304 		}
305 	}
306 }
307 
308 /*
309  * Find an unused stack of the requested size
310  * or create a new stack of the requested size.
311  * Return a pointer to the ulwp_t structure referring to the stack, or NULL.
312  * thr_exit() stores 1 in the ul_dead member.
313  * thr_join() stores -1 in the ul_lwpid member.
314  */
315 static ulwp_t *
316 find_stack(size_t stksize, size_t guardsize)
317 {
318 	static size_t pagesize = 0;
319 
320 	uberdata_t *udp = curthread->ul_uberdata;
321 	size_t mapsize;
322 	ulwp_t *prev;
323 	ulwp_t *ulwp;
324 	ulwp_t **ulwpp;
325 	void *stk;
326 
327 	/*
328 	 * The stack is allocated PROT_READ|PROT_WRITE|PROT_EXEC
329 	 * unless overridden by the system's configuration.
330 	 */
331 	if (stackprot == 0) {	/* do this once */
332 		long lprot = _sysconf(_SC_STACK_PROT);
333 		if (lprot <= 0)
334 			lprot = (PROT_READ|PROT_WRITE|PROT_EXEC);
335 		stackprot = (int)lprot;
336 	}
337 	if (pagesize == 0)	/* do this once */
338 		pagesize = _sysconf(_SC_PAGESIZE);
339 
340 	/*
341 	 * One megabyte stacks by default, but subtract off
342 	 * two pages for the system-created red zones.
343 	 * Round up a non-zero stack size to a pagesize multiple.
344 	 */
345 	if (stksize == 0)
346 		stksize = DEFAULTSTACK - 2 * pagesize;
347 	else
348 		stksize = ((stksize + pagesize - 1) & -pagesize);
349 
350 	/*
351 	 * Round up the mapping size to a multiple of pagesize.
352 	 * Note: mmap() provides at least one page of red zone
353 	 * so we deduct that from the value of guardsize.
354 	 */
355 	if (guardsize != 0)
356 		guardsize = ((guardsize + pagesize - 1) & -pagesize) - pagesize;
357 	mapsize = stksize + guardsize;
358 
359 	lmutex_lock(&udp->link_lock);
360 	for (prev = NULL, ulwpp = &udp->lwp_stacks;
361 	    (ulwp = *ulwpp) != NULL;
362 	    prev = ulwp, ulwpp = &ulwp->ul_next) {
363 		if (ulwp->ul_mapsiz == mapsize &&
364 		    ulwp->ul_guardsize == guardsize &&
365 		    dead_and_buried(ulwp)) {
366 			/*
367 			 * The previous lwp is gone; reuse the stack.
368 			 * Remove the ulwp from the stack list.
369 			 */
370 			*ulwpp = ulwp->ul_next;
371 			ulwp->ul_next = NULL;
372 			if (ulwp == udp->lwp_laststack)
373 				udp->lwp_laststack = prev;
374 			hash_out(ulwp, udp);
375 			udp->nfreestack--;
376 			lmutex_unlock(&udp->link_lock);
377 			ulwp_clean(ulwp);
378 			return (ulwp);
379 		}
380 	}
381 
382 	/*
383 	 * None of the cached stacks matched our mapping size.
384 	 * Reduce the stack cache to get rid of possibly
385 	 * very old stacks that will never be reused.
386 	 */
387 	if (udp->nfreestack > udp->thread_stack_cache)
388 		trim_stack_cache(udp->thread_stack_cache);
389 	else if (udp->nfreestack > 0)
390 		trim_stack_cache(udp->nfreestack - 1);
391 	lmutex_unlock(&udp->link_lock);
392 
393 	/*
394 	 * Create a new stack.
395 	 */
396 	if ((stk = mmap(NULL, mapsize, stackprot,
397 	    MAP_PRIVATE|MAP_NORESERVE|MAP_ANON, -1, (off_t)0)) != MAP_FAILED) {
398 		/*
399 		 * We have allocated our stack.  Now allocate the ulwp.
400 		 */
401 		ulwp = ulwp_alloc();
402 		if (ulwp == NULL)
403 			(void) munmap(stk, mapsize);
404 		else {
405 			ulwp->ul_stk = stk;
406 			ulwp->ul_mapsiz = mapsize;
407 			ulwp->ul_guardsize = guardsize;
408 			ulwp->ul_stktop = (uintptr_t)stk + mapsize;
409 			ulwp->ul_stksiz = stksize;
410 			if (guardsize)	/* protect the extra red zone */
411 				(void) mprotect(stk, guardsize, PROT_NONE);
412 		}
413 	}
414 	return (ulwp);
415 }
416 
417 /*
418  * Get a ulwp_t structure from the free list or allocate a new one.
419  * Such ulwp_t's do not have a stack allocated by the library.
420  */
421 static ulwp_t *
422 ulwp_alloc(void)
423 {
424 	ulwp_t *self = curthread;
425 	uberdata_t *udp = self->ul_uberdata;
426 	size_t tls_size;
427 	ulwp_t *prev;
428 	ulwp_t *ulwp;
429 	ulwp_t **ulwpp;
430 	caddr_t data;
431 
432 	lmutex_lock(&udp->link_lock);
433 	for (prev = NULL, ulwpp = &udp->ulwp_freelist;
434 	    (ulwp = *ulwpp) != NULL;
435 	    prev = ulwp, ulwpp = &ulwp->ul_next) {
436 		if (dead_and_buried(ulwp)) {
437 			*ulwpp = ulwp->ul_next;
438 			ulwp->ul_next = NULL;
439 			if (ulwp == udp->ulwp_lastfree)
440 				udp->ulwp_lastfree = prev;
441 			hash_out(ulwp, udp);
442 			lmutex_unlock(&udp->link_lock);
443 			ulwp_clean(ulwp);
444 			return (ulwp);
445 		}
446 	}
447 	lmutex_unlock(&udp->link_lock);
448 
449 	tls_size = roundup64(udp->tls_metadata.static_tls.tls_size);
450 	data = lmalloc(sizeof (*ulwp) + tls_size);
451 	if (data != NULL) {
452 		/* LINTED pointer cast may result in improper alignment */
453 		ulwp = (ulwp_t *)(data + tls_size);
454 	}
455 	return (ulwp);
456 }
457 
458 /*
459  * Free a ulwp structure.
460  * If there is an associated stack, put it on the stack list and
461  * munmap() previously freed stacks up to the residual cache limit.
462  * Else put it on the ulwp free list and never call lfree() on it.
463  */
464 static void
465 ulwp_free(ulwp_t *ulwp)
466 {
467 	uberdata_t *udp = curthread->ul_uberdata;
468 
469 	ASSERT(udp->nthreads <= 1 || MUTEX_OWNED(&udp->link_lock, curthread));
470 	ulwp->ul_next = NULL;
471 	if (ulwp == udp->ulwp_one)	/* don't reuse the primoridal stack */
472 		/*EMPTY*/;
473 	else if (ulwp->ul_mapsiz != 0) {
474 		if (udp->lwp_stacks == NULL)
475 			udp->lwp_stacks = udp->lwp_laststack = ulwp;
476 		else {
477 			udp->lwp_laststack->ul_next = ulwp;
478 			udp->lwp_laststack = ulwp;
479 		}
480 		if (++udp->nfreestack > udp->thread_stack_cache)
481 			trim_stack_cache(udp->thread_stack_cache);
482 	} else {
483 		if (udp->ulwp_freelist == NULL)
484 			udp->ulwp_freelist = udp->ulwp_lastfree = ulwp;
485 		else {
486 			udp->ulwp_lastfree->ul_next = ulwp;
487 			udp->ulwp_lastfree = ulwp;
488 		}
489 	}
490 }
491 
492 /*
493  * Find a named lwp and return a pointer to its hash list location.
494  * On success, returns with the hash lock held.
495  */
496 ulwp_t **
497 find_lwpp(thread_t tid)
498 {
499 	uberdata_t *udp = curthread->ul_uberdata;
500 	int ix = TIDHASH(tid, udp);
501 	mutex_t *mp = &udp->thr_hash_table[ix].hash_lock;
502 	ulwp_t *ulwp;
503 	ulwp_t **ulwpp;
504 
505 	if (tid == 0)
506 		return (NULL);
507 
508 	lmutex_lock(mp);
509 	for (ulwpp = &udp->thr_hash_table[ix].hash_bucket;
510 	    (ulwp = *ulwpp) != NULL;
511 	    ulwpp = &ulwp->ul_hash) {
512 		if (ulwp->ul_lwpid == tid)
513 			return (ulwpp);
514 	}
515 	lmutex_unlock(mp);
516 	return (NULL);
517 }
518 
519 /*
520  * Wake up all lwps waiting on this lwp for some reason.
521  */
522 void
523 ulwp_broadcast(ulwp_t *ulwp)
524 {
525 	ulwp_t *self = curthread;
526 	uberdata_t *udp = self->ul_uberdata;
527 
528 	ASSERT(MUTEX_OWNED(ulwp_mutex(ulwp, udp), self));
529 	(void) cond_broadcast(ulwp_condvar(ulwp, udp));
530 }
531 
532 /*
533  * Find a named lwp and return a pointer to it.
534  * Returns with the hash lock held.
535  */
536 ulwp_t *
537 find_lwp(thread_t tid)
538 {
539 	ulwp_t *self = curthread;
540 	uberdata_t *udp = self->ul_uberdata;
541 	ulwp_t *ulwp = NULL;
542 	ulwp_t **ulwpp;
543 
544 	if (self->ul_lwpid == tid) {
545 		ulwp = self;
546 		ulwp_lock(ulwp, udp);
547 	} else if ((ulwpp = find_lwpp(tid)) != NULL) {
548 		ulwp = *ulwpp;
549 	}
550 
551 	if (ulwp && ulwp->ul_dead) {
552 		ulwp_unlock(ulwp, udp);
553 		ulwp = NULL;
554 	}
555 
556 	return (ulwp);
557 }
558 
559 int
560 _thrp_create(void *stk, size_t stksize, void *(*func)(void *), void *arg,
561     long flags, thread_t *new_thread, size_t guardsize)
562 {
563 	ulwp_t *self = curthread;
564 	uberdata_t *udp = self->ul_uberdata;
565 	ucontext_t uc;
566 	uint_t lwp_flags;
567 	thread_t tid;
568 	int error;
569 	ulwp_t *ulwp;
570 
571 	/*
572 	 * Enforce the restriction of not creating any threads
573 	 * until the primary link map has been initialized.
574 	 * Also, disallow thread creation to a child of vfork().
575 	 */
576 	if (!self->ul_primarymap || self->ul_vfork)
577 		return (ENOTSUP);
578 
579 	if (udp->hash_size == 1)
580 		finish_init();
581 
582 	if ((stk || stksize) && stksize < MINSTACK)
583 		return (EINVAL);
584 
585 	if (stk == NULL) {
586 		if ((ulwp = find_stack(stksize, guardsize)) == NULL)
587 			return (ENOMEM);
588 		stksize = ulwp->ul_mapsiz - ulwp->ul_guardsize;
589 	} else {
590 		/* initialize the private stack */
591 		if ((ulwp = ulwp_alloc()) == NULL)
592 			return (ENOMEM);
593 		ulwp->ul_stk = stk;
594 		ulwp->ul_stktop = (uintptr_t)stk + stksize;
595 		ulwp->ul_stksiz = stksize;
596 	}
597 	/* ulwp is not in the hash table; make sure hash_out() doesn't fail */
598 	ulwp->ul_ix = -1;
599 	ulwp->ul_errnop = &ulwp->ul_errno;
600 
601 	lwp_flags = LWP_SUSPENDED;
602 	if (flags & (THR_DETACHED|THR_DAEMON)) {
603 		flags |= THR_DETACHED;
604 		lwp_flags |= LWP_DETACHED;
605 	}
606 	if (flags & THR_DAEMON)
607 		lwp_flags |= LWP_DAEMON;
608 
609 	/* creating a thread: enforce mt-correctness in mutex_lock() */
610 	self->ul_async_safe = 1;
611 
612 	/* per-thread copies of global variables, for speed */
613 	ulwp->ul_queue_fifo = self->ul_queue_fifo;
614 	ulwp->ul_cond_wait_defer = self->ul_cond_wait_defer;
615 	ulwp->ul_error_detection = self->ul_error_detection;
616 	ulwp->ul_async_safe = self->ul_async_safe;
617 	ulwp->ul_max_spinners = self->ul_max_spinners;
618 	ulwp->ul_adaptive_spin = self->ul_adaptive_spin;
619 	ulwp->ul_queue_spin = self->ul_queue_spin;
620 	ulwp->ul_door_noreserve = self->ul_door_noreserve;
621 	ulwp->ul_misaligned = self->ul_misaligned;
622 
623 	/* new thread inherits creating thread's scheduling parameters */
624 	ulwp->ul_policy = self->ul_policy;
625 	ulwp->ul_pri = (self->ul_epri? self->ul_epri : self->ul_pri);
626 	ulwp->ul_cid = self->ul_cid;
627 	ulwp->ul_rtclassid = self->ul_rtclassid;
628 
629 	ulwp->ul_primarymap = self->ul_primarymap;
630 	ulwp->ul_self = ulwp;
631 	ulwp->ul_uberdata = udp;
632 
633 	/* debugger support */
634 	ulwp->ul_usropts = flags;
635 
636 #ifdef __sparc
637 	/*
638 	 * We cache several instructions in the thread structure for use
639 	 * by the fasttrap DTrace provider. When changing this, read the
640 	 * comment in fasttrap.h for the all the other places that must
641 	 * be changed.
642 	 */
643 	ulwp->ul_dsave = 0x9de04000;	/* save %g1, %g0, %sp */
644 	ulwp->ul_drestore = 0x81e80000;	/* restore %g0, %g0, %g0 */
645 	ulwp->ul_dftret = 0x91d0203a;	/* ta 0x3a */
646 	ulwp->ul_dreturn = 0x81ca0000;	/* return %o0 */
647 #endif
648 
649 	ulwp->ul_startpc = func;
650 	ulwp->ul_startarg = arg;
651 	_fpinherit(ulwp);
652 	/*
653 	 * Defer signals on the new thread until its TLS constructors
654 	 * have been called.  _thrp_setup() will call sigon() after
655 	 * it has called tls_setup().
656 	 */
657 	ulwp->ul_sigdefer = 1;
658 
659 	error = setup_context(&uc, _thrp_setup, ulwp,
660 	    (caddr_t)ulwp->ul_stk + ulwp->ul_guardsize, stksize);
661 	if (error != 0 && stk != NULL)	/* inaccessible stack */
662 		error = EFAULT;
663 
664 	/*
665 	 * Call enter_critical() to avoid being suspended until we
666 	 * have linked the new thread into the proper lists.
667 	 * This is necessary because forkall() and fork1() must
668 	 * suspend all threads and they must see a complete list.
669 	 */
670 	enter_critical(self);
671 	uc.uc_sigmask = ulwp->ul_sigmask = self->ul_sigmask;
672 	if (error != 0 ||
673 	    (error = __lwp_create(&uc, lwp_flags, &tid)) != 0) {
674 		exit_critical(self);
675 		ulwp->ul_lwpid = (lwpid_t)(-1);
676 		ulwp->ul_dead = 1;
677 		ulwp->ul_detached = 1;
678 		lmutex_lock(&udp->link_lock);
679 		ulwp_free(ulwp);
680 		lmutex_unlock(&udp->link_lock);
681 		return (error);
682 	}
683 	self->ul_nocancel = 0;	/* cancellation is now possible */
684 	udp->uberflags.uf_mt = 1;
685 	if (new_thread)
686 		*new_thread = tid;
687 	if (flags & THR_DETACHED)
688 		ulwp->ul_detached = 1;
689 	ulwp->ul_lwpid = tid;
690 	ulwp->ul_stop = TSTP_REGULAR;
691 	if (flags & THR_SUSPENDED)
692 		ulwp->ul_created = 1;
693 
694 	lmutex_lock(&udp->link_lock);
695 	ulwp->ul_forw = udp->all_lwps;
696 	ulwp->ul_back = udp->all_lwps->ul_back;
697 	ulwp->ul_back->ul_forw = ulwp;
698 	ulwp->ul_forw->ul_back = ulwp;
699 	hash_in(ulwp, udp);
700 	udp->nthreads++;
701 	if (flags & THR_DAEMON)
702 		udp->ndaemons++;
703 	if (flags & THR_NEW_LWP)
704 		thr_concurrency++;
705 	__libc_threaded = 1;		/* inform stdio */
706 	lmutex_unlock(&udp->link_lock);
707 
708 	if (__td_event_report(self, TD_CREATE, udp)) {
709 		self->ul_td_evbuf.eventnum = TD_CREATE;
710 		self->ul_td_evbuf.eventdata = (void *)(uintptr_t)tid;
711 		tdb_event(TD_CREATE, udp);
712 	}
713 
714 	exit_critical(self);
715 
716 	if (!(flags & THR_SUSPENDED))
717 		(void) _thrp_continue(tid, TSTP_REGULAR);
718 
719 	return (0);
720 }
721 
722 int
723 thr_create(void *stk, size_t stksize, void *(*func)(void *), void *arg,
724     long flags, thread_t *new_thread)
725 {
726 	return (_thrp_create(stk, stksize, func, arg, flags, new_thread, 0));
727 }
728 
/*
 * Special cancellation cleanup hook for DCE.
 * When not NULL, this is a callback that is invoked before a thread is
 * terminated in thr_exit() as a result of being cancelled.
 */
static void (*cleanuphndlr)(void) = NULL;

/*
 * _pthread_setcleanupinit: install (or, with NULL, remove) the DCE
 * cleanup hook.  Always returns 0.
 */
int
_pthread_setcleanupinit(void (*func)(void))
{
	cleanuphndlr = func;

	return (0);
}
746 
747 void
748 _thrp_exit()
749 {
750 	ulwp_t *self = curthread;
751 	uberdata_t *udp = self->ul_uberdata;
752 	ulwp_t *replace = NULL;
753 
754 	if (__td_event_report(self, TD_DEATH, udp)) {
755 		self->ul_td_evbuf.eventnum = TD_DEATH;
756 		tdb_event(TD_DEATH, udp);
757 	}
758 
759 	ASSERT(self->ul_sigdefer != 0);
760 
761 	lmutex_lock(&udp->link_lock);
762 	udp->nthreads--;
763 	if (self->ul_usropts & THR_NEW_LWP)
764 		thr_concurrency--;
765 	if (self->ul_usropts & THR_DAEMON)
766 		udp->ndaemons--;
767 	else if (udp->nthreads == udp->ndaemons) {
768 		/*
769 		 * We are the last non-daemon thread exiting.
770 		 * Exit the process.  We retain our TSD and TLS so
771 		 * that atexit() application functions can use them.
772 		 */
773 		lmutex_unlock(&udp->link_lock);
774 		exit(0);
775 		thr_panic("_thrp_exit(): exit(0) returned");
776 	}
777 	lmutex_unlock(&udp->link_lock);
778 
779 	tmem_exit();		/* deallocate tmem allocations */
780 	tsd_exit();		/* deallocate thread-specific data */
781 	tls_exit();		/* deallocate thread-local storage */
782 	heldlock_exit();	/* deal with left-over held locks */
783 
784 	/* block all signals to finish exiting */
785 	block_all_signals(self);
786 	/* also prevent ourself from being suspended */
787 	enter_critical(self);
788 	rwl_free(self);
789 	lmutex_lock(&udp->link_lock);
790 	ulwp_free(self);
791 	(void) ulwp_lock(self, udp);
792 
793 	if (self->ul_mapsiz && !self->ul_detached) {
794 		/*
795 		 * We want to free the stack for reuse but must keep
796 		 * the ulwp_t struct for the benefit of thr_join().
797 		 * For this purpose we allocate a replacement ulwp_t.
798 		 */
799 		if ((replace = udp->ulwp_replace_free) == NULL)
800 			replace = lmalloc(REPLACEMENT_SIZE);
801 		else if ((udp->ulwp_replace_free = replace->ul_next) == NULL)
802 			udp->ulwp_replace_last = NULL;
803 	}
804 
805 	if (udp->all_lwps == self)
806 		udp->all_lwps = self->ul_forw;
807 	if (udp->all_lwps == self)
808 		udp->all_lwps = NULL;
809 	else {
810 		self->ul_forw->ul_back = self->ul_back;
811 		self->ul_back->ul_forw = self->ul_forw;
812 	}
813 	self->ul_forw = self->ul_back = NULL;
814 #if defined(THREAD_DEBUG)
815 	/* collect queue lock statistics before marking ourself dead */
816 	record_spin_locks(self);
817 #endif
818 	self->ul_dead = 1;
819 	self->ul_pleasestop = 0;
820 	if (replace != NULL) {
821 		int ix = self->ul_ix;		/* the hash index */
822 		(void) memcpy(replace, self, REPLACEMENT_SIZE);
823 		replace->ul_self = replace;
824 		replace->ul_next = NULL;	/* clone not on stack list */
825 		replace->ul_mapsiz = 0;		/* allows clone to be freed */
826 		replace->ul_replace = 1;	/* requires clone to be freed */
827 		hash_out_unlocked(self, ix, udp);
828 		hash_in_unlocked(replace, ix, udp);
829 		ASSERT(!(self->ul_detached));
830 		self->ul_detached = 1;		/* this frees the stack */
831 		self->ul_schedctl = NULL;
832 		self->ul_schedctl_called = &udp->uberflags;
833 		set_curthread(self = replace);
834 		/*
835 		 * Having just changed the address of curthread, we
836 		 * must reset the ownership of the locks we hold so
837 		 * that assertions will not fire when we release them.
838 		 */
839 		udp->link_lock.mutex_owner = (uintptr_t)self;
840 		ulwp_mutex(self, udp)->mutex_owner = (uintptr_t)self;
841 		/*
842 		 * NOTE:
843 		 * On i386, %gs still references the original, not the
844 		 * replacement, ulwp structure.  Fetching the replacement
845 		 * curthread pointer via %gs:0 works correctly since the
846 		 * original ulwp structure will not be reallocated until
847 		 * this lwp has completed its lwp_exit() system call (see
848 		 * dead_and_buried()), but from here on out, we must make
849 		 * no references to %gs:<offset> other than %gs:0.
850 		 */
851 	}
852 	/*
853 	 * Put non-detached terminated threads in the all_zombies list.
854 	 */
855 	if (!self->ul_detached) {
856 		udp->nzombies++;
857 		if (udp->all_zombies == NULL) {
858 			ASSERT(udp->nzombies == 1);
859 			udp->all_zombies = self->ul_forw = self->ul_back = self;
860 		} else {
861 			self->ul_forw = udp->all_zombies;
862 			self->ul_back = udp->all_zombies->ul_back;
863 			self->ul_back->ul_forw = self;
864 			self->ul_forw->ul_back = self;
865 		}
866 	}
867 	/*
868 	 * Notify everyone waiting for this thread.
869 	 */
870 	ulwp_broadcast(self);
871 	(void) ulwp_unlock(self, udp);
872 	/*
873 	 * Prevent any more references to the schedctl data.
874 	 * We are exiting and continue_fork() may not find us.
875 	 * Do this just before dropping link_lock, since fork
876 	 * serializes on link_lock.
877 	 */
878 	self->ul_schedctl = NULL;
879 	self->ul_schedctl_called = &udp->uberflags;
880 	lmutex_unlock(&udp->link_lock);
881 
882 	ASSERT(self->ul_critical == 1);
883 	ASSERT(self->ul_preempt == 0);
884 	_lwp_terminate();	/* never returns */
885 	thr_panic("_thrp_exit(): _lwp_terminate() returned");
886 }
887 
#if defined(THREAD_DEBUG)
/*
 * When the thread_queue_dump tunable is set, call record_spin_locks()
 * on every thread in the circular all_lwps list, under link_lock.
 * Only compiled when THREAD_DEBUG is defined.
 */
void
collect_queue_statistics(void)
{
	uberdata_t *udp = curthread->ul_uberdata;
	ulwp_t *ulwp;

	if (thread_queue_dump) {
		lmutex_lock(&udp->link_lock);
		if ((ulwp = udp->all_lwps) != NULL) {
			/* the list is circular: stop when back at the head */
			do {
				record_spin_locks(ulwp);
			} while ((ulwp = ulwp->ul_forw) != udp->all_lwps);
		}
		lmutex_unlock(&udp->link_lock);
	}
}
#endif
906 
907 static void __NORETURN
908 _thrp_exit_common(void *status, int unwind)
909 {
910 	ulwp_t *self = curthread;
911 	int cancelled = (self->ul_cancel_pending && status == PTHREAD_CANCELED);
912 
913 	ASSERT(self->ul_critical == 0 && self->ul_preempt == 0);
914 
915 	/*
916 	 * Disable cancellation and call the special DCE cancellation
917 	 * cleanup hook if it is enabled.  Do nothing else before calling
918 	 * the DCE cancellation cleanup hook; it may call longjmp() and
919 	 * never return here.
920 	 */
921 	self->ul_cancel_disabled = 1;
922 	self->ul_cancel_async = 0;
923 	self->ul_save_async = 0;
924 	self->ul_cancelable = 0;
925 	self->ul_cancel_pending = 0;
926 	set_cancel_pending_flag(self, 1);
927 	if (cancelled && cleanuphndlr != NULL)
928 		(*cleanuphndlr)();
929 
930 	/*
931 	 * Block application signals while we are exiting.
932 	 * We call out to C++, TSD, and TLS destructors while exiting
933 	 * and these are application-defined, so we cannot be assured
934 	 * that they won't reset the signal mask.  We use sigoff() to
935 	 * defer any signals that may be received as a result of this
936 	 * bad behavior.  Such signals will be lost to the process
937 	 * when the thread finishes exiting.
938 	 */
939 	(void) thr_sigsetmask(SIG_SETMASK, &maskset, NULL);
940 	sigoff(self);
941 
942 	self->ul_rval = status;
943 
944 	/*
945 	 * If thr_exit is being called from the places where
946 	 * C++ destructors are to be called such as cancellation
947 	 * points, then set this flag. It is checked in _t_cancel()
948 	 * to decide whether _ex_unwind() is to be called or not.
949 	 */
950 	if (unwind)
951 		self->ul_unwind = 1;
952 
953 	/*
954 	 * _thrp_unwind() will eventually call _thrp_exit().
955 	 * It never returns.
956 	 */
957 	_thrp_unwind(NULL);
958 	thr_panic("_thrp_exit_common(): _thrp_unwind() returned");
959 
960 	for (;;)	/* to shut the compiler up about __NORETURN */
961 		continue;
962 }
963 
/*
 * Entered when a thread's start function returns; 'status' is the
 * thread's exit value.  We are already at the top of the stack, so the
 * exit is requested without unwinding.
 */
void
_thrp_terminate(void *status)
{
	const int no_unwind = 0;

	_thrp_exit_common(status, no_unwind);
}
973 
/*
 * Terminate the calling thread with exit value 'status'.
 * Unlike _thrp_terminate(), this requests unwinding.
 * Also exported as pthread_exit() and _thr_exit().
 */
#pragma weak pthread_exit = thr_exit
#pragma weak _thr_exit = thr_exit
void
thr_exit(void *status)
{
	const int do_unwind = 1;

	_thrp_exit_common(status, do_unwind);
}
981 
/*
 * Wait for an lwp to terminate and reclaim the library's record of it.
 *
 *	tid		id passed to lwp_wait()/__lwp_wait()
 *	departed	if non-NULL, receives the id of the lwp that was reaped
 *	status		if non-NULL, receives the reaped thread's ul_rval, or
 *			NULL when the lwp is not known to the library
 *	do_cancel	non-zero: wait with lwp_wait();
 *			zero: wait with __lwp_wait(), retrying on EINTR
 *
 * Returns 0 on success, otherwise the error reported by the wait call.
 */
int
_thrp_join(thread_t tid, thread_t *departed, void **status, int do_cancel)
{
	uberdata_t *udp = curthread->ul_uberdata;
	mutex_t *mp;
	void *rval;
	thread_t found;
	ulwp_t *ulwp;
	ulwp_t **ulwpp;
	int replace;
	int error;

	if (do_cancel)
		error = lwp_wait(tid, &found);
	else {
		/* EINTR is not reported to the caller here; just retry */
		while ((error = __lwp_wait(tid, &found)) == EINTR)
			;
	}
	if (error)
		return (error);

	/*
	 * We must hold link_lock to avoid a race condition with find_stack().
	 */
	lmutex_lock(&udp->link_lock);
	if ((ulwpp = find_lwpp(found)) == NULL) {
		/*
		 * lwp_wait() found an lwp that the library doesn't know
		 * about.  It must have been created with _lwp_create().
		 * Just return its lwpid; we can't know its status.
		 */
		lmutex_unlock(&udp->link_lock);
		rval = NULL;
	} else {
		/*
		 * Remove ulwp from the hash table.
		 */
		ulwp = *ulwpp;
		*ulwpp = ulwp->ul_hash;
		ulwp->ul_hash = NULL;
		/*
		 * Remove ulwp from all_zombies list.
		 * If ulwp is the list head, advance the head first; if the
		 * head still equals ulwp afterwards, ulwp was the only
		 * element and the list becomes empty.
		 */
		ASSERT(udp->nzombies >= 1);
		if (udp->all_zombies == ulwp)
			udp->all_zombies = ulwp->ul_forw;
		if (udp->all_zombies == ulwp)
			udp->all_zombies = NULL;
		else {
			ulwp->ul_forw->ul_back = ulwp->ul_back;
			ulwp->ul_back->ul_forw = ulwp->ul_forw;
		}
		ulwp->ul_forw = ulwp->ul_back = NULL;
		udp->nzombies--;
		ASSERT(ulwp->ul_dead && !ulwp->ul_detached &&
		    !(ulwp->ul_usropts & (THR_DETACHED|THR_DAEMON)));
		/*
		 * We can't call ulwp_unlock(ulwp) after we set
		 * ulwp->ul_ix = -1 so we have to get a pointer to the
		 * ulwp's hash table mutex now in order to unlock it below.
		 */
		mp = ulwp_mutex(ulwp, udp);
		ulwp->ul_lwpid = (lwpid_t)(-1);
		ulwp->ul_ix = -1;
		/* capture what we need from ulwp before dropping its lock */
		rval = ulwp->ul_rval;
		replace = ulwp->ul_replace;
		lmutex_unlock(mp);
		if (replace) {
			/* append ulwp to the tail of the ulwp_replace_free list */
			ulwp->ul_next = NULL;
			if (udp->ulwp_replace_free == NULL)
				udp->ulwp_replace_free =
				    udp->ulwp_replace_last = ulwp;
			else {
				udp->ulwp_replace_last->ul_next = ulwp;
				udp->ulwp_replace_last = ulwp;
			}
		}
		lmutex_unlock(&udp->link_lock);
	}

	if (departed != NULL)
		*departed = found;
	if (status != NULL)
		*status = rval;
	return (0);
}
1068 
1069 int
1070 thr_join(thread_t tid, thread_t *departed, void **status)
1071 {
1072 	int error = _thrp_join(tid, departed, status, 1);
1073 	return ((error == EINVAL)? ESRCH : error);
1074 }
1075 
1076 /*
1077  * pthread_join() differs from Solaris thr_join():
1078  * It does not return the departed thread's id
1079  * and hence does not have a "departed" argument.
1080  * It returns EINVAL if tid refers to a detached thread.
1081  */
1082 #pragma weak _pthread_join = pthread_join
1083 int
1084 pthread_join(pthread_t tid, void **status)
1085 {
1086 	return ((tid == 0)? ESRCH : _thrp_join(tid, NULL, status, 1));
1087 }
1088 
1089 int
1090 pthread_detach(pthread_t tid)
1091 {
1092 	uberdata_t *udp = curthread->ul_uberdata;
1093 	ulwp_t *ulwp;
1094 	ulwp_t **ulwpp;
1095 	int error = 0;
1096 
1097 	if ((ulwpp = find_lwpp(tid)) == NULL)
1098 		return (ESRCH);
1099 	ulwp = *ulwpp;
1100 
1101 	if (ulwp->ul_dead) {
1102 		ulwp_unlock(ulwp, udp);
1103 		error = _thrp_join(tid, NULL, NULL, 0);
1104 	} else {
1105 		error = __lwp_detach(tid);
1106 		ulwp->ul_detached = 1;
1107 		ulwp->ul_usropts |= THR_DETACHED;
1108 		ulwp_unlock(ulwp, udp);
1109 	}
1110 	return (error);
1111 }
1112 
/*
 * Match the name part of an environment entry.
 *
 * 'ev' is an entry of the form "NAME=value" and 'match' is the name to
 * look for.  If 'ev' begins with exactly 'match' followed by '=', return
 * a pointer to the first character of the value; otherwise return NULL.
 */
static const char *
ematch(const char *ev, const char *match)
{
	int c;

	while ((c = *match++) != '\0') {
		if (*ev++ != c)
			return (NULL);
	}
	if (*ev++ != '=')
		return (NULL);
	return (ev);
}

/*
 * Parse the decimal value of environment entry 'ev' if its name is 'match'.
 *
 * Returns:
 *	-1	the name does not match, or the value contains a
 *		character that is not a decimal digit
 *	limit	the value exceeds 'limit' (parsing stops as soon as the
 *		running value passes the limit, so it cannot overflow
 *		for the limits used in this file)
 *	n	the parsed value otherwise; an empty value parses as 0
 */
static int
envvar(const char *ev, const char *match, int limit)
{
	int val = -1;
	const char *ename;

	if ((ename = ematch(ev, match)) != NULL) {
		int c;
		for (val = 0; (c = *ename) != '\0'; ename++) {
			/*
			 * Plain char may be signed; the ctype functions
			 * are only defined for unsigned char values and EOF,
			 * so convert before classifying.
			 */
			if (!isdigit((unsigned char)c)) {
				val = -1;
				break;
			}
			val = val * 10 + (c - '0');
			if (val > limit) {
				val = limit;
				break;
			}
		}
	}
	return (val);
}
1149 
1150 static void
1151 etest(const char *ev)
1152 {
1153 	int value;
1154 
1155 	if ((value = envvar(ev, "QUEUE_SPIN", 1000000)) >= 0)
1156 		thread_queue_spin = value;
1157 	if ((value = envvar(ev, "ADAPTIVE_SPIN", 1000000)) >= 0)
1158 		thread_adaptive_spin = value;
1159 	if ((value = envvar(ev, "MAX_SPINNERS", 255)) >= 0)
1160 		thread_max_spinners = value;
1161 	if ((value = envvar(ev, "QUEUE_FIFO", 8)) >= 0)
1162 		thread_queue_fifo = value;
1163 #if defined(THREAD_DEBUG)
1164 	if ((value = envvar(ev, "QUEUE_VERIFY", 1)) >= 0)
1165 		thread_queue_verify = value;
1166 	if ((value = envvar(ev, "QUEUE_DUMP", 1)) >= 0)
1167 		thread_queue_dump = value;
1168 #endif
1169 	if ((value = envvar(ev, "STACK_CACHE", 10000)) >= 0)
1170 		thread_stack_cache = value;
1171 	if ((value = envvar(ev, "COND_WAIT_DEFER", 1)) >= 0)
1172 		thread_cond_wait_defer = value;
1173 	if ((value = envvar(ev, "ERROR_DETECTION", 2)) >= 0)
1174 		thread_error_detection = value;
1175 	if ((value = envvar(ev, "ASYNC_SAFE", 1)) >= 0)
1176 		thread_async_safe = value;
1177 	if ((value = envvar(ev, "DOOR_NORESERVE", 1)) >= 0)
1178 		thread_door_noreserve = value;
1179 	if ((value = envvar(ev, "LOCKS_MISALIGNED", 1)) >= 0)
1180 		thread_locks_misaligned = value;
1181 }
1182 
/*
 * Scan the environment for tunables named "_THREAD_*" and, for
 * compatibility with the past, "LIBTHREAD_*".  Each matching entry is
 * handed to etest() with the prefix removed.  Nothing is done when
 * there is no environment at all.
 */
static void
set_thread_vars()
{
	extern const char **_environ;
	const char **envp = _environ;
	const char *entry;

	if (envp == NULL)
		return;

	for (; (entry = *envp) != NULL; envp++) {
		/* cheap first-character test before the full prefix compare */
		switch (*entry) {
		case '_':
			if (strncmp(entry, "_THREAD_", 8) == 0)
				etest(entry + 8);
			break;
		case 'L':
			if (strncmp(entry, "LIBTHREAD_", 10) == 0)
				etest(entry + 10);
			break;
		default:
			break;
		}
	}
}
1206 
/* PROBE_SUPPORT begin */
/*
 * Weak reference: libc_init() calls __tnf_probe_notify() only when
 * the symbol resolved to a non-NULL address.
 */
#pragma weak __tnf_probe_notify
extern void __tnf_probe_notify(void);
/* PROBE_SUPPORT end */

/* same as atexit() but private to the library */
extern int _atexit(void (*)(void));

/* same as _cleanup() but private to the library */
extern void __cleanup(void);

/* atfork() setup; libc_init() runs this for every link map */
extern void atfork_init(void);

#ifdef __amd64
/*
 * Called first thing in libc_init() to gather information about cache
 * layouts for the optimized assembler string and memory functions.
 */
extern void __proc64id(void);
#endif
1223 
/*
 * libc_init() is called by ld.so.1 for library initialization.
 * We perform minimal initialization; enough to work with the main thread.
 *
 * Two paths exist.  If a thread structure already exists (oldself) and
 * either it belongs to the primary link map or we are not the primary
 * link map, only the per-link-map setup is done and we return early.
 * Otherwise a fresh ulwp_t for the main thread is allocated, populated
 * (inheriting state from oldself when there is one), installed as
 * curthread, and the remaining library subsystems are initialized.
 */
void
libc_init(void)
{
	uberdata_t *udp = &__uberdata;
	ulwp_t *oldself = __curthread();
	ucontext_t uc;
	ulwp_t *self;
	struct rlimit rl;
	caddr_t data;
	size_t tls_size;
	int setmask;

	/*
	 * For the initial stage of initialization, we must be careful
	 * not to call any function that could possibly call _cerror().
	 * For this purpose, we call only the raw system call wrappers.
	 */

#ifdef __amd64
	/*
	 * Gather information about cache layouts for optimized
	 * AMD and Intel assembler strfoo() and memfoo() functions.
	 */
	__proc64id();
#endif

	/*
	 * Every libc, regardless of which link map, must register __cleanup().
	 */
	(void) _atexit(__cleanup);

	/*
	 * We keep our uberdata on one of (a) the first alternate link map
	 * or (b) the primary link map.  We switch to the primary link map
	 * and stay there once we see it.  All intermediate link maps are
	 * subject to being unloaded at any time.
	 */
	if (oldself != NULL && (oldself->ul_primarymap || !primary_link_map)) {
		__tdb_bootstrap = oldself->ul_uberdata->tdb_bootstrap;
		mutex_setup();
		atfork_init();	/* every link map needs atfork() processing */
		init_progname();
		return;
	}

	/*
	 * To establish the main stack information, we have to get our context.
	 * This is also convenient to use for getting our signal mask.
	 */
	uc.uc_flags = UC_ALL;
	(void) __getcontext(&uc);
	ASSERT(uc.uc_link == NULL);

	/*
	 * The static TLS block and the ulwp_t share one allocation;
	 * the ulwp_t sits immediately after the TLS area.
	 */
	tls_size = roundup64(udp->tls_metadata.static_tls.tls_size);
	ASSERT(primary_link_map || tls_size == 0);
	data = lmalloc(sizeof (ulwp_t) + tls_size);
	if (data == NULL)
		thr_panic("cannot allocate thread structure for main thread");
	/* LINTED pointer cast may result in improper alignment */
	self = (ulwp_t *)(data + tls_size);
	init_hash_table[0].hash_bucket = self;

	self->ul_sigmask = uc.uc_sigmask;
	delete_reserved_signals(&self->ul_sigmask);
	/*
	 * Are the old and new sets different?
	 * (This can happen if we are currently blocking SIGCANCEL.)
	 * If so, we must explicitly set our signal mask, below.
	 */
	setmask =
	    ((self->ul_sigmask.__sigbits[0] ^ uc.uc_sigmask.__sigbits[0]) |
	    (self->ul_sigmask.__sigbits[1] ^ uc.uc_sigmask.__sigbits[1]) |
	    (self->ul_sigmask.__sigbits[2] ^ uc.uc_sigmask.__sigbits[2]) |
	    (self->ul_sigmask.__sigbits[3] ^ uc.uc_sigmask.__sigbits[3]));

#ifdef __sparc
	/*
	 * We cache several instructions in the thread structure for use
	 * by the fasttrap DTrace provider. When changing this, read the
	 * comment in fasttrap.h for all the other places that must
	 * be changed.
	 */
	self->ul_dsave = 0x9de04000;	/* save %g1, %g0, %sp */
	self->ul_drestore = 0x81e80000;	/* restore %g0, %g0, %g0 */
	self->ul_dftret = 0x91d0203a;	/* ta 0x3a */
	self->ul_dreturn = 0x81ca0000;	/* return %o0 */
#endif

	/*
	 * Stack top comes from the context; the stack size is taken to be
	 * the current RLIMIT_STACK soft limit (which may be RLIM_INFINITY;
	 * that case is handled when ul_ustack is set up, below).
	 */
	self->ul_stktop = (uintptr_t)uc.uc_stack.ss_sp + uc.uc_stack.ss_size;
	(void) getrlimit(RLIMIT_STACK, &rl);
	self->ul_stksiz = rl.rlim_cur;
	self->ul_stk = (caddr_t)(self->ul_stktop - self->ul_stksiz);

	self->ul_forw = self->ul_back = self;
	self->ul_hash = NULL;
	self->ul_ix = 0;
	self->ul_lwpid = 1; /* _lwp_self() */
	self->ul_main = 1;
	self->ul_self = self;
	self->ul_policy = -1;		/* initialize only when needed */
	self->ul_pri = 0;
	self->ul_cid = 0;
	self->ul_rtclassid = -1;
	self->ul_uberdata = udp;
	if (oldself != NULL) {
		int i;

		/* carry thread-specific data and TLS over from oldself */
		ASSERT(primary_link_map);
		ASSERT(oldself->ul_main == 1);
		self->ul_stsd = oldself->ul_stsd;
		for (i = 0; i < TSD_NFAST; i++)
			self->ul_ftsd[i] = oldself->ul_ftsd[i];
		self->ul_tls = oldself->ul_tls;
		/*
		 * Retrieve all pointers to uberdata allocated
		 * while running on previous link maps.
		 * We would like to do a structure assignment here, but
		 * gcc turns structure assignments into calls to memcpy(),
		 * a function exported from libc.  We can't call any such
		 * external functions until we establish curthread, below,
		 * so we just call our private version of memcpy().
		 */
		(void) memcpy(udp, oldself->ul_uberdata, sizeof (*udp));
		/*
		 * These items point to global data on the primary link map.
		 */
		udp->thr_hash_table = init_hash_table;
		udp->sigacthandler = sigacthandler;
		udp->tdb.tdb_events = tdb_events;
		ASSERT(udp->nthreads == 1 && !udp->uberflags.uf_mt);
		ASSERT(udp->lwp_stacks == NULL);
		ASSERT(udp->ulwp_freelist == NULL);
		ASSERT(udp->ulwp_replace_free == NULL);
		ASSERT(udp->hash_size == 1);
	}
	udp->all_lwps = self;
	udp->ulwp_one = self;
	udp->pid = getpid();
	udp->nthreads = 1;
	/*
	 * In every link map, tdb_bootstrap points to the same piece of
	 * allocated memory.  When the primary link map is initialized,
	 * the allocated memory is assigned a pointer to the one true
	 * uberdata.  This allows libc_db to initialize itself regardless
	 * of which instance of libc it finds in the address space.
	 */
	/* NOTE(review): this lmalloc() result is not checked for NULL */
	if (udp->tdb_bootstrap == NULL)
		udp->tdb_bootstrap = lmalloc(sizeof (uberdata_t *));
	__tdb_bootstrap = udp->tdb_bootstrap;
	if (primary_link_map) {
		self->ul_primarymap = 1;
		udp->primary_map = 1;
		*udp->tdb_bootstrap = udp;
	}
	/*
	 * Cancellation can't happen until:
	 *	pthread_cancel() is called
	 * or:
	 *	another thread is created
	 * For now, as a single-threaded process, set the flag that tells
	 * PROLOGUE/EPILOGUE (in scalls.c) that cancellation can't happen.
	 */
	self->ul_nocancel = 1;

#if defined(__amd64)
	(void) ___lwp_private(_LWP_SETPRIVATE, _LWP_FSBASE, self);
#elif defined(__i386)
	(void) ___lwp_private(_LWP_SETPRIVATE, _LWP_GSBASE, self);
#endif	/* __i386 || __amd64 */
	set_curthread(self);		/* redundant on i386 */
	/*
	 * Now curthread is established and it is safe to call any
	 * function in libc except one that uses thread-local storage.
	 */
	self->ul_errnop = &errno;
	if (oldself != NULL) {
		/* tls_size was zero when oldself was allocated */
		lfree(oldself, sizeof (ulwp_t));
	}
	mutex_setup();
	atfork_init();
	signal_init();

	/*
	 * If the stack is unlimited, we set the size to zero to disable
	 * stack checking.
	 * XXX: Work harder here.  Get the stack size from /proc/self/rmap
	 */
	if (self->ul_stksiz == RLIM_INFINITY) {
		self->ul_ustack.ss_sp = (void *)self->ul_stktop;
		self->ul_ustack.ss_size = 0;
	} else {
		self->ul_ustack.ss_sp = self->ul_stk;
		self->ul_ustack.ss_size = self->ul_stksiz;
	}
	self->ul_ustack.ss_flags = 0;
	(void) setustack(&self->ul_ustack);

	/*
	 * Get the variables that affect thread behavior from the environment.
	 */
	set_thread_vars();
	udp->uberflags.uf_thread_error_detection = (char)thread_error_detection;
	udp->thread_stack_cache = thread_stack_cache;

	/*
	 * Make per-thread copies of global variables, for speed.
	 */
	self->ul_queue_fifo = (char)thread_queue_fifo;
	self->ul_cond_wait_defer = (char)thread_cond_wait_defer;
	self->ul_error_detection = (char)thread_error_detection;
	self->ul_async_safe = (char)thread_async_safe;
	self->ul_door_noreserve = (char)thread_door_noreserve;
	self->ul_misaligned = (char)thread_locks_misaligned;
	self->ul_max_spinners = (uint8_t)thread_max_spinners;
	self->ul_adaptive_spin = thread_adaptive_spin;
	self->ul_queue_spin = thread_queue_spin;

#if defined(__sparc) && !defined(_LP64)
	if (self->ul_misaligned) {
		/*
		 * Tell the kernel to fix up ldx/stx instructions that
		 * refer to non-8-byte aligned data instead of giving
		 * the process an alignment trap and generating SIGBUS.
		 *
		 * Programs compiled for 32-bit sparc with the Studio SS12
		 * compiler get this done for them automatically (in _init()).
		 * We do it here for the benefit of programs compiled with
		 * other compilers, like gcc.
		 *
		 * This is necessary for the _THREAD_LOCKS_MISALIGNED=1
		 * environment variable horrible hack to work.
		 */
		extern void _do_fix_align(void);
		_do_fix_align();
	}
#endif

	/*
	 * When we have initialized the primary link map, inform
	 * the dynamic linker about our interface functions.
	 * Set up our pointer to the program name.
	 */
	if (self->ul_primarymap)
		_ld_libc((void *)rtld_funcs);
	init_progname();

	/*
	 * Defer signals until TLS constructors have been called.
	 */
	sigoff(self);
	tls_setup();
	sigon(self);
	/* the mask we recorded differs from the current one; apply it */
	if (setmask)
		(void) restore_signals(self);

	/*
	 * Make private copies of __xpg4 and __xpg6 so libc can test
	 * them after this point without invoking the dynamic linker.
	 */
	libc__xpg4 = __xpg4;
	libc__xpg6 = __xpg6;

	/* PROBE_SUPPORT begin */
	if (self->ul_primarymap && __tnf_probe_notify != NULL)
		__tnf_probe_notify();
	/* PROBE_SUPPORT end */

	init_sigev_thread();
	init_aio();

	/*
	 * We need to reset __threaded dynamically at runtime, so that
	 * __threaded can be bound to __threaded outside libc which may not
	 * have initial value of 1 (without a copy relocation in a.out).
	 */
	__threaded = 1;
}
1506 
1507 #pragma fini(libc_fini)
1508 void
1509 libc_fini()
1510 {
1511 	/*
1512 	 * If we are doing fini processing for the instance of libc
1513 	 * on the first alternate link map (this happens only when
1514 	 * the dynamic linker rejects a bad audit library), then clear
1515 	 * __curthread().  We abandon whatever memory was allocated by
1516 	 * lmalloc() while running on this alternate link-map but we
1517 	 * don't care (and can't find the memory in any case); we just
1518 	 * want to protect the application from this bad audit library.
1519 	 * No fini processing is done by libc in the normal case.
1520 	 */
1521 
1522 	uberdata_t *udp = curthread->ul_uberdata;
1523 
1524 	if (udp->primary_map == 0 && udp == &__uberdata)
1525 		set_curthread(NULL);
1526 }
1527 
1528 /*
1529  * finish_init is called when we are about to become multi-threaded,
1530  * that is, on the first call to thr_create().
1531  */
1532 void
1533 finish_init()
1534 {
1535 	ulwp_t *self = curthread;
1536 	uberdata_t *udp = self->ul_uberdata;
1537 	thr_hash_table_t *htp;
1538 	void *data;
1539 	int i;
1540 
1541 	/*
1542 	 * No locks needed here; we are single-threaded on the first call.
1543 	 * We can be called only after the primary link map has been set up.
1544 	 */
1545 	ASSERT(self->ul_primarymap);
1546 	ASSERT(self == udp->ulwp_one);
1547 	ASSERT(!udp->uberflags.uf_mt);
1548 	ASSERT(udp->hash_size == 1);
1549 
1550 	/*
1551 	 * Initialize self->ul_policy, self->ul_cid, and self->ul_pri.
1552 	 */
1553 	update_sched(self);
1554 
1555 	/*
1556 	 * Allocate the queue_head array if not already allocated.
1557 	 */
1558 	if (udp->queue_head == NULL)
1559 		queue_alloc();
1560 
1561 	/*
1562 	 * Now allocate the thread hash table.
1563 	 */
1564 	if ((data = mmap(NULL, HASHTBLSZ * sizeof (thr_hash_table_t),
1565 	    PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, (off_t)0))
1566 	    == MAP_FAILED)
1567 		thr_panic("cannot allocate thread hash table");
1568 
1569 	udp->thr_hash_table = htp = (thr_hash_table_t *)data;
1570 	udp->hash_size = HASHTBLSZ;
1571 	udp->hash_mask = HASHTBLSZ - 1;
1572 
1573 	for (i = 0; i < HASHTBLSZ; i++, htp++) {
1574 		htp->hash_lock.mutex_flag = LOCK_INITED;
1575 		htp->hash_lock.mutex_magic = MUTEX_MAGIC;
1576 		htp->hash_cond.cond_magic = COND_MAGIC;
1577 	}
1578 	hash_in_unlocked(self, TIDHASH(self->ul_lwpid, udp), udp);
1579 
1580 	/*
1581 	 * Set up the SIGCANCEL handler for threads cancellation.
1582 	 */
1583 	setup_cancelsig(SIGCANCEL);
1584 
1585 	/*
1586 	 * Arrange to do special things on exit --
1587 	 * - collect queue statistics from all remaining active threads.
1588 	 * - dump queue statistics to stderr if _THREAD_QUEUE_DUMP is set.
1589 	 * - grab assert_lock to ensure that assertion failures
1590 	 *   and a core dump take precedence over _exit().
1591 	 * (Functions are called in the reverse order of their registration.)
1592 	 */
1593 	(void) _atexit(grab_assert_lock);
1594 #if defined(THREAD_DEBUG)
1595 	(void) _atexit(dump_queue_statistics);
1596 	(void) _atexit(collect_queue_statistics);
1597 #endif
1598 }
1599 
1600 /*
1601  * Used only by postfork1_child(), below.
1602  */
1603 static void
1604 mark_dead_and_buried(ulwp_t *ulwp)
1605 {
1606 	ulwp->ul_dead = 1;
1607 	ulwp->ul_lwpid = (lwpid_t)(-1);
1608 	ulwp->ul_hash = NULL;
1609 	ulwp->ul_ix = -1;
1610 	ulwp->ul_schedctl = NULL;
1611 	ulwp->ul_schedctl_called = NULL;
1612 }
1613 
/*
 * This is called from fork1() in the child.
 * Reset our data structures to reflect one lwp.
 *
 * The order of operations is significant: subsystem post-fork hooks
 * run before the abandoned threads' stacks are released, and the
 * thread-pool hook runs after the subsystems that use thread pools.
 */
void
postfork1_child()
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	queue_head_t *qp;
	ulwp_t *next;
	ulwp_t *ulwp;
	int i;

	/* daemon threads shouldn't call fork1(), but oh well... */
	self->ul_usropts &= ~THR_DAEMON;
	udp->nthreads = 1;
	udp->ndaemons = 0;
	udp->uberflags.uf_mt = 0;
	__libc_threaded = 0;
	/* empty the thread hash table, then re-enter ourself under our new id */
	for (i = 0; i < udp->hash_size; i++)
		udp->thr_hash_table[i].hash_bucket = NULL;
	self->ul_lwpid = _lwp_self();
	hash_in_unlocked(self, TIDHASH(self->ul_lwpid, udp), udp);

	/*
	 * Some thread in the parent might have been suspended
	 * while holding udp->callout_lock or udp->ld_lock.
	 * Reinitialize the child's copies.
	 */
	(void) mutex_init(&udp->callout_lock,
	    USYNC_THREAD | LOCK_RECURSIVE, NULL);
	(void) mutex_init(&udp->ld_lock,
	    USYNC_THREAD | LOCK_RECURSIVE, NULL);

	/* no one in the child is on a sleep queue; reinitialize */
	if ((qp = udp->queue_head) != NULL) {
		(void) memset(qp, 0, 2 * QHASHSIZE * sizeof (queue_head_t));
		/* first QHASHSIZE entries are mutex queues, the rest are CV queues */
		for (i = 0; i < 2 * QHASHSIZE; qp++, i++) {
			qp->qh_type = (i < QHASHSIZE)? MX : CV;
			qp->qh_lock.mutex_flag = LOCK_INITED;
			qp->qh_lock.mutex_magic = MUTEX_MAGIC;
			qp->qh_hlist = &qp->qh_def_root;
#if defined(THREAD_DEBUG)
			qp->qh_hlen = 1;
			qp->qh_hmax = 1;
#endif
		}
	}

	/*
	 * Do post-fork1 processing for subsystems that need it.
	 * We need to do this before unmapping all of the abandoned
	 * threads' stacks, below, because the post-fork1 actions
	 * might require access to those stacks.
	 */
	postfork1_child_sigev_aio();
	postfork1_child_sigev_mq();
	postfork1_child_sigev_timer();
	postfork1_child_aio();
	/*
	 * The above subsystems use thread pools, so this action
	 * must be performed after those actions.
	 */
	postfork1_child_tpool();

	/*
	 * All lwps except ourself are gone.  Mark them so.
	 * First mark all of the lwps that have already been freed.
	 * Then mark and free all of the active lwps except ourself.
	 * Since we are single-threaded, no locks are required here.
	 */
	for (ulwp = udp->lwp_stacks; ulwp != NULL; ulwp = ulwp->ul_next)
		mark_dead_and_buried(ulwp);
	for (ulwp = udp->ulwp_freelist; ulwp != NULL; ulwp = ulwp->ul_next)
		mark_dead_and_buried(ulwp);
	/* 'next' is fetched first because the links are cleared in the loop */
	for (ulwp = self->ul_forw; ulwp != self; ulwp = next) {
		next = ulwp->ul_forw;
		ulwp->ul_forw = ulwp->ul_back = NULL;
		mark_dead_and_buried(ulwp);
		tsd_free(ulwp);
		tls_free(ulwp);
		rwl_free(ulwp);
		heldlock_free(ulwp);
		ulwp_free(ulwp);
	}
	/* we are now the only element of the all_lwps list */
	self->ul_forw = self->ul_back = udp->all_lwps = self;
	if (self != udp->ulwp_one)
		mark_dead_and_buried(udp->ulwp_one);
	if ((ulwp = udp->all_zombies) != NULL) {
		ASSERT(udp->nzombies != 0);
		/*
		 * Walk the circular zombie list once, unlinking each entry
		 * and appending those with ul_replace set to the tail of
		 * the ulwp_replace_free list.
		 */
		do {
			next = ulwp->ul_forw;
			ulwp->ul_forw = ulwp->ul_back = NULL;
			mark_dead_and_buried(ulwp);
			udp->nzombies--;
			if (ulwp->ul_replace) {
				ulwp->ul_next = NULL;
				if (udp->ulwp_replace_free == NULL) {
					udp->ulwp_replace_free =
					    udp->ulwp_replace_last = ulwp;
				} else {
					udp->ulwp_replace_last->ul_next = ulwp;
					udp->ulwp_replace_last = ulwp;
				}
			}
		} while ((ulwp = next) != udp->all_zombies);
		ASSERT(udp->nzombies == 0);
		udp->all_zombies = NULL;
		udp->nzombies = 0;
	}
	trim_stack_cache(0);
}
1727 
1728 lwpid_t
1729 lwp_self(void)
1730 {
1731 	return (curthread->ul_lwpid);
1732 }
1733 
1734 #pragma weak _ti_thr_self = thr_self
1735 #pragma weak pthread_self = thr_self
1736 thread_t
1737 thr_self()
1738 {
1739 	return (curthread->ul_lwpid);
1740 }
1741 
1742 int
1743 thr_main()
1744 {
1745 	ulwp_t *self = __curthread();
1746 
1747 	return ((self == NULL)? -1 : self->ul_main);
1748 }
1749 
1750 int
1751 _thrp_cancelled(void)
1752 {
1753 	return (curthread->ul_rval == PTHREAD_CANCELED);
1754 }
1755 
1756 int
1757 _thrp_stksegment(ulwp_t *ulwp, stack_t *stk)
1758 {
1759 	stk->ss_sp = (void *)ulwp->ul_stktop;
1760 	stk->ss_size = ulwp->ul_stksiz;
1761 	stk->ss_flags = 0;
1762 	return (0);
1763 }
1764 
1765 #pragma weak _thr_stksegment = thr_stksegment
1766 int
1767 thr_stksegment(stack_t *stk)
1768 {
1769 	return (_thrp_stksegment(curthread, stk));
1770 }
1771 
1772 void
1773 force_continue(ulwp_t *ulwp)
1774 {
1775 #if defined(THREAD_DEBUG)
1776 	ulwp_t *self = curthread;
1777 	uberdata_t *udp = self->ul_uberdata;
1778 #endif
1779 	int error;
1780 	timespec_t ts;
1781 
1782 	ASSERT(MUTEX_OWNED(&udp->fork_lock, self));
1783 	ASSERT(MUTEX_OWNED(ulwp_mutex(ulwp, udp), self));
1784 
1785 	for (;;) {
1786 		error = _lwp_continue(ulwp->ul_lwpid);
1787 		if (error != 0 && error != EINTR)
1788 			break;
1789 		error = 0;
1790 		if (ulwp->ul_stopping) {	/* it is stopping itsself */
1791 			ts.tv_sec = 0;		/* give it a chance to run */
1792 			ts.tv_nsec = 100000;	/* 100 usecs or clock tick */
1793 			(void) __nanosleep(&ts, NULL);
1794 		}
1795 		if (!ulwp->ul_stopping)		/* it is running now */
1796 			break;			/* so we are done */
1797 		/*
1798 		 * It is marked as being in the process of stopping
1799 		 * itself.  Loop around and continue it again.
1800 		 * It may not have been stopped the first time.
1801 		 */
1802 	}
1803 }
1804 
/*
 * Suspend an lwp with lwp_suspend(), then move it to a safe point,
 * that is, to a point where ul_critical and ul_rtld are both zero.
 * On return, the ulwp_lock() is dropped as with ulwp_unlock().
 * If 'link_dropped' is non-NULL, then 'link_lock' is held on entry.
 * If we have to drop link_lock, we store 1 through link_dropped.
 * If the lwp exits before it can be suspended, we return ESRCH.
 *
 * 'whystopped' must be exactly one of TSTP_REGULAR, TSTP_MUTATOR or
 * TSTP_FORK.  The caller must hold fork_lock and the target's hash
 * table mutex, and the target must not be the calling thread.
 * Returns 0 on success.
 */
int
safe_suspend(ulwp_t *ulwp, uchar_t whystopped, int *link_dropped)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	cond_t *cvp = ulwp_condvar(ulwp, udp);
	mutex_t *mp = ulwp_mutex(ulwp, udp);
	/* remember id and bucket now; ulwp itself may vanish while we wait */
	thread_t tid = ulwp->ul_lwpid;
	int ix = ulwp->ul_ix;
	int error = 0;

	ASSERT(whystopped == TSTP_REGULAR ||
	    whystopped == TSTP_MUTATOR ||
	    whystopped == TSTP_FORK);
	ASSERT(ulwp != self);
	ASSERT(!ulwp->ul_stop);
	ASSERT(MUTEX_OWNED(&udp->fork_lock, self));
	ASSERT(MUTEX_OWNED(mp, self));

	if (link_dropped != NULL)
		*link_dropped = 0;

	/*
	 * We must grab the target's spin lock before suspending it.
	 * See the comments below and in _thrp_suspend() for why.
	 */
	spin_lock_set(&ulwp->ul_spinlock);
	(void) ___lwp_suspend(tid);
	spin_lock_clear(&ulwp->ul_spinlock);

top:
	if ((ulwp->ul_critical == 0 && ulwp->ul_rtld == 0) ||
	    ulwp->ul_stopping) {
		/* thread is already safe */
		ulwp->ul_stop |= whystopped;
	} else {
		/*
		 * Setting ul_pleasestop causes the target thread to stop
		 * itself in _thrp_suspend(), below, after we drop its lock.
		 * We must continue the critical thread before dropping
		 * link_lock because the critical thread may be holding
		 * the queue lock for link_lock.  This is delicate.
		 */
		ulwp->ul_pleasestop |= whystopped;
		force_continue(ulwp);
		if (link_dropped != NULL) {
			*link_dropped = 1;
			lmutex_unlock(&udp->link_lock);
			/* be sure to drop link_lock only once */
			link_dropped = NULL;
		}

		/*
		 * The thread may disappear by calling thr_exit() so we
		 * cannot rely on the ulwp pointer after dropping the lock.
		 * Instead, we search the hash table to find it again.
		 * When we return, we may find that the thread has been
		 * continued by some other thread.  The suspend/continue
		 * interfaces are prone to such race conditions by design.
		 */
		while (ulwp && !ulwp->ul_dead && !ulwp->ul_stop &&
		    (ulwp->ul_pleasestop & whystopped)) {
			(void) __cond_wait(cvp, mp);
			/* re-find the target by id in its original bucket */
			for (ulwp = udp->thr_hash_table[ix].hash_bucket;
			    ulwp != NULL; ulwp = ulwp->ul_hash) {
				if (ulwp->ul_lwpid == tid)
					break;
			}
		}

		if (ulwp == NULL || ulwp->ul_dead)
			error = ESRCH;
		else {
			/*
			 * Do another lwp_suspend() to make sure we don't
			 * return until the target thread is fully stopped
			 * in the kernel.  Don't apply lwp_suspend() until
			 * we know that the target is not holding any
			 * queue locks, that is, that it has completed
			 * ulwp_unlock(self) and has, or at least is
			 * about to, call lwp_suspend() on itself.  We do
			 * this by grabbing the target's spin lock.
			 */
			ASSERT(ulwp->ul_lwpid == tid);
			spin_lock_set(&ulwp->ul_spinlock);
			(void) ___lwp_suspend(tid);
			spin_lock_clear(&ulwp->ul_spinlock);
			/*
			 * If some other thread did a thr_continue()
			 * on the target thread we have to start over.
			 */
			if (!ulwp->ul_stopping || !(ulwp->ul_stop & whystopped))
				goto top;
		}
	}

	/* wake anyone waiting on the target's condvar, then drop its lock */
	(void) cond_broadcast(cvp);
	lmutex_unlock(mp);
	return (error);
}
1913 
/*
 * Suspend thread 'tid' for the reason(s) given in 'whystopped', a
 * non-empty combination of TSTP_REGULAR, TSTP_MUTATOR and TSTP_FORK.
 * The target may be the calling thread itself.
 *
 * Returns 0 on success; ESRCH if find_lwp() cannot find 'tid' or
 * safe_suspend() reports that the target exited; EINVAL if a
 * TSTP_MUTATOR suspension is requested for a thread that is not
 * marked ul_mutator.
 */
int
_thrp_suspend(thread_t tid, uchar_t whystopped)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	ulwp_t *ulwp;
	int error = 0;

	ASSERT((whystopped & (TSTP_REGULAR|TSTP_MUTATOR|TSTP_FORK)) != 0);
	ASSERT((whystopped & ~(TSTP_REGULAR|TSTP_MUTATOR|TSTP_FORK)) == 0);

	/*
	 * We can't suspend anyone except ourself while
	 * some other thread is performing a fork.
	 * This also allows only one suspension at a time.
	 */
	if (tid != self->ul_lwpid)
		fork_lock_enter();

	if ((ulwp = find_lwp(tid)) == NULL)
		error = ESRCH;
	else if (whystopped == TSTP_MUTATOR && !ulwp->ul_mutator) {
		ulwp_unlock(ulwp, udp);
		error = EINVAL;
	} else if (ulwp->ul_stop) {	/* already stopped */
		/* just record the additional reason for being stopped */
		ulwp->ul_stop |= whystopped;
		ulwp_broadcast(ulwp);
		ulwp_unlock(ulwp, udp);
	} else if (ulwp != self) {
		/*
		 * After suspending the other thread, move it out of a
		 * critical section and deal with the schedctl mappings.
		 * safe_suspend() suspends the other thread, calls
		 * ulwp_broadcast(ulwp) and drops the ulwp lock.
		 */
		error = safe_suspend(ulwp, whystopped, NULL);
	} else {
		int schedctl_after_fork = 0;

		/*
		 * We are suspending ourself.  We must not take a signal
		 * until we return from lwp_suspend() and clear ul_stopping.
		 * This is to guard against siglongjmp().
		 */
		enter_critical(self);
		self->ul_sp = stkptr();
		_flush_windows();	/* sparc */
		self->ul_pleasestop = 0;
		self->ul_stop |= whystopped;
		/*
		 * Grab our spin lock before dropping ulwp_mutex(self).
		 * This prevents the suspending thread from applying
		 * lwp_suspend() to us before we emerge from
		 * lmutex_unlock(mp) and have dropped mp's queue lock.
		 */
		spin_lock_set(&self->ul_spinlock);
		self->ul_stopping = 1;
		ulwp_broadcast(self);
		ulwp_unlock(self, udp);
		/*
		 * From this point until we return from lwp_suspend(),
		 * we must not call any function that might invoke the
		 * dynamic linker, that is, we can only call functions
		 * private to the library.
		 *
		 * Also, this is a nasty race condition for a process
		 * that is undergoing a forkall() operation:
		 * Once we clear our spinlock (below), we are vulnerable
		 * to being suspended by the forkall() thread before
		 * we manage to suspend ourself in ___lwp_suspend().
		 * See safe_suspend() and force_continue().
		 *
		 * To avoid a SIGSEGV due to the disappearance
		 * of the schedctl mappings in the child process,
		 * which can happen in spin_lock_clear() if we
		 * are suspended while we are in the middle of
		 * its call to preempt(), we preemptively clear
		 * our own schedctl pointer before dropping our
		 * spinlock.  We reinstate it, in both the parent
		 * and (if this really is a forkall()) the child.
		 */
		if (whystopped & TSTP_FORK) {
			schedctl_after_fork = 1;
			self->ul_schedctl = NULL;
			self->ul_schedctl_called = &udp->uberflags;
		}
		spin_lock_clear(&self->ul_spinlock);
		(void) ___lwp_suspend(tid);
		/*
		 * Somebody else continued us.
		 * We can't grab ulwp_lock(self)
		 * until after clearing ul_stopping.
		 * force_continue() relies on this.
		 */
		self->ul_stopping = 0;
		self->ul_sp = 0;
		if (schedctl_after_fork) {
			/* reinstate the schedctl state cleared above */
			self->ul_schedctl_called = NULL;
			self->ul_schedctl = NULL;
			(void) setup_schedctl();
		}
		ulwp_lock(self, udp);
		ulwp_broadcast(self);
		ulwp_unlock(self, udp);
		exit_critical(self);
	}

	/* matches the fork_lock_enter() above */
	if (tid != self->ul_lwpid)
		fork_lock_exit();

	return (error);
}
2026 
2027 /*
2028  * Suspend all lwps other than ourself in preparation for fork.
2029  */
2030 void
2031 suspend_fork()
2032 {
2033 	ulwp_t *self = curthread;
2034 	uberdata_t *udp = self->ul_uberdata;
2035 	ulwp_t *ulwp;
2036 	int link_dropped;
2037 
2038 	ASSERT(MUTEX_OWNED(&udp->fork_lock, self));
2039 top:
2040 	lmutex_lock(&udp->link_lock);
2041 
2042 	for (ulwp = self->ul_forw; ulwp != self; ulwp = ulwp->ul_forw) {
2043 		ulwp_lock(ulwp, udp);
2044 		if (ulwp->ul_stop) {	/* already stopped */
2045 			ulwp->ul_stop |= TSTP_FORK;
2046 			ulwp_broadcast(ulwp);
2047 			ulwp_unlock(ulwp, udp);
2048 		} else {
2049 			/*
2050 			 * Move the stopped lwp out of a critical section.
2051 			 */
2052 			if (safe_suspend(ulwp, TSTP_FORK, &link_dropped) ||
2053 			    link_dropped)
2054 				goto top;
2055 		}
2056 	}
2057 
2058 	lmutex_unlock(&udp->link_lock);
2059 }
2060 
2061 void
2062 continue_fork(int child)
2063 {
2064 	ulwp_t *self = curthread;
2065 	uberdata_t *udp = self->ul_uberdata;
2066 	ulwp_t *ulwp;
2067 
2068 	ASSERT(MUTEX_OWNED(&udp->fork_lock, self));
2069 
2070 	/*
2071 	 * Clear the schedctl pointers in the child of forkall().
2072 	 */
2073 	if (child) {
2074 		for (ulwp = self->ul_forw; ulwp != self; ulwp = ulwp->ul_forw) {
2075 			ulwp->ul_schedctl_called =
2076 			    ulwp->ul_dead? &udp->uberflags : NULL;
2077 			ulwp->ul_schedctl = NULL;
2078 		}
2079 	}
2080 
2081 	/*
2082 	 * Set all lwps that were stopped for fork() running again.
2083 	 */
2084 	lmutex_lock(&udp->link_lock);
2085 	for (ulwp = self->ul_forw; ulwp != self; ulwp = ulwp->ul_forw) {
2086 		mutex_t *mp = ulwp_mutex(ulwp, udp);
2087 		lmutex_lock(mp);
2088 		ASSERT(ulwp->ul_stop & TSTP_FORK);
2089 		ulwp->ul_stop &= ~TSTP_FORK;
2090 		ulwp_broadcast(ulwp);
2091 		if (!ulwp->ul_stop)
2092 			force_continue(ulwp);
2093 		lmutex_unlock(mp);
2094 	}
2095 	lmutex_unlock(&udp->link_lock);
2096 }
2097 
2098 int
2099 _thrp_continue(thread_t tid, uchar_t whystopped)
2100 {
2101 	uberdata_t *udp = curthread->ul_uberdata;
2102 	ulwp_t *ulwp;
2103 	mutex_t *mp;
2104 	int error = 0;
2105 
2106 	ASSERT(whystopped == TSTP_REGULAR ||
2107 	    whystopped == TSTP_MUTATOR);
2108 
2109 	/*
2110 	 * We single-thread the entire thread suspend/continue mechanism.
2111 	 */
2112 	fork_lock_enter();
2113 
2114 	if ((ulwp = find_lwp(tid)) == NULL) {
2115 		fork_lock_exit();
2116 		return (ESRCH);
2117 	}
2118 
2119 	mp = ulwp_mutex(ulwp, udp);
2120 	if ((whystopped == TSTP_MUTATOR && !ulwp->ul_mutator)) {
2121 		error = EINVAL;
2122 	} else if (ulwp->ul_stop & whystopped) {
2123 		ulwp->ul_stop &= ~whystopped;
2124 		ulwp_broadcast(ulwp);
2125 		if (!ulwp->ul_stop) {
2126 			if (whystopped == TSTP_REGULAR && ulwp->ul_created) {
2127 				ulwp->ul_sp = 0;
2128 				ulwp->ul_created = 0;
2129 			}
2130 			force_continue(ulwp);
2131 		}
2132 	}
2133 	lmutex_unlock(mp);
2134 
2135 	fork_lock_exit();
2136 	return (error);
2137 }
2138 
2139 int
2140 thr_suspend(thread_t tid)
2141 {
2142 	return (_thrp_suspend(tid, TSTP_REGULAR));
2143 }
2144 
2145 int
2146 thr_continue(thread_t tid)
2147 {
2148 	return (_thrp_continue(tid, TSTP_REGULAR));
2149 }
2150 
/*
 * Thin wrapper around yield().
 */
void
thr_yield(void)
{
	yield();
}
2156 
2157 #pragma weak pthread_kill = thr_kill
2158 #pragma weak _thr_kill = thr_kill
2159 int
2160 thr_kill(thread_t tid, int sig)
2161 {
2162 	if (sig == SIGCANCEL)
2163 		return (EINVAL);
2164 	return (_lwp_kill(tid, sig));
2165 }
2166 
2167 /*
2168  * Exit a critical section, take deferred actions if necessary.
2169  * Called from exit_critical() and from sigon().
2170  */
2171 void
2172 do_exit_critical()
2173 {
2174 	ulwp_t *self = curthread;
2175 	int sig;
2176 
2177 	ASSERT(self->ul_critical == 0);
2178 
2179 	/*
2180 	 * Don't suspend ourself or take a deferred signal while dying
2181 	 * or while executing inside the dynamic linker (ld.so.1).
2182 	 */
2183 	if (self->ul_dead || self->ul_rtld)
2184 		return;
2185 
2186 	while (self->ul_pleasestop ||
2187 	    (self->ul_cursig != 0 && self->ul_sigdefer == 0)) {
2188 		/*
2189 		 * Avoid a recursive call to exit_critical() in _thrp_suspend()
2190 		 * by keeping self->ul_critical == 1 here.
2191 		 */
2192 		self->ul_critical++;
2193 		while (self->ul_pleasestop) {
2194 			/*
2195 			 * Guard against suspending ourself while on a sleep
2196 			 * queue.  See the comments in call_user_handler().
2197 			 */
2198 			unsleep_self();
2199 			set_parking_flag(self, 0);
2200 			(void) _thrp_suspend(self->ul_lwpid,
2201 			    self->ul_pleasestop);
2202 		}
2203 		self->ul_critical--;
2204 
2205 		if ((sig = self->ul_cursig) != 0 && self->ul_sigdefer == 0) {
2206 			/*
2207 			 * Clear ul_cursig before proceeding.
2208 			 * This protects us from the dynamic linker's
2209 			 * calls to bind_guard()/bind_clear() in the
2210 			 * event that it is invoked to resolve a symbol
2211 			 * like take_deferred_signal() below.
2212 			 */
2213 			self->ul_cursig = 0;
2214 			take_deferred_signal(sig);
2215 			ASSERT(self->ul_cursig == 0);
2216 		}
2217 	}
2218 	ASSERT(self->ul_critical == 0);
2219 }
2220 
/*
 * _ti_bind_guard() and _ti_bind_clear() are called by the dynamic linker
 * (ld.so.1) when it has to do something, like resolve a symbol to be called
 * by the application or one of its libraries.  _ti_bind_guard() is called
 * on entry to ld.so.1, _ti_bind_clear() on exit from ld.so.1 back to the
 * application.  The dynamic linker gets special dispensation from libc to
 * run in a critical region (all signals deferred and no thread suspension
 * or forking allowed), and to be immune from cancellation for the duration.
 */
2230 int
2231 _ti_bind_guard(int flags)
2232 {
2233 	ulwp_t *self = curthread;
2234 	uberdata_t *udp = self->ul_uberdata;
2235 	int bindflag = (flags & THR_FLG_RTLD);
2236 
2237 	if ((self->ul_bindflags & bindflag) == bindflag)
2238 		return (0);
2239 	self->ul_bindflags |= bindflag;
2240 	if ((flags & (THR_FLG_NOLOCK | THR_FLG_REENTER)) == THR_FLG_NOLOCK) {
2241 		sigoff(self);	/* see no signals while holding ld_lock */
2242 		self->ul_rtld++;	/* don't suspend while in ld.so.1 */
2243 		(void) mutex_lock(&udp->ld_lock);
2244 	}
2245 	enter_critical(self);
2246 	self->ul_save_state = self->ul_cancel_disabled;
2247 	self->ul_cancel_disabled = 1;
2248 	set_cancel_pending_flag(self, 0);
2249 	return (1);
2250 }
2251 
2252 int
2253 _ti_bind_clear(int flags)
2254 {
2255 	ulwp_t *self = curthread;
2256 	uberdata_t *udp = self->ul_uberdata;
2257 	int bindflag = (flags & THR_FLG_RTLD);
2258 
2259 	if ((self->ul_bindflags & bindflag) == 0)
2260 		return (self->ul_bindflags);
2261 	self->ul_bindflags &= ~bindflag;
2262 	self->ul_cancel_disabled = self->ul_save_state;
2263 	set_cancel_pending_flag(self, 0);
2264 	exit_critical(self);
2265 	if ((flags & (THR_FLG_NOLOCK | THR_FLG_REENTER)) == THR_FLG_NOLOCK) {
2266 		if (MUTEX_OWNED(&udp->ld_lock, self)) {
2267 			(void) mutex_unlock(&udp->ld_lock);
2268 			self->ul_rtld--;
2269 			sigon(self);	/* reenable signals */
2270 		}
2271 	}
2272 	return (self->ul_bindflags);
2273 }
2274 
2275 /*
2276  * Tell the dynamic linker (ld.so.1) whether or not it was entered from
2277  * a critical region in libc.  Return zero if not, else return non-zero.
2278  */
2279 int
2280 _ti_critical(void)
2281 {
2282 	ulwp_t *self = curthread;
2283 	int level = self->ul_critical;
2284 
2285 	if ((self->ul_bindflags & THR_FLG_RTLD) == 0 || level == 0)
2286 		return (level);	/* ld.so.1 hasn't (yet) called enter() */
2287 	return (level - 1);
2288 }
2289 
2290 /*
2291  * sigoff() and sigon() enable cond_wait() to behave (optionally) like
2292  * it does in the old libthread (see the comments in cond_wait_queue()).
2293  * Also, signals are deferred at thread startup until TLS constructors
2294  * have all been called, at which time _thrp_setup() calls sigon().
2295  *
2296  * _sigoff() and _sigon() are external consolidation-private interfaces to
2297  * sigoff() and sigon(), respectively, in libc.  These are used in libnsl.
2298  * Also, _sigoff() and _sigon() are called from dbx's run-time checking
2299  * (librtc.so) to defer signals during its critical sections (not to be
2300  * confused with libc critical sections [see exit_critical() above]).
2301  */
2302 void
2303 _sigoff(void)
2304 {
2305 	ulwp_t *self = curthread;
2306 
2307 	sigoff(self);
2308 }
2309 
2310 void
2311 _sigon(void)
2312 {
2313 	ulwp_t *self = curthread;
2314 
2315 	ASSERT(self->ul_sigdefer > 0);
2316 	sigon(self);
2317 }
2318 
2319 int
2320 thr_getconcurrency()
2321 {
2322 	return (thr_concurrency);
2323 }
2324 
2325 int
2326 pthread_getconcurrency()
2327 {
2328 	return (pthread_concurrency);
2329 }
2330 
2331 int
2332 thr_setconcurrency(int new_level)
2333 {
2334 	uberdata_t *udp = curthread->ul_uberdata;
2335 
2336 	if (new_level < 0)
2337 		return (EINVAL);
2338 	if (new_level > 65536)		/* 65536 is totally arbitrary */
2339 		return (EAGAIN);
2340 	lmutex_lock(&udp->link_lock);
2341 	if (new_level > thr_concurrency)
2342 		thr_concurrency = new_level;
2343 	lmutex_unlock(&udp->link_lock);
2344 	return (0);
2345 }
2346 
2347 int
2348 pthread_setconcurrency(int new_level)
2349 {
2350 	if (new_level < 0)
2351 		return (EINVAL);
2352 	if (new_level > 65536)		/* 65536 is totally arbitrary */
2353 		return (EAGAIN);
2354 	pthread_concurrency = new_level;
2355 	return (0);
2356 }
2357 
2358 size_t
2359 thr_min_stack(void)
2360 {
2361 	return (MINSTACK);
2362 }
2363 
2364 int
2365 __nthreads(void)
2366 {
2367 	return (curthread->ul_uberdata->nthreads);
2368 }
2369 
2370 /*
2371  * XXX
2372  * The remainder of this file implements the private interfaces to java for
2373  * garbage collection.  It is no longer used, at least by java 1.2.
2374  * It can all go away once all old JVMs have disappeared.
2375  */
2376 
2377 int	suspendingallmutators;	/* when non-zero, suspending all mutators. */
2378 int	suspendedallmutators;	/* when non-zero, all mutators suspended. */
2379 int	mutatorsbarrier;	/* when non-zero, mutators barrier imposed. */
2380 mutex_t	mutatorslock = DEFAULTMUTEX;	/* used to enforce mutators barrier. */
2381 cond_t	mutatorscv = DEFAULTCV;		/* where non-mutators sleep. */
2382 
2383 /*
2384  * Get the available register state for the target thread.
2385  * Return non-volatile registers: TRS_NONVOLATILE
2386  */
2387 #pragma weak _thr_getstate = thr_getstate
2388 int
2389 thr_getstate(thread_t tid, int *flag, lwpid_t *lwp, stack_t *ss, gregset_t rs)
2390 {
2391 	ulwp_t *self = curthread;
2392 	uberdata_t *udp = self->ul_uberdata;
2393 	ulwp_t **ulwpp;
2394 	ulwp_t *ulwp;
2395 	int error = 0;
2396 	int trs_flag = TRS_LWPID;
2397 
2398 	if (tid == 0 || self->ul_lwpid == tid) {
2399 		ulwp = self;
2400 		ulwp_lock(ulwp, udp);
2401 	} else if ((ulwpp = find_lwpp(tid)) != NULL) {
2402 		ulwp = *ulwpp;
2403 	} else {
2404 		if (flag)
2405 			*flag = TRS_INVALID;
2406 		return (ESRCH);
2407 	}
2408 
2409 	if (ulwp->ul_dead) {
2410 		trs_flag = TRS_INVALID;
2411 	} else if (!ulwp->ul_stop && !suspendedallmutators) {
2412 		error = EINVAL;
2413 		trs_flag = TRS_INVALID;
2414 	} else if (ulwp->ul_stop) {
2415 		trs_flag = TRS_NONVOLATILE;
2416 		getgregs(ulwp, rs);
2417 	}
2418 
2419 	if (flag)
2420 		*flag = trs_flag;
2421 	if (lwp)
2422 		*lwp = tid;
2423 	if (ss != NULL)
2424 		(void) _thrp_stksegment(ulwp, ss);
2425 
2426 	ulwp_unlock(ulwp, udp);
2427 	return (error);
2428 }
2429 
2430 /*
2431  * Set the appropriate register state for the target thread.
2432  * This is not used by java.  It exists solely for the MSTC test suite.
2433  */
2434 #pragma weak _thr_setstate = thr_setstate
2435 int
2436 thr_setstate(thread_t tid, int flag, gregset_t rs)
2437 {
2438 	uberdata_t *udp = curthread->ul_uberdata;
2439 	ulwp_t *ulwp;
2440 	int error = 0;
2441 
2442 	if ((ulwp = find_lwp(tid)) == NULL)
2443 		return (ESRCH);
2444 
2445 	if (!ulwp->ul_stop && !suspendedallmutators)
2446 		error = EINVAL;
2447 	else if (rs != NULL) {
2448 		switch (flag) {
2449 		case TRS_NONVOLATILE:
2450 			/* do /proc stuff here? */
2451 			if (ulwp->ul_stop)
2452 				setgregs(ulwp, rs);
2453 			else
2454 				error = EINVAL;
2455 			break;
2456 		case TRS_LWPID:		/* do /proc stuff here? */
2457 		default:
2458 			error = EINVAL;
2459 			break;
2460 		}
2461 	}
2462 
2463 	ulwp_unlock(ulwp, udp);
2464 	return (error);
2465 }
2466 
2467 int
2468 getlwpstatus(thread_t tid, struct lwpstatus *sp)
2469 {
2470 	extern ssize_t __pread(int, void *, size_t, off_t);
2471 	char buf[100];
2472 	int fd;
2473 
2474 	/* "/proc/self/lwp/%u/lwpstatus" w/o stdio */
2475 	(void) strcpy(buf, "/proc/self/lwp/");
2476 	ultos((uint64_t)tid, 10, buf + strlen(buf));
2477 	(void) strcat(buf, "/lwpstatus");
2478 	if ((fd = __open(buf, O_RDONLY, 0)) >= 0) {
2479 		while (__pread(fd, sp, sizeof (*sp), 0) == sizeof (*sp)) {
2480 			if (sp->pr_flags & PR_STOPPED) {
2481 				(void) __close(fd);
2482 				return (0);
2483 			}
2484 			yield();	/* give it a chance to stop */
2485 		}
2486 		(void) __close(fd);
2487 	}
2488 	return (-1);
2489 }
2490 
2491 int
2492 putlwpregs(thread_t tid, prgregset_t prp)
2493 {
2494 	extern ssize_t __writev(int, const struct iovec *, int);
2495 	char buf[100];
2496 	int fd;
2497 	long dstop_sreg[2];
2498 	long run_null[2];
2499 	iovec_t iov[3];
2500 
2501 	/* "/proc/self/lwp/%u/lwpctl" w/o stdio */
2502 	(void) strcpy(buf, "/proc/self/lwp/");
2503 	ultos((uint64_t)tid, 10, buf + strlen(buf));
2504 	(void) strcat(buf, "/lwpctl");
2505 	if ((fd = __open(buf, O_WRONLY, 0)) >= 0) {
2506 		dstop_sreg[0] = PCDSTOP;	/* direct it to stop */
2507 		dstop_sreg[1] = PCSREG;		/* set the registers */
2508 		iov[0].iov_base = (caddr_t)dstop_sreg;
2509 		iov[0].iov_len = sizeof (dstop_sreg);
2510 		iov[1].iov_base = (caddr_t)prp;	/* from the register set */
2511 		iov[1].iov_len = sizeof (prgregset_t);
2512 		run_null[0] = PCRUN;		/* make it runnable again */
2513 		run_null[1] = 0;
2514 		iov[2].iov_base = (caddr_t)run_null;
2515 		iov[2].iov_len = sizeof (run_null);
2516 		if (__writev(fd, iov, 3) >= 0) {
2517 			(void) __close(fd);
2518 			return (0);
2519 		}
2520 		(void) __close(fd);
2521 	}
2522 	return (-1);
2523 }
2524 
2525 static ulong_t
2526 gettsp_slow(thread_t tid)
2527 {
2528 	char buf[100];
2529 	struct lwpstatus status;
2530 
2531 	if (getlwpstatus(tid, &status) != 0) {
2532 		/* "__gettsp(%u): can't read lwpstatus" w/o stdio */
2533 		(void) strcpy(buf, "__gettsp(");
2534 		ultos((uint64_t)tid, 10, buf + strlen(buf));
2535 		(void) strcat(buf, "): can't read lwpstatus");
2536 		thr_panic(buf);
2537 	}
2538 	return (status.pr_reg[R_SP]);
2539 }
2540 
2541 ulong_t
2542 __gettsp(thread_t tid)
2543 {
2544 	uberdata_t *udp = curthread->ul_uberdata;
2545 	ulwp_t *ulwp;
2546 	ulong_t result;
2547 
2548 	if ((ulwp = find_lwp(tid)) == NULL)
2549 		return (0);
2550 
2551 	if (ulwp->ul_stop && (result = ulwp->ul_sp) != 0) {
2552 		ulwp_unlock(ulwp, udp);
2553 		return (result);
2554 	}
2555 
2556 	result = gettsp_slow(tid);
2557 	ulwp_unlock(ulwp, udp);
2558 	return (result);
2559 }
2560 
2561 /*
2562  * This tells java stack walkers how to find the ucontext
2563  * structure passed to signal handlers.
2564  */
2565 #pragma weak _thr_sighndlrinfo = thr_sighndlrinfo
2566 void
2567 thr_sighndlrinfo(void (**func)(), int *funcsize)
2568 {
2569 	*func = &__sighndlr;
2570 	*funcsize = (char *)&__sighndlrend - (char *)&__sighndlr;
2571 }
2572 
2573 /*
2574  * Mark a thread a mutator or reset a mutator to being a default,
2575  * non-mutator thread.
2576  */
2577 #pragma weak _thr_setmutator = thr_setmutator
2578 int
2579 thr_setmutator(thread_t tid, int enabled)
2580 {
2581 	ulwp_t *self = curthread;
2582 	uberdata_t *udp = self->ul_uberdata;
2583 	ulwp_t *ulwp;
2584 	int error;
2585 	int cancel_state;
2586 
2587 	enabled = enabled? 1 : 0;
2588 top:
2589 	if (tid == 0) {
2590 		ulwp = self;
2591 		ulwp_lock(ulwp, udp);
2592 	} else if ((ulwp = find_lwp(tid)) == NULL) {
2593 		return (ESRCH);
2594 	}
2595 
2596 	/*
2597 	 * The target thread should be the caller itself or a suspended thread.
2598 	 * This prevents the target from also changing its ul_mutator field.
2599 	 */
2600 	error = 0;
2601 	if (ulwp != self && !ulwp->ul_stop && enabled)
2602 		error = EINVAL;
2603 	else if (ulwp->ul_mutator != enabled) {
2604 		lmutex_lock(&mutatorslock);
2605 		if (mutatorsbarrier) {
2606 			ulwp_unlock(ulwp, udp);
2607 			(void) pthread_setcancelstate(PTHREAD_CANCEL_DISABLE,
2608 			    &cancel_state);
2609 			while (mutatorsbarrier)
2610 				(void) cond_wait(&mutatorscv, &mutatorslock);
2611 			(void) pthread_setcancelstate(cancel_state, NULL);
2612 			lmutex_unlock(&mutatorslock);
2613 			goto top;
2614 		}
2615 		ulwp->ul_mutator = enabled;
2616 		lmutex_unlock(&mutatorslock);
2617 	}
2618 
2619 	ulwp_unlock(ulwp, udp);
2620 	return (error);
2621 }
2622 
2623 /*
2624  * Establish a barrier against new mutators.  Any non-mutator trying
2625  * to become a mutator is suspended until the barrier is removed.
2626  */
2627 #pragma weak _thr_mutators_barrier = thr_mutators_barrier
2628 void
2629 thr_mutators_barrier(int enabled)
2630 {
2631 	int oldvalue;
2632 	int cancel_state;
2633 
2634 	lmutex_lock(&mutatorslock);
2635 
2636 	/*
2637 	 * Wait if trying to set the barrier while it is already set.
2638 	 */
2639 	(void) pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cancel_state);
2640 	while (mutatorsbarrier && enabled)
2641 		(void) cond_wait(&mutatorscv, &mutatorslock);
2642 	(void) pthread_setcancelstate(cancel_state, NULL);
2643 
2644 	oldvalue = mutatorsbarrier;
2645 	mutatorsbarrier = enabled;
2646 	/*
2647 	 * Wakeup any blocked non-mutators when barrier is removed.
2648 	 */
2649 	if (oldvalue && !enabled)
2650 		(void) cond_broadcast(&mutatorscv);
2651 	lmutex_unlock(&mutatorslock);
2652 }
2653 
2654 /*
2655  * Suspend the set of all mutators except for the caller.  The list
2656  * of actively running threads is searched and only the mutators
2657  * in this list are suspended.  Actively running non-mutators remain
2658  * running.  Any other thread is suspended.
2659  */
2660 #pragma weak _thr_suspend_allmutators = thr_suspend_allmutators
2661 int
2662 thr_suspend_allmutators(void)
2663 {
2664 	ulwp_t *self = curthread;
2665 	uberdata_t *udp = self->ul_uberdata;
2666 	ulwp_t *ulwp;
2667 	int link_dropped;
2668 
2669 	/*
2670 	 * We single-thread the entire thread suspend/continue mechanism.
2671 	 */
2672 	fork_lock_enter();
2673 
2674 top:
2675 	lmutex_lock(&udp->link_lock);
2676 
2677 	if (suspendingallmutators || suspendedallmutators) {
2678 		lmutex_unlock(&udp->link_lock);
2679 		fork_lock_exit();
2680 		return (EINVAL);
2681 	}
2682 	suspendingallmutators = 1;
2683 
2684 	for (ulwp = self->ul_forw; ulwp != self; ulwp = ulwp->ul_forw) {
2685 		ulwp_lock(ulwp, udp);
2686 		if (!ulwp->ul_mutator) {
2687 			ulwp_unlock(ulwp, udp);
2688 		} else if (ulwp->ul_stop) {	/* already stopped */
2689 			ulwp->ul_stop |= TSTP_MUTATOR;
2690 			ulwp_broadcast(ulwp);
2691 			ulwp_unlock(ulwp, udp);
2692 		} else {
2693 			/*
2694 			 * Move the stopped lwp out of a critical section.
2695 			 */
2696 			if (safe_suspend(ulwp, TSTP_MUTATOR, &link_dropped) ||
2697 			    link_dropped) {
2698 				suspendingallmutators = 0;
2699 				goto top;
2700 			}
2701 		}
2702 	}
2703 
2704 	suspendedallmutators = 1;
2705 	suspendingallmutators = 0;
2706 	lmutex_unlock(&udp->link_lock);
2707 	fork_lock_exit();
2708 	return (0);
2709 }
2710 
2711 /*
2712  * Suspend the target mutator.  The caller is permitted to suspend
2713  * itself.  If a mutator barrier is enabled, the caller will suspend
2714  * itself as though it had been suspended by thr_suspend_allmutators().
2715  * When the barrier is removed, this thread will be resumed.  Any
2716  * suspended mutator, whether suspended by thr_suspend_mutator(), or by
2717  * thr_suspend_allmutators(), can be resumed by thr_continue_mutator().
2718  */
2719 #pragma weak _thr_suspend_mutator = thr_suspend_mutator
2720 int
2721 thr_suspend_mutator(thread_t tid)
2722 {
2723 	if (tid == 0)
2724 		tid = curthread->ul_lwpid;
2725 	return (_thrp_suspend(tid, TSTP_MUTATOR));
2726 }
2727 
2728 /*
2729  * Resume the set of all suspended mutators.
2730  */
2731 #pragma weak _thr_continue_allmutators = thr_continue_allmutators
2732 int
2733 thr_continue_allmutators()
2734 {
2735 	ulwp_t *self = curthread;
2736 	uberdata_t *udp = self->ul_uberdata;
2737 	ulwp_t *ulwp;
2738 
2739 	/*
2740 	 * We single-thread the entire thread suspend/continue mechanism.
2741 	 */
2742 	fork_lock_enter();
2743 
2744 	lmutex_lock(&udp->link_lock);
2745 	if (!suspendedallmutators) {
2746 		lmutex_unlock(&udp->link_lock);
2747 		fork_lock_exit();
2748 		return (EINVAL);
2749 	}
2750 	suspendedallmutators = 0;
2751 
2752 	for (ulwp = self->ul_forw; ulwp != self; ulwp = ulwp->ul_forw) {
2753 		mutex_t *mp = ulwp_mutex(ulwp, udp);
2754 		lmutex_lock(mp);
2755 		if (ulwp->ul_stop & TSTP_MUTATOR) {
2756 			ulwp->ul_stop &= ~TSTP_MUTATOR;
2757 			ulwp_broadcast(ulwp);
2758 			if (!ulwp->ul_stop)
2759 				force_continue(ulwp);
2760 		}
2761 		lmutex_unlock(mp);
2762 	}
2763 
2764 	lmutex_unlock(&udp->link_lock);
2765 	fork_lock_exit();
2766 	return (0);
2767 }
2768 
2769 /*
2770  * Resume a suspended mutator.
2771  */
2772 #pragma weak _thr_continue_mutator = thr_continue_mutator
2773 int
2774 thr_continue_mutator(thread_t tid)
2775 {
2776 	return (_thrp_continue(tid, TSTP_MUTATOR));
2777 }
2778 
2779 #pragma weak _thr_wait_mutator = thr_wait_mutator
2780 int
2781 thr_wait_mutator(thread_t tid, int dontwait)
2782 {
2783 	uberdata_t *udp = curthread->ul_uberdata;
2784 	ulwp_t *ulwp;
2785 	int cancel_state;
2786 	int error = 0;
2787 
2788 	(void) pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cancel_state);
2789 top:
2790 	if ((ulwp = find_lwp(tid)) == NULL) {
2791 		(void) pthread_setcancelstate(cancel_state, NULL);
2792 		return (ESRCH);
2793 	}
2794 
2795 	if (!ulwp->ul_mutator)
2796 		error = EINVAL;
2797 	else if (dontwait) {
2798 		if (!(ulwp->ul_stop & TSTP_MUTATOR))
2799 			error = EWOULDBLOCK;
2800 	} else if (!(ulwp->ul_stop & TSTP_MUTATOR)) {
2801 		cond_t *cvp = ulwp_condvar(ulwp, udp);
2802 		mutex_t *mp = ulwp_mutex(ulwp, udp);
2803 
2804 		(void) cond_wait(cvp, mp);
2805 		(void) lmutex_unlock(mp);
2806 		goto top;
2807 	}
2808 
2809 	ulwp_unlock(ulwp, udp);
2810 	(void) pthread_setcancelstate(cancel_state, NULL);
2811 	return (error);
2812 }
2813 
2814 /* PROBE_SUPPORT begin */
2815 
2816 void
2817 thr_probe_setup(void *data)
2818 {
2819 	curthread->ul_tpdp = data;
2820 }
2821 
2822 static void *
2823 _thread_probe_getfunc()
2824 {
2825 	return (curthread->ul_tpdp);
2826 }
2827 
2828 void * (*thr_probe_getfunc_addr)(void) = _thread_probe_getfunc;
2829 
2830 /* ARGSUSED */
2831 void
2832 _resume(ulwp_t *ulwp, caddr_t sp, int dontsave)
2833 {
2834 	/* never called */
2835 }
2836 
2837 /* ARGSUSED */
2838 void
2839 _resume_ret(ulwp_t *oldlwp)
2840 {
2841 	/* never called */
2842 }
2843 
2844 /* PROBE_SUPPORT end */
2845