xref: /illumos-gate/usr/src/lib/libnsl/rpc/clnt_vc.c (revision 581cede61ac9c14d8d4ea452562a567189eead78)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
28 /* All Rights Reserved */
29 /*
30  * Portions of this source code were derived from Berkeley
31  * 4.3 BSD under license from the Regents of the University of
32  * California.
33  */
34 
35 #pragma ident	"%Z%%M%	%I%	%E% SMI"
36 
37 /*
38  * clnt_vc.c
39  *
40  * Implements a connectionful client side RPC.
41  *
42  * Connectionful RPC supports 'batched calls'.
43  * A sequence of calls may be batched-up in a send buffer. The rpc call
44  * returns immediately to the client even though the call was not necessarily
45  * sent. The batching occurs if the results' xdr routine is NULL (0) AND
46  * the rpc timeout value is zero (see clnt.h, rpc).
47  *
48  * Clients should NOT casually batch calls that in fact return results; that
49  * is the server side should be aware that a call is batched and not produce
50  * any return message. Batched calls that produce many result messages can
51  * deadlock (netlock) the client and the server....
52  */
53 
54 
55 #include "mt.h"
56 #include "rpc_mt.h"
57 #include <assert.h>
58 #include <rpc/rpc.h>
59 #include <errno.h>
60 #include <sys/byteorder.h>
61 #include <sys/mkdev.h>
62 #include <sys/poll.h>
63 #include <syslog.h>
64 #include <stdlib.h>
65 #include <unistd.h>
66 #include <netinet/tcp.h>
67 
68 #define	MCALL_MSG_SIZE 24	/* bytes reserved for the pre-marshalled call header (ct_mcall[]) */
69 #define	SECS_TO_MS 1000	/* multiply seconds by this to get milliseconds */
70 #define	USECS_TO_MS 1/1000	/* deliberately NOT parenthesized: "usec * USECS_TO_MS" must expand to (usec * 1) / 1000; "(1/1000)" would be 0 */
71 #ifndef MIN
72 #define	MIN(a, b)	(((a) < (b)) ? (a) : (b))	/* evaluates one of its arguments twice: no side effects */
73 #endif
74 
75 extern int __rpc_timeval_to_msec(struct timeval *);
76 extern int __rpc_compress_pollfd(int, pollfd_t *, pollfd_t *);
77 extern bool_t xdr_opaque_auth(XDR *, struct opaque_auth *);
78 extern bool_t __rpc_gss_wrap(AUTH *, char *, uint_t, XDR *, bool_t (*)(),
79 								caddr_t);
80 extern bool_t __rpc_gss_unwrap(AUTH *, XDR *, bool_t (*)(), caddr_t);
81 extern CLIENT *_clnt_vc_create_timed(int, struct netbuf *, rpcprog_t,
82 		rpcvers_t, uint_t, uint_t, const struct timeval *);
83 
84 static struct clnt_ops	*clnt_vc_ops(void);
85 static int		read_vc(void *, caddr_t, int);
86 static int		write_vc(void *, caddr_t, int);
87 static int		t_rcvall(int, char *, int);
88 static bool_t		time_not_ok(struct timeval *);
89 
90 struct ct_data;
91 static bool_t		set_up_connection(int, struct netbuf *,
92 				struct ct_data *, const struct timeval *);
93 static bool_t		set_io_mode(struct ct_data *, int);
94 
95 /*
96  * Lock table handle used by various MT sync. routines
97  */
98 static mutex_t	vctbl_lock = DEFAULTMUTEX;
99 static void	*vctbl = NULL;
100 
101 static const char clnt_vc_errstr[] = "%s : %s";	/* syslog format: "<routine name> : <message>" */
102 static const char clnt_vc_str[] = "clnt_vc_create";	/* routine name logged with clnt_vc_errstr */
103 static const char clnt_read_vc_str[] = "read_vc";	/* NOTE(review): presumably the routine name logged by read_vc() - not used in the code visible here */
104 static const char __no_mem_str[] = "out of memory";	/* message logged when malloc() fails */
105 static const char no_fcntl_getfl_str[] = "could not get status flags and modes";	/* logged when fcntl(F_GETFL) fails */
106 static const char no_nonblock_str[] = "could not set transport blocking mode";	/* logged when fcntl(F_SETFL) fails */
107 
108 /*
109  * Private data structure: per-handle state stored in CLIENT.cl_private.
110  */
111 struct ct_data {
112 	int		ct_fd;		/* connection's fd */
113 	bool_t		ct_closeit;	/* close it on destroy (CLSET_FD_CLOSE / CLSET_FD_NCLOSE) */
114 	int		ct_tsdu;	/* size of tsdu, as reported by t_getinfo() */
115 	int		ct_wait;	/* wait interval in milliseconds */
116 	bool_t		ct_waitset;	/* TRUE once CLSET_TIMEOUT has fixed ct_wait */
117 	struct netbuf	ct_addr;	/* remote addr; ct_addr.buf is heap memory owned by this struct */
118 	struct rpc_err	ct_error;	/* NOTE(review): not referenced in the code visible here */
119 	char		ct_mcall[MCALL_MSG_SIZE]; /* marshalled callmsg; the xid is its first word */
120 	uint_t		ct_mpos;	/* pos after marshal, i.e. bytes of ct_mcall in use */
121 	XDR		ct_xdrs;	/* XDR record stream used for calls and replies */
122 
123 	/* NON STANDARD INFO - 00-08-31 */
124 	bool_t		ct_is_oneway; /* True if the current call is oneway. */
125 	bool_t		ct_is_blocking; /* FALSE while ct_fd has O_NONBLOCK set. */
126 	ushort_t	ct_io_mode; /* RPC_CL_BLOCKING or RPC_CL_NONBLOCKING. */
127 	ushort_t	ct_blocking_mode; /* Flush mode, e.g. RPC_CL_BLOCKING_FLUSH. */
128 	uint_t		ct_bufferSize; /* Total size of the buffer. */
129 	uint_t		ct_bufferPendingSize; /* Size of unsent data. */
130 	char 		*ct_buffer; /* Pointer to the buffer; NULL until it is needed. */
131 	char 		*ct_bufferWritePtr; /* Ptr to the first free byte. */
132 	char 		*ct_bufferReadPtr; /* Ptr to the first byte of data. */
133 };
134 
135 struct nb_reg_node {
136 	struct nb_reg_node *next;	/* must stay the first member: the LIST_* macros cast a list head's address to a node */
137 	struct ct_data *ct;	/* connection this node refers to */
138 };
139 
140 static struct nb_reg_node *nb_first = (struct nb_reg_node *)&nb_first;	/* list head; points at itself while the list is empty */
141 static struct nb_reg_node *nb_free  = (struct nb_reg_node *)&nb_free;	/* list head; points at itself while the list is empty */
142 
143 static bool_t exit_handler_set = FALSE;	/* NOTE(review): presumably set once an exit handler is installed - its users are not visible here */
144 
145 static mutex_t nb_list_mutex = DEFAULTMUTEX;	/* NOTE(review): presumably serializes access to nb_first/nb_free - confirm */
146 
147 
/*
 * Macros to manage a singly linked list of struct nb_reg_node that is
 * addressed through a head pointer variable `l'.
 *
 * The list is terminated not by NULL but by the address of the head
 * variable itself, cast to a node pointer; an empty list is a head that
 * points at itself.  `next' is the first member of struct nb_reg_node, so
 * reading `->next' through that sentinel yields the head pointer again.
 *
 * `l' must be an lvalue (its address is taken).  `node' is assigned to by
 * LIST_EXTRACT and LIST_FOR_EACH and must be a modifiable pointer there.
 * Every argument is fully parenthesized, but arguments are evaluated more
 * than once, so they must not have side effects.
 */

/* True when the list headed by `l' contains no node. */
#define	LIST_ISEMPTY(l) ((l) == (struct nb_reg_node *)&(l))

/* Reset `l' to the empty list. */
#define	LIST_CLR(l) ((l) = (struct nb_reg_node *)&(l))

/*
 * Make `node' the new head of `l'; its successor becomes the `next' of
 * the previous head.
 * NOTE(review): on a non-empty list this links `node' past the previous
 * head instead of to it, so the previous head is no longer reachable from
 * `l'.  Confirm against the callers whether that is intended.
 */
#define	LIST_ADD(l, node) ((node)->next = (l)->next, (l) = (node))

/* Store the current head of `l' in `node' and advance `l' past it. */
#define	LIST_EXTRACT(l, node) ((node) = (l), (l) = (l)->next)

/* Iterate `node' over every element of `l', stopping at the sentinel. */
#define	LIST_FOR_EACH(l, node) \
	for ((node) = (l); (node) != (struct nb_reg_node *)&(l); \
	    (node) = (node)->next)
155 
156 
157 /* Default size of the IO buffer used in non blocking mode */
158 #define	DEFAULT_PENDING_ZONE_MAX_SIZE (16*1024)
159 
160 static int nb_send(struct ct_data *, void *, unsigned int);
161 static int do_flush(struct ct_data *, uint_t);
162 static bool_t set_flush_mode(struct ct_data *, int);
163 static bool_t set_blocking_connection(struct ct_data *, bool_t);
164 
165 static int register_nb(struct ct_data *);
166 static int unregister_nb(struct ct_data *);
167 
168 
169 /*
170  * Change the mode of the underlying fd.
171  */
172 static bool_t
173 set_blocking_connection(struct ct_data *ct, bool_t blocking)
174 {
175 	int flag;
176 
177 	/*
178 	 * If the underlying fd is already in the required mode,
179 	 * avoid the syscall.
180 	 */
181 	if (ct->ct_is_blocking == blocking)
182 		return (TRUE);
183 
184 	if ((flag = fcntl(ct->ct_fd, F_GETFL, 0)) < 0) {
185 		(void) syslog(LOG_ERR, "set_blocking_connection : %s",
186 		    no_fcntl_getfl_str);
187 		return (FALSE);
188 	}
189 
190 	flag = blocking? flag&~O_NONBLOCK : flag|O_NONBLOCK;
191 	if (fcntl(ct->ct_fd, F_SETFL, flag) != 0) {
192 		(void) syslog(LOG_ERR, "set_blocking_connection : %s",
193 		    no_nonblock_str);
194 		return (FALSE);
195 	}
196 	ct->ct_is_blocking = blocking;
197 	return (TRUE);
198 }
199 
200 /*
201  * Create a client handle for a connection.
202  * Default options are set, which the user can change using clnt_control()'s.
203  * The rpc/vc package does buffering similar to stdio, so the client
204  * must pick send and receive buffer sizes, 0 => use the default.
205  * NB: fd is copied into a private area.
206  * NB: The rpch->cl_auth is set null authentication. Caller may wish to
207  * set this something more useful.
208  *
209  * fd should be open and bound.
210  */
211 CLIENT *
212 clnt_vc_create(const int fd, struct netbuf *svcaddr, const rpcprog_t prog,
213 	const rpcvers_t vers, const uint_t sendsz, const uint_t recvsz)
214 {
215 	return (_clnt_vc_create_timed(fd, svcaddr, prog, vers, sendsz,
216 			recvsz, NULL));
217 }
218 
219 /*
220  * This has the same definition as clnt_vc_create(), except it
221  * takes an additional parameter - a pointer to a timeval structure.
222  *
223  * Not a public interface. This is for clnt_create_timed,
224  * clnt_create_vers_timed, clnt_tp_create_timed to pass down the timeout
225  * value to control a tcp connection attempt.
226  * (for bug 4049792: clnt_create_timed does not time out)
227  *
228  * If tp is NULL, use default timeout to set up the connection.
229  */
230 CLIENT *
231 _clnt_vc_create_timed(int fd, struct netbuf *svcaddr, rpcprog_t prog,
232 	rpcvers_t vers, uint_t sendsz, uint_t recvsz, const struct timeval *tp)
233 {
234 	CLIENT *cl;			/* client handle */
235 	struct ct_data *ct;		/* private data */
236 	struct timeval now;
237 	struct rpc_msg call_msg;
238 	struct t_info tinfo;
239 	int flag;
240 
241 	cl = malloc(sizeof (*cl));
242 	ct = malloc(sizeof (*ct));
243 	if ((cl == NULL) || (ct == NULL)) {
244 		(void) syslog(LOG_ERR, clnt_vc_errstr,
245 				clnt_vc_str, __no_mem_str);
246 		rpc_createerr.cf_stat = RPC_SYSTEMERROR;
247 		rpc_createerr.cf_error.re_errno = errno;
248 		rpc_createerr.cf_error.re_terrno = 0;
249 		goto err;
250 	}
251 	ct->ct_addr.buf = NULL;
252 
253 	/*
254 	 * The only use of vctbl_lock is for serializing the creation of
255 	 * vctbl. Once created the lock needs to be released so we don't
256 	 * hold it across the set_up_connection() call and end up with a
257 	 * bunch of threads stuck waiting for the mutex.
258 	 */
259 	sig_mutex_lock(&vctbl_lock);
260 
261 	if ((vctbl == NULL) && ((vctbl = rpc_fd_init()) == NULL)) {
262 		rpc_createerr.cf_stat = RPC_SYSTEMERROR;
263 		rpc_createerr.cf_error.re_errno = errno;
264 		rpc_createerr.cf_error.re_terrno = 0;
265 		sig_mutex_unlock(&vctbl_lock);
266 		goto err;
267 	}
268 
269 	sig_mutex_unlock(&vctbl_lock);
270 
271 	ct->ct_io_mode = RPC_CL_BLOCKING;
272 	ct->ct_blocking_mode = RPC_CL_BLOCKING_FLUSH;
273 
274 	ct->ct_buffer = NULL;	/* We allocate the buffer when needed. */
275 	ct->ct_bufferSize = DEFAULT_PENDING_ZONE_MAX_SIZE;
276 	ct->ct_bufferPendingSize = 0;
277 	ct->ct_bufferWritePtr = NULL;
278 	ct->ct_bufferReadPtr = NULL;
279 
280 	/* Check the current state of the fd. */
281 	if ((flag = fcntl(fd, F_GETFL, 0)) < 0) {
282 		(void) syslog(LOG_ERR, "_clnt_vc_create_timed : %s",
283 		    no_fcntl_getfl_str);
284 		rpc_createerr.cf_stat = RPC_SYSTEMERROR;
285 		rpc_createerr.cf_error.re_terrno = errno;
286 		rpc_createerr.cf_error.re_errno = 0;
287 		goto err;
288 	}
289 	ct->ct_is_blocking = flag & O_NONBLOCK ? FALSE : TRUE;
290 
291 	if (set_up_connection(fd, svcaddr, ct, tp) == FALSE) {
292 		goto err;
293 	}
294 
295 	/*
296 	 * Set up other members of private data struct
297 	 */
298 	ct->ct_fd = fd;
299 	/*
300 	 * The actual value will be set by clnt_call or clnt_control
301 	 */
302 	ct->ct_wait = 30000;
303 	ct->ct_waitset = FALSE;
304 	/*
305 	 * By default, closeit is always FALSE. It is users responsibility
306 	 * to do a t_close on it, else the user may use clnt_control
307 	 * to let clnt_destroy do it for him/her.
308 	 */
309 	ct->ct_closeit = FALSE;
310 
311 	/*
312 	 * Initialize call message
313 	 */
314 	(void) gettimeofday(&now, (struct timezone *)0);
315 	call_msg.rm_xid = getpid() ^ now.tv_sec ^ now.tv_usec;
316 	call_msg.rm_call.cb_prog = prog;
317 	call_msg.rm_call.cb_vers = vers;
318 
319 	/*
320 	 * pre-serialize the static part of the call msg and stash it away
321 	 */
322 	xdrmem_create(&(ct->ct_xdrs), ct->ct_mcall, MCALL_MSG_SIZE, XDR_ENCODE);
323 	if (!xdr_callhdr(&(ct->ct_xdrs), &call_msg)) {
324 		goto err;
325 	}
326 	ct->ct_mpos = XDR_GETPOS(&(ct->ct_xdrs));
327 	XDR_DESTROY(&(ct->ct_xdrs));
328 
329 	if (t_getinfo(fd, &tinfo) == -1) {
330 		rpc_createerr.cf_stat = RPC_TLIERROR;
331 		rpc_createerr.cf_error.re_terrno = t_errno;
332 		rpc_createerr.cf_error.re_errno = 0;
333 		goto err;
334 	}
335 	/*
336 	 * Find the receive and the send size
337 	 */
338 	sendsz = __rpc_get_t_size((int)sendsz, tinfo.tsdu);
339 	recvsz = __rpc_get_t_size((int)recvsz, tinfo.tsdu);
340 	if ((sendsz == 0) || (recvsz == 0)) {
341 		rpc_createerr.cf_stat = RPC_TLIERROR;
342 		rpc_createerr.cf_error.re_terrno = 0;
343 		rpc_createerr.cf_error.re_errno = 0;
344 		goto err;
345 	}
346 	ct->ct_tsdu = tinfo.tsdu;
347 	/*
348 	 * Create a client handle which uses xdrrec for serialization
349 	 * and authnone for authentication.
350 	 */
351 	ct->ct_xdrs.x_ops = NULL;
352 	xdrrec_create(&(ct->ct_xdrs), sendsz, recvsz, (caddr_t)ct,
353 			read_vc, write_vc);
354 	if (ct->ct_xdrs.x_ops == NULL) {
355 		rpc_createerr.cf_stat = RPC_SYSTEMERROR;
356 		rpc_createerr.cf_error.re_terrno = 0;
357 		rpc_createerr.cf_error.re_errno = ENOMEM;
358 		goto err;
359 	}
360 	cl->cl_ops = clnt_vc_ops();
361 	cl->cl_private = (caddr_t)ct;
362 	cl->cl_auth = authnone_create();
363 	cl->cl_tp = NULL;
364 	cl->cl_netid = NULL;
365 	return (cl);
366 
367 err:
368 	if (cl) {
369 		if (ct) {
370 			if (ct->ct_addr.len)
371 				free(ct->ct_addr.buf);
372 			free(ct);
373 		}
374 		free(cl);
375 	}
376 	return (NULL);
377 }
378 
379 #define	TCPOPT_BUFSIZE 128
380 
381 /*
382  * Set tcp connection timeout value.
383  * Retun 0 for success, -1 for failure.
384  */
385 static int
386 _set_tcp_conntime(int fd, int optval)
387 {
388 	struct t_optmgmt req, res;
389 	struct opthdr *opt;
390 	int *ip;
391 	char buf[TCPOPT_BUFSIZE];
392 
393 	/* LINTED pointer cast */
394 	opt = (struct opthdr *)buf;
395 	opt->level =  IPPROTO_TCP;
396 	opt->name = TCP_CONN_ABORT_THRESHOLD;
397 	opt->len = sizeof (int);
398 
399 	req.flags = T_NEGOTIATE;
400 	req.opt.len = sizeof (struct opthdr) + opt->len;
401 	req.opt.buf = (char *)opt;
402 	/* LINTED pointer cast */
403 	ip = (int *)((char *)buf + sizeof (struct opthdr));
404 	*ip = optval;
405 
406 	res.flags = 0;
407 	res.opt.buf = (char *)buf;
408 	res.opt.maxlen = sizeof (buf);
409 	if (t_optmgmt(fd, &req, &res) < 0 || res.flags != T_SUCCESS) {
410 		return (-1);
411 	}
412 	return (0);
413 }
414 
415 /*
416  * Get current tcp connection timeout value.
417  * Retun 0 for success, -1 for failure.
418  */
419 static int
420 _get_tcp_conntime(int fd)
421 {
422 	struct t_optmgmt req, res;
423 	struct opthdr *opt;
424 	int *ip, retval;
425 	char buf[TCPOPT_BUFSIZE];
426 
427 	/* LINTED pointer cast */
428 	opt = (struct opthdr *)buf;
429 	opt->level =  IPPROTO_TCP;
430 	opt->name = TCP_CONN_ABORT_THRESHOLD;
431 	opt->len = sizeof (int);
432 
433 	req.flags = T_CURRENT;
434 	req.opt.len = sizeof (struct opthdr) + opt->len;
435 	req.opt.buf = (char *)opt;
436 	/* LINTED pointer cast */
437 	ip = (int *)((char *)buf + sizeof (struct opthdr));
438 	*ip = 0;
439 
440 	res.flags = 0;
441 	res.opt.buf = (char *)buf;
442 	res.opt.maxlen = sizeof (buf);
443 	if (t_optmgmt(fd, &req, &res) < 0 || res.flags != T_SUCCESS) {
444 		return (-1);
445 	}
446 
447 	/* LINTED pointer cast */
448 	ip = (int *)((char *)buf + sizeof (struct opthdr));
449 	retval = *ip;
450 	return (retval);
451 }
452 
453 static bool_t
454 set_up_connection(int fd, struct netbuf *svcaddr, struct ct_data *ct,
455 						const struct timeval *tp)
456 {
457 	int state;
458 	struct t_call sndcallstr, *rcvcall;
459 	int nconnect;
460 	bool_t connected, do_rcv_connect;
461 	int curr_time = 0;
462 
463 	ct->ct_addr.len = 0;
464 	state = t_getstate(fd);
465 	if (state == -1) {
466 		rpc_createerr.cf_stat = RPC_TLIERROR;
467 		rpc_createerr.cf_error.re_errno = 0;
468 		rpc_createerr.cf_error.re_terrno = t_errno;
469 		return (FALSE);
470 	}
471 
472 #ifdef DEBUG
473 	fprintf(stderr, "set_up_connection: state = %d\n", state);
474 #endif
475 	switch (state) {
476 	case T_IDLE:
477 		if (svcaddr == NULL) {
478 			rpc_createerr.cf_stat = RPC_UNKNOWNADDR;
479 			return (FALSE);
480 		}
481 		/*
482 		 * Connect only if state is IDLE and svcaddr known
483 		 */
484 /* LINTED pointer alignment */
485 		rcvcall = (struct t_call *)t_alloc(fd, T_CALL, T_OPT|T_ADDR);
486 		if (rcvcall == NULL) {
487 			rpc_createerr.cf_stat = RPC_TLIERROR;
488 			rpc_createerr.cf_error.re_terrno = t_errno;
489 			rpc_createerr.cf_error.re_errno = errno;
490 			return (FALSE);
491 		}
492 		rcvcall->udata.maxlen = 0;
493 		sndcallstr.addr = *svcaddr;
494 		sndcallstr.opt.len = 0;
495 		sndcallstr.udata.len = 0;
496 		/*
497 		 * Even NULL could have sufficed for rcvcall, because
498 		 * the address returned is same for all cases except
499 		 * for the gateway case, and hence required.
500 		 */
501 		connected = FALSE;
502 		do_rcv_connect = FALSE;
503 
504 		/*
505 		 * If there is a timeout value specified, we will try to
506 		 * reset the tcp connection timeout. If the transport does
507 		 * not support the TCP_CONN_ABORT_THRESHOLD option or fails
508 		 * for other reason, default timeout will be used.
509 		 */
510 		if (tp != NULL) {
511 		    int ms;
512 
513 		    /* TCP_CONN_ABORT_THRESHOLD takes int value in millisecs */
514 		    ms = tp->tv_sec * SECS_TO_MS + tp->tv_usec * USECS_TO_MS;
515 		    if (((curr_time = _get_tcp_conntime(fd)) != -1) &&
516 			(_set_tcp_conntime(fd, ms) == 0)) {
517 			/* EMPTY */
518 #ifdef DEBUG
519 			fprintf(stderr, "set_up_connection: set tcp ");
520 			fprintf(stderr, "connection timeout to %d ms\n", ms);
521 #endif
522 		    }
523 		}
524 
525 		for (nconnect = 0; nconnect < 3; nconnect++) {
526 			if (t_connect(fd, &sndcallstr, rcvcall) != -1) {
527 				connected = TRUE;
528 				break;
529 			}
530 			if (t_errno == TLOOK) {
531 				switch (t_look(fd)) {
532 				case T_DISCONNECT:
533 					(void) t_rcvdis(fd, (struct
534 					    t_discon *) NULL);
535 					break;
536 				default:
537 					break;
538 				}
539 			} else if (!(t_errno == TSYSERR && errno == EINTR)) {
540 				break;
541 			}
542 			if ((state = t_getstate(fd)) == T_OUTCON) {
543 				do_rcv_connect = TRUE;
544 				break;
545 			}
546 			if (state != T_IDLE) {
547 				break;
548 			}
549 		}
550 		if (do_rcv_connect) {
551 			do {
552 				if (t_rcvconnect(fd, rcvcall) != -1) {
553 					connected = TRUE;
554 					break;
555 				}
556 			} while (t_errno == TSYSERR && errno == EINTR);
557 		}
558 
559 		/*
560 		 * Set the connection timeout back to its old value.
561 		 */
562 		if (curr_time) {
563 			(void) _set_tcp_conntime(fd, curr_time);
564 		}
565 
566 		if (!connected) {
567 			rpc_createerr.cf_stat = RPC_TLIERROR;
568 			rpc_createerr.cf_error.re_terrno = t_errno;
569 			rpc_createerr.cf_error.re_errno = errno;
570 			(void) t_free((char *)rcvcall, T_CALL);
571 #ifdef DEBUG
572 			fprintf(stderr, "clnt_vc: t_connect error %d\n",
573 				rpc_createerr.cf_error.re_terrno);
574 #endif
575 			return (FALSE);
576 		}
577 
578 		/* Free old area if allocated */
579 		if (ct->ct_addr.buf)
580 			free(ct->ct_addr.buf);
581 		ct->ct_addr = rcvcall->addr;	/* To get the new address */
582 		/* So that address buf does not get freed */
583 		rcvcall->addr.buf = NULL;
584 		(void) t_free((char *)rcvcall, T_CALL);
585 		break;
586 	case T_DATAXFER:
587 	case T_OUTCON:
588 		if (svcaddr == NULL) {
589 			/*
590 			 * svcaddr could also be NULL in cases where the
591 			 * client is already bound and connected.
592 			 */
593 			ct->ct_addr.len = 0;
594 		} else {
595 			ct->ct_addr.buf = malloc(svcaddr->len);
596 			if (ct->ct_addr.buf == NULL) {
597 				(void) syslog(LOG_ERR, clnt_vc_errstr,
598 					clnt_vc_str, __no_mem_str);
599 				rpc_createerr.cf_stat = RPC_SYSTEMERROR;
600 				rpc_createerr.cf_error.re_errno = errno;
601 				rpc_createerr.cf_error.re_terrno = 0;
602 				return (FALSE);
603 			}
604 			(void) memcpy(ct->ct_addr.buf, svcaddr->buf,
605 					(size_t)svcaddr->len);
606 			ct->ct_addr.len = ct->ct_addr.maxlen = svcaddr->len;
607 		}
608 		break;
609 	default:
610 		rpc_createerr.cf_stat = RPC_UNKNOWNADDR;
611 		return (FALSE);
612 	}
613 	return (TRUE);
614 }
615 
616 static enum clnt_stat
617 clnt_vc_call(CLIENT *cl, rpcproc_t proc, xdrproc_t xdr_args, caddr_t args_ptr,
618 	xdrproc_t xdr_results, caddr_t results_ptr, struct timeval timeout)
619 {
620 /* LINTED pointer alignment */
621 	struct ct_data *ct = (struct ct_data *)cl->cl_private;
622 	XDR *xdrs = &(ct->ct_xdrs);
623 	struct rpc_msg reply_msg;
624 	uint32_t x_id;
625 /* LINTED pointer alignment */
626 	uint32_t *msg_x_id = (uint32_t *)(ct->ct_mcall);	/* yuk */
627 	bool_t shipnow;
628 	int refreshes = 2;
629 
630 	if (rpc_fd_lock(vctbl, ct->ct_fd)) {
631 		rpc_callerr.re_status = RPC_FAILED;
632 		rpc_callerr.re_errno = errno;
633 		rpc_fd_unlock(vctbl, ct->ct_fd);
634 		return (RPC_FAILED);
635 	}
636 
637 	ct->ct_is_oneway = FALSE;
638 	if (ct->ct_io_mode == RPC_CL_NONBLOCKING) {
639 		if (do_flush(ct, RPC_CL_BLOCKING_FLUSH) != 0) {
640 			rpc_fd_unlock(vctbl, ct->ct_fd);
641 			return (RPC_FAILED);  /* XXX */
642 		}
643 	}
644 
645 	if (!ct->ct_waitset) {
646 		/* If time is not within limits, we ignore it. */
647 		if (time_not_ok(&timeout) == FALSE)
648 			ct->ct_wait = __rpc_timeval_to_msec(&timeout);
649 	} else {
650 		timeout.tv_sec = (ct->ct_wait / 1000);
651 		timeout.tv_usec = (ct->ct_wait % 1000) * 1000;
652 	}
653 
654 	shipnow = ((xdr_results == (xdrproc_t)0) && (timeout.tv_sec == 0) &&
655 	    (timeout.tv_usec == 0)) ? FALSE : TRUE;
656 call_again:
657 	xdrs->x_op = XDR_ENCODE;
658 	rpc_callerr.re_status = RPC_SUCCESS;
659 	/*
660 	 * Due to little endian byte order, it is necessary to convert to host
661 	 * format before decrementing xid.
662 	 */
663 	x_id = ntohl(*msg_x_id) - 1;
664 	*msg_x_id = htonl(x_id);
665 
666 	if (cl->cl_auth->ah_cred.oa_flavor != RPCSEC_GSS) {
667 		if ((!XDR_PUTBYTES(xdrs, ct->ct_mcall, ct->ct_mpos)) ||
668 		    (!XDR_PUTINT32(xdrs, (int32_t *)&proc)) ||
669 		    (!AUTH_MARSHALL(cl->cl_auth, xdrs)) ||
670 		    (!xdr_args(xdrs, args_ptr))) {
671 			if (rpc_callerr.re_status == RPC_SUCCESS)
672 				rpc_callerr.re_status = RPC_CANTENCODEARGS;
673 			(void) xdrrec_endofrecord(xdrs, TRUE);
674 			rpc_fd_unlock(vctbl, ct->ct_fd);
675 			return (rpc_callerr.re_status);
676 		}
677 	} else {
678 /* LINTED pointer alignment */
679 		uint32_t *u = (uint32_t *)&ct->ct_mcall[ct->ct_mpos];
680 		IXDR_PUT_U_INT32(u, proc);
681 		if (!__rpc_gss_wrap(cl->cl_auth, ct->ct_mcall,
682 		    ((char *)u) - ct->ct_mcall, xdrs, xdr_args, args_ptr)) {
683 			if (rpc_callerr.re_status == RPC_SUCCESS)
684 				rpc_callerr.re_status = RPC_CANTENCODEARGS;
685 			(void) xdrrec_endofrecord(xdrs, TRUE);
686 			rpc_fd_unlock(vctbl, ct->ct_fd);
687 			return (rpc_callerr.re_status);
688 		}
689 	}
690 	if (!xdrrec_endofrecord(xdrs, shipnow)) {
691 		rpc_fd_unlock(vctbl, ct->ct_fd);
692 		return (rpc_callerr.re_status = RPC_CANTSEND);
693 	}
694 	if (!shipnow) {
695 		rpc_fd_unlock(vctbl, ct->ct_fd);
696 		return (RPC_SUCCESS);
697 	}
698 	/*
699 	 * Hack to provide rpc-based message passing
700 	 */
701 	if (timeout.tv_sec == 0 && timeout.tv_usec == 0) {
702 		rpc_fd_unlock(vctbl, ct->ct_fd);
703 		return (rpc_callerr.re_status = RPC_TIMEDOUT);
704 	}
705 
706 
707 	/*
708 	 * Keep receiving until we get a valid transaction id
709 	 */
710 	xdrs->x_op = XDR_DECODE;
711 	for (;;) {
712 		reply_msg.acpted_rply.ar_verf = _null_auth;
713 		reply_msg.acpted_rply.ar_results.where = NULL;
714 		reply_msg.acpted_rply.ar_results.proc = (xdrproc_t)xdr_void;
715 		if (!xdrrec_skiprecord(xdrs)) {
716 			rpc_fd_unlock(vctbl, ct->ct_fd);
717 			return (rpc_callerr.re_status);
718 		}
719 		/* now decode and validate the response header */
720 		if (!xdr_replymsg(xdrs, &reply_msg)) {
721 			if (rpc_callerr.re_status == RPC_SUCCESS)
722 				continue;
723 			rpc_fd_unlock(vctbl, ct->ct_fd);
724 			return (rpc_callerr.re_status);
725 		}
726 		if (reply_msg.rm_xid == x_id)
727 			break;
728 	}
729 
730 	/*
731 	 * process header
732 	 */
733 	if ((reply_msg.rm_reply.rp_stat == MSG_ACCEPTED) &&
734 	    (reply_msg.acpted_rply.ar_stat == SUCCESS))
735 		rpc_callerr.re_status = RPC_SUCCESS;
736 	else
737 		__seterr_reply(&reply_msg, &(rpc_callerr));
738 
739 	if (rpc_callerr.re_status == RPC_SUCCESS) {
740 		if (!AUTH_VALIDATE(cl->cl_auth,
741 				&reply_msg.acpted_rply.ar_verf)) {
742 			rpc_callerr.re_status = RPC_AUTHERROR;
743 			rpc_callerr.re_why = AUTH_INVALIDRESP;
744 		} else if (cl->cl_auth->ah_cred.oa_flavor != RPCSEC_GSS) {
745 			if (!(*xdr_results)(xdrs, results_ptr)) {
746 				if (rpc_callerr.re_status == RPC_SUCCESS)
747 				    rpc_callerr.re_status = RPC_CANTDECODERES;
748 			}
749 		} else if (!__rpc_gss_unwrap(cl->cl_auth, xdrs, xdr_results,
750 							results_ptr)) {
751 			if (rpc_callerr.re_status == RPC_SUCCESS)
752 				rpc_callerr.re_status = RPC_CANTDECODERES;
753 		}
754 	}	/* end successful completion */
755 	/*
756 	 * If unsuccesful AND error is an authentication error
757 	 * then refresh credentials and try again, else break
758 	 */
759 	else if (rpc_callerr.re_status == RPC_AUTHERROR) {
760 		/* maybe our credentials need to be refreshed ... */
761 		if (refreshes-- && AUTH_REFRESH(cl->cl_auth, &reply_msg))
762 			goto call_again;
763 		else
764 			/*
765 			 * We are setting rpc_callerr here given that libnsl
766 			 * is not reentrant thereby reinitializing the TSD.
767 			 * If not set here then success could be returned even
768 			 * though refresh failed.
769 			 */
770 			rpc_callerr.re_status = RPC_AUTHERROR;
771 	} /* end of unsuccessful completion */
772 	/* free verifier ... */
773 	if (reply_msg.rm_reply.rp_stat == MSG_ACCEPTED &&
774 			reply_msg.acpted_rply.ar_verf.oa_base != NULL) {
775 		xdrs->x_op = XDR_FREE;
776 		(void) xdr_opaque_auth(xdrs, &(reply_msg.acpted_rply.ar_verf));
777 	}
778 	rpc_fd_unlock(vctbl, ct->ct_fd);
779 	return (rpc_callerr.re_status);
780 }
781 
782 static enum clnt_stat
783 clnt_vc_send(CLIENT *cl, rpcproc_t proc, xdrproc_t xdr_args, caddr_t args_ptr)
784 {
785 /* LINTED pointer alignment */
786 	struct ct_data *ct = (struct ct_data *)cl->cl_private;
787 	XDR *xdrs = &(ct->ct_xdrs);
788 	uint32_t x_id;
789 /* LINTED pointer alignment */
790 	uint32_t *msg_x_id = (uint32_t *)(ct->ct_mcall);	/* yuk */
791 
792 	if (rpc_fd_lock(vctbl, ct->ct_fd)) {
793 		rpc_callerr.re_status = RPC_FAILED;
794 		rpc_callerr.re_errno = errno;
795 		rpc_fd_unlock(vctbl, ct->ct_fd);
796 		return (RPC_FAILED);
797 	}
798 
799 	ct->ct_is_oneway = TRUE;
800 
801 	xdrs->x_op = XDR_ENCODE;
802 	rpc_callerr.re_status = RPC_SUCCESS;
803 	/*
804 	 * Due to little endian byte order, it is necessary to convert to host
805 	 * format before decrementing xid.
806 	 */
807 	x_id = ntohl(*msg_x_id) - 1;
808 	*msg_x_id = htonl(x_id);
809 
810 	if (cl->cl_auth->ah_cred.oa_flavor != RPCSEC_GSS) {
811 		if ((!XDR_PUTBYTES(xdrs, ct->ct_mcall, ct->ct_mpos)) ||
812 		    (!XDR_PUTINT32(xdrs, (int32_t *)&proc)) ||
813 		    (!AUTH_MARSHALL(cl->cl_auth, xdrs)) ||
814 		    (!xdr_args(xdrs, args_ptr))) {
815 			if (rpc_callerr.re_status == RPC_SUCCESS)
816 				rpc_callerr.re_status = RPC_CANTENCODEARGS;
817 			(void) xdrrec_endofrecord(xdrs, TRUE);
818 			rpc_fd_unlock(vctbl, ct->ct_fd);
819 			return (rpc_callerr.re_status);
820 		}
821 	} else {
822 /* LINTED pointer alignment */
823 		uint32_t *u = (uint32_t *)&ct->ct_mcall[ct->ct_mpos];
824 		IXDR_PUT_U_INT32(u, proc);
825 		if (!__rpc_gss_wrap(cl->cl_auth, ct->ct_mcall,
826 		    ((char *)u) - ct->ct_mcall, xdrs, xdr_args, args_ptr)) {
827 			if (rpc_callerr.re_status == RPC_SUCCESS)
828 				rpc_callerr.re_status = RPC_CANTENCODEARGS;
829 			(void) xdrrec_endofrecord(xdrs, TRUE);
830 			rpc_fd_unlock(vctbl, ct->ct_fd);
831 			return (rpc_callerr.re_status);
832 		}
833 	}
834 
835 	/*
836 	 * Do not need to check errors, as the following code does
837 	 * not depend on the successful completion of the call.
838 	 * An error, if any occurs, is reported through
839 	 * rpc_callerr.re_status.
840 	 */
841 	(void) xdrrec_endofrecord(xdrs, TRUE);
842 
843 	rpc_fd_unlock(vctbl, ct->ct_fd);
844 	return (rpc_callerr.re_status);
845 }
846 
847 /* ARGSUSED */
848 static void
849 clnt_vc_geterr(CLIENT *cl, struct rpc_err *errp)
850 {
851 	*errp = rpc_callerr;
852 }
853 
854 static bool_t
855 clnt_vc_freeres(CLIENT *cl, xdrproc_t xdr_res, caddr_t res_ptr)
856 {
857 /* LINTED pointer alignment */
858 	struct ct_data *ct = (struct ct_data *)cl->cl_private;
859 	XDR *xdrs = &(ct->ct_xdrs);
860 	bool_t stat;
861 
862 	(void) rpc_fd_lock(vctbl, ct->ct_fd);
863 	xdrs->x_op = XDR_FREE;
864 	stat = (*xdr_res)(xdrs, res_ptr);
865 	rpc_fd_unlock(vctbl, ct->ct_fd);
866 	return (stat);
867 }
868 
/*
 * Abort operation for this transport: a no-op.
 */
static void
clnt_vc_abort(void)
{
	/* Nothing to do. */
}
873 
874 /*ARGSUSED*/
875 static bool_t
876 clnt_vc_control(CLIENT *cl, int request, char *info)
877 {
878 	bool_t ret;
879 /* LINTED pointer alignment */
880 	struct ct_data *ct = (struct ct_data *)cl->cl_private;
881 
882 	if (rpc_fd_lock(vctbl, ct->ct_fd)) {
883 		rpc_fd_unlock(vctbl, ct->ct_fd);
884 		return (RPC_FAILED);
885 	}
886 
887 	switch (request) {
888 	case CLSET_FD_CLOSE:
889 		ct->ct_closeit = TRUE;
890 		rpc_fd_unlock(vctbl, ct->ct_fd);
891 		return (TRUE);
892 	case CLSET_FD_NCLOSE:
893 		ct->ct_closeit = FALSE;
894 		rpc_fd_unlock(vctbl, ct->ct_fd);
895 		return (TRUE);
896 	case CLFLUSH:
897 		if (ct->ct_io_mode == RPC_CL_NONBLOCKING) {
898 			int res;
899 			res = do_flush(ct, (info == NULL ||
900 			    /* LINTED pointer cast */
901 			    *(int *)info == RPC_CL_DEFAULT_FLUSH)?
902 			    /* LINTED pointer cast */
903 			    ct->ct_blocking_mode: *(int *)info);
904 			ret = (0 == res);
905 		}
906 		rpc_fd_unlock(vctbl, ct->ct_fd);
907 		return (ret);
908 	}
909 
910 	/* for other requests which use info */
911 	if (info == NULL) {
912 		rpc_fd_unlock(vctbl, ct->ct_fd);
913 		return (FALSE);
914 	}
915 	switch (request) {
916 	case CLSET_TIMEOUT:
917 /* LINTED pointer alignment */
918 		if (time_not_ok((struct timeval *)info)) {
919 			rpc_fd_unlock(vctbl, ct->ct_fd);
920 			return (FALSE);
921 		}
922 /* LINTED pointer alignment */
923 		ct->ct_wait = __rpc_timeval_to_msec((struct timeval *)info);
924 		ct->ct_waitset = TRUE;
925 		break;
926 	case CLGET_TIMEOUT:
927 /* LINTED pointer alignment */
928 		((struct timeval *)info)->tv_sec = ct->ct_wait / 1000;
929 /* LINTED pointer alignment */
930 		((struct timeval *)info)->tv_usec =
931 			(ct->ct_wait % 1000) * 1000;
932 		break;
933 	case CLGET_SERVER_ADDR:	/* For compatibility only */
934 		(void) memcpy(info, ct->ct_addr.buf, (size_t)ct->ct_addr.len);
935 		break;
936 	case CLGET_FD:
937 /* LINTED pointer alignment */
938 		*(int *)info = ct->ct_fd;
939 		break;
940 	case CLGET_SVC_ADDR:
941 		/* The caller should not free this memory area */
942 /* LINTED pointer alignment */
943 		*(struct netbuf *)info = ct->ct_addr;
944 		break;
945 	case CLSET_SVC_ADDR:		/* set to new address */
946 #ifdef undef
947 		/*
948 		 * XXX: once the t_snddis(), followed by t_connect() starts to
949 		 * work, this ifdef should be removed.  CLIENT handle reuse
950 		 * would then be possible for COTS as well.
951 		 */
952 		if (t_snddis(ct->ct_fd, NULL) == -1) {
953 			rpc_createerr.cf_stat = RPC_TLIERROR;
954 			rpc_createerr.cf_error.re_terrno = t_errno;
955 			rpc_createerr.cf_error.re_errno = errno;
956 			rpc_fd_unlock(vctbl, ct->ct_fd);
957 			return (FALSE);
958 		}
959 		ret = set_up_connection(ct->ct_fd, (struct netbuf *)info,
960 			ct, NULL));
961 		rpc_fd_unlock(vctbl, ct->ct_fd);
962 		return (ret);
963 #else
964 		rpc_fd_unlock(vctbl, ct->ct_fd);
965 		return (FALSE);
966 #endif
967 	case CLGET_XID:
968 		/*
969 		 * use the knowledge that xid is the
970 		 * first element in the call structure
971 		 * This will get the xid of the PREVIOUS call
972 		 */
973 /* LINTED pointer alignment */
974 		*(uint32_t *)info = ntohl(*(uint32_t *)ct->ct_mcall);
975 		break;
976 	case CLSET_XID:
977 		/* This will set the xid of the NEXT call */
978 /* LINTED pointer alignment */
979 		*(uint32_t *)ct->ct_mcall =  htonl(*(uint32_t *)info + 1);
980 		/* increment by 1 as clnt_vc_call() decrements once */
981 		break;
982 	case CLGET_VERS:
983 		/*
984 		 * This RELIES on the information that, in the call body,
985 		 * the version number field is the fifth field from the
986 		 * begining of the RPC header. MUST be changed if the
987 		 * call_struct is changed
988 		 */
989 /* LINTED pointer alignment */
990 		*(uint32_t *)info = ntohl(*(uint32_t *)(ct->ct_mcall +
991 						4 * BYTES_PER_XDR_UNIT));
992 		break;
993 
994 	case CLSET_VERS:
995 /* LINTED pointer alignment */
996 		*(uint32_t *)(ct->ct_mcall + 4 * BYTES_PER_XDR_UNIT) =
997 /* LINTED pointer alignment */
998 			htonl(*(uint32_t *)info);
999 		break;
1000 
1001 	case CLGET_PROG:
1002 		/*
1003 		 * This RELIES on the information that, in the call body,
1004 		 * the program number field is the fourth field from the
1005 		 * begining of the RPC header. MUST be changed if the
1006 		 * call_struct is changed
1007 		 */
1008 /* LINTED pointer alignment */
1009 		*(uint32_t *)info = ntohl(*(uint32_t *)(ct->ct_mcall +
1010 						3 * BYTES_PER_XDR_UNIT));
1011 		break;
1012 
1013 	case CLSET_PROG:
1014 /* LINTED pointer alignment */
1015 		*(uint32_t *)(ct->ct_mcall + 3 * BYTES_PER_XDR_UNIT) =
1016 /* LINTED pointer alignment */
1017 			htonl(*(uint32_t *)info);
1018 		break;
1019 
1020 	case CLSET_IO_MODE:
1021 		/* LINTED pointer cast */
1022 		if (!set_io_mode(ct, *(int *)info)) {
1023 		    rpc_fd_unlock(vctbl, ct->ct_fd);
1024 		    return (FALSE);
1025 		}
1026 		break;
1027 	case CLSET_FLUSH_MODE:
1028 		/* Set a specific FLUSH_MODE */
1029 		/* LINTED pointer cast */
1030 		if (!set_flush_mode(ct, *(int *)info)) {
1031 		    rpc_fd_unlock(vctbl, ct->ct_fd);
1032 		    return (FALSE);
1033 		}
1034 		break;
1035 	case CLGET_FLUSH_MODE:
1036 		/* LINTED pointer cast */
1037 		*(rpcflushmode_t *)info = ct->ct_blocking_mode;
1038 		break;
1039 
1040 	case CLGET_IO_MODE:
1041 		/* LINTED pointer cast */
1042 		*(rpciomode_t *)info = ct->ct_io_mode;
1043 		break;
1044 
1045 	case CLGET_CURRENT_REC_SIZE:
1046 		/*
1047 		 * Returns the current amount of memory allocated
1048 		 * to pending requests
1049 		 */
1050 		/* LINTED pointer cast */
1051 		*(int *)info = ct->ct_bufferPendingSize;
1052 		break;
1053 
1054 	case CLSET_CONNMAXREC_SIZE:
1055 		/* Cannot resize the buffer if it is used. */
1056 		if (ct->ct_bufferPendingSize != 0) {
1057 			rpc_fd_unlock(vctbl, ct->ct_fd);
1058 			return (FALSE);
1059 		}
1060 		/*
1061 		 * If the new size is equal to the current size,
1062 		 * there is nothing to do.
1063 		 */
1064 		/* LINTED pointer cast */
1065 		if (ct->ct_bufferSize == *(uint_t *)info)
1066 			break;
1067 
1068 		/* LINTED pointer cast */
1069 		ct->ct_bufferSize = *(uint_t *)info;
1070 		if (ct->ct_buffer) {
1071 			free(ct->ct_buffer);
1072 			ct->ct_buffer = NULL;
1073 			ct->ct_bufferReadPtr = ct->ct_bufferWritePtr = NULL;
1074 		}
1075 		break;
1076 
1077 	case CLGET_CONNMAXREC_SIZE:
1078 		/*
1079 		 * Returns the size of buffer allocated
1080 		 * to pending requests
1081 		 */
1082 		/* LINTED pointer cast */
1083 		*(uint_t *)info = ct->ct_bufferSize;
1084 		break;
1085 
1086 	default:
1087 		rpc_fd_unlock(vctbl, ct->ct_fd);
1088 		return (FALSE);
1089 	}
1090 	rpc_fd_unlock(vctbl, ct->ct_fd);
1091 	return (TRUE);
1092 }
1093 
1094 static void
1095 clnt_vc_destroy(CLIENT *cl)
1096 {
1097 /* LINTED pointer alignment */
1098 	struct ct_data *ct = (struct ct_data *)cl->cl_private;
1099 	int ct_fd = ct->ct_fd;
1100 
1101 	(void) rpc_fd_lock(vctbl, ct_fd);
1102 
1103 	if (ct->ct_io_mode == RPC_CL_NONBLOCKING) {
1104 		(void) do_flush(ct, RPC_CL_BLOCKING_FLUSH);
1105 		(void) unregister_nb(ct);
1106 	}
1107 
1108 	if (ct->ct_closeit)
1109 		(void) t_close(ct_fd);
1110 	XDR_DESTROY(&(ct->ct_xdrs));
1111 	if (ct->ct_addr.buf)
1112 		free(ct->ct_addr.buf);
1113 	free(ct);
1114 	if (cl->cl_netid && cl->cl_netid[0])
1115 		free(cl->cl_netid);
1116 	if (cl->cl_tp && cl->cl_tp[0])
1117 		free(cl->cl_tp);
1118 	free(cl);
1119 	rpc_fd_unlock(vctbl, ct_fd);
1120 }
1121 
1122 /*
1123  * Interface between xdr serializer and vc connection.
1124  * Behaves like the system calls, read & write, but keeps some error state
1125  * around for the rpc level.
1126  */
1127 static int
1128 read_vc(void *ct_tmp, caddr_t buf, int len)
1129 {
1130 	static pthread_key_t pfdp_key = PTHREAD_ONCE_KEY_NP;
1131 	struct pollfd *pfdp;
1132 	int npfd;		/* total number of pfdp allocated */
1133 	struct ct_data *ct = ct_tmp;
1134 	struct timeval starttime;
1135 	struct timeval curtime;
1136 	int poll_time;
1137 	int delta;
1138 
1139 	if (len == 0)
1140 		return (0);
1141 
1142 	/*
1143 	 * Allocate just one the first time.  thr_get_storage() may
1144 	 * return a larger buffer, left over from the last time we were
1145 	 * here, but that's OK.  realloc() will deal with it properly.
1146 	 */
1147 	npfd = 1;
1148 	pfdp = thr_get_storage(&pfdp_key, sizeof (struct pollfd), free);
1149 	if (pfdp == NULL) {
1150 		(void) syslog(LOG_ERR, clnt_vc_errstr,
1151 			clnt_read_vc_str, __no_mem_str);
1152 		rpc_callerr.re_status = RPC_SYSTEMERROR;
1153 		rpc_callerr.re_errno = errno;
1154 		rpc_callerr.re_terrno = 0;
1155 		return (-1);
1156 	}
1157 
1158 	/*
1159 	 *	N.B.:  slot 0 in the pollfd array is reserved for the file
1160 	 *	descriptor we're really interested in (as opposed to the
1161 	 *	callback descriptors).
1162 	 */
1163 	pfdp[0].fd = ct->ct_fd;
1164 	pfdp[0].events = MASKVAL;
1165 	pfdp[0].revents = 0;
1166 	poll_time = ct->ct_wait;
1167 	if (gettimeofday(&starttime, NULL) == -1) {
1168 		syslog(LOG_ERR, "Unable to get time of day: %m");
1169 		return (-1);
1170 	}
1171 
1172 	for (;;) {
1173 		extern void (*_svc_getreqset_proc)();
1174 		extern pollfd_t *svc_pollfd;
1175 		extern int svc_max_pollfd;
1176 		int fds;
1177 
1178 		/* VARIABLES PROTECTED BY svc_fd_lock: svc_pollfd */
1179 
1180 		if (_svc_getreqset_proc) {
1181 			sig_rw_rdlock(&svc_fd_lock);
1182 
1183 			/* reallocate pfdp to svc_max_pollfd +1 */
1184 			if (npfd != (svc_max_pollfd + 1)) {
1185 				struct pollfd *tmp_pfdp = realloc(pfdp,
1186 						sizeof (struct pollfd) *
1187 						(svc_max_pollfd + 1));
1188 				if (tmp_pfdp == NULL) {
1189 					sig_rw_unlock(&svc_fd_lock);
1190 					(void) syslog(LOG_ERR, clnt_vc_errstr,
1191 						clnt_read_vc_str, __no_mem_str);
1192 					rpc_callerr.re_status = RPC_SYSTEMERROR;
1193 					rpc_callerr.re_errno = errno;
1194 					rpc_callerr.re_terrno = 0;
1195 					return (-1);
1196 				}
1197 
1198 				pfdp = tmp_pfdp;
1199 				npfd = svc_max_pollfd + 1;
1200 				(void) pthread_setspecific(pfdp_key, pfdp);
1201 			}
1202 			if (npfd > 1)
1203 				(void) memcpy(&pfdp[1], svc_pollfd,
1204 				    sizeof (struct pollfd) * (npfd - 1));
1205 
1206 			sig_rw_unlock(&svc_fd_lock);
1207 		} else {
1208 			npfd = 1;	/* don't forget about pfdp[0] */
1209 		}
1210 
1211 		switch (fds = poll(pfdp, npfd, poll_time)) {
1212 		case 0:
1213 			rpc_callerr.re_status = RPC_TIMEDOUT;
1214 			return (-1);
1215 
1216 		case -1:
1217 			if (errno != EINTR)
1218 				continue;
1219 			else {
1220 				/*
1221 				 * interrupted by another signal,
1222 				 * update time_waited
1223 				 */
1224 
1225 				if (gettimeofday(&curtime, NULL) == -1) {
1226 					syslog(LOG_ERR,
1227 					    "Unable to get time of day:  %m");
1228 					errno = 0;
1229 					continue;
1230 				};
1231 				delta = (curtime.tv_sec -
1232 						starttime.tv_sec) * 1000 +
1233 					(curtime.tv_usec -
1234 						starttime.tv_usec) / 1000;
1235 				poll_time -= delta;
1236 				if (poll_time < 0) {
1237 					rpc_callerr.re_status =
1238 						RPC_TIMEDOUT;
1239 					errno = 0;
1240 					return (-1);
1241 				} else {
1242 					errno = 0; /* reset it */
1243 					continue;
1244 				}
1245 			}
1246 		}
1247 
1248 		if (pfdp[0].revents == 0) {
1249 			/* must be for server side of the house */
1250 			(*_svc_getreqset_proc)(&pfdp[1], fds);
1251 			continue;	/* do poll again */
1252 		}
1253 
1254 		if (pfdp[0].revents & POLLNVAL) {
1255 			rpc_callerr.re_status = RPC_CANTRECV;
1256 			/*
1257 			 *	Note:  we're faking errno here because we
1258 			 *	previously would have expected select() to
1259 			 *	return -1 with errno EBADF.  Poll(BA_OS)
1260 			 *	returns 0 and sets the POLLNVAL revents flag
1261 			 *	instead.
1262 			 */
1263 			rpc_callerr.re_errno = errno = EBADF;
1264 			return (-1);
1265 		}
1266 
1267 		if (pfdp[0].revents & (POLLERR | POLLHUP)) {
1268 			rpc_callerr.re_status = RPC_CANTRECV;
1269 			rpc_callerr.re_errno = errno = EPIPE;
1270 			return (-1);
1271 		}
1272 		break;
1273 	}
1274 
1275 	switch (len = t_rcvall(ct->ct_fd, buf, len)) {
1276 	case 0:
1277 		/* premature eof */
1278 		rpc_callerr.re_errno = ENOLINK;
1279 		rpc_callerr.re_terrno = 0;
1280 		rpc_callerr.re_status = RPC_CANTRECV;
1281 		len = -1;	/* it's really an error */
1282 		break;
1283 
1284 	case -1:
1285 		rpc_callerr.re_terrno = t_errno;
1286 		rpc_callerr.re_errno = 0;
1287 		rpc_callerr.re_status = RPC_CANTRECV;
1288 		break;
1289 	}
1290 	return (len);
1291 }
1292 
1293 static int
1294 write_vc(void *ct_tmp, caddr_t buf, int len)
1295 {
1296 	int i, cnt;
1297 	struct ct_data *ct = ct_tmp;
1298 	int flag;
1299 	int maxsz;
1300 
1301 	maxsz = ct->ct_tsdu;
1302 
1303 	/* Handle the non-blocking mode */
1304 	if (ct->ct_is_oneway && ct->ct_io_mode == RPC_CL_NONBLOCKING) {
1305 		/*
1306 		 * Test a special case here. If the length of the current
1307 		 * write is greater than the transport data unit, and the
1308 		 * mode is non blocking, we return RPC_CANTSEND.
1309 		 * XXX  this is not very clean.
1310 		 */
1311 		if (maxsz > 0 && len > maxsz) {
1312 			rpc_callerr.re_terrno = errno;
1313 			rpc_callerr.re_errno = 0;
1314 			rpc_callerr.re_status = RPC_CANTSEND;
1315 			return (-1);
1316 		}
1317 
1318 		len = nb_send(ct, buf, (unsigned)len);
1319 		if (len == -1) {
1320 			rpc_callerr.re_terrno = errno;
1321 			rpc_callerr.re_errno = 0;
1322 			rpc_callerr.re_status = RPC_CANTSEND;
1323 		} else if (len == -2) {
1324 			rpc_callerr.re_terrno = 0;
1325 			rpc_callerr.re_errno = 0;
1326 			rpc_callerr.re_status = RPC_CANTSTORE;
1327 		}
1328 		return (len);
1329 	}
1330 
1331 	if ((maxsz == 0) || (maxsz == -1)) {
1332 		/*
1333 		 * T_snd may return -1 for error on connection (connection
1334 		 * needs to be repaired/closed, and -2 for flow-control
1335 		 * handling error (no operation to do, just wait and call
1336 		 * T_Flush()).
1337 		 */
1338 		if ((len = t_snd(ct->ct_fd, buf, (unsigned)len, 0)) == -1) {
1339 			rpc_callerr.re_terrno = t_errno;
1340 			rpc_callerr.re_errno = 0;
1341 			rpc_callerr.re_status = RPC_CANTSEND;
1342 		}
1343 		return (len);
1344 	}
1345 
1346 	/*
1347 	 * This for those transports which have a max size for data.
1348 	 */
1349 	for (cnt = len, i = 0; cnt > 0; cnt -= i, buf += i) {
1350 		flag = cnt > maxsz ? T_MORE : 0;
1351 		if ((i = t_snd(ct->ct_fd, buf, (unsigned)MIN(cnt, maxsz),
1352 				flag)) == -1) {
1353 			rpc_callerr.re_terrno = t_errno;
1354 			rpc_callerr.re_errno = 0;
1355 			rpc_callerr.re_status = RPC_CANTSEND;
1356 			return (-1);
1357 		}
1358 	}
1359 	return (len);
1360 }
1361 
1362 /*
1363  * Receive the required bytes of data, even if it is fragmented.
1364  */
1365 static int
1366 t_rcvall(int fd, char *buf, int len)
1367 {
1368 	int moreflag;
1369 	int final = 0;
1370 	int res;
1371 
1372 	do {
1373 		moreflag = 0;
1374 		res = t_rcv(fd, buf, (unsigned)len, &moreflag);
1375 		if (res == -1) {
1376 			if (t_errno == TLOOK)
1377 				switch (t_look(fd)) {
1378 				case T_DISCONNECT:
1379 					(void) t_rcvdis(fd, NULL);
1380 					(void) t_snddis(fd, NULL);
1381 					return (-1);
1382 				case T_ORDREL:
1383 				/* Received orderly release indication */
1384 					(void) t_rcvrel(fd);
1385 				/* Send orderly release indicator */
1386 					(void) t_sndrel(fd);
1387 					return (-1);
1388 				default:
1389 					return (-1);
1390 				}
1391 		} else if (res == 0) {
1392 			return (0);
1393 		}
1394 		final += res;
1395 		buf += res;
1396 		len -= res;
1397 	} while ((len > 0) && (moreflag & T_MORE));
1398 	return (final);
1399 }
1400 
1401 static struct clnt_ops *
1402 clnt_vc_ops(void)
1403 {
1404 	static struct clnt_ops ops;
1405 	extern mutex_t	ops_lock;
1406 
1407 	/* VARIABLES PROTECTED BY ops_lock: ops */
1408 
1409 	sig_mutex_lock(&ops_lock);
1410 	if (ops.cl_call == NULL) {
1411 		ops.cl_call = clnt_vc_call;
1412 		ops.cl_send = clnt_vc_send;
1413 		ops.cl_abort = clnt_vc_abort;
1414 		ops.cl_geterr = clnt_vc_geterr;
1415 		ops.cl_freeres = clnt_vc_freeres;
1416 		ops.cl_destroy = clnt_vc_destroy;
1417 		ops.cl_control = clnt_vc_control;
1418 	}
1419 	sig_mutex_unlock(&ops_lock);
1420 	return (&ops);
1421 }
1422 
1423 /*
1424  * Make sure that the time is not garbage.   -1 value is disallowed.
1425  * Note this is different from time_not_ok in clnt_dg.c
1426  */
1427 static bool_t
1428 time_not_ok(struct timeval *t)
1429 {
1430 	return (t->tv_sec <= -1 || t->tv_sec > 100000000 ||
1431 		t->tv_usec <= -1 || t->tv_usec > 1000000);
1432 }
1433 
1434 
/*
 * Number of bytes between buffer pointer ct->ct_<p> and the end of the
 * pending-request buffer.  Expects a variable named "ct" to be in scope.
 */
#define	REMAIN_BYTES(p) \
	(ct->ct_bufferSize - (ct->ct_##p - ct->ct_buffer))
1437 
1438 static int
1439 addInBuffer(struct ct_data *ct, char *dataToAdd, unsigned int nBytes)
1440 {
1441 	if (NULL == ct->ct_buffer) {
1442 		/* Buffer not allocated yet. */
1443 		char *buffer;
1444 
1445 		buffer = malloc(ct->ct_bufferSize);
1446 		if (NULL == buffer) {
1447 			errno = ENOMEM;
1448 			return (-1);
1449 		}
1450 		(void) memcpy(buffer, dataToAdd, nBytes);
1451 
1452 		ct->ct_buffer = buffer;
1453 		ct->ct_bufferReadPtr = buffer;
1454 		ct->ct_bufferWritePtr = buffer + nBytes;
1455 		ct->ct_bufferPendingSize = nBytes;
1456 	} else {
1457 		/*
1458 		 * For an already allocated buffer, two mem copies
1459 		 * might be needed, depending on the current
1460 		 * writing position.
1461 		 */
1462 
1463 		/* Compute the length of the first copy. */
1464 		int len = MIN(nBytes, REMAIN_BYTES(bufferWritePtr));
1465 
1466 		ct->ct_bufferPendingSize += nBytes;
1467 
1468 		(void) memcpy(ct->ct_bufferWritePtr, dataToAdd, len);
1469 		ct->ct_bufferWritePtr += len;
1470 		nBytes -= len;
1471 		if (0 == nBytes) {
1472 			/* One memcopy needed. */
1473 
1474 			/*
1475 			 * If the write pointer is at the end of the buffer,
1476 			 * wrap it now.
1477 			 */
1478 			if (ct->ct_bufferWritePtr ==
1479 			    (ct->ct_buffer + ct->ct_bufferSize)) {
1480 				ct->ct_bufferWritePtr = ct->ct_buffer;
1481 			}
1482 		} else {
1483 			/* Two memcopy needed. */
1484 			dataToAdd += len;
1485 
1486 			/*
1487 			 * Copy the remaining data to the beginning of the
1488 			 * buffer
1489 			 */
1490 			(void) memcpy(ct->ct_buffer, dataToAdd, nBytes);
1491 			ct->ct_bufferWritePtr = ct->ct_buffer + nBytes;
1492 		}
1493 	}
1494 	return (0);
1495 }
1496 
1497 static void
1498 consumeFromBuffer(struct ct_data *ct, unsigned int nBytes)
1499 {
1500 	ct->ct_bufferPendingSize -= nBytes;
1501 	if (ct->ct_bufferPendingSize == 0) {
1502 		/*
1503 		 * If the buffer contains no data, we set the two pointers at
1504 		 * the beginning of the buffer (to miminize buffer wraps).
1505 		 */
1506 		ct->ct_bufferReadPtr = ct->ct_bufferWritePtr = ct->ct_buffer;
1507 	} else {
1508 		ct->ct_bufferReadPtr += nBytes;
1509 		if (ct->ct_bufferReadPtr >
1510 		    ct->ct_buffer + ct->ct_bufferSize) {
1511 			ct->ct_bufferReadPtr -= ct->ct_bufferSize;
1512 		}
1513 	}
1514 }
1515 
1516 static int
1517 iovFromBuffer(struct ct_data *ct, struct iovec *iov)
1518 {
1519 	int l;
1520 
1521 	if (ct->ct_bufferPendingSize == 0)
1522 		return (0);
1523 
1524 	l = REMAIN_BYTES(bufferReadPtr);
1525 	if (l < ct->ct_bufferPendingSize) {
1526 		/* Buffer in two fragments. */
1527 		iov[0].iov_base = ct->ct_bufferReadPtr;
1528 		iov[0].iov_len  = l;
1529 
1530 		iov[1].iov_base = ct->ct_buffer;
1531 		iov[1].iov_len  = ct->ct_bufferPendingSize - l;
1532 		return (2);
1533 	} else {
1534 		/* Buffer in one fragment. */
1535 		iov[0].iov_base = ct->ct_bufferReadPtr;
1536 		iov[0].iov_len  = ct->ct_bufferPendingSize;
1537 		return (1);
1538 	}
1539 }
1540 
1541 static bool_t
1542 set_flush_mode(struct ct_data *ct, int mode)
1543 {
1544 	switch (mode) {
1545 	case RPC_CL_BLOCKING_FLUSH:
1546 		/* flush as most as possible without blocking */
1547 	case RPC_CL_BESTEFFORT_FLUSH:
1548 		/* flush the buffer completely (possibly blocking) */
1549 	case RPC_CL_DEFAULT_FLUSH:
1550 		/* flush according to the currently defined policy */
1551 		ct->ct_blocking_mode = mode;
1552 		return (TRUE);
1553 	default:
1554 		return (FALSE);
1555 	}
1556 }
1557 
1558 static bool_t
1559 set_io_mode(struct ct_data *ct, int ioMode)
1560 {
1561 	switch (ioMode) {
1562 	case RPC_CL_BLOCKING:
1563 		if (ct->ct_io_mode == RPC_CL_NONBLOCKING) {
1564 			if (NULL != ct->ct_buffer) {
1565 				/*
1566 				 * If a buffer was allocated for this
1567 				 * connection, flush it now, and free it.
1568 				 */
1569 				(void) do_flush(ct, RPC_CL_BLOCKING_FLUSH);
1570 				free(ct->ct_buffer);
1571 				ct->ct_buffer = NULL;
1572 			}
1573 			(void) unregister_nb(ct);
1574 			ct->ct_io_mode = ioMode;
1575 		}
1576 		break;
1577 	case RPC_CL_NONBLOCKING:
1578 		if (ct->ct_io_mode == RPC_CL_BLOCKING) {
1579 			if (-1 == register_nb(ct)) {
1580 				return (FALSE);
1581 			}
1582 			ct->ct_io_mode = ioMode;
1583 		}
1584 		break;
1585 	default:
1586 		return (FALSE);
1587 	}
1588 	return (TRUE);
1589 }
1590 
1591 static int
1592 do_flush(struct ct_data *ct, uint_t flush_mode)
1593 {
1594 	int result;
1595 	if (ct->ct_bufferPendingSize == 0) {
1596 		return (0);
1597 	}
1598 
1599 	switch (flush_mode) {
1600 	case RPC_CL_BLOCKING_FLUSH:
1601 		if (!set_blocking_connection(ct, TRUE)) {
1602 			return (-1);
1603 		}
1604 		while (ct->ct_bufferPendingSize > 0) {
1605 			if (REMAIN_BYTES(bufferReadPtr) <
1606 			    ct->ct_bufferPendingSize) {
1607 				struct iovec iov[2];
1608 				(void) iovFromBuffer(ct, iov);
1609 				result = writev(ct->ct_fd, iov, 2);
1610 			} else {
1611 				result = t_snd(ct->ct_fd, ct->ct_bufferReadPtr,
1612 				    ct->ct_bufferPendingSize, 0);
1613 			}
1614 			if (result < 0) {
1615 				return (-1);
1616 			}
1617 			consumeFromBuffer(ct, result);
1618 		}
1619 
1620 		break;
1621 
1622 	case RPC_CL_BESTEFFORT_FLUSH:
1623 		(void) set_blocking_connection(ct, FALSE);
1624 		if (REMAIN_BYTES(bufferReadPtr) < ct->ct_bufferPendingSize) {
1625 			struct iovec iov[2];
1626 			(void) iovFromBuffer(ct, iov);
1627 			result = writev(ct->ct_fd, iov, 2);
1628 		} else {
1629 			result = t_snd(ct->ct_fd, ct->ct_bufferReadPtr,
1630 			    ct->ct_bufferPendingSize, 0);
1631 		}
1632 		if (result < 0) {
1633 			if (errno != EWOULDBLOCK) {
1634 				perror("flush");
1635 				return (-1);
1636 			}
1637 			return (0);
1638 		}
1639 		if (result > 0)
1640 			consumeFromBuffer(ct, result);
1641 		break;
1642 	}
1643 	return (0);
1644 }
1645 
1646 /*
1647  * Non blocking send.
1648  */
1649 
1650 static int
1651 nb_send(struct ct_data *ct, void *buff, unsigned int nBytes)
1652 {
1653 	int result;
1654 
1655 	if (!(ntohl(*(uint32_t *)buff) & 2^31)) {
1656 		return (-1);
1657 	}
1658 
1659 	/*
1660 	 * Check to see if the current message can be stored fully in the
1661 	 * buffer. We have to check this now because it may be impossible
1662 	 * to send any data, so the message must be stored in the buffer.
1663 	 */
1664 	if (nBytes > (ct->ct_bufferSize - ct->ct_bufferPendingSize)) {
1665 		/* Try to flush  (to free some space). */
1666 		(void) do_flush(ct, RPC_CL_BESTEFFORT_FLUSH);
1667 
1668 		/* Can we store the message now ? */
1669 		if (nBytes > (ct->ct_bufferSize - ct->ct_bufferPendingSize))
1670 			return (-2);
1671 	}
1672 
1673 	(void) set_blocking_connection(ct, FALSE);
1674 
1675 	/*
1676 	 * If there is no data pending, we can simply try
1677 	 * to send our data.
1678 	 */
1679 	if (ct->ct_bufferPendingSize == 0) {
1680 		result = t_snd(ct->ct_fd, buff, nBytes, 0);
1681 		if (result == -1) {
1682 			if (errno == EWOULDBLOCK) {
1683 				result = 0;
1684 			} else {
1685 				perror("send");
1686 				return (-1);
1687 			}
1688 		}
1689 		/*
1690 		 * If we have not sent all data, we must store them
1691 		 * in the buffer.
1692 		 */
1693 		if (result != nBytes) {
1694 			if (addInBuffer(ct, (char *)buff + result,
1695 			    nBytes - result) == -1) {
1696 				return (-1);
1697 			}
1698 		}
1699 	} else {
1700 		/*
1701 		 * Some data pending in the buffer.  We try to send
1702 		 * both buffer data and current message in one shot.
1703 		 */
1704 		struct iovec iov[3];
1705 		int i = iovFromBuffer(ct, &iov[0]);
1706 
1707 		iov[i].iov_base = buff;
1708 		iov[i].iov_len  = nBytes;
1709 
1710 		result = writev(ct->ct_fd, iov, i+1);
1711 		if (result == -1) {
1712 			if (errno == EWOULDBLOCK) {
1713 				/* No bytes sent */
1714 				result = 0;
1715 			} else {
1716 				return (-1);
1717 			}
1718 		}
1719 
1720 		/*
1721 		 * Add the bytes from the message
1722 		 * that we have not sent.
1723 		 */
1724 		if (result <= ct->ct_bufferPendingSize) {
1725 			/* No bytes from the message sent */
1726 			consumeFromBuffer(ct, result);
1727 			if (addInBuffer(ct, buff, nBytes) == -1) {
1728 				return (-1);
1729 			}
1730 		} else {
1731 			/*
1732 			 * Some bytes of the message are sent.
1733 			 * Compute the length of the message that has
1734 			 * been sent.
1735 			 */
1736 			int len = result - ct->ct_bufferPendingSize;
1737 
1738 			/* So, empty the buffer. */
1739 			ct->ct_bufferReadPtr = ct->ct_buffer;
1740 			ct->ct_bufferWritePtr = ct->ct_buffer;
1741 			ct->ct_bufferPendingSize = 0;
1742 
1743 			/* And add the remaining part of the message. */
1744 			if (len != nBytes) {
1745 				if (addInBuffer(ct, (char *)buff + len,
1746 					nBytes-len) == -1) {
1747 					return (-1);
1748 				}
1749 			}
1750 		}
1751 	}
1752 	return (nBytes);
1753 }
1754 
1755 static void
1756 flush_registered_clients(void)
1757 {
1758 	struct nb_reg_node *node;
1759 
1760 	if (LIST_ISEMPTY(nb_first)) {
1761 		return;
1762 	}
1763 
1764 	LIST_FOR_EACH(nb_first, node) {
1765 		(void) do_flush(node->ct, RPC_CL_BLOCKING_FLUSH);
1766 	}
1767 }
1768 
1769 static int
1770 allocate_chunk(void)
1771 {
1772 #define	CHUNK_SIZE 16
1773 	struct nb_reg_node *chk =
1774 	    malloc(sizeof (struct nb_reg_node) * CHUNK_SIZE);
1775 	struct nb_reg_node *n;
1776 	int i;
1777 
1778 	if (NULL == chk) {
1779 		return (-1);
1780 	}
1781 
1782 	n = chk;
1783 	for (i = 0; i < CHUNK_SIZE-1; ++i) {
1784 		n[i].next = &(n[i+1]);
1785 	}
1786 	n[CHUNK_SIZE-1].next = (struct nb_reg_node *)&nb_free;
1787 	nb_free = chk;
1788 	return (0);
1789 }
1790 
1791 static int
1792 register_nb(struct ct_data *ct)
1793 {
1794 	struct nb_reg_node *node;
1795 
1796 	(void) mutex_lock(&nb_list_mutex);
1797 
1798 	if (LIST_ISEMPTY(nb_free) && (allocate_chunk() == -1)) {
1799 		(void) mutex_unlock(&nb_list_mutex);
1800 		errno = ENOMEM;
1801 		return (-1);
1802 	}
1803 
1804 	if (!exit_handler_set) {
1805 		(void) atexit(flush_registered_clients);
1806 		exit_handler_set = TRUE;
1807 	}
1808 	/* Get the first free node */
1809 	LIST_EXTRACT(nb_free, node);
1810 
1811 	node->ct = ct;
1812 
1813 	LIST_ADD(nb_first, node);
1814 	(void) mutex_unlock(&nb_list_mutex);
1815 
1816 	return (0);
1817 }
1818 
1819 static int
1820 unregister_nb(struct ct_data *ct)
1821 {
1822 	struct nb_reg_node *node;
1823 
1824 	(void) mutex_lock(&nb_list_mutex);
1825 	assert(!LIST_ISEMPTY(nb_first));
1826 
1827 	node = nb_first;
1828 	LIST_FOR_EACH(nb_first, node) {
1829 		if (node->next->ct == ct) {
1830 			/* Get the node to unregister. */
1831 			struct nb_reg_node *n = node->next;
1832 			node->next = n->next;
1833 
1834 			n->ct = NULL;
1835 			LIST_ADD(nb_free, n);
1836 			break;
1837 		}
1838 	}
1839 	(void) mutex_unlock(&nb_list_mutex);
1840 	return (0);
1841 }
1842