xref: /illumos-gate/usr/src/lib/libslp/clib/slp_net.c (revision f52943a93040563107b95bccb9db87d9971ef47d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * Module for all network transactions. SLP messages can be multicast,
29  * unicast over UDP, or unicast over TCP; this module provides routines
30  * for all three. TCP transactions are handled by a single dedicated
31  * thread, while multicast and UDP unicast messages are sent by the
32  * calling thread.
33  *
34  * slp_uc_tcp_send:	enqueues a message on the TCP transaction thread's
35  *				queue.
36  * slp_tcp_wait:	blocks until all TCP-enqueued transactions for
37  *				a given SLP handle are complete
38  * slp_uc_udp_send:	unicasts a message using a datagram
39  * slp_mc_send:		multicasts a message
40  */
41 
42 /*
43  * todo: correct multicast interfaces;
44  */
45 
46 #include <stdio.h>
47 #include <stdlib.h>
48 #include <syslog.h>
49 #include <sys/types.h>
50 #include <sys/socket.h>
51 #include <arpa/inet.h>
52 #include <errno.h>
53 #include <unistd.h>
54 #include <time.h>
55 #include <string.h>
56 #include <slp-internal.h>
57 #include <slp_net_utils.h>
58 
/*
 * TCP thread particulars
 *
 * tcp_thr_running: set to SLP_TRUE by start_tcp_thr once the thread has
 *                  been created, reset to SLP_FALSE by end_tcp_thr.
 * tcp_q:           request queue created in start_tcp_thr and destroyed
 *                  in end_tcp_thr; slp_uc_tcp_send enqueues on it and
 *                  tcp_thread dequeues from it.
 * tcp_sockfd:      socket used by tcp_thread for the target it is
 *                  currently talking to.
 * start_lock:      held by start_tcp_thr and end_tcp_thr while they
 *                  change tcp_thr_running and tcp_q.
 */
static SLPBoolean tcp_thr_running = SLP_FALSE;
static slp_queue_t *tcp_q;
static int tcp_sockfd;
static mutex_t start_lock = DEFAULTMUTEX;
66 
/*
 * Used to pass arguments to the TCP thread, via 'tcp_q'. One of these is
 * allocated per call to slp_uc_tcp_send and freed by tcp_thread as soon
 * as its fields have been copied out.
 */
struct tcp_rqst {
	slp_handle_impl_t *hp;		/* handle the transaction belongs to */
	slp_target_t *target;		/* head of the failover target list */
	const char *scopes;		/* scopes string handed to make_header */
	SLPBoolean free_target;		/* if set, tcp_thread frees 'target' */
	unsigned short xid;		/* if non-zero, stamped into the header */
};
75 
/*
 * Used to keep track of broadcast interfaces. Filled in by make_bc_target
 * and walked by bc_sendmsg; slp_mc_send frees 'sin' when it is done.
 */
struct bc_ifs {
	struct sockaddr_in *sin;	/* array of broadcast destinations */
	int num_ifs;			/* number of entries in 'sin' */
};
81 
/*
 * Private utility routines
 */

/* TCP transaction thread: lazy start-up and the thread body itself */
static SLPError start_tcp_thr();
static void tcp_thread();

/* message construction shared by the UDP, multicast and TCP paths */
static SLPError make_header(slp_handle_impl_t *, char *, const char *);
static void udp_make_msghdr(struct sockaddr_in *, struct iovec *, int,
			    struct msghdr *);

/* multicast / broadcast target set-up, sending and receiving */
static SLPError make_mc_target(slp_handle_impl_t *,
				struct sockaddr_in *, char *,
				struct pollfd **, nfds_t *, struct bc_ifs *);
static SLPError make_bc_target(slp_handle_impl_t *, struct in_addr *,
				int, struct bc_ifs *);
static SLPError mc_sendmsg(struct pollfd *, struct msghdr *,
				struct bc_ifs *);
static SLPError bc_sendmsg(struct pollfd *, struct msghdr *, struct bc_ifs *);
static void mc_recvmsg(struct pollfd *, nfds_t, slp_handle_impl_t *,
			const char *, char *, void **, unsigned long long,
			unsigned long long, unsigned long long *,
			int *, int *, int);
static void free_pfds(struct pollfd *, nfds_t);

/* hands an overflowed reply's responder over to the TCP thread */
static void tcp_handoff(slp_handle_impl_t *, const char *,
			struct sockaddr_in *, unsigned short);

/* timing helpers used by the receive loops */
static unsigned long long now_millis();
static int wait_for_response(unsigned long long, int *,
				unsigned long long, unsigned long long *,
				struct pollfd [], nfds_t);

/* previous-responder list bookkeeping for multicast convergence */
static int add2pr_list(slp_msg_t *, struct sockaddr_in *, void **);
static void free_pr_node(void *, VISIT, int, void *);
111 
112 /*
113  * Unicasts a message using TCP. 'target' is a targets list
114  * containing DAs corresponding to 'scopes'. 'free_target' directs
115  * tcp_thread to free the target list when finished; this is useful
116  * when a target needs to be synthesised by another message thread
117  * (such as slp_mc_send for tcp_handoffs). If this message is a
118  * retransmission due to a large reply, 'xid' should be the same as for
119  * the original message.
120  *
121  * This call returns as soon as the message has been enqueued on 'tcp_q'.
122  * Callers interested in knowing when the transaction has completed
123  * should call slp_tcp_wait with the same SLP handle.
124  */
125 void slp_uc_tcp_send(slp_handle_impl_t *hp, slp_target_t *target,
126 			const char *scopes, SLPBoolean free_target,
127 			unsigned short xid) {
128 	struct tcp_rqst *rqst;
129 
130 	/* initialize TCP vars in handle, if necessary */
131 	if (!hp->tcp_lock) {
132 		if (!(hp->tcp_lock = malloc(sizeof (*(hp->tcp_lock))))) {
133 			slp_err(LOG_CRIT, 0, "slp_uc_tcp_send",
134 				"out of memory");
135 			return;
136 		}
137 		(void) mutex_init(hp->tcp_lock, USYNC_THREAD, NULL);
138 	}
139 	if (!hp->tcp_wait) {
140 		if (!(hp->tcp_wait = malloc(sizeof (*(hp->tcp_wait))))) {
141 			slp_err(LOG_CRIT, 0, "slp_uc_tcp_send",
142 				"out of memory");
143 			return;
144 		}
145 		(void) cond_init(hp->tcp_wait, USYNC_THREAD, NULL);
146 	}
147 	(void) mutex_lock(hp->tcp_lock);
148 	(hp->tcp_ref_cnt)++;
149 	(void) mutex_unlock(hp->tcp_lock);
150 
151 	/* start TCP thread, if not already running */
152 	if (!tcp_thr_running)
153 		if (start_tcp_thr() != SLP_OK)
154 			return;
155 
156 	/* create and enqueue the request */
157 	if (!(rqst = malloc(sizeof (*rqst)))) {
158 		slp_err(LOG_CRIT, 0, "slp_uc_tcp_send", "out of memory");
159 		return;
160 	}
161 	rqst->hp = hp;
162 	rqst->target = target;
163 	rqst->scopes = scopes;
164 	rqst->free_target = free_target;
165 	rqst->xid = xid;
166 	(void) slp_enqueue(tcp_q, rqst);
167 }
168 
169 /*
170  * Wait for TCP to complete, if a transaction corresponding to this
171  * SLP handle is pending. If none are pending, returns immediately.
172  */
173 void slp_tcp_wait(slp_handle_impl_t *hp) {
174 	(void) mutex_lock(hp->tcp_lock);
175 	while (hp->tcp_ref_cnt > 0)
176 		(void) cond_wait(hp->tcp_wait, hp->tcp_lock);
177 	(void) mutex_unlock(hp->tcp_lock);
178 }
179 
180 /*
181  * Unicasts a message using datagrams. 'target' should contain a
182  * list of DAs corresponding to 'scopes'.
183  *
184  * This call does not return until the transaction has completed. It
185  * may handoff a message to the TCP thread if necessary, but will not
186  * wait for that transaction to complete. Hence callers should always
187  * invoke slp_tcp_wait before cleaning up resources.
188  */
189 void slp_uc_udp_send(slp_handle_impl_t *hp, slp_target_t *target,
190 			const char *scopes) {
191 	slp_target_t *ctarg;
192 	struct sockaddr_in *sin;
193 	struct msghdr msg[1];
194 	char header[SLP_DEFAULT_SENDMTU];
195 	int sockfd;
196 	size_t mtu;
197 	SLPBoolean use_tcp;
198 	struct pollfd pfd[1];
199 	unsigned long long now, sent;
200 	char *reply = NULL;
201 
202 	use_tcp = SLP_FALSE;
203 	/* build the header and iovec */
204 	if (make_header(hp, header, scopes) != SLP_OK)
205 		return;
206 
207 	mtu = slp_get_mtu();
208 
209 	/* walk targets list until we either succeed or run out of targets */
210 	for (ctarg = target; ctarg; ctarg = slp_next_failover(ctarg)) {
211 		char *state;
212 		const char *timeouts;
213 		int timeout;
214 
215 		sin = (struct sockaddr_in *)slp_get_target_sin(ctarg);
216 
217 		/* make the socket, msghdr and reply buf */
218 		if ((sockfd = socket(AF_INET, SOCK_DGRAM, 0)) < 0) {
219 			slp_err(LOG_CRIT, 0, "slp_uc_udp_send",
220 				"could not create socket: %s",
221 				strerror(errno));
222 			return;
223 		}
224 		pfd[0].fd = sockfd;
225 		pfd[0].events = POLLRDNORM;
226 
227 		udp_make_msghdr(sin, hp->msg.iov, hp->msg.iovlen, msg);
228 		if (!reply && !(reply = malloc(mtu))) {
229 			(void) close(sockfd);
230 			slp_err(LOG_CRIT, 0, "slp_uc_udp_send",
231 				"out of memory");
232 			return;
233 		}
234 
235 		/* timeout loop */
236 		timeouts = SLPGetProperty(SLP_CONFIG_DATAGRAMTIMEOUTS);
237 		state = (char *)timeouts;
238 		for (timeout = slp_get_next_onlist(&state);
239 			timeout != -1 &&
240 			!hp->cancel;
241 			timeout = slp_get_next_onlist(&state)) {
242 			int pollerr;
243 
244 			if (sendmsg(sockfd, msg, 0) < 0) {
245 				slp_err(LOG_CRIT, 0, "slp_uc_udp_send",
246 					"sendmsg failed: %s", strerror(errno));
247 				continue; /* try again */
248 			}
249 			sent = now_millis();
250 
251 			pollerr = wait_for_response(
252 				0, &timeout, sent, &now, pfd, 1);
253 
254 			if (pollerr == 0)
255 				/* timeout */
256 				continue;
257 			if (pollerr < 0)
258 				break;
259 
260 			/* only using one fd, so no need to scan pfd */
261 			if (recvfrom(sockfd, reply, mtu, 0, NULL, NULL) < 0) {
262 				/* if reply overflows, hand off to TCP */
263 				if (errno == ENOMEM) {
264 					free(reply); reply = NULL;
265 					use_tcp = SLP_TRUE;
266 					break;
267 				}
268 				slp_err(LOG_CRIT, 0, "slp_uc_udp_send",
269 					"recvfrom failed: %s",
270 					strerror(errno));
271 			} else {
272 				/* success -- but check error code */
273 				slp_proto_err errcode = slp_get_errcode(reply);
274 				switch (errcode) {
275 				case SLP_MSG_PARSE_ERROR:
276 				case SLP_VER_NOT_SUPPORTED:
277 				case SLP_SICK_DA:
278 				case SLP_DA_BUSY_NOW:
279 				case SLP_OPTION_NOT_UNDERSTOOD:
280 				case SLP_RQST_NOT_SUPPORTED: {
281 				    char addrbuf[INET6_ADDRSTRLEN], *cname;
282 
283 				    cname = slp_ntop(addrbuf, INET6_ADDRSTRLEN,
284 					(const void *) &(sin->sin_addr));
285 				    cname = cname ? cname : "[invalid addr]";
286 
287 				    /* drop it */
288 				    slp_err(LOG_INFO, 0,
289 				"DA %s returned error code %d; dropping reply",
290 							cname, errcode);
291 				    free(reply); reply = NULL;
292 				}
293 				}
294 			}
295 			break;
296 		}
297 		if (timeout != -1)
298 			/* success or cancel */
299 			break;
300 		/* else failure */
301 		slp_mark_target_failed(ctarg);
302 	}
303 	(void) close(sockfd);
304 	if (!ctarg || hp->cancel) {
305 		/* failed all attempts or canceled by consumer */
306 		if (reply) free(reply);
307 		return;
308 	}
309 	/* success or tcp handoff */
310 	if (reply) {
311 		if (slp_get_overflow(reply))
312 			use_tcp = SLP_TRUE;
313 		else
314 			slp_mark_target_used(ctarg);
315 		(void) slp_enqueue(hp->q, reply);
316 	}
317 	if (use_tcp)
318 		slp_uc_tcp_send(
319 			hp, ctarg, scopes, SLP_FALSE, slp_get_xid(header));
320 }
321 
322 /*
323  * Multicasts (or broadcasts) a message, using multicast convergance
324  * to collect results. Large replies will cause the message to be handed
325  * off to the TCP thread.
326  *
327  * This call does not return until the transaction is complete. It does
328  * not, however, wait until pending TCP transactions are complete, so
329  * callers should always invoke slp_tcp_wait before cleaning up any
330  * resources.
331  */
332 void slp_mc_send(slp_handle_impl_t *hp, const char *scopes) {
333 	char header[SLP_DEFAULT_SENDMTU], *state;
334 	const char *timeouts;
335 	struct sockaddr_in sin[1];
336 	struct msghdr msg[1];
337 	int maxwait, timeout, noresults, anyresults;
338 	unsigned long long final_to, now, sent;
339 	struct pollfd *pfd;
340 	nfds_t nfds;
341 	void *collator = NULL;
342 	struct bc_ifs bcifs;
343 
344 	/* build the header and iovec */
345 	if (make_header(hp, header, scopes) != SLP_OK)
346 		return;
347 
348 	(void) memset(sin, 0, sizeof (sin));
349 	if (make_mc_target(hp, sin, header, &pfd, &nfds, &bcifs) != SLP_OK)
350 		return;
351 	udp_make_msghdr(sin, hp->msg.iov, hp->msg.iovlen, msg);
352 
353 	maxwait = slp_get_mcmaxwait();
354 	maxwait = maxwait ? maxwait : SLP_DEFAULT_MAXWAIT;
355 
356 	/* set the final timeout */
357 	now = now_millis();
358 	final_to = now + maxwait;
359 
360 	/* timeout prep and loop */
361 	timeouts = SLPGetProperty(SLP_CONFIG_MULTICASTTIMEOUTS);
362 	state = (char *)timeouts;
363 	noresults = anyresults = 0;
364 
365 	for (timeout = slp_get_next_onlist(&state);
366 		timeout != -1 &&
367 		now < final_to &&
368 		noresults < 2 &&
369 		!hp->cancel;
370 		timeout = slp_get_next_onlist(&state)) {
371 
372 		/* send msg */
373 		if (mc_sendmsg(pfd, msg, &bcifs) != SLP_OK) {
374 			continue; /* try again */
375 		}
376 		sent = now_millis();
377 
378 		/* receive results */
379 		mc_recvmsg(pfd, nfds, hp, scopes, header, &collator, final_to,
380 			sent, &now, &noresults, &anyresults, timeout);
381 
382 		if (!anyresults)
383 			noresults++;
384 		anyresults = 0;
385 	}
386 	/* clean up PR list collator */
387 	if (collator)
388 		slp_twalk(collator, free_pr_node, 0, NULL);
389 
390 	/* close all fds in pfd */
391 	free_pfds(pfd, nfds);
392 
393 	/* free broadcast addrs, if used */
394 	if (bcifs.sin) free(bcifs.sin);
395 }
396 
397 /*
398  * Private net helper routines
399  */
400 
401 /*
402  * Starts the tcp_thread and allocates any necessary resources.
403  */
404 static SLPError start_tcp_thr() {
405 	SLPError err;
406 	int terr;
407 
408 	(void) mutex_lock(&start_lock);
409 	/* make sure someone else hasn't already intialized the thread */
410 	if (tcp_thr_running) {
411 		(void) mutex_unlock(&start_lock);
412 		return (SLP_OK);
413 	}
414 
415 	/* create the tcp queue */
416 	if (!(tcp_q = slp_new_queue(&err))) {
417 		(void) mutex_unlock(&start_lock);
418 		return (err);
419 	}
420 
421 	/* start the tcp thread */
422 	if ((terr = thr_create(0, 0, (void *(*)(void *)) tcp_thread,
423 				NULL, 0, NULL)) != 0) {
424 	    slp_err(LOG_CRIT, 0, "start_tcp_thr",
425 		    "could not start thread: %s", strerror(terr));
426 	    (void) mutex_unlock(&start_lock);
427 	    return (SLP_INTERNAL_SYSTEM_ERROR);
428 	}
429 
430 	tcp_thr_running = SLP_TRUE;
431 	(void) mutex_unlock(&start_lock);
432 	return (SLP_OK);
433 }
434 
/*
 * Called by the tcp thread to shut itself down. The queue must be
 * empty (and should be, since the tcp thread will only shut itself
 * down if nothing has been put in its queue for the timeout period).
 *
 * Does not return to its caller: the calling thread exits via thr_exit.
 */
static void end_tcp_thr() {
	/* same lock start_tcp_thr takes, so start and stop cannot overlap */
	(void) mutex_lock(&start_lock);

	/* mark the thread gone so the next sender restarts it */
	tcp_thr_running = SLP_FALSE;
	slp_destroy_queue(tcp_q);

	(void) mutex_unlock(&start_lock);
	thr_exit(NULL);
}
449 
450 /*
451  * The thread of control for the TCP thread. This sits in a loop, waiting
452  * on 'tcp_q' for new messages. If no message appear after 30 seconds,
453  * this thread cleans up resources and shuts itself down.
454  */
455 static void tcp_thread() {
456 	struct tcp_rqst *rqst;
457 	char *reply, header[SLP_DEFAULT_SENDMTU];
458 	timestruc_t to[1];
459 	to->tv_nsec = 0;
460 
461 	for (;;) {
462 		slp_target_t *ctarg, *targets;
463 		slp_handle_impl_t *hp;
464 		const char *scopes;
465 		struct sockaddr_in *sin;
466 		SLPBoolean free_target, etimed;
467 		unsigned short xid;
468 
469 		/* set idle shutdown timeout */
470 		to->tv_sec = time(NULL) + 30;
471 		/* get the next request from the tcp queue */
472 		if (!(rqst = slp_dequeue_timed(tcp_q, to, &etimed))) {
473 			if (!etimed)
474 				continue;
475 			else
476 				end_tcp_thr();
477 		}
478 
479 		hp = rqst->hp;
480 		scopes = rqst->scopes;
481 		targets = rqst->target;
482 		free_target = rqst->free_target;
483 		xid = rqst->xid;
484 		free(rqst);
485 		reply = NULL;
486 
487 		/* Check if this handle has been cancelled */
488 		if (hp->cancel)
489 			goto transaction_complete;
490 
491 		/* build the header and iovec */
492 		if (make_header(hp, header, scopes) != SLP_OK) {
493 			if (free_target) slp_free_target(targets);
494 			continue;
495 		}
496 		if (xid)
497 			slp_set_xid(header, xid);
498 
499 	/* walk targets list until we either succeed or run out of targets */
500 		for (ctarg = targets;
501 			ctarg && !hp->cancel;
502 			ctarg = slp_next_failover(ctarg)) {
503 
504 			sin = (struct sockaddr_in *)slp_get_target_sin(ctarg);
505 
506 			/* create the socket */
507 			if ((tcp_sockfd = socket(AF_INET, SOCK_STREAM, 0))
508 			    < 0) {
509 				slp_err(LOG_CRIT, 0, "tcp_thread",
510 					"could not create socket: %s",
511 					strerror(errno));
512 				ctarg = NULL;
513 				break;
514 			}
515 
516 			/* connect to target */
517 			if (connect(tcp_sockfd, (struct sockaddr *)sin,
518 				    sizeof (*sin)) < 0) {
519 				slp_err(LOG_INFO, 0, "tcp_thread",
520 					"could not connect, error = %s",
521 					strerror(errno));
522 				goto failed;
523 			}
524 
525 			/* send the message and read the reply */
526 			if (writev(tcp_sockfd, hp->msg.iov, hp->msg.iovlen)
527 			    == -1) {
528 				slp_err(LOG_INFO, 0, "tcp_thread",
529 					"could not send, error = %s",
530 					strerror(errno));
531 				goto failed;
532 			}
533 
534 			/* if success, break out of failover loop */
535 			if ((slp_tcp_read(tcp_sockfd, &reply)) == SLP_OK) {
536 				(void) close(tcp_sockfd);
537 				break;
538 			}
539 
540 		/* else if timed out, mark target failed and try next one */
541 failed:
542 			(void) close(tcp_sockfd);
543 			slp_mark_target_failed(ctarg);
544 		}
545 
546 		if (hp->cancel) {
547 			if (reply) {
548 				free(reply);
549 			}
550 		} else if (ctarg) {
551 			/* success */
552 			(void) slp_enqueue(hp->q, reply);
553 			slp_mark_target_used(ctarg);
554 		}
555 
556 	/* If all TCP transactions on this handle are complete, send notice */
557 transaction_complete:
558 		(void) mutex_lock(hp->tcp_lock);
559 		if (--(hp->tcp_ref_cnt) == 0)
560 			(void) cond_signal(hp->tcp_wait);
561 		(void) mutex_unlock(hp->tcp_lock);
562 
563 		if (free_target)
564 			slp_free_target(targets);
565 	}
566 }
567 
568 /*
569  * Performs a full read for TCP replies, dynamically allocating a
570  * buffer large enough to hold the reply.
571  */
572 SLPError slp_tcp_read(int sockfd, char **reply) {
573 	char lenbuf[5], *p;
574 	size_t nleft;
575 	ssize_t nread;
576 	unsigned int len;
577 
578 	/* find out how long the reply is */
579 	nleft = 5;
580 	p = lenbuf;
581 	while (nleft != 0) {
582 		if ((nread = read(sockfd, p, 5)) < 0) {
583 			if (errno == EINTR)
584 				nread = 0;
585 			else
586 				return (SLP_NETWORK_ERROR);
587 		} else if (nread == 0)
588 			/* shouldn't hit EOF here */
589 			return (SLP_NETWORK_ERROR);
590 		nleft -= nread;
591 		p += nread;
592 	}
593 
594 	len = slp_get_length(lenbuf);
595 
596 	/* allocate space for the reply, and copy in what we've already read */
597 	/* This buffer gets freed by a msg-specific unpacking routine later */
598 	if (!(*reply = malloc(len))) {
599 		slp_err(LOG_CRIT, 0, "tcp_read", "out of memory");
600 		return (SLP_MEMORY_ALLOC_FAILED);
601 	}
602 	(void) memcpy(*reply, lenbuf, 5);
603 
604 	/* read the rest of the message */
605 	nleft = len - 5;
606 	p = *reply + 5;
607 	while (nleft != 0) {
608 		if ((nread = read(sockfd, p, nleft)) < 0) {
609 			if (errno == EINTR)
610 				nread = 0;
611 			else {
612 				free(*reply);
613 				return (SLP_NETWORK_ERROR);
614 			}
615 		} else if (nread == 0)
616 			/*
617 			 * shouldn't hit EOF here, but perhaps we've
618 			 * gotten something useful, so return OK.
619 			 */
620 			return (SLP_OK);
621 
622 		nleft -= nread;
623 		p += nread;
624 	}
625 
626 	return (SLP_OK);
627 }
628 
629 /*
630  * Lays in a SLP header for this message into the scatter / gather
631  * array 'iov'. 'header' is the buffer used to contain the header,
632  * and must contain enough space. 'scopes' should contain a string
633  * with the scopes to be used for this message.
634  */
635 static SLPError make_header(slp_handle_impl_t *hp, char *header,
636 			    const char *scopes) {
637 	SLPError err;
638 	size_t msgLen, off;
639 	int i;
640 	size_t mtu;
641 	unsigned short slen = (unsigned short)strlen(scopes);
642 
643 	mtu = slp_get_mtu();
644 	msgLen = slp_hdrlang_length(hp);
645 	hp->msg.iov[0].iov_base = header;
646 	hp->msg.iov[0].iov_len = msgLen;	/* now the length of the hdr */
647 
648 	/* use the remaining buffer in header for the prlist */
649 	hp->msg.prlist->iov_base = header + msgLen;
650 
651 	for (i = 1; i < hp->msg.iovlen; i++) {
652 		msgLen += hp->msg.iov[i].iov_len;
653 	}
654 	msgLen += slen;
655 
656 	off = 0;
657 	if ((err = slp_add_header(hp->locale, header, mtu,
658 					hp->fid, msgLen, &off)) != SLP_OK)
659 		return (err);
660 
661 	/* start out with empty prlist */
662 	hp->msg.prlist->iov_len = 0;
663 
664 	/* store the scope string len into the space provided by the caller */
665 	off = 0;
666 	if ((err = slp_add_sht((char *)hp->msg.scopeslen.iov_base,
667 				2, slen, &off)) != SLP_OK) {
668 		return (err);
669 	}
670 	hp->msg.scopes->iov_base = (caddr_t)scopes;
671 	hp->msg.scopes->iov_len = slen;
672 
673 	return (SLP_OK);
674 }
675 
676 /*
677  * Populates a struct msghdr suitable for use with sendmsg.
678  */
679 static void udp_make_msghdr(struct sockaddr_in *sin, struct iovec *iov,
680 			    int iovlen, struct msghdr *msg) {
681 	msg->msg_name = (caddr_t)sin;
682 	msg->msg_namelen = 16;
683 	msg->msg_iov = iov;
684 	msg->msg_iovlen = iovlen;
685 	msg->msg_accrights = NULL;
686 	msg->msg_accrightslen = 0;
687 }
688 
689 /*
690  * Sets the address on 'sin', sets the flag in the message header,
691  * and creates an array of pollfds for all interfaces we need to
692  * use. If we need to use only broadcast, and net.slp.interfaces
693  * is set, fills bcifs with an array of subnet broadcast addresses
694  * to which we should send. Returns err != SLP_OK only on catastrophic
695  * error.
696  */
697 static SLPError make_mc_target(slp_handle_impl_t *hp,
698 				struct sockaddr_in *sin, char *header,
699 				struct pollfd **fds, nfds_t *nfds,
700 				struct bc_ifs *bcifs) {
701 
702 	unsigned char ttl = slp_get_multicastTTL();
703 	char *ifs_string;
704 	SLPBoolean have_valid_if = SLP_FALSE;
705 	SLPBoolean use_broadcast = slp_get_usebroadcast();
706 	int fd, i, num_givenifs;
707 	struct in_addr *given_ifs = NULL;
708 	nfds_t nfd_i;
709 
710 	sin->sin_port = htons(SLP_PORT);
711 	sin->sin_family = AF_INET;
712 	slp_set_mcast(header);
713 
714 	/* Get the desired multicast interfaces, if set */
715 	bcifs->sin = NULL;
716 	*fds = NULL;
717 	if ((ifs_string = (char *)SLPGetProperty(
718 		SLP_CONFIG_INTERFACES)) != NULL && *ifs_string) {
719 
720 		char *p, *tstate;
721 
722 		/* count the number of IFs given */
723 		p = strchr(ifs_string, ',');
724 		for (num_givenifs = 1; p; num_givenifs++) {
725 			p = strchr(p + 1, ',');
726 		}
727 
728 		/* copy the given IFs into an array for easier processing */
729 		if (!(given_ifs = calloc(num_givenifs, sizeof (*given_ifs)))) {
730 			slp_err(LOG_CRIT, 0, "make_mc_target",
731 						"out of memory");
732 			return (SLP_MEMORY_ALLOC_FAILED);
733 		}
734 
735 		i = 0;
736 		/* strtok_r will destructively modify, so make a copy first */
737 		if (!(ifs_string = strdup(ifs_string))) {
738 			slp_err(LOG_CRIT, 0, "make_mc_target",
739 						"out of memory");
740 			free(given_ifs);
741 			return (SLP_MEMORY_ALLOC_FAILED);
742 		}
743 		for (
744 			p = strtok_r(ifs_string, ",", &tstate);
745 			p;
746 			p = strtok_r(NULL, ",", &tstate)) {
747 
748 			if (slp_pton(p, &(given_ifs[i])) < 1) {
749 				/* skip */
750 				num_givenifs--;
751 				continue;
752 			}
753 			i++;
754 		}
755 		*nfds = num_givenifs;
756 		free(ifs_string);
757 
758 		/* allocate a pollfd array for all interfaces */
759 		if (!(*fds = calloc(num_givenifs, sizeof (**fds)))) {
760 			slp_err(LOG_CRIT, 0, "make_mc_target",
761 						"out of memory");
762 			free(ifs_string);
763 			free(given_ifs);
764 			return (SLP_MEMORY_ALLOC_FAILED);
765 		}
766 
767 		/* lay the given interfaces into the pollfd array */
768 		for (i = 0; i < num_givenifs; i++) {
769 
770 			/* create a socket to bind to this interface */
771 			if ((fd = socket(AF_INET, SOCK_DGRAM, 0)) < 0) {
772 				slp_err(LOG_CRIT, 0, "make_mc_target",
773 						"could not create socket: %s",
774 						strerror(errno));
775 				free_pfds(*fds, *nfds);
776 				return (SLP_INTERNAL_SYSTEM_ERROR);
777 			}
778 
779 			/* fill in the pollfd structure */
780 			(*fds)[i].fd = fd;
781 			(*fds)[i].events |= POLLRDNORM;
782 
783 			if (use_broadcast) {
784 				struct sockaddr_in bcsin[1];
785 
786 				(void) memcpy(
787 					&(bcsin->sin_addr), &(given_ifs[i]),
788 					sizeof (bcsin->sin_addr));
789 				bcsin->sin_family = AF_INET;
790 				bcsin->sin_port = 0;
791 
792 				/* bind fd to interface */
793 				if (bind(fd, (struct sockaddr *)bcsin,
794 						sizeof (*bcsin)) == 0) {
795 					continue;
796 				}
797 				/* else fallthru to default (multicast) */
798 				slp_err(LOG_INFO, 0, "make_mc_target",
799 				"could not set broadcast interface: %s",
800 					strerror(errno));
801 			}
802 			/* else use multicast */
803 			if (setsockopt(fd, IPPROTO_IP, IP_MULTICAST_IF,
804 					&(given_ifs[i]), sizeof (given_ifs[i]))
805 					< 0) {
806 
807 					slp_err(LOG_INFO, 0, "make_mc_target",
808 				"could not set multicast interface: %s",
809 							strerror(errno));
810 					continue;
811 			}
812 
813 			have_valid_if = SLP_TRUE;
814 		}
815 
816 		if (use_broadcast) {
817 		    SLPError err;
818 
819 		    if ((err = make_bc_target(
820 					hp, given_ifs, num_givenifs, bcifs))
821 			!= SLP_OK) {
822 
823 			if (err == SLP_MEMORY_ALLOC_FAILED) {
824 			    /* the only thing which is really a showstopper */
825 			    return (err);
826 			}
827 
828 			/* else no valid interfaces */
829 			have_valid_if = SLP_FALSE;
830 		    }
831 		}
832 		free(given_ifs);
833 	}
834 
835 	if (!have_valid_if) {
836 		if (*fds && !have_valid_if) {
837 			/* couldn't process net.slp.interfaces property */
838 			free(*fds);
839 		}
840 
841 		/* bind to default interface */
842 		if (!(*fds = calloc(1, sizeof (**fds)))) {
843 			slp_err(LOG_CRIT, 0, "make_mc_target",
844 						"out of memory");
845 			return (SLP_MEMORY_ALLOC_FAILED);
846 		}
847 
848 		if ((fd = socket(AF_INET, SOCK_DGRAM, 0)) < 0) {
849 			slp_err(LOG_CRIT, 0, "make_mc_target",
850 						"could not create socket: %s",
851 						strerror(errno));
852 			free(*fds);
853 			return (SLP_INTERNAL_SYSTEM_ERROR);
854 		}
855 
856 		(**fds).fd = fd;
857 		(**fds).events |= POLLRDNORM;
858 		*nfds = 1;
859 	}
860 
861 	/* set required options on all configured fds */
862 	for (nfd_i = 0; nfd_i < *nfds; nfd_i++) {
863 		if (use_broadcast) {
864 			const int on = 1;
865 			if (setsockopt((*fds)[nfd_i].fd, SOL_SOCKET,
866 					SO_BROADCAST,
867 					(void *) &on, sizeof (on)) < 0) {
868 				slp_err(LOG_CRIT, 0, "make_mc_target",
869 					"could not enable broadcast: %s",
870 					strerror(errno));
871 			}
872 		} else {
873 			if (setsockopt((*fds)[nfd_i].fd, IPPROTO_IP,
874 					IP_MULTICAST_TTL, &ttl, 1) < 0) {
875 				slp_err(LOG_CRIT, 0, "make_mc_target",
876 					    "could not set multicast TTL: %s",
877 					    strerror(errno));
878 			}
879 		}
880 	}
881 
882 	if (use_broadcast) {
883 	    sin->sin_addr.s_addr = INADDR_BROADCAST;
884 	} else {
885 		sin->sin_addr.s_addr = SLP_MULTICAST_ADDRESS;
886 	}
887 
888 	return (SLP_OK);
889 }
890 
891 /*
892  * Obtains the subnet broadcast address for each interface specified
893  * in net.slp.interfaces, and fill bcifs->sin with an array of these
894  * addresses.
895  */
896 static SLPError make_bc_target(slp_handle_impl_t *hp,
897 				struct in_addr *given_ifs,
898 				int num_givenifs, struct bc_ifs *bcifs) {
899 	SLPError err;
900 	int i;
901 
902 	if ((err = slp_broadcast_addrs(hp, given_ifs, num_givenifs,
903 					&(bcifs->sin), &(bcifs->num_ifs)))
904 	    != SLP_OK) {
905 	    return (err);
906 	}
907 
908 	/* set SLP port on each sockaddr_in */
909 	for (i = 0; i < bcifs->num_ifs; i++) {
910 		bcifs->sin[i].sin_port = htons(SLP_PORT);
911 	}
912 
913 	return (SLP_OK);
914 }
915 
916 /*
917  * Sends msg on 1st fd in fds for multicast, or on all interfaces
918  * specified in net.slp.interfaces for broadcast. Returns SLP_OK if
919  * msg was sent successfully on at least one interface; otherwise
920  * returns SLP_NETWORK_ERROR if msg was not sent on any interfaces.
921  */
922 static SLPError mc_sendmsg(struct pollfd *fds,
923 				struct msghdr *msg, struct bc_ifs *bcifs) {
924 
925 	if (slp_get_usebroadcast()) {
926 	    char *ifs = (char *)SLPGetProperty(SLP_CONFIG_INTERFACES);
927 
928 	    /* hand off to broadcast-specific send function */
929 	    if (ifs && *ifs && bc_sendmsg(fds, msg, bcifs) == SLP_OK) {
930 		return (SLP_OK);
931 	    }
932 
933 		/*
934 		 * else  no ifs given, or bc_sendmsg failed, so send on
935 		 * general broadcast addr (255.255.255.255). This will
936 		 * cause the message to be sent on all interfaces. The
937 		 * address will have been set in make_mc_target.
938 		 */
939 	}
940 
941 	/*
942 	 * Send only on one interface -- let routing take care of
943 	 * sending the message everywhere it needs to go. Sending
944 	 * on more than one interface can cause nasty routing loops.
945 	 * Note that this approach doesn't work with partitioned
946 	 * networks.
947 	 */
948 	if (sendmsg(fds[0].fd, msg, 0) < 0) {
949 		slp_err(LOG_CRIT, 0, "mc_sendmsg",
950 			"sendmsg failed: %s", strerror(errno));
951 		return (SLP_NETWORK_ERROR);
952 	}
953 
954 	return (SLP_OK);
955 }
956 
957 /*
958  * Send msg to each subnet broadcast address in bcifs->sin. Note
959  * that we can send on any fd (regardless of which interface to which
960  * it is bound), since the kernel will take care of routing for us.
961  * Returns err != SLP_OK only if no message was sent on any interface.
962  */
963 static SLPError bc_sendmsg(struct pollfd *fds, struct msghdr *msg,
964 				struct bc_ifs *bcifs) {
965 	int i;
966 	SLPBoolean sent_one = SLP_FALSE;
967 
968 	for (i = 0; i < bcifs->num_ifs; i++) {
969 		msg->msg_name = (caddr_t)&(bcifs->sin[i]);
970 
971 		if (sendmsg(fds[0].fd, msg, 0) < 0) {
972 			slp_err(LOG_CRIT, 0, "bc_sendmsg",
973 				"sendmsg failed: %s", strerror(errno));
974 			continue;
975 		}
976 		sent_one = SLP_TRUE;
977 	}
978 	return (sent_one ? SLP_OK : SLP_NETWORK_ERROR);
979 }
980 
/*
 * This is where the bulk of the multicast convergence algorithm resides.
 * mc_recvmsg() waits for data to be ready on any fd in pfd, iterates
 * through pfd and reads data from ready fd's. It also checks timeouts
 * and user-cancels.
 *
 * Parameters:
 *   pfd	IN	an array of pollfd structs containing fds to poll
 *   nfds	IN	number of elements in pfd
 *   hp		IN	SLPHandle from originating call
 *   scopes	IN	scopes to use for this message
 *   header	IN	the SLP message header for this message
 *   collator	IN/OUT	btree collator for PR list
 *   final_to	IN	final timeout
 *   sent	IN	time when message was sent
 *   now	IN/OUT	set to current time at beginning of convergence
 *   noresults	OUT	set to 0 if any results are received
 *   anyresults	OUT	set to true if any results are received
 *   timeout	IN	time for this convergence iteration
 *
 * Returns only if an error has occurred, or if either this retransmit
 * timeout or the final timeout has expired, or if hp->cancel becomes true.
 */
1004 static void mc_recvmsg(struct pollfd *pfd, nfds_t nfds, slp_handle_impl_t *hp,
1005 			const char *scopes, char *header, void **collator,
1006 			unsigned long long final_to,
1007 			unsigned long long sent,
1008 			unsigned long long *now,
1009 			int *noresults, int *anyresults, int timeout) {
1010 	char *reply = NULL;
1011 	nfds_t i;
1012 	struct sockaddr_in responder;
1013 	int pollerr;
1014 	socklen_t addrlen = sizeof (responder);
1015 	size_t mtu = slp_get_mtu();
1016 
1017 	for (; !hp->cancel; ) {
1018 	    /* wait until we can read something */
1019 	    pollerr = wait_for_response(
1020 				final_to, &timeout, sent, now, pfd, nfds);
1021 	    if (pollerr == 0)
1022 		/* timeout */
1023 		goto cleanup;
1024 	    if (pollerr < 0)
1025 		/* error */
1026 		goto cleanup;
1027 
1028 	    /* iterate through all fds to find one with data to read */
1029 	    for (i = 0; !hp->cancel && i < nfds; i++) {
1030 
1031 		if (pfd[i].fd < 0 ||
1032 		    !(pfd[i].revents & (POLLRDNORM | POLLERR))) {
1033 
1034 		    /* unused fd or unwanted event */
1035 		    continue;
1036 		}
1037 
1038 		/* alloc reply buffer */
1039 		if (!reply && !(reply = malloc(mtu))) {
1040 		    slp_err(LOG_CRIT, 0, "mc_revcmsg", "out of memory");
1041 		    return;
1042 	    }
1043 		if (recvfrom(pfd[i].fd, reply, mtu, 0,
1044 				(struct sockaddr *)&responder,
1045 				(int *)&addrlen) < 0) {
1046 
1047 		    /* if reply overflows, hand off to TCP */
1048 		    if (errno == ENOMEM) {
1049 			free(reply); reply = NULL;
1050 			tcp_handoff(hp, scopes,
1051 					&responder, slp_get_xid(header));
1052 			continue;
1053 		    }
1054 
1055 		    /* else something nasty happened */
1056 		    slp_err(LOG_CRIT, 0, "mc_recvmsg",
1057 					"recvfrom failed: %s",
1058 					strerror(errno));
1059 		    continue;
1060 		} else {
1061 		    /* success */
1062 		    if (slp_get_overflow(reply)) {
1063 			tcp_handoff(hp, scopes,
1064 					&responder, slp_get_xid(header));
1065 		    }
1066 			/*
1067 			 * Add to the PR list. If this responder has already
1068 			 * answered, it doesn't count.
1069 			 */
1070 		    if (add2pr_list(&(hp->msg), &responder, collator)) {
1071 			(void) slp_enqueue(hp->q, reply);
1072 			*noresults = 0;
1073 			*anyresults = 1;
1074 			reply = NULL;
1075 		    }
1076 
1077 		    /* if we've exceeded maxwait, break out */
1078 		    *now = now_millis();
1079 		    if (*now > final_to)
1080 			goto cleanup;
1081 
1082 		} /* end successful receive */
1083 
1084 	    } /* end fd iteration */
1085 
1086 	    /* reset poll's timeout */
1087 	    timeout = timeout - (int)(*now - sent);
1088 	    if (timeout <= 0) {
1089 		goto cleanup;
1090 	    }
1091 
1092 	} /* end main poll loop */
1093 
1094 cleanup:
1095 	if (reply) {
1096 	    free(reply);
1097 	}
1098 }
1099 
/*
 * Tears down a pollfd array: every entry holding a positive descriptor
 * has that descriptor closed, and then the array itself is freed.
 * Entries whose fd is zero or negative are left alone.
 */
static void free_pfds(struct pollfd *pfds, nfds_t nfds) {
	nfds_t idx = 0;

	while (idx < nfds) {
		int fd = pfds[idx++].fd;

		if (fd > 0)
			(void) close(fd);
	}

	free(pfds);
}
1116 
1117 /*
1118  * Hands off a message to the TCP thread, fabricating a new target
1119  * from 'sin'. 'xid' will be used to create the XID for the TCP message.
1120  */
1121 static void tcp_handoff(slp_handle_impl_t *hp, const char *scopes,
1122 			struct sockaddr_in *sin, unsigned short xid) {
1123 	slp_target_t *target;
1124 
1125 	target = slp_fabricate_target(sin);
1126 	slp_uc_tcp_send(hp, target, scopes, SLP_TRUE, xid);
1127 }
1128 
/*
 * Reads the time of day with gettimeofday() and returns it expressed
 * in milliseconds.
 */
static unsigned long long now_millis() {
	struct timeval tv;

	(void) gettimeofday(&tv, NULL);
	return ((unsigned long long) tv.tv_sec * 1000 + tv.tv_usec / 1000);
}
1141 
/*
 * A wrapper around poll which waits until a reply comes in. This will
 * wait no longer than '*timeout' milliseconds, counted from the moment
 * this function is entered, before returning. poll can return even if
 * no data is on the pipe and no timeout has occurred (EINTR / EAGAIN);
 * in that case poll is restarted with whatever is left of the original
 * '*timeout', and '*timeout' is updated to that remainder. 'final_to'
 * is an absolute limit which also ends the wait; it is ignored if it
 * is 0.
 *
 * 'sent' is kept in the signature for the existing callers but is not
 * consulted: the remaining time is measured from entry, so that the
 * elapsed time is never subtracted from an already reduced timeout.
 *
 * returns:	< 0 on error
 *		0 on timeout
 *		> 0 on success (i.e. ready to read data).
 * side effect: 'now' is set to the time when poll last returned.
 */
/*ARGSUSED2*/
static int wait_for_response(
	unsigned long long final_to,
	int *timeout,
	unsigned long long sent,
	unsigned long long *now,
	struct pollfd pfd[], nfds_t nfds) {

	const unsigned long long start = now_millis();
	const int budget = *timeout;
	int elapsed, pollerr, err;

	/* wait until we can read something */
	for (;;) {
		pollerr = poll(pfd, nfds, *timeout);
		/* save errno before any other call can disturb it */
		err = errno;
		*now = now_millis();

		/* ready to read */
		if (pollerr > 0)
			return (pollerr);

		/* time out */
		if (pollerr == 0)
			return (0);

		/* a real error */
		if (err != EAGAIN && err != EINTR) {
			slp_err(LOG_INFO, 0, "wait for response",
				"poll error: %s",
				strerror(err));
			return (pollerr);
		}

		/*
		 * Interrupted: work out how much of the budget is left.
		 * A clock that stepped backwards counts as no time passed.
		 */
		elapsed = (*now > start) ? (int)(*now - start) : 0;
		if ((final_to != 0 && *now > final_to) || elapsed > budget)
			break;
		*timeout = budget - elapsed;
	}

	return (0);
}
1198 
1199 /*
1200  * Adds the cname of the host whose address is in 'sin' to this message's
1201  * previous responder list. The message is contained in 'msg'.
1202  * 'collator' contains the complete previous responder list, so that
1203  * even if the PR list in the message overflows and must be truncated,
1204  * the function can still correctly determine if we have heard from this
1205  * host before.
1206  *
1207  * returns:	1 if this is the first time we've heard from this host
1208  *		0 is this is a duplicate reply
1209  */
1210 static int add2pr_list(
1211 	slp_msg_t *msg,
1212 	struct sockaddr_in *sin,
1213 	void **collator) {
1214 
1215 	char **res, *cname, *p, *header;
1216 	size_t mtu;
1217 	size_t len, off, namelen;
1218 	unsigned short prlen;
1219 
1220 	/* Attempt to resolve the responder's IP address to its host name */
1221 	if (!(cname = slp_gethostbyaddr((char *)&(sin->sin_addr),
1222 					sizeof (sin->sin_addr))))
1223 		return (0);
1224 
1225 	res = slp_tsearch(
1226 		cname, collator,
1227 		(int (*)(const void *, const void *)) strcasecmp);
1228 	if (*res != cname) {
1229 		/* duplicate */
1230 		slp_err(LOG_INFO, 0, "add2pr_list",
1231 			"drop PR ignored by host: %s",
1232 			cname);
1233 		free(cname);
1234 		return (0);
1235 	}
1236 
1237 	/* new responder: add to the msg PR list if there is room */
1238 	mtu = slp_get_mtu();
1239 
1240 	header = msg->iov[0].iov_base;
1241 	len = slp_get_length(header);
1242 
1243 	namelen = strlen(cname);
1244 	if ((namelen + 2 + len) >= mtu)
1245 		return (1);	/* no room */
1246 
1247 	/* else  there is enough room */
1248 	prlen = (unsigned short)msg->prlist->iov_len;
1249 	p = msg->prlist->iov_base + prlen;
1250 	*p = 0;
1251 
1252 	if (prlen) {
1253 		namelen++;	/* add the ',' */
1254 		(void) strcat(p, ",");
1255 	}
1256 	(void) strcat(p, cname);
1257 
1258 	/* update msg and pr list length */
1259 	len += namelen;
1260 	slp_set_length(header, len);
1261 	prlen += (unsigned short)namelen;
1262 	off = 0;
1263 	(void) slp_add_sht(msg->prlistlen.iov_base, 2, prlen, &off);
1264 	msg->prlist->iov_len += namelen;
1265 
1266 	return (1);
1267 }
1268 
/*
 * Visitor applied to each node while the previous responder tree is
 * walked. On an 'endorder' or 'leaf' visit it frees the string the node
 * points at and then the node itself; every other visit is a no-op.
 * 'level' and 'cookie' are not used.
 */
/*ARGSUSED2*/
static void free_pr_node(void *node, VISIT order, int level, void *cookie) {
	char **keyp = node;

	if (order != endorder && order != leaf)
		return;

	free(*keyp);
	free(node);
}
1281