xref: /linux/net/unix/af_unix.c (revision 89721e3038d181bacbd6be54354b513fdf1b4f10)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * NET4:	Implementation of BSD Unix domain sockets.
4  *
5  * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
6  *
7  * Fixes:
8  *		Linus Torvalds	:	Assorted bug cures.
9  *		Niibe Yutaka	:	async I/O support.
10  *		Carsten Paeth	:	PF_UNIX check, address fixes.
11  *		Alan Cox	:	Limit size of allocated blocks.
12  *		Alan Cox	:	Fixed the stupid socketpair bug.
13  *		Alan Cox	:	BSD compatibility fine tuning.
14  *		Alan Cox	:	Fixed a bug in connect when interrupted.
15  *		Alan Cox	:	Sorted out a proper draft version of
16  *					file descriptor passing hacked up from
17  *					Mike Shaver's work.
18  *		Marty Leisner	:	Fixes to fd passing
19  *		Nick Nevin	:	recvmsg bugfix.
20  *		Alan Cox	:	Started proper garbage collector
21  *		Heiko EiBfeldt	:	Missing verify_area check
22  *		Alan Cox	:	Started POSIXisms
23  *		Andreas Schwab	:	Replace inode by dentry for proper
24  *					reference counting
25  *		Kirk Petersen	:	Made this a module
26  *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
27  *					Lots of bug fixes.
28  *	     Alexey Kuznetosv	:	Repaired (I hope) bugs introduces
29  *					by above two patches.
30  *	     Andrea Arcangeli	:	If possible we block in connect(2)
31  *					if the max backlog of the listen socket
32  *					is been reached. This won't break
33  *					old apps and it will avoid huge amount
34  *					of socks hashed (this for unix_gc()
35  *					performances reasons).
36  *					Security fix that limits the max
37  *					number of socks to 2*max_files and
38  *					the number of skb queueable in the
39  *					dgram receiver.
40  *		Artur Skawina   :	Hash function optimizations
41  *	     Alexey Kuznetsov   :	Full scale SMP. Lot of bugs are introduced 8)
42  *	      Malcolm Beattie   :	Set peercred for socketpair
43  *	     Michal Ostrowski   :       Module initialization cleanup.
44  *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
45  *	     				the core infrastructure is doing that
46  *	     				for all net proto families now (2.5.69+)
47  *
48  * Known differences from reference BSD that was tested:
49  *
50  *	[TO FIX]
51  *	ECONNREFUSED is not returned from one end of a connected() socket to the
52  *		other the moment one end closes.
53  *	fstat() doesn't return st_dev=0, and give the blksize as high water mark
54  *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
55  *	[NOT TO FIX]
56  *	accept() returns a path name even if the connecting socket has closed
57  *		in the meantime (BSD loses the path and gives up).
58  *	accept() returns 0 length path for an unbound connector. BSD returns 16
59  *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
60  *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
61  *	BSD af_unix apparently has connect forgetting to block properly.
62  *		(need to check this with the POSIX spec in detail)
63  *
64  * Differences from 2.0.0-11-... (ANK)
65  *	Bug fixes and improvements.
66  *		- client shutdown killed server socket.
67  *		- removed all useless cli/sti pairs.
68  *
69  *	Semantic changes/extensions.
70  *		- generic control message passing.
71  *		- SCM_CREDENTIALS control message.
72  *		- "Abstract" (not FS based) socket bindings.
73  *		  Abstract names are sequences of bytes (not zero terminated)
74  *		  started by 0, so that this name space does not intersect
75  *		  with BSD names.
76  */
77 
78 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
79 
80 #include <linux/module.h>
81 #include <linux/kernel.h>
82 #include <linux/signal.h>
83 #include <linux/sched/signal.h>
84 #include <linux/errno.h>
85 #include <linux/string.h>
86 #include <linux/stat.h>
87 #include <linux/dcache.h>
88 #include <linux/namei.h>
89 #include <linux/socket.h>
90 #include <linux/un.h>
91 #include <linux/fcntl.h>
92 #include <linux/filter.h>
93 #include <linux/termios.h>
94 #include <linux/sockios.h>
95 #include <linux/net.h>
96 #include <linux/in.h>
97 #include <linux/fs.h>
98 #include <linux/slab.h>
99 #include <linux/uaccess.h>
100 #include <linux/skbuff.h>
101 #include <linux/netdevice.h>
102 #include <net/net_namespace.h>
103 #include <net/sock.h>
104 #include <net/tcp_states.h>
105 #include <net/af_unix.h>
106 #include <linux/proc_fs.h>
107 #include <linux/seq_file.h>
108 #include <net/scm.h>
109 #include <linux/init.h>
110 #include <linux/poll.h>
111 #include <linux/rtnetlink.h>
112 #include <linux/mount.h>
113 #include <net/checksum.h>
114 #include <linux/security.h>
115 #include <linux/splice.h>
116 #include <linux/freezer.h>
117 #include <linux/file.h>
118 #include <linux/btf_ids.h>
119 #include <linux/bpf-cgroup.h>
120 
121 static atomic_long_t unix_nr_socks;
122 static struct hlist_head bsd_socket_buckets[UNIX_HASH_SIZE / 2];
123 static spinlock_t bsd_socket_locks[UNIX_HASH_SIZE / 2];
124 
125 /* SMP locking strategy:
126  *    hash table is protected with spinlock.
127  *    each socket state is protected by separate spinlock.
128  */
129 
unix_unbound_hash(struct sock * sk)130 static unsigned int unix_unbound_hash(struct sock *sk)
131 {
132 	unsigned long hash = (unsigned long)sk;
133 
134 	hash ^= hash >> 16;
135 	hash ^= hash >> 8;
136 	hash ^= sk->sk_type;
137 
138 	return hash & UNIX_HASH_MOD;
139 }
140 
unix_bsd_hash(struct inode * i)141 static unsigned int unix_bsd_hash(struct inode *i)
142 {
143 	return i->i_ino & UNIX_HASH_MOD;
144 }
145 
unix_abstract_hash(struct sockaddr_un * sunaddr,int addr_len,int type)146 static unsigned int unix_abstract_hash(struct sockaddr_un *sunaddr,
147 				       int addr_len, int type)
148 {
149 	__wsum csum = csum_partial(sunaddr, addr_len, 0);
150 	unsigned int hash;
151 
152 	hash = (__force unsigned int)csum_fold(csum);
153 	hash ^= hash >> 8;
154 	hash ^= type;
155 
156 	return UNIX_HASH_MOD + 1 + (hash & UNIX_HASH_MOD);
157 }
158 
unix_table_double_lock(struct net * net,unsigned int hash1,unsigned int hash2)159 static void unix_table_double_lock(struct net *net,
160 				   unsigned int hash1, unsigned int hash2)
161 {
162 	if (hash1 == hash2) {
163 		spin_lock(&net->unx.table.locks[hash1]);
164 		return;
165 	}
166 
167 	if (hash1 > hash2)
168 		swap(hash1, hash2);
169 
170 	spin_lock(&net->unx.table.locks[hash1]);
171 	spin_lock_nested(&net->unx.table.locks[hash2], SINGLE_DEPTH_NESTING);
172 }
173 
unix_table_double_unlock(struct net * net,unsigned int hash1,unsigned int hash2)174 static void unix_table_double_unlock(struct net *net,
175 				     unsigned int hash1, unsigned int hash2)
176 {
177 	if (hash1 == hash2) {
178 		spin_unlock(&net->unx.table.locks[hash1]);
179 		return;
180 	}
181 
182 	spin_unlock(&net->unx.table.locks[hash1]);
183 	spin_unlock(&net->unx.table.locks[hash2]);
184 }
185 
186 #ifdef CONFIG_SECURITY_NETWORK
unix_get_secdata(struct scm_cookie * scm,struct sk_buff * skb)187 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
188 {
189 	UNIXCB(skb).secid = scm->secid;
190 }
191 
unix_set_secdata(struct scm_cookie * scm,struct sk_buff * skb)192 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
193 {
194 	scm->secid = UNIXCB(skb).secid;
195 }
196 
unix_secdata_eq(struct scm_cookie * scm,struct sk_buff * skb)197 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
198 {
199 	return (scm->secid == UNIXCB(skb).secid);
200 }
201 #else
unix_get_secdata(struct scm_cookie * scm,struct sk_buff * skb)202 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
203 { }
204 
unix_set_secdata(struct scm_cookie * scm,struct sk_buff * skb)205 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
206 { }
207 
unix_secdata_eq(struct scm_cookie * scm,struct sk_buff * skb)208 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
209 {
210 	return true;
211 }
212 #endif /* CONFIG_SECURITY_NETWORK */
213 
unix_our_peer(struct sock * sk,struct sock * osk)214 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
215 {
216 	return unix_peer(osk) == sk;
217 }
218 
unix_may_send(struct sock * sk,struct sock * osk)219 static inline int unix_may_send(struct sock *sk, struct sock *osk)
220 {
221 	return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
222 }
223 
unix_recvq_full(const struct sock * sk)224 static inline int unix_recvq_full(const struct sock *sk)
225 {
226 	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
227 }
228 
unix_recvq_full_lockless(const struct sock * sk)229 static inline int unix_recvq_full_lockless(const struct sock *sk)
230 {
231 	return skb_queue_len_lockless(&sk->sk_receive_queue) >
232 		READ_ONCE(sk->sk_max_ack_backlog);
233 }
234 
unix_peer_get(struct sock * s)235 struct sock *unix_peer_get(struct sock *s)
236 {
237 	struct sock *peer;
238 
239 	unix_state_lock(s);
240 	peer = unix_peer(s);
241 	if (peer)
242 		sock_hold(peer);
243 	unix_state_unlock(s);
244 	return peer;
245 }
246 EXPORT_SYMBOL_GPL(unix_peer_get);
247 
unix_create_addr(struct sockaddr_un * sunaddr,int addr_len)248 static struct unix_address *unix_create_addr(struct sockaddr_un *sunaddr,
249 					     int addr_len)
250 {
251 	struct unix_address *addr;
252 
253 	addr = kmalloc(sizeof(*addr) + addr_len, GFP_KERNEL);
254 	if (!addr)
255 		return NULL;
256 
257 	refcount_set(&addr->refcnt, 1);
258 	addr->len = addr_len;
259 	memcpy(addr->name, sunaddr, addr_len);
260 
261 	return addr;
262 }
263 
unix_release_addr(struct unix_address * addr)264 static inline void unix_release_addr(struct unix_address *addr)
265 {
266 	if (refcount_dec_and_test(&addr->refcnt))
267 		kfree(addr);
268 }
269 
270 /*
271  *	Check unix socket name:
272  *		- should be not zero length.
273  *	        - if started by not zero, should be NULL terminated (FS object)
274  *		- if started by zero, it is abstract name.
275  */
276 
unix_validate_addr(struct sockaddr_un * sunaddr,int addr_len)277 static int unix_validate_addr(struct sockaddr_un *sunaddr, int addr_len)
278 {
279 	if (addr_len <= offsetof(struct sockaddr_un, sun_path) ||
280 	    addr_len > sizeof(*sunaddr))
281 		return -EINVAL;
282 
283 	if (sunaddr->sun_family != AF_UNIX)
284 		return -EINVAL;
285 
286 	return 0;
287 }
288 
unix_mkname_bsd(struct sockaddr_un * sunaddr,int addr_len)289 static int unix_mkname_bsd(struct sockaddr_un *sunaddr, int addr_len)
290 {
291 	struct sockaddr_storage *addr = (struct sockaddr_storage *)sunaddr;
292 	short offset = offsetof(struct sockaddr_storage, __data);
293 
294 	BUILD_BUG_ON(offset != offsetof(struct sockaddr_un, sun_path));
295 
296 	/* This may look like an off by one error but it is a bit more
297 	 * subtle.  108 is the longest valid AF_UNIX path for a binding.
298 	 * sun_path[108] doesn't as such exist.  However in kernel space
299 	 * we are guaranteed that it is a valid memory location in our
300 	 * kernel address buffer because syscall functions always pass
301 	 * a pointer of struct sockaddr_storage which has a bigger buffer
302 	 * than 108.  Also, we must terminate sun_path for strlen() in
303 	 * getname_kernel().
304 	 */
305 	addr->__data[addr_len - offset] = 0;
306 
307 	/* Don't pass sunaddr->sun_path to strlen().  Otherwise, 108 will
308 	 * cause panic if CONFIG_FORTIFY_SOURCE=y.  Let __fortify_strlen()
309 	 * know the actual buffer.
310 	 */
311 	return strlen(addr->__data) + offset + 1;
312 }
313 
__unix_remove_socket(struct sock * sk)314 static void __unix_remove_socket(struct sock *sk)
315 {
316 	sk_del_node_init(sk);
317 }
318 
__unix_insert_socket(struct net * net,struct sock * sk)319 static void __unix_insert_socket(struct net *net, struct sock *sk)
320 {
321 	DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk));
322 	sk_add_node(sk, &net->unx.table.buckets[sk->sk_hash]);
323 }
324 
__unix_set_addr_hash(struct net * net,struct sock * sk,struct unix_address * addr,unsigned int hash)325 static void __unix_set_addr_hash(struct net *net, struct sock *sk,
326 				 struct unix_address *addr, unsigned int hash)
327 {
328 	__unix_remove_socket(sk);
329 	smp_store_release(&unix_sk(sk)->addr, addr);
330 
331 	sk->sk_hash = hash;
332 	__unix_insert_socket(net, sk);
333 }
334 
unix_remove_socket(struct net * net,struct sock * sk)335 static void unix_remove_socket(struct net *net, struct sock *sk)
336 {
337 	spin_lock(&net->unx.table.locks[sk->sk_hash]);
338 	__unix_remove_socket(sk);
339 	spin_unlock(&net->unx.table.locks[sk->sk_hash]);
340 }
341 
unix_insert_unbound_socket(struct net * net,struct sock * sk)342 static void unix_insert_unbound_socket(struct net *net, struct sock *sk)
343 {
344 	spin_lock(&net->unx.table.locks[sk->sk_hash]);
345 	__unix_insert_socket(net, sk);
346 	spin_unlock(&net->unx.table.locks[sk->sk_hash]);
347 }
348 
unix_insert_bsd_socket(struct sock * sk)349 static void unix_insert_bsd_socket(struct sock *sk)
350 {
351 	spin_lock(&bsd_socket_locks[sk->sk_hash]);
352 	sk_add_bind_node(sk, &bsd_socket_buckets[sk->sk_hash]);
353 	spin_unlock(&bsd_socket_locks[sk->sk_hash]);
354 }
355 
unix_remove_bsd_socket(struct sock * sk)356 static void unix_remove_bsd_socket(struct sock *sk)
357 {
358 	if (!hlist_unhashed(&sk->sk_bind_node)) {
359 		spin_lock(&bsd_socket_locks[sk->sk_hash]);
360 		__sk_del_bind_node(sk);
361 		spin_unlock(&bsd_socket_locks[sk->sk_hash]);
362 
363 		sk_node_init(&sk->sk_bind_node);
364 	}
365 }
366 
__unix_find_socket_byname(struct net * net,struct sockaddr_un * sunname,int len,unsigned int hash)367 static struct sock *__unix_find_socket_byname(struct net *net,
368 					      struct sockaddr_un *sunname,
369 					      int len, unsigned int hash)
370 {
371 	struct sock *s;
372 
373 	sk_for_each(s, &net->unx.table.buckets[hash]) {
374 		struct unix_sock *u = unix_sk(s);
375 
376 		if (u->addr->len == len &&
377 		    !memcmp(u->addr->name, sunname, len))
378 			return s;
379 	}
380 	return NULL;
381 }
382 
unix_find_socket_byname(struct net * net,struct sockaddr_un * sunname,int len,unsigned int hash)383 static inline struct sock *unix_find_socket_byname(struct net *net,
384 						   struct sockaddr_un *sunname,
385 						   int len, unsigned int hash)
386 {
387 	struct sock *s;
388 
389 	spin_lock(&net->unx.table.locks[hash]);
390 	s = __unix_find_socket_byname(net, sunname, len, hash);
391 	if (s)
392 		sock_hold(s);
393 	spin_unlock(&net->unx.table.locks[hash]);
394 	return s;
395 }
396 
unix_find_socket_byinode(struct inode * i)397 static struct sock *unix_find_socket_byinode(struct inode *i)
398 {
399 	unsigned int hash = unix_bsd_hash(i);
400 	struct sock *s;
401 
402 	spin_lock(&bsd_socket_locks[hash]);
403 	sk_for_each_bound(s, &bsd_socket_buckets[hash]) {
404 		struct dentry *dentry = unix_sk(s)->path.dentry;
405 
406 		if (dentry && d_backing_inode(dentry) == i) {
407 			sock_hold(s);
408 			spin_unlock(&bsd_socket_locks[hash]);
409 			return s;
410 		}
411 	}
412 	spin_unlock(&bsd_socket_locks[hash]);
413 	return NULL;
414 }
415 
416 /* Support code for asymmetrically connected dgram sockets
417  *
418  * If a datagram socket is connected to a socket not itself connected
419  * to the first socket (eg, /dev/log), clients may only enqueue more
420  * messages if the present receive queue of the server socket is not
421  * "too large". This means there's a second writeability condition
422  * poll and sendmsg need to test. The dgram recv code will do a wake
423  * up on the peer_wait wait queue of a socket upon reception of a
424  * datagram which needs to be propagated to sleeping would-be writers
425  * since these might not have sent anything so far. This can't be
426  * accomplished via poll_wait because the lifetime of the server
427  * socket might be less than that of its clients if these break their
428  * association with it or if the server socket is closed while clients
429  * are still connected to it and there's no way to inform "a polling
430  * implementation" that it should let go of a certain wait queue
431  *
432  * In order to propagate a wake up, a wait_queue_entry_t of the client
433  * socket is enqueued on the peer_wait queue of the server socket
434  * whose wake function does a wake_up on the ordinary client socket
435  * wait queue. This connection is established whenever a write (or
436  * poll for write) hit the flow control condition and broken when the
437  * association to the server socket is dissolved or after a wake up
438  * was relayed.
439  */
440 
unix_dgram_peer_wake_relay(wait_queue_entry_t * q,unsigned mode,int flags,void * key)441 static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
442 				      void *key)
443 {
444 	struct unix_sock *u;
445 	wait_queue_head_t *u_sleep;
446 
447 	u = container_of(q, struct unix_sock, peer_wake);
448 
449 	__remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
450 			    q);
451 	u->peer_wake.private = NULL;
452 
453 	/* relaying can only happen while the wq still exists */
454 	u_sleep = sk_sleep(&u->sk);
455 	if (u_sleep)
456 		wake_up_interruptible_poll(u_sleep, key_to_poll(key));
457 
458 	return 0;
459 }
460 
unix_dgram_peer_wake_connect(struct sock * sk,struct sock * other)461 static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
462 {
463 	struct unix_sock *u, *u_other;
464 	int rc;
465 
466 	u = unix_sk(sk);
467 	u_other = unix_sk(other);
468 	rc = 0;
469 	spin_lock(&u_other->peer_wait.lock);
470 
471 	if (!u->peer_wake.private) {
472 		u->peer_wake.private = other;
473 		__add_wait_queue(&u_other->peer_wait, &u->peer_wake);
474 
475 		rc = 1;
476 	}
477 
478 	spin_unlock(&u_other->peer_wait.lock);
479 	return rc;
480 }
481 
unix_dgram_peer_wake_disconnect(struct sock * sk,struct sock * other)482 static void unix_dgram_peer_wake_disconnect(struct sock *sk,
483 					    struct sock *other)
484 {
485 	struct unix_sock *u, *u_other;
486 
487 	u = unix_sk(sk);
488 	u_other = unix_sk(other);
489 	spin_lock(&u_other->peer_wait.lock);
490 
491 	if (u->peer_wake.private == other) {
492 		__remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
493 		u->peer_wake.private = NULL;
494 	}
495 
496 	spin_unlock(&u_other->peer_wait.lock);
497 }
498 
unix_dgram_peer_wake_disconnect_wakeup(struct sock * sk,struct sock * other)499 static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
500 						   struct sock *other)
501 {
502 	unix_dgram_peer_wake_disconnect(sk, other);
503 	wake_up_interruptible_poll(sk_sleep(sk),
504 				   EPOLLOUT |
505 				   EPOLLWRNORM |
506 				   EPOLLWRBAND);
507 }
508 
509 /* preconditions:
510  *	- unix_peer(sk) == other
511  *	- association is stable
512  */
unix_dgram_peer_wake_me(struct sock * sk,struct sock * other)513 static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
514 {
515 	int connected;
516 
517 	connected = unix_dgram_peer_wake_connect(sk, other);
518 
519 	/* If other is SOCK_DEAD, we want to make sure we signal
520 	 * POLLOUT, such that a subsequent write() can get a
521 	 * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
522 	 * to other and its full, we will hang waiting for POLLOUT.
523 	 */
524 	if (unix_recvq_full_lockless(other) && !sock_flag(other, SOCK_DEAD))
525 		return 1;
526 
527 	if (connected)
528 		unix_dgram_peer_wake_disconnect(sk, other);
529 
530 	return 0;
531 }
532 
unix_writable(const struct sock * sk)533 static int unix_writable(const struct sock *sk)
534 {
535 	return sk->sk_state != TCP_LISTEN &&
536 	       (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
537 }
538 
unix_write_space(struct sock * sk)539 static void unix_write_space(struct sock *sk)
540 {
541 	struct socket_wq *wq;
542 
543 	rcu_read_lock();
544 	if (unix_writable(sk)) {
545 		wq = rcu_dereference(sk->sk_wq);
546 		if (skwq_has_sleeper(wq))
547 			wake_up_interruptible_sync_poll(&wq->wait,
548 				EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
549 		sk_wake_async_rcu(sk, SOCK_WAKE_SPACE, POLL_OUT);
550 	}
551 	rcu_read_unlock();
552 }
553 
554 /* When dgram socket disconnects (or changes its peer), we clear its receive
555  * queue of packets arrived from previous peer. First, it allows to do
556  * flow control based only on wmem_alloc; second, sk connected to peer
557  * may receive messages only from that peer. */
unix_dgram_disconnected(struct sock * sk,struct sock * other)558 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
559 {
560 	if (!skb_queue_empty(&sk->sk_receive_queue)) {
561 		skb_queue_purge(&sk->sk_receive_queue);
562 		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
563 
564 		/* If one link of bidirectional dgram pipe is disconnected,
565 		 * we signal error. Messages are lost. Do not make this,
566 		 * when peer was not connected to us.
567 		 */
568 		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
569 			WRITE_ONCE(other->sk_err, ECONNRESET);
570 			sk_error_report(other);
571 		}
572 	}
573 	other->sk_state = TCP_CLOSE;
574 }
575 
unix_sock_destructor(struct sock * sk)576 static void unix_sock_destructor(struct sock *sk)
577 {
578 	struct unix_sock *u = unix_sk(sk);
579 
580 	skb_queue_purge(&sk->sk_receive_queue);
581 
582 	DEBUG_NET_WARN_ON_ONCE(refcount_read(&sk->sk_wmem_alloc));
583 	DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk));
584 	DEBUG_NET_WARN_ON_ONCE(sk->sk_socket);
585 	if (!sock_flag(sk, SOCK_DEAD)) {
586 		pr_info("Attempt to release alive unix socket: %p\n", sk);
587 		return;
588 	}
589 
590 	if (u->addr)
591 		unix_release_addr(u->addr);
592 
593 	atomic_long_dec(&unix_nr_socks);
594 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
595 #ifdef UNIX_REFCNT_DEBUG
596 	pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
597 		atomic_long_read(&unix_nr_socks));
598 #endif
599 }
600 
unix_release_sock(struct sock * sk,int embrion)601 static void unix_release_sock(struct sock *sk, int embrion)
602 {
603 	struct unix_sock *u = unix_sk(sk);
604 	struct sock *skpair;
605 	struct sk_buff *skb;
606 	struct path path;
607 	int state;
608 
609 	unix_remove_socket(sock_net(sk), sk);
610 	unix_remove_bsd_socket(sk);
611 
612 	/* Clear state */
613 	unix_state_lock(sk);
614 	sock_orphan(sk);
615 	WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
616 	path	     = u->path;
617 	u->path.dentry = NULL;
618 	u->path.mnt = NULL;
619 	state = sk->sk_state;
620 	sk->sk_state = TCP_CLOSE;
621 
622 	skpair = unix_peer(sk);
623 	unix_peer(sk) = NULL;
624 
625 	unix_state_unlock(sk);
626 
627 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
628 	if (u->oob_skb) {
629 		kfree_skb(u->oob_skb);
630 		u->oob_skb = NULL;
631 	}
632 #endif
633 
634 	wake_up_interruptible_all(&u->peer_wait);
635 
636 	if (skpair != NULL) {
637 		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
638 			unix_state_lock(skpair);
639 			/* No more writes */
640 			WRITE_ONCE(skpair->sk_shutdown, SHUTDOWN_MASK);
641 			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
642 				WRITE_ONCE(skpair->sk_err, ECONNRESET);
643 			unix_state_unlock(skpair);
644 			skpair->sk_state_change(skpair);
645 			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
646 		}
647 
648 		unix_dgram_peer_wake_disconnect(sk, skpair);
649 		sock_put(skpair); /* It may now die */
650 	}
651 
652 	/* Try to flush out this socket. Throw out buffers at least */
653 
654 	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
655 		if (state == TCP_LISTEN)
656 			unix_release_sock(skb->sk, 1);
657 		/* passed fds are erased in the kfree_skb hook	      */
658 		UNIXCB(skb).consumed = skb->len;
659 		kfree_skb(skb);
660 	}
661 
662 	if (path.dentry)
663 		path_put(&path);
664 
665 	sock_put(sk);
666 
667 	/* ---- Socket is dead now and most probably destroyed ---- */
668 
669 	/*
670 	 * Fixme: BSD difference: In BSD all sockets connected to us get
671 	 *	  ECONNRESET and we die on the spot. In Linux we behave
672 	 *	  like files and pipes do and wait for the last
673 	 *	  dereference.
674 	 *
675 	 * Can't we simply set sock->err?
676 	 *
677 	 *	  What the above comment does talk about? --ANK(980817)
678 	 */
679 
680 	if (READ_ONCE(unix_tot_inflight))
681 		unix_gc();		/* Garbage collect fds */
682 }
683 
init_peercred(struct sock * sk)684 static void init_peercred(struct sock *sk)
685 {
686 	const struct cred *old_cred;
687 	struct pid *old_pid;
688 
689 	spin_lock(&sk->sk_peer_lock);
690 	old_pid = sk->sk_peer_pid;
691 	old_cred = sk->sk_peer_cred;
692 	sk->sk_peer_pid  = get_pid(task_tgid(current));
693 	sk->sk_peer_cred = get_current_cred();
694 	spin_unlock(&sk->sk_peer_lock);
695 
696 	put_pid(old_pid);
697 	put_cred(old_cred);
698 }
699 
copy_peercred(struct sock * sk,struct sock * peersk)700 static void copy_peercred(struct sock *sk, struct sock *peersk)
701 {
702 	const struct cred *old_cred;
703 	struct pid *old_pid;
704 
705 	if (sk < peersk) {
706 		spin_lock(&sk->sk_peer_lock);
707 		spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING);
708 	} else {
709 		spin_lock(&peersk->sk_peer_lock);
710 		spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING);
711 	}
712 	old_pid = sk->sk_peer_pid;
713 	old_cred = sk->sk_peer_cred;
714 	sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
715 	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
716 
717 	spin_unlock(&sk->sk_peer_lock);
718 	spin_unlock(&peersk->sk_peer_lock);
719 
720 	put_pid(old_pid);
721 	put_cred(old_cred);
722 }
723 
unix_listen(struct socket * sock,int backlog)724 static int unix_listen(struct socket *sock, int backlog)
725 {
726 	int err;
727 	struct sock *sk = sock->sk;
728 	struct unix_sock *u = unix_sk(sk);
729 
730 	err = -EOPNOTSUPP;
731 	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
732 		goto out;	/* Only stream/seqpacket sockets accept */
733 	err = -EINVAL;
734 	if (!u->addr)
735 		goto out;	/* No listens on an unbound socket */
736 	unix_state_lock(sk);
737 	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
738 		goto out_unlock;
739 	if (backlog > sk->sk_max_ack_backlog)
740 		wake_up_interruptible_all(&u->peer_wait);
741 	sk->sk_max_ack_backlog	= backlog;
742 	sk->sk_state		= TCP_LISTEN;
743 	/* set credentials so connect can copy them */
744 	init_peercred(sk);
745 	err = 0;
746 
747 out_unlock:
748 	unix_state_unlock(sk);
749 out:
750 	return err;
751 }
752 
753 static int unix_release(struct socket *);
754 static int unix_bind(struct socket *, struct sockaddr *, int);
755 static int unix_stream_connect(struct socket *, struct sockaddr *,
756 			       int addr_len, int flags);
757 static int unix_socketpair(struct socket *, struct socket *);
758 static int unix_accept(struct socket *, struct socket *, struct proto_accept_arg *arg);
759 static int unix_getname(struct socket *, struct sockaddr *, int);
760 static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
761 static __poll_t unix_dgram_poll(struct file *, struct socket *,
762 				    poll_table *);
763 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
764 #ifdef CONFIG_COMPAT
765 static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
766 #endif
767 static int unix_shutdown(struct socket *, int);
768 static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
769 static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
770 static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
771 				       struct pipe_inode_info *, size_t size,
772 				       unsigned int flags);
773 static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
774 static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
775 static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
776 static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
777 static int unix_dgram_connect(struct socket *, struct sockaddr *,
778 			      int, int);
779 static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
780 static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
781 				  int);
782 
783 #ifdef CONFIG_PROC_FS
unix_count_nr_fds(struct sock * sk)784 static int unix_count_nr_fds(struct sock *sk)
785 {
786 	struct sk_buff *skb;
787 	struct unix_sock *u;
788 	int nr_fds = 0;
789 
790 	spin_lock(&sk->sk_receive_queue.lock);
791 	skb = skb_peek(&sk->sk_receive_queue);
792 	while (skb) {
793 		u = unix_sk(skb->sk);
794 		nr_fds += atomic_read(&u->scm_stat.nr_fds);
795 		skb = skb_peek_next(skb, &sk->sk_receive_queue);
796 	}
797 	spin_unlock(&sk->sk_receive_queue.lock);
798 
799 	return nr_fds;
800 }
801 
unix_show_fdinfo(struct seq_file * m,struct socket * sock)802 static void unix_show_fdinfo(struct seq_file *m, struct socket *sock)
803 {
804 	struct sock *sk = sock->sk;
805 	unsigned char s_state;
806 	struct unix_sock *u;
807 	int nr_fds = 0;
808 
809 	if (sk) {
810 		s_state = READ_ONCE(sk->sk_state);
811 		u = unix_sk(sk);
812 
813 		/* SOCK_STREAM and SOCK_SEQPACKET sockets never change their
814 		 * sk_state after switching to TCP_ESTABLISHED or TCP_LISTEN.
815 		 * SOCK_DGRAM is ordinary. So, no lock is needed.
816 		 */
817 		if (sock->type == SOCK_DGRAM || s_state == TCP_ESTABLISHED)
818 			nr_fds = atomic_read(&u->scm_stat.nr_fds);
819 		else if (s_state == TCP_LISTEN)
820 			nr_fds = unix_count_nr_fds(sk);
821 
822 		seq_printf(m, "scm_fds: %u\n", nr_fds);
823 	}
824 }
825 #else
826 #define unix_show_fdinfo NULL
827 #endif
828 
829 static const struct proto_ops unix_stream_ops = {
830 	.family =	PF_UNIX,
831 	.owner =	THIS_MODULE,
832 	.release =	unix_release,
833 	.bind =		unix_bind,
834 	.connect =	unix_stream_connect,
835 	.socketpair =	unix_socketpair,
836 	.accept =	unix_accept,
837 	.getname =	unix_getname,
838 	.poll =		unix_poll,
839 	.ioctl =	unix_ioctl,
840 #ifdef CONFIG_COMPAT
841 	.compat_ioctl =	unix_compat_ioctl,
842 #endif
843 	.listen =	unix_listen,
844 	.shutdown =	unix_shutdown,
845 	.sendmsg =	unix_stream_sendmsg,
846 	.recvmsg =	unix_stream_recvmsg,
847 	.read_skb =	unix_stream_read_skb,
848 	.mmap =		sock_no_mmap,
849 	.splice_read =	unix_stream_splice_read,
850 	.set_peek_off =	sk_set_peek_off,
851 	.show_fdinfo =	unix_show_fdinfo,
852 };
853 
854 static const struct proto_ops unix_dgram_ops = {
855 	.family =	PF_UNIX,
856 	.owner =	THIS_MODULE,
857 	.release =	unix_release,
858 	.bind =		unix_bind,
859 	.connect =	unix_dgram_connect,
860 	.socketpair =	unix_socketpair,
861 	.accept =	sock_no_accept,
862 	.getname =	unix_getname,
863 	.poll =		unix_dgram_poll,
864 	.ioctl =	unix_ioctl,
865 #ifdef CONFIG_COMPAT
866 	.compat_ioctl =	unix_compat_ioctl,
867 #endif
868 	.listen =	sock_no_listen,
869 	.shutdown =	unix_shutdown,
870 	.sendmsg =	unix_dgram_sendmsg,
871 	.read_skb =	unix_read_skb,
872 	.recvmsg =	unix_dgram_recvmsg,
873 	.mmap =		sock_no_mmap,
874 	.set_peek_off =	sk_set_peek_off,
875 	.show_fdinfo =	unix_show_fdinfo,
876 };
877 
878 static const struct proto_ops unix_seqpacket_ops = {
879 	.family =	PF_UNIX,
880 	.owner =	THIS_MODULE,
881 	.release =	unix_release,
882 	.bind =		unix_bind,
883 	.connect =	unix_stream_connect,
884 	.socketpair =	unix_socketpair,
885 	.accept =	unix_accept,
886 	.getname =	unix_getname,
887 	.poll =		unix_dgram_poll,
888 	.ioctl =	unix_ioctl,
889 #ifdef CONFIG_COMPAT
890 	.compat_ioctl =	unix_compat_ioctl,
891 #endif
892 	.listen =	unix_listen,
893 	.shutdown =	unix_shutdown,
894 	.sendmsg =	unix_seqpacket_sendmsg,
895 	.recvmsg =	unix_seqpacket_recvmsg,
896 	.mmap =		sock_no_mmap,
897 	.set_peek_off =	sk_set_peek_off,
898 	.show_fdinfo =	unix_show_fdinfo,
899 };
900 
unix_close(struct sock * sk,long timeout)901 static void unix_close(struct sock *sk, long timeout)
902 {
903 	/* Nothing to do here, unix socket does not need a ->close().
904 	 * This is merely for sockmap.
905 	 */
906 }
907 
unix_unhash(struct sock * sk)908 static void unix_unhash(struct sock *sk)
909 {
910 	/* Nothing to do here, unix socket does not need a ->unhash().
911 	 * This is merely for sockmap.
912 	 */
913 }
914 
unix_bpf_bypass_getsockopt(int level,int optname)915 static bool unix_bpf_bypass_getsockopt(int level, int optname)
916 {
917 	if (level == SOL_SOCKET) {
918 		switch (optname) {
919 		case SO_PEERPIDFD:
920 			return true;
921 		default:
922 			return false;
923 		}
924 	}
925 
926 	return false;
927 }
928 
929 struct proto unix_dgram_proto = {
930 	.name			= "UNIX",
931 	.owner			= THIS_MODULE,
932 	.obj_size		= sizeof(struct unix_sock),
933 	.close			= unix_close,
934 	.bpf_bypass_getsockopt	= unix_bpf_bypass_getsockopt,
935 #ifdef CONFIG_BPF_SYSCALL
936 	.psock_update_sk_prot	= unix_dgram_bpf_update_proto,
937 #endif
938 };
939 
940 struct proto unix_stream_proto = {
941 	.name			= "UNIX-STREAM",
942 	.owner			= THIS_MODULE,
943 	.obj_size		= sizeof(struct unix_sock),
944 	.close			= unix_close,
945 	.unhash			= unix_unhash,
946 	.bpf_bypass_getsockopt	= unix_bpf_bypass_getsockopt,
947 #ifdef CONFIG_BPF_SYSCALL
948 	.psock_update_sk_prot	= unix_stream_bpf_update_proto,
949 #endif
950 };
951 
unix_create1(struct net * net,struct socket * sock,int kern,int type)952 static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, int type)
953 {
954 	struct unix_sock *u;
955 	struct sock *sk;
956 	int err;
957 
958 	atomic_long_inc(&unix_nr_socks);
959 	if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) {
960 		err = -ENFILE;
961 		goto err;
962 	}
963 
964 	if (type == SOCK_STREAM)
965 		sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_stream_proto, kern);
966 	else /*dgram and  seqpacket */
967 		sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_dgram_proto, kern);
968 
969 	if (!sk) {
970 		err = -ENOMEM;
971 		goto err;
972 	}
973 
974 	sock_init_data(sock, sk);
975 
976 	sk->sk_hash		= unix_unbound_hash(sk);
977 	sk->sk_allocation	= GFP_KERNEL_ACCOUNT;
978 	sk->sk_write_space	= unix_write_space;
979 	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
980 	sk->sk_destruct		= unix_sock_destructor;
981 	u = unix_sk(sk);
982 	u->listener = NULL;
983 	u->vertex = NULL;
984 	u->path.dentry = NULL;
985 	u->path.mnt = NULL;
986 	spin_lock_init(&u->lock);
987 	mutex_init(&u->iolock); /* single task reading lock */
988 	mutex_init(&u->bindlock); /* single task binding lock */
989 	init_waitqueue_head(&u->peer_wait);
990 	init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
991 	memset(&u->scm_stat, 0, sizeof(struct scm_stat));
992 	unix_insert_unbound_socket(net, sk);
993 
994 	sock_prot_inuse_add(net, sk->sk_prot, 1);
995 
996 	return sk;
997 
998 err:
999 	atomic_long_dec(&unix_nr_socks);
1000 	return ERR_PTR(err);
1001 }
1002 
unix_create(struct net * net,struct socket * sock,int protocol,int kern)1003 static int unix_create(struct net *net, struct socket *sock, int protocol,
1004 		       int kern)
1005 {
1006 	struct sock *sk;
1007 
1008 	if (protocol && protocol != PF_UNIX)
1009 		return -EPROTONOSUPPORT;
1010 
1011 	sock->state = SS_UNCONNECTED;
1012 
1013 	switch (sock->type) {
1014 	case SOCK_STREAM:
1015 		sock->ops = &unix_stream_ops;
1016 		break;
1017 		/*
1018 		 *	Believe it or not BSD has AF_UNIX, SOCK_RAW though
1019 		 *	nothing uses it.
1020 		 */
1021 	case SOCK_RAW:
1022 		sock->type = SOCK_DGRAM;
1023 		fallthrough;
1024 	case SOCK_DGRAM:
1025 		sock->ops = &unix_dgram_ops;
1026 		break;
1027 	case SOCK_SEQPACKET:
1028 		sock->ops = &unix_seqpacket_ops;
1029 		break;
1030 	default:
1031 		return -ESOCKTNOSUPPORT;
1032 	}
1033 
1034 	sk = unix_create1(net, sock, kern, sock->type);
1035 	if (IS_ERR(sk))
1036 		return PTR_ERR(sk);
1037 
1038 	return 0;
1039 }
1040 
unix_release(struct socket * sock)1041 static int unix_release(struct socket *sock)
1042 {
1043 	struct sock *sk = sock->sk;
1044 
1045 	if (!sk)
1046 		return 0;
1047 
1048 	sk->sk_prot->close(sk, 0);
1049 	unix_release_sock(sk, 0);
1050 	sock->sk = NULL;
1051 
1052 	return 0;
1053 }
1054 
unix_find_bsd(struct sockaddr_un * sunaddr,int addr_len,int type)1055 static struct sock *unix_find_bsd(struct sockaddr_un *sunaddr, int addr_len,
1056 				  int type)
1057 {
1058 	struct inode *inode;
1059 	struct path path;
1060 	struct sock *sk;
1061 	int err;
1062 
1063 	unix_mkname_bsd(sunaddr, addr_len);
1064 	err = kern_path(sunaddr->sun_path, LOOKUP_FOLLOW, &path);
1065 	if (err)
1066 		goto fail;
1067 
1068 	err = path_permission(&path, MAY_WRITE);
1069 	if (err)
1070 		goto path_put;
1071 
1072 	err = -ECONNREFUSED;
1073 	inode = d_backing_inode(path.dentry);
1074 	if (!S_ISSOCK(inode->i_mode))
1075 		goto path_put;
1076 
1077 	sk = unix_find_socket_byinode(inode);
1078 	if (!sk)
1079 		goto path_put;
1080 
1081 	err = -EPROTOTYPE;
1082 	if (sk->sk_type == type)
1083 		touch_atime(&path);
1084 	else
1085 		goto sock_put;
1086 
1087 	path_put(&path);
1088 
1089 	return sk;
1090 
1091 sock_put:
1092 	sock_put(sk);
1093 path_put:
1094 	path_put(&path);
1095 fail:
1096 	return ERR_PTR(err);
1097 }
1098 
unix_find_abstract(struct net * net,struct sockaddr_un * sunaddr,int addr_len,int type)1099 static struct sock *unix_find_abstract(struct net *net,
1100 				       struct sockaddr_un *sunaddr,
1101 				       int addr_len, int type)
1102 {
1103 	unsigned int hash = unix_abstract_hash(sunaddr, addr_len, type);
1104 	struct dentry *dentry;
1105 	struct sock *sk;
1106 
1107 	sk = unix_find_socket_byname(net, sunaddr, addr_len, hash);
1108 	if (!sk)
1109 		return ERR_PTR(-ECONNREFUSED);
1110 
1111 	dentry = unix_sk(sk)->path.dentry;
1112 	if (dentry)
1113 		touch_atime(&unix_sk(sk)->path);
1114 
1115 	return sk;
1116 }
1117 
unix_find_other(struct net * net,struct sockaddr_un * sunaddr,int addr_len,int type)1118 static struct sock *unix_find_other(struct net *net,
1119 				    struct sockaddr_un *sunaddr,
1120 				    int addr_len, int type)
1121 {
1122 	struct sock *sk;
1123 
1124 	if (sunaddr->sun_path[0])
1125 		sk = unix_find_bsd(sunaddr, addr_len, type);
1126 	else
1127 		sk = unix_find_abstract(net, sunaddr, addr_len, type);
1128 
1129 	return sk;
1130 }
1131 
unix_autobind(struct sock * sk)1132 static int unix_autobind(struct sock *sk)
1133 {
1134 	unsigned int new_hash, old_hash = sk->sk_hash;
1135 	struct unix_sock *u = unix_sk(sk);
1136 	struct net *net = sock_net(sk);
1137 	struct unix_address *addr;
1138 	u32 lastnum, ordernum;
1139 	int err;
1140 
1141 	err = mutex_lock_interruptible(&u->bindlock);
1142 	if (err)
1143 		return err;
1144 
1145 	if (u->addr)
1146 		goto out;
1147 
1148 	err = -ENOMEM;
1149 	addr = kzalloc(sizeof(*addr) +
1150 		       offsetof(struct sockaddr_un, sun_path) + 16, GFP_KERNEL);
1151 	if (!addr)
1152 		goto out;
1153 
1154 	addr->len = offsetof(struct sockaddr_un, sun_path) + 6;
1155 	addr->name->sun_family = AF_UNIX;
1156 	refcount_set(&addr->refcnt, 1);
1157 
1158 	ordernum = get_random_u32();
1159 	lastnum = ordernum & 0xFFFFF;
1160 retry:
1161 	ordernum = (ordernum + 1) & 0xFFFFF;
1162 	sprintf(addr->name->sun_path + 1, "%05x", ordernum);
1163 
1164 	new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
1165 	unix_table_double_lock(net, old_hash, new_hash);
1166 
1167 	if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash)) {
1168 		unix_table_double_unlock(net, old_hash, new_hash);
1169 
1170 		/* __unix_find_socket_byname() may take long time if many names
1171 		 * are already in use.
1172 		 */
1173 		cond_resched();
1174 
1175 		if (ordernum == lastnum) {
1176 			/* Give up if all names seems to be in use. */
1177 			err = -ENOSPC;
1178 			unix_release_addr(addr);
1179 			goto out;
1180 		}
1181 
1182 		goto retry;
1183 	}
1184 
1185 	__unix_set_addr_hash(net, sk, addr, new_hash);
1186 	unix_table_double_unlock(net, old_hash, new_hash);
1187 	err = 0;
1188 
1189 out:	mutex_unlock(&u->bindlock);
1190 	return err;
1191 }
1192 
unix_bind_bsd(struct sock * sk,struct sockaddr_un * sunaddr,int addr_len)1193 static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr,
1194 			 int addr_len)
1195 {
1196 	umode_t mode = S_IFSOCK |
1197 	       (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask());
1198 	unsigned int new_hash, old_hash = sk->sk_hash;
1199 	struct unix_sock *u = unix_sk(sk);
1200 	struct net *net = sock_net(sk);
1201 	struct mnt_idmap *idmap;
1202 	struct unix_address *addr;
1203 	struct dentry *dentry;
1204 	struct path parent;
1205 	int err;
1206 
1207 	addr_len = unix_mkname_bsd(sunaddr, addr_len);
1208 	addr = unix_create_addr(sunaddr, addr_len);
1209 	if (!addr)
1210 		return -ENOMEM;
1211 
1212 	/*
1213 	 * Get the parent directory, calculate the hash for last
1214 	 * component.
1215 	 */
1216 	dentry = kern_path_create(AT_FDCWD, addr->name->sun_path, &parent, 0);
1217 	if (IS_ERR(dentry)) {
1218 		err = PTR_ERR(dentry);
1219 		goto out;
1220 	}
1221 
1222 	/*
1223 	 * All right, let's create it.
1224 	 */
1225 	idmap = mnt_idmap(parent.mnt);
1226 	err = security_path_mknod(&parent, dentry, mode, 0);
1227 	if (!err)
1228 		err = vfs_mknod(idmap, d_inode(parent.dentry), dentry, mode, 0);
1229 	if (err)
1230 		goto out_path;
1231 	err = mutex_lock_interruptible(&u->bindlock);
1232 	if (err)
1233 		goto out_unlink;
1234 	if (u->addr)
1235 		goto out_unlock;
1236 
1237 	new_hash = unix_bsd_hash(d_backing_inode(dentry));
1238 	unix_table_double_lock(net, old_hash, new_hash);
1239 	u->path.mnt = mntget(parent.mnt);
1240 	u->path.dentry = dget(dentry);
1241 	__unix_set_addr_hash(net, sk, addr, new_hash);
1242 	unix_table_double_unlock(net, old_hash, new_hash);
1243 	unix_insert_bsd_socket(sk);
1244 	mutex_unlock(&u->bindlock);
1245 	done_path_create(&parent, dentry);
1246 	return 0;
1247 
1248 out_unlock:
1249 	mutex_unlock(&u->bindlock);
1250 	err = -EINVAL;
1251 out_unlink:
1252 	/* failed after successful mknod?  unlink what we'd created... */
1253 	vfs_unlink(idmap, d_inode(parent.dentry), dentry, NULL);
1254 out_path:
1255 	done_path_create(&parent, dentry);
1256 out:
1257 	unix_release_addr(addr);
1258 	return err == -EEXIST ? -EADDRINUSE : err;
1259 }
1260 
unix_bind_abstract(struct sock * sk,struct sockaddr_un * sunaddr,int addr_len)1261 static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr,
1262 			      int addr_len)
1263 {
1264 	unsigned int new_hash, old_hash = sk->sk_hash;
1265 	struct unix_sock *u = unix_sk(sk);
1266 	struct net *net = sock_net(sk);
1267 	struct unix_address *addr;
1268 	int err;
1269 
1270 	addr = unix_create_addr(sunaddr, addr_len);
1271 	if (!addr)
1272 		return -ENOMEM;
1273 
1274 	err = mutex_lock_interruptible(&u->bindlock);
1275 	if (err)
1276 		goto out;
1277 
1278 	if (u->addr) {
1279 		err = -EINVAL;
1280 		goto out_mutex;
1281 	}
1282 
1283 	new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
1284 	unix_table_double_lock(net, old_hash, new_hash);
1285 
1286 	if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash))
1287 		goto out_spin;
1288 
1289 	__unix_set_addr_hash(net, sk, addr, new_hash);
1290 	unix_table_double_unlock(net, old_hash, new_hash);
1291 	mutex_unlock(&u->bindlock);
1292 	return 0;
1293 
1294 out_spin:
1295 	unix_table_double_unlock(net, old_hash, new_hash);
1296 	err = -EADDRINUSE;
1297 out_mutex:
1298 	mutex_unlock(&u->bindlock);
1299 out:
1300 	unix_release_addr(addr);
1301 	return err;
1302 }
1303 
unix_bind(struct socket * sock,struct sockaddr * uaddr,int addr_len)1304 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
1305 {
1306 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1307 	struct sock *sk = sock->sk;
1308 	int err;
1309 
1310 	if (addr_len == offsetof(struct sockaddr_un, sun_path) &&
1311 	    sunaddr->sun_family == AF_UNIX)
1312 		return unix_autobind(sk);
1313 
1314 	err = unix_validate_addr(sunaddr, addr_len);
1315 	if (err)
1316 		return err;
1317 
1318 	if (sunaddr->sun_path[0])
1319 		err = unix_bind_bsd(sk, sunaddr, addr_len);
1320 	else
1321 		err = unix_bind_abstract(sk, sunaddr, addr_len);
1322 
1323 	return err;
1324 }
1325 
unix_state_double_lock(struct sock * sk1,struct sock * sk2)1326 static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
1327 {
1328 	if (unlikely(sk1 == sk2) || !sk2) {
1329 		unix_state_lock(sk1);
1330 		return;
1331 	}
1332 	if (sk1 > sk2)
1333 		swap(sk1, sk2);
1334 
1335 	unix_state_lock(sk1);
1336 	unix_state_lock_nested(sk2, U_LOCK_SECOND);
1337 }
1338 
unix_state_double_unlock(struct sock * sk1,struct sock * sk2)1339 static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
1340 {
1341 	if (unlikely(sk1 == sk2) || !sk2) {
1342 		unix_state_unlock(sk1);
1343 		return;
1344 	}
1345 	unix_state_unlock(sk1);
1346 	unix_state_unlock(sk2);
1347 }
1348 
unix_dgram_connect(struct socket * sock,struct sockaddr * addr,int alen,int flags)1349 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
1350 			      int alen, int flags)
1351 {
1352 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
1353 	struct sock *sk = sock->sk;
1354 	struct sock *other;
1355 	int err;
1356 
1357 	err = -EINVAL;
1358 	if (alen < offsetofend(struct sockaddr, sa_family))
1359 		goto out;
1360 
1361 	if (addr->sa_family != AF_UNSPEC) {
1362 		err = unix_validate_addr(sunaddr, alen);
1363 		if (err)
1364 			goto out;
1365 
1366 		err = BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, addr, &alen);
1367 		if (err)
1368 			goto out;
1369 
1370 		if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
1371 		     test_bit(SOCK_PASSPIDFD, &sock->flags)) &&
1372 		    !unix_sk(sk)->addr) {
1373 			err = unix_autobind(sk);
1374 			if (err)
1375 				goto out;
1376 		}
1377 
1378 restart:
1379 		other = unix_find_other(sock_net(sk), sunaddr, alen, sock->type);
1380 		if (IS_ERR(other)) {
1381 			err = PTR_ERR(other);
1382 			goto out;
1383 		}
1384 
1385 		unix_state_double_lock(sk, other);
1386 
1387 		/* Apparently VFS overslept socket death. Retry. */
1388 		if (sock_flag(other, SOCK_DEAD)) {
1389 			unix_state_double_unlock(sk, other);
1390 			sock_put(other);
1391 			goto restart;
1392 		}
1393 
1394 		err = -EPERM;
1395 		if (!unix_may_send(sk, other))
1396 			goto out_unlock;
1397 
1398 		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1399 		if (err)
1400 			goto out_unlock;
1401 
1402 		sk->sk_state = other->sk_state = TCP_ESTABLISHED;
1403 	} else {
1404 		/*
1405 		 *	1003.1g breaking connected state with AF_UNSPEC
1406 		 */
1407 		other = NULL;
1408 		unix_state_double_lock(sk, other);
1409 	}
1410 
1411 	/*
1412 	 * If it was connected, reconnect.
1413 	 */
1414 	if (unix_peer(sk)) {
1415 		struct sock *old_peer = unix_peer(sk);
1416 
1417 		unix_peer(sk) = other;
1418 		if (!other)
1419 			sk->sk_state = TCP_CLOSE;
1420 		unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
1421 
1422 		unix_state_double_unlock(sk, other);
1423 
1424 		if (other != old_peer)
1425 			unix_dgram_disconnected(sk, old_peer);
1426 		sock_put(old_peer);
1427 	} else {
1428 		unix_peer(sk) = other;
1429 		unix_state_double_unlock(sk, other);
1430 	}
1431 
1432 	return 0;
1433 
1434 out_unlock:
1435 	unix_state_double_unlock(sk, other);
1436 	sock_put(other);
1437 out:
1438 	return err;
1439 }
1440 
unix_wait_for_peer(struct sock * other,long timeo)1441 static long unix_wait_for_peer(struct sock *other, long timeo)
1442 	__releases(&unix_sk(other)->lock)
1443 {
1444 	struct unix_sock *u = unix_sk(other);
1445 	int sched;
1446 	DEFINE_WAIT(wait);
1447 
1448 	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1449 
1450 	sched = !sock_flag(other, SOCK_DEAD) &&
1451 		!(other->sk_shutdown & RCV_SHUTDOWN) &&
1452 		unix_recvq_full_lockless(other);
1453 
1454 	unix_state_unlock(other);
1455 
1456 	if (sched)
1457 		timeo = schedule_timeout(timeo);
1458 
1459 	finish_wait(&u->peer_wait, &wait);
1460 	return timeo;
1461 }
1462 
unix_stream_connect(struct socket * sock,struct sockaddr * uaddr,int addr_len,int flags)1463 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1464 			       int addr_len, int flags)
1465 {
1466 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1467 	struct sock *sk = sock->sk, *newsk = NULL, *other = NULL;
1468 	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1469 	struct net *net = sock_net(sk);
1470 	struct sk_buff *skb = NULL;
1471 	long timeo;
1472 	int err;
1473 	int st;
1474 
1475 	err = unix_validate_addr(sunaddr, addr_len);
1476 	if (err)
1477 		goto out;
1478 
1479 	err = BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, uaddr, &addr_len);
1480 	if (err)
1481 		goto out;
1482 
1483 	if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
1484 	     test_bit(SOCK_PASSPIDFD, &sock->flags)) && !u->addr) {
1485 		err = unix_autobind(sk);
1486 		if (err)
1487 			goto out;
1488 	}
1489 
1490 	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1491 
1492 	/* First of all allocate resources.
1493 	   If we will make it after state is locked,
1494 	   we will have to recheck all again in any case.
1495 	 */
1496 
1497 	/* create new sock for complete connection */
1498 	newsk = unix_create1(net, NULL, 0, sock->type);
1499 	if (IS_ERR(newsk)) {
1500 		err = PTR_ERR(newsk);
1501 		newsk = NULL;
1502 		goto out;
1503 	}
1504 
1505 	err = -ENOMEM;
1506 
1507 	/* Allocate skb for sending to listening sock */
1508 	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1509 	if (skb == NULL)
1510 		goto out;
1511 
1512 restart:
1513 	/*  Find listening sock. */
1514 	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type);
1515 	if (IS_ERR(other)) {
1516 		err = PTR_ERR(other);
1517 		other = NULL;
1518 		goto out;
1519 	}
1520 
1521 	/* Latch state of peer */
1522 	unix_state_lock(other);
1523 
1524 	/* Apparently VFS overslept socket death. Retry. */
1525 	if (sock_flag(other, SOCK_DEAD)) {
1526 		unix_state_unlock(other);
1527 		sock_put(other);
1528 		goto restart;
1529 	}
1530 
1531 	err = -ECONNREFUSED;
1532 	if (other->sk_state != TCP_LISTEN)
1533 		goto out_unlock;
1534 	if (other->sk_shutdown & RCV_SHUTDOWN)
1535 		goto out_unlock;
1536 
1537 	if (unix_recvq_full(other)) {
1538 		err = -EAGAIN;
1539 		if (!timeo)
1540 			goto out_unlock;
1541 
1542 		timeo = unix_wait_for_peer(other, timeo);
1543 
1544 		err = sock_intr_errno(timeo);
1545 		if (signal_pending(current))
1546 			goto out;
1547 		sock_put(other);
1548 		goto restart;
1549 	}
1550 
1551 	/* Latch our state.
1552 
1553 	   It is tricky place. We need to grab our state lock and cannot
1554 	   drop lock on peer. It is dangerous because deadlock is
1555 	   possible. Connect to self case and simultaneous
1556 	   attempt to connect are eliminated by checking socket
1557 	   state. other is TCP_LISTEN, if sk is TCP_LISTEN we
1558 	   check this before attempt to grab lock.
1559 
1560 	   Well, and we have to recheck the state after socket locked.
1561 	 */
1562 	st = sk->sk_state;
1563 
1564 	switch (st) {
1565 	case TCP_CLOSE:
1566 		/* This is ok... continue with connect */
1567 		break;
1568 	case TCP_ESTABLISHED:
1569 		/* Socket is already connected */
1570 		err = -EISCONN;
1571 		goto out_unlock;
1572 	default:
1573 		err = -EINVAL;
1574 		goto out_unlock;
1575 	}
1576 
1577 	unix_state_lock_nested(sk, U_LOCK_SECOND);
1578 
1579 	if (sk->sk_state != st) {
1580 		unix_state_unlock(sk);
1581 		unix_state_unlock(other);
1582 		sock_put(other);
1583 		goto restart;
1584 	}
1585 
1586 	err = security_unix_stream_connect(sk, other, newsk);
1587 	if (err) {
1588 		unix_state_unlock(sk);
1589 		goto out_unlock;
1590 	}
1591 
1592 	/* The way is open! Fastly set all the necessary fields... */
1593 
1594 	sock_hold(sk);
1595 	unix_peer(newsk)	= sk;
1596 	newsk->sk_state		= TCP_ESTABLISHED;
1597 	newsk->sk_type		= sk->sk_type;
1598 	init_peercred(newsk);
1599 	newu = unix_sk(newsk);
1600 	newu->listener = other;
1601 	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
1602 	otheru = unix_sk(other);
1603 
1604 	/* copy address information from listening to new sock
1605 	 *
1606 	 * The contents of *(otheru->addr) and otheru->path
1607 	 * are seen fully set up here, since we have found
1608 	 * otheru in hash under its lock.  Insertion into the
1609 	 * hash chain we'd found it in had been done in an
1610 	 * earlier critical area protected by the chain's lock,
1611 	 * the same one where we'd set *(otheru->addr) contents,
1612 	 * as well as otheru->path and otheru->addr itself.
1613 	 *
1614 	 * Using smp_store_release() here to set newu->addr
1615 	 * is enough to make those stores, as well as stores
1616 	 * to newu->path visible to anyone who gets newu->addr
1617 	 * by smp_load_acquire().  IOW, the same warranties
1618 	 * as for unix_sock instances bound in unix_bind() or
1619 	 * in unix_autobind().
1620 	 */
1621 	if (otheru->path.dentry) {
1622 		path_get(&otheru->path);
1623 		newu->path = otheru->path;
1624 	}
1625 	refcount_inc(&otheru->addr->refcnt);
1626 	smp_store_release(&newu->addr, otheru->addr);
1627 
1628 	/* Set credentials */
1629 	copy_peercred(sk, other);
1630 
1631 	sock->state	= SS_CONNECTED;
1632 	sk->sk_state	= TCP_ESTABLISHED;
1633 	sock_hold(newsk);
1634 
1635 	smp_mb__after_atomic();	/* sock_hold() does an atomic_inc() */
1636 	unix_peer(sk)	= newsk;
1637 
1638 	unix_state_unlock(sk);
1639 
1640 	/* take ten and send info to listening sock */
1641 	spin_lock(&other->sk_receive_queue.lock);
1642 	__skb_queue_tail(&other->sk_receive_queue, skb);
1643 	spin_unlock(&other->sk_receive_queue.lock);
1644 	unix_state_unlock(other);
1645 	other->sk_data_ready(other);
1646 	sock_put(other);
1647 	return 0;
1648 
1649 out_unlock:
1650 	if (other)
1651 		unix_state_unlock(other);
1652 
1653 out:
1654 	kfree_skb(skb);
1655 	if (newsk)
1656 		unix_release_sock(newsk, 0);
1657 	if (other)
1658 		sock_put(other);
1659 	return err;
1660 }
1661 
unix_socketpair(struct socket * socka,struct socket * sockb)1662 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1663 {
1664 	struct sock *ska = socka->sk, *skb = sockb->sk;
1665 
1666 	/* Join our sockets back to back */
1667 	sock_hold(ska);
1668 	sock_hold(skb);
1669 	unix_peer(ska) = skb;
1670 	unix_peer(skb) = ska;
1671 	init_peercred(ska);
1672 	init_peercred(skb);
1673 
1674 	ska->sk_state = TCP_ESTABLISHED;
1675 	skb->sk_state = TCP_ESTABLISHED;
1676 	socka->state  = SS_CONNECTED;
1677 	sockb->state  = SS_CONNECTED;
1678 	return 0;
1679 }
1680 
unix_sock_inherit_flags(const struct socket * old,struct socket * new)1681 static void unix_sock_inherit_flags(const struct socket *old,
1682 				    struct socket *new)
1683 {
1684 	if (test_bit(SOCK_PASSCRED, &old->flags))
1685 		set_bit(SOCK_PASSCRED, &new->flags);
1686 	if (test_bit(SOCK_PASSPIDFD, &old->flags))
1687 		set_bit(SOCK_PASSPIDFD, &new->flags);
1688 	if (test_bit(SOCK_PASSSEC, &old->flags))
1689 		set_bit(SOCK_PASSSEC, &new->flags);
1690 }
1691 
unix_accept(struct socket * sock,struct socket * newsock,struct proto_accept_arg * arg)1692 static int unix_accept(struct socket *sock, struct socket *newsock,
1693 		       struct proto_accept_arg *arg)
1694 {
1695 	struct sock *sk = sock->sk;
1696 	struct sk_buff *skb;
1697 	struct sock *tsk;
1698 
1699 	arg->err = -EOPNOTSUPP;
1700 	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1701 		goto out;
1702 
1703 	arg->err = -EINVAL;
1704 	if (sk->sk_state != TCP_LISTEN)
1705 		goto out;
1706 
1707 	/* If socket state is TCP_LISTEN it cannot change (for now...),
1708 	 * so that no locks are necessary.
1709 	 */
1710 
1711 	skb = skb_recv_datagram(sk, (arg->flags & O_NONBLOCK) ? MSG_DONTWAIT : 0,
1712 				&arg->err);
1713 	if (!skb) {
1714 		/* This means receive shutdown. */
1715 		if (arg->err == 0)
1716 			arg->err = -EINVAL;
1717 		goto out;
1718 	}
1719 
1720 	tsk = skb->sk;
1721 	skb_free_datagram(sk, skb);
1722 	wake_up_interruptible(&unix_sk(sk)->peer_wait);
1723 
1724 	/* attach accepted sock to socket */
1725 	unix_state_lock(tsk);
1726 	unix_update_edges(unix_sk(tsk));
1727 	newsock->state = SS_CONNECTED;
1728 	unix_sock_inherit_flags(sock, newsock);
1729 	sock_graft(tsk, newsock);
1730 	unix_state_unlock(tsk);
1731 	return 0;
1732 
1733 out:
1734 	return arg->err;
1735 }
1736 
1737 
unix_getname(struct socket * sock,struct sockaddr * uaddr,int peer)1738 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
1739 {
1740 	struct sock *sk = sock->sk;
1741 	struct unix_address *addr;
1742 	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1743 	int err = 0;
1744 
1745 	if (peer) {
1746 		sk = unix_peer_get(sk);
1747 
1748 		err = -ENOTCONN;
1749 		if (!sk)
1750 			goto out;
1751 		err = 0;
1752 	} else {
1753 		sock_hold(sk);
1754 	}
1755 
1756 	addr = smp_load_acquire(&unix_sk(sk)->addr);
1757 	if (!addr) {
1758 		sunaddr->sun_family = AF_UNIX;
1759 		sunaddr->sun_path[0] = 0;
1760 		err = offsetof(struct sockaddr_un, sun_path);
1761 	} else {
1762 		err = addr->len;
1763 		memcpy(sunaddr, addr->name, addr->len);
1764 
1765 		if (peer)
1766 			BPF_CGROUP_RUN_SA_PROG(sk, uaddr, &err,
1767 					       CGROUP_UNIX_GETPEERNAME);
1768 		else
1769 			BPF_CGROUP_RUN_SA_PROG(sk, uaddr, &err,
1770 					       CGROUP_UNIX_GETSOCKNAME);
1771 	}
1772 	sock_put(sk);
1773 out:
1774 	return err;
1775 }
1776 
1777 /* The "user->unix_inflight" variable is protected by the garbage
1778  * collection lock, and we just read it locklessly here. If you go
1779  * over the limit, there might be a tiny race in actually noticing
1780  * it across threads. Tough.
1781  */
too_many_unix_fds(struct task_struct * p)1782 static inline bool too_many_unix_fds(struct task_struct *p)
1783 {
1784 	struct user_struct *user = current_user();
1785 
1786 	if (unlikely(READ_ONCE(user->unix_inflight) > task_rlimit(p, RLIMIT_NOFILE)))
1787 		return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
1788 	return false;
1789 }
1790 
unix_attach_fds(struct scm_cookie * scm,struct sk_buff * skb)1791 static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1792 {
1793 	if (too_many_unix_fds(current))
1794 		return -ETOOMANYREFS;
1795 
1796 	UNIXCB(skb).fp = scm->fp;
1797 	scm->fp = NULL;
1798 
1799 	if (unix_prepare_fpl(UNIXCB(skb).fp))
1800 		return -ENOMEM;
1801 
1802 	return 0;
1803 }
1804 
unix_detach_fds(struct scm_cookie * scm,struct sk_buff * skb)1805 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1806 {
1807 	scm->fp = UNIXCB(skb).fp;
1808 	UNIXCB(skb).fp = NULL;
1809 
1810 	unix_destroy_fpl(scm->fp);
1811 }
1812 
unix_peek_fds(struct scm_cookie * scm,struct sk_buff * skb)1813 static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
1814 {
1815 	scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1816 }
1817 
unix_destruct_scm(struct sk_buff * skb)1818 static void unix_destruct_scm(struct sk_buff *skb)
1819 {
1820 	struct scm_cookie scm;
1821 
1822 	memset(&scm, 0, sizeof(scm));
1823 	scm.pid  = UNIXCB(skb).pid;
1824 	if (UNIXCB(skb).fp)
1825 		unix_detach_fds(&scm, skb);
1826 
1827 	/* Alas, it calls VFS */
1828 	/* So fscking what? fput() had been SMP-safe since the last Summer */
1829 	scm_destroy(&scm);
1830 	sock_wfree(skb);
1831 }
1832 
unix_scm_to_skb(struct scm_cookie * scm,struct sk_buff * skb,bool send_fds)1833 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1834 {
1835 	int err = 0;
1836 
1837 	UNIXCB(skb).pid  = get_pid(scm->pid);
1838 	UNIXCB(skb).uid = scm->creds.uid;
1839 	UNIXCB(skb).gid = scm->creds.gid;
1840 	UNIXCB(skb).fp = NULL;
1841 	unix_get_secdata(scm, skb);
1842 	if (scm->fp && send_fds)
1843 		err = unix_attach_fds(scm, skb);
1844 
1845 	skb->destructor = unix_destruct_scm;
1846 	return err;
1847 }
1848 
unix_passcred_enabled(const struct socket * sock,const struct sock * other)1849 static bool unix_passcred_enabled(const struct socket *sock,
1850 				  const struct sock *other)
1851 {
1852 	return test_bit(SOCK_PASSCRED, &sock->flags) ||
1853 	       test_bit(SOCK_PASSPIDFD, &sock->flags) ||
1854 	       !other->sk_socket ||
1855 	       test_bit(SOCK_PASSCRED, &other->sk_socket->flags) ||
1856 	       test_bit(SOCK_PASSPIDFD, &other->sk_socket->flags);
1857 }
1858 
1859 /*
1860  * Some apps rely on write() giving SCM_CREDENTIALS
1861  * We include credentials if source or destination socket
1862  * asserted SOCK_PASSCRED.
1863  */
maybe_add_creds(struct sk_buff * skb,const struct socket * sock,const struct sock * other)1864 static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1865 			    const struct sock *other)
1866 {
1867 	if (UNIXCB(skb).pid)
1868 		return;
1869 	if (unix_passcred_enabled(sock, other)) {
1870 		UNIXCB(skb).pid  = get_pid(task_tgid(current));
1871 		current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
1872 	}
1873 }
1874 
unix_skb_scm_eq(struct sk_buff * skb,struct scm_cookie * scm)1875 static bool unix_skb_scm_eq(struct sk_buff *skb,
1876 			    struct scm_cookie *scm)
1877 {
1878 	return UNIXCB(skb).pid == scm->pid &&
1879 	       uid_eq(UNIXCB(skb).uid, scm->creds.uid) &&
1880 	       gid_eq(UNIXCB(skb).gid, scm->creds.gid) &&
1881 	       unix_secdata_eq(scm, skb);
1882 }
1883 
scm_stat_add(struct sock * sk,struct sk_buff * skb)1884 static void scm_stat_add(struct sock *sk, struct sk_buff *skb)
1885 {
1886 	struct scm_fp_list *fp = UNIXCB(skb).fp;
1887 	struct unix_sock *u = unix_sk(sk);
1888 
1889 	if (unlikely(fp && fp->count)) {
1890 		atomic_add(fp->count, &u->scm_stat.nr_fds);
1891 		unix_add_edges(fp, u);
1892 	}
1893 }
1894 
scm_stat_del(struct sock * sk,struct sk_buff * skb)1895 static void scm_stat_del(struct sock *sk, struct sk_buff *skb)
1896 {
1897 	struct scm_fp_list *fp = UNIXCB(skb).fp;
1898 	struct unix_sock *u = unix_sk(sk);
1899 
1900 	if (unlikely(fp && fp->count)) {
1901 		atomic_sub(fp->count, &u->scm_stat.nr_fds);
1902 		unix_del_edges(fp);
1903 	}
1904 }
1905 
1906 /*
1907  *	Send AF_UNIX data.
1908  */
1909 
unix_dgram_sendmsg(struct socket * sock,struct msghdr * msg,size_t len)1910 static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
1911 			      size_t len)
1912 {
1913 	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
1914 	struct sock *sk = sock->sk, *other = NULL;
1915 	struct unix_sock *u = unix_sk(sk);
1916 	struct scm_cookie scm;
1917 	struct sk_buff *skb;
1918 	int data_len = 0;
1919 	int sk_locked;
1920 	long timeo;
1921 	int err;
1922 
1923 	err = scm_send(sock, msg, &scm, false);
1924 	if (err < 0)
1925 		return err;
1926 
1927 	wait_for_unix_gc(scm.fp);
1928 
1929 	err = -EOPNOTSUPP;
1930 	if (msg->msg_flags&MSG_OOB)
1931 		goto out;
1932 
1933 	if (msg->msg_namelen) {
1934 		err = unix_validate_addr(sunaddr, msg->msg_namelen);
1935 		if (err)
1936 			goto out;
1937 
1938 		err = BPF_CGROUP_RUN_PROG_UNIX_SENDMSG_LOCK(sk,
1939 							    msg->msg_name,
1940 							    &msg->msg_namelen,
1941 							    NULL);
1942 		if (err)
1943 			goto out;
1944 	} else {
1945 		sunaddr = NULL;
1946 		err = -ENOTCONN;
1947 		other = unix_peer_get(sk);
1948 		if (!other)
1949 			goto out;
1950 	}
1951 
1952 	if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
1953 	     test_bit(SOCK_PASSPIDFD, &sock->flags)) && !u->addr) {
1954 		err = unix_autobind(sk);
1955 		if (err)
1956 			goto out;
1957 	}
1958 
1959 	err = -EMSGSIZE;
1960 	if (len > sk->sk_sndbuf - 32)
1961 		goto out;
1962 
1963 	if (len > SKB_MAX_ALLOC) {
1964 		data_len = min_t(size_t,
1965 				 len - SKB_MAX_ALLOC,
1966 				 MAX_SKB_FRAGS * PAGE_SIZE);
1967 		data_len = PAGE_ALIGN(data_len);
1968 
1969 		BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
1970 	}
1971 
1972 	skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1973 				   msg->msg_flags & MSG_DONTWAIT, &err,
1974 				   PAGE_ALLOC_COSTLY_ORDER);
1975 	if (skb == NULL)
1976 		goto out;
1977 
1978 	err = unix_scm_to_skb(&scm, skb, true);
1979 	if (err < 0)
1980 		goto out_free;
1981 
1982 	skb_put(skb, len - data_len);
1983 	skb->data_len = data_len;
1984 	skb->len = len;
1985 	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
1986 	if (err)
1987 		goto out_free;
1988 
1989 	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1990 
1991 restart:
1992 	if (!other) {
1993 		err = -ECONNRESET;
1994 		if (sunaddr == NULL)
1995 			goto out_free;
1996 
1997 		other = unix_find_other(sock_net(sk), sunaddr, msg->msg_namelen,
1998 					sk->sk_type);
1999 		if (IS_ERR(other)) {
2000 			err = PTR_ERR(other);
2001 			other = NULL;
2002 			goto out_free;
2003 		}
2004 	}
2005 
2006 	if (sk_filter(other, skb) < 0) {
2007 		/* Toss the packet but do not return any error to the sender */
2008 		err = len;
2009 		goto out_free;
2010 	}
2011 
2012 	sk_locked = 0;
2013 	unix_state_lock(other);
2014 restart_locked:
2015 	err = -EPERM;
2016 	if (!unix_may_send(sk, other))
2017 		goto out_unlock;
2018 
2019 	if (unlikely(sock_flag(other, SOCK_DEAD))) {
2020 		/*
2021 		 *	Check with 1003.1g - what should
2022 		 *	datagram error
2023 		 */
2024 		unix_state_unlock(other);
2025 		sock_put(other);
2026 
2027 		if (!sk_locked)
2028 			unix_state_lock(sk);
2029 
2030 		err = 0;
2031 		if (sk->sk_type == SOCK_SEQPACKET) {
2032 			/* We are here only when racing with unix_release_sock()
2033 			 * is clearing @other. Never change state to TCP_CLOSE
2034 			 * unlike SOCK_DGRAM wants.
2035 			 */
2036 			unix_state_unlock(sk);
2037 			err = -EPIPE;
2038 		} else if (unix_peer(sk) == other) {
2039 			unix_peer(sk) = NULL;
2040 			unix_dgram_peer_wake_disconnect_wakeup(sk, other);
2041 
2042 			sk->sk_state = TCP_CLOSE;
2043 			unix_state_unlock(sk);
2044 
2045 			unix_dgram_disconnected(sk, other);
2046 			sock_put(other);
2047 			err = -ECONNREFUSED;
2048 		} else {
2049 			unix_state_unlock(sk);
2050 		}
2051 
2052 		other = NULL;
2053 		if (err)
2054 			goto out_free;
2055 		goto restart;
2056 	}
2057 
2058 	err = -EPIPE;
2059 	if (other->sk_shutdown & RCV_SHUTDOWN)
2060 		goto out_unlock;
2061 
2062 	if (sk->sk_type != SOCK_SEQPACKET) {
2063 		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
2064 		if (err)
2065 			goto out_unlock;
2066 	}
2067 
2068 	/* other == sk && unix_peer(other) != sk if
2069 	 * - unix_peer(sk) == NULL, destination address bound to sk
2070 	 * - unix_peer(sk) == sk by time of get but disconnected before lock
2071 	 */
2072 	if (other != sk &&
2073 	    unlikely(unix_peer(other) != sk &&
2074 	    unix_recvq_full_lockless(other))) {
2075 		if (timeo) {
2076 			timeo = unix_wait_for_peer(other, timeo);
2077 
2078 			err = sock_intr_errno(timeo);
2079 			if (signal_pending(current))
2080 				goto out_free;
2081 
2082 			goto restart;
2083 		}
2084 
2085 		if (!sk_locked) {
2086 			unix_state_unlock(other);
2087 			unix_state_double_lock(sk, other);
2088 		}
2089 
2090 		if (unix_peer(sk) != other ||
2091 		    unix_dgram_peer_wake_me(sk, other)) {
2092 			err = -EAGAIN;
2093 			sk_locked = 1;
2094 			goto out_unlock;
2095 		}
2096 
2097 		if (!sk_locked) {
2098 			sk_locked = 1;
2099 			goto restart_locked;
2100 		}
2101 	}
2102 
2103 	if (unlikely(sk_locked))
2104 		unix_state_unlock(sk);
2105 
2106 	if (sock_flag(other, SOCK_RCVTSTAMP))
2107 		__net_timestamp(skb);
2108 	maybe_add_creds(skb, sock, other);
2109 	scm_stat_add(other, skb);
2110 	skb_queue_tail(&other->sk_receive_queue, skb);
2111 	unix_state_unlock(other);
2112 	other->sk_data_ready(other);
2113 	sock_put(other);
2114 	scm_destroy(&scm);
2115 	return len;
2116 
2117 out_unlock:
2118 	if (sk_locked)
2119 		unix_state_unlock(sk);
2120 	unix_state_unlock(other);
2121 out_free:
2122 	kfree_skb(skb);
2123 out:
2124 	if (other)
2125 		sock_put(other);
2126 	scm_destroy(&scm);
2127 	return err;
2128 }
2129 
2130 /* We use paged skbs for stream sockets, and limit occupancy to 32768
2131  * bytes, and a minimum of a full page.
2132  */
2133 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
2134 
2135 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
queue_oob(struct socket * sock,struct msghdr * msg,struct sock * other,struct scm_cookie * scm,bool fds_sent)2136 static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other,
2137 		     struct scm_cookie *scm, bool fds_sent)
2138 {
2139 	struct unix_sock *ousk = unix_sk(other);
2140 	struct sk_buff *skb;
2141 	int err = 0;
2142 
2143 	skb = sock_alloc_send_skb(sock->sk, 1, msg->msg_flags & MSG_DONTWAIT, &err);
2144 
2145 	if (!skb)
2146 		return err;
2147 
2148 	err = unix_scm_to_skb(scm, skb, !fds_sent);
2149 	if (err < 0) {
2150 		kfree_skb(skb);
2151 		return err;
2152 	}
2153 	skb_put(skb, 1);
2154 	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, 1);
2155 
2156 	if (err) {
2157 		kfree_skb(skb);
2158 		return err;
2159 	}
2160 
2161 	unix_state_lock(other);
2162 
2163 	if (sock_flag(other, SOCK_DEAD) ||
2164 	    (other->sk_shutdown & RCV_SHUTDOWN)) {
2165 		unix_state_unlock(other);
2166 		kfree_skb(skb);
2167 		return -EPIPE;
2168 	}
2169 
2170 	maybe_add_creds(skb, sock, other);
2171 	skb_get(skb);
2172 
2173 	if (ousk->oob_skb)
2174 		consume_skb(ousk->oob_skb);
2175 
2176 	WRITE_ONCE(ousk->oob_skb, skb);
2177 
2178 	scm_stat_add(other, skb);
2179 	skb_queue_tail(&other->sk_receive_queue, skb);
2180 	sk_send_sigurg(other);
2181 	unix_state_unlock(other);
2182 	other->sk_data_ready(other);
2183 
2184 	return err;
2185 }
2186 #endif
2187 
unix_stream_sendmsg(struct socket * sock,struct msghdr * msg,size_t len)2188 static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
2189 			       size_t len)
2190 {
2191 	struct sock *sk = sock->sk;
2192 	struct sock *other = NULL;
2193 	int err, size;
2194 	struct sk_buff *skb;
2195 	int sent = 0;
2196 	struct scm_cookie scm;
2197 	bool fds_sent = false;
2198 	int data_len;
2199 
2200 	err = scm_send(sock, msg, &scm, false);
2201 	if (err < 0)
2202 		return err;
2203 
2204 	wait_for_unix_gc(scm.fp);
2205 
2206 	err = -EOPNOTSUPP;
2207 	if (msg->msg_flags & MSG_OOB) {
2208 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2209 		if (len)
2210 			len--;
2211 		else
2212 #endif
2213 			goto out_err;
2214 	}
2215 
2216 	if (msg->msg_namelen) {
2217 		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
2218 		goto out_err;
2219 	} else {
2220 		err = -ENOTCONN;
2221 		other = unix_peer(sk);
2222 		if (!other)
2223 			goto out_err;
2224 	}
2225 
2226 	if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN)
2227 		goto pipe_err;
2228 
2229 	while (sent < len) {
2230 		size = len - sent;
2231 
2232 		if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES)) {
2233 			skb = sock_alloc_send_pskb(sk, 0, 0,
2234 						   msg->msg_flags & MSG_DONTWAIT,
2235 						   &err, 0);
2236 		} else {
2237 			/* Keep two messages in the pipe so it schedules better */
2238 			size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
2239 
2240 			/* allow fallback to order-0 allocations */
2241 			size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
2242 
2243 			data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
2244 
2245 			data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
2246 
2247 			skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
2248 						   msg->msg_flags & MSG_DONTWAIT, &err,
2249 						   get_order(UNIX_SKB_FRAGS_SZ));
2250 		}
2251 		if (!skb)
2252 			goto out_err;
2253 
2254 		/* Only send the fds in the first buffer */
2255 		err = unix_scm_to_skb(&scm, skb, !fds_sent);
2256 		if (err < 0) {
2257 			kfree_skb(skb);
2258 			goto out_err;
2259 		}
2260 		fds_sent = true;
2261 
2262 		if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES)) {
2263 			err = skb_splice_from_iter(skb, &msg->msg_iter, size,
2264 						   sk->sk_allocation);
2265 			if (err < 0) {
2266 				kfree_skb(skb);
2267 				goto out_err;
2268 			}
2269 			size = err;
2270 			refcount_add(size, &sk->sk_wmem_alloc);
2271 		} else {
2272 			skb_put(skb, size - data_len);
2273 			skb->data_len = data_len;
2274 			skb->len = size;
2275 			err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
2276 			if (err) {
2277 				kfree_skb(skb);
2278 				goto out_err;
2279 			}
2280 		}
2281 
2282 		unix_state_lock(other);
2283 
2284 		if (sock_flag(other, SOCK_DEAD) ||
2285 		    (other->sk_shutdown & RCV_SHUTDOWN))
2286 			goto pipe_err_free;
2287 
2288 		maybe_add_creds(skb, sock, other);
2289 		scm_stat_add(other, skb);
2290 		skb_queue_tail(&other->sk_receive_queue, skb);
2291 		unix_state_unlock(other);
2292 		other->sk_data_ready(other);
2293 		sent += size;
2294 	}
2295 
2296 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2297 	if (msg->msg_flags & MSG_OOB) {
2298 		err = queue_oob(sock, msg, other, &scm, fds_sent);
2299 		if (err)
2300 			goto out_err;
2301 		sent++;
2302 	}
2303 #endif
2304 
2305 	scm_destroy(&scm);
2306 
2307 	return sent;
2308 
2309 pipe_err_free:
2310 	unix_state_unlock(other);
2311 	kfree_skb(skb);
2312 pipe_err:
2313 	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
2314 		send_sig(SIGPIPE, current, 0);
2315 	err = -EPIPE;
2316 out_err:
2317 	scm_destroy(&scm);
2318 	return sent ? : err;
2319 }
2320 
unix_seqpacket_sendmsg(struct socket * sock,struct msghdr * msg,size_t len)2321 static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2322 				  size_t len)
2323 {
2324 	int err;
2325 	struct sock *sk = sock->sk;
2326 
2327 	err = sock_error(sk);
2328 	if (err)
2329 		return err;
2330 
2331 	if (sk->sk_state != TCP_ESTABLISHED)
2332 		return -ENOTCONN;
2333 
2334 	if (msg->msg_namelen)
2335 		msg->msg_namelen = 0;
2336 
2337 	return unix_dgram_sendmsg(sock, msg, len);
2338 }
2339 
unix_seqpacket_recvmsg(struct socket * sock,struct msghdr * msg,size_t size,int flags)2340 static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2341 				  size_t size, int flags)
2342 {
2343 	struct sock *sk = sock->sk;
2344 
2345 	if (sk->sk_state != TCP_ESTABLISHED)
2346 		return -ENOTCONN;
2347 
2348 	return unix_dgram_recvmsg(sock, msg, size, flags);
2349 }
2350 
unix_copy_addr(struct msghdr * msg,struct sock * sk)2351 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2352 {
2353 	struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
2354 
2355 	if (addr) {
2356 		msg->msg_namelen = addr->len;
2357 		memcpy(msg->msg_name, addr->name, addr->len);
2358 	}
2359 }
2360 
__unix_dgram_recvmsg(struct sock * sk,struct msghdr * msg,size_t size,int flags)2361 int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
2362 			 int flags)
2363 {
2364 	struct scm_cookie scm;
2365 	struct socket *sock = sk->sk_socket;
2366 	struct unix_sock *u = unix_sk(sk);
2367 	struct sk_buff *skb, *last;
2368 	long timeo;
2369 	int skip;
2370 	int err;
2371 
2372 	err = -EOPNOTSUPP;
2373 	if (flags&MSG_OOB)
2374 		goto out;
2375 
2376 	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
2377 
2378 	do {
2379 		mutex_lock(&u->iolock);
2380 
2381 		skip = sk_peek_offset(sk, flags);
2382 		skb = __skb_try_recv_datagram(sk, &sk->sk_receive_queue, flags,
2383 					      &skip, &err, &last);
2384 		if (skb) {
2385 			if (!(flags & MSG_PEEK))
2386 				scm_stat_del(sk, skb);
2387 			break;
2388 		}
2389 
2390 		mutex_unlock(&u->iolock);
2391 
2392 		if (err != -EAGAIN)
2393 			break;
2394 	} while (timeo &&
2395 		 !__skb_wait_for_more_packets(sk, &sk->sk_receive_queue,
2396 					      &err, &timeo, last));
2397 
2398 	if (!skb) { /* implies iolock unlocked */
2399 		unix_state_lock(sk);
2400 		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2401 		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
2402 		    (sk->sk_shutdown & RCV_SHUTDOWN))
2403 			err = 0;
2404 		unix_state_unlock(sk);
2405 		goto out;
2406 	}
2407 
2408 	if (wq_has_sleeper(&u->peer_wait))
2409 		wake_up_interruptible_sync_poll(&u->peer_wait,
2410 						EPOLLOUT | EPOLLWRNORM |
2411 						EPOLLWRBAND);
2412 
2413 	if (msg->msg_name) {
2414 		unix_copy_addr(msg, skb->sk);
2415 
2416 		BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk,
2417 						      msg->msg_name,
2418 						      &msg->msg_namelen);
2419 	}
2420 
2421 	if (size > skb->len - skip)
2422 		size = skb->len - skip;
2423 	else if (size < skb->len - skip)
2424 		msg->msg_flags |= MSG_TRUNC;
2425 
2426 	err = skb_copy_datagram_msg(skb, skip, msg, size);
2427 	if (err)
2428 		goto out_free;
2429 
2430 	if (sock_flag(sk, SOCK_RCVTSTAMP))
2431 		__sock_recv_timestamp(msg, sk, skb);
2432 
2433 	memset(&scm, 0, sizeof(scm));
2434 
2435 	scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2436 	unix_set_secdata(&scm, skb);
2437 
2438 	if (!(flags & MSG_PEEK)) {
2439 		if (UNIXCB(skb).fp)
2440 			unix_detach_fds(&scm, skb);
2441 
2442 		sk_peek_offset_bwd(sk, skb->len);
2443 	} else {
2444 		/* It is questionable: on PEEK we could:
2445 		   - do not return fds - good, but too simple 8)
2446 		   - return fds, and do not return them on read (old strategy,
2447 		     apparently wrong)
2448 		   - clone fds (I chose it for now, it is the most universal
2449 		     solution)
2450 
2451 		   POSIX 1003.1g does not actually define this clearly
2452 		   at all. POSIX 1003.1g doesn't define a lot of things
2453 		   clearly however!
2454 
2455 		*/
2456 
2457 		sk_peek_offset_fwd(sk, size);
2458 
2459 		if (UNIXCB(skb).fp)
2460 			unix_peek_fds(&scm, skb);
2461 	}
2462 	err = (flags & MSG_TRUNC) ? skb->len - skip : size;
2463 
2464 	scm_recv_unix(sock, msg, &scm, flags);
2465 
2466 out_free:
2467 	skb_free_datagram(sk, skb);
2468 	mutex_unlock(&u->iolock);
2469 out:
2470 	return err;
2471 }
2472 
unix_dgram_recvmsg(struct socket * sock,struct msghdr * msg,size_t size,int flags)2473 static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
2474 			      int flags)
2475 {
2476 	struct sock *sk = sock->sk;
2477 
2478 #ifdef CONFIG_BPF_SYSCALL
2479 	const struct proto *prot = READ_ONCE(sk->sk_prot);
2480 
2481 	if (prot != &unix_dgram_proto)
2482 		return prot->recvmsg(sk, msg, size, flags, NULL);
2483 #endif
2484 	return __unix_dgram_recvmsg(sk, msg, size, flags);
2485 }
2486 
unix_read_skb(struct sock * sk,skb_read_actor_t recv_actor)2487 static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
2488 {
2489 	struct unix_sock *u = unix_sk(sk);
2490 	struct sk_buff *skb;
2491 	int err;
2492 
2493 	mutex_lock(&u->iolock);
2494 	skb = skb_recv_datagram(sk, MSG_DONTWAIT, &err);
2495 	mutex_unlock(&u->iolock);
2496 	if (!skb)
2497 		return err;
2498 
2499 	return recv_actor(sk, skb);
2500 }
2501 
2502 /*
2503  *	Sleep until more data has arrived. But check for races..
2504  */
unix_stream_data_wait(struct sock * sk,long timeo,struct sk_buff * last,unsigned int last_len,bool freezable)2505 static long unix_stream_data_wait(struct sock *sk, long timeo,
2506 				  struct sk_buff *last, unsigned int last_len,
2507 				  bool freezable)
2508 {
2509 	unsigned int state = TASK_INTERRUPTIBLE | freezable * TASK_FREEZABLE;
2510 	struct sk_buff *tail;
2511 	DEFINE_WAIT(wait);
2512 
2513 	unix_state_lock(sk);
2514 
2515 	for (;;) {
2516 		prepare_to_wait(sk_sleep(sk), &wait, state);
2517 
2518 		tail = skb_peek_tail(&sk->sk_receive_queue);
2519 		if (tail != last ||
2520 		    (tail && tail->len != last_len) ||
2521 		    sk->sk_err ||
2522 		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
2523 		    signal_pending(current) ||
2524 		    !timeo)
2525 			break;
2526 
2527 		sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2528 		unix_state_unlock(sk);
2529 		timeo = schedule_timeout(timeo);
2530 		unix_state_lock(sk);
2531 
2532 		if (sock_flag(sk, SOCK_DEAD))
2533 			break;
2534 
2535 		sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2536 	}
2537 
2538 	finish_wait(sk_sleep(sk), &wait);
2539 	unix_state_unlock(sk);
2540 	return timeo;
2541 }
2542 
unix_skb_len(const struct sk_buff * skb)2543 static unsigned int unix_skb_len(const struct sk_buff *skb)
2544 {
2545 	return skb->len - UNIXCB(skb).consumed;
2546 }
2547 
2548 struct unix_stream_read_state {
2549 	int (*recv_actor)(struct sk_buff *, int, int,
2550 			  struct unix_stream_read_state *);
2551 	struct socket *socket;
2552 	struct msghdr *msg;
2553 	struct pipe_inode_info *pipe;
2554 	size_t size;
2555 	int flags;
2556 	unsigned int splice_flags;
2557 };
2558 
2559 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
unix_stream_recv_urg(struct unix_stream_read_state * state)2560 static int unix_stream_recv_urg(struct unix_stream_read_state *state)
2561 {
2562 	struct socket *sock = state->socket;
2563 	struct sock *sk = sock->sk;
2564 	struct unix_sock *u = unix_sk(sk);
2565 	int chunk = 1;
2566 	struct sk_buff *oob_skb;
2567 
2568 	mutex_lock(&u->iolock);
2569 	unix_state_lock(sk);
2570 
2571 	if (sock_flag(sk, SOCK_URGINLINE) || !u->oob_skb) {
2572 		unix_state_unlock(sk);
2573 		mutex_unlock(&u->iolock);
2574 		return -EINVAL;
2575 	}
2576 
2577 	oob_skb = u->oob_skb;
2578 
2579 	if (!(state->flags & MSG_PEEK))
2580 		WRITE_ONCE(u->oob_skb, NULL);
2581 	else
2582 		skb_get(oob_skb);
2583 	unix_state_unlock(sk);
2584 
2585 	chunk = state->recv_actor(oob_skb, 0, chunk, state);
2586 
2587 	if (!(state->flags & MSG_PEEK))
2588 		UNIXCB(oob_skb).consumed += 1;
2589 
2590 	consume_skb(oob_skb);
2591 
2592 	mutex_unlock(&u->iolock);
2593 
2594 	if (chunk < 0)
2595 		return -EFAULT;
2596 
2597 	state->msg->msg_flags |= MSG_OOB;
2598 	return 1;
2599 }
2600 
manage_oob(struct sk_buff * skb,struct sock * sk,int flags,int copied)2601 static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
2602 				  int flags, int copied)
2603 {
2604 	struct unix_sock *u = unix_sk(sk);
2605 
2606 	if (!unix_skb_len(skb) && !(flags & MSG_PEEK)) {
2607 		skb_unlink(skb, &sk->sk_receive_queue);
2608 		consume_skb(skb);
2609 		skb = NULL;
2610 	} else {
2611 		if (skb == u->oob_skb) {
2612 			if (copied) {
2613 				skb = NULL;
2614 			} else if (sock_flag(sk, SOCK_URGINLINE)) {
2615 				if (!(flags & MSG_PEEK)) {
2616 					WRITE_ONCE(u->oob_skb, NULL);
2617 					consume_skb(skb);
2618 				}
2619 			} else if (flags & MSG_PEEK) {
2620 				skb = NULL;
2621 			} else {
2622 				skb_unlink(skb, &sk->sk_receive_queue);
2623 				WRITE_ONCE(u->oob_skb, NULL);
2624 				if (!WARN_ON_ONCE(skb_unref(skb)))
2625 					kfree_skb(skb);
2626 				skb = skb_peek(&sk->sk_receive_queue);
2627 			}
2628 		}
2629 	}
2630 	return skb;
2631 }
2632 #endif
2633 
unix_stream_read_skb(struct sock * sk,skb_read_actor_t recv_actor)2634 static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
2635 {
2636 	if (unlikely(sk->sk_state != TCP_ESTABLISHED))
2637 		return -ENOTCONN;
2638 
2639 	return unix_read_skb(sk, recv_actor);
2640 }
2641 
unix_stream_read_generic(struct unix_stream_read_state * state,bool freezable)2642 static int unix_stream_read_generic(struct unix_stream_read_state *state,
2643 				    bool freezable)
2644 {
2645 	struct scm_cookie scm;
2646 	struct socket *sock = state->socket;
2647 	struct sock *sk = sock->sk;
2648 	struct unix_sock *u = unix_sk(sk);
2649 	int copied = 0;
2650 	int flags = state->flags;
2651 	int noblock = flags & MSG_DONTWAIT;
2652 	bool check_creds = false;
2653 	int target;
2654 	int err = 0;
2655 	long timeo;
2656 	int skip;
2657 	size_t size = state->size;
2658 	unsigned int last_len;
2659 
2660 	if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
2661 		err = -EINVAL;
2662 		goto out;
2663 	}
2664 
2665 	if (unlikely(flags & MSG_OOB)) {
2666 		err = -EOPNOTSUPP;
2667 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2668 		err = unix_stream_recv_urg(state);
2669 #endif
2670 		goto out;
2671 	}
2672 
2673 	target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
2674 	timeo = sock_rcvtimeo(sk, noblock);
2675 
2676 	memset(&scm, 0, sizeof(scm));
2677 
2678 	/* Lock the socket to prevent queue disordering
2679 	 * while sleeps in memcpy_tomsg
2680 	 */
2681 	mutex_lock(&u->iolock);
2682 
2683 	skip = max(sk_peek_offset(sk, flags), 0);
2684 
2685 	do {
2686 		int chunk;
2687 		bool drop_skb;
2688 		struct sk_buff *skb, *last;
2689 
2690 redo:
2691 		unix_state_lock(sk);
2692 		if (sock_flag(sk, SOCK_DEAD)) {
2693 			err = -ECONNRESET;
2694 			goto unlock;
2695 		}
2696 		last = skb = skb_peek(&sk->sk_receive_queue);
2697 		last_len = last ? last->len : 0;
2698 
2699 again:
2700 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2701 		if (skb) {
2702 			skb = manage_oob(skb, sk, flags, copied);
2703 			if (!skb && copied) {
2704 				unix_state_unlock(sk);
2705 				break;
2706 			}
2707 		}
2708 #endif
2709 		if (skb == NULL) {
2710 			if (copied >= target)
2711 				goto unlock;
2712 
2713 			/*
2714 			 *	POSIX 1003.1g mandates this order.
2715 			 */
2716 
2717 			err = sock_error(sk);
2718 			if (err)
2719 				goto unlock;
2720 			if (sk->sk_shutdown & RCV_SHUTDOWN)
2721 				goto unlock;
2722 
2723 			unix_state_unlock(sk);
2724 			if (!timeo) {
2725 				err = -EAGAIN;
2726 				break;
2727 			}
2728 
2729 			mutex_unlock(&u->iolock);
2730 
2731 			timeo = unix_stream_data_wait(sk, timeo, last,
2732 						      last_len, freezable);
2733 
2734 			if (signal_pending(current)) {
2735 				err = sock_intr_errno(timeo);
2736 				scm_destroy(&scm);
2737 				goto out;
2738 			}
2739 
2740 			mutex_lock(&u->iolock);
2741 			goto redo;
2742 unlock:
2743 			unix_state_unlock(sk);
2744 			break;
2745 		}
2746 
2747 		while (skip >= unix_skb_len(skb)) {
2748 			skip -= unix_skb_len(skb);
2749 			last = skb;
2750 			last_len = skb->len;
2751 			skb = skb_peek_next(skb, &sk->sk_receive_queue);
2752 			if (!skb)
2753 				goto again;
2754 		}
2755 
2756 		unix_state_unlock(sk);
2757 
2758 		if (check_creds) {
2759 			/* Never glue messages from different writers */
2760 			if (!unix_skb_scm_eq(skb, &scm))
2761 				break;
2762 		} else if (test_bit(SOCK_PASSCRED, &sock->flags) ||
2763 			   test_bit(SOCK_PASSPIDFD, &sock->flags)) {
2764 			/* Copy credentials */
2765 			scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2766 			unix_set_secdata(&scm, skb);
2767 			check_creds = true;
2768 		}
2769 
2770 		/* Copy address just once */
2771 		if (state->msg && state->msg->msg_name) {
2772 			DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
2773 					 state->msg->msg_name);
2774 			unix_copy_addr(state->msg, skb->sk);
2775 
2776 			BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk,
2777 							      state->msg->msg_name,
2778 							      &state->msg->msg_namelen);
2779 
2780 			sunaddr = NULL;
2781 		}
2782 
2783 		chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
2784 		skb_get(skb);
2785 		chunk = state->recv_actor(skb, skip, chunk, state);
2786 		drop_skb = !unix_skb_len(skb);
2787 		/* skb is only safe to use if !drop_skb */
2788 		consume_skb(skb);
2789 		if (chunk < 0) {
2790 			if (copied == 0)
2791 				copied = -EFAULT;
2792 			break;
2793 		}
2794 		copied += chunk;
2795 		size -= chunk;
2796 
2797 		if (drop_skb) {
2798 			/* the skb was touched by a concurrent reader;
2799 			 * we should not expect anything from this skb
2800 			 * anymore and assume it invalid - we can be
2801 			 * sure it was dropped from the socket queue
2802 			 *
2803 			 * let's report a short read
2804 			 */
2805 			err = 0;
2806 			break;
2807 		}
2808 
2809 		/* Mark read part of skb as used */
2810 		if (!(flags & MSG_PEEK)) {
2811 			UNIXCB(skb).consumed += chunk;
2812 
2813 			sk_peek_offset_bwd(sk, chunk);
2814 
2815 			if (UNIXCB(skb).fp) {
2816 				scm_stat_del(sk, skb);
2817 				unix_detach_fds(&scm, skb);
2818 			}
2819 
2820 			if (unix_skb_len(skb))
2821 				break;
2822 
2823 			skb_unlink(skb, &sk->sk_receive_queue);
2824 			consume_skb(skb);
2825 
2826 			if (scm.fp)
2827 				break;
2828 		} else {
2829 			/* It is questionable, see note in unix_dgram_recvmsg.
2830 			 */
2831 			if (UNIXCB(skb).fp)
2832 				unix_peek_fds(&scm, skb);
2833 
2834 			sk_peek_offset_fwd(sk, chunk);
2835 
2836 			if (UNIXCB(skb).fp)
2837 				break;
2838 
2839 			skip = 0;
2840 			last = skb;
2841 			last_len = skb->len;
2842 			unix_state_lock(sk);
2843 			skb = skb_peek_next(skb, &sk->sk_receive_queue);
2844 			if (skb)
2845 				goto again;
2846 			unix_state_unlock(sk);
2847 			break;
2848 		}
2849 	} while (size);
2850 
2851 	mutex_unlock(&u->iolock);
2852 	if (state->msg)
2853 		scm_recv_unix(sock, state->msg, &scm, flags);
2854 	else
2855 		scm_destroy(&scm);
2856 out:
2857 	return copied ? : err;
2858 }
2859 
unix_stream_read_actor(struct sk_buff * skb,int skip,int chunk,struct unix_stream_read_state * state)2860 static int unix_stream_read_actor(struct sk_buff *skb,
2861 				  int skip, int chunk,
2862 				  struct unix_stream_read_state *state)
2863 {
2864 	int ret;
2865 
2866 	ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2867 				    state->msg, chunk);
2868 	return ret ?: chunk;
2869 }
2870 
__unix_stream_recvmsg(struct sock * sk,struct msghdr * msg,size_t size,int flags)2871 int __unix_stream_recvmsg(struct sock *sk, struct msghdr *msg,
2872 			  size_t size, int flags)
2873 {
2874 	struct unix_stream_read_state state = {
2875 		.recv_actor = unix_stream_read_actor,
2876 		.socket = sk->sk_socket,
2877 		.msg = msg,
2878 		.size = size,
2879 		.flags = flags
2880 	};
2881 
2882 	return unix_stream_read_generic(&state, true);
2883 }
2884 
unix_stream_recvmsg(struct socket * sock,struct msghdr * msg,size_t size,int flags)2885 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2886 			       size_t size, int flags)
2887 {
2888 	struct unix_stream_read_state state = {
2889 		.recv_actor = unix_stream_read_actor,
2890 		.socket = sock,
2891 		.msg = msg,
2892 		.size = size,
2893 		.flags = flags
2894 	};
2895 
2896 #ifdef CONFIG_BPF_SYSCALL
2897 	struct sock *sk = sock->sk;
2898 	const struct proto *prot = READ_ONCE(sk->sk_prot);
2899 
2900 	if (prot != &unix_stream_proto)
2901 		return prot->recvmsg(sk, msg, size, flags, NULL);
2902 #endif
2903 	return unix_stream_read_generic(&state, true);
2904 }
2905 
unix_stream_splice_actor(struct sk_buff * skb,int skip,int chunk,struct unix_stream_read_state * state)2906 static int unix_stream_splice_actor(struct sk_buff *skb,
2907 				    int skip, int chunk,
2908 				    struct unix_stream_read_state *state)
2909 {
2910 	return skb_splice_bits(skb, state->socket->sk,
2911 			       UNIXCB(skb).consumed + skip,
2912 			       state->pipe, chunk, state->splice_flags);
2913 }
2914 
unix_stream_splice_read(struct socket * sock,loff_t * ppos,struct pipe_inode_info * pipe,size_t size,unsigned int flags)2915 static ssize_t unix_stream_splice_read(struct socket *sock,  loff_t *ppos,
2916 				       struct pipe_inode_info *pipe,
2917 				       size_t size, unsigned int flags)
2918 {
2919 	struct unix_stream_read_state state = {
2920 		.recv_actor = unix_stream_splice_actor,
2921 		.socket = sock,
2922 		.pipe = pipe,
2923 		.size = size,
2924 		.splice_flags = flags,
2925 	};
2926 
2927 	if (unlikely(*ppos))
2928 		return -ESPIPE;
2929 
2930 	if (sock->file->f_flags & O_NONBLOCK ||
2931 	    flags & SPLICE_F_NONBLOCK)
2932 		state.flags = MSG_DONTWAIT;
2933 
2934 	return unix_stream_read_generic(&state, false);
2935 }
2936 
unix_shutdown(struct socket * sock,int mode)2937 static int unix_shutdown(struct socket *sock, int mode)
2938 {
2939 	struct sock *sk = sock->sk;
2940 	struct sock *other;
2941 
2942 	if (mode < SHUT_RD || mode > SHUT_RDWR)
2943 		return -EINVAL;
2944 	/* This maps:
2945 	 * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
2946 	 * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
2947 	 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2948 	 */
2949 	++mode;
2950 
2951 	unix_state_lock(sk);
2952 	WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | mode);
2953 	other = unix_peer(sk);
2954 	if (other)
2955 		sock_hold(other);
2956 	unix_state_unlock(sk);
2957 	sk->sk_state_change(sk);
2958 
2959 	if (other &&
2960 		(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2961 
2962 		int peer_mode = 0;
2963 		const struct proto *prot = READ_ONCE(other->sk_prot);
2964 
2965 		if (prot->unhash)
2966 			prot->unhash(other);
2967 		if (mode&RCV_SHUTDOWN)
2968 			peer_mode |= SEND_SHUTDOWN;
2969 		if (mode&SEND_SHUTDOWN)
2970 			peer_mode |= RCV_SHUTDOWN;
2971 		unix_state_lock(other);
2972 		WRITE_ONCE(other->sk_shutdown, other->sk_shutdown | peer_mode);
2973 		unix_state_unlock(other);
2974 		other->sk_state_change(other);
2975 		if (peer_mode == SHUTDOWN_MASK)
2976 			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2977 		else if (peer_mode & RCV_SHUTDOWN)
2978 			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2979 	}
2980 	if (other)
2981 		sock_put(other);
2982 
2983 	return 0;
2984 }
2985 
unix_inq_len(struct sock * sk)2986 long unix_inq_len(struct sock *sk)
2987 {
2988 	struct sk_buff *skb;
2989 	long amount = 0;
2990 
2991 	if (sk->sk_state == TCP_LISTEN)
2992 		return -EINVAL;
2993 
2994 	spin_lock(&sk->sk_receive_queue.lock);
2995 	if (sk->sk_type == SOCK_STREAM ||
2996 	    sk->sk_type == SOCK_SEQPACKET) {
2997 		skb_queue_walk(&sk->sk_receive_queue, skb)
2998 			amount += unix_skb_len(skb);
2999 	} else {
3000 		skb = skb_peek(&sk->sk_receive_queue);
3001 		if (skb)
3002 			amount = skb->len;
3003 	}
3004 	spin_unlock(&sk->sk_receive_queue.lock);
3005 
3006 	return amount;
3007 }
3008 EXPORT_SYMBOL_GPL(unix_inq_len);
3009 
unix_outq_len(struct sock * sk)3010 long unix_outq_len(struct sock *sk)
3011 {
3012 	return sk_wmem_alloc_get(sk);
3013 }
3014 EXPORT_SYMBOL_GPL(unix_outq_len);
3015 
unix_open_file(struct sock * sk)3016 static int unix_open_file(struct sock *sk)
3017 {
3018 	struct path path;
3019 	struct file *f;
3020 	int fd;
3021 
3022 	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
3023 		return -EPERM;
3024 
3025 	if (!smp_load_acquire(&unix_sk(sk)->addr))
3026 		return -ENOENT;
3027 
3028 	path = unix_sk(sk)->path;
3029 	if (!path.dentry)
3030 		return -ENOENT;
3031 
3032 	path_get(&path);
3033 
3034 	fd = get_unused_fd_flags(O_CLOEXEC);
3035 	if (fd < 0)
3036 		goto out;
3037 
3038 	f = dentry_open(&path, O_PATH, current_cred());
3039 	if (IS_ERR(f)) {
3040 		put_unused_fd(fd);
3041 		fd = PTR_ERR(f);
3042 		goto out;
3043 	}
3044 
3045 	fd_install(fd, f);
3046 out:
3047 	path_put(&path);
3048 
3049 	return fd;
3050 }
3051 
unix_ioctl(struct socket * sock,unsigned int cmd,unsigned long arg)3052 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
3053 {
3054 	struct sock *sk = sock->sk;
3055 	long amount = 0;
3056 	int err;
3057 
3058 	switch (cmd) {
3059 	case SIOCOUTQ:
3060 		amount = unix_outq_len(sk);
3061 		err = put_user(amount, (int __user *)arg);
3062 		break;
3063 	case SIOCINQ:
3064 		amount = unix_inq_len(sk);
3065 		if (amount < 0)
3066 			err = amount;
3067 		else
3068 			err = put_user(amount, (int __user *)arg);
3069 		break;
3070 	case SIOCUNIXFILE:
3071 		err = unix_open_file(sk);
3072 		break;
3073 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
3074 	case SIOCATMARK:
3075 		{
3076 			struct sk_buff *skb;
3077 			int answ = 0;
3078 
3079 			skb = skb_peek(&sk->sk_receive_queue);
3080 			if (skb && skb == READ_ONCE(unix_sk(sk)->oob_skb))
3081 				answ = 1;
3082 			err = put_user(answ, (int __user *)arg);
3083 		}
3084 		break;
3085 #endif
3086 	default:
3087 		err = -ENOIOCTLCMD;
3088 		break;
3089 	}
3090 	return err;
3091 }
3092 
3093 #ifdef CONFIG_COMPAT
unix_compat_ioctl(struct socket * sock,unsigned int cmd,unsigned long arg)3094 static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
3095 {
3096 	return unix_ioctl(sock, cmd, (unsigned long)compat_ptr(arg));
3097 }
3098 #endif
3099 
unix_poll(struct file * file,struct socket * sock,poll_table * wait)3100 static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
3101 {
3102 	struct sock *sk = sock->sk;
3103 	__poll_t mask;
3104 	u8 shutdown;
3105 
3106 	sock_poll_wait(file, sock, wait);
3107 	mask = 0;
3108 	shutdown = READ_ONCE(sk->sk_shutdown);
3109 
3110 	/* exceptional events? */
3111 	if (READ_ONCE(sk->sk_err))
3112 		mask |= EPOLLERR;
3113 	if (shutdown == SHUTDOWN_MASK)
3114 		mask |= EPOLLHUP;
3115 	if (shutdown & RCV_SHUTDOWN)
3116 		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
3117 
3118 	/* readable? */
3119 	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
3120 		mask |= EPOLLIN | EPOLLRDNORM;
3121 	if (sk_is_readable(sk))
3122 		mask |= EPOLLIN | EPOLLRDNORM;
3123 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
3124 	if (READ_ONCE(unix_sk(sk)->oob_skb))
3125 		mask |= EPOLLPRI;
3126 #endif
3127 
3128 	/* Connection-based need to check for termination and startup */
3129 	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
3130 	    sk->sk_state == TCP_CLOSE)
3131 		mask |= EPOLLHUP;
3132 
3133 	/*
3134 	 * we set writable also when the other side has shut down the
3135 	 * connection. This prevents stuck sockets.
3136 	 */
3137 	if (unix_writable(sk))
3138 		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
3139 
3140 	return mask;
3141 }
3142 
unix_dgram_poll(struct file * file,struct socket * sock,poll_table * wait)3143 static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
3144 				    poll_table *wait)
3145 {
3146 	struct sock *sk = sock->sk, *other;
3147 	unsigned int writable;
3148 	__poll_t mask;
3149 	u8 shutdown;
3150 
3151 	sock_poll_wait(file, sock, wait);
3152 	mask = 0;
3153 	shutdown = READ_ONCE(sk->sk_shutdown);
3154 
3155 	/* exceptional events? */
3156 	if (READ_ONCE(sk->sk_err) ||
3157 	    !skb_queue_empty_lockless(&sk->sk_error_queue))
3158 		mask |= EPOLLERR |
3159 			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
3160 
3161 	if (shutdown & RCV_SHUTDOWN)
3162 		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
3163 	if (shutdown == SHUTDOWN_MASK)
3164 		mask |= EPOLLHUP;
3165 
3166 	/* readable? */
3167 	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
3168 		mask |= EPOLLIN | EPOLLRDNORM;
3169 	if (sk_is_readable(sk))
3170 		mask |= EPOLLIN | EPOLLRDNORM;
3171 
3172 	/* Connection-based need to check for termination and startup */
3173 	if (sk->sk_type == SOCK_SEQPACKET) {
3174 		if (sk->sk_state == TCP_CLOSE)
3175 			mask |= EPOLLHUP;
3176 		/* connection hasn't started yet? */
3177 		if (sk->sk_state == TCP_SYN_SENT)
3178 			return mask;
3179 	}
3180 
3181 	/* No write status requested, avoid expensive OUT tests. */
3182 	if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
3183 		return mask;
3184 
3185 	writable = unix_writable(sk);
3186 	if (writable) {
3187 		unix_state_lock(sk);
3188 
3189 		other = unix_peer(sk);
3190 		if (other && unix_peer(other) != sk &&
3191 		    unix_recvq_full_lockless(other) &&
3192 		    unix_dgram_peer_wake_me(sk, other))
3193 			writable = 0;
3194 
3195 		unix_state_unlock(sk);
3196 	}
3197 
3198 	if (writable)
3199 		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
3200 	else
3201 		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
3202 
3203 	return mask;
3204 }
3205 
3206 #ifdef CONFIG_PROC_FS
3207 
3208 #define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)
3209 
3210 #define get_bucket(x) ((x) >> BUCKET_SPACE)
3211 #define get_offset(x) ((x) & ((1UL << BUCKET_SPACE) - 1))
3212 #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
3213 
unix_from_bucket(struct seq_file * seq,loff_t * pos)3214 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
3215 {
3216 	unsigned long offset = get_offset(*pos);
3217 	unsigned long bucket = get_bucket(*pos);
3218 	unsigned long count = 0;
3219 	struct sock *sk;
3220 
3221 	for (sk = sk_head(&seq_file_net(seq)->unx.table.buckets[bucket]);
3222 	     sk; sk = sk_next(sk)) {
3223 		if (++count == offset)
3224 			break;
3225 	}
3226 
3227 	return sk;
3228 }
3229 
unix_get_first(struct seq_file * seq,loff_t * pos)3230 static struct sock *unix_get_first(struct seq_file *seq, loff_t *pos)
3231 {
3232 	unsigned long bucket = get_bucket(*pos);
3233 	struct net *net = seq_file_net(seq);
3234 	struct sock *sk;
3235 
3236 	while (bucket < UNIX_HASH_SIZE) {
3237 		spin_lock(&net->unx.table.locks[bucket]);
3238 
3239 		sk = unix_from_bucket(seq, pos);
3240 		if (sk)
3241 			return sk;
3242 
3243 		spin_unlock(&net->unx.table.locks[bucket]);
3244 
3245 		*pos = set_bucket_offset(++bucket, 1);
3246 	}
3247 
3248 	return NULL;
3249 }
3250 
unix_get_next(struct seq_file * seq,struct sock * sk,loff_t * pos)3251 static struct sock *unix_get_next(struct seq_file *seq, struct sock *sk,
3252 				  loff_t *pos)
3253 {
3254 	unsigned long bucket = get_bucket(*pos);
3255 
3256 	sk = sk_next(sk);
3257 	if (sk)
3258 		return sk;
3259 
3260 
3261 	spin_unlock(&seq_file_net(seq)->unx.table.locks[bucket]);
3262 
3263 	*pos = set_bucket_offset(++bucket, 1);
3264 
3265 	return unix_get_first(seq, pos);
3266 }
3267 
unix_seq_start(struct seq_file * seq,loff_t * pos)3268 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
3269 {
3270 	if (!*pos)
3271 		return SEQ_START_TOKEN;
3272 
3273 	return unix_get_first(seq, pos);
3274 }
3275 
unix_seq_next(struct seq_file * seq,void * v,loff_t * pos)3276 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3277 {
3278 	++*pos;
3279 
3280 	if (v == SEQ_START_TOKEN)
3281 		return unix_get_first(seq, pos);
3282 
3283 	return unix_get_next(seq, v, pos);
3284 }
3285 
unix_seq_stop(struct seq_file * seq,void * v)3286 static void unix_seq_stop(struct seq_file *seq, void *v)
3287 {
3288 	struct sock *sk = v;
3289 
3290 	if (sk)
3291 		spin_unlock(&seq_file_net(seq)->unx.table.locks[sk->sk_hash]);
3292 }
3293 
unix_seq_show(struct seq_file * seq,void * v)3294 static int unix_seq_show(struct seq_file *seq, void *v)
3295 {
3296 
3297 	if (v == SEQ_START_TOKEN)
3298 		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
3299 			 "Inode Path\n");
3300 	else {
3301 		struct sock *s = v;
3302 		struct unix_sock *u = unix_sk(s);
3303 		unix_state_lock(s);
3304 
3305 		seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
3306 			s,
3307 			refcount_read(&s->sk_refcnt),
3308 			0,
3309 			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
3310 			s->sk_type,
3311 			s->sk_socket ?
3312 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
3313 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
3314 			sock_i_ino(s));
3315 
3316 		if (u->addr) {	// under a hash table lock here
3317 			int i, len;
3318 			seq_putc(seq, ' ');
3319 
3320 			i = 0;
3321 			len = u->addr->len -
3322 				offsetof(struct sockaddr_un, sun_path);
3323 			if (u->addr->name->sun_path[0]) {
3324 				len--;
3325 			} else {
3326 				seq_putc(seq, '@');
3327 				i++;
3328 			}
3329 			for ( ; i < len; i++)
3330 				seq_putc(seq, u->addr->name->sun_path[i] ?:
3331 					 '@');
3332 		}
3333 		unix_state_unlock(s);
3334 		seq_putc(seq, '\n');
3335 	}
3336 
3337 	return 0;
3338 }
3339 
3340 static const struct seq_operations unix_seq_ops = {
3341 	.start  = unix_seq_start,
3342 	.next   = unix_seq_next,
3343 	.stop   = unix_seq_stop,
3344 	.show   = unix_seq_show,
3345 };
3346 
3347 #ifdef CONFIG_BPF_SYSCALL
3348 struct bpf_unix_iter_state {
3349 	struct seq_net_private p;
3350 	unsigned int cur_sk;
3351 	unsigned int end_sk;
3352 	unsigned int max_sk;
3353 	struct sock **batch;
3354 	bool st_bucket_done;
3355 };
3356 
3357 struct bpf_iter__unix {
3358 	__bpf_md_ptr(struct bpf_iter_meta *, meta);
3359 	__bpf_md_ptr(struct unix_sock *, unix_sk);
3360 	uid_t uid __aligned(8);
3361 };
3362 
unix_prog_seq_show(struct bpf_prog * prog,struct bpf_iter_meta * meta,struct unix_sock * unix_sk,uid_t uid)3363 static int unix_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
3364 			      struct unix_sock *unix_sk, uid_t uid)
3365 {
3366 	struct bpf_iter__unix ctx;
3367 
3368 	meta->seq_num--;  /* skip SEQ_START_TOKEN */
3369 	ctx.meta = meta;
3370 	ctx.unix_sk = unix_sk;
3371 	ctx.uid = uid;
3372 	return bpf_iter_run_prog(prog, &ctx);
3373 }
3374 
bpf_iter_unix_hold_batch(struct seq_file * seq,struct sock * start_sk)3375 static int bpf_iter_unix_hold_batch(struct seq_file *seq, struct sock *start_sk)
3376 
3377 {
3378 	struct bpf_unix_iter_state *iter = seq->private;
3379 	unsigned int expected = 1;
3380 	struct sock *sk;
3381 
3382 	sock_hold(start_sk);
3383 	iter->batch[iter->end_sk++] = start_sk;
3384 
3385 	for (sk = sk_next(start_sk); sk; sk = sk_next(sk)) {
3386 		if (iter->end_sk < iter->max_sk) {
3387 			sock_hold(sk);
3388 			iter->batch[iter->end_sk++] = sk;
3389 		}
3390 
3391 		expected++;
3392 	}
3393 
3394 	spin_unlock(&seq_file_net(seq)->unx.table.locks[start_sk->sk_hash]);
3395 
3396 	return expected;
3397 }
3398 
bpf_iter_unix_put_batch(struct bpf_unix_iter_state * iter)3399 static void bpf_iter_unix_put_batch(struct bpf_unix_iter_state *iter)
3400 {
3401 	while (iter->cur_sk < iter->end_sk)
3402 		sock_put(iter->batch[iter->cur_sk++]);
3403 }
3404 
bpf_iter_unix_realloc_batch(struct bpf_unix_iter_state * iter,unsigned int new_batch_sz)3405 static int bpf_iter_unix_realloc_batch(struct bpf_unix_iter_state *iter,
3406 				       unsigned int new_batch_sz)
3407 {
3408 	struct sock **new_batch;
3409 
3410 	new_batch = kvmalloc(sizeof(*new_batch) * new_batch_sz,
3411 			     GFP_USER | __GFP_NOWARN);
3412 	if (!new_batch)
3413 		return -ENOMEM;
3414 
3415 	bpf_iter_unix_put_batch(iter);
3416 	kvfree(iter->batch);
3417 	iter->batch = new_batch;
3418 	iter->max_sk = new_batch_sz;
3419 
3420 	return 0;
3421 }
3422 
bpf_iter_unix_batch(struct seq_file * seq,loff_t * pos)3423 static struct sock *bpf_iter_unix_batch(struct seq_file *seq,
3424 					loff_t *pos)
3425 {
3426 	struct bpf_unix_iter_state *iter = seq->private;
3427 	unsigned int expected;
3428 	bool resized = false;
3429 	struct sock *sk;
3430 
3431 	if (iter->st_bucket_done)
3432 		*pos = set_bucket_offset(get_bucket(*pos) + 1, 1);
3433 
3434 again:
3435 	/* Get a new batch */
3436 	iter->cur_sk = 0;
3437 	iter->end_sk = 0;
3438 
3439 	sk = unix_get_first(seq, pos);
3440 	if (!sk)
3441 		return NULL; /* Done */
3442 
3443 	expected = bpf_iter_unix_hold_batch(seq, sk);
3444 
3445 	if (iter->end_sk == expected) {
3446 		iter->st_bucket_done = true;
3447 		return sk;
3448 	}
3449 
3450 	if (!resized && !bpf_iter_unix_realloc_batch(iter, expected * 3 / 2)) {
3451 		resized = true;
3452 		goto again;
3453 	}
3454 
3455 	return sk;
3456 }
3457 
bpf_iter_unix_seq_start(struct seq_file * seq,loff_t * pos)3458 static void *bpf_iter_unix_seq_start(struct seq_file *seq, loff_t *pos)
3459 {
3460 	if (!*pos)
3461 		return SEQ_START_TOKEN;
3462 
3463 	/* bpf iter does not support lseek, so it always
3464 	 * continue from where it was stop()-ped.
3465 	 */
3466 	return bpf_iter_unix_batch(seq, pos);
3467 }
3468 
bpf_iter_unix_seq_next(struct seq_file * seq,void * v,loff_t * pos)3469 static void *bpf_iter_unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3470 {
3471 	struct bpf_unix_iter_state *iter = seq->private;
3472 	struct sock *sk;
3473 
3474 	/* Whenever seq_next() is called, the iter->cur_sk is
3475 	 * done with seq_show(), so advance to the next sk in
3476 	 * the batch.
3477 	 */
3478 	if (iter->cur_sk < iter->end_sk)
3479 		sock_put(iter->batch[iter->cur_sk++]);
3480 
3481 	++*pos;
3482 
3483 	if (iter->cur_sk < iter->end_sk)
3484 		sk = iter->batch[iter->cur_sk];
3485 	else
3486 		sk = bpf_iter_unix_batch(seq, pos);
3487 
3488 	return sk;
3489 }
3490 
bpf_iter_unix_seq_show(struct seq_file * seq,void * v)3491 static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v)
3492 {
3493 	struct bpf_iter_meta meta;
3494 	struct bpf_prog *prog;
3495 	struct sock *sk = v;
3496 	uid_t uid;
3497 	bool slow;
3498 	int ret;
3499 
3500 	if (v == SEQ_START_TOKEN)
3501 		return 0;
3502 
3503 	slow = lock_sock_fast(sk);
3504 
3505 	if (unlikely(sk_unhashed(sk))) {
3506 		ret = SEQ_SKIP;
3507 		goto unlock;
3508 	}
3509 
3510 	uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk));
3511 	meta.seq = seq;
3512 	prog = bpf_iter_get_info(&meta, false);
3513 	ret = unix_prog_seq_show(prog, &meta, v, uid);
3514 unlock:
3515 	unlock_sock_fast(sk, slow);
3516 	return ret;
3517 }
3518 
bpf_iter_unix_seq_stop(struct seq_file * seq,void * v)3519 static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v)
3520 {
3521 	struct bpf_unix_iter_state *iter = seq->private;
3522 	struct bpf_iter_meta meta;
3523 	struct bpf_prog *prog;
3524 
3525 	if (!v) {
3526 		meta.seq = seq;
3527 		prog = bpf_iter_get_info(&meta, true);
3528 		if (prog)
3529 			(void)unix_prog_seq_show(prog, &meta, v, 0);
3530 	}
3531 
3532 	if (iter->cur_sk < iter->end_sk)
3533 		bpf_iter_unix_put_batch(iter);
3534 }
3535 
3536 static const struct seq_operations bpf_iter_unix_seq_ops = {
3537 	.start	= bpf_iter_unix_seq_start,
3538 	.next	= bpf_iter_unix_seq_next,
3539 	.stop	= bpf_iter_unix_seq_stop,
3540 	.show	= bpf_iter_unix_seq_show,
3541 };
3542 #endif
3543 #endif
3544 
3545 static const struct net_proto_family unix_family_ops = {
3546 	.family = PF_UNIX,
3547 	.create = unix_create,
3548 	.owner	= THIS_MODULE,
3549 };
3550 
3551 
unix_net_init(struct net * net)3552 static int __net_init unix_net_init(struct net *net)
3553 {
3554 	int i;
3555 
3556 	net->unx.sysctl_max_dgram_qlen = 10;
3557 	if (unix_sysctl_register(net))
3558 		goto out;
3559 
3560 #ifdef CONFIG_PROC_FS
3561 	if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
3562 			     sizeof(struct seq_net_private)))
3563 		goto err_sysctl;
3564 #endif
3565 
3566 	net->unx.table.locks = kvmalloc_array(UNIX_HASH_SIZE,
3567 					      sizeof(spinlock_t), GFP_KERNEL);
3568 	if (!net->unx.table.locks)
3569 		goto err_proc;
3570 
3571 	net->unx.table.buckets = kvmalloc_array(UNIX_HASH_SIZE,
3572 						sizeof(struct hlist_head),
3573 						GFP_KERNEL);
3574 	if (!net->unx.table.buckets)
3575 		goto free_locks;
3576 
3577 	for (i = 0; i < UNIX_HASH_SIZE; i++) {
3578 		spin_lock_init(&net->unx.table.locks[i]);
3579 		INIT_HLIST_HEAD(&net->unx.table.buckets[i]);
3580 	}
3581 
3582 	return 0;
3583 
3584 free_locks:
3585 	kvfree(net->unx.table.locks);
3586 err_proc:
3587 #ifdef CONFIG_PROC_FS
3588 	remove_proc_entry("unix", net->proc_net);
3589 err_sysctl:
3590 #endif
3591 	unix_sysctl_unregister(net);
3592 out:
3593 	return -ENOMEM;
3594 }
3595 
unix_net_exit(struct net * net)3596 static void __net_exit unix_net_exit(struct net *net)
3597 {
3598 	kvfree(net->unx.table.buckets);
3599 	kvfree(net->unx.table.locks);
3600 	unix_sysctl_unregister(net);
3601 	remove_proc_entry("unix", net->proc_net);
3602 }
3603 
3604 static struct pernet_operations unix_net_ops = {
3605 	.init = unix_net_init,
3606 	.exit = unix_net_exit,
3607 };
3608 
3609 #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
DEFINE_BPF_ITER_FUNC(unix,struct bpf_iter_meta * meta,struct unix_sock * unix_sk,uid_t uid)3610 DEFINE_BPF_ITER_FUNC(unix, struct bpf_iter_meta *meta,
3611 		     struct unix_sock *unix_sk, uid_t uid)
3612 
3613 #define INIT_BATCH_SZ 16
3614 
3615 static int bpf_iter_init_unix(void *priv_data, struct bpf_iter_aux_info *aux)
3616 {
3617 	struct bpf_unix_iter_state *iter = priv_data;
3618 	int err;
3619 
3620 	err = bpf_iter_init_seq_net(priv_data, aux);
3621 	if (err)
3622 		return err;
3623 
3624 	err = bpf_iter_unix_realloc_batch(iter, INIT_BATCH_SZ);
3625 	if (err) {
3626 		bpf_iter_fini_seq_net(priv_data);
3627 		return err;
3628 	}
3629 
3630 	return 0;
3631 }
3632 
bpf_iter_fini_unix(void * priv_data)3633 static void bpf_iter_fini_unix(void *priv_data)
3634 {
3635 	struct bpf_unix_iter_state *iter = priv_data;
3636 
3637 	bpf_iter_fini_seq_net(priv_data);
3638 	kvfree(iter->batch);
3639 }
3640 
3641 static const struct bpf_iter_seq_info unix_seq_info = {
3642 	.seq_ops		= &bpf_iter_unix_seq_ops,
3643 	.init_seq_private	= bpf_iter_init_unix,
3644 	.fini_seq_private	= bpf_iter_fini_unix,
3645 	.seq_priv_size		= sizeof(struct bpf_unix_iter_state),
3646 };
3647 
3648 static const struct bpf_func_proto *
bpf_iter_unix_get_func_proto(enum bpf_func_id func_id,const struct bpf_prog * prog)3649 bpf_iter_unix_get_func_proto(enum bpf_func_id func_id,
3650 			     const struct bpf_prog *prog)
3651 {
3652 	switch (func_id) {
3653 	case BPF_FUNC_setsockopt:
3654 		return &bpf_sk_setsockopt_proto;
3655 	case BPF_FUNC_getsockopt:
3656 		return &bpf_sk_getsockopt_proto;
3657 	default:
3658 		return NULL;
3659 	}
3660 }
3661 
3662 static struct bpf_iter_reg unix_reg_info = {
3663 	.target			= "unix",
3664 	.ctx_arg_info_size	= 1,
3665 	.ctx_arg_info		= {
3666 		{ offsetof(struct bpf_iter__unix, unix_sk),
3667 		  PTR_TO_BTF_ID_OR_NULL },
3668 	},
3669 	.get_func_proto         = bpf_iter_unix_get_func_proto,
3670 	.seq_info		= &unix_seq_info,
3671 };
3672 
bpf_iter_register(void)3673 static void __init bpf_iter_register(void)
3674 {
3675 	unix_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_UNIX];
3676 	if (bpf_iter_reg_target(&unix_reg_info))
3677 		pr_warn("Warning: could not register bpf iterator unix\n");
3678 }
3679 #endif
3680 
af_unix_init(void)3681 static int __init af_unix_init(void)
3682 {
3683 	int i, rc = -1;
3684 
3685 	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));
3686 
3687 	for (i = 0; i < UNIX_HASH_SIZE / 2; i++) {
3688 		spin_lock_init(&bsd_socket_locks[i]);
3689 		INIT_HLIST_HEAD(&bsd_socket_buckets[i]);
3690 	}
3691 
3692 	rc = proto_register(&unix_dgram_proto, 1);
3693 	if (rc != 0) {
3694 		pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
3695 		goto out;
3696 	}
3697 
3698 	rc = proto_register(&unix_stream_proto, 1);
3699 	if (rc != 0) {
3700 		pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
3701 		proto_unregister(&unix_dgram_proto);
3702 		goto out;
3703 	}
3704 
3705 	sock_register(&unix_family_ops);
3706 	register_pernet_subsys(&unix_net_ops);
3707 	unix_bpf_build_proto();
3708 
3709 #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
3710 	bpf_iter_register();
3711 #endif
3712 
3713 out:
3714 	return rc;
3715 }
3716 
3717 /* Later than subsys_initcall() because we depend on stuff initialised there */
3718 fs_initcall(af_unix_init);
3719