xref: /illumos-gate/usr/src/uts/common/fs/portfs/port_fop.c (revision b6805bf78d2bbbeeaea8909a05623587b42d58b3)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 
27 /*
28  * File Events Notification
29  * ------------------------
30  *
31  * The File Events Notification facility provides file and directory change
32  * notification. It is implemented as an event source(PORT_SOURCE_FILE)
33  * under the Event Ports framework. Therefore the API is an extension to
34  * the Event Ports API.
35  *
36  * It uses the FEM (File Events Monitoring) framework to intercept
37  * operations on the files & directories and generate appropriate events.
38  *
39  * It provides event notification in accordance with what an application
40  * can find out by stat`ing the file and comparing time stamps. The various
41  * system calls that update the file's access, modification, and change
42  * time stamps are documented in the man page section 2.
43  *
44  * It is non intrusive. That is, having an active file event watch on a file
45  * or directory will not prevent it from being removed or renamed or block an
46  * unmount operation of the file system where the watched file or directory
47  * resides.
48  *
49  *
50  * Interface:
51  * ----------
52  *
53  *   The object for this event source is of type 'struct file_obj *'
54  *
55  *   The file that needs to be monitored is specified in 'fo_name'.
56  *   The time stamps collected by a stat(2) call are passed in fo_atime,
57  *   fo_mtime, fo_ctime. At the time a file events watch is registered, the
58  *   time stamps passed in are compared with the current time stamps of the
59  *   file. If it has changed, relevant events are sent immediately. If the time
60  *   stamps are all '0', they will not be compared.
61  *
62  *
63  * The events are delivered to an event port. A port is created using
64  * port_create().
65  *
66  * To register a file events watch on a file or directory.
67  *
68  *   port_associate(int port, PORT_SOURCE_FILE, (uintptr_t)&fobj, events, user)
69  *
70  *   'user' is the user pointer to be returned with the event.
71  *
72  * To de-register a file events watch,
73  *
74  *   port_dissociate(int port, PORT_SOURCE_FILE, (uintptr_t)&fobj)
75  *
76  * The events are collected using the port_get()/port_getn() interface. The
77  * event source will be PORT_SOURCE_FILE.
78  *
79  * After an event is delivered, the file events watch gets de-activated. To
80  * receive the next event, the process will have to re-register the watch and
81  * activate it by calling port_associate() again. This behavior is intentional
82  * and supports proper multi threaded programming when using file events
83  * notification API.
84  *
85  *
86  * Implementation overview:
87  * ------------------------
88  *
 * Each file events watch is represented by 'portfop_t' in the kernel. A
 * cache (in portfop_cache_t) of these portfop_t's is maintained per event
 * port by this source. The object here is the pointer to the file_obj
92  * structure. The portfop_t's are hashed in using the object pointer. Therefore
93  * it is possible to have multiple file events watches on a file by the same
94  * process by using different object structure(file_obj_t) and hence can
95  * receive multiple event notification for a file. These watches can be for
96  * different event types.
97  *
98  * The cached entries of these file objects are retained, even after delivering
99  * an event, marking them inactive for performance reasons. The assumption
 * is that the process would come back and re-register the file to receive
 * further events. When there are more than 'port_fop_maxpfps' watches per file
 * it will attempt to free the oldest inactive watches.
103  *
104  * In case the event that is being delivered is an exception event, the cached
105  * entries get removed. An exception event on a file or directory means its
106  * identity got changed(rename to/from, delete, mounted over, file system
107  * unmount).
108  *
109  * If the event port gets closed, all the associated file event watches will be
110  * removed and discarded.
111  *
112  *
113  * Data structures:
114  * ----------------
115  *
116  * The list of file event watches per file are managed by the data structure
117  * portfop_vp_t. The first time a file events watch is registered for a file,
118  * a portfop_vp_t is installed on the vnode_t's member v_fopdata. This gets
119  * removed and freed only when the vnode becomes inactive. The FEM hooks are
120  * also installed when the first watch is registered on a file. The FEM hooks
121  * get un-installed when all the watches are removed.
122  *
123  * Each file events watch is represented by the structure portfop_t. They
124  * get added to a list of portfop_t's on the vnode(portfop_vp_t). After
125  * delivering an event, the portfop_t is marked inactive but retained. It is
126  * moved to the end of the list. All the active portfop_t's are maintained at
127  * the beginning. In case of exception events, the portfop_t will be removed
128  * and discarded.
129  *
130  * To intercept unmount operations, FSEM hooks are added to the file system
131  * under which files are being watched. A hash table('portfop_vfs_hash_t') of
132  * active file systems is maintained. Each file system that has active watches
133  * is represented by 'portfop_vfs_t' and is added to the hash table.
134  * The vnode's 'portfop_vp_t' structure is added to the list of files(vnodes)
135  * being watched on the portfop_vfs_t structure.
136  *
137  *
138  * File system support:
139  * -------------------
140  *
141  * The file system implementation has to provide vnode event notifications
142  * (vnevents) in order to support watching any files on that file system.
143  * The vnode events(vnevents) are notifications provided by the file system
144  * for name based file operations like rename, remove etc, which do not go
145  * thru the VOP_** interfaces. If the file system does not implement vnode
146  * notifications, watching for file events on such file systems is not
147  * supported. The vnode event notifications support is determined by the call
148  * vnevent_support(vp) (VOP_VNEVENT(vp, VE_SUPPORT)), which the file system
149  * has to implement.
150  *
151  *
152  * Locking order:
153  * --------------
154  *
155  * A file(vnode) can have file event watches registered by different processes.
156  * There is one portfop_t per watch registered. These are on the vnode's list
157  * protected by the mutex 'pvp_mutex' in 'portfop_vp_t'. The portfop_t's are
158  * also on the per port cache. The cache is protected by the pfc_lock of
159  * portfop_cache_t. The lock order here is 'pfc_lock' -> 'pvp_mutex'.
160  *
161  */
162 
163 #include <sys/types.h>
164 #include <sys/systm.h>
165 #include <sys/stat.h>
166 #include <sys/errno.h>
167 #include <sys/kmem.h>
168 #include <sys/sysmacros.h>
169 #include <sys/debug.h>
170 #include <sys/vnode.h>
171 #include <sys/poll_impl.h>
172 #include <sys/port_impl.h>
173 #include <sys/fem.h>
174 #include <sys/vfs_opreg.h>
175 #include <sys/atomic.h>
176 #include <sys/mount.h>
177 #include <sys/mntent.h>
178 
179 /*
180  * For special case support of mnttab (/etc/mnttab).
181  */
182 extern struct vnode *vfs_mntdummyvp;
183 extern int mntfstype;
184 
185 #define	PORTFOP_PVFSH(vfsp)	(&portvfs_hash[PORTFOP_PVFSHASH(vfsp)])
186 portfop_vfs_hash_t	 portvfs_hash[PORTFOP_PVFSHASH_SZ];
187 
188 #define	PORTFOP_NVP	20
189 /*
190  * Inactive file event watches(portfop_t) are retained on the vnode's list
191  * for performance reason. If the applications re-registers the file, the
192  * inactive entry is made active and moved up the list.
193  *
194  * If there are greater then the following number of watches on a vnode,
195  * it will attempt to discard an oldest inactive watch(pfp) at the time
196  * a new watch is being registered and when events get delivered. We
197  * do this to avoid accumulating inactive watches on a file.
198  */
199 int	port_fop_maxpfps = 20;
200 
201 /* local functions */
202 static int	port_fop_callback(void *, int *, pid_t, int, void *);
203 
204 static void	port_pcache_insert(portfop_cache_t *, portfop_t *);
205 static void	port_pcache_delete(portfop_cache_t *, portfop_t *);
206 static void	port_close_fop(void *arg, int port, pid_t pid, int lastclose);
207 
208 /*
209  * port fop functions that will be the fem hooks.
210  */
211 static int port_fop_open(femarg_t *vf, int mode, cred_t *cr,
212     caller_context_t *);
213 static int port_fop_read(femarg_t *vf, uio_t *uiop, int ioflag, cred_t *cr,
214     struct caller_context *ct);
215 static int port_fop_write(femarg_t *vf, uio_t *uiop, int ioflag, cred_t *cr,
216     caller_context_t *ct);
217 static int port_fop_map(femarg_t *vf, offset_t off, struct as *as,
218     caddr_t *addrp, size_t len, uchar_t prot, uchar_t maxport,
219     uint_t flags, cred_t *cr, caller_context_t *ct);
220 static int port_fop_setattr(femarg_t *vf, vattr_t *vap, int flags, cred_t *cr,
221     caller_context_t *ct);
222 static int port_fop_create(femarg_t *vf, char *name, vattr_t *vap,
223     vcexcl_t excl, int mode, vnode_t **vpp, cred_t *cr, int flag,
224     caller_context_t *ct, vsecattr_t *vsecp);
225 static int port_fop_remove(femarg_t *vf, char *nm, cred_t *cr,
226     caller_context_t *ct, int flags);
227 static int port_fop_link(femarg_t *vf, vnode_t *svp, char *tnm, cred_t *cr,
228     caller_context_t *ct, int flags);
229 static int port_fop_rename(femarg_t *vf, char *snm, vnode_t *tdvp, char *tnm,
230     cred_t *cr, caller_context_t *ct, int flags);
231 static int port_fop_mkdir(femarg_t *vf, char *dirname, vattr_t *vap,
232     vnode_t **vpp, cred_t *cr, caller_context_t *ct, int flags,
233     vsecattr_t *vsecp);
234 static int port_fop_rmdir(femarg_t *vf, char *nm, vnode_t *cdir, cred_t *cr,
235     caller_context_t *ct, int flags);
236 static int port_fop_readdir(femarg_t *vf, uio_t *uiop, cred_t *cr, int *eofp,
237     caller_context_t *ct, int flags);
238 static int port_fop_symlink(femarg_t *vf, char *linkname, vattr_t *vap,
239     char *target, cred_t *cr, caller_context_t *ct, int flags);
240 static int port_fop_setsecattr(femarg_t *vf, vsecattr_t *vsap, int flag,
241     cred_t *cr, caller_context_t *ct);
242 
243 static int port_fop_vnevent(femarg_t *vf, vnevent_t vnevent, vnode_t *dvp,
244     char *cname, caller_context_t *ct);
245 
246 static int port_fop_unmount(fsemarg_t *vf, int flag, cred_t *cr);
247 
248 
249 /*
250  * Fem hooks.
251  */
252 const fs_operation_def_t	port_vnodesrc_template[] = {
253 	VOPNAME_OPEN,		{ .femop_open = port_fop_open },
254 	VOPNAME_READ,		{ .femop_read = port_fop_read },
255 	VOPNAME_WRITE,		{ .femop_write = port_fop_write },
256 	VOPNAME_MAP,		{ .femop_map = port_fop_map },
257 	VOPNAME_SETATTR, 	{ .femop_setattr = port_fop_setattr },
258 	VOPNAME_CREATE,		{ .femop_create = port_fop_create },
259 	VOPNAME_REMOVE,		{ .femop_remove = port_fop_remove },
260 	VOPNAME_LINK,		{ .femop_link = port_fop_link },
261 	VOPNAME_RENAME,		{ .femop_rename = port_fop_rename },
262 	VOPNAME_MKDIR,		{ .femop_mkdir = port_fop_mkdir },
263 	VOPNAME_RMDIR,		{ .femop_rmdir = port_fop_rmdir },
264 	VOPNAME_READDIR,	{ .femop_readdir = port_fop_readdir },
265 	VOPNAME_SYMLINK,	{ .femop_symlink = port_fop_symlink },
266 	VOPNAME_SETSECATTR, 	{ .femop_setsecattr = port_fop_setsecattr },
267 	VOPNAME_VNEVENT,	{ .femop_vnevent = port_fop_vnevent },
268 	NULL,	NULL
269 };
270 
271 /*
272  * Fsem - vfs ops hooks
273  */
274 const fs_operation_def_t	port_vfssrc_template[] = {
275 	VFSNAME_UNMOUNT, 	{ .fsemop_unmount = port_fop_unmount },
276 	NULL,	NULL
277 };
278 
279 fem_t *fop_femop;
280 fsem_t *fop_fsemop;
281 
282 static fem_t *
283 port_fop_femop()
284 {
285 	fem_t *femp;
286 	if (fop_femop != NULL)
287 		return (fop_femop);
288 	if (fem_create("portfop_fem",
289 	    (const struct fs_operation_def *)port_vnodesrc_template,
290 	    (fem_t **)&femp)) {
291 		return (NULL);
292 	}
293 	if (casptr(&fop_femop, NULL, femp) != NULL) {
294 		/*
295 		 * some other thread beat us to it.
296 		 */
297 		fem_free(femp);
298 	}
299 	return (fop_femop);
300 }
301 
302 static fsem_t *
303 port_fop_fsemop()
304 {
305 	fsem_t *fsemp;
306 	if (fop_fsemop != NULL)
307 		return (fop_fsemop);
308 	if (fsem_create("portfop_fsem", port_vfssrc_template, &fsemp)) {
309 		return (NULL);
310 	}
311 	if (casptr(&fop_fsemop, NULL, fsemp) != NULL) {
312 		/*
313 		 * some other thread beat us to it.
314 		 */
315 		fsem_free(fsemp);
316 	}
317 	return (fop_fsemop);
318 }
319 
320 /*
321  * port_fop_callback()
322  * - PORT_CALLBACK_DEFAULT
323  *	The file event will be delivered to the application.
324  * - PORT_CALLBACK_DISSOCIATE
325  *	The object will be dissociated from  the port.
326  * - PORT_CALLBACK_CLOSE
327  *	The object will be dissociated from the port because the port
328  *	is being closed.
329  */
330 /* ARGSUSED */
331 static int
332 port_fop_callback(void *arg, int *events, pid_t pid, int flag, void *evp)
333 {
334 	portfop_t	*pfp = (portfop_t *)arg;
335 	port_kevent_t	*pkevp = (port_kevent_t *)evp;
336 	int		error = 0;
337 
338 	ASSERT((events != NULL));
339 	if (flag == PORT_CALLBACK_DEFAULT) {
340 		if (curproc->p_pid != pid) {
341 				return (EACCES); /* deny delivery of events */
342 		}
343 
344 		*events = pkevp->portkev_events;
345 		pkevp->portkev_events = 0;
346 		if (pfp != NULL) {
347 			pfp->pfop_flags &= ~PORT_FOP_KEV_ONQ;
348 		}
349 	}
350 	return (error);
351 }
352 
353 /*
354  * Inserts a portfop_t into the port sources cache's.
355  */
356 static void
357 port_pcache_insert(portfop_cache_t *pfcp, portfop_t *pfp)
358 {
359 	portfop_t	**bucket;
360 
361 	ASSERT(MUTEX_HELD(&pfcp->pfc_lock));
362 	bucket = PORT_FOP_BUCKET(pfcp, pfp->pfop_object);
363 	pfp->pfop_hashnext = *bucket;
364 	*bucket = pfp;
365 	pfcp->pfc_objcount++;
366 }
367 
368 /*
369  * Remove the pfp from the port source cache.
370  */
371 static void
372 port_pcache_delete(portfop_cache_t *pfcp, portfop_t *pfp)
373 {
374 	portfop_t	*lpdp;
375 	portfop_t	*cpdp;
376 	portfop_t	**bucket;
377 
378 	bucket = PORT_FOP_BUCKET(pfcp, pfp->pfop_object);
379 	cpdp = *bucket;
380 	if (pfp == cpdp) {
381 		*bucket = pfp->pfop_hashnext;
382 	} else {
383 		while (cpdp != NULL) {
384 			lpdp = cpdp;
385 			cpdp = cpdp->pfop_hashnext;
386 			if (cpdp == pfp) {
387 				/* portfop struct found */
388 				lpdp->pfop_hashnext = pfp->pfop_hashnext;
389 				break;
390 			}
391 		}
392 	}
393 	pfcp->pfc_objcount--;
394 }
395 
396 /*
397  * The vnode's(portfop_vp_t) pfp list management. The 'pvp_mutex' is held
398  * when these routines are called.
399  *
400  * The 'pvp_lpfop' member points to the oldest inactive entry on the list.
401  * It is used to discard the oldtest inactive pfp if the number of entries
402  * exceed the limit.
403  */
404 static void
405 port_fop_listinsert(portfop_vp_t *pvp, portfop_t *pfp, int where)
406 {
407 	if (where == 1) {
408 		list_insert_head(&pvp->pvp_pfoplist, (void *)pfp);
409 	} else {
410 		list_insert_tail(&pvp->pvp_pfoplist, (void *)pfp);
411 	}
412 	if (pvp->pvp_lpfop == NULL) {
413 		pvp->pvp_lpfop = pfp;
414 	}
415 	pvp->pvp_cnt++;
416 }
417 
418 static void
419 port_fop_listinsert_head(portfop_vp_t *pvp, portfop_t *pfp)
420 {
421 	port_fop_listinsert(pvp, pfp, 1);
422 }
423 
424 static void
425 port_fop_listinsert_tail(portfop_vp_t *pvp, portfop_t *pfp)
426 {
427 	/*
428 	 * We point lpfop to an inactive one, if it was initially pointing
429 	 * to an active one. Insert to the tail is done only when a pfp goes
430 	 * inactive.
431 	 */
432 	if (pvp->pvp_lpfop && pvp->pvp_lpfop->pfop_flags & PORT_FOP_ACTIVE) {
433 		pvp->pvp_lpfop = pfp;
434 	}
435 	port_fop_listinsert(pvp, pfp, 0);
436 }
437 
438 static void
439 port_fop_listremove(portfop_vp_t *pvp, portfop_t *pfp)
440 {
441 	if (pvp->pvp_lpfop == pfp) {
442 		pvp->pvp_lpfop = list_next(&pvp->pvp_pfoplist, (void *)pfp);
443 	}
444 
445 	list_remove(&pvp->pvp_pfoplist, (void *)pfp);
446 
447 	pvp->pvp_cnt--;
448 	if (pvp->pvp_cnt && pvp->pvp_lpfop == NULL) {
449 		pvp->pvp_lpfop = list_head(&pvp->pvp_pfoplist);
450 	}
451 }
452 
453 static void
454 port_fop_listmove(portfop_vp_t *pvp, list_t *tlist)
455 {
456 	list_move_tail(tlist, &pvp->pvp_pfoplist);
457 	pvp->pvp_lpfop = NULL;
458 	pvp->pvp_cnt = 0;
459 }
460 
461 /*
462  * Remove a portfop_t from the port cache hash table and discard it.
463  * It is called only when pfp is not on the vnode's list. Otherwise,
464  * port_remove_fop() is called.
465  */
466 void
467 port_pcache_remove_fop(portfop_cache_t *pfcp, portfop_t *pfp)
468 {
469 	port_kevent_t	*pkevp;
470 
471 
472 	ASSERT(MUTEX_HELD(&pfcp->pfc_lock));
473 
474 	pkevp = pfp->pfop_pev;
475 	pfp->pfop_pev = NULL;
476 
477 	if (pkevp != NULL) {
478 		(void) port_remove_done_event(pkevp);
479 		port_free_event_local(pkevp, 0);
480 	}
481 
482 	port_pcache_delete(pfcp, pfp);
483 
484 	if (pfp->pfop_cname != NULL)
485 		kmem_free(pfp->pfop_cname, pfp->pfop_clen + 1);
486 	kmem_free(pfp, sizeof (portfop_t));
487 	if (pfcp->pfc_objcount == 0)
488 		cv_signal(&pfcp->pfc_lclosecv);
489 }
490 
491 /*
492  * if we have too many watches on the vnode, attempt to discard an
493  * inactive one.
494  */
495 static void
496 port_fop_trimpfplist(vnode_t *vp)
497 {
498 	portfop_vp_t *pvp;
499 	portfop_t *pfp = NULL;
500 	portfop_cache_t *pfcp;
501 	vnode_t	*tdvp;
502 
503 	/*
504 	 * Due to a reference the vnode cannot disappear, v_fopdata should
505 	 * not change.
506 	 */
507 	if ((pvp = vp->v_fopdata) != NULL &&
508 	    pvp->pvp_cnt > port_fop_maxpfps) {
509 		mutex_enter(&pvp->pvp_mutex);
510 		pfp = pvp->pvp_lpfop;
511 		pfcp = pfp->pfop_pcache;
512 		/*
513 		 * only if we can get the cache lock, we need to
514 		 * do this due to reverse lock order and some thread
515 		 * that may be trying to reactivate this entry.
516 		 */
517 		if (mutex_tryenter(&pfcp->pfc_lock)) {
518 			if (pfp && !(pfp->pfop_flags & PORT_FOP_ACTIVE) &&
519 			    !(pfp->pfop_flags & PORT_FOP_KEV_ONQ)) {
520 				port_fop_listremove(pvp, pfp);
521 				pfp->pfop_flags |= PORT_FOP_REMOVING;
522 			} else {
523 				mutex_exit(&pfcp->pfc_lock);
524 				pfp = NULL;
525 			}
526 		} else {
527 			pfp = NULL;
528 		}
529 		mutex_exit(&pvp->pvp_mutex);
530 
531 		/*
532 		 * discard pfp if any.
533 		 */
534 		if (pfp != NULL) {
535 			tdvp = pfp->pfop_dvp;
536 			port_pcache_remove_fop(pfcp, pfp);
537 			mutex_exit(&pfcp->pfc_lock);
538 			if (tdvp != NULL)
539 				VN_RELE(tdvp);
540 		}
541 	}
542 }
543 
544 /*
545  * This routine returns 1, if the vnode can be rele'ed by the caller.
546  * The caller has to VN_RELE the vnode with out holding any
547  * locks.
548  */
549 int
550 port_fop_femuninstall(vnode_t *vp)
551 {
552 	portfop_vp_t	*pvp;
553 	vfs_t		*vfsp;
554 	portfop_vfs_t *pvfsp;
555 	portfop_vfs_hash_t	*pvfsh;
556 	kmutex_t	*mtx;
557 	int	ret = 0;
558 
559 	/*
560 	 * if list is empty, uninstall fem.
561 	 */
562 	pvp = vp->v_fopdata;
563 	ASSERT(MUTEX_HELD(&pvp->pvp_mutex));
564 
565 	/*
566 	 * make sure the list is empty.
567 	 */
568 	if (!list_head(&pvp->pvp_pfoplist)) {
569 
570 		/*
571 		 * we could possibly uninstall the fem hooks when
572 		 * the vnode becomes inactive and the v_fopdata is
573 		 * free. But the hooks get triggered unnecessarily
574 		 * even though there are no active watches. So, we
575 		 * uninstall it here.
576 		 */
577 		(void) fem_uninstall(vp, (fem_t *)pvp->pvp_femp, vp);
578 		pvp->pvp_femp = NULL;
579 		mutex_exit(&pvp->pvp_mutex);
580 
581 
582 		/*
583 		 * If we successfully uninstalled fem, no process is watching
584 		 * this vnode, Remove it from the vfs's list of watched vnodes.
585 		 */
586 		pvfsp = pvp->pvp_pvfsp;
587 		vfsp = vp->v_vfsp;
588 		pvfsh = PORTFOP_PVFSH(vfsp);
589 		mtx = &pvfsh->pvfshash_mutex;
590 		mutex_enter(mtx);
591 		/*
592 		 * If unmount is in progress, that thread will remove and
593 		 * release the vnode from the vfs's list, just leave.
594 		 */
595 		if (!pvfsp->pvfs_unmount) {
596 			list_remove(&pvfsp->pvfs_pvplist, pvp);
597 			mutex_exit(mtx);
598 			ret = 1;
599 		} else {
600 			mutex_exit(mtx);
601 		}
602 	} else {
603 		mutex_exit(&pvp->pvp_mutex);
604 	}
605 	return (ret);
606 }
607 
608 /*
609  * Remove pfp from the vnode's watch list and the cache and discard it.
610  * If it is the last pfp on the vnode's list, the fem hooks get uninstalled.
611  * Returns 1 if pfp removed successfully.
612  *
613  * The *active is set to indicate if the pfp was still active(no events had
614  * been posted, or the posted event had not been collected yet and it was
615  * able to remove it from the port's queue).
616  *
617  * vpp and dvpp will point to the vnode and directory vnode which the caller
618  * is required to VN_RELE without holding any locks.
619  */
620 int
621 port_remove_fop(portfop_t *pfp, portfop_cache_t *pfcp, int cleanup,
622     int *active, vnode_t **vpp, vnode_t **dvpp)
623 {
624 	vnode_t		*vp;
625 	portfop_vp_t	*pvp;
626 	int	tactive = 0;
627 
628 	ASSERT(MUTEX_HELD(&pfcp->pfc_lock));
629 	vp = pfp->pfop_vp;
630 	pvp = vp->v_fopdata;
631 	mutex_enter(&pvp->pvp_mutex);
632 
633 	/*
634 	 * if not cleanup, remove it only if the pfp is still active and
635 	 * is not being removed by some other thread.
636 	 */
637 	if (!cleanup && (!(pfp->pfop_flags & PORT_FOP_ACTIVE) ||
638 	    pfp->pfop_flags & PORT_FOP_REMOVING)) {
639 		mutex_exit(&pvp->pvp_mutex);
640 		return (0);
641 	}
642 
643 	/*
644 	 * mark it inactive.
645 	 */
646 	if (pfp->pfop_flags & PORT_FOP_ACTIVE) {
647 		pfp->pfop_flags &= ~PORT_FOP_ACTIVE;
648 		tactive = 1;
649 	}
650 
651 	/*
652 	 * Check if the pfp is still on the vnode's list. This can
653 	 * happen if port_fop_excep() is in the process of removing it.
654 	 * In case of cleanup, just mark this pfp as inactive so that no
655 	 * new events (VNEVENT) will be delivered, and remove it from the
656 	 * event queue if it was already queued. Since the cache lock is
657 	 * held, the pfp will not disappear, even though it is being
658 	 * removed.
659 	 */
660 	if (pfp->pfop_flags & PORT_FOP_REMOVING) {
661 		mutex_exit(&pvp->pvp_mutex);
662 		if (!tactive && port_remove_done_event(pfp->pfop_pev)) {
663 			pfp->pfop_flags &= ~PORT_FOP_KEV_ONQ;
664 			tactive = 1;
665 		}
666 		if (active) {
667 			*active = tactive;
668 		}
669 		return (1);
670 	}
671 
672 	/*
673 	 * if we find an event on the queue and removed it, then this
674 	 * association is considered active.
675 	 */
676 	if (!tactive && port_remove_done_event(pfp->pfop_pev)) {
677 		pfp->pfop_flags &= ~PORT_FOP_KEV_ONQ;
678 		tactive = 1;
679 	}
680 
681 	if (active) {
682 		*active = tactive;
683 	}
684 	pvp = (portfop_vp_t *)vp->v_fopdata;
685 
686 	/*
687 	 * remove pfp from the vnode's list
688 	 */
689 	port_fop_listremove(pvp, pfp);
690 
691 	/*
692 	 * If no more associations on the vnode, uninstall fem hooks.
693 	 * The pvp mutex will be released in this routine.
694 	 */
695 	if (port_fop_femuninstall(vp))
696 		*vpp = vp;
697 	*dvpp = pfp->pfop_dvp;
698 	port_pcache_remove_fop(pfcp, pfp);
699 	return (1);
700 }
701 
702 /*
703  * This routine returns a pointer to a cached portfop entry, or NULL if it
704  * does not find it in the hash table. The object pointer is used as index.
705  * The entries are hashed by the object's address. We need to match the pid
706  * as the evet port can be shared between processes. The file events
707  * watches are per process only.
708  */
709 portfop_t *
710 port_cache_lookup_fop(portfop_cache_t *pfcp, pid_t pid, uintptr_t obj)
711 {
712 	portfop_t	*pfp = NULL;
713 	portfop_t	**bucket;
714 
715 	ASSERT(MUTEX_HELD(&pfcp->pfc_lock));
716 	bucket = PORT_FOP_BUCKET(pfcp, obj);
717 	pfp = *bucket;
718 	while (pfp != NULL) {
719 		if (pfp->pfop_object == obj && pfp->pfop_pid == pid)
720 			break;
721 		pfp = pfp->pfop_hashnext;
722 	}
723 	return (pfp);
724 }
725 
726 /*
727  * Given the file name, get the vnode and also the directory vnode
728  * On return, the vnodes are held (VN_HOLD). The caller has to VN_RELE
729  * the vnode(s).
730  */
731 int
732 port_fop_getdvp(void *objptr, vnode_t **vp, vnode_t **dvp,
733 	char **cname, int *len, int follow)
734 {
735 	int error = 0;
736 	struct pathname pn;
737 	char *fname;
738 
739 	if (get_udatamodel() == DATAMODEL_NATIVE) {
740 		fname = ((file_obj_t *)objptr)->fo_name;
741 #ifdef  _SYSCALL32_IMPL
742 	} else {
743 		fname = (caddr_t)(uintptr_t)((file_obj32_t *)objptr)->fo_name;
744 #endif	/* _SYSCALL32_IMPL */
745 	}
746 
747 	/*
748 	 * lookuppn may fail with EINVAL, if dvp is  non-null(like when
749 	 * looking for "."). So call again with dvp = NULL.
750 	 */
751 	if ((error = pn_get(fname, UIO_USERSPACE, &pn)) != 0) {
752 		return (error);
753 	}
754 
755 	error = lookuppn(&pn, NULL, follow, dvp, vp);
756 	if (error == EINVAL) {
757 		pn_free(&pn);
758 		if ((error = pn_get(fname, UIO_USERSPACE, &pn)) != 0) {
759 			return (error);
760 		}
761 		error = lookuppn(&pn, NULL, follow, NULL, vp);
762 		if (dvp != NULL) {
763 			*dvp = NULL;
764 		}
765 	}
766 
767 	if (error == 0 && cname != NULL && len != NULL) {
768 		pn_setlast(&pn);
769 		*len = pn.pn_pathlen;
770 		*cname = kmem_alloc(*len + 1, KM_SLEEP);
771 		(void) strcpy(*cname, pn.pn_path);
772 	} else {
773 		if (cname != NULL && len != NULL) {
774 			*cname = NULL;
775 			*len = 0;
776 		}
777 	}
778 
779 	pn_free(&pn);
780 	return (error);
781 }
782 
783 port_source_t *
784 port_getsrc(port_t *pp, int source)
785 {
786 	port_source_t *pse;
787 	int	lock = 0;
788 	/*
789 	 * get the port source structure.
790 	 */
791 	if (!MUTEX_HELD(&pp->port_queue.portq_source_mutex)) {
792 		mutex_enter(&pp->port_queue.portq_source_mutex);
793 		lock = 1;
794 	}
795 
796 	pse = pp->port_queue.portq_scache[PORT_SHASH(source)];
797 	for (; pse != NULL; pse = pse->portsrc_next) {
798 		if (pse->portsrc_source == source)
799 			break;
800 	}
801 
802 	if (lock) {
803 		mutex_exit(&pp->port_queue.portq_source_mutex);
804 	}
805 	return (pse);
806 }
807 
808 
809 /*
810  * Compare time stamps and generate an event if it has changed.
811  * Note that the port cache pointer will be valid due to a reference
812  * to the port. We need to grab the port cache lock and verify that
813  * the pfp is still the same before proceeding to deliver an event.
814  */
815 static void
816 port_check_timestamp(portfop_cache_t *pfcp, vnode_t *vp, vnode_t *dvp,
817 	portfop_t *pfp, void *objptr, uintptr_t object)
818 {
819 	vattr_t		vatt;
820 	portfop_vp_t	*pvp = vp->v_fopdata;
821 	int		events = 0;
822 	port_kevent_t	*pkevp;
823 	file_obj_t	*fobj;
824 	portfop_t	*tpfp;
825 
826 	/*
827 	 * If time stamps are specified, get attributes and compare.
828 	 */
829 	vatt.va_mask = AT_ATIME|AT_MTIME|AT_CTIME;
830 	if (get_udatamodel() == DATAMODEL_NATIVE) {
831 		fobj = (file_obj_t *)objptr;
832 		if (fobj->fo_atime.tv_sec || fobj->fo_atime.tv_nsec ||
833 		    fobj->fo_mtime.tv_sec || fobj->fo_mtime.tv_nsec ||
834 		    fobj->fo_ctime.tv_sec || fobj->fo_ctime.tv_nsec) {
835 			if (VOP_GETATTR(vp, &vatt, 0, CRED(), NULL)) {
836 				return;
837 			}
838 		} else {
839 			/*
840 			 * timestamp not specified, all 0's,
841 			 */
842 			return;
843 		}
844 #ifdef  _SYSCALL32_IMPL
845 	} else {
846 		file_obj32_t	*fobj32;
847 		fobj32 = (file_obj32_t *)objptr;
848 		if (fobj32->fo_atime.tv_sec || fobj32->fo_atime.tv_nsec ||
849 		    fobj32->fo_mtime.tv_sec || fobj32->fo_mtime.tv_nsec ||
850 		    fobj32->fo_ctime.tv_sec || fobj32->fo_ctime.tv_nsec) {
851 			if (VOP_GETATTR(vp, &vatt, 0, CRED(), NULL)) {
852 				return;
853 			}
854 		} else {
855 			/*
856 			 * timestamp not specified, all 0.
857 			 */
858 			return;
859 		}
860 #endif /* _SYSCALL32_IMPL */
861 	}
862 
863 	/*
864 	 * Now grab the cache lock and verify that we are still
865 	 * dealing with the same pfp and curthread is the one
866 	 * which registered it. We need to do this to avoid
867 	 * delivering redundant events.
868 	 */
869 	mutex_enter(&pfcp->pfc_lock);
870 	tpfp = port_cache_lookup_fop(pfcp, curproc->p_pid, object);
871 
872 	if (tpfp == NULL || tpfp != pfp ||
873 	    pfp->pfop_vp != vp || pfp->pfop_dvp != dvp ||
874 	    pfp->pfop_callrid != curthread ||
875 	    !(pfp->pfop_flags & PORT_FOP_ACTIVE)) {
876 		/*
877 		 * Some other event was delivered, the file
878 		 * watch was removed or reassociated. Just
879 		 * ignore it and leave
880 		 */
881 		mutex_exit(&pfcp->pfc_lock);
882 		return;
883 	}
884 
885 	mutex_enter(&pvp->pvp_mutex);
886 	/*
887 	 * The pfp cannot disappear as the port cache lock is held.
888 	 * While the pvp_mutex is held, no events will get delivered.
889 	 */
890 	if (pfp->pfop_flags & PORT_FOP_ACTIVE &&
891 	    !(pfp->pfop_flags & PORT_FOP_REMOVING)) {
892 		if (get_udatamodel() == DATAMODEL_NATIVE) {
893 			fobj = (file_obj_t *)objptr;
894 			if (pfp->pfop_events & FILE_ACCESS &&
895 			    (fobj->fo_atime.tv_sec || fobj->fo_atime.tv_nsec) &&
896 			    (vatt.va_atime.tv_sec != fobj->fo_atime.tv_sec ||
897 			    vatt.va_atime.tv_nsec != fobj->fo_atime.tv_nsec))
898 				events |= FILE_ACCESS;
899 
900 			if (pfp->pfop_events & FILE_MODIFIED &&
901 			    (fobj->fo_mtime.tv_sec || fobj->fo_mtime.tv_nsec) &&
902 			    (vatt.va_mtime.tv_sec != fobj->fo_mtime.tv_sec ||
903 			    vatt.va_mtime.tv_nsec != fobj->fo_mtime.tv_nsec))
904 				events |= FILE_MODIFIED;
905 
906 			if (pfp->pfop_events & FILE_ATTRIB &&
907 			    (fobj->fo_ctime.tv_sec || fobj->fo_ctime.tv_nsec) &&
908 			    (vatt.va_ctime.tv_sec != fobj->fo_ctime.tv_sec ||
909 			    vatt.va_ctime.tv_nsec != fobj->fo_ctime.tv_nsec))
910 				events |= FILE_ATTRIB;
911 #ifdef  _SYSCALL32_IMPL
912 		} else {
913 			file_obj32_t	*fobj32;
914 			fobj32 = (file_obj32_t *)objptr;
915 			if (pfp->pfop_events & FILE_ACCESS &&
916 			    (fobj32->fo_atime.tv_sec ||
917 			    fobj32->fo_atime.tv_nsec) &&
918 			    (vatt.va_atime.tv_sec != fobj32->fo_atime.tv_sec ||
919 			    vatt.va_atime.tv_nsec != fobj32->fo_atime.tv_nsec))
920 				events |= FILE_ACCESS;
921 
922 			if (pfp->pfop_events & FILE_MODIFIED &&
923 			    (fobj32->fo_mtime.tv_sec ||
924 			    fobj32->fo_mtime.tv_nsec) &&
925 			    (vatt.va_mtime.tv_sec != fobj32->fo_mtime.tv_sec ||
926 			    vatt.va_mtime.tv_nsec != fobj32->fo_mtime.tv_nsec))
927 				events |= FILE_MODIFIED;
928 
929 			if (pfp->pfop_events & FILE_ATTRIB &&
930 			    (fobj32->fo_ctime.tv_sec ||
931 			    fobj32->fo_ctime.tv_nsec) &&
932 			    (vatt.va_ctime.tv_sec != fobj32->fo_ctime.tv_sec ||
933 			    vatt.va_ctime.tv_nsec != fobj32->fo_ctime.tv_nsec))
934 				events |= FILE_ATTRIB;
935 #endif /* _SYSCALL32_IMPL */
936 		}
937 
938 		/*
939 		 * No events to deliver
940 		 */
941 		if (events == 0) {
942 			mutex_exit(&pvp->pvp_mutex);
943 			mutex_exit(&pfcp->pfc_lock);
944 			return;
945 		}
946 
947 		/*
948 		 * Deliver the event now.
949 		 */
950 		pkevp = pfp->pfop_pev;
951 		pfp->pfop_flags &= ~PORT_FOP_ACTIVE;
952 		pkevp->portkev_events |= events;
953 		/*
954 		 * Move it to the tail as active once are in the
955 		 * beginning of the list.
956 		 */
957 		port_fop_listremove(pvp, pfp);
958 		port_fop_listinsert_tail(pvp, pfp);
959 		port_send_event(pkevp);
960 		pfp->pfop_flags |= PORT_FOP_KEV_ONQ;
961 	}
962 	mutex_exit(&pvp->pvp_mutex);
963 	mutex_exit(&pfcp->pfc_lock);
964 }
965 
966 /*
967  * Add the event source to the port and return the port source cache pointer.
968  */
969 int
970 port_fop_associate_source(portfop_cache_t **pfcpp, port_t *pp, int source)
971 {
972 	portfop_cache_t *pfcp;
973 	port_source_t	*pse;
974 	int		error;
975 
976 	/*
977 	 * associate PORT_SOURCE_FILE source with the port, if it is
978 	 * not associated yet. Note the PORT_SOURCE_FILE source is
979 	 * associated once and will not be dissociated.
980 	 */
981 	if ((pse = port_getsrc(pp, PORT_SOURCE_FILE)) == NULL) {
982 		if (error = port_associate_ksource(pp->port_fd, source,
983 		    &pse, port_close_fop, pp, NULL)) {
984 			*pfcpp = NULL;
985 			return (error);
986 		}
987 	}
988 
989 	/*
990 	 * Get the portfop cache pointer.
991 	 */
992 	if ((pfcp = pse->portsrc_data) == NULL) {
993 		/*
994 		 * This is the first time that a file is being associated,
995 		 * create the portfop cache.
996 		 */
997 		pfcp = kmem_zalloc(sizeof (portfop_cache_t), KM_SLEEP);
998 		mutex_enter(&pp->port_queue.portq_source_mutex);
999 		if (pse->portsrc_data == NULL) {
1000 			pse->portsrc_data = pfcp;
1001 			mutex_exit(&pp->port_queue.portq_source_mutex);
1002 		} else {
1003 			/*
1004 			 * someone else created the port cache, free
1005 			 * what we just now allocated.
1006 			 */
1007 			mutex_exit(&pp->port_queue.portq_source_mutex);
1008 			kmem_free(pfcp, sizeof (portfop_cache_t));
1009 			pfcp = pse->portsrc_data;
1010 		}
1011 	}
1012 	*pfcpp = pfcp;
1013 	return (0);
1014 }
1015 
1016 /*
1017  * Add the given pvp on the file system's list of vnodes watched.
1018  */
1019 int
1020 port_fop_pvfsadd(portfop_vp_t *pvp)
1021 {
1022 	int error = 0;
1023 	vnode_t	*vp = pvp->pvp_vp;
1024 	portfop_vfs_hash_t *pvfsh;
1025 	portfop_vfs_t	 *pvfsp;
1026 	fsem_t		*fsemp;
1027 
1028 	pvfsh = PORTFOP_PVFSH(vp->v_vfsp);
1029 	mutex_enter(&pvfsh->pvfshash_mutex);
1030 	for (pvfsp = pvfsh->pvfshash_pvfsp; pvfsp &&
1031 	    pvfsp->pvfs != vp->v_vfsp; pvfsp = pvfsp->pvfs_next)
1032 		;
1033 
1034 	if (!pvfsp) {
1035 		if ((fsemp = port_fop_fsemop()) != NULL) {
1036 			if ((error = fsem_install(vp->v_vfsp, fsemp,
1037 			    vp->v_vfsp, OPUNIQ, NULL, NULL))) {
1038 				mutex_exit(&pvfsh->pvfshash_mutex);
1039 				return (error);
1040 			}
1041 		} else {
1042 			mutex_exit(&pvfsh->pvfshash_mutex);
1043 			return (EINVAL);
1044 		}
1045 		pvfsp = kmem_zalloc(sizeof (portfop_vfs_t), KM_SLEEP);
1046 		pvfsp->pvfs = vp->v_vfsp;
1047 		list_create(&(pvfsp->pvfs_pvplist), sizeof (portfop_vp_t),
1048 		    offsetof(portfop_vp_t, pvp_pvfsnode));
1049 		pvfsp->pvfs_fsemp = fsemp;
1050 		pvfsp->pvfs_next = pvfsh->pvfshash_pvfsp;
1051 		pvfsh->pvfshash_pvfsp = pvfsp;
1052 	}
1053 
1054 	/*
1055 	 * check if an unmount is in progress.
1056 	 */
1057 	if (!pvfsp->pvfs_unmount) {
1058 		/*
1059 		 * insert the pvp on list.
1060 		 */
1061 		pvp->pvp_pvfsp = pvfsp;
1062 		list_insert_head(&pvfsp->pvfs_pvplist, (void *)pvp);
1063 	} else {
1064 		error = EINVAL;
1065 	}
1066 	mutex_exit(&pvfsh->pvfshash_mutex);
1067 	return (error);
1068 }
1069 
1070 /*
1071  * Installs the portfop_vp_t data structure on the
1072  * vnode. The 'pvp_femp == NULL' indicates it is not
1073  * active. The fem hooks have to be installed.
1074  * The portfop_vp_t is only freed when the vnode gets freed.
1075  */
1076 void
1077 port_install_fopdata(vnode_t *vp)
1078 {
1079 	portfop_vp_t *npvp;
1080 
1081 	npvp = kmem_zalloc(sizeof (*npvp), KM_SLEEP);
1082 	mutex_init(&npvp->pvp_mutex, NULL, MUTEX_DEFAULT, NULL);
1083 	list_create(&npvp->pvp_pfoplist, sizeof (portfop_t),
1084 	    offsetof(portfop_t, pfop_node));
1085 	npvp->pvp_vp = vp;
1086 	/*
1087 	 * If v_fopdata is not null, some other thread beat us to it.
1088 	 */
1089 	if (casptr(&vp->v_fopdata, NULL, npvp) != NULL) {
1090 		mutex_destroy(&npvp->pvp_mutex);
1091 		list_destroy(&npvp->pvp_pfoplist);
1092 		kmem_free(npvp, sizeof (*npvp));
1093 	}
1094 }
1095 
1096 
1097 /*
1098  * Allocate and add a portfop_t to the per port cache. Also add the portfop_t
1099  * to the vnode's list. The association is identified by the object pointer
1100  * address and pid.
1101  */
1102 int
1103 port_pfp_setup(portfop_t **pfpp, port_t *pp, vnode_t *vp, portfop_cache_t *pfcp,
1104 	uintptr_t object, int events, void *user, char *cname, int clen,
1105 	vnode_t *dvp)
1106 {
1107 	portfop_t	*pfp = NULL;
1108 	port_kevent_t	*pkevp;
1109 	fem_t		*femp;
1110 	int		error = 0;
1111 	portfop_vp_t	*pvp;
1112 
1113 
1114 	/*
1115 	 * The port cache mutex is held.
1116 	 */
1117 	*pfpp  = NULL;
1118 
1119 
1120 	/*
1121 	 * At this point the fem monitor is installed.
1122 	 * Allocate a port event structure per vnode association.
1123 	 */
1124 	if (pfp == NULL) {
1125 		if (error = port_alloc_event_local(pp, PORT_SOURCE_FILE,
1126 		    PORT_ALLOC_CACHED, &pkevp)) {
1127 			return (error);
1128 		}
1129 		pfp = kmem_zalloc(sizeof (portfop_t), KM_SLEEP);
1130 		pfp->pfop_pev = pkevp;
1131 	}
1132 
1133 	pfp->pfop_vp = vp;
1134 	pfp->pfop_pid = curproc->p_pid;
1135 	pfp->pfop_pcache = pfcp;
1136 	pfp->pfop_pp = pp;
1137 	pfp->pfop_flags |= PORT_FOP_ACTIVE;
1138 	pfp->pfop_cname = cname;
1139 	pfp->pfop_clen = clen;
1140 	pfp->pfop_dvp = dvp;
1141 	pfp->pfop_object = object;
1142 
1143 	pkevp->portkev_callback = port_fop_callback;
1144 	pkevp->portkev_arg = pfp;
1145 	pkevp->portkev_object = object;
1146 	pkevp->portkev_user = user;
1147 	pkevp->portkev_events = 0;
1148 
1149 	port_pcache_insert(pfcp, pfp);
1150 
1151 	/*
1152 	 * Register a new file events monitor for this file(vnode), if not
1153 	 * done already.
1154 	 */
1155 	if ((pvp = vp->v_fopdata) == NULL) {
1156 		port_install_fopdata(vp);
1157 		pvp = vp->v_fopdata;
1158 	}
1159 
1160 	mutex_enter(&pvp->pvp_mutex);
1161 	/*
1162 	 * if the vnode does not have the file events hooks, install it.
1163 	 */
1164 	if (pvp->pvp_femp == NULL) {
1165 		if ((femp = port_fop_femop()) != NULL) {
1166 			if (!(error = fem_install(pfp->pfop_vp, femp,
1167 			    (void *)vp, OPUNIQ, NULL, NULL))) {
1168 				pvp->pvp_femp = femp;
1169 				/*
1170 				 * add fsem_t hooks to the vfsp and add pvp to
1171 				 * the list of vnodes for this vfs.
1172 				 */
1173 				if (!(error = port_fop_pvfsadd(pvp))) {
1174 					/*
1175 					 * Hold a reference to the vnode since
1176 					 * we successfully installed the hooks.
1177 					 */
1178 					VN_HOLD(vp);
1179 				} else {
1180 					(void) fem_uninstall(vp, femp, vp);
1181 					pvp->pvp_femp = NULL;
1182 				}
1183 			}
1184 		} else {
1185 			error = EINVAL;
1186 		}
1187 	}
1188 
1189 	if (error) {
1190 		/*
1191 		 * pkevp will get freed here.
1192 		 */
1193 		pfp->pfop_cname = NULL;
1194 		port_pcache_remove_fop(pfcp, pfp);
1195 		mutex_exit(&pvp->pvp_mutex);
1196 		return (error);
1197 	}
1198 
1199 	/*
1200 	 * insert the pfp on the vnode's list. After this
1201 	 * events can get delivered.
1202 	 */
1203 	pfp->pfop_events = events;
1204 	port_fop_listinsert_head(pvp, pfp);
1205 
1206 	mutex_exit(&pvp->pvp_mutex);
1207 	/*
1208 	 * Hold the directory vnode since we have a reference now.
1209 	 */
1210 	if (dvp != NULL)
1211 		VN_HOLD(dvp);
1212 	*pfpp = pfp;
1213 	return (0);
1214 }
1215 
1216 vnode_t *
1217 port_resolve_vp(vnode_t *vp)
1218 {
1219 	vnode_t *rvp;
1220 	/*
1221 	 * special case /etc/mnttab(mntfs type). The mntfstype != 0
1222 	 * if mntfs got mounted.
1223 	 */
1224 	if (vfs_mntdummyvp && mntfstype != 0 &&
1225 	    vp->v_vfsp->vfs_fstype == mntfstype) {
1226 		VN_RELE(vp);
1227 		vp = vfs_mntdummyvp;
1228 		VN_HOLD(vfs_mntdummyvp);
1229 	}
1230 
1231 	/*
1232 	 * This should take care of lofs mounted fs systems and nfs4
1233 	 * hardlinks.
1234 	 */
1235 	if ((VOP_REALVP(vp, &rvp, NULL) == 0) && vp != rvp) {
1236 		VN_HOLD(rvp);
1237 		VN_RELE(vp);
1238 		vp = rvp;
1239 	}
1240 	return (vp);
1241 }
1242 
1243 /*
1244  * Register a file events watch on the given file associated to the port *pp.
1245  *
1246  * The association is identified by the object pointer and the pid.
1247  * The events argument contains the events to be monitored for.
1248  *
1249  * The vnode will have a VN_HOLD once the fem hooks are installed.
1250  *
1251  * Every reference(pfp) to the directory vnode will have a VN_HOLD to ensure
1252  * that the directory vnode pointer does not change.
1253  */
1254 int
1255 port_associate_fop(port_t *pp, int source, uintptr_t object, int events,
1256     void *user)
1257 {
1258 	portfop_cache_t	*pfcp;
1259 	vnode_t		*vp, *dvp, *oldvp = NULL, *olddvp = NULL;
1260 	portfop_t	*pfp;
1261 	int		error = 0;
1262 	file_obj_t	fobj;
1263 	void		*objptr;
1264 	char		*cname;
1265 	int		clen;
1266 	int		follow;
1267 
1268 	/*
1269 	 * check that events specified are valid.
1270 	 */
1271 	if ((events & ~FILE_EVENTS_MASK) != 0)
1272 		return (EINVAL);
1273 
1274 	if (get_udatamodel() == DATAMODEL_NATIVE) {
1275 		if (copyin((void *)object, &fobj, sizeof (file_obj_t)))
1276 			return (EFAULT);
1277 		objptr = (void *)&fobj;
1278 #ifdef  _SYSCALL32_IMPL
1279 	} else {
1280 		file_obj32_t	fobj32;
1281 		if (copyin((void *)object, &fobj32, sizeof (file_obj32_t)))
1282 			return (EFAULT);
1283 		objptr = (void *)&fobj32;
1284 #endif  /* _SYSCALL32_IMPL */
1285 	}
1286 
1287 	vp = dvp = NULL;
1288 
1289 	/*
1290 	 * find out if we need to follow symbolic links.
1291 	 */
1292 	follow = !(events & FILE_NOFOLLOW);
1293 	events = events & ~FILE_NOFOLLOW;
1294 
1295 	/*
1296 	 * lookup and find the vnode and its directory vnode of the given
1297 	 * file.
1298 	 */
1299 	if ((error = port_fop_getdvp(objptr, &vp, &dvp, &cname, &clen,
1300 	    follow)) != 0) {
1301 		return (error);
1302 	}
1303 
1304 	if (dvp != NULL) {
1305 		dvp = port_resolve_vp(dvp);
1306 	}
1307 
1308 	/*
1309 	 * Not found
1310 	 */
1311 	if (vp == NULL) {
1312 		error = ENOENT;
1313 		goto errout;
1314 	}
1315 
1316 	vp = port_resolve_vp(vp);
1317 
1318 
1319 	if (vp != NULL && vnevent_support(vp, NULL)) {
1320 		error = ENOTSUP;
1321 		goto errout;
1322 	}
1323 
1324 	/*
1325 	 * If dvp belongs to a different filesystem just ignore it.
1326 	 * Hardlinks cannot exist across filesystems.
1327 	 */
1328 	if (dvp != NULL && dvp->v_vfsp != vp->v_vfsp) {
1329 		VN_RELE(dvp);
1330 		dvp = NULL;
1331 	}
1332 
1333 	/*
1334 	 * Associate this source to the port and get the per port
1335 	 * fop cache pointer. If the source is already associated, it
1336 	 * will just return the cache pointer.
1337 	 */
1338 	if (error = port_fop_associate_source(&pfcp, pp, source)) {
1339 		goto errout;
1340 	}
1341 
1342 	/*
1343 	 * Check if there is an existing association of this file.
1344 	 */
1345 	mutex_enter(&pfcp->pfc_lock);
1346 	pfp = port_cache_lookup_fop(pfcp, curproc->p_pid, object);
1347 
1348 	/*
1349 	 * If it is not the same vnode, just discard it. VN_RELE needs to be
1350 	 * called with no locks held, therefore save vnode pointers and
1351 	 * vn_rele them later.
1352 	 */
1353 	if (pfp != NULL && (pfp->pfop_vp != vp || pfp->pfop_dvp != dvp)) {
1354 		(void) port_remove_fop(pfp, pfcp, 1, NULL, &oldvp, &olddvp);
1355 		pfp = NULL;
1356 	}
1357 
1358 	if (pfp == NULL) {
1359 		vnode_t *tvp, *tdvp;
1360 		portfop_t	*tpfp;
1361 		int error;
1362 
1363 		/*
1364 		 * Add a new association, save the file name and the
1365 		 * directory vnode pointer.
1366 		 */
1367 		if (error = port_pfp_setup(&pfp, pp, vp, pfcp, object,
1368 		    events, user, cname, clen, dvp)) {
1369 			mutex_exit(&pfcp->pfc_lock);
1370 			goto errout;
1371 		}
1372 
1373 		pfp->pfop_callrid = curthread;
1374 		/*
1375 		 * File name used, so make sure we don't free it.
1376 		 */
1377 		cname = NULL;
1378 
1379 		/*
1380 		 * We need to check if the file was removed after the
1381 		 * the lookup and before the fem hooks where added. If
1382 		 * so, return error. The vnode will still exist as we have
1383 		 * a hold on it.
1384 		 *
1385 		 * Drop the cache lock before calling port_fop_getdvp().
1386 		 * port_fop_getdvp() may block either in the vfs layer
1387 		 * or some filesystem.  Therefore there is potential
1388 		 * for deadlock if cache lock is held and if some other
1389 		 * thread is attempting to deliver file events which would
1390 		 * require getting the cache lock, while it may be holding
1391 		 * the filesystem or vfs layer locks.
1392 		 */
1393 		mutex_exit(&pfcp->pfc_lock);
1394 		tvp = NULL;
1395 		if ((error = port_fop_getdvp(objptr, &tvp, NULL,
1396 		    NULL, NULL, follow)) == 0) {
1397 			if (tvp != NULL) {
1398 				tvp = port_resolve_vp(tvp);
1399 				/*
1400 				 * This vnode pointer is just used
1401 				 * for comparison, so rele it
1402 				 */
1403 				VN_RELE(tvp);
1404 			}
1405 		}
1406 
1407 		if (error || tvp == NULL || tvp != vp) {
1408 			/*
1409 			 * Since we dropped the cache lock, make sure
1410 			 * we are still dealing with the same pfp and this
1411 			 * is the thread which registered it.
1412 			 */
1413 			mutex_enter(&pfcp->pfc_lock);
1414 			tpfp = port_cache_lookup_fop(pfcp,
1415 			    curproc->p_pid, object);
1416 
1417 			error = 0;
1418 			if (tpfp == NULL || tpfp != pfp ||
1419 			    pfp->pfop_vp != vp ||
1420 			    pfp->pfop_dvp != dvp ||
1421 			    pfp->pfop_callrid != curthread) {
1422 				/*
1423 				 * Some other event was delivered, the file
1424 				 * watch was removed or reassociated, just
1425 				 * ignore it and leave
1426 				 */
1427 				mutex_exit(&pfcp->pfc_lock);
1428 				goto errout;
1429 			}
1430 
1431 			/*
1432 			 * remove the pfp and fem hooks, if pfp still
1433 			 * active and it is not being removed from
1434 			 * the vnode list. This is checked in
1435 			 * port_remove_fop with the vnode lock held.
1436 			 * The vnode returned is VN_RELE'ed after dropping
1437 			 * the locks.
1438 			 */
1439 			tdvp = tvp = NULL;
1440 			if (port_remove_fop(pfp, pfcp, 0, NULL, &tvp, &tdvp)) {
1441 				/*
1442 				 * The pfp was removed, means no
1443 				 * events where queued. Report the
1444 				 * error now.
1445 				 */
1446 				error = EINVAL;
1447 			}
1448 			mutex_exit(&pfcp->pfc_lock);
1449 			if (tvp != NULL)
1450 				VN_RELE(tvp);
1451 			if (tdvp != NULL)
1452 				VN_RELE(tdvp);
1453 			goto errout;
1454 		}
1455 	} else {
1456 		portfop_vp_t	*pvp = vp->v_fopdata;
1457 
1458 		/*
1459 		 * Re-association of the object.
1460 		 */
1461 		mutex_enter(&pvp->pvp_mutex);
1462 
1463 		/*
1464 		 * remove any queued up event.
1465 		 */
1466 		if (port_remove_done_event(pfp->pfop_pev)) {
1467 			pfp->pfop_flags &= ~PORT_FOP_KEV_ONQ;
1468 		}
1469 
1470 		/*
1471 		 * set new events to watch.
1472 		 */
1473 		pfp->pfop_events = events;
1474 
1475 		/*
1476 		 * If not active, mark it active even if it is being
1477 		 * removed. Then it can send an exception event.
1478 		 *
1479 		 * Move it to the head, as the active ones are only
1480 		 * in the beginning. If removing, the pfp will be on
1481 		 * a temporary list, no need to move it to the front
1482 		 * all the entries will be processed. Some exception
1483 		 * events will be delivered in port_fop_excep();
1484 		 */
1485 		if (!(pfp->pfop_flags & PORT_FOP_ACTIVE)) {
1486 			pfp->pfop_flags |= PORT_FOP_ACTIVE;
1487 			if (!(pfp->pfop_flags & PORT_FOP_REMOVING)) {
1488 				pvp = (portfop_vp_t *)vp->v_fopdata;
1489 				port_fop_listremove(pvp, pfp);
1490 				port_fop_listinsert_head(pvp, pfp);
1491 			}
1492 		}
1493 		pfp->pfop_callrid = curthread;
1494 		mutex_exit(&pvp->pvp_mutex);
1495 		mutex_exit(&pfcp->pfc_lock);
1496 	}
1497 
1498 	/*
1499 	 * Compare time stamps and deliver events.
1500 	 */
1501 	if (vp->v_type != VFIFO) {
1502 		port_check_timestamp(pfcp, vp, dvp, pfp, objptr, object);
1503 	}
1504 
1505 	error = 0;
1506 
1507 	/*
1508 	 *  If we have too many watches on the vnode, discard an
1509 	 *  inactive watch.
1510 	 */
1511 	port_fop_trimpfplist(vp);
1512 
1513 errout:
1514 	/*
1515 	 * Release the hold acquired due to the lookup operation.
1516 	 */
1517 	if (vp != NULL)
1518 		VN_RELE(vp);
1519 	if (dvp != NULL)
1520 		VN_RELE(dvp);
1521 
1522 	if (oldvp != NULL)
1523 		VN_RELE(oldvp);
1524 	if (olddvp != NULL)
1525 		VN_RELE(olddvp);
1526 
1527 	/*
1528 	 * copied file name not used, free it.
1529 	 */
1530 	if (cname != NULL) {
1531 		kmem_free(cname, clen + 1);
1532 	}
1533 	return (error);
1534 }
1535 
1536 
1537 /*
1538  * The port_dissociate_fop() function dissociates the file object
1539  * from the event port and removes any events that are already on the queue.
1540  * Only the owner of the association is allowed to dissociate the file from
1541  * the port. Returns  success (0) if it was found and removed. Otherwise
1542  * ENOENT.
1543  */
1544 int
1545 port_dissociate_fop(port_t *pp, uintptr_t object)
1546 {
1547 	portfop_cache_t	*pfcp;
1548 	portfop_t	*pfp;
1549 	port_source_t	*pse;
1550 	int		active = 0;
1551 	vnode_t		*tvp = NULL, *tdvp = NULL;
1552 
1553 	pse = port_getsrc(pp, PORT_SOURCE_FILE);
1554 
1555 	/*
1556 	 * if this source is not associated or if there is no
1557 	 * cache, nothing to do just return.
1558 	 */
1559 	if (pse == NULL ||
1560 	    (pfcp = (portfop_cache_t *)pse->portsrc_data) == NULL)
1561 		return (EINVAL);
1562 
1563 	/*
1564 	 * Check if this object is on the cache. Only the owner pid
1565 	 * is allowed to dissociate.
1566 	 */
1567 	mutex_enter(&pfcp->pfc_lock);
1568 	pfp = port_cache_lookup_fop(pfcp, curproc->p_pid, object);
1569 	if (pfp == NULL) {
1570 		mutex_exit(&pfcp->pfc_lock);
1571 		return (ENOENT);
1572 	}
1573 
1574 	/*
1575 	 * If this was the last association, it will release
1576 	 * the hold on the vnode. There is a race condition where
1577 	 * the the pfp is being removed due to an exception event
1578 	 * in port_fop_sendevent()->port_fop_excep() and port_remove_fop().
1579 	 * Since port source cache lock is held, port_fop_excep() cannot
1580 	 * complete. The vnode itself will not disappear as long its pfps
1581 	 * have a reference.
1582 	 */
1583 	(void) port_remove_fop(pfp, pfcp, 1, &active, &tvp, &tdvp);
1584 	mutex_exit(&pfcp->pfc_lock);
1585 	if (tvp != NULL)
1586 		VN_RELE(tvp);
1587 	if (tdvp != NULL)
1588 		VN_RELE(tdvp);
1589 	return (active ? 0 : ENOENT);
1590 }
1591 
1592 
1593 /*
1594  * port_close() calls this function to request the PORT_SOURCE_FILE source
1595  * to remove/free all resources allocated and associated with the port.
1596  */
1597 
1598 /* ARGSUSED */
1599 static void
1600 port_close_fop(void *arg, int port, pid_t pid, int lastclose)
1601 {
1602 	port_t		*pp = arg;
1603 	portfop_cache_t	*pfcp;
1604 	portfop_t	**hashtbl;
1605 	portfop_t	*pfp;
1606 	portfop_t	*pfpnext;
1607 	int		index, i;
1608 	port_source_t	*pse;
1609 	vnode_t 	*tdvp = NULL;
1610 	vnode_t		*vpl[PORTFOP_NVP];
1611 
1612 	pse = port_getsrc(pp, PORT_SOURCE_FILE);
1613 
1614 	/*
1615 	 * No source or no cache, nothing to do.
1616 	 */
1617 	if (pse == NULL ||
1618 	    (pfcp = (portfop_cache_t *)pse->portsrc_data) == NULL)
1619 		return;
1620 	/*
1621 	 * Scan the cache and free all allocated portfop_t and port_kevent_t
1622 	 * structures of this pid. Note, no new association for this pid will
1623 	 * be possible as the port is being closed.
1624 	 *
1625 	 * The common case is that the port is not shared and all the entries
1626 	 * are of this pid and have to be freed. Since VN_RELE has to be
1627 	 * called outside the lock, we do it in batches.
1628 	 */
1629 	hashtbl = (portfop_t **)pfcp->pfc_hash;
1630 	index = i = 0;
1631 	bzero(vpl, sizeof (vpl));
1632 	mutex_enter(&pfcp->pfc_lock);
1633 	while (index < PORTFOP_HASHSIZE) {
1634 		pfp = hashtbl[index];
1635 		while (pfp != NULL && i < (PORTFOP_NVP - 1)) {
1636 			pfpnext = pfp->pfop_hashnext;
1637 			if (pid == pfp->pfop_pid) {
1638 				(void) port_remove_fop(pfp, pfcp, 1, NULL,
1639 				    &vpl[i], &tdvp);
1640 				if (vpl[i] != NULL) {
1641 					i++;
1642 				}
1643 				if (tdvp != NULL) {
1644 					vpl[i++] = tdvp;
1645 					tdvp = NULL;
1646 				}
1647 			}
1648 			pfp = pfpnext;
1649 		}
1650 		if (pfp == NULL)
1651 			index++;
1652 		/*
1653 		 * Now call VN_RELE if we have collected enough vnodes or
1654 		 * we have reached the end of the hash table.
1655 		 */
1656 		if (i >= (PORTFOP_NVP - 1) ||
1657 		    (i > 0 && index == PORTFOP_HASHSIZE)) {
1658 			mutex_exit(&pfcp->pfc_lock);
1659 			while (i > 0) {
1660 				VN_RELE(vpl[--i]);
1661 				vpl[i] = NULL;
1662 			}
1663 			mutex_enter(&pfcp->pfc_lock);
1664 		}
1665 	}
1666 
1667 	/*
1668 	 * Due to a race between port_close_fop() and port_fop()
1669 	 * trying to remove the pfp's from the port's cache, it is
1670 	 * possible that some pfp's are still in the process of being
1671 	 * freed so we wait.
1672 	 */
1673 	while (lastclose && pfcp->pfc_objcount) {
1674 		(void) cv_wait_sig(&pfcp->pfc_lclosecv, &pfcp->pfc_lock);
1675 	}
1676 	mutex_exit(&pfcp->pfc_lock);
1677 	/*
1678 	 * last close, free the cache.
1679 	 */
1680 	if (lastclose) {
1681 		ASSERT(pfcp->pfc_objcount == 0);
1682 		pse->portsrc_data = NULL;
1683 		kmem_free(pfcp, sizeof (portfop_cache_t));
1684 	}
1685 }
1686 
1687 /*
1688  * Given the list of associations(watches), it will send exception events,
1689  * if still active, and discard them. The exception events are handled
1690  * separately because, the pfp needs to be removed from the port cache and
1691  * freed as the vnode's identity is changing or being removed. To remove
1692  * the pfp from the port's cache, we need to hold the cache lock (pfc_lock).
1693  * The lock order is pfc_lock -> pvp_mutex(vnode's) mutex and that is why
1694  * the cache's lock cannot be acquired in port_fop_sendevent().
1695  */
1696 static void
1697 port_fop_excep(list_t *tlist, int op)
1698 {
1699 	portfop_t	*pfp;
1700 	portfop_cache_t *pfcp;
1701 	port_t	*pp;
1702 	port_kevent_t	*pkevp;
1703 	vnode_t		*tdvp;
1704 	int		error = 0;
1705 
1706 	while (pfp = (portfop_t *)list_head(tlist)) {
1707 		int removed = 0;
1708 		/*
1709 		 * remove from the temp list. Since PORT_FOP_REMOVING is
1710 		 * set, no other thread should attempt to perform a
1711 		 * list_remove on this pfp.
1712 		 */
1713 		list_remove(tlist, pfp);
1714 
1715 		pfcp = pfp->pfop_pcache;
1716 		mutex_enter(&pfcp->pfc_lock);
1717 
1718 		/*
1719 		 * Remove the event from the port queue if it was queued up.
1720 		 * No need to clear the PORT_FOP_KEV_ONQ flag as this pfp is
1721 		 * no longer on the vnode's list.
1722 		 */
1723 		if ((pfp->pfop_flags & PORT_FOP_KEV_ONQ)) {
1724 			removed = port_remove_done_event(pfp->pfop_pev);
1725 		}
1726 
1727 		/*
1728 		 * If still active or the event was queued up and
1729 		 * had not been collected yet, send an EXCEPTION event.
1730 		 */
1731 		if (pfp->pfop_flags & (PORT_FOP_ACTIVE) || removed) {
1732 			pp = pfp->pfop_pp;
1733 			/*
1734 			 * Allocate a port_kevent_t non cached to send this
1735 			 * event since we will be de-registering.
1736 			 * The port_kevent_t cannot be pointing back to the
1737 			 * pfp anymore.
1738 			 */
1739 			pfp->pfop_flags &= ~PORT_FOP_ACTIVE;
1740 			error = port_alloc_event_local(pp, PORT_SOURCE_FILE,
1741 			    PORT_ALLOC_DEFAULT, &pkevp);
1742 			if (!error) {
1743 
1744 				pkevp->portkev_callback = port_fop_callback;
1745 				pkevp->portkev_arg = NULL;
1746 				pkevp->portkev_object =
1747 				    pfp->pfop_pev->portkev_object;
1748 				pkevp->portkev_user =
1749 				    pfp->pfop_pev->portkev_user;
1750 				/*
1751 				 * Copy the pid of the watching process.
1752 				 */
1753 				pkevp->portkev_pid =
1754 				    pfp->pfop_pev->portkev_pid;
1755 				pkevp->portkev_events = op;
1756 				port_send_event(pkevp);
1757 			}
1758 		}
1759 		/*
1760 		 * At this point the pfp has been removed from the vnode's
1761 		 * list its cached port_kevent_t is not on the done queue.
1762 		 * Remove the pfp and free it from the cache.
1763 		 */
1764 		tdvp = pfp->pfop_dvp;
1765 		port_pcache_remove_fop(pfcp, pfp);
1766 		mutex_exit(&pfcp->pfc_lock);
1767 		if (tdvp != NULL)
1768 			VN_RELE(tdvp);
1769 	}
1770 }
1771 
1772 /*
1773  * Send the file events to all of the processes watching this
1774  * vnode. In case of hard links, the directory vnode pointer and
1775  * the file name are compared. If the names match, then the specified
1776  * event is sent or else, the FILE_ATTRIB event is sent, This is the
1777  * documented behavior.
1778  */
1779 void
1780 port_fop_sendevent(vnode_t *vp, int events, vnode_t *dvp, char *cname)
1781 {
1782 	port_kevent_t	*pkevp;
1783 	portfop_t	*pfp, *npfp;
1784 	portfop_vp_t	*pvp;
1785 	list_t		tmplist;
1786 	int		removeall = 0;
1787 
1788 	pvp = (portfop_vp_t *)vp->v_fopdata;
1789 	mutex_enter(&pvp->pvp_mutex);
1790 
1791 	/*
1792 	 * Check if the list is empty.
1793 	 *
1794 	 * All entries have been removed by some other thread.
1795 	 * The vnode may be still active and we got called,
1796 	 * but some other thread is in the process of removing the hooks.
1797 	 */
1798 	if (!list_head(&pvp->pvp_pfoplist)) {
1799 		mutex_exit(&pvp->pvp_mutex);
1800 		return;
1801 	}
1802 
1803 	if ((events & (FILE_EXCEPTION))) {
1804 		/*
1805 		 * If it is an event for which we are going to remove
1806 		 * the watches so just move it a temporary list and
1807 		 * release this vnode.
1808 		 */
1809 		list_create(&tmplist, sizeof (portfop_t),
1810 		    offsetof(portfop_t, pfop_node));
1811 
1812 		/*
1813 		 * If it is an UNMOUNT, MOUNTEDOVER or no file name has been
1814 		 * passed for an exception event, all associations need to be
1815 		 * removed.
1816 		 */
1817 		if (dvp == NULL || cname == NULL) {
1818 			removeall = 1;
1819 		}
1820 	}
1821 
1822 	if (!removeall) {
1823 		/*
1824 		 * All the active ones are in the beginning of the list.
1825 		 */
1826 		for (pfp = (portfop_t *)list_head(&pvp->pvp_pfoplist);
1827 		    pfp && pfp->pfop_flags & PORT_FOP_ACTIVE; pfp = npfp) {
1828 			int levents = events;
1829 
1830 			npfp = list_next(&pvp->pvp_pfoplist, pfp);
1831 			/*
1832 			 * Hard links case - If the file is being
1833 			 * removed/renamed, and the name matches
1834 			 * the watched file, then it is an EXCEPTION
1835 			 * event or else it will be just a FILE_ATTRIB.
1836 			 */
1837 			if ((events & (FILE_EXCEPTION))) {
1838 				ASSERT(dvp != NULL && cname != NULL);
1839 				if (pfp->pfop_dvp == NULL ||
1840 				    (pfp->pfop_dvp == dvp &&
1841 				    (strcmp(cname, pfp->pfop_cname) == 0))) {
1842 					/*
1843 					 * It is an exception event, move it
1844 					 * to temp list and process it later.
1845 					 * Note we don't set the pfp->pfop_vp
1846 					 * to NULL even thought it has been
1847 					 * removed from the vnode's list. This
1848 					 * pointer is referenced in
1849 					 * port_remove_fop(). The vnode it
1850 					 * self cannot disappear until this
1851 					 * pfp gets removed and freed.
1852 					 */
1853 					port_fop_listremove(pvp, pfp);
1854 					list_insert_tail(&tmplist, (void *)pfp);
1855 					pfp->pfop_flags  |= PORT_FOP_REMOVING;
1856 					continue;
1857 				} else {
1858 					levents = FILE_ATTRIB;
1859 				}
1860 
1861 			}
1862 
1863 			if (pfp->pfop_events & levents) {
1864 				/*
1865 				 * deactivate and move it to the tail.
1866 				 * If the pfp was active, it cannot be
1867 				 * on the port's done queue.
1868 				 */
1869 				pfp->pfop_flags &= ~PORT_FOP_ACTIVE;
1870 				port_fop_listremove(pvp, pfp);
1871 				port_fop_listinsert_tail(pvp, pfp);
1872 
1873 				pkevp = pfp->pfop_pev;
1874 				pkevp->portkev_events |=
1875 				    (levents & pfp->pfop_events);
1876 				port_send_event(pkevp);
1877 				pfp->pfop_flags |= PORT_FOP_KEV_ONQ;
1878 			}
1879 		}
1880 	}
1881 
1882 
1883 	if ((events & (FILE_EXCEPTION))) {
1884 		if (!removeall) {
1885 			/*
1886 			 * Check the inactive associations and remove them if
1887 			 * the file name matches.
1888 			 */
1889 			for (; pfp; pfp = npfp) {
1890 				npfp = list_next(&pvp->pvp_pfoplist, pfp);
1891 				if (dvp == NULL || cname == NULL ||
1892 				    pfp->pfop_dvp == NULL ||
1893 				    (pfp->pfop_dvp == dvp &&
1894 				    (strcmp(cname, pfp->pfop_cname) == 0))) {
1895 					port_fop_listremove(pvp, pfp);
1896 					list_insert_tail(&tmplist, (void *)pfp);
1897 					pfp->pfop_flags  |= PORT_FOP_REMOVING;
1898 				}
1899 			}
1900 		} else {
1901 			/*
1902 			 * Can be optimized to avoid two pass over this list
1903 			 * by having a flag in the vnode's portfop_vp_t
1904 			 * structure to indicate that it is going away,
1905 			 * Or keep the list short by reusing inactive watches.
1906 			 */
1907 			port_fop_listmove(pvp, &tmplist);
1908 			for (pfp = (portfop_t *)list_head(&tmplist);
1909 			    pfp; pfp = list_next(&tmplist, pfp)) {
1910 				pfp->pfop_flags |= PORT_FOP_REMOVING;
1911 			}
1912 		}
1913 
1914 		/*
1915 		 * Uninstall the fem hooks if there are no more associations.
1916 		 * This will release the pvp mutex.
1917 		 *
1918 		 * Even thought all entries may have been removed,
1919 		 * the vnode itself cannot disappear as there will be a
1920 		 * hold on it due to this call to port_fop_sendevent. This is
1921 		 * important to syncronize with a port_dissociate_fop() call
1922 		 * that may be attempting to remove an object from the vnode's.
1923 		 */
1924 		if (port_fop_femuninstall(vp))
1925 			VN_RELE(vp);
1926 
1927 		/*
1928 		 * Send exception events and discard the watch entries.
1929 		 */
1930 		port_fop_excep(&tmplist, events);
1931 		list_destroy(&tmplist);
1932 
1933 	} else {
1934 		mutex_exit(&pvp->pvp_mutex);
1935 
1936 		/*
1937 		 * trim the list.
1938 		 */
1939 		port_fop_trimpfplist(vp);
1940 	}
1941 }
1942 
1943 /*
1944  * Given the file operation, map it to the event types and send.
1945  */
1946 void
1947 port_fop(vnode_t *vp, int op, int retval)
1948 {
1949 	int event = 0;
1950 	/*
1951 	 * deliver events only if the operation was successful.
1952 	 */
1953 	if (retval)
1954 		return;
1955 
1956 	/*
1957 	 * These events occurring on the watched file.
1958 	 */
1959 	if (op & FOP_MODIFIED_MASK) {
1960 		event  = FILE_MODIFIED;
1961 	}
1962 	if (op & FOP_ACCESS_MASK) {
1963 		event  |= FILE_ACCESS;
1964 	}
1965 	if (op & FOP_ATTRIB_MASK) {
1966 		event  |= FILE_ATTRIB;
1967 	}
1968 
1969 	if (event) {
1970 		port_fop_sendevent(vp, 	event, NULL, NULL);
1971 	}
1972 }
1973 
1974 static int port_forceunmount(vfs_t *vfsp)
1975 {
1976 	char *fsname = vfssw[vfsp->vfs_fstype].vsw_name;
1977 
1978 	if (fsname == NULL) {
1979 		return (0);
1980 	}
1981 
1982 	if (strcmp(fsname, MNTTYPE_NFS) == 0) {
1983 		return (1);
1984 	}
1985 
1986 	if (strcmp(fsname, MNTTYPE_NFS3) == 0) {
1987 		return (1);
1988 	}
1989 
1990 	if (strcmp(fsname, MNTTYPE_NFS4) == 0) {
1991 		return (1);
1992 	}
1993 	return (0);
1994 }
1995 /*
1996  * ----- the unmount filesystem op(fsem) hook.
1997  */
1998 int
1999 port_fop_unmount(fsemarg_t *vf, int flag, cred_t *cr)
2000 {
2001 	vfs_t	*vfsp = (vfs_t *)vf->fa_fnode->fn_available;
2002 	kmutex_t	*mtx;
2003 	portfop_vfs_t	*pvfsp, **ppvfsp;
2004 	portfop_vp_t	*pvp;
2005 	int error;
2006 	int fmfs;
2007 
2008 	fmfs = port_forceunmount(vfsp);
2009 
2010 	mtx = &(portvfs_hash[PORTFOP_PVFSHASH(vfsp)].pvfshash_mutex);
2011 	ppvfsp = &(portvfs_hash[PORTFOP_PVFSHASH(vfsp)].pvfshash_pvfsp);
2012 	pvfsp = NULL;
2013 	mutex_enter(mtx);
2014 	/*
2015 	 * since this fsem hook is triggered, the vfsp has to be on
2016 	 * the hash list.
2017 	 */
2018 	for (pvfsp = *ppvfsp; pvfsp->pvfs != vfsp; pvfsp = pvfsp->pvfs_next)
2019 	;
2020 
2021 	/*
2022 	 * For some of the filesystems, allow unmounts to proceed only if
2023 	 * there are no files being watched or it is a forced unmount.
2024 	 */
2025 	if (fmfs && !(flag & MS_FORCE) &&
2026 	    !list_is_empty(&pvfsp->pvfs_pvplist)) {
2027 		mutex_exit(mtx);
2028 		return (EBUSY);
2029 	}
2030 
2031 	/*
2032 	 * Indicate that the unmount is in process. Don't remove it yet.
2033 	 * The underlying filesystem unmount routine sets the VFS_UNMOUNTED
2034 	 * flag on the vfs_t structure. But we call the filesystem unmount
2035 	 * routine after removing all the file watches for this filesystem,
2036 	 * otherwise the unmount will fail due to active vnodes.
2037 	 * Meanwhile setting pvfsp->unmount = 1 will prevent any thread
2038 	 * attempting to add a file watch.
2039 	 */
2040 	pvfsp->pvfs_unmount = 1;
2041 	mutex_exit(mtx);
2042 
2043 	/*
2044 	 * uninstall the fsem hooks.
2045 	 */
2046 	(void) fsem_uninstall(vfsp, (fsem_t *)pvfsp->pvfs_fsemp, vfsp);
2047 
2048 	while (pvp = list_head(&pvfsp->pvfs_pvplist)) {
2049 		list_remove(&pvfsp->pvfs_pvplist, pvp);
2050 		/*
2051 		 * This should send an UNMOUNTED event to all the
2052 		 * watched vnode of this filesystem and uninstall
2053 		 * the fem hooks. We release the hold on the vnode here
2054 		 * because port_fop_femuninstall() will not do it if
2055 		 * unmount is in process.
2056 		 */
2057 		port_fop_sendevent(pvp->pvp_vp, UNMOUNTED, NULL, NULL);
2058 		VN_RELE(pvp->pvp_vp);
2059 	}
2060 
2061 	error = vfsnext_unmount(vf, flag, cr);
2062 
2063 	/*
2064 	 * we free the pvfsp after the unmount has been completed.
2065 	 */
2066 	mutex_enter(mtx);
2067 	for (; *ppvfsp && (*ppvfsp)->pvfs != vfsp;
2068 	    ppvfsp = &(*ppvfsp)->pvfs_next)
2069 	;
2070 
2071 	/*
2072 	 * remove and free it.
2073 	 */
2074 	ASSERT(list_head(&pvfsp->pvfs_pvplist) == NULL);
2075 	if (*ppvfsp) {
2076 		pvfsp = *ppvfsp;
2077 		*ppvfsp = pvfsp->pvfs_next;
2078 	}
2079 	mutex_exit(mtx);
2080 	kmem_free(pvfsp, sizeof (portfop_vfs_t));
2081 	return (error);
2082 }
2083 
2084 /*
2085  * ------------------------------file op hooks--------------------------
2086  * The O_TRUNC operation is caught with the VOP_SETATTR(AT_SIZE) call.
2087  */
2088 static int
2089 port_fop_open(femarg_t *vf, int mode, cred_t *cr, caller_context_t *ct)
2090 {
2091 	int		retval;
2092 	vnode_t		*vp = (vnode_t *)vf->fa_fnode->fn_available;
2093 
2094 	retval = vnext_open(vf, mode, cr, ct);
2095 	port_fop(vp, FOP_FILE_OPEN, retval);
2096 	return (retval);
2097 }
2098 
2099 static int
2100 port_fop_write(femarg_t *vf, struct uio *uiop, int ioflag, struct cred *cr,
2101     caller_context_t *ct)
2102 {
2103 	int		retval;
2104 	vnode_t		*vp = (vnode_t *)vf->fa_fnode->fn_available;
2105 
2106 	retval =  vnext_write(vf, uiop, ioflag, cr, ct);
2107 	port_fop(vp, FOP_FILE_WRITE, retval);
2108 	return (retval);
2109 }
2110 
2111 static int
2112 port_fop_map(femarg_t *vf, offset_t off, struct as *as, caddr_t *addrp,
2113     size_t len, uchar_t prot, uchar_t maxport, uint_t flags, cred_t *cr,
2114     caller_context_t *ct)
2115 {
2116 	int		retval;
2117 	vnode_t		*vp = (vnode_t *)vf->fa_fnode->fn_available;
2118 
2119 	retval =  vnext_map(vf, off, as, addrp, len, prot, maxport,
2120 	    flags, cr, ct);
2121 	port_fop(vp, FOP_FILE_MAP, retval);
2122 	return (retval);
2123 }
2124 
2125 static int
2126 port_fop_read(femarg_t *vf, struct uio *uiop, int ioflag, struct cred *cr,
2127     caller_context_t *ct)
2128 {
2129 	int		retval;
2130 	vnode_t		*vp = (vnode_t *)vf->fa_fnode->fn_available;
2131 
2132 	retval =  vnext_read(vf, uiop, ioflag, cr, ct);
2133 	port_fop(vp, FOP_FILE_READ, retval);
2134 	return (retval);
2135 }
2136 
2137 
2138 /*
2139  * AT_SIZE - is for the open(O_TRUNC) case.
2140  */
2141 int
2142 port_fop_setattr(femarg_t *vf, vattr_t *vap, int flags, cred_t *cr,
2143     caller_context_t *ct)
2144 {
2145 	int		retval;
2146 	vnode_t		*vp = (vnode_t *)vf->fa_fnode->fn_available;
2147 	int		events = 0;
2148 
2149 	retval = vnext_setattr(vf, vap, flags, cr, ct);
2150 	if (vap->va_mask & (AT_SIZE|AT_MTIME)) {
2151 		events |= FOP_FILE_SETATTR_MTIME;
2152 	}
2153 	if (vap->va_mask & AT_ATIME) {
2154 		events |= FOP_FILE_SETATTR_ATIME;
2155 	}
2156 	events |= FOP_FILE_SETATTR_CTIME;
2157 
2158 	port_fop(vp, events, retval);
2159 	return (retval);
2160 }
2161 
2162 int
2163 port_fop_create(femarg_t *vf, char *name, vattr_t *vap, vcexcl_t excl,
2164     int mode, vnode_t **vpp, cred_t *cr, int flag,
2165     caller_context_t *ct, vsecattr_t *vsecp)
2166 {
2167 	int		retval, got = 1;
2168 	vnode_t		*vp = (vnode_t *)vf->fa_fnode->fn_available;
2169 	vattr_t		vatt, vatt1;
2170 
2171 	/*
2172 	 * If the file already exists, then there will be no change
2173 	 * to the directory. Therefore, we need to compare the
2174 	 * modification time of the directory to determine if the
2175 	 * file was actually created.
2176 	 */
2177 	vatt.va_mask = AT_ATIME|AT_MTIME|AT_CTIME;
2178 	if (VOP_GETATTR(vp, &vatt, 0, CRED(), ct)) {
2179 		got = 0;
2180 	}
2181 	retval = vnext_create(vf, name, vap, excl, mode, vpp, cr,
2182 	    flag, ct, vsecp);
2183 
2184 	vatt1.va_mask = AT_ATIME|AT_MTIME|AT_CTIME;
2185 	if (got && !VOP_GETATTR(vp, &vatt1, 0, CRED(), ct)) {
2186 		if ((vatt1.va_mtime.tv_sec > vatt.va_mtime.tv_sec ||
2187 		    (vatt1.va_mtime.tv_sec = vatt.va_mtime.tv_sec &&
2188 		    vatt1.va_mtime.tv_nsec > vatt.va_mtime.tv_nsec))) {
2189 			/*
2190 			 * File was created.
2191 			 */
2192 			port_fop(vp, FOP_FILE_CREATE, retval);
2193 		}
2194 	}
2195 	return (retval);
2196 }
2197 
2198 int
2199 port_fop_remove(femarg_t *vf, char *nm, cred_t *cr, caller_context_t *ct,
2200     int flags)
2201 {
2202 	int		retval;
2203 	vnode_t		*vp = (vnode_t *)vf->fa_fnode->fn_available;
2204 
2205 	retval = vnext_remove(vf, nm, cr, ct, flags);
2206 	port_fop(vp, FOP_FILE_REMOVE, retval);
2207 	return (retval);
2208 }
2209 
2210 int
2211 port_fop_link(femarg_t *vf, vnode_t *svp, char *tnm, cred_t *cr,
2212     caller_context_t *ct, int flags)
2213 {
2214 	int		retval;
2215 	vnode_t		*vp = (vnode_t *)vf->fa_fnode->fn_available;
2216 
2217 	retval = vnext_link(vf, svp, tnm, cr, ct, flags);
2218 	port_fop(vp, FOP_FILE_LINK, retval);
2219 	return (retval);
2220 }
2221 
2222 /*
2223  * Rename operation is allowed only when from and to directories are
2224  * on the same filesystem. This is checked in vn_rename().
2225  * The target directory is notified thru a VNEVENT by the filesystem
2226  * if the source dir != target dir.
2227  */
2228 int
2229 port_fop_rename(femarg_t *vf, char *snm, vnode_t *tdvp, char *tnm, cred_t *cr,
2230     caller_context_t *ct, int flags)
2231 {
2232 	int		retval;
2233 	vnode_t		*vp = (vnode_t *)vf->fa_fnode->fn_available;
2234 
2235 	retval = vnext_rename(vf, snm, tdvp, tnm, cr, ct, flags);
2236 	port_fop(vp, FOP_FILE_RENAMESRC, retval);
2237 	return (retval);
2238 }
2239 
2240 int
2241 port_fop_mkdir(femarg_t *vf, char *dirname, vattr_t *vap, vnode_t **vpp,
2242     cred_t *cr, caller_context_t *ct, int flags, vsecattr_t *vsecp)
2243 {
2244 	int		retval;
2245 	vnode_t		*vp = (vnode_t *)vf->fa_fnode->fn_available;
2246 
2247 	retval = vnext_mkdir(vf, dirname, vap, vpp, cr, ct, flags, vsecp);
2248 	port_fop(vp, FOP_FILE_MKDIR, retval);
2249 	return (retval);
2250 }
2251 
2252 int
2253 port_fop_rmdir(femarg_t *vf, char *nm, vnode_t *cdir, cred_t *cr,
2254     caller_context_t *ct, int flags)
2255 {
2256 	int		retval;
2257 	vnode_t		*vp = (vnode_t *)vf->fa_fnode->fn_available;
2258 
2259 	retval = vnext_rmdir(vf, nm, cdir, cr, ct, flags);
2260 	port_fop(vp, FOP_FILE_RMDIR, retval);
2261 	return (retval);
2262 }
2263 
2264 int
2265 port_fop_readdir(femarg_t *vf, uio_t *uiop, cred_t *cr, int *eofp,
2266     caller_context_t *ct, int flags)
2267 {
2268 	int		retval;
2269 	vnode_t		*vp = (vnode_t *)vf->fa_fnode->fn_available;
2270 
2271 	retval = vnext_readdir(vf, uiop, cr, eofp, ct, flags);
2272 	port_fop(vp, FOP_FILE_READDIR, retval);
2273 	return (retval);
2274 }
2275 
2276 int
2277 port_fop_symlink(femarg_t *vf, char *linkname, vattr_t *vap, char *target,
2278     cred_t *cr, caller_context_t *ct, int flags)
2279 {
2280 	int		retval;
2281 	vnode_t		*vp = (vnode_t *)vf->fa_fnode->fn_available;
2282 
2283 	retval = vnext_symlink(vf, linkname, vap, target, cr, ct, flags);
2284 	port_fop(vp, FOP_FILE_SYMLINK, retval);
2285 	return (retval);
2286 }
2287 
2288 /*
2289  * acl, facl call this.
2290  */
2291 int
2292 port_fop_setsecattr(femarg_t *vf, vsecattr_t *vsap, int flags, cred_t *cr,
2293     caller_context_t *ct)
2294 {
2295 	int	retval;
2296 	vnode_t		*vp = (vnode_t *)vf->fa_fnode->fn_available;
2297 	retval = vnext_setsecattr(vf, vsap, flags, cr, ct);
2298 	port_fop(vp, FOP_FILE_SETSECATTR, retval);
2299 	return (retval);
2300 }
2301 
2302 /*
2303  * these are events on the watched file/directory
2304  */
2305 int
2306 port_fop_vnevent(femarg_t *vf, vnevent_t vnevent, vnode_t *dvp, char *name,
2307     caller_context_t *ct)
2308 {
2309 	vnode_t		*vp = (vnode_t *)vf->fa_fnode->fn_available;
2310 
2311 	switch (vnevent) {
2312 	case	VE_RENAME_SRC:
2313 			port_fop_sendevent(vp, FILE_RENAME_FROM, dvp, name);
2314 		break;
2315 	case	VE_RENAME_DEST:
2316 			port_fop_sendevent(vp, FILE_RENAME_TO, dvp, name);
2317 		break;
2318 	case	VE_REMOVE:
2319 			port_fop_sendevent(vp, FILE_DELETE, dvp, name);
2320 		break;
2321 	case	VE_RMDIR:
2322 			port_fop_sendevent(vp, FILE_DELETE, dvp, name);
2323 		break;
2324 	case	VE_CREATE:
2325 			port_fop_sendevent(vp, FILE_MODIFIED|FILE_ATTRIB,
2326 			    NULL, NULL);
2327 		break;
2328 	case	VE_LINK:
2329 			port_fop_sendevent(vp, FILE_ATTRIB, NULL, NULL);
2330 		break;
2331 
2332 	case	VE_RENAME_DEST_DIR:
2333 			port_fop_sendevent(vp, FILE_MODIFIED|FILE_ATTRIB,
2334 			    NULL, NULL);
2335 		break;
2336 
2337 	case	VE_MOUNTEDOVER:
2338 			port_fop_sendevent(vp, MOUNTEDOVER, NULL, NULL);
2339 		break;
2340 	default:
2341 		break;
2342 	}
2343 	return (vnext_vnevent(vf, vnevent, dvp, name, ct));
2344 }
2345