xref: /illumos-gate/usr/src/uts/common/fs/nfs/nfs4_srv.c (revision 7c478bd95313f5f23a4c958a745db2134aa03244)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  *	Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
29  *	All Rights Reserved
30  */
31 
32 #pragma ident	"%Z%%M%	%I%	%E% SMI"
33 
34 #include <sys/param.h>
35 #include <sys/types.h>
36 #include <sys/systm.h>
37 #include <sys/cred.h>
38 #include <sys/buf.h>
39 #include <sys/vfs.h>
40 #include <sys/vnode.h>
41 #include <sys/uio.h>
42 #include <sys/errno.h>
43 #include <sys/sysmacros.h>
44 #include <sys/statvfs.h>
45 #include <sys/kmem.h>
46 #include <sys/dirent.h>
47 #include <sys/cmn_err.h>
48 #include <sys/debug.h>
49 #include <sys/systeminfo.h>
50 #include <sys/flock.h>
51 #include <sys/pathname.h>
52 #include <sys/nbmlock.h>
53 #include <sys/share.h>
54 #include <sys/atomic.h>
55 #include <sys/policy.h>
56 #include <sys/fem.h>
57 
58 #include <rpc/types.h>
59 #include <rpc/auth.h>
60 #include <rpc/rpcsec_gss.h>
61 #include <rpc/svc.h>
62 
63 #include <nfs/nfs.h>
64 #include <nfs/export.h>
65 #include <nfs/lm.h>
66 #include <nfs/nfs4.h>
67 
68 #include <sys/strsubr.h>
69 #include <sys/strsun.h>
70 
71 #include <inet/common.h>
72 #include <inet/ip.h>
73 #include <inet/ip6.h>
74 
/*
 * Lock-retry tunables.  Each compile-time default is paired with a
 * file-static variable initialized from it.  The code that consumes
 * these is outside the portion of the file visible here.
 */
75 #define	RFS4_MAXLOCK_TRIES 4	/* Try to get the lock this many times */
76 static int rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES;
77 #define	RFS4_LOCK_DELAY 10	/* Milliseconds */
78 static clock_t rfs4_lock_delay = RFS4_LOCK_DELAY;
79 
80 /* End of Tunables */
81 
82 /*
83  * Used to bump the stateid4.seqid value and show changes in the stateid
84  */
/* Pre-increments (sp)->bits.chgseq and evaluates to the new value. */
85 #define	next_stateid(sp) (++(sp)->bits.chgseq)
86 
87 /*
88  * RFS4_MINLEN_ENTRY4: XDR-encoded size of smallest possible dirent.
89  *	This is used to return NFS4ERR_TOOSMALL when clients specify
90  *	maxcount that isn't large enough to hold the smallest possible
91  *	XDR encoded dirent.
92  *
93  *	    sizeof cookie (8 bytes) +
94  *	    sizeof name_len (4 bytes) +
95  *	    sizeof smallest (padded) name (4 bytes) +
96  *	    sizeof bitmap4_len (12 bytes) +   NOTE: we always encode len=2 bm4
97  *	    sizeof attrlist4_len (4 bytes) +
98  *	    sizeof next boolean (4 bytes)
99  *
100  * RFS4_MINLEN_RDDIR4: XDR-encoded size of READDIR op reply containing
101  * the smallest possible entry4 (assumes no attrs requested).
102  *	sizeof nfsstat4 (4 bytes) +
103  *	sizeof verifier4 (8 bytes) +
104  *	sizeof entry4list bool (4 bytes) +
105  *	sizeof entry4 	(36 bytes) +
106  *	sizeof eof bool  (4 bytes)
107  *
108  * RFS4_MINLEN_RDDIR_BUF: minimum length of buffer server will provide to
109  *	VOP_READDIR.  Its value is the size of the maximum possible dirent
110  *	for solaris.  The DIRENT64_RECLEN macro returns	the size of dirent
111  *	required for a given name length.  MAXNAMELEN is the maximum
112  *	filename length allowed in Solaris.  The first two DIRENT64_RECLEN()
113  *	macros are to allow for . and .. entries -- just a minor tweak to try
114  *	and guarantee that buffer we give to VOP_READDIR will be large enough
115  *	to hold ., .., and the largest possible solaris dirent64.
116  */
/* 36 = 8 (cookie) + 4 (name_len) + 4 (name) + 12 (bitmap) + 4 + 4 */
117 #define	RFS4_MINLEN_ENTRY4 36
/* status + verifier + entry-list bool + one minimal entry4 + eof bool */
118 #define	RFS4_MINLEN_RDDIR4 (4 + NFS4_VERIFIER_SIZE + 4 + RFS4_MINLEN_ENTRY4 + 4)
/* room for ".", ".." and one dirent64 with a MAXNAMELEN-sized name */
119 #define	RFS4_MINLEN_RDDIR_BUF \
120 	(DIRENT64_RECLEN(1) + DIRENT64_RECLEN(2) + DIRENT64_RECLEN(MAXNAMELEN))
121 
122 /*
123  * It would be better to pad to 4 bytes since that's what XDR would do,
124  * but the dirents UFS gives us are already padded to 8, so just take
125  * what we're given.  Dircount is only a hint anyway.  Currently the
126  * solaris kernel is ASCII only, so there's no point in calling the
127  * UTF8 functions.
128  *
129  * dirent64: name padded to provide 8 byte struct alignment
130  *	d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad)
131  *
132  * cookie: uint64_t   +  utf8namelen: uint_t  +   utf8name padded to 8 bytes
133  *
134  */
/*
 * Three XDR units (8-byte cookie + 4-byte name length) plus the name
 * length that DIRENT64_NAMELEN() derives from the entry's d_reclen.
 */
135 #define	DIRENT64_TO_DIRCOUNT(dp) \
136 	(3 * BYTES_PER_XDR_UNIT + DIRENT64_NAMELEN((dp)->d_reclen))
137 
/*
 * NOTE(review): the rfs4_srvrinit() in this file does not assign
 * rfs4_start_time; confirm where it is actually initialized.
 */
137 time_t rfs4_start_time;			/* Initialized in rfs4_srvrinit */
138 
/* Allocated in rfs4_srvrinit(), released in rfs4_srvrfini(). */
139 static sysid_t lockt_sysid;		/* dummy sysid for all LOCKT calls */
140 
/* Set from fs_new_caller_id() in rfs4_srvrinit(). */
141 u_longlong_t nfs4_srv_caller_id;
142 
/* Write verifier; built in rfs4_srvrinit() from hostid and/or time. */
143 verifier4	Write4verf;
/* NOTE(review): not assigned in the portion of the file visible here. */
144 verifier4	Readdir4verf;
146 
/*
 * Forward declarations.
 *
 * The rfs4_op_* handlers all take (argop, resop, svc_req, compound_state)
 * and are installed in the dis_proc slot of the rfsv4disptab dispatch
 * table; the *_free routines take the resop and are installed in the
 * dis_resfree slot.
 */
147 void		rfs4_init_compound_state(struct compound_state *);
148 
149 static void	nullfree(caddr_t);
150 static void	rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
151 			struct compound_state *);
152 static void	rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
153 			struct compound_state *);
154 static void	rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
155 			struct compound_state *);
156 static void	rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
157 			struct compound_state *);
158 static void	rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
159 			struct compound_state *);
160 static void	rfs4_op_create_free(nfs_resop4 *resop);
161 static void	rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *,
162 				    struct svc_req *, struct compound_state *);
163 static void	rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
164 			struct compound_state *);
165 static void	rfs4_op_getattr_free(nfs_resop4 *);
166 static void	rfs4_op_getfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
167 			struct compound_state *);
168 static void	rfs4_op_getfh_free(nfs_resop4 *);
169 static void	rfs4_op_illegal(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
170 			struct compound_state *);
171 static void	rfs4_op_link(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
172 			struct compound_state *);
173 static void	rfs4_op_lock(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
174 			struct compound_state *);
175 static void	lock_denied_free(nfs_resop4 *);
176 static void	rfs4_op_locku(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
177 			struct compound_state *);
178 static void	rfs4_op_lockt(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
179 			struct compound_state *);
180 static void	rfs4_op_lookup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
181 			struct compound_state *);
182 static void	rfs4_op_lookupp(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
183 			struct compound_state *);
184 static void	rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop,
185 				struct svc_req *req, struct compound_state *cs);
186 static void	rfs4_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
187 			struct compound_state *);
188 static void	rfs4_op_open(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
189 			struct compound_state *);
190 static void	rfs4_op_open_confirm(nfs_argop4 *, nfs_resop4 *,
191 			struct svc_req *, struct compound_state *);
192 static void	rfs4_op_open_downgrade(nfs_argop4 *, nfs_resop4 *,
193 			struct svc_req *, struct compound_state *);
194 static void	rfs4_op_putfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
195 			struct compound_state *);
196 static void	rfs4_op_putpubfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
197 			struct compound_state *);
198 static void	rfs4_op_putrootfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
199 			struct compound_state *);
200 static void	rfs4_op_read(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
201 			struct compound_state *);
202 static void	rfs4_op_read_free(nfs_resop4 *);
203 static void	rfs4_op_readdir_free(nfs_resop4 *resop);
204 static void	rfs4_op_readlink(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
205 			struct compound_state *);
206 static void	rfs4_op_readlink_free(nfs_resop4 *);
207 static void	rfs4_op_release_lockowner(nfs_argop4 *, nfs_resop4 *,
208 			struct svc_req *, struct compound_state *);
209 static void	rfs4_op_remove(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
210 			struct compound_state *);
211 static void	rfs4_op_rename(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
212 			struct compound_state *);
213 static void	rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
214 			struct compound_state *);
215 static void	rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
216 			struct compound_state *);
217 static void	rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
218 			struct compound_state *);
219 static void	rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
220 			struct compound_state *);
221 static void	rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
222 			struct compound_state *);
223 static void	rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
224 			struct compound_state *);
225 static void	rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *,
226 			struct svc_req *, struct compound_state *);
227 static void	rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *,
228 			struct svc_req *req, struct compound_state *);
229 static void	rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
230 			struct compound_state *);
231 static void	rfs4_op_secinfo_free(nfs_resop4 *);
232 
/* Helpers for open access checks, client sysids and share reservations. */
233 static nfsstat4 check_open_access(uint32_t,
234 				struct compound_state *, struct svc_req *);
235 nfsstat4 rfs4_client_sysid(rfs4_client_t *, sysid_t *);
236 static int	vop_shrlock(vnode_t *, int, struct shrlock *, int);
237 static int 	rfs4_shrlock(rfs4_state_t *, int);
238 static int	rfs4_share(rfs4_state_t *);
239 void rfs4_ss_clid(rfs4_client_t *, struct svc_req *);
240 
241 /*
242  * translation table for attrs
243  */
/*
 * NOTE(review): the field descriptions below are inferred from names
 * and types only; the code that fills this table is outside this view.
 */
244 struct nfs4_ntov_table {
245 	union nfs4_attr_u *na;	/* presumably the attribute value array */
246 	uint8_t amap[NFS4_MAXNUM_ATTRS];	/* presumably attr index map */
247 	int attrcnt;		/* presumably number of entries in use */
248 	bool_t vfsstat;		/* TODO confirm meaning against users */
249 };
250 
/* Setup/teardown of a struct nfs4_ntov_table and the attribute setter. */
251 static void	nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp);
252 static void	nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
253 				    struct nfs4_svgetit_arg *sargp);
254 
255 static nfsstat4	do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp,
256 		    struct compound_state *cs, struct nfs4_svgetit_arg *sargp,
257 		    struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd);
258 
/*
 * FEM monitors created in rfs4_srvrinit() from nfs4_rd_deleg_tmpl and
 * nfs4_wr_deleg_tmpl, and freed in rfs4_srvrfini().
 */
259 fem_t	*deleg_rdops;
260 fem_t	*deleg_wrops;
261 
/*
 * Server instances form a list linked through ->prev/->next;
 * rfs4_cur_servinst is the most recently created one.
 */
262 rfs4_servinst_t	*rfs4_cur_servinst = NULL;	/* current server instance */
263 kmutex_t	rfs4_servinst_lock;		/* protects linked list */
264 int		rfs4_seen_first_compound;	/* set first time we see one */
265 
266 #ifdef DEBUG
/* Enables the NFS4_DEBUG messages in the rfs4_servinst_ and rfs4_grace_ routines. */
267 int	rfs4_servinst_debug = 0;
268 #endif
269 
270 /*
271  * NFS4 op dispatch table
272  */
273 
/*
 * One dispatch entry per operation: the handler, the routine that
 * releases whatever the handler allocated for the reply, and flags.
 */
274 struct rfsv4disp {
275 	void	(*dis_proc)();		/* proc to call */
276 	void	(*dis_resfree)();	/* frees space allocated by proc */
277 	int	dis_flags;		/* RPC_IDEMPOTENT, etc... */
278 };
279 
/*
 * Dispatch table.  The comment on each entry gives its array index,
 * which is the NFSv4 operation number; indices 0-2 and OP_DELEGPURGE
 * are routed to rfs4_op_illegal/rfs4_op_inval.  rfs4_op_readdir and
 * rfs4_free_reply are not declared in this file's prototype list.
 */
280 static struct rfsv4disp rfsv4disptab[] = {
281 	/*
282 	 * NFS VERSION 4
283 	 */
284 
285 	/* RFS_NULL = 0 */
286 	{rfs4_op_illegal, nullfree, 0},
287 
288 	/* UNUSED = 1 */
289 	{rfs4_op_illegal, nullfree, 0},
290 
291 	/* UNUSED = 2 */
292 	{rfs4_op_illegal, nullfree, 0},
293 
294 	/* OP_ACCESS = 3 */
295 	{rfs4_op_access, nullfree, RPC_IDEMPOTENT},
296 
297 	/* OP_CLOSE = 4 */
298 	{rfs4_op_close, nullfree, 0},
299 
300 	/* OP_COMMIT = 5 */
301 	{rfs4_op_commit, nullfree, RPC_IDEMPOTENT},
302 
303 	/* OP_CREATE = 6 */
304 	{rfs4_op_create, nullfree, 0},
305 
306 	/* OP_DELEGPURGE = 7 */
307 	{rfs4_op_inval, nullfree, 0},
308 
309 	/* OP_DELEGRETURN = 8 */
310 	{rfs4_op_delegreturn, nullfree, 0},
311 
312 	/* OP_GETATTR = 9 */
313 	{rfs4_op_getattr, rfs4_op_getattr_free, RPC_IDEMPOTENT},
314 
315 	/* OP_GETFH = 10 */
316 	{rfs4_op_getfh, rfs4_op_getfh_free, RPC_ALL},
317 
318 	/* OP_LINK = 11 */
319 	{rfs4_op_link, nullfree, 0},
320 
321 	/* OP_LOCK = 12 */
322 	{rfs4_op_lock, lock_denied_free, 0},
323 
324 	/* OP_LOCKT = 13 */
325 	{rfs4_op_lockt, lock_denied_free, 0},
326 
327 	/* OP_LOCKU = 14 */
328 	{rfs4_op_locku, nullfree, 0},
329 
330 	/* OP_LOOKUP = 15 */
331 	{rfs4_op_lookup, nullfree, (RPC_IDEMPOTENT|RPC_PUBLICFH_OK)},
332 
333 	/* OP_LOOKUPP = 16 */
334 	{rfs4_op_lookupp, nullfree, (RPC_IDEMPOTENT|RPC_PUBLICFH_OK)},
335 
336 	/* OP_NVERIFY = 17 */
337 	{rfs4_op_nverify, nullfree, RPC_IDEMPOTENT},
338 
339 	/* OP_OPEN = 18 */
340 	{rfs4_op_open, rfs4_free_reply, 0},
341 
342 	/* OP_OPENATTR = 19 */
343 	{rfs4_op_openattr, nullfree, 0},
344 
345 	/* OP_OPEN_CONFIRM = 20 */
346 	{rfs4_op_open_confirm, nullfree, 0},
347 
348 	/* OP_OPEN_DOWNGRADE = 21 */
349 	{rfs4_op_open_downgrade, nullfree, 0},
350 
351 	/* OP_PUTFH = 22 */
352 	{rfs4_op_putfh, nullfree, RPC_ALL},
353 
354 	/* OP_PUTPUBFH = 23 */
355 	{rfs4_op_putpubfh, nullfree, RPC_ALL},
356 
357 	/* OP_PUTROOTFH = 24 */
358 	{rfs4_op_putrootfh, nullfree, RPC_ALL},
359 
360 	/* OP_READ = 25 */
361 	{rfs4_op_read, rfs4_op_read_free, RPC_IDEMPOTENT},
362 
363 	/* OP_READDIR = 26 */
364 	{rfs4_op_readdir, rfs4_op_readdir_free, RPC_IDEMPOTENT},
365 
366 	/* OP_READLINK = 27 */
367 	{rfs4_op_readlink, rfs4_op_readlink_free, RPC_IDEMPOTENT},
368 
369 	/* OP_REMOVE = 28 */
370 	{rfs4_op_remove, nullfree, 0},
371 
372 	/* OP_RENAME = 29 */
373 	{rfs4_op_rename, nullfree, 0},
374 
375 	/* OP_RENEW = 30 */
376 	{rfs4_op_renew, nullfree, 0},
377 
378 	/* OP_RESTOREFH = 31 */
379 	{rfs4_op_restorefh, nullfree, RPC_ALL},
380 
381 	/* OP_SAVEFH = 32 */
382 	{rfs4_op_savefh, nullfree, RPC_ALL},
383 
384 	/* OP_SECINFO = 33 */
385 	{rfs4_op_secinfo, rfs4_op_secinfo_free, 0},
386 
387 	/* OP_SETATTR = 34 */
388 	{rfs4_op_setattr, nullfree, 0},
389 
390 	/* OP_SETCLIENTID = 35 */
391 	{rfs4_op_setclientid, nullfree, 0},
392 
393 	/* OP_SETCLIENTID_CONFIRM = 36 */
394 	{rfs4_op_setclientid_confirm, nullfree, 0},
395 
396 	/* OP_VERIFY = 37 */
397 	{rfs4_op_verify, nullfree, RPC_IDEMPOTENT},
398 
399 	/* OP_WRITE = 38 */
400 	{rfs4_op_write, nullfree, 0},
401 
402 	/* OP_RELEASE_LOCKOWNER = 39 */
403 	{rfs4_op_release_lockowner, nullfree, 0},
404 };
405 
/* Number of entries in rfsv4disptab. */
406 static uint_t rfsv4disp_cnt = sizeof (rfsv4disptab) / sizeof (rfsv4disptab[0]);
407 
/*
 * Equal to the table size, i.e. one past the last rfsv4disptab index, so
 * it must not be used to index rfsv4disptab itself.  It matches the
 * position of the trailing "rfs4_op_illegal" entry in rfs4_op_string.
 */
408 #define	OP_ILLEGAL_IDX (rfsv4disp_cnt)
409 
410 #ifdef DEBUG
411 
/*
 * Debug switches, compiled only under DEBUG.  None of them is read in
 * the portion of the file visible here.
 */
412 int rfs4_fillone_debug = 0;
413 int rfs4_shrlock_debug = 0;
414 int rfs4_no_stub_access = 1;
415 int rfs4_rddir_debug = 0;
416 
/*
 * Printable handler names for debug output.  The position of each
 * string matches the index of the corresponding rfsv4disptab entry
 * (0 through 39); the extra final entry, "rfs4_op_illegal", sits at
 * the index given by OP_ILLEGAL_IDX.
 */
static char *rfs4_op_string[] = {
	"rfs4_op_null",
	"rfs4_op_1 unused",
	"rfs4_op_2 unused",
	"rfs4_op_access",
	"rfs4_op_close",
	"rfs4_op_commit",
	"rfs4_op_create",
	"rfs4_op_delegpurge",
	"rfs4_op_delegreturn",
	"rfs4_op_getattr",
	"rfs4_op_getfh",
	"rfs4_op_link",
	"rfs4_op_lock",
	"rfs4_op_lockt",
	"rfs4_op_locku",
	"rfs4_op_lookup",
	"rfs4_op_lookupp",
	"rfs4_op_nverify",
	"rfs4_op_open",
	"rfs4_op_openattr",
	"rfs4_op_open_confirm",
	"rfs4_op_open_downgrade",
	"rfs4_op_putfh",
	"rfs4_op_putpubfh",
	"rfs4_op_putrootfh",
	"rfs4_op_read",
	"rfs4_op_readdir",
	"rfs4_op_readlink",
	"rfs4_op_remove",
	"rfs4_op_rename",
	"rfs4_op_renew",
	"rfs4_op_restorefh",
	"rfs4_op_savefh",
	"rfs4_op_secinfo",
	"rfs4_op_setattr",
	"rfs4_op_setclientid",
	/* was "rfs4_op_setclient_confirm", which names no handler */
	"rfs4_op_setclientid_confirm",
	"rfs4_op_verify",
	"rfs4_op_write",
	"rfs4_op_release_lockowner",
	"rfs4_op_illegal"
};
460 #endif
461 
462 void rfs4_ss_chkclid(rfs4_client_t *);
463 
/*
 * Replace any earlier definition of nextdp.  The macro advances a
 * dirent64 pointer by the byte length stored in its own d_reclen.
 */
464 #ifdef	nextdp
465 #undef nextdp
466 #endif
467 #define	nextdp(dp)	((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
468 
/*
 * Operation templates handed to fem_create() in rfs4_srvrinit() to build
 * deleg_rdops (read delegation) and deleg_wrops (write delegation).
 * Each row pairs a VOPNAME_* with a deleg_* handler; the list ends with a
 * NULL, NULL row.  The write template additionally covers VOPNAME_READ.
 */
469 static const fs_operation_def_t nfs4_rd_deleg_tmpl[] = {
470 	VOPNAME_OPEN, deleg_rdopen,
471 	VOPNAME_WRITE, deleg_write,
472 	VOPNAME_SETATTR, deleg_setattr,
473 	VOPNAME_RWLOCK, deleg_rd_rwlock,
474 	VOPNAME_SPACE, deleg_space,
475 	VOPNAME_SETSECATTR, deleg_setsecattr,
476 	VOPNAME_VNEVENT, deleg_vnevent,
477 	NULL, NULL
478 };
479 static const fs_operation_def_t nfs4_wr_deleg_tmpl[] = {
480 	VOPNAME_OPEN, deleg_wropen,
481 	VOPNAME_READ, deleg_read,
482 	VOPNAME_WRITE, deleg_write,
483 	VOPNAME_SETATTR, deleg_setattr,
484 	VOPNAME_RWLOCK, deleg_wr_rwlock,
485 	VOPNAME_SPACE, deleg_space,
486 	VOPNAME_SETSECATTR, deleg_setsecattr,
487 	VOPNAME_VNEVENT, deleg_vnevent,
488 	NULL, NULL
489 };
490 
491 int
492 rfs4_srvrinit(void)
493 {
494 	timespec32_t verf;
495 	int error;
496 	extern void rfs4_attr_init();
497 	extern krwlock_t rfs4_deleg_policy_lock;
498 
499 	/*
500 	 * The following algorithm attempts to find a unique verifier
501 	 * to be used as the write verifier returned from the server
502 	 * to the client.  It is important that this verifier change
503 	 * whenever the server reboots.  Of secondary importance, it
504 	 * is important for the verifier to be unique between two
505 	 * different servers.
506 	 *
507 	 * Thus, an attempt is made to use the system hostid and the
508 	 * current time in seconds when the nfssrv kernel module is
509 	 * loaded.  It is assumed that an NFS server will not be able
510 	 * to boot and then to reboot in less than a second.  If the
511 	 * hostid has not been set, then the current high resolution
512 	 * time is used.  This will ensure different verifiers each
513 	 * time the server reboots and minimize the chances that two
514 	 * different servers will have the same verifier.
515 	 * XXX - this is broken on LP64 kernels.
516 	 */
517 	verf.tv_sec = (time_t)nfs_atoi(hw_serial);
518 	if (verf.tv_sec != 0) {
519 		verf.tv_nsec = gethrestime_sec();
520 	} else {
521 		timespec_t tverf;
522 
523 		gethrestime(&tverf);
524 		verf.tv_sec = (time_t)tverf.tv_sec;
525 		verf.tv_nsec = tverf.tv_nsec;
526 	}
527 
528 	Write4verf = *(uint64_t *)&verf;
529 
530 	rfs4_attr_init();
531 	mutex_init(&rfs4_deleg_lock, NULL, MUTEX_DEFAULT, NULL);
532 
533 	/* Used to manage create/destroy of server state */
534 	mutex_init(&rfs4_state_lock, NULL, MUTEX_DEFAULT, NULL);
535 
536 	/* Used to manage access to server instance linked list */
537 	mutex_init(&rfs4_servinst_lock, NULL, MUTEX_DEFAULT, NULL);
538 
539 	/* Used to manage access to rfs4_deleg_policy */
540 	rw_init(&rfs4_deleg_policy_lock, NULL, RW_DEFAULT, NULL);
541 
542 	error = fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops);
543 	if (error != 0) {
544 		rfs4_disable_delegation();
545 	} else {
546 		error = fem_create("deleg_wrops", nfs4_wr_deleg_tmpl,
547 				&deleg_wrops);
548 		if (error != 0) {
549 			rfs4_disable_delegation();
550 			fem_free(deleg_rdops);
551 		}
552 	}
553 
554 	nfs4_srv_caller_id = fs_new_caller_id();
555 
556 	lockt_sysid = lm_alloc_sysidt();
557 
558 	return (0);
559 }
560 
561 void
562 rfs4_srvrfini(void)
563 {
564 	extern krwlock_t rfs4_deleg_policy_lock;
565 
566 	if (lockt_sysid != LM_NOSYSID) {
567 		lm_free_sysidt(lockt_sysid);
568 		lockt_sysid = LM_NOSYSID;
569 	}
570 
571 	mutex_destroy(&rfs4_deleg_lock);
572 	mutex_destroy(&rfs4_state_lock);
573 	rw_destroy(&rfs4_deleg_policy_lock);
574 
575 	fem_free(deleg_rdops);
576 	fem_free(deleg_wrops);
577 }
578 
579 void
580 rfs4_init_compound_state(struct compound_state *cs)
581 {
582 	bzero(cs, sizeof (*cs));
583 	cs->cont = TRUE;
584 	cs->access = CS_ACCESS_DENIED;
585 	cs->deleg = FALSE;
586 	cs->mandlock = FALSE;
587 	cs->fh.nfs_fh4_val = cs->fhbuf;
588 }
589 
590 void
591 rfs4_grace_start(rfs4_servinst_t *sip)
592 {
593 	time_t now = gethrestime_sec();
594 
595 	NFS4_DEBUG(rfs4_servinst_debug, (CE_NOTE,
596 	    "rfs4_grace_start: inst %p: 0x%lx", (void *)sip, now));
597 
598 	rw_enter(&sip->rwlock, RW_WRITER);
599 	sip->start_time = now;
600 	sip->grace_period = rfs4_grace_period;
601 	rw_exit(&sip->rwlock);
602 }
603 
604 /*
605  * returns true if the instance's grace period has never been started
606  */
607 int
608 rfs4_servinst_grace_new(rfs4_servinst_t *sip)
609 {
610 	time_t start_time;
611 
612 	rw_enter(&sip->rwlock, RW_READER);
613 	start_time = sip->start_time;
614 	rw_exit(&sip->rwlock);
615 
616 	return (start_time == 0);
617 }
618 
619 /*
620  * Indicates if server instance is within the
621  * grace period.
622  */
623 int
624 rfs4_servinst_in_grace(rfs4_servinst_t *sip)
625 {
626 	time_t grace_expiry;
627 
628 	rw_enter(&sip->rwlock, RW_READER);
629 	grace_expiry = sip->start_time + sip->grace_period;
630 	rw_exit(&sip->rwlock);
631 
632 	return (gethrestime_sec() < grace_expiry);
633 }
634 
635 int
636 rfs4_clnt_in_grace(rfs4_client_t *cp)
637 {
638 	ASSERT(rfs4_dbe_refcnt(cp->dbe) > 0);
639 
640 	return (rfs4_servinst_in_grace(cp->server_instance));
641 }
642 
643 /*
644  * reset all currently active grace periods
645  */
646 void
647 rfs4_grace_reset_all(void)
648 {
649 #ifdef DEBUG
650 	int n = 0;
651 #endif
652 	rfs4_servinst_t *sip;
653 
654 	mutex_enter(&rfs4_servinst_lock);
655 	for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev) {
656 		if (rfs4_servinst_in_grace(sip)) {
657 			rfs4_grace_start(sip);
658 #ifdef DEBUG
659 			n++;
660 #endif
661 		}
662 	}
663 	mutex_exit(&rfs4_servinst_lock);
664 
665 	NFS4_DEBUG(rfs4_servinst_debug, (CE_NOTE,
666 	    "rfs4_grace_reset_all: reset %d instances", n));
667 }
668 
669 /*
670  * start any new instances' grace periods
671  */
672 void
673 rfs4_grace_start_new(void)
674 {
675 #ifdef DEBUG
676 	int n = 0;
677 #endif
678 	rfs4_servinst_t *sip;
679 
680 	mutex_enter(&rfs4_servinst_lock);
681 	for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev) {
682 		if (rfs4_servinst_grace_new(sip))
683 			rfs4_grace_start(sip);
684 #ifdef DEBUG
685 		n++;
686 #endif
687 	}
688 	mutex_exit(&rfs4_servinst_lock);
689 
690 	NFS4_DEBUG(rfs4_servinst_debug, (CE_NOTE,
691 	    "rfs4_grace_start_new: started %d new instances", n));
692 }
693 
694 /*
695  * Create a new server instance, and make it the currently active instance.
696  * Note that starting the grace period too early will reduce the clients'
697  * recovery window.
698  */
699 void
700 rfs4_servinst_create(int start_grace)
701 {
702 	rfs4_servinst_t *sip;
703 
704 	sip = kmem_alloc(sizeof (rfs4_servinst_t), KM_SLEEP);
705 	rw_init(&sip->rwlock, NULL, RW_DEFAULT, NULL);
706 
707 	sip->start_time = (time_t)0;
708 	sip->grace_period = (time_t)0;
709 	sip->next = NULL;
710 	sip->prev = NULL;
711 
712 	mutex_enter(&rfs4_servinst_lock);
713 	if (rfs4_cur_servinst == NULL) {
714 		NFS4_DEBUG(rfs4_servinst_debug, (CE_NOTE,
715 		    "rfs4_servinst_create: creating first instance"));
716 	} else {
717 		/* add to linked list */
718 		sip->prev = rfs4_cur_servinst;
719 		rfs4_cur_servinst->next = sip;
720 	}
721 	if (start_grace)
722 		rfs4_grace_start(sip);
723 	/* make the new instance "current" */
724 	rfs4_cur_servinst = sip;
725 	mutex_exit(&rfs4_servinst_lock);
726 
727 	NFS4_DEBUG(rfs4_servinst_debug, (CE_NOTE,
728 	    "rfs4_servinst_create: new current instance: %p; start_grace: %d",
729 	    (void *)sip, start_grace));
730 }
731 
732 /*
733  * In future, we might add a rfs4_servinst_destroy(sip) but, for now, destroy
734  * all instances directly.
735  */
736 void
737 rfs4_servinst_destroy_all(void)
738 {
739 	rfs4_servinst_t *sip, *prev, *current;
740 #ifdef DEBUG
741 	int n = 0;
742 #endif
743 
744 	mutex_enter(&rfs4_servinst_lock);
745 	ASSERT(rfs4_cur_servinst != NULL);
746 	current = rfs4_cur_servinst;
747 	rfs4_cur_servinst = NULL;
748 	for (sip = current; sip != NULL; sip = prev) {
749 		prev = sip->prev;
750 		rw_destroy(&sip->rwlock);
751 		kmem_free(sip, sizeof (rfs4_servinst_t));
752 #ifdef DEBUG
753 		n++;
754 #endif
755 	}
756 	mutex_exit(&rfs4_servinst_lock);
757 
758 	NFS4_DEBUG(rfs4_servinst_debug, (CE_NOTE,
759 	    "rfs4_servinst_destroy_all: destroyed %d instances", n));
760 }
761 
762 /*
763  * Assign the current server instance to a client_t.
764  * Should be called with cp->dbe held.
765  */
766 void
767 rfs4_servinst_assign(rfs4_client_t *cp, rfs4_servinst_t *sip)
768 {
769 	ASSERT(rfs4_dbe_refcnt(cp->dbe) > 0);
770 
771 	NFS4_DEBUG(rfs4_servinst_debug, (CE_NOTE,
772 	    "rfs4_servinst_assign: client: %p, old: %p, new: %p", (void *)cp,
773 	    (void *)cp->server_instance, (void *)sip));
774 
775 	/*
776 	 * The lock ensures that if the current instance is in the process
777 	 * of changing, we will see the new one.
778 	 */
779 	mutex_enter(&rfs4_servinst_lock);
780 	cp->server_instance = sip;
781 	mutex_exit(&rfs4_servinst_lock);
782 }
783 
784 rfs4_servinst_t *
785 rfs4_servinst(rfs4_client_t *cp)
786 {
787 	ASSERT(rfs4_dbe_refcnt(cp->dbe) > 0);
788 
789 	return (cp->server_instance);
790 }
791 
792 /* ARGSUSED */
793 static void
794 nullfree(caddr_t resop)
795 {
796 }
797 
798 /*
799  * This is a fall-through for invalid or not implemented (yet) ops
800  */
801 /* ARGSUSED */
802 static void
803 rfs4_op_inval(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
804 	struct compound_state *cs)
805 {
806 	*cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_INVAL;
807 }
808 
809 /*
810  * Check if the security flavor, nfsnum, is in the flavor_list.
811  */
812 bool_t
813 in_flavor_list(int nfsnum, int *flavor_list, int count)
814 {
815 	int i;
816 
817 	for (i = 0; i < count; i++) {
818 		if (nfsnum == flavor_list[i])
819 			return (TRUE);
820 	}
821 	return (FALSE);
822 }
823 
824 /*
825  * Used by rfs4_op_secinfo to get the security information from the
826  * export structure associated with the component.
827  */
828 /* ARGSUSED */
829 static nfsstat4
830 do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
831 {
832 	int error, different_export = 0;
833 	vnode_t *dvp, *vp, *tvp;
834 	struct exportinfo *exi = NULL;
835 	fid_t fid;
836 	uint_t count, i;
837 	secinfo4 *resok_val;
838 	struct secinfo *secp;
839 	bool_t did_traverse;
840 	int dotdot, walk;
841 
842 	dvp = cs->vp;
843 	dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
844 
845 	/*
846 	 * If dotdotting, then need to check whether it's above the
847 	 * root of a filesystem, or above an export point.
848 	 */
849 	if (dotdot) {
850 
851 		/*
852 		 * If dotdotting at the root of a filesystem, then
853 		 * need to traverse back to the mounted-on filesystem
854 		 * and do the dotdot lookup there.
855 		 */
856 		if (cs->vp->v_flag & VROOT) {
857 
858 			/*
859 			 * If at the system root, then can
860 			 * go up no further.
861 			 */
862 			if (VN_CMP(dvp, rootdir))
863 				return (puterrno4(ENOENT));
864 
865 			/*
866 			 * Traverse back to the mounted-on filesystem
867 			 */
868 			dvp = untraverse(cs->vp);
869 
870 			/*
871 			 * Set the different_export flag so we remember
872 			 * to pick up a new exportinfo entry for
873 			 * this new filesystem.
874 			 */
875 			different_export = 1;
876 		} else {
877 
878 			/*
879 			 * If dotdotting above an export point then set
880 			 * the different_export to get new export info.
881 			 */
882 			different_export = nfs_exported(cs->exi, cs->vp);
883 		}
884 	}
885 
886 	/*
887 	 * Get the vnode for the component "nm".
888 	 */
889 	error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cs->cr);
890 	if (error)
891 		return (puterrno4(error));
892 
893 	VN_SETPATH(rootdir, dvp, vp, nm, strlen(nm));
894 
895 	/*
896 	 * If the vnode is in a pseudo filesystem, or if the security flavor
897 	 * used in the request is valid but not an explicitly shared flavor,
898 	 * or the access bit indicates that this is a limited access,
899 	 * check whether this vnode is visible.
900 	 */
901 	if (!different_export &&
902 	    (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
903 	    cs->access & CS_ACCESS_LIMITED)) {
904 		if (! nfs_visible(cs->exi, vp, &different_export)) {
905 			VN_RELE(vp);
906 			return (puterrno4(ENOENT));
907 		}
908 	}
909 
910 	/*
911 	 * If it's a mountpoint, then traverse it.
912 	 */
913 	if (vn_ismntpt(vp)) {
914 		tvp = vp;
915 		if ((error = traverse(&tvp)) != 0) {
916 			VN_RELE(vp);
917 			return (puterrno4(error));
918 		}
919 		/* remember that we had to traverse mountpoint */
920 		did_traverse = TRUE;
921 		vp = tvp;
922 		different_export = 1;
923 	} else if (vp->v_vfsp != dvp->v_vfsp) {
924 		/*
925 		 * If vp isn't a mountpoint and the vfs ptrs aren't the same,
926 		 * then vp is probably an LOFS object.  We don't need the
927 		 * realvp, we just need to know that we might have crossed
928 		 * a server fs boundary and need to call checkexport4.
929 		 * (LOFS lookup hides server fs mountpoints, and actually calls
930 		 * traverse)
931 		 */
932 		different_export = 1;
933 		did_traverse = FALSE;
934 	}
935 
936 	/*
937 	 * Get the export information for it.
938 	 */
939 	if (different_export) {
940 
941 		bzero(&fid, sizeof (fid));
942 		fid.fid_len = MAXFIDSZ;
943 		error = vop_fid_pseudo(vp, &fid);
944 		if (error) {
945 			VN_RELE(vp);
946 			return (puterrno4(error));
947 		}
948 
949 		if (dotdot)
950 			exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
951 		else
952 			exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
953 
954 		if (exi == NULL) {
955 			if (did_traverse == TRUE) {
956 				/*
957 				 * If this vnode is a mounted-on vnode,
958 				 * but the mounted-on file system is not
959 				 * exported, send back the secinfo for
960 				 * the exported node that the mounted-on
961 				 * vnode lives in.
962 				 */
963 				exi = cs->exi;
964 			} else {
965 				VN_RELE(vp);
966 				return (puterrno4(EACCES));
967 			}
968 		}
969 	} else {
970 		exi = cs->exi;
971 	}
972 	ASSERT(exi != NULL);
973 
974 
975 	/*
976 	 * Create the secinfo result based on the security information
977 	 * from the exportinfo structure (exi).
978 	 *
979 	 * Return all flavors for a pseudo node.
980 	 * For a real export node, return the flavor that the client
981 	 * has access with.
982 	 */
983 	ASSERT(RW_LOCK_HELD(&exported_lock));
984 	if (PSEUDO(exi)) {
985 		count = exi->exi_export.ex_seccnt; /* total sec count */
986 		resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP);
987 		secp = exi->exi_export.ex_secinfo;
988 
989 		for (i = 0; i < count; i++) {
990 		    resok_val[i].flavor = secp[i].s_secinfo.sc_rpcnum;
991 		    if (resok_val[i].flavor == RPCSEC_GSS) {
992 			rpcsec_gss_info *info;
993 
994 			info = &resok_val[i].flavor_info;
995 			info->qop = secp[i].s_secinfo.sc_qop;
996 			info->service =
997 				(rpc_gss_svc_t)secp[i].s_secinfo.sc_service;
998 
999 			/* get oid opaque data */
1000 			info->oid.sec_oid4_len =
1001 				secp[i].s_secinfo.sc_gss_mech_type->length;
1002 			info->oid.sec_oid4_val =
1003 				kmem_alloc(
1004 				    secp[i].s_secinfo.sc_gss_mech_type->length,
1005 				    KM_SLEEP);
1006 			bcopy(secp[i].s_secinfo.sc_gss_mech_type->elements,
1007 				info->oid.sec_oid4_val, info->oid.sec_oid4_len);
1008 		    }
1009 		}
1010 		resp->SECINFO4resok_len = count;
1011 		resp->SECINFO4resok_val = resok_val;
1012 	} else {
1013 		int ret_cnt = 0, k = 0;
1014 		int *flavor_list;
1015 
1016 		count = exi->exi_export.ex_seccnt; /* total sec count */
1017 		secp = exi->exi_export.ex_secinfo;
1018 
1019 		flavor_list = kmem_alloc(count * sizeof (int), KM_SLEEP);
1020 		/* find out which flavors to return */
1021 		for (i = 0; i < count; i ++) {
1022 			int access, flavor, perm;
1023 
1024 			flavor = secp[i].s_secinfo.sc_nfsnum;
1025 			perm = secp[i].s_flags;
1026 
1027 			access = nfsauth4_secinfo_access(exi, cs->req,
1028 						flavor, perm);
1029 
1030 			if (! (access & NFSAUTH_DENIED) &&
1031 			    ! (access & NFSAUTH_WRONGSEC)) {
1032 				flavor_list[ret_cnt] = flavor;
1033 				ret_cnt++;
1034 			}
1035 		}
1036 
1037 		/* Create the returning SECINFO value */
1038 		resok_val = kmem_alloc(ret_cnt * sizeof (secinfo4), KM_SLEEP);
1039 
1040 		for (i = 0; i < count; i++) {
1041 		/* If the flavor is in the flavor list, fill in resok_val. */
1042 		    if (in_flavor_list(secp[i].s_secinfo.sc_nfsnum,
1043 						flavor_list, ret_cnt)) {
1044 			resok_val[k].flavor = secp[i].s_secinfo.sc_rpcnum;
1045 			if (resok_val[k].flavor == RPCSEC_GSS) {
1046 			    rpcsec_gss_info *info;
1047 
1048 			    info = &resok_val[k].flavor_info;
1049 			    info->qop = secp[i].s_secinfo.sc_qop;
1050 			    info->service =
1051 				(rpc_gss_svc_t)secp[i].s_secinfo.sc_service;
1052 
1053 			    /* get oid opaque data */
1054 			    info->oid.sec_oid4_len =
1055 				secp[i].s_secinfo.sc_gss_mech_type->length;
1056 			    info->oid.sec_oid4_val =
1057 				kmem_alloc(
1058 				    secp[i].s_secinfo.sc_gss_mech_type->length,
1059 				    KM_SLEEP);
1060 			    bcopy(secp[i].s_secinfo.sc_gss_mech_type->elements,
1061 				info->oid.sec_oid4_val, info->oid.sec_oid4_len);
1062 			}
1063 			k++;
1064 		    }
1065 		    if (k >= ret_cnt)
1066 			break;
1067 		}
1068 		resp->SECINFO4resok_len = ret_cnt;
1069 		resp->SECINFO4resok_val = resok_val;
1070 		kmem_free(flavor_list, count * sizeof (int));
1071 	}
1072 
1073 	VN_RELE(vp);
1074 	return (NFS4_OK);
1075 }
1076 
1077 /*
1078  * SECINFO (Operation 33): Obtain required security information on
1079  * the component name in the format of (security-mechanism-oid, qop, service)
1080  * triplets.
1081  */
1082 /* ARGSUSED */
1083 static void
1084 rfs4_op_secinfo(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1085 	struct compound_state *cs)
1086 {
1087 	SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1088 	utf8string *utfnm = &argop->nfs_argop4_u.opsecinfo.name;
1089 	uint_t len;
1090 	char *nm;
1091 
1092 	/*
1093 	 * Current file handle (cfh) should have been set before getting
1094 	 * into this function. If not, return error.
1095 	 */
1096 	if (cs->vp == NULL) {
1097 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1098 		return;
1099 	}
1100 
1101 	if (cs->vp->v_type != VDIR) {
1102 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
1103 		return;
1104 	}
1105 
1106 	/*
1107 	 * Verify the component name. If failed, error out, but
1108 	 * do not error out if the component name is a "..".
1109 	 * SECINFO will return its parents secinfo data for SECINFO "..".
1110 	 */
1111 	if (!utf8_dir_verify(utfnm)) {
1112 		if (utfnm->utf8string_len != 2 ||
1113 				utfnm->utf8string_val[0] != '.' ||
1114 				utfnm->utf8string_val[1] != '.') {
1115 			*cs->statusp = resp->status = NFS4ERR_INVAL;
1116 			return;
1117 		}
1118 	}
1119 
1120 	nm = utf8_to_str(utfnm, &len, NULL);
1121 	if (nm == NULL) {
1122 		*cs->statusp = resp->status = NFS4ERR_INVAL;
1123 		return;
1124 	}
1125 
1126 	if (len > MAXNAMELEN) {
1127 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1128 		kmem_free(nm, len);
1129 		return;
1130 	}
1131 
1132 	*cs->statusp = resp->status = do_rfs4_op_secinfo(cs, nm, resp);
1133 
1134 	kmem_free(nm, len);
1135 }
1136 
1137 /*
1138  * Free SECINFO result.
1139  */
1140 /* ARGSUSED */
1141 static void
1142 rfs4_op_secinfo_free(nfs_resop4 *resop)
1143 {
1144 	SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1145 	int count, i;
1146 	secinfo4 *resok_val;
1147 
1148 	/* If this is not an Ok result, nothing to free. */
1149 	if (resp->status != NFS4_OK) {
1150 		return;
1151 	}
1152 
1153 	count = resp->SECINFO4resok_len;
1154 	resok_val = resp->SECINFO4resok_val;
1155 
1156 	for (i = 0; i < count; i++) {
1157 	    if (resok_val[i].flavor == RPCSEC_GSS) {
1158 		rpcsec_gss_info *info;
1159 
1160 		info = &resok_val[i].flavor_info;
1161 		kmem_free(info->oid.sec_oid4_val, info->oid.sec_oid4_len);
1162 	    }
1163 	}
1164 	kmem_free(resok_val, count * sizeof (secinfo4));
1165 	resp->SECINFO4resok_len = 0;
1166 	resp->SECINFO4resok_val = NULL;
1167 }
1168 
1169 /* ARGSUSED */
1170 static void
1171 rfs4_op_access(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1172 	struct compound_state *cs)
1173 {
1174 	ACCESS4args *args = &argop->nfs_argop4_u.opaccess;
1175 	ACCESS4res *resp = &resop->nfs_resop4_u.opaccess;
1176 	int error;
1177 	vnode_t *vp;
1178 	struct vattr va;
1179 	int checkwriteperm;
1180 	cred_t *cr = cs->cr;
1181 
1182 #if 0	/* XXX allow access even if !cs->access. Eventually only pseudo fs */
1183 	if (cs->access == CS_ACCESS_DENIED) {
1184 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
1185 		return;
1186 	}
1187 #endif
1188 	if (cs->vp == NULL) {
1189 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1190 		return;
1191 	}
1192 
1193 	ASSERT(cr != NULL);
1194 
1195 	vp = cs->vp;
1196 
1197 	/*
1198 	 * If the file system is exported read only, it is not appropriate
1199 	 * to check write permissions for regular files and directories.
1200 	 * Special files are interpreted by the client, so the underlying
1201 	 * permissions are sent back to the client for interpretation.
1202 	 */
1203 	if (rdonly4(cs->exi, cs->vp, req) &&
1204 		(vp->v_type == VREG || vp->v_type == VDIR))
1205 		checkwriteperm = 0;
1206 	else
1207 		checkwriteperm = 1;
1208 
1209 	/*
1210 	 * XXX
1211 	 * We need the mode so that we can correctly determine access
1212 	 * permissions relative to a mandatory lock file.  Access to
1213 	 * mandatory lock files is denied on the server, so it might
1214 	 * as well be reflected to the server during the open.
1215 	 */
1216 	va.va_mask = AT_MODE;
1217 	error = VOP_GETATTR(vp, &va, 0, cr);
1218 	if (error) {
1219 		*cs->statusp = resp->status = puterrno4(error);
1220 		return;
1221 	}
1222 
1223 	resp->access = 0;
1224 	resp->supported = 0;
1225 
1226 	if (args->access & ACCESS4_READ) {
1227 		error = VOP_ACCESS(vp, VREAD, 0, cr);
1228 		if (!error && !MANDLOCK(vp, va.va_mode))
1229 			resp->access |= ACCESS4_READ;
1230 		resp->supported |= ACCESS4_READ;
1231 	}
1232 	if ((args->access & ACCESS4_LOOKUP) && vp->v_type == VDIR) {
1233 		error = VOP_ACCESS(vp, VEXEC, 0, cr);
1234 		if (!error)
1235 			resp->access |= ACCESS4_LOOKUP;
1236 		resp->supported |= ACCESS4_LOOKUP;
1237 	}
1238 	if (checkwriteperm &&
1239 	    (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND))) {
1240 		error = VOP_ACCESS(vp, VWRITE, 0, cr);
1241 		if (!error && !MANDLOCK(vp, va.va_mode))
1242 			resp->access |=
1243 			    (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND));
1244 		resp->supported |= (ACCESS4_MODIFY|ACCESS4_EXTEND);
1245 	}
1246 
1247 	if (checkwriteperm &&
1248 	    (args->access & ACCESS4_DELETE) && vp->v_type == VDIR) {
1249 		error = VOP_ACCESS(vp, VWRITE, 0, cr);
1250 		if (!error)
1251 			resp->access |= ACCESS4_DELETE;
1252 		resp->supported |= ACCESS4_DELETE;
1253 	}
1254 	if (args->access & ACCESS4_EXECUTE && vp->v_type != VDIR) {
1255 		error = VOP_ACCESS(vp, VEXEC, 0, cr);
1256 		if (!error && !MANDLOCK(vp, va.va_mode))
1257 			resp->access |= ACCESS4_EXECUTE;
1258 		resp->supported |= ACCESS4_EXECUTE;
1259 	}
1260 
1261 	*cs->statusp = resp->status = NFS4_OK;
1262 }
1263 
1264 /* ARGSUSED */
1265 static void
1266 rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1267 	struct compound_state *cs)
1268 {
1269 	COMMIT4args *args = &argop->nfs_argop4_u.opcommit;
1270 	COMMIT4res *resp = &resop->nfs_resop4_u.opcommit;
1271 	int error;
1272 	vnode_t *vp = cs->vp;
1273 	cred_t *cr = cs->cr;
1274 	vattr_t va;
1275 
1276 	if (vp == NULL) {
1277 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1278 		return;
1279 	}
1280 	if (cs->access == CS_ACCESS_DENIED) {
1281 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
1282 		return;
1283 	}
1284 
1285 	if (args->offset + args->count < args->offset) {
1286 		*cs->statusp = resp->status = NFS4ERR_INVAL;
1287 		return;
1288 	}
1289 
1290 	va.va_mask = AT_UID;
1291 	error = VOP_GETATTR(vp, &va, 0, cr);
1292 
1293 	/*
1294 	 * If we can't get the attributes, then we can't do the
1295 	 * right access checking.  So, we'll fail the request.
1296 	 */
1297 	if (error) {
1298 		*cs->statusp = resp->status = puterrno4(error);
1299 		return;
1300 	}
1301 	if (rdonly4(cs->exi, cs->vp, req)) {
1302 		*cs->statusp = resp->status = NFS4ERR_ROFS;
1303 		return;
1304 	}
1305 
1306 	if (vp->v_type != VREG) {
1307 		if (vp->v_type == VDIR)
1308 			resp->status = NFS4ERR_ISDIR;
1309 		else
1310 			resp->status = NFS4ERR_INVAL;
1311 		*cs->statusp = resp->status;
1312 		return;
1313 	}
1314 
1315 	if (crgetuid(cr) != va.va_uid &&
1316 	    (error = VOP_ACCESS(vp, VWRITE, 0, cs->cr))) {
1317 		*cs->statusp = resp->status = puterrno4(error);
1318 		return;
1319 	}
1320 
1321 	error = VOP_PUTPAGE(vp, args->offset, args->count, 0, cr);
1322 	if (!error)
1323 		error = VOP_FSYNC(vp, FNODSYNC, cr);
1324 
1325 	if (error) {
1326 		*cs->statusp = resp->status = puterrno4(error);
1327 		return;
1328 	}
1329 
1330 	*cs->statusp = resp->status = NFS4_OK;
1331 	resp->writeverf = Write4verf;
1332 }
1333 
1334 /*
1335  * rfs4_op_mknod is called from rfs4_op_create after all initial verification
1336  * was completed. It does the nfsv4 create for special files.
1337  */
1338 /* ARGSUSED */
1339 static vnode_t *
1340 do_rfs4_op_mknod(CREATE4args *args, CREATE4res *resp, struct svc_req *req,
1341 	struct compound_state *cs, vattr_t *vap, char *nm)
1342 {
1343 	int error;
1344 	cred_t *cr = cs->cr;
1345 	vnode_t *dvp = cs->vp;
1346 	vnode_t *vp = NULL;
1347 	int mode;
1348 	enum vcexcl excl;
1349 
1350 	switch (args->type) {
1351 	case NF4CHR:
1352 	case NF4BLK:
1353 		if (secpolicy_sys_devices(cr) != 0) {
1354 			*cs->statusp = resp->status = NFS4ERR_PERM;
1355 			return (NULL);
1356 		}
1357 		if (args->type == NF4CHR)
1358 			vap->va_type = VCHR;
1359 		else
1360 			vap->va_type = VBLK;
1361 		vap->va_rdev = makedevice(args->ftype4_u.devdata.specdata1,
1362 					args->ftype4_u.devdata.specdata2);
1363 		vap->va_mask |= AT_RDEV;
1364 		break;
1365 	case NF4SOCK:
1366 		vap->va_type = VSOCK;
1367 		break;
1368 	case NF4FIFO:
1369 		vap->va_type = VFIFO;
1370 		break;
1371 	default:
1372 		*cs->statusp = resp->status = NFS4ERR_BADTYPE;
1373 		return (NULL);
1374 	}
1375 
1376 	/*
1377 	 * Must specify the mode.
1378 	 */
1379 	if (!(vap->va_mask & AT_MODE)) {
1380 		*cs->statusp = resp->status = NFS4ERR_INVAL;
1381 		return (NULL);
1382 	}
1383 
1384 	excl = EXCL;
1385 
1386 	mode = 0;
1387 
1388 	error = VOP_CREATE(dvp, nm, vap, excl, mode, &vp, cr, 0);
1389 	if (error) {
1390 		*cs->statusp = resp->status = puterrno4(error);
1391 		return (NULL);
1392 	}
1393 	return (vp);
1394 }
1395 
1396 /*
1397  * nfsv4 create is used to create non-regular files. For regular files,
1398  * use nfsv4 open.
1399  */
1400 /* ARGSUSED */
1401 static void
1402 rfs4_op_create(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1403 	struct compound_state *cs)
1404 {
1405 	CREATE4args *args = &argop->nfs_argop4_u.opcreate;
1406 	CREATE4res *resp = &resop->nfs_resop4_u.opcreate;
1407 	int error;
1408 	struct vattr bva, iva, iva2, ava, *vap;
1409 	cred_t *cr = cs->cr;
1410 	vnode_t *dvp = cs->vp;
1411 	vnode_t *vp = NULL;
1412 	char *nm, *lnm;
1413 	uint_t len, llen;
1414 	int syncval = 0;
1415 	struct nfs4_svgetit_arg sarg;
1416 	struct nfs4_ntov_table ntov;
1417 	struct statvfs64 sb;
1418 	nfsstat4 status;
1419 
1420 	resp->attrset = 0;
1421 
1422 	if (dvp == NULL) {
1423 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1424 		return;
1425 	}
1426 
1427 	/*
1428 	 * If there is an unshared filesystem mounted on this vnode,
1429 	 * do not allow to create an object in this directory.
1430 	 */
1431 	if (vn_ismntpt(dvp)) {
1432 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
1433 		return;
1434 	}
1435 
1436 	/* Verify that type is correct */
1437 	switch (args->type) {
1438 	case NF4LNK:
1439 	case NF4BLK:
1440 	case NF4CHR:
1441 	case NF4SOCK:
1442 	case NF4FIFO:
1443 	case NF4DIR:
1444 		break;
1445 	default:
1446 		*cs->statusp = resp->status = NFS4ERR_BADTYPE;
1447 		return;
1448 	};
1449 
1450 	if (cs->access == CS_ACCESS_DENIED) {
1451 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
1452 		return;
1453 	}
1454 	if (dvp->v_type != VDIR) {
1455 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
1456 		return;
1457 	}
1458 	if (!utf8_dir_verify(&args->objname)) {
1459 		*cs->statusp = resp->status = NFS4ERR_INVAL;
1460 		return;
1461 	}
1462 
1463 	if (rdonly4(cs->exi, cs->vp, req)) {
1464 		*cs->statusp = resp->status = NFS4ERR_ROFS;
1465 		return;
1466 	}
1467 
1468 	/*
1469 	 * Name of newly created object
1470 	 */
1471 	nm = utf8_to_fn(&args->objname, &len, NULL);
1472 	if (nm == NULL) {
1473 		*cs->statusp = resp->status = NFS4ERR_INVAL;
1474 		return;
1475 	}
1476 
1477 	if (len > MAXNAMELEN) {
1478 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1479 		kmem_free(nm, len);
1480 		return;
1481 	}
1482 
1483 	resp->attrset = 0;
1484 
1485 	sarg.sbp = &sb;
1486 	nfs4_ntov_table_init(&ntov);
1487 
1488 	status = do_rfs4_set_attrs(&resp->attrset,
1489 					&args->createattrs, cs, &sarg,
1490 					&ntov, NFS4ATTR_SETIT);
1491 
1492 	if (sarg.vap->va_mask == 0 && status == NFS4_OK)
1493 		status = NFS4ERR_INVAL;
1494 
1495 	if (status != NFS4_OK) {
1496 		*cs->statusp = resp->status = status;
1497 		kmem_free(nm, len);
1498 		nfs4_ntov_table_free(&ntov, &sarg);
1499 		resp->attrset = 0;
1500 		return;
1501 	}
1502 
1503 	/* Get "before" change value */
1504 	bva.va_mask = AT_CTIME|AT_SEQ;
1505 	error = VOP_GETATTR(dvp, &bva, 0, cr);
1506 	if (error) {
1507 		*cs->statusp = resp->status = puterrno4(error);
1508 		kmem_free(nm, len);
1509 		nfs4_ntov_table_free(&ntov, &sarg);
1510 		resp->attrset = 0;
1511 		return;
1512 	}
1513 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bva.va_ctime)
1514 
1515 	vap = sarg.vap;
1516 
1517 	/*
1518 	 * Set default initial values for attributes when not specified
1519 	 * in createattrs.
1520 	 */
1521 	if ((vap->va_mask & AT_UID) == 0) {
1522 		vap->va_uid = crgetuid(cr);
1523 		vap->va_mask |= AT_UID;
1524 	}
1525 	if ((vap->va_mask & AT_GID) == 0) {
1526 		vap->va_gid = crgetgid(cr);
1527 		vap->va_mask |= AT_GID;
1528 	}
1529 
1530 	vap->va_mask |= AT_TYPE;
1531 	switch (args->type) {
1532 	case NF4DIR:
1533 		vap->va_type = VDIR;
1534 		if ((vap->va_mask & AT_MODE) == 0) {
1535 			vap->va_mode = 0700;	/* default: owner rwx only */
1536 			vap->va_mask |= AT_MODE;
1537 		}
1538 		error = VOP_MKDIR(dvp, nm, vap, &vp, cr);
1539 		if (error)
1540 			break;
1541 
1542 		/*
1543 		 * Get the initial "after" sequence number, if it fails,
1544 		 * set to zero
1545 		 */
1546 		iva.va_mask = AT_SEQ;
1547 		if (VOP_GETATTR(dvp, &iva, 0, cs->cr))
1548 			iva.va_seq = 0;
1549 		break;
1550 	case NF4LNK:
1551 		vap->va_type = VLNK;
1552 		if ((vap->va_mask & AT_MODE) == 0) {
1553 			vap->va_mode = 0700;	/* default: owner rwx only */
1554 			vap->va_mask |= AT_MODE;
1555 		}
1556 
1557 		/*
1558 		 * symlink names must be treated as data
1559 		 */
1560 		lnm = utf8_to_str(&args->ftype4_u.linkdata, &llen, NULL);
1561 
1562 		if (lnm == NULL) {
1563 			*cs->statusp = resp->status = NFS4ERR_INVAL;
1564 			kmem_free(nm, len);
1565 			nfs4_ntov_table_free(&ntov, &sarg);
1566 			resp->attrset = 0;
1567 			return;
1568 		}
1569 
1570 		if (llen > MAXPATHLEN) {
1571 			*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1572 			kmem_free(nm, len);
1573 			kmem_free(lnm, llen);
1574 			nfs4_ntov_table_free(&ntov, &sarg);
1575 			resp->attrset = 0;
1576 			return;
1577 		}
1578 
1579 		error = VOP_SYMLINK(dvp, nm, vap, lnm, cr);
1580 		if (lnm != NULL)
1581 			kmem_free(lnm, llen);
1582 		if (error)
1583 			break;
1584 
1585 		/*
1586 		 * Get the initial "after" sequence number, if it fails,
1587 		 * set to zero
1588 		 */
1589 		iva.va_mask = AT_SEQ;
1590 		if (VOP_GETATTR(dvp, &iva, 0, cs->cr))
1591 			iva.va_seq = 0;
1592 
1593 		error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr);
1594 		if (error)
1595 			break;
1596 
1597 		VN_SETPATH(rootdir, dvp, vp, nm, strlen(nm));
1598 
1599 		/*
1600 		 * va_seq is not safe over VOP calls, check it again
1601 		 * if it has changed zero out iva to force atomic = FALSE.
1602 		 */
1603 		iva2.va_mask = AT_SEQ;
1604 		if (VOP_GETATTR(dvp, &iva2, 0, cs->cr) ||
1605 						iva2.va_seq != iva.va_seq)
1606 			iva.va_seq = 0;
1607 		break;
1608 	default:
1609 		/*
1610 		 * probably a special file.
1611 		 */
1612 		if ((vap->va_mask & AT_MODE) == 0) {
1613 			vap->va_mode = 0600;	/* default: owner rw only */
1614 			vap->va_mask |= AT_MODE;
1615 		}
1616 		syncval = FNODSYNC;
1617 		/*
1618 		 * We know this will only generate one VOP call
1619 		 */
1620 		vp = do_rfs4_op_mknod(args, resp, req, cs, vap, nm);
1621 
1622 		if (vp == NULL) {
1623 			kmem_free(nm, len);
1624 			nfs4_ntov_table_free(&ntov, &sarg);
1625 			resp->attrset = 0;
1626 			return;
1627 		}
1628 
1629 		/*
1630 		 * Get the initial "after" sequence number, if it fails,
1631 		 * set to zero
1632 		 */
1633 		iva.va_mask = AT_SEQ;
1634 		if (VOP_GETATTR(dvp, &iva, 0, cs->cr))
1635 			iva.va_seq = 0;
1636 
1637 		break;
1638 	}
1639 	kmem_free(nm, len);
1640 
1641 	if (error) {
1642 		*cs->statusp = resp->status = puterrno4(error);
1643 	}
1644 
1645 	/*
1646 	 * Force modified data and metadata out to stable storage.
1647 	 */
1648 	(void) VOP_FSYNC(dvp, 0, cr);
1649 
1650 	if (resp->status != NFS4_OK) {
1651 		if (vp != NULL)
1652 			VN_RELE(vp);
1653 		nfs4_ntov_table_free(&ntov, &sarg);
1654 		resp->attrset = 0;
1655 		return;
1656 	}
1657 
1658 	/*
1659 	 * Finish setup of cinfo response, "before" value already set.
1660 	 * Get "after" change value, if it fails, simply return the
1661 	 * before value.
1662 	 */
1663 	ava.va_mask = AT_CTIME|AT_SEQ;
1664 	if (VOP_GETATTR(dvp, &ava, 0, cr)) {
1665 		ava.va_ctime = bva.va_ctime;
1666 		ava.va_seq = 0;
1667 	}
1668 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, ava.va_ctime);
1669 
1670 	/*
1671 	 * True verification that object was created with correct
1672 	 * attrs is impossible.  The attrs could have been changed
1673 	 * immediately after object creation.  If attributes did
1674 	 * not verify, the only recourse for the server is to
1675 	 * destroy the object.  Maybe if some attrs (like gid)
1676 	 * are set incorrectly, the object should be destroyed;
1677 	 * however, seems bad as a default policy.  Do we really
1678 	 * want to destroy an object over one of the times not
1679 	 * verifying correctly?  For these reasons, the server
1680 	 * currently sets bits in attrset for createattrs
1681 	 * that were set; however, no verification is done.
1682 	 *
1683 	 * vmask_to_nmask accounts for vattr bits set on create
1684 	 *	[do_rfs4_set_attrs() only sets resp bits for
1685 	 *	 non-vattr/vfs bits.]
1686 	 * Mask off any bits set by default so as not to return
1687 	 * more attrset bits than were requested in createattrs
1688 	 */
1689 	nfs4_vmask_to_nmask(sarg.vap->va_mask, &resp->attrset);
1690 	resp->attrset &= args->createattrs.attrmask;
1691 	nfs4_ntov_table_free(&ntov, &sarg);
1692 
1693 	error = makefh4(&cs->fh, vp, cs->exi);
1694 	if (error) {
1695 		*cs->statusp = resp->status = puterrno4(error);
1696 	}
1697 
1698 	/*
1699 	 * The cinfo.atomic = TRUE only if we got no errors, we have
1700 	 * non-zero va_seq's, and it has incremented by exactly one
1701 	 * during the creation and it didn't change during the VOP_LOOKUP
1702 	 * or VOP_FSYNC.
1703 	 */
1704 	if (!error && bva.va_seq && iva.va_seq && ava.va_seq &&
1705 			iva.va_seq == (bva.va_seq + 1) &&
1706 			iva.va_seq == ava.va_seq)
1707 		resp->cinfo.atomic = TRUE;
1708 	else
1709 		resp->cinfo.atomic = FALSE;
1710 
1711 	(void) VOP_FSYNC(vp, syncval, cr);
1712 
1713 	if (resp->status != NFS4_OK) {
1714 		VN_RELE(vp);
1715 		return;
1716 	}
1717 	if (cs->vp)
1718 		VN_RELE(cs->vp);
1719 
1720 	cs->vp = vp;
1721 	*cs->statusp = resp->status = NFS4_OK;
1722 }
1723 
1724 
1725 /*ARGSUSED*/
1726 static void
1727 rfs4_op_delegreturn(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1728 	struct compound_state *cs)
1729 {
1730 	DELEGRETURN4args *args = &argop->nfs_argop4_u.opdelegreturn;
1731 	DELEGRETURN4res *resp = &resop->nfs_resop4_u.opdelegreturn;
1732 	rfs4_deleg_state_t *dsp;
1733 	nfsstat4 status;
1734 
1735 	status = rfs4_get_deleg_state(&args->deleg_stateid, &dsp);
1736 	resp->status = *cs->statusp = status;
1737 	if (status != NFS4_OK)
1738 		return;
1739 
1740 	/* Ensure specified filehandle matches */
1741 	if (cs->vp != dsp->finfo->vp) {
1742 		resp->status = *cs->statusp = NFS4ERR_BAD_STATEID;
1743 	} else
1744 		rfs4_return_deleg(dsp, FALSE);
1745 
1746 	rfs4_update_lease(dsp->client);
1747 
1748 	rfs4_deleg_state_rele(dsp);
1749 }
1750 
1751 /*
1752  * Check to see if a given "flavor" is an explicitly shared flavor.
1753  * The assumption of this routine is the "flavor" is already a valid
1754  * flavor in the secinfo list of "exi".
1755  *
1756  *	e.g.
1757  *		# share -o sec=flavor1 /export
1758  *		# share -o sec=flavor2 /export/home
1759  *
1760  *		flavor2 is not an explicitly shared flavor for /export,
1761  *		however it is in the secinfo list for /export thru the
1762  *		server namespace setup.
1763  */
1764 int
1765 is_exported_sec(int flavor, struct exportinfo *exi)
1766 {
1767 	int	i;
1768 	struct secinfo *sp;
1769 
1770 	sp = exi->exi_export.ex_secinfo;
1771 	for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
1772 		if (flavor == sp[i].s_secinfo.sc_nfsnum ||
1773 		    sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) {
1774 			return (SEC_REF_EXPORTED(&sp[i]));
1775 		}
1776 	}
1777 
1778 	/* Should not reach this point based on the assumption */
1779 	return (0);
1780 }
1781 
1782 /*
1783  * Check if the security flavor used in the request matches what is
1784  * required at the export point or at the root pseudo node (exi_root).
1785  *
1786  * returns 1 if there's a match or if exported with AUTH_NONE; 0 otherwise.
1787  *
1788  */
1789 static int
1790 secinfo_match_or_authnone(struct compound_state *cs)
1791 {
1792 	int	i;
1793 	struct secinfo *sp;
1794 
1795 	/*
1796 	 * Check cs->nfsflavor (from the request) against
1797 	 * the current export data in cs->exi.
1798 	 */
1799 	sp = cs->exi->exi_export.ex_secinfo;
1800 	for (i = 0; i < cs->exi->exi_export.ex_seccnt; i++) {
1801 		if (cs->nfsflavor == sp[i].s_secinfo.sc_nfsnum ||
1802 		    sp[i].s_secinfo.sc_nfsnum == AUTH_NONE)
1803 			return (1);
1804 	}
1805 
1806 	return (0);
1807 }
1808 
1809 /*
1810  * Check the access authority for the client and return the correct error.
1811  */
1812 nfsstat4
1813 call_checkauth4(struct compound_state *cs, struct svc_req *req)
1814 {
1815 	int	authres;
1816 
1817 	/*
1818 	 * First, check if the security flavor used in the request
1819 	 * are among the flavors set in the server namespace.
1820 	 */
1821 	if (!secinfo_match_or_authnone(cs)) {
1822 		*cs->statusp = NFS4ERR_WRONGSEC;
1823 		return (*cs->statusp);
1824 	}
1825 
1826 	authres = checkauth4(cs, req);
1827 
1828 	if (authres > 0) {
1829 		*cs->statusp = NFS4_OK;
1830 		if (! (cs->access & CS_ACCESS_LIMITED))
1831 			cs->access = CS_ACCESS_OK;
1832 	} else if (authres == 0) {
1833 		*cs->statusp = NFS4ERR_ACCESS;
1834 	} else if (authres == -2) {
1835 		*cs->statusp = NFS4ERR_WRONGSEC;
1836 	} else {
1837 		*cs->statusp = NFS4ERR_DELAY;
1838 	}
1839 	return (*cs->statusp);
1840 }
1841 
1842 /*
1843  * bitmap4_to_attrmask is called by getattr and readdir.
1844  * It sets up the vattr mask and determines whether vfsstat call is needed
1845  * based on the input bitmap.
1846  * Returns nfsv4 status.
1847  */
1848 static nfsstat4
1849 bitmap4_to_attrmask(bitmap4 breq, struct nfs4_svgetit_arg *sargp)
1850 {
1851 	int i;
1852 	uint_t	va_mask;
1853 	struct statvfs64 *sbp = sargp->sbp;
1854 
1855 	sargp->sbp = NULL;
1856 	sargp->flag = 0;
1857 	sargp->rdattr_error = NFS4_OK;
1858 	sargp->mntdfid_set = FALSE;
1859 	if (sargp->cs->vp)
1860 		sargp->xattr = get_fh4_flag(&sargp->cs->fh,
1861 					    FH4_ATTRDIR | FH4_NAMEDATTR);
1862 	else
1863 		sargp->xattr = 0;
1864 
1865 	/*
1866 	 * Set rdattr_error_req to true if return error per
1867 	 * failed entry rather than fail the readdir.
1868 	 */
1869 	if (breq & FATTR4_RDATTR_ERROR_MASK)
1870 		sargp->rdattr_error_req = 1;
1871 	else
1872 		sargp->rdattr_error_req = 0;
1873 
1874 	/*
1875 	 * generate the va_mask
1876 	 * Handle the easy cases first
1877 	 */
1878 	switch (breq) {
1879 	case NFS4_NTOV_ATTR_MASK:
1880 		sargp->vap->va_mask = NFS4_NTOV_ATTR_AT_MASK;
1881 		return (NFS4_OK);
1882 
1883 	case NFS4_FS_ATTR_MASK:
1884 		sargp->vap->va_mask = NFS4_FS_ATTR_AT_MASK;
1885 		sargp->sbp = sbp;
1886 		return (NFS4_OK);
1887 
1888 	case NFS4_NTOV_ATTR_CACHE_MASK:
1889 		sargp->vap->va_mask = NFS4_NTOV_ATTR_CACHE_AT_MASK;
1890 		return (NFS4_OK);
1891 
1892 	case FATTR4_LEASE_TIME_MASK:
1893 		sargp->vap->va_mask = 0;
1894 		return (NFS4_OK);
1895 
1896 	default:
1897 		va_mask = 0;
1898 		for (i = 0; i < nfs4_ntov_map_size; i++) {
1899 			if ((breq & nfs4_ntov_map[i].fbit) &&
1900 							nfs4_ntov_map[i].vbit)
1901 				va_mask |= nfs4_ntov_map[i].vbit;
1902 		}
1903 
1904 		/*
1905 		 * Check is vfsstat is needed
1906 		 */
1907 		if (breq & NFS4_FS_ATTR_MASK)
1908 			sargp->sbp = sbp;
1909 
1910 		sargp->vap->va_mask = va_mask;
1911 		return (NFS4_OK);
1912 	}
1913 	/* NOTREACHED */
1914 }
1915 
1916 /*
1917  * bitmap4_get_sysattrs is called by getattr and readdir.
1918  * It calls both VOP_GETATTR and VFS_STATVFS calls to get the attrs.
1919  * Returns nfsv4 status.
1920  */
1921 static nfsstat4
1922 bitmap4_get_sysattrs(struct nfs4_svgetit_arg *sargp)
1923 {
1924 	int error;
1925 	struct compound_state *cs = sargp->cs;
1926 	vnode_t *vp = cs->vp;
1927 
1928 	if (sargp->sbp != NULL) {
1929 		if (error = VFS_STATVFS(vp->v_vfsp, sargp->sbp)) {
1930 			sargp->sbp = NULL;	/* to identify error */
1931 			return (puterrno4(error));
1932 		}
1933 	}
1934 
1935 	return (rfs4_vop_getattr(vp, sargp->vap, 0, cs->cr));
1936 }
1937 
1938 static void
1939 nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp)
1940 {
1941 	ntovp->na = kmem_zalloc(sizeof (union nfs4_attr_u) * nfs4_ntov_map_size,
1942 			KM_SLEEP);
1943 	ntovp->attrcnt = 0;
1944 	ntovp->vfsstat = FALSE;
1945 }
1946 
1947 static void
1948 nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
1949 	struct nfs4_svgetit_arg *sargp)
1950 {
1951 	int i;
1952 	union nfs4_attr_u *na;
1953 	uint8_t *amap;
1954 
1955 	/*
1956 	 * XXX Should do the same checks for whether the bit is set
1957 	 */
1958 	for (i = 0, na = ntovp->na, amap = ntovp->amap;
1959 		i < ntovp->attrcnt; i++, na++, amap++) {
1960 		(void) (*nfs4_ntov_map[*amap].sv_getit)(
1961 			NFS4ATTR_FREEIT, sargp, na);
1962 	}
1963 	if ((sargp->op == NFS4ATTR_SETIT) || (sargp->op == NFS4ATTR_VERIT)) {
1964 		/*
1965 		 * xdr_free for getattr will be done later
1966 		 */
1967 		for (i = 0, na = ntovp->na, amap = ntovp->amap;
1968 			i < ntovp->attrcnt; i++, na++, amap++) {
1969 			xdr_free(nfs4_ntov_map[*amap].xfunc, (caddr_t)na);
1970 		}
1971 	}
1972 	kmem_free(ntovp->na, sizeof (union nfs4_attr_u) * nfs4_ntov_map_size);
1973 }
1974 
1975 /*
1976  * do_rfs4_op_getattr gets the system attrs and converts into fattr4.
1977  */
1978 static nfsstat4
1979 do_rfs4_op_getattr(bitmap4 breq, fattr4 *fattrp,
1980 	struct nfs4_svgetit_arg *sargp)
1981 {
1982 	int error = 0;
1983 	int i, k;
1984 	struct nfs4_ntov_table ntov;
1985 	XDR xdr;
1986 	ulong_t xdr_size;
1987 	char *xdr_attrs;
1988 	nfsstat4 status = NFS4_OK;
1989 	nfsstat4 prev_rdattr_error = sargp->rdattr_error;
1990 	union nfs4_attr_u *na;
1991 	uint8_t *amap;
1992 
1993 	sargp->op = NFS4ATTR_GETIT;
1994 	sargp->flag = 0;
1995 
1996 	fattrp->attrmask = 0;
1997 	/* if no bits requested, then return empty fattr4 */
1998 	if (breq == 0) {
1999 		fattrp->attrlist4_len = 0;
2000 		fattrp->attrlist4 = NULL;
2001 		return (NFS4_OK);
2002 	}
2003 
2004 	/*
2005 	 * return NFS4ERR_INVAL when client requests write-only attrs
2006 	 */
2007 	if (breq & (FATTR4_TIME_ACCESS_SET_MASK | FATTR4_TIME_MODIFY_SET_MASK))
2008 		return (NFS4ERR_INVAL);
2009 
2010 	nfs4_ntov_table_init(&ntov);
2011 	na = ntov.na;
2012 	amap = ntov.amap;
2013 
2014 	/*
2015 	 * Now loop to get or verify the attrs
2016 	 */
2017 	for (i = 0; i < nfs4_ntov_map_size; i++) {
2018 		if (breq & nfs4_ntov_map[i].fbit) {
2019 			if ((*nfs4_ntov_map[i].sv_getit)(
2020 				    NFS4ATTR_SUPPORTED, sargp, NULL) == 0) {
2021 
2022 				error = (*nfs4_ntov_map[i].sv_getit)(
2023 						NFS4ATTR_GETIT, sargp, na);
2024 
2025 				/*
2026 				 * Possible error values:
2027 				 * >0 if sv_getit failed to
2028 				 * get the attr; 0 if succeeded;
2029 				 * <0 if rdattr_error and the
2030 				 * attribute cannot be returned.
2031 				 */
2032 				if (error && !(sargp->rdattr_error_req))
2033 					goto done;
2034 				/*
2035 				 * If error then just for entry
2036 				 */
2037 				if (error == 0) {
2038 					fattrp->attrmask |=
2039 						nfs4_ntov_map[i].fbit;
2040 					*amap++ =
2041 						(uint8_t)nfs4_ntov_map[i].nval;
2042 					na++;
2043 					(ntov.attrcnt)++;
2044 				} else if ((error > 0) &&
2045 					(sargp->rdattr_error == NFS4_OK)) {
2046 					sargp->rdattr_error = puterrno4(error);
2047 				}
2048 				error = 0;
2049 			}
2050 		}
2051 	}
2052 
2053 	/*
2054 	 * If rdattr_error was set after the return value for it was assigned,
2055 	 * update it.
2056 	 */
2057 	if (prev_rdattr_error != sargp->rdattr_error) {
2058 		na = ntov.na;
2059 		amap = ntov.amap;
2060 		for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2061 			k = *amap;
2062 			if (k < FATTR4_RDATTR_ERROR) {
2063 				continue;
2064 			}
2065 			if ((k == FATTR4_RDATTR_ERROR) &&
2066 			    ((*nfs4_ntov_map[k].sv_getit)(
2067 				NFS4ATTR_SUPPORTED, sargp, NULL) == 0)) {
2068 
2069 				(void) (*nfs4_ntov_map[k].sv_getit)(
2070 						NFS4ATTR_GETIT, sargp, na);
2071 			}
2072 			break;
2073 		}
2074 	}
2075 
2076 	xdr_size = 0;
2077 	na = ntov.na;
2078 	amap = ntov.amap;
2079 	for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2080 		xdr_size += xdr_sizeof(nfs4_ntov_map[*amap].xfunc, na);
2081 	}
2082 
2083 	fattrp->attrlist4_len = xdr_size;
2084 	if (xdr_size) {
2085 		/* freed by rfs4_op_getattr_free() */
2086 		fattrp->attrlist4 = xdr_attrs = kmem_zalloc(xdr_size, KM_SLEEP);
2087 
2088 		xdrmem_create(&xdr, xdr_attrs, xdr_size, XDR_ENCODE);
2089 
2090 		na = ntov.na;
2091 		amap = ntov.amap;
2092 		for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2093 			if (!(*nfs4_ntov_map[*amap].xfunc)(&xdr, na)) {
2094 				cmn_err(CE_WARN, "do_rfs4_op_getattr: xdr "
2095 					"encode of attribute %d failed\n",
2096 					*amap);
2097 				status = NFS4ERR_SERVERFAULT;
2098 				break;
2099 			}
2100 		}
2101 		/* xdrmem_destroy(&xdrs); */	/* NO-OP */
2102 	} else {
2103 		fattrp->attrlist4 = NULL;
2104 	}
2105 done:
2106 
2107 	nfs4_ntov_table_free(&ntov, sargp);
2108 
2109 	if (error != 0)
2110 		status = puterrno4(error);
2111 
2112 	return (status);
2113 }
2114 
2115 /* ARGSUSED */
2116 static void
2117 rfs4_op_getattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2118 	struct compound_state *cs)
2119 {
2120 	GETATTR4args *args = &argop->nfs_argop4_u.opgetattr;
2121 	GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2122 	struct nfs4_svgetit_arg sarg;
2123 	struct statvfs64 sb;
2124 	nfsstat4 status;
2125 
2126 	if (cs->vp == NULL) {
2127 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2128 		return;
2129 	}
2130 
2131 	if (cs->access == CS_ACCESS_DENIED) {
2132 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
2133 		return;
2134 	}
2135 
2136 	sarg.sbp = &sb;
2137 	sarg.cs = cs;
2138 
2139 	status = bitmap4_to_attrmask(args->attr_request, &sarg);
2140 	if (status == NFS4_OK) {
2141 		status = bitmap4_get_sysattrs(&sarg);
2142 		if (status == NFS4_OK)
2143 			status = do_rfs4_op_getattr(args->attr_request,
2144 				&resp->obj_attributes, &sarg);
2145 	}
2146 	*cs->statusp = resp->status = status;
2147 }
2148 
2149 static void
2150 rfs4_op_getattr_free(nfs_resop4 *resop)
2151 {
2152 	GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2153 
2154 	nfs4_fattr4_free(&resp->obj_attributes);
2155 }
2156 
2157 /* ARGSUSED */
2158 static void
2159 rfs4_op_getfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2160 	struct compound_state *cs)
2161 {
2162 	GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2163 
2164 	if (cs->vp == NULL) {
2165 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2166 		return;
2167 	}
2168 	if (cs->access == CS_ACCESS_DENIED) {
2169 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
2170 		return;
2171 	}
2172 
2173 	resp->object.nfs_fh4_val =
2174 		kmem_alloc(cs->fh.nfs_fh4_len, KM_SLEEP);
2175 	nfs_fh4_copy(&cs->fh, &resp->object);
2176 	*cs->statusp = resp->status = NFS4_OK;
2177 }
2178 
2179 static void
2180 rfs4_op_getfh_free(nfs_resop4 *resop)
2181 {
2182 	GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2183 
2184 	if (resp->status == NFS4_OK &&
2185 	    resp->object.nfs_fh4_val != NULL) {
2186 		kmem_free(resp->object.nfs_fh4_val, resp->object.nfs_fh4_len);
2187 		resp->object.nfs_fh4_val = NULL;
2188 		resp->object.nfs_fh4_len = 0;
2189 	}
2190 }
2191 
2192 /*
2193  * illegal: args: void
2194  *	    res : status (NFS4ERR_OP_ILLEGAL)
2195  */
2196 /* ARGSUSED */
2197 static void
2198 rfs4_op_illegal(nfs_argop4 *argop, nfs_resop4 *resop,
2199 	struct svc_req *req, struct compound_state *cs)
2200 {
2201 	ILLEGAL4res *resp = &resop->nfs_resop4_u.opillegal;
2202 
2203 	resop->resop = OP_ILLEGAL;
2204 	*cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL;
2205 }
2206 
2207 /*
2208  * link: args: SAVED_FH: file, CURRENT_FH: target directory
2209  *	 res: status. If success - CURRENT_FH unchanged, return change_info
2210  */
2211 /* ARGSUSED */
2212 static void
2213 rfs4_op_link(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2214 	struct compound_state *cs)
2215 {
2216 	LINK4args *args = &argop->nfs_argop4_u.oplink;
2217 	LINK4res *resp = &resop->nfs_resop4_u.oplink;
2218 	int error;
2219 	vnode_t *vp;
2220 	vnode_t *dvp;
2221 	struct vattr bdva, idva, adva;
2222 	char *nm;
2223 	uint_t  len;
2224 
2225 	/* SAVED_FH: source object */
2226 	vp = cs->saved_vp;
2227 	if (vp == NULL) {
2228 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2229 		return;
2230 	}
2231 
2232 	/* CURRENT_FH: target directory */
2233 	dvp = cs->vp;
2234 	if (dvp == NULL) {
2235 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2236 		return;
2237 	}
2238 
2239 	/*
2240 	 * If there is a non-shared filesystem mounted on this vnode,
2241 	 * do not allow to link any file in this directory.
2242 	 */
2243 	if (vn_ismntpt(dvp)) {
2244 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
2245 		return;
2246 	}
2247 
2248 	if (cs->access == CS_ACCESS_DENIED) {
2249 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
2250 		return;
2251 	}
2252 
2253 	/* Check source object's type validity */
2254 	if (vp->v_type == VDIR) {
2255 		*cs->statusp = resp->status = NFS4ERR_ISDIR;
2256 		return;
2257 	}
2258 
2259 	/* Check target directory's type */
2260 	if (dvp->v_type != VDIR) {
2261 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
2262 		return;
2263 	}
2264 
2265 	if (cs->saved_exi != cs->exi) {
2266 		*cs->statusp = resp->status = NFS4ERR_XDEV;
2267 		return;
2268 	}
2269 
2270 	if (!utf8_dir_verify(&args->newname)) {
2271 		*cs->statusp = resp->status = NFS4ERR_INVAL;
2272 		return;
2273 	}
2274 
2275 	nm = utf8_to_fn(&args->newname, &len, NULL);
2276 	if (nm == NULL) {
2277 		*cs->statusp = resp->status = NFS4ERR_INVAL;
2278 		return;
2279 	}
2280 
2281 	if (len > MAXNAMELEN) {
2282 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2283 		kmem_free(nm, len);
2284 		return;
2285 	}
2286 
2287 	if (rdonly4(cs->exi, cs->vp, req)) {
2288 		*cs->statusp = resp->status = NFS4ERR_ROFS;
2289 		kmem_free(nm, len);
2290 		return;
2291 	}
2292 
2293 	/* Get "before" change value */
2294 	bdva.va_mask = AT_CTIME|AT_SEQ;
2295 	error = VOP_GETATTR(dvp, &bdva, 0, cs->cr);
2296 	if (error) {
2297 		*cs->statusp = resp->status = puterrno4(error);
2298 		kmem_free(nm, len);
2299 		return;
2300 	}
2301 
2302 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
2303 
2304 	error = VOP_LINK(dvp, vp, nm, cs->cr);
2305 
2306 	kmem_free(nm, len);
2307 
2308 	/*
2309 	 * Get the initial "after" sequence number, if it fails, set to zero
2310 	 */
2311 	idva.va_mask = AT_SEQ;
2312 	if (VOP_GETATTR(dvp, &idva, 0, cs->cr))
2313 		idva.va_seq = 0;
2314 
2315 	/*
2316 	 * Force modified data and metadata out to stable storage.
2317 	 */
2318 	(void) VOP_FSYNC(vp, FNODSYNC, cs->cr);
2319 	(void) VOP_FSYNC(dvp, 0, cs->cr);
2320 
2321 	if (error) {
2322 		*cs->statusp = resp->status = puterrno4(error);
2323 		return;
2324 	}
2325 
2326 	/*
2327 	 * Get "after" change value, if it fails, simply return the
2328 	 * before value.
2329 	 */
2330 	adva.va_mask = AT_CTIME|AT_SEQ;
2331 	if (VOP_GETATTR(dvp, &adva, 0, cs->cr)) {
2332 		adva.va_ctime = bdva.va_ctime;
2333 		adva.va_seq = 0;
2334 	}
2335 
2336 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
2337 
2338 	/*
2339 	 * The cinfo.atomic = TRUE only if we have
2340 	 * non-zero va_seq's, and it has incremented by exactly one
2341 	 * during the VOP_LINK and it didn't change during the VOP_FSYNC.
2342 	 */
2343 	if (bdva.va_seq && idva.va_seq && adva.va_seq &&
2344 			idva.va_seq == (bdva.va_seq + 1) &&
2345 			idva.va_seq == adva.va_seq)
2346 		resp->cinfo.atomic = TRUE;
2347 	else
2348 		resp->cinfo.atomic = FALSE;
2349 
2350 	*cs->statusp = resp->status = NFS4_OK;
2351 }
2352 
2353 /*
2354  * Used by rfs4_op_lookup and rfs4_op_lookupp to do the actual work.
2355  */
2356 
2357 /* ARGSUSED */
2358 static nfsstat4
2359 do_rfs4_op_lookup(char *nm, uint_t buflen, struct svc_req *req,
2360 	struct compound_state *cs)
2361 {
2362 	int error;
2363 	int different_export = 0;
2364 	vnode_t *vp, *tvp, *pre_tvp = NULL, *oldvp = NULL;
2365 	struct exportinfo *exi = NULL, *pre_exi = NULL;
2366 	nfsstat4 stat;
2367 	fid_t fid;
2368 	int attrdir, dotdot, walk;
2369 	bool_t is_newvp = FALSE;
2370 
2371 	if (cs->vp->v_flag & V_XATTRDIR) {
2372 		attrdir = 1;
2373 		ASSERT(get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2374 	} else {
2375 		attrdir = 0;
2376 		ASSERT(! get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2377 	}
2378 
2379 	dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
2380 
2381 	/*
2382 	 * If dotdotting, then need to check whether it's
2383 	 * above the root of a filesystem, or above an
2384 	 * export point.
2385 	 */
2386 	if (dotdot) {
2387 
2388 		/*
2389 		 * If dotdotting at the root of a filesystem, then
2390 		 * need to traverse back to the mounted-on filesystem
2391 		 * and do the dotdot lookup there.
2392 		 */
2393 		if (cs->vp->v_flag & VROOT) {
2394 
2395 			/*
2396 			 * If at the system root, then can
2397 			 * go up no further.
2398 			 */
2399 			if (VN_CMP(cs->vp, rootdir))
2400 				return (puterrno4(ENOENT));
2401 
2402 			/*
2403 			 * Traverse back to the mounted-on filesystem
2404 			 */
2405 			cs->vp = untraverse(cs->vp);
2406 
2407 			/*
2408 			 * Set the different_export flag so we remember
2409 			 * to pick up a new exportinfo entry for
2410 			 * this new filesystem.
2411 			 */
2412 			different_export = 1;
2413 		} else {
2414 
2415 			/*
2416 			 * If dotdotting above an export point then set
2417 			 * the different_export to get new export info.
2418 			 */
2419 			different_export = nfs_exported(cs->exi, cs->vp);
2420 		}
2421 	}
2422 
2423 	error = VOP_LOOKUP(cs->vp, nm, &vp, NULL, 0, NULL, cs->cr);
2424 	if (error)
2425 		return (puterrno4(error));
2426 
2427 	VN_SETPATH(rootdir, cs->vp, vp, nm, strlen(nm));
2428 
2429 	/*
2430 	 * If the vnode is in a pseudo filesystem, check whether it is visible.
2431 	 *
2432 	 * XXX if the vnode is a symlink and it is not visible in
2433 	 * a pseudo filesystem, return ENOENT (not following symlink).
2434 	 * V4 client can not mount such symlink. This is a regression
2435 	 * from V2/V3.
2436 	 *
2437 	 * In the same exported filesystem, if the security flavor used
2438 	 * is not an explicitly shared flavor, limit the view to the visible
2439 	 * list entries only. This is not a WRONGSEC case because it's already
2440 	 * checked via PUTROOTFH/PUTPUBFH or PUTFH.
2441 	 */
2442 	if (!different_export &&
2443 	    (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
2444 	    cs->access & CS_ACCESS_LIMITED)) {
2445 		if (! nfs_visible(cs->exi, vp, &different_export)) {
2446 			VN_RELE(vp);
2447 			return (puterrno4(ENOENT));
2448 		}
2449 	}
2450 
2451 	/*
2452 	 * If it's a mountpoint, then traverse it.
2453 	 */
2454 	if (vn_ismntpt(vp)) {
2455 		pre_exi = cs->exi;	/* save pre-traversed exportinfo */
2456 		pre_tvp = vp;		/* save pre-traversed vnode	*/
2457 
2458 		/*
2459 		 * hold pre_tvp to counteract rele by traverse.  We will
2460 		 * need pre_tvp below if checkexport4 fails
2461 		 */
2462 		VN_HOLD(pre_tvp);
2463 		tvp = vp;
2464 		if ((error = traverse(&tvp)) != 0) {
2465 			VN_RELE(vp);
2466 			VN_RELE(pre_tvp);
2467 			return (puterrno4(error));
2468 		}
2469 		vp = tvp;
2470 		different_export = 1;
2471 	} else if (vp->v_vfsp != cs->vp->v_vfsp) {
2472 		/*
2473 		 * The vfsp comparison is to handle the case where
2474 		 * a LOFS mount is shared.  lo_lookup traverses mount points,
2475 		 * and NFS is unaware of local fs transistions because
2476 		 * v_vfsmountedhere isn't set.  For this special LOFS case,
2477 		 * the dir and the obj returned by lookup will have different
2478 		 * vfs ptrs.
2479 		 */
2480 		different_export = 1;
2481 	}
2482 
2483 	if (different_export) {
2484 
2485 		bzero(&fid, sizeof (fid));
2486 		fid.fid_len = MAXFIDSZ;
2487 		error = vop_fid_pseudo(vp, &fid);
2488 		if (error) {
2489 			VN_RELE(vp);
2490 			if (pre_tvp)
2491 				VN_RELE(pre_tvp);
2492 			return (puterrno4(error));
2493 		}
2494 
2495 		if (dotdot)
2496 			exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
2497 		else
2498 			exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
2499 
2500 		if (exi == NULL) {
2501 			if (pre_tvp) {
2502 				/*
2503 				 * If this vnode is a mounted-on vnode,
2504 				 * but the mounted-on file system is not
2505 				 * exported, send back the filehandle for
2506 				 * the mounted-on vnode, not the root of
2507 				 * the mounted-on file system.
2508 				 */
2509 				VN_RELE(vp);
2510 				vp = pre_tvp;
2511 				exi = pre_exi;
2512 			} else {
2513 				VN_RELE(vp);
2514 				return (puterrno4(EACCES));
2515 			}
2516 		} else if (pre_tvp) {
2517 			/* we're done with pre_tvp now. release extra hold */
2518 			VN_RELE(pre_tvp);
2519 		}
2520 
2521 		cs->exi = exi;
2522 
2523 		/*
2524 		 * Now we do a checkauth4. The reason is that
2525 		 * this client/user may not have access to the new
2526 		 * exported file system, and if he does,
2527 		 * the client/user may be mapped to a different uid.
2528 		 *
2529 		 * We start with a new cr, because the checkauth4 done
2530 		 * in the PUT*FH operation over wrote the cred's uid,
2531 		 * gid, etc, and we want the real thing before calling
2532 		 * checkauth4()
2533 		 */
2534 		crfree(cs->cr);
2535 		cs->cr = crdup(cs->basecr);
2536 
2537 		if (cs->vp)
2538 			oldvp = cs->vp;
2539 		cs->vp = vp;
2540 		is_newvp = TRUE;
2541 
2542 		stat = call_checkauth4(cs, req);
2543 		if (stat != NFS4_OK) {
2544 			VN_RELE(cs->vp);
2545 			cs->vp = oldvp;
2546 			return (stat);
2547 		}
2548 	}
2549 
2550 	error = makefh4(&cs->fh, vp, cs->exi);
2551 
2552 	if (error) {
2553 		if (is_newvp) {
2554 			VN_RELE(cs->vp);
2555 			cs->vp = oldvp;
2556 		} else
2557 			VN_RELE(vp);
2558 		return (puterrno4(error));
2559 	}
2560 
2561 	if (!is_newvp) {
2562 		if (cs->vp)
2563 			VN_RELE(cs->vp);
2564 		cs->vp = vp;
2565 	} else if (oldvp)
2566 		VN_RELE(oldvp);
2567 
2568 	/*
2569 	 * if did lookup on attrdir and didn't lookup .., set named
2570 	 * attr fh flag
2571 	 */
2572 	if (attrdir && ! dotdot)
2573 		set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
2574 
2575 	/* Assume false for now, open proc will set this */
2576 	cs->mandlock = FALSE;
2577 
2578 	return (NFS4_OK);
2579 }
2580 
2581 /* ARGSUSED */
2582 static void
2583 rfs4_op_lookup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2584 	struct compound_state *cs)
2585 {
2586 	LOOKUP4args *args = &argop->nfs_argop4_u.oplookup;
2587 	LOOKUP4res *resp = &resop->nfs_resop4_u.oplookup;
2588 	char *nm;
2589 	uint_t len;
2590 
2591 	if (cs->vp == NULL) {
2592 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2593 		return;
2594 	}
2595 
2596 	if (cs->vp->v_type == VLNK) {
2597 		*cs->statusp = resp->status = NFS4ERR_SYMLINK;
2598 		return;
2599 	}
2600 
2601 	if (cs->vp->v_type != VDIR) {
2602 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
2603 		return;
2604 	}
2605 
2606 	if (!utf8_dir_verify(&args->objname)) {
2607 		*cs->statusp = resp->status = NFS4ERR_INVAL;
2608 		return;
2609 	}
2610 
2611 	nm = utf8_to_str(&args->objname, &len, NULL);
2612 	if (nm == NULL) {
2613 		*cs->statusp = resp->status = NFS4ERR_INVAL;
2614 		return;
2615 	}
2616 
2617 	if (len > MAXNAMELEN) {
2618 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2619 		kmem_free(nm, len);
2620 		return;
2621 	}
2622 
2623 	*cs->statusp = resp->status = do_rfs4_op_lookup(nm, len, req, cs);
2624 
2625 	kmem_free(nm, len);
2626 }
2627 
2628 /* ARGSUSED */
2629 static void
2630 rfs4_op_lookupp(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
2631 	struct compound_state *cs)
2632 {
2633 	LOOKUPP4res *resp = &resop->nfs_resop4_u.oplookupp;
2634 
2635 	if (cs->vp == NULL) {
2636 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2637 		return;
2638 	}
2639 
2640 	if (cs->vp->v_type != VDIR) {
2641 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
2642 		return;
2643 	}
2644 
2645 	*cs->statusp = resp->status = do_rfs4_op_lookup("..", 3, req, cs);
2646 
2647 	/*
2648 	 * From NFSV4 Specification, LOOKUPP should not check for
2649 	 * NFS4ERR_WRONGSEC. Retrun NFS4_OK instead.
2650 	 */
2651 	if (resp->status == NFS4ERR_WRONGSEC) {
2652 		*cs->statusp = resp->status = NFS4_OK;
2653 	}
2654 }
2655 
2656 
2657 /*ARGSUSED2*/
2658 static void
2659 rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2660 	struct compound_state *cs)
2661 {
2662 	OPENATTR4args	*args = &argop->nfs_argop4_u.opopenattr;
2663 	OPENATTR4res	*resp = &resop->nfs_resop4_u.opopenattr;
2664 	vnode_t		*avp = NULL;
2665 	int		lookup_flags = LOOKUP_XATTR, error;
2666 	int		exp_ro = 0;
2667 
2668 	if (cs->vp == NULL) {
2669 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2670 		return;
2671 	}
2672 
2673 	/*
2674 	 * Make a couple of checks made by copen()
2675 	 *
2676 	 * Check to make sure underlying fs supports xattrs.  This
2677 	 * is required because solaris filesystem implementations
2678 	 * (UFS/TMPFS) don't enforce the noxattr mount option
2679 	 * in VOP_LOOKUP(LOOKUP_XATTR).  If fs doesn't support this
2680 	 * pathconf cmd or if fs supports cmd but doesn't claim
2681 	 * support for xattr, return NOTSUPP.  It would be better
2682 	 * to use VOP_PATHCONF( _PC_XATTR_ENABLED) for this; however,
2683 	 * that cmd is not available to VOP_PATHCONF interface
2684 	 * (it's only implemented inside pathconf syscall)...
2685 	 *
2686 	 * Verify permission to put attributes on files (access
2687 	 * checks from copen).
2688 	 */
2689 
2690 	if ((cs->vp->v_vfsp->vfs_flag & VFS_XATTR) == 0) {
2691 		error = ENOTSUP;
2692 		goto error_out;
2693 	}
2694 
2695 	if ((VOP_ACCESS(cs->vp, VREAD, 0, cs->cr) != 0) &&
2696 	    (VOP_ACCESS(cs->vp, VWRITE, 0, cs->cr) != 0) &&
2697 	    (VOP_ACCESS(cs->vp, VEXEC, 0, cs->cr) != 0)) {
2698 		error = EACCES;
2699 		goto error_out;
2700 	}
2701 
2702 	/*
2703 	 * The CREATE_XATTR_DIR VOP flag cannot be specified if
2704 	 * the file system is exported read-only -- regardless of
2705 	 * createdir flag.  Otherwise the attrdir would be created
2706 	 * (assuming server fs isn't mounted readonly locally).  If
2707 	 * VOP_LOOKUP returns ENOENT in this case, the error will
2708 	 * be translated into EROFS.  ENOSYS is mapped to ENOTSUP
2709 	 * because specfs has no VOP_LOOKUP op, so the macro would
2710 	 * return ENOSYS.  EINVAL is returned by all (current)
2711 	 * Solaris file system implementations when any of their
2712 	 * restrictions are violated (xattr(dir) can't have xattrdir).
2713 	 * Returning NOTSUPP is more appropriate in this case
2714 	 * because the object will never be able to have an attrdir.
2715 	 */
2716 	if (args->createdir && ! (exp_ro = rdonly4(cs->exi, cs->vp, req)))
2717 		lookup_flags |= CREATE_XATTR_DIR;
2718 
2719 	error = VOP_LOOKUP(cs->vp, "", &avp, NULL, lookup_flags, NULL, cs->cr);
2720 
2721 	if (error) {
2722 		if (error == ENOENT && args->createdir && exp_ro)
2723 			error = EROFS;
2724 		else if (error == EINVAL || error == ENOSYS)
2725 			error = ENOTSUP;
2726 		goto error_out;
2727 	}
2728 
2729 	ASSERT(avp->v_flag & V_XATTRDIR);
2730 
2731 	error = makefh4(&cs->fh, avp, cs->exi);
2732 
2733 	if (error) {
2734 		VN_RELE(avp);
2735 		goto error_out;
2736 	}
2737 
2738 	VN_RELE(cs->vp);
2739 	cs->vp = avp;
2740 
2741 	/*
2742 	 * There is no requirement for an attrdir fh flag
2743 	 * because the attrdir has a vnode flag to distinguish
2744 	 * it from regular (non-xattr) directories.  The
2745 	 * FH4_ATTRDIR flag is set for future sanity checks.
2746 	 */
2747 	set_fh4_flag(&cs->fh, FH4_ATTRDIR);
2748 	*cs->statusp = resp->status = NFS4_OK;
2749 	return;
2750 
2751 error_out:
2752 
2753 	*cs->statusp = resp->status = puterrno4(error);
2754 }
2755 
2756 static int
2757 do_io(int direction, vnode_t *vp, struct uio *uio, int ioflag, cred_t *cred)
2758 {
2759 	int error;
2760 	int i;
2761 	clock_t delaytime;
2762 	caller_context_t ct;
2763 
2764 	delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
2765 
2766 	/*
2767 	 * Don't block on mandatory locks. If this routine returns
2768 	 * EAGAIN, the caller should return NFS4ERR_LOCKED.
2769 	 */
2770 	uio->uio_fmode = FNONBLOCK;
2771 
2772 	ct.cc_sysid = 0;
2773 	ct.cc_pid = 0;
2774 	ct.cc_caller_id = nfs4_srv_caller_id;
2775 
2776 	for (i = 0; i < rfs4_maxlock_tries; i++) {
2777 
2778 
2779 		if (direction == FREAD) {
2780 			(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);
2781 			error = VOP_READ(vp, uio, ioflag, cred, &ct);
2782 			VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
2783 		} else {
2784 			(void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
2785 			error = VOP_WRITE(vp, uio, ioflag, cred, &ct);
2786 			VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
2787 		}
2788 
2789 		if (error != EAGAIN)
2790 			break;
2791 
2792 		if (i < rfs4_maxlock_tries - 1) {
2793 			delay(delaytime);
2794 			delaytime *= 2;
2795 		}
2796 	}
2797 
2798 	return (error);
2799 }
2800 
2801 /* ARGSUSED */
2802 static void
2803 rfs4_op_read(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2804 	struct compound_state *cs)
2805 {
2806 	READ4args *args = &argop->nfs_argop4_u.opread;
2807 	READ4res *resp = &resop->nfs_resop4_u.opread;
2808 	int error;
2809 	int verror;
2810 	vnode_t *vp;
2811 	struct vattr va;
2812 	struct iovec iov;
2813 	struct uio uio;
2814 	u_offset_t offset;
2815 	bool_t *deleg = &cs->deleg;
2816 	nfsstat4 stat;
2817 	int in_crit = 0;
2818 	mblk_t *mp;
2819 	int alloc_err = 0;
2820 
2821 	vp = cs->vp;
2822 	if (vp == NULL) {
2823 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2824 		return;
2825 	}
2826 	if (cs->access == CS_ACCESS_DENIED) {
2827 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
2828 		return;
2829 	}
2830 
2831 	/*
2832 	 * Enter the critical region before calling VOP_RWLOCK
2833 	 * to avoid a deadlock with write requests.
2834 	 */
2835 	if (nbl_need_check(vp)) {
2836 		nbl_start_crit(vp, RW_READER);
2837 		in_crit = 1;
2838 		if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0)) {
2839 			*cs->statusp = resp->status = NFS4ERR_LOCKED;
2840 			goto out;
2841 		}
2842 	}
2843 
2844 	if ((stat = rfs4_check_stateid(FREAD, vp, &args->stateid, FALSE,
2845 					deleg, TRUE)) != NFS4_OK) {
2846 		*cs->statusp = resp->status = stat;
2847 		goto out;
2848 	}
2849 
2850 	va.va_mask = AT_MODE|AT_SIZE|AT_UID;
2851 	verror = VOP_GETATTR(vp, &va, 0, cs->cr);
2852 
2853 	/*
2854 	 * If we can't get the attributes, then we can't do the
2855 	 * right access checking.  So, we'll fail the request.
2856 	 */
2857 	if (verror) {
2858 		*cs->statusp = resp->status = puterrno4(verror);
2859 		goto out;
2860 	}
2861 
2862 	if (vp->v_type != VREG) {
2863 		*cs->statusp = resp->status =
2864 			((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
2865 		goto out;
2866 	}
2867 
2868 	if (crgetuid(cs->cr) != va.va_uid &&
2869 	    (error = VOP_ACCESS(vp, VREAD, 0, cs->cr)) &&
2870 	    (error = VOP_ACCESS(vp, VEXEC, 0, cs->cr))) {
2871 		*cs->statusp = resp->status = puterrno4(error);
2872 		goto out;
2873 	}
2874 
2875 	if (MANDLOCK(vp, va.va_mode)) { /* XXX - V4 supports mand locking */
2876 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
2877 		goto out;
2878 	}
2879 
2880 	offset = args->offset;
2881 	if (offset >= va.va_size) {
2882 		*cs->statusp = resp->status = NFS4_OK;
2883 		resp->eof = TRUE;
2884 		resp->data_len = 0;
2885 		resp->data_val = NULL;
2886 		resp->mblk = NULL;
2887 		*cs->statusp = resp->status = NFS4_OK;
2888 		goto out;
2889 	}
2890 
2891 	if (args->count == 0) {
2892 		*cs->statusp = resp->status = NFS4_OK;
2893 		resp->eof = FALSE;
2894 		resp->data_len = 0;
2895 		resp->data_val = NULL;
2896 		resp->mblk = NULL;
2897 		goto out;
2898 	}
2899 
2900 	/*
2901 	 * Do not allocate memory more than maximum allowed
2902 	 * transfer size
2903 	 */
2904 	if (args->count > rfs4_tsize(req))
2905 		args->count = rfs4_tsize(req);
2906 
2907 	/*
2908 	 * mp will contain the data to be sent out in the read reply.
2909 	 * It will be freed after the reply has been sent.
2910 	 * Let's roundup the data to a BYTES_PER_XDR_UNIT multiple,
2911 	 * so that the call to xdrmblk_putmblk() never fails.
2912 	 * If the first alloc of the requested size fails, then
2913 	 * decrease the size to something more reasonable and wait
2914 	 * for the allocation to occur.
2915 	 */
2916 	mp = allocb(RNDUP(args->count), BPRI_MED);
2917 	if (mp == NULL) {
2918 		if (args->count > MAXBSIZE)
2919 			args->count = MAXBSIZE;
2920 		mp = allocb_wait(RNDUP(args->count), BPRI_MED,
2921 				STR_NOSIG, &alloc_err);
2922 	}
2923 	ASSERT(mp != NULL);
2924 	ASSERT(alloc_err == 0);
2925 
2926 	iov.iov_base = (caddr_t)mp->b_datap->db_base;
2927 	iov.iov_len = args->count;
2928 	uio.uio_iov = &iov;
2929 	uio.uio_iovcnt = 1;
2930 	uio.uio_segflg = UIO_SYSSPACE;
2931 	uio.uio_extflg = UIO_COPY_CACHED;
2932 	uio.uio_loffset = args->offset;
2933 	uio.uio_resid = args->count;
2934 
2935 	error = do_io(FREAD, vp, &uio, 0, cs->cr);
2936 
2937 	va.va_mask = AT_SIZE;
2938 	verror = VOP_GETATTR(vp, &va, 0, cs->cr);
2939 
2940 	if (error) {
2941 		freeb(mp);
2942 		*cs->statusp = resp->status = puterrno4(error);
2943 		goto out;
2944 	}
2945 
2946 	*cs->statusp = resp->status = NFS4_OK;
2947 
2948 	ASSERT(uio.uio_resid >= 0);
2949 	resp->data_len = args->count - uio.uio_resid;
2950 	resp->data_val = (char *)mp->b_datap->db_base;
2951 	resp->mblk = mp;
2952 
2953 	if (!verror && offset + resp->data_len == va.va_size)
2954 		resp->eof = TRUE;
2955 	else
2956 		resp->eof = FALSE;
2957 
2958 out:
2959 	if (in_crit)
2960 		nbl_end_crit(vp);
2961 }
2962 
2963 static void
2964 rfs4_op_read_free(nfs_resop4 *resop)
2965 {
2966 	READ4res *resp = &resop->nfs_resop4_u.opread;
2967 
2968 	if (resp->status == NFS4_OK && resp->mblk != NULL) {
2969 		freeb(resp->mblk);
2970 		resp->mblk = NULL;
2971 		resp->data_val = NULL;
2972 		resp->data_len = 0;
2973 	}
2974 }
2975 
2976 static void
2977 rfs4_op_readdir_free(nfs_resop4 *resop)
2978 {
2979 	READDIR4res *resp = &resop->nfs_resop4_u.opreaddir;
2980 
2981 	if (resp->status == NFS4_OK && resp->mblk != NULL) {
2982 		freeb(resp->mblk);
2983 		resp->mblk = NULL;
2984 		resp->data_len = 0;
2985 	}
2986 }
2987 
2988 
2989 /* ARGSUSED */
2990 static void
2991 rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
2992 	struct compound_state *cs)
2993 {
2994 	PUTPUBFH4res *resp = &resop->nfs_resop4_u.opputpubfh;
2995 	int error;
2996 	vnode_t *vp;
2997 	struct exportinfo *exi, *sav_exi;
2998 	nfs_fh4_fmt_t *fh_fmtp;
2999 
3000 	if (cs->vp) {
3001 		VN_RELE(cs->vp);
3002 		cs->vp = NULL;
3003 	}
3004 
3005 	if (cs->cr)
3006 		crfree(cs->cr);
3007 
3008 	cs->cr = crdup(cs->basecr);
3009 
3010 	vp = exi_public->exi_vp;
3011 	if (vp == NULL) {
3012 		*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3013 		return;
3014 	}
3015 
3016 	error = makefh4(&cs->fh, vp, exi_public);
3017 	if (error != 0) {
3018 		*cs->statusp = resp->status = puterrno4(error);
3019 		return;
3020 	}
3021 	sav_exi = cs->exi;
3022 	if (exi_public == exi_root) {
3023 		/*
3024 		 * No filesystem is actually shared public, so we default
3025 		 * to exi_root. In this case, we must check whether root
3026 		 * is exported.
3027 		 */
3028 		fh_fmtp = (nfs_fh4_fmt_t *)cs->fh.nfs_fh4_val;
3029 
3030 		/*
3031 		 * if root filesystem is exported, the exportinfo struct that we
3032 		 * should use is what checkexport4 returns, because root_exi is
3033 		 * actually a mostly empty struct.
3034 		 */
3035 		exi = checkexport4(&fh_fmtp->fh4_fsid,
3036 			(fid_t *)&fh_fmtp->fh4_xlen, NULL);
3037 		cs->exi = ((exi != NULL) ? exi : exi_public);
3038 	} else {
3039 		/*
3040 		 * it's a properly shared filesystem
3041 		 */
3042 		cs->exi = exi_public;
3043 	}
3044 
3045 	VN_HOLD(vp);
3046 	cs->vp = vp;
3047 
3048 	if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3049 		VN_RELE(cs->vp);
3050 		cs->vp = NULL;
3051 		cs->exi = sav_exi;
3052 		return;
3053 	}
3054 
3055 	*cs->statusp = resp->status = NFS4_OK;
3056 }
3057 
3058 /*
3059  * XXX - issue with put*fh operations. Suppose /export/home is exported.
3060  * Suppose an NFS client goes to mount /export/home/joe. If /export, home,
3061  * or joe have restrictive search permissions, then we shouldn't let
3062  * the client get a file handle. This is easy to enforce. However, we
3063  * don't know what security flavor should be used until we resolve the
3064  * path name. Another complication is uid mapping. If root is
3065  * the user, then it will be mapped to the anonymous user by default,
3066  * but we won't know that till we've resolved the path name. And we won't
3067  * know what the anonymous user is.
3068  * Luckily, SECINFO is specified to take a full filename.
3069  * So what we will have to in rfs4_op_lookup is check that flavor of
3070  * the target object matches that of the request, and if root was the
3071  * caller, check for the root= and anon= options, and if necessary,
3072  * repeat the lookup using the right cred_t. But that's not done yet.
3073  */
3074 /* ARGSUSED */
3075 static void
3076 rfs4_op_putfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3077 	struct compound_state *cs)
3078 {
3079 	PUTFH4args *args = &argop->nfs_argop4_u.opputfh;
3080 	PUTFH4res *resp = &resop->nfs_resop4_u.opputfh;
3081 	nfs_fh4_fmt_t *fh_fmtp;
3082 
3083 	if (cs->vp) {
3084 		VN_RELE(cs->vp);
3085 		cs->vp = NULL;
3086 	}
3087 
3088 	if (cs->cr) {
3089 		crfree(cs->cr);
3090 		cs->cr = NULL;
3091 	}
3092 
3093 
3094 	if (args->object.nfs_fh4_len < NFS_FH4_LEN) {
3095 		*cs->statusp = resp->status = NFS4ERR_BADHANDLE;
3096 		return;
3097 	}
3098 
3099 	fh_fmtp = (nfs_fh4_fmt_t *)args->object.nfs_fh4_val;
3100 	cs->exi = checkexport4(&fh_fmtp->fh4_fsid, (fid_t *)&fh_fmtp->fh4_xlen,
3101 				NULL);
3102 
3103 	if (cs->exi == NULL) {
3104 		*cs->statusp = resp->status = NFS4ERR_STALE;
3105 		return;
3106 	}
3107 
3108 	cs->cr = crdup(cs->basecr);
3109 
3110 	ASSERT(cs->cr != NULL);
3111 
3112 	if (! (cs->vp = nfs4_fhtovp(&args->object, cs->exi, &resp->status))) {
3113 		*cs->statusp = resp->status;
3114 		return;
3115 	}
3116 
3117 	if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3118 		VN_RELE(cs->vp);
3119 		cs->vp = NULL;
3120 		return;
3121 	}
3122 
3123 	nfs_fh4_copy(&args->object, &cs->fh);
3124 	*cs->statusp = resp->status = NFS4_OK;
3125 	cs->deleg = FALSE;
3126 }
3127 
3128 /* ARGSUSED */
3129 static void
3130 rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3131 	struct compound_state *cs)
3132 
3133 {
3134 	PUTROOTFH4res *resp = &resop->nfs_resop4_u.opputrootfh;
3135 	int error;
3136 	fid_t fid;
3137 	struct exportinfo *exi, *sav_exi;
3138 
3139 	if (cs->vp) {
3140 		VN_RELE(cs->vp);
3141 		cs->vp = NULL;
3142 	}
3143 
3144 	if (cs->cr)
3145 		crfree(cs->cr);
3146 
3147 	cs->cr = crdup(cs->basecr);
3148 
3149 	/*
3150 	 * Using rootdir, the system root vnode,
3151 	 * get its fid.
3152 	 */
3153 	bzero(&fid, sizeof (fid));
3154 	fid.fid_len = MAXFIDSZ;
3155 	error = vop_fid_pseudo(rootdir, &fid);
3156 	if (error != 0) {
3157 		*cs->statusp = resp->status = puterrno4(error);
3158 		return;
3159 	}
3160 
3161 	/*
3162 	 * Then use the root fsid & fid it to find out if it's exported
3163 	 *
3164 	 * If the server root isn't exported directly, then
3165 	 * it should at least be a pseudo export based on
3166 	 * one or more exports further down in the server's
3167 	 * file tree.
3168 	 */
3169 	exi = checkexport4(&rootdir->v_vfsp->vfs_fsid, &fid, NULL);
3170 	if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) {
3171 		NFS4_DEBUG(rfs4_debug,
3172 			(CE_WARN, "rfs4_op_putrootfh: export check failure"));
3173 		*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3174 		return;
3175 	}
3176 
3177 	/*
3178 	 * Now make a filehandle based on the root
3179 	 * export and root vnode.
3180 	 */
3181 	error = makefh4(&cs->fh, rootdir, exi);
3182 	if (error != 0) {
3183 		*cs->statusp = resp->status = puterrno4(error);
3184 		return;
3185 	}
3186 
3187 	sav_exi = cs->exi;
3188 	cs->exi = exi;
3189 
3190 	VN_HOLD(rootdir);
3191 	cs->vp = rootdir;
3192 
3193 	if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3194 		VN_RELE(rootdir);
3195 		cs->vp = NULL;
3196 		cs->exi = sav_exi;
3197 		return;
3198 	}
3199 
3200 	*cs->statusp = resp->status = NFS4_OK;
3201 	cs->deleg = FALSE;
3202 }
3203 
3204 /*
3205  * A directory entry is a valid nfsv4 entry if
3206  * - it has a non-zero ino
3207  * - it is not a dot or dotdot name
3208  * - it is visible in a pseudo export or in a real export that can
3209  *   only have a limited view.
3210  */
3211 static bool_t
3212 valid_nfs4_entry(struct exportinfo *exi, struct dirent64 *dp,
3213 		int *expseudo, int check_visible)
3214 {
3215 	if (dp->d_ino == 0 || NFS_IS_DOTNAME(dp->d_name)) {
3216 		*expseudo = 0;
3217 		return (FALSE);
3218 	}
3219 
3220 	if (! check_visible) {
3221 		*expseudo = 0;
3222 		return (TRUE);
3223 	}
3224 
3225 	return (nfs_visible_inode(exi, dp->d_ino, expseudo));
3226 }
3227 
3228 /*
3229  * set_rdattr_params sets up the variables used to manage what information
3230  * to get for each directory entry.
3231  */
3232 static nfsstat4
3233 set_rdattr_params(struct nfs4_svgetit_arg *sargp,
3234 		bitmap4 attrs, bool_t *need_to_lookup)
3235 {
3236 	uint_t	va_mask;
3237 	nfsstat4 status;
3238 	bitmap4 objbits;
3239 
3240 	status = bitmap4_to_attrmask(attrs, sargp);
3241 	if (status != NFS4_OK) {
3242 		/*
3243 		 * could not even figure attr mask
3244 		 */
3245 		return (status);
3246 	}
3247 	va_mask = sargp->vap->va_mask;
3248 
3249 	/*
3250 	 * dirent's d_ino is always correct value for mounted_on_fileid.
3251 	 * mntdfid_set is set once here, but mounted_on_fileid is
3252 	 * set in main dirent processing loop for each dirent.
3253 	 * The mntdfid_set is a simple optimization that lets the
3254 	 * server attr code avoid work when caller is readdir.
3255 	 */
3256 	sargp->mntdfid_set = TRUE;
3257 
3258 	/*
3259 	 * Lookup entry only if client asked for any of the following:
3260 	 * a) vattr attrs
3261 	 * b) vfs attrs
3262 	 * c) attrs w/per-object scope requested (change, filehandle, etc)
3263 	 *    other than mounted_on_fileid (which we can take from dirent)
3264 	 */
3265 	objbits = attrs ? attrs & NFS4_VP_ATTR_MASK : 0;
3266 
3267 	if (va_mask || sargp->sbp || (objbits & ~FATTR4_MOUNTED_ON_FILEID_MASK))
3268 		*need_to_lookup = TRUE;
3269 	else
3270 		*need_to_lookup = FALSE;
3271 
3272 	if (sargp->sbp == NULL)
3273 		return (NFS4_OK);
3274 
3275 	/*
3276 	 * If filesystem attrs are requested, get them now from the
3277 	 * directory vp, as most entries will have same filesystem. The only
3278 	 * exception are mounted over entries but we handle
3279 	 * those as we go (XXX mounted over detection not yet implemented).
3280 	 */
3281 	sargp->vap->va_mask = 0;	/* to avoid VOP_GETATTR */
3282 	status = bitmap4_get_sysattrs(sargp);
3283 	sargp->vap->va_mask = va_mask;
3284 
3285 	if ((status != NFS4_OK) && sargp->rdattr_error_req) {
3286 		/*
3287 		 * Failed to get filesystem attributes.
3288 		 * Return a rdattr_error for each entry, but don't fail.
3289 		 * However, don't get any obj-dependent attrs.
3290 		 */
3291 		sargp->rdattr_error = status;	/* for rdattr_error */
3292 		*need_to_lookup = FALSE;
3293 		/*
3294 		 * At least get fileid for regular readdir output
3295 		 */
3296 		sargp->vap->va_mask &= AT_NODEID;
3297 		status = NFS4_OK;
3298 	}
3299 
3300 	return (status);
3301 }
3302 
3303 /*
3304  * readlink: args: CURRENT_FH.
3305  *	res: status. If success - CURRENT_FH unchanged, return linktext.
3306  */
3307 
3308 /* ARGSUSED */
3309 static void
3310 rfs4_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3311 	struct compound_state *cs)
3312 {
3313 	READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3314 	int error;
3315 	vnode_t *vp;
3316 	struct iovec iov;
3317 	struct vattr va;
3318 	struct uio uio;
3319 	char *data;
3320 
3321 	/* CURRENT_FH: directory */
3322 	vp = cs->vp;
3323 	if (vp == NULL) {
3324 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3325 		return;
3326 	}
3327 
3328 	if (cs->access == CS_ACCESS_DENIED) {
3329 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
3330 		return;
3331 	}
3332 
3333 	if (vp->v_type == VDIR) {
3334 		*cs->statusp = resp->status = NFS4ERR_ISDIR;
3335 		return;
3336 	}
3337 
3338 	if (vp->v_type != VLNK) {
3339 		*cs->statusp = resp->status = NFS4ERR_INVAL;
3340 		return;
3341 	}
3342 
3343 	va.va_mask = AT_MODE;
3344 	error = VOP_GETATTR(vp, &va, 0, cs->cr);
3345 	if (error) {
3346 		*cs->statusp = resp->status = puterrno4(error);
3347 		return;
3348 	}
3349 
3350 	if (MANDLOCK(vp, va.va_mode)) {
3351 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
3352 		return;
3353 	}
3354 
3355 	data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
3356 
3357 	iov.iov_base = data;
3358 	iov.iov_len = MAXPATHLEN;
3359 	uio.uio_iov = &iov;
3360 	uio.uio_iovcnt = 1;
3361 	uio.uio_segflg = UIO_SYSSPACE;
3362 	uio.uio_extflg = UIO_COPY_CACHED;
3363 	uio.uio_loffset = 0;
3364 	uio.uio_resid = MAXPATHLEN;
3365 
3366 	error = VOP_READLINK(vp, &uio, cs->cr);
3367 
3368 	if (error) {
3369 		kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3370 		*cs->statusp = resp->status = puterrno4(error);
3371 		return;
3372 	}
3373 
3374 	*(data + MAXPATHLEN - uio.uio_resid) = '\0';
3375 
3376 	/*
3377 	 * treat link name as data
3378 	 */
3379 	(void) str_to_utf8(data, &resp->link);
3380 
3381 	kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3382 	*cs->statusp = resp->status = NFS4_OK;
3383 }
3384 
3385 static void
3386 rfs4_op_readlink_free(nfs_resop4 *resop)
3387 {
3388 	READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3389 	utf8string *symlink = &resp->link;
3390 
3391 	if (symlink->utf8string_val) {
3392 		UTF8STRING_FREE(*symlink)
3393 	}
3394 }
3395 
3396 /*
3397  * release_lockowner:
3398  *	Release any state associated with the supplied
3399  *	lockowner. Note if any lo_state is holding locks we will not
3400  *	rele that lo_state and thus the lockowner will not be destroyed.
3401  *	A client using lock after the lock owner stateid has been released
3402  *	will suffer the consequence of NFS4ERR_BAD_STATEID and would have
3403  *	to reissue the lock with new_lock_owner set to TRUE.
3404  *	args: lock_owner
3405  *	res:  status
3406  */
3407 /* ARGSUSED */
3408 static void
3409 rfs4_op_release_lockowner(nfs_argop4 *argop, nfs_resop4 *resop,
3410 	struct svc_req *req, struct compound_state *cs)
3411 {
3412 	RELEASE_LOCKOWNER4args *ap = &argop->nfs_argop4_u.oprelease_lockowner;
3413 	RELEASE_LOCKOWNER4res *resp = &resop->nfs_resop4_u.oprelease_lockowner;
3414 	rfs4_lockowner_t *lo;
3415 	rfs4_openowner_t *oop;
3416 	rfs4_state_t *sp;
3417 	rfs4_lo_state_t *lsp;
3418 	rfs4_client_t *cp;
3419 	bool_t create = FALSE;
3420 	locklist_t *llist;
3421 	sysid_t sysid;
3422 
3423 	/* Make sure there is a clientid around for this request */
3424 	cp = rfs4_findclient_by_id(ap->lock_owner.clientid, FALSE);
3425 
3426 	if (cp == NULL) {
3427 		*cs->statusp = resp->status =
3428 			rfs4_check_clientid(&ap->lock_owner.clientid, 0);
3429 		return;
3430 	}
3431 	rfs4_client_rele(cp);
3432 
3433 	lo = rfs4_findlockowner(&ap->lock_owner, &create);
3434 	if (lo == NULL) {
3435 		*cs->statusp = resp->status = NFS4_OK;
3436 		return;
3437 	}
3438 	ASSERT(lo->client != NULL);
3439 
3440 	/*
3441 	 * Check for EXPIRED client. If so will reap state with in a lease
3442 	 * period or on next set_clientid_confirm step
3443 	 */
3444 	if (rfs4_lease_expired(lo->client)) {
3445 		rfs4_lockowner_rele(lo);
3446 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
3447 		return;
3448 	}
3449 
3450 	/*
3451 	 * If no sysid has been assigned, then no locks exist; just return.
3452 	 */
3453 	rfs4_dbe_lock(lo->client->dbe);
3454 	if (lo->client->sysidt == LM_NOSYSID) {
3455 		rfs4_lockowner_rele(lo);
3456 		rfs4_dbe_unlock(lo->client->dbe);
3457 		return;
3458 	}
3459 
3460 	sysid = lo->client->sysidt;
3461 	rfs4_dbe_unlock(lo->client->dbe);
3462 
3463 	/*
3464 	 * Mark the lockowner invalid.
3465 	 */
3466 	rfs4_dbe_hide(lo->dbe);
3467 
3468 	/*
3469 	 * sysid-pid pair should now not be used since the lockowner is
3470 	 * invalid. If the client were to instantiate the lockowner again
3471 	 * it would be assigned a new pid. Thus we can get the list of
3472 	 * current locks.
3473 	 */
3474 
3475 	llist = flk_get_active_locks(sysid, lo->pid);
3476 	/* If we are still holding locks fail */
3477 	if (llist != NULL) {
3478 
3479 		*cs->statusp = resp->status = NFS4ERR_LOCKS_HELD;
3480 
3481 		flk_free_locklist(llist);
3482 		/*
3483 		 * We need to unhide the lockowner so the client can
3484 		 * try it again. The bad thing here is if the client
3485 		 * has a logic error that took it here in the first place
3486 		 * he probably has lost accounting of the locks that it
3487 		 * is holding. So we may have dangling state until the
3488 		 * open owner state is reaped via close. One scenario
3489 		 * that could possibly occur is that the client has
3490 		 * sent the unlock request(s) in separate threads
3491 		 * and has not waited for the replies before sending the
3492 		 * RELEASE_LOCKOWNER request. Presumably, it would expect
3493 		 * and deal appropriately with NFS4ERR_LOCKS_HELD, by
3494 		 * reissuing the request.
3495 		 */
3496 		rfs4_dbe_unhide(lo->dbe);
3497 		rfs4_lockowner_rele(lo);
3498 		return;
3499 	}
3500 
3501 	/*
3502 	 * For the corresponding client we need to check each open
3503 	 * owner for any opens that have lockowner state associated
3504 	 * with this lockowner.
3505 	 */
3506 
3507 	rfs4_dbe_lock(lo->client->dbe);
3508 	for (oop = lo->client->openownerlist.next->oop; oop != NULL;
3509 	    oop = oop->openownerlist.next->oop) {
3510 
3511 		rfs4_dbe_lock(oop->dbe);
3512 		for (sp = oop->ownerstateids.next->sp; sp != NULL;
3513 		    sp = sp->ownerstateids.next->sp) {
3514 
3515 			rfs4_dbe_lock(sp->dbe);
3516 			for (lsp = sp->lockownerlist.next->lsp;
3517 			    lsp != NULL; lsp = lsp->lockownerlist.next->lsp) {
3518 				if (lsp->locker == lo) {
3519 					rfs4_dbe_lock(lsp->dbe);
3520 					rfs4_dbe_invalidate(lsp->dbe);
3521 					rfs4_dbe_unlock(lsp->dbe);
3522 				}
3523 			}
3524 			rfs4_dbe_unlock(sp->dbe);
3525 		}
3526 		rfs4_dbe_unlock(oop->dbe);
3527 	}
3528 	rfs4_dbe_unlock(lo->client->dbe);
3529 
3530 	rfs4_lockowner_rele(lo);
3531 
3532 	*cs->statusp = resp->status = NFS4_OK;
3533 }
3534 
3535 /*
3536  * short utility function to lookup a file and recall the delegation
3537  */
3538 static rfs4_file_t *
3539 rfs4_lookup_and_findfile(vnode_t *dvp, char *nm, vnode_t **vpp,
3540 	int *lkup_error, cred_t *cr)
3541 {
3542 	vnode_t *vp;
3543 	rfs4_file_t *fp = NULL;
3544 	bool_t fcreate = FALSE;
3545 	int error;
3546 
3547 	if (vpp)
3548 		*vpp = NULL;
3549 
3550 	if ((error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr)) == 0) {
3551 		VN_SETPATH(rootdir, dvp, vp, nm, strlen(nm));
3552 		if (vp->v_type == VREG)
3553 			fp = rfs4_findfile(vp, NULL, &fcreate);
3554 		if (vpp)
3555 			*vpp = vp;
3556 		else
3557 			VN_RELE(vp);
3558 	}
3559 
3560 	if (lkup_error)
3561 		*lkup_error = error;
3562 
3563 	return (fp);
3564 }
3565 
3566 /*
3567  * remove: args: CURRENT_FH: directory; name.
3568  *	res: status. If success - CURRENT_FH unchanged, return change_info
3569  *		for directory.
3570  */
3571 /* ARGSUSED */
3572 static void
3573 rfs4_op_remove(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3574 	struct compound_state *cs)
3575 {
3576 	REMOVE4args *args = &argop->nfs_argop4_u.opremove;
3577 	REMOVE4res *resp = &resop->nfs_resop4_u.opremove;
3578 	int error;
3579 	vnode_t *dvp, *vp;
3580 	struct vattr bdva, idva, adva;
3581 	char *nm;
3582 	uint_t len;
3583 	rfs4_file_t *fp;
3584 	int in_crit = 0;
3585 
3586 	/* CURRENT_FH: directory */
3587 	dvp = cs->vp;
3588 	if (dvp == NULL) {
3589 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3590 		return;
3591 	}
3592 
3593 	if (cs->access == CS_ACCESS_DENIED) {
3594 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
3595 		return;
3596 	}
3597 
3598 	/*
3599 	 * If there is an unshared filesystem mounted on this vnode,
3600 	 * Do not allow to remove anything in this directory.
3601 	 */
3602 	if (vn_ismntpt(dvp)) {
3603 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
3604 		return;
3605 	}
3606 
3607 	if (dvp->v_type != VDIR) {
3608 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
3609 		return;
3610 	}
3611 
3612 	if (!utf8_dir_verify(&args->target)) {
3613 		*cs->statusp = resp->status = NFS4ERR_INVAL;
3614 		return;
3615 	}
3616 
3617 	/*
3618 	 * Lookup the file so that we can check if it's a directory
3619 	 */
3620 	nm = utf8_to_fn(&args->target, &len, NULL);
3621 	if (nm == NULL) {
3622 		*cs->statusp = resp->status = NFS4ERR_INVAL;
3623 		return;
3624 	}
3625 
3626 	if (len > MAXNAMELEN) {
3627 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
3628 		kmem_free(nm, len);
3629 		return;
3630 	}
3631 
3632 	if (rdonly4(cs->exi, cs->vp, req)) {
3633 		*cs->statusp = resp->status = NFS4ERR_ROFS;
3634 		kmem_free(nm, len);
3635 		return;
3636 	}
3637 
3638 	/*
3639 	 * Lookup the file to determine type and while we are see if
3640 	 * there is a file struct around and check for delegation.
3641 	 * We don't need to acquire va_seq before this lookup, if
3642 	 * it causes an update, cinfo.before will not match, which will
3643 	 * trigger a cache flush even if atomic is TRUE.
3644 	 */
3645 	if (fp = rfs4_lookup_and_findfile(dvp, nm, &vp, &error, cs->cr)) {
3646 		if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
3647 						NULL)) {
3648 			VN_RELE(vp);
3649 			rfs4_file_rele(fp);
3650 			*cs->statusp = resp->status = NFS4ERR_DELAY;
3651 			kmem_free(nm, len);
3652 			return;
3653 		}
3654 	}
3655 
3656 	/* Didn't find anything to remove */
3657 	if (vp == NULL) {
3658 		*cs->statusp = resp->status = error;
3659 		kmem_free(nm, len);
3660 		return;
3661 	}
3662 
3663 	if (nbl_need_check(vp)) {
3664 		nbl_start_crit(vp, RW_READER);
3665 		in_crit = 1;
3666 		if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0)) {
3667 			*cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
3668 			kmem_free(nm, len);
3669 			nbl_end_crit(vp);
3670 			VN_RELE(vp);
3671 			if (fp) {
3672 				rfs4_clear_dont_grant(fp);
3673 				rfs4_file_rele(fp);
3674 			}
3675 			return;
3676 		}
3677 	}
3678 
3679 	/* Get dir "before" change value */
3680 	bdva.va_mask = AT_CTIME|AT_SEQ;
3681 	error = VOP_GETATTR(dvp, &bdva, 0, cs->cr);
3682 	if (error) {
3683 		*cs->statusp = resp->status = puterrno4(error);
3684 		kmem_free(nm, len);
3685 		return;
3686 	}
3687 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
3688 
3689 	/* Actually do the REMOVE operation */
3690 	if (vp->v_type == VDIR) {
3691 		/*
3692 		 * Can't remove a directory that has a mounted-on filesystem.
3693 		 */
3694 		if (vn_ismntpt(vp)) {
3695 			error = EACCES;
3696 		} else {
3697 			/*
3698 			 * System V defines rmdir to return EEXIST,
3699 			 * not * ENOTEMPTY, if the directory is not
3700 			 * empty.  A System V NFS server needs to map
3701 			 * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to
3702 			 * transmit over the wire.
3703 			 */
3704 			if ((error = VOP_RMDIR(dvp, nm, rootdir, cs->cr))
3705 				== EEXIST)
3706 				error = ENOTEMPTY;
3707 		}
3708 	} else {
3709 		if ((error = VOP_REMOVE(dvp, nm, cs->cr)) == 0 &&
3710 			fp != NULL) {
3711 			struct vattr va;
3712 
3713 			/*
3714 			 * This is va_seq safe because we are not
3715 			 * manipulating dvp.
3716 			 */
3717 			va.va_mask = AT_NLINK;
3718 			if (!VOP_GETATTR(fp->vp, &va, 0, cs->cr) &&
3719 				va.va_nlink == 0) {
3720 				/* The file is gone and so should the state */
3721 				if (in_crit) {
3722 					nbl_end_crit(vp);
3723 					in_crit = 0;
3724 				}
3725 				rfs4_close_all_state(fp);
3726 			}
3727 		}
3728 	}
3729 
3730 	if (in_crit)
3731 		nbl_end_crit(vp);
3732 	VN_RELE(vp);
3733 
3734 	if (fp) {
3735 		rfs4_clear_dont_grant(fp);
3736 		rfs4_file_rele(fp);
3737 	}
3738 	kmem_free(nm, len);
3739 
3740 	if (error) {
3741 		*cs->statusp = resp->status = puterrno4(error);
3742 		return;
3743 	}
3744 
3745 	/*
3746 	 * Get the initial "after" sequence number, if it fails, set to zero
3747 	 */
3748 	idva.va_mask = AT_SEQ;
3749 	if (VOP_GETATTR(dvp, &idva, 0, cs->cr))
3750 		idva.va_seq = 0;
3751 
3752 	/*
3753 	 * Force modified data and metadata out to stable storage.
3754 	 */
3755 	(void) VOP_FSYNC(dvp, 0, cs->cr);
3756 
3757 	/*
3758 	 * Get "after" change value, if it fails, simply return the
3759 	 * before value.
3760 	 */
3761 	adva.va_mask = AT_CTIME|AT_SEQ;
3762 	if (VOP_GETATTR(dvp, &adva, 0, cs->cr)) {
3763 		adva.va_ctime = bdva.va_ctime;
3764 		adva.va_seq = 0;
3765 	}
3766 
3767 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
3768 
3769 	/*
3770 	 * The cinfo.atomic = TRUE only if we have
3771 	 * non-zero va_seq's, and it has incremented by exactly one
3772 	 * during the VOP_REMOVE/RMDIR and it didn't change during
3773 	 * the VOP_FSYNC.
3774 	 */
3775 	if (bdva.va_seq && idva.va_seq && adva.va_seq &&
3776 			idva.va_seq == (bdva.va_seq + 1) &&
3777 			idva.va_seq == adva.va_seq)
3778 		resp->cinfo.atomic = TRUE;
3779 	else
3780 		resp->cinfo.atomic = FALSE;
3781 
3782 	*cs->statusp = resp->status = NFS4_OK;
3783 }
3784 
3785 /*
3786  * rename: args: SAVED_FH: from directory, CURRENT_FH: target directory,
3787  *		oldname and newname.
3788  *	res: status. If success - CURRENT_FH unchanged, return change_info
3789  *		for both from and target directories.
3790  */
3791 /* ARGSUSED */
3792 static void
3793 rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3794 	struct compound_state *cs)
3795 {
3796 	RENAME4args *args = &argop->nfs_argop4_u.oprename;
3797 	RENAME4res *resp = &resop->nfs_resop4_u.oprename;
3798 	int error;
3799 	vnode_t *odvp;
3800 	vnode_t *ndvp;
3801 	vnode_t *srcvp, *targvp;
3802 	struct vattr obdva, oidva, oadva;
3803 	struct vattr nbdva, nidva, nadva;
3804 	char *onm, *nnm;
3805 	uint_t olen, nlen;
3806 	rfs4_file_t *fp, *sfp;
3807 	int in_crit_src, in_crit_targ;
3808 	int fp_rele_grant_hold, sfp_rele_grant_hold;
3809 
3810 	fp = sfp = NULL;
3811 	srcvp = targvp = NULL;
3812 	in_crit_src = in_crit_targ = 0;
3813 	fp_rele_grant_hold = sfp_rele_grant_hold = 0;
3814 
3815 	/* CURRENT_FH: target directory */
3816 	ndvp = cs->vp;
3817 	if (ndvp == NULL) {
3818 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3819 		return;
3820 	}
3821 
3822 	/* SAVED_FH: from directory */
3823 	odvp = cs->saved_vp;
3824 	if (odvp == NULL) {
3825 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3826 		return;
3827 	}
3828 
3829 	if (cs->access == CS_ACCESS_DENIED) {
3830 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
3831 		return;
3832 	}
3833 
3834 	/*
3835 	 * If there is an unshared filesystem mounted on this vnode,
3836 	 * do not allow to rename objects in this directory.
3837 	 */
3838 	if (vn_ismntpt(odvp)) {
3839 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
3840 		return;
3841 	}
3842 
3843 	/*
3844 	 * If there is an unshared filesystem mounted on this vnode,
3845 	 * do not allow to rename to this directory.
3846 	 */
3847 	if (vn_ismntpt(ndvp)) {
3848 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
3849 		return;
3850 	}
3851 
3852 	if (odvp->v_type != VDIR || ndvp->v_type != VDIR) {
3853 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
3854 		return;
3855 	}
3856 
3857 	if (cs->saved_exi != cs->exi) {
3858 		*cs->statusp = resp->status = NFS4ERR_XDEV;
3859 		return;
3860 	}
3861 
3862 	if (!utf8_dir_verify(&args->oldname)) {
3863 		*cs->statusp = resp->status = NFS4ERR_INVAL;
3864 		return;
3865 	}
3866 
3867 	if (!utf8_dir_verify(&args->newname)) {
3868 		*cs->statusp = resp->status = NFS4ERR_INVAL;
3869 		return;
3870 	}
3871 
3872 	onm = utf8_to_fn(&args->oldname, &olen, NULL);
3873 	if (onm == NULL) {
3874 		*cs->statusp = resp->status = NFS4ERR_INVAL;
3875 		return;
3876 	}
3877 
3878 	nnm = utf8_to_fn(&args->newname, &nlen, NULL);
3879 	if (nnm == NULL) {
3880 		*cs->statusp = resp->status = NFS4ERR_INVAL;
3881 		kmem_free(onm, olen);
3882 		return;
3883 	}
3884 
3885 	if (olen > MAXNAMELEN || nlen > MAXNAMELEN) {
3886 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
3887 		kmem_free(onm, olen);
3888 		kmem_free(nnm, nlen);
3889 		return;
3890 	}
3891 
3892 
3893 	if (rdonly4(cs->exi, cs->vp, req)) {
3894 		*cs->statusp = resp->status = NFS4ERR_ROFS;
3895 		kmem_free(onm, olen);
3896 		kmem_free(nnm, nlen);
3897 		return;
3898 	}
3899 
3900 	/*
3901 	 * Is the source a file and have a delegation?
3902 	 * We don't need to acquire va_seq before these lookups, if
3903 	 * it causes an update, cinfo.before will not match, which will
3904 	 * trigger a cache flush even if atomic is TRUE.
3905 	 */
3906 	if (sfp = rfs4_lookup_and_findfile(odvp, onm, &srcvp, &error, cs->cr)) {
3907 		if (rfs4_check_delegated_byfp(FWRITE, sfp, TRUE, TRUE, TRUE,
3908 						NULL)) {
3909 			*cs->statusp = resp->status = NFS4ERR_DELAY;
3910 			goto err_out;
3911 		}
3912 	}
3913 
3914 	if (srcvp == NULL) {
3915 		*cs->statusp = resp->status = puterrno4(error);
3916 		kmem_free(onm, olen);
3917 		kmem_free(nnm, nlen);
3918 		return;
3919 	}
3920 
3921 	sfp_rele_grant_hold = 1;
3922 
3923 	/* Does the destination exist and a file and have a delegation? */
3924 	if (fp = rfs4_lookup_and_findfile(ndvp, nnm, &targvp, NULL, cs->cr)) {
3925 		if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
3926 						NULL)) {
3927 			*cs->statusp = resp->status = NFS4ERR_DELAY;
3928 			goto err_out;
3929 		}
3930 	}
3931 	fp_rele_grant_hold = 1;
3932 
3933 
3934 	/* Check for NBMAND lock on both source and target */
3935 	if (nbl_need_check(srcvp)) {
3936 		nbl_start_crit(srcvp, RW_READER);
3937 		in_crit_src = 1;
3938 		if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0)) {
3939 			*cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
3940 			goto err_out;
3941 		}
3942 	}
3943 
3944 	if (targvp && nbl_need_check(targvp)) {
3945 		nbl_start_crit(targvp, RW_READER);
3946 		in_crit_targ = 1;
3947 		if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0)) {
3948 			*cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
3949 			goto err_out;
3950 		}
3951 	}
3952 
3953 	/* Get source "before" change value */
3954 	obdva.va_mask = AT_CTIME|AT_SEQ;
3955 	error = VOP_GETATTR(odvp, &obdva, 0, cs->cr);
3956 	if (!error) {
3957 		nbdva.va_mask = AT_CTIME|AT_SEQ;
3958 		error = VOP_GETATTR(ndvp, &nbdva, 0, cs->cr);
3959 	}
3960 	if (error) {
3961 		*cs->statusp = resp->status = puterrno4(error);
3962 		goto err_out;
3963 	}
3964 
3965 	NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.before, obdva.va_ctime)
3966 	NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.before, nbdva.va_ctime)
3967 
3968 	if ((error = VOP_RENAME(odvp, onm, ndvp, nnm, cs->cr)) == 0 &&
3969 		fp != NULL) {
3970 		struct vattr va;
3971 
3972 		va.va_mask = AT_NLINK;
3973 		if (!VOP_GETATTR(fp->vp, &va, 0, cs->cr) &&
3974 			va.va_nlink == 0) {
3975 			/* The file is gone and so should the state */
3976 			if (in_crit_targ) {
3977 				nbl_end_crit(targvp);
3978 				in_crit_targ = 0;
3979 			}
3980 			rfs4_close_all_state(fp);
3981 		}
3982 	}
3983 
3984 	if (in_crit_src)
3985 		nbl_end_crit(srcvp);
3986 	if (srcvp)
3987 		VN_RELE(srcvp);
3988 	if (in_crit_targ)
3989 		nbl_end_crit(targvp);
3990 	if (targvp)
3991 		VN_RELE(targvp);
3992 
3993 	if (sfp) {
3994 		rfs4_clear_dont_grant(sfp);
3995 		rfs4_file_rele(sfp);
3996 	}
3997 	if (fp) {
3998 		rfs4_clear_dont_grant(fp);
3999 		rfs4_file_rele(fp);
4000 	}
4001 
4002 	kmem_free(onm, olen);
4003 	kmem_free(nnm, nlen);
4004 
4005 	/*
4006 	 * Get the initial "after" sequence number, if it fails, set to zero
4007 	 */
4008 	oidva.va_mask = AT_SEQ;
4009 	if (VOP_GETATTR(odvp, &oidva, 0, cs->cr))
4010 		oidva.va_seq = 0;
4011 
4012 	nidva.va_mask = AT_SEQ;
4013 	if (VOP_GETATTR(ndvp, &nidva, 0, cs->cr))
4014 		nidva.va_seq = 0;
4015 
4016 	/*
4017 	 * Force modified data and metadata out to stable storage.
4018 	 */
4019 	(void) VOP_FSYNC(odvp, 0, cs->cr);
4020 	(void) VOP_FSYNC(ndvp, 0, cs->cr);
4021 
4022 	if (error) {
4023 		*cs->statusp = resp->status = puterrno4(error);
4024 		return;
4025 	}
4026 
4027 	/*
4028 	 * Get "after" change values, if it fails, simply return the
4029 	 * before value.
4030 	 */
4031 	oadva.va_mask = AT_CTIME|AT_SEQ;
4032 	if (VOP_GETATTR(odvp, &oadva, 0, cs->cr)) {
4033 		oadva.va_ctime = obdva.va_ctime;
4034 		oadva.va_seq = 0;
4035 	}
4036 
4037 	nadva.va_mask = AT_CTIME|AT_SEQ;
4038 	if (VOP_GETATTR(odvp, &nadva, 0, cs->cr)) {
4039 		nadva.va_ctime = nbdva.va_ctime;
4040 		nadva.va_seq = 0;
4041 	}
4042 
4043 	NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.after, oadva.va_ctime)
4044 	NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.after, nadva.va_ctime)
4045 
4046 	/*
4047 	 * The cinfo.atomic = TRUE only if we have
4048 	 * non-zero va_seq's, and it has incremented by exactly one
4049 	 * during the VOP_RENAME and it didn't change during the VOP_FSYNC.
4050 	 */
4051 	if (obdva.va_seq && oidva.va_seq && oadva.va_seq &&
4052 			oidva.va_seq == (obdva.va_seq + 1) &&
4053 			oidva.va_seq == oadva.va_seq)
4054 		resp->source_cinfo.atomic = TRUE;
4055 	else
4056 		resp->source_cinfo.atomic = FALSE;
4057 
4058 	if (nbdva.va_seq && nidva.va_seq && nadva.va_seq &&
4059 			nidva.va_seq == (nbdva.va_seq + 1) &&
4060 			nidva.va_seq == nadva.va_seq)
4061 		resp->target_cinfo.atomic = TRUE;
4062 	else
4063 		resp->target_cinfo.atomic = FALSE;
4064 
4065 #ifdef	VOLATILE_FH_TEST
4066 	{
4067 	extern void add_volrnm_fh(struct exportinfo *, vnode_t *);
4068 
4069 	/*
4070 	 * Add the renamed file handle to the volatile rename list
4071 	 */
4072 	if (cs->exi->exi_export.ex_flags & EX_VOLRNM) {
4073 		/* file handles may expire on rename */
4074 		vnode_t *vp;
4075 
4076 		nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4077 		/*
4078 		 * Already know that nnm will be a valid string
4079 		 */
4080 		error = VOP_LOOKUP(ndvp, nnm, &vp, NULL, 0, NULL, cs->cr);
4081 		kmem_free(nnm, nlen);
4082 		if (!error) {
4083 			add_volrnm_fh(cs->exi, vp);
4084 			VN_RELE(vp);
4085 		}
4086 	}
4087 	}
4088 #endif	/* VOLATILE_FH_TEST */
4089 
4090 	*cs->statusp = resp->status = NFS4_OK;
4091 	return;
4092 
4093 err_out:
4094 	kmem_free(onm, olen);
4095 	kmem_free(nnm, nlen);
4096 
4097 	if (in_crit_src) nbl_end_crit(srcvp);
4098 	if (in_crit_targ) nbl_end_crit(targvp);
4099 	if (targvp) VN_RELE(targvp);
4100 	if (srcvp) VN_RELE(srcvp);
4101 	if (sfp) {
4102 		if (sfp_rele_grant_hold) rfs4_clear_dont_grant(sfp);
4103 		rfs4_file_rele(sfp);
4104 	}
4105 	if (fp) {
4106 		if (fp_rele_grant_hold) rfs4_clear_dont_grant(fp);
4107 		rfs4_file_rele(fp);
4108 	}
4109 }
4110 
4111 /* ARGSUSED */
4112 static void
4113 rfs4_op_renew(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4114 	struct compound_state *cs)
4115 {
4116 	RENEW4args *args = &argop->nfs_argop4_u.oprenew;
4117 	RENEW4res *resp = &resop->nfs_resop4_u.oprenew;
4118 	rfs4_client_t *cp;
4119 
4120 	if ((cp = rfs4_findclient_by_id(args->clientid, FALSE)) == NULL) {
4121 		*cs->statusp = resp->status =
4122 			rfs4_check_clientid(&args->clientid, 0);
4123 		return;
4124 	}
4125 
4126 	if (rfs4_lease_expired(cp)) {
4127 		rfs4_client_rele(cp);
4128 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
4129 		return;
4130 	}
4131 
4132 	rfs4_update_lease(cp);
4133 
4134 	mutex_enter(cp->cbinfo.cb_lock);
4135 	if (cp->cbinfo.cb_notified_of_cb_path_down == FALSE) {
4136 		cp->cbinfo.cb_notified_of_cb_path_down = TRUE;
4137 		*cs->statusp = resp->status = NFS4ERR_CB_PATH_DOWN;
4138 	} else {
4139 		*cs->statusp = resp->status = NFS4_OK;
4140 	}
4141 	mutex_exit(cp->cbinfo.cb_lock);
4142 
4143 	rfs4_client_rele(cp);
4144 
4145 }
4146 
4147 /* ARGSUSED */
4148 static void
4149 rfs4_op_restorefh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
4150 	struct compound_state *cs)
4151 {
4152 	RESTOREFH4res *resp = &resop->nfs_resop4_u.oprestorefh;
4153 
4154 	/* No need to check cs->access - we are not accessing any object */
4155 	if ((cs->saved_vp == NULL) || (cs->saved_fh.nfs_fh4_val == NULL)) {
4156 		*cs->statusp = resp->status = NFS4ERR_RESTOREFH;
4157 		return;
4158 	}
4159 	if (cs->vp != NULL) {
4160 		VN_RELE(cs->vp);
4161 	}
4162 	cs->vp = cs->saved_vp;
4163 	cs->saved_vp = NULL;
4164 	cs->exi = cs->saved_exi;
4165 	nfs_fh4_copy(&cs->saved_fh, &cs->fh);
4166 	*cs->statusp = resp->status = NFS4_OK;
4167 	cs->deleg = FALSE;
4168 }
4169 
4170 /* ARGSUSED */
4171 static void
4172 rfs4_op_savefh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4173 	struct compound_state *cs)
4174 {
4175 	SAVEFH4res *resp = &resop->nfs_resop4_u.opsavefh;
4176 
4177 	/* No need to check cs->access - we are not accessing any object */
4178 	if (cs->vp == NULL) {
4179 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4180 		return;
4181 	}
4182 	if (cs->saved_vp != NULL) {
4183 		VN_RELE(cs->saved_vp);
4184 	}
4185 	cs->saved_vp = cs->vp;
4186 	VN_HOLD(cs->saved_vp);
4187 	cs->saved_exi = cs->exi;
4188 	/*
4189 	 * since SAVEFH is fairly rare, don't alloc space for its fh
4190 	 * unless necessary.
4191 	 */
4192 	if (cs->saved_fh.nfs_fh4_val == NULL) {
4193 		cs->saved_fh.nfs_fh4_val = kmem_alloc(NFS4_FHSIZE, KM_SLEEP);
4194 	}
4195 	nfs_fh4_copy(&cs->fh, &cs->saved_fh);
4196 	*cs->statusp = resp->status = NFS4_OK;
4197 }
4198 
4199 /*
4200  * rfs4_verify_attr is called when nfsv4 Setattr failed, but we wish to
4201  * return the bitmap of attrs that were set successfully. It is also
4202  * called by Verify/Nverify to test the vattr/vfsstat attrs. It should
4203  * always be called only after rfs4_do_set_attrs().
4204  *
4205  * Verify that the attributes are same as the expected ones. sargp->vap
4206  * and sargp->sbp contain the input attributes as translated from fattr4.
4207  *
4208  * This function verifies only the attrs that correspond to a vattr or
4209  * vfsstat struct. That is because of the extra step needed to get the
4210  * corresponding system structs. Other attributes have already been set or
4211  * verified by do_rfs4_set_attrs.
4212  *
4213  * Return 0 if all attrs match, -1 if some don't, error if error processing.
4214  */
4215 static int
4216 rfs4_verify_attr(struct nfs4_svgetit_arg *sargp,
4217 	bitmap4 *resp, struct nfs4_ntov_table *ntovp)
4218 {
4219 	int error, ret_error = 0;
4220 	int i, k;
4221 	uint_t sva_mask = sargp->vap->va_mask;
4222 	uint_t vbit;
4223 	union nfs4_attr_u *na;
4224 	uint8_t *amap;
4225 	bool_t getsb = ntovp->vfsstat;
4226 
4227 	if (sva_mask != 0) {
4228 		/*
4229 		 * Okay to overwrite sargp->vap because we verify based
4230 		 * on the incoming values.
4231 		 */
4232 		ret_error = VOP_GETATTR(sargp->cs->vp, sargp->vap, 0,
4233 				sargp->cs->cr);
4234 		if (ret_error) {
4235 			if (resp == NULL)
4236 				return (ret_error);
4237 			/*
4238 			 * Must return bitmap of successful attrs
4239 			 */
4240 			sva_mask = 0;	/* to prevent checking vap later */
4241 		} else {
4242 			/*
4243 			 * Some file systems clobber va_mask. it is probably
4244 			 * wrong of them to do so, nonethless we practice
4245 			 * defensive coding.
4246 			 * See bug id 4276830.
4247 			 */
4248 			sargp->vap->va_mask = sva_mask;
4249 		}
4250 	}
4251 
4252 	if (getsb) {
4253 		/*
4254 		 * Now get the superblock and loop on the bitmap, as there is
4255 		 * no simple way of translating from superblock to bitmap4.
4256 		 */
4257 		ret_error = VFS_STATVFS(sargp->cs->vp->v_vfsp, sargp->sbp);
4258 		if (ret_error) {
4259 			if (resp == NULL)
4260 				goto errout;
4261 			getsb = FALSE;
4262 		}
4263 	}
4264 
4265 	/*
4266 	 * Now loop and verify each attribute which getattr returned
4267 	 * whether it's the same as the input.
4268 	 */
4269 	if (resp == NULL && !getsb && (sva_mask == 0))
4270 		goto errout;
4271 
4272 	na = ntovp->na;
4273 	amap = ntovp->amap;
4274 	k = 0;
4275 	for (i = 0; i < ntovp->attrcnt; i++, na++, amap++) {
4276 		k = *amap;
4277 		ASSERT(nfs4_ntov_map[k].nval == k);
4278 		vbit = nfs4_ntov_map[k].vbit;
4279 
4280 		/*
4281 		 * If vattr attribute but VOP_GETATTR failed, or it's
4282 		 * superblock attribute but VFS_STATVFS failed, skip
4283 		 */
4284 		if (vbit) {
4285 			if ((vbit & sva_mask) == 0)
4286 				continue;
4287 		} else if (!(getsb && nfs4_ntov_map[k].vfsstat)) {
4288 			continue;
4289 		}
4290 		error = (*nfs4_ntov_map[k].sv_getit)(
4291 				NFS4ATTR_VERIT, sargp, na);
4292 		if (resp != NULL) {
4293 			if (error)
4294 				ret_error = -1;	/* not all match */
4295 			else	/* update response bitmap */
4296 				*resp |= nfs4_ntov_map[k].fbit;
4297 			continue;
4298 		}
4299 		if (error) {
4300 			ret_error = -1;	/* not all match */
4301 			break;
4302 		}
4303 	}
4304 errout:
4305 	return (ret_error);
4306 }
4307 
4308 /*
4309  * Decode the attribute to be set/verified. If the attr requires a sys op
4310  * (VOP_GETATTR, VFS_VFSSTAT), and the request is to verify, then don't
4311  * call the sv_getit function for it, because the sys op hasn't yet been done.
4312  * Return 0 for success, error code if failed.
4313  *
4314  * Note: the decoded arg is not freed here but in nfs4_ntov_table_free.
4315  */
4316 static int
4317 decode_fattr4_attr(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sargp,
4318 	int k, XDR *xdrp, bitmap4 *resp_bval, union nfs4_attr_u *nap)
4319 {
4320 	int error = 0;
4321 	bool_t set_later;
4322 
4323 	sargp->vap->va_mask |= nfs4_ntov_map[k].vbit;
4324 
4325 	if ((*nfs4_ntov_map[k].xfunc)(xdrp, nap)) {
4326 		set_later = nfs4_ntov_map[k].vbit || nfs4_ntov_map[k].vfsstat;
4327 		/*
4328 		 * don't verify yet if a vattr or sb dependent attr,
4329 		 * because we don't have their sys values yet.
4330 		 * Will be done later.
4331 		 */
4332 		if (! (set_later && (cmd == NFS4ATTR_VERIT))) {
4333 			/*
4334 			 * ACLs are a special case, since setting the MODE
4335 			 * conflicts with setting the ACL.  We delay setting
4336 			 * the ACL until all other attributes have been set.
4337 			 * The ACL gets set in do_rfs4_op_setattr().
4338 			 */
4339 			if (nfs4_ntov_map[k].fbit != FATTR4_ACL_MASK) {
4340 				error = (*nfs4_ntov_map[k].sv_getit)(cmd,
4341 				    sargp, nap);
4342 				if (error) {
4343 					xdr_free(nfs4_ntov_map[k].xfunc,
4344 					    (caddr_t)nap);
4345 				}
4346 			}
4347 		}
4348 	} else {
4349 #ifdef  DEBUG
4350 		cmn_err(CE_NOTE, "decode_fattr4_attr: error "
4351 			"decoding attribute %d\n", k);
4352 #endif
4353 		error = EINVAL;
4354 	}
4355 	if (!error && resp_bval && !set_later) {
4356 		*resp_bval |= nfs4_ntov_map[k].fbit;
4357 	}
4358 
4359 	return (error);
4360 }
4361 
4362 /*
4363  * Set vattr based on incoming fattr4 attrs - used by setattr.
4364  * Set response mask. Ignore any values that are not writable vattr attrs.
4365  */
4366 static nfsstat4
4367 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
4368 		struct nfs4_svgetit_arg *sargp, struct nfs4_ntov_table *ntovp,
4369 		nfs4_attr_cmd_t cmd)
4370 {
4371 	int error = 0;
4372 	int i;
4373 	char *attrs = fattrp->attrlist4;
4374 	uint32_t attrslen = fattrp->attrlist4_len;
4375 	XDR xdr;
4376 	nfsstat4 status = NFS4_OK;
4377 	vnode_t *vp = cs->vp;
4378 	union nfs4_attr_u *na;
4379 	uint8_t *amap;
4380 
4381 #ifndef lint
4382 	/*
4383 	 * Make sure that maximum attribute number can be expressed as an
4384 	 * 8 bit quantity.
4385 	 */
4386 	ASSERT(NFS4_MAXNUM_ATTRS <= (UINT8_MAX + 1));
4387 #endif
4388 
4389 	if (vp == NULL) {
4390 		if (resp)
4391 			*resp = 0;
4392 		return (NFS4ERR_NOFILEHANDLE);
4393 	}
4394 	if (cs->access == CS_ACCESS_DENIED) {
4395 		if (resp)
4396 			*resp = 0;
4397 		return (NFS4ERR_ACCESS);
4398 	}
4399 
4400 	sargp->op = cmd;
4401 	sargp->cs = cs;
4402 	sargp->flag = 0;	/* may be set later */
4403 	sargp->vap->va_mask = 0;
4404 	sargp->rdattr_error = NFS4_OK;
4405 	sargp->rdattr_error_req = FALSE;
4406 	/* sargp->sbp is set by the caller */
4407 
4408 	xdrmem_create(&xdr, attrs, attrslen, XDR_DECODE);
4409 
4410 	na = ntovp->na;
4411 	amap = ntovp->amap;
4412 
4413 	/*
4414 	 * The following loop iterates on the nfs4_ntov_map checking
4415 	 * if the fbit is set in the requested bitmap.
4416 	 * If set then we process the arguments using the
4417 	 * rfs4_fattr4 conversion functions to populate the setattr
4418 	 * vattr and va_mask. Any settable attrs that are not using vattr
4419 	 * will be set in this loop.
4420 	 */
4421 	for (i = 0; i < nfs4_ntov_map_size; i++) {
4422 		if (!(fattrp->attrmask & nfs4_ntov_map[i].fbit)) {
4423 			continue;
4424 		}
4425 		/*
4426 		 * If setattr, must be a writable attr.
4427 		 * If verify/nverify, must be a readable attr.
4428 		 */
4429 		if ((error = (*nfs4_ntov_map[i].sv_getit)(
4430 				    NFS4ATTR_SUPPORTED, sargp, NULL)) != 0) {
4431 			/*
4432 			 * Client tries to set/verify an
4433 			 * unsupported attribute, tries to set
4434 			 * a read only attr or verify a write
4435 			 * only one - error!
4436 			 */
4437 			break;
4438 		}
4439 		/*
4440 		 * Decode the attribute to set/verify
4441 		 */
4442 		error = decode_fattr4_attr(cmd, sargp, nfs4_ntov_map[i].nval,
4443 					&xdr, resp ? resp : NULL, na);
4444 		if (error)
4445 			break;
4446 		*amap++ = (uint8_t)nfs4_ntov_map[i].nval;
4447 		na++;
4448 		(ntovp->attrcnt)++;
4449 		if (nfs4_ntov_map[i].vfsstat)
4450 			ntovp->vfsstat = TRUE;
4451 	}
4452 
4453 	if (error != 0)
4454 		status = (error == ENOTSUP ?	NFS4ERR_ATTRNOTSUPP :
4455 						puterrno4(error));
4456 	/* xdrmem_destroy(&xdrs); */	/* NO-OP */
4457 	return (status);
4458 }
4459 
4460 static nfsstat4
4461 do_rfs4_op_setattr(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
4462 		stateid4 *stateid)
4463 {
4464 	int error = 0;
4465 	struct nfs4_svgetit_arg sarg;
4466 	bool_t trunc;
4467 
4468 	nfsstat4 status = NFS4_OK;
4469 	cred_t *cr = cs->cr;
4470 	vnode_t *vp = cs->vp;
4471 	struct nfs4_ntov_table ntov;
4472 	struct statvfs64 sb;
4473 	struct vattr bva;
4474 	struct flock64 bf;
4475 	int in_crit = 0;
4476 	uint_t saved_mask = 0;
4477 	caller_context_t ct;
4478 
4479 	*resp = 0;
4480 	sarg.sbp = &sb;
4481 	nfs4_ntov_table_init(&ntov);
4482 	status = do_rfs4_set_attrs(resp, fattrp, cs, &sarg, &ntov,
4483 			NFS4ATTR_SETIT);
4484 	if (status != NFS4_OK) {
4485 		/*
4486 		 * failed set attrs
4487 		 */
4488 		goto done;
4489 	}
4490 	if ((sarg.vap->va_mask == 0) &&
4491 	    (! (fattrp->attrmask & FATTR4_ACL_MASK))) {
4492 		/*
4493 		 * no further work to be done
4494 		 */
4495 		goto done;
4496 	}
4497 
4498 	/*
4499 	 * If we got a request to set the ACL and the MODE, only
4500 	 * allow changing VSUID, VSGID, and VSVTX.  Attempting
4501 	 * to change any other bits, along with setting an ACL,
4502 	 * gives NFS4ERR_INVAL.
4503 	 */
4504 	if ((fattrp->attrmask & FATTR4_ACL_MASK) &&
4505 	    (fattrp->attrmask & FATTR4_MODE_MASK)) {
4506 		vattr_t va;
4507 
4508 		va.va_mask = AT_MODE;
4509 		error = VOP_GETATTR(vp, &va, 0, cs->cr);
4510 		if (error) {
4511 			status = puterrno4(error);
4512 			goto done;
4513 		}
4514 		if ((sarg.vap->va_mode ^ va.va_mode) &
4515 		    ~(VSUID | VSGID | VSVTX)) {
4516 			status = NFS4ERR_INVAL;
4517 			goto done;
4518 		}
4519 	}
4520 
4521 
4522 	/* Check stateid only if size has been set */
4523 	if (sarg.vap->va_mask & AT_SIZE) {
4524 		trunc = (sarg.vap->va_size == 0);
4525 		status = rfs4_check_stateid(FWRITE, cs->vp, stateid,
4526 			trunc, &cs->deleg, sarg.vap->va_mask & AT_SIZE);
4527 		if (status != NFS4_OK)
4528 			goto done;
4529 	}
4530 
4531 	ct.cc_sysid = 0;
4532 	ct.cc_pid = 0;
4533 	ct.cc_caller_id = nfs4_srv_caller_id;
4534 
4535 	/* XXX start of possible race with delegations */
4536 
4537 	/*
4538 	 * We need to specially handle size changes because it is
4539 	 * possible for the client to create a file with read-only
4540 	 * modes, but with the file opened for writing. If the client
4541 	 * then tries to set the file size, e.g. ftruncate(3C),
4542 	 * fcntl(F_FREESP), the normal access checking done in
4543 	 * VOP_SETATTR would prevent the client from doing it even though
4544 	 * it should be allowed to do so.  To get around this, we do the
4545 	 * access checking for ourselves and use VOP_SPACE which doesn't
4546 	 * do the access checking.
4547 	 * Also the client should not be allowed to change the file
4548 	 * size if there is a conflicting non-blocking mandatory lock in
4549 	 * the region of the change.
4550 	 */
4551 	if (vp->v_type == VREG && (sarg.vap->va_mask & AT_SIZE)) {
4552 		u_offset_t offset;
4553 		ssize_t length;
4554 
4555 		/*
4556 		 * Check any possible conflict due to NBMAND locks.
4557 		 * Get into critical region before VOP_GETATTR, so the
4558 		 * size attribute is valid when checking conflicts.
4559 		 */
4560 		if (nbl_need_check(vp)) {
4561 			nbl_start_crit(vp, RW_READER);
4562 			in_crit = 1;
4563 		}
4564 
4565 		bva.va_mask = AT_UID|AT_SIZE;
4566 		if (error = VOP_GETATTR(vp, &bva, 0, cr)) {
4567 			status = puterrno4(error);
4568 			goto done;
4569 		}
4570 
4571 		if (in_crit) {
4572 			if (sarg.vap->va_size < bva.va_size) {
4573 				offset = sarg.vap->va_size;
4574 				length = bva.va_size - sarg.vap->va_size;
4575 			} else {
4576 				offset = bva.va_size;
4577 				length = sarg.vap->va_size - bva.va_size;
4578 			}
4579 			if (nbl_conflict(vp, NBL_WRITE, offset, length, 0)) {
4580 				status = NFS4ERR_LOCKED;
4581 				goto done;
4582 			}
4583 		}
4584 
4585 		if (crgetuid(cr) == bva.va_uid) {
4586 			saved_mask = sarg.vap->va_mask;
4587 			sarg.vap->va_mask &= ~AT_SIZE;
4588 			bf.l_type = F_WRLCK;
4589 			bf.l_whence = 0;
4590 			bf.l_start = (off64_t)sarg.vap->va_size;
4591 			bf.l_len = 0;
4592 			bf.l_sysid = 0;
4593 			bf.l_pid = 0;
4594 			error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
4595 					(offset_t)sarg.vap->va_size, cr, &ct);
4596 		}
4597 	}
4598 
4599 	if (!error && sarg.vap->va_mask != 0)
4600 		error = VOP_SETATTR(vp, sarg.vap, sarg.flag, cr, &ct);
4601 
4602 	/* restore AT_SIZE */
4603 	if (saved_mask & AT_SIZE)
4604 		sarg.vap->va_mask |= AT_SIZE;
4605 
4606 	/*
4607 	 * If an ACL was being set, it has been delayed until now,
4608 	 * in order to set the mode (via the VOP_SETATTR() above) first.
4609 	 */
4610 	if ((! error) && (fattrp->attrmask & FATTR4_ACL_MASK)) {
4611 		int i;
4612 
4613 		for (i = 0; i < NFS4_MAXNUM_ATTRS; i++)
4614 			if (ntov.amap[i] == FATTR4_ACL)
4615 				break;
4616 		if (i < NFS4_MAXNUM_ATTRS) {
4617 			error = (*nfs4_ntov_map[FATTR4_ACL].sv_getit)(
4618 			    NFS4ATTR_SETIT, &sarg, &ntov.na[i]);
4619 			if (error == 0) {
4620 				*resp |= FATTR4_ACL_MASK;
4621 			} else if (error == ENOTSUP) {
4622 				(void) rfs4_verify_attr(&sarg, resp, &ntov);
4623 				status = NFS4ERR_ATTRNOTSUPP;
4624 				goto done;
4625 			}
4626 		} else {
4627 			NFS4_DEBUG(rfs4_debug,
4628 			    (CE_NOTE, "do_rfs4_op_setattr: "
4629 			    "unable to find ACL in fattr4"));
4630 			error = EINVAL;
4631 		}
4632 	}
4633 
4634 	if (error) {
4635 		status = puterrno4(error);
4636 
4637 		/*
4638 		 * Set the response bitmap when setattr failed.
4639 		 * If VOP_SETATTR partially succeeded, test by doing a
4640 		 * VOP_GETATTR on the object and comparing the data
4641 		 * to the setattr arguments.
4642 		 */
4643 		(void) rfs4_verify_attr(&sarg, resp, &ntov);
4644 	} else {
4645 		/*
4646 		 * Force modified metadata out to stable storage.
4647 		 */
4648 		(void) VOP_FSYNC(vp, FNODSYNC, cr);
4649 		/*
4650 		 * Set response bitmap
4651 		 */
4652 		nfs4_vmask_to_nmask(sarg.vap->va_mask, resp);
4653 	}
4654 
4655 /* Return early and already have a NFSv4 error */
4656 done:
4657 	if (in_crit)
4658 		nbl_end_crit(vp);
4659 
4660 	nfs4_ntov_table_free(&ntov, &sarg);
4661 
4662 	return (status);
4663 }
4664 
4665 /* ARGSUSED */
4666 static void
4667 rfs4_op_setattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4668 	struct compound_state *cs)
4669 {
4670 	SETATTR4args *args = &argop->nfs_argop4_u.opsetattr;
4671 	SETATTR4res *resp = &resop->nfs_resop4_u.opsetattr;
4672 
4673 	if (cs->vp == NULL) {
4674 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4675 		return;
4676 	}
4677 
4678 	/*
4679 	 * If there is an unshared filesystem mounted on this vnode,
4680 	 * do not allow to setattr on this vnode.
4681 	 */
4682 	if (vn_ismntpt(cs->vp)) {
4683 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
4684 		return;
4685 	}
4686 
4687 	resp->attrsset = 0;
4688 
4689 	if (rdonly4(cs->exi, cs->vp, req)) {
4690 		*cs->statusp = resp->status = NFS4ERR_ROFS;
4691 		return;
4692 	}
4693 
4694 	*cs->statusp = resp->status =
4695 		do_rfs4_op_setattr(&resp->attrsset, &args->obj_attributes, cs,
4696 			&args->stateid);
4697 }
4698 
4699 /* ARGSUSED */
4700 static void
4701 rfs4_op_verify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4702 	struct compound_state *cs)
4703 {
4704 	/*
4705 	 * verify and nverify are exactly the same, except that nverify
4706 	 * succeeds when some argument changed, and verify succeeds when
4707 	 * when none changed.
4708 	 */
4709 
4710 	VERIFY4args  *args = &argop->nfs_argop4_u.opverify;
4711 	VERIFY4res *resp = &resop->nfs_resop4_u.opverify;
4712 
4713 	int error;
4714 	struct nfs4_svgetit_arg sarg;
4715 	struct statvfs64 sb;
4716 	struct nfs4_ntov_table ntov;
4717 
4718 	if (cs->vp == NULL) {
4719 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4720 		return;
4721 	}
4722 
4723 	sarg.sbp = &sb;
4724 	nfs4_ntov_table_init(&ntov);
4725 	resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
4726 				&sarg, &ntov, NFS4ATTR_VERIT);
4727 	if (resp->status != NFS4_OK) {
4728 		/*
4729 		 * do_rfs4_set_attrs will try to verify systemwide attrs,
4730 		 * so could return -1 for "no match".
4731 		 */
4732 		if (resp->status == -1)
4733 			resp->status = NFS4ERR_NOT_SAME;
4734 		goto done;
4735 	}
4736 	error = rfs4_verify_attr(&sarg, NULL, &ntov);
4737 	switch (error) {
4738 	case 0:
4739 		resp->status = NFS4_OK;
4740 		break;
4741 	case -1:
4742 		resp->status = NFS4ERR_NOT_SAME;
4743 		break;
4744 	default:
4745 		resp->status = puterrno4(error);
4746 		break;
4747 	}
4748 done:
4749 	*cs->statusp = resp->status;
4750 	nfs4_ntov_table_free(&ntov, &sarg);
4751 }
4752 
4753 /* ARGSUSED */
4754 static void
4755 rfs4_op_nverify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4756 	struct compound_state *cs)
4757 {
4758 	/*
4759 	 * verify and nverify are exactly the same, except that nverify
4760 	 * succeeds when some argument changed, and verify succeeds when
4761 	 * when none changed.
4762 	 */
4763 
4764 	NVERIFY4args  *args = &argop->nfs_argop4_u.opnverify;
4765 	NVERIFY4res *resp = &resop->nfs_resop4_u.opnverify;
4766 
4767 	int error;
4768 	struct nfs4_svgetit_arg sarg;
4769 	struct statvfs64 sb;
4770 	struct nfs4_ntov_table ntov;
4771 
4772 	if (cs->vp == NULL) {
4773 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4774 		return;
4775 	}
4776 	sarg.sbp = &sb;
4777 	nfs4_ntov_table_init(&ntov);
4778 	resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
4779 				&sarg, &ntov, NFS4ATTR_VERIT);
4780 	if (resp->status != NFS4_OK) {
4781 		/*
4782 		 * do_rfs4_set_attrs will try to verify systemwide attrs,
4783 		 * so could return -1 for "no match".
4784 		 */
4785 		if (resp->status == -1)
4786 			resp->status = NFS4_OK;
4787 		goto done;
4788 	}
4789 	error = rfs4_verify_attr(&sarg, NULL, &ntov);
4790 	switch (error) {
4791 	case 0:
4792 		resp->status = NFS4ERR_SAME;
4793 		break;
4794 	case -1:
4795 		resp->status = NFS4_OK;
4796 		break;
4797 	default:
4798 		resp->status = puterrno4(error);
4799 		break;
4800 	}
4801 done:
4802 	*cs->statusp = resp->status;
4803 	nfs4_ntov_table_free(&ntov, &sarg);
4804 }
4805 
4806 /*
4807  * XXX - This should live in an NFS header file.
4808  */
4809 #define	MAX_IOVECS	12
4810 
4811 /* ARGSUSED */
4812 static void
4813 rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4814 	struct compound_state *cs)
4815 {
4816 	WRITE4args  *args = &argop->nfs_argop4_u.opwrite;
4817 	WRITE4res *resp = &resop->nfs_resop4_u.opwrite;
4818 	int error;
4819 	vnode_t *vp;
4820 	struct vattr bva;
4821 	u_offset_t rlimit;
4822 	struct uio uio;
4823 	struct iovec iov[MAX_IOVECS];
4824 	struct iovec *iovp;
4825 	int iovcnt;
4826 	int ioflag;
4827 	cred_t *savecred, *cr;
4828 	bool_t *deleg = &cs->deleg;
4829 	nfsstat4 stat;
4830 	int in_crit = 0;
4831 
4832 	vp = cs->vp;
4833 	if (vp == NULL) {
4834 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4835 		return;
4836 	}
4837 	if (cs->access == CS_ACCESS_DENIED) {
4838 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
4839 		return;
4840 	}
4841 
4842 	cr = cs->cr;
4843 
4844 	/*
4845 	 * We have to enter the critical region before calling VOP_RWLOCK
4846 	 * to avoid a deadlock with ufs.
4847 	 */
4848 	if (nbl_need_check(vp)) {
4849 		nbl_start_crit(vp, RW_READER);
4850 		in_crit = 1;
4851 		if (nbl_conflict(vp, NBL_WRITE,
4852 				args->offset, args->data_len, 0)) {
4853 			*cs->statusp = resp->status = NFS4ERR_LOCKED;
4854 			goto out;
4855 		}
4856 	}
4857 
4858 	if ((stat = rfs4_check_stateid(FWRITE, vp, &args->stateid, FALSE,
4859 					deleg, TRUE)) != NFS4_OK) {
4860 		*cs->statusp = resp->status = stat;
4861 		goto out;
4862 	}
4863 
4864 	bva.va_mask = AT_MODE | AT_UID;
4865 	error = VOP_GETATTR(vp, &bva, 0, cr);
4866 
4867 	/*
4868 	 * If we can't get the attributes, then we can't do the
4869 	 * right access checking.  So, we'll fail the request.
4870 	 */
4871 	if (error) {
4872 		*cs->statusp = resp->status = puterrno4(error);
4873 		goto out;
4874 	}
4875 
4876 	if (rdonly4(cs->exi, cs->vp, req)) {
4877 		*cs->statusp = resp->status = NFS4ERR_ROFS;
4878 		goto out;
4879 	}
4880 
4881 	if (vp->v_type != VREG) {
4882 		*cs->statusp = resp->status =
4883 			((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
4884 		goto out;
4885 	}
4886 
4887 	if (crgetuid(cr) != bva.va_uid &&
4888 	    (error = VOP_ACCESS(vp, VWRITE, 0, cr))) {
4889 		*cs->statusp = resp->status = puterrno4(error);
4890 		goto out;
4891 	}
4892 
4893 	if (MANDLOCK(vp, bva.va_mode)) {
4894 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
4895 		goto out;
4896 	}
4897 
4898 	if (args->data_len == 0) {
4899 		*cs->statusp = resp->status = NFS4_OK;
4900 		resp->count = 0;
4901 		resp->committed = args->stable;
4902 		resp->writeverf = Write4verf;
4903 		goto out;
4904 	}
4905 
4906 	if (args->mblk != NULL) {
4907 		mblk_t *m;
4908 		uint_t bytes, round_len;
4909 
4910 		iovcnt = 0;
4911 		bytes = 0;
4912 		round_len = roundup(args->data_len, BYTES_PER_XDR_UNIT);
4913 		for (m = args->mblk;
4914 		    m != NULL && bytes < round_len;
4915 		    m = m->b_cont) {
4916 			iovcnt++;
4917 			bytes += MBLKL(m);
4918 		}
4919 #ifdef DEBUG
4920 		/* should have ended on an mblk boundary */
4921 		if (bytes != round_len) {
4922 			printf("bytes=0x%x, round_len=0x%x, req len=0x%x\n",
4923 			    bytes, round_len, args->data_len);
4924 			printf("args=%p, args->mblk=%p, m=%p", (void *)args,
4925 			    (void *)args->mblk, (void *)m);
4926 			ASSERT(bytes == round_len);
4927 		}
4928 #endif
4929 		if (iovcnt <= MAX_IOVECS) {
4930 			iovp = iov;
4931 		} else {
4932 			iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
4933 		}
4934 		mblk_to_iov(args->mblk, iovcnt, iovp);
4935 	} else {
4936 		iovcnt = 1;
4937 		iovp = iov;
4938 		iovp->iov_base = args->data_val;
4939 		iovp->iov_len = args->data_len;
4940 	}
4941 
4942 	uio.uio_iov = iovp;
4943 	uio.uio_iovcnt = iovcnt;
4944 
4945 	uio.uio_segflg = UIO_SYSSPACE;
4946 	uio.uio_extflg = UIO_COPY_DEFAULT;
4947 	uio.uio_loffset = args->offset;
4948 	uio.uio_resid = args->data_len;
4949 	uio.uio_llimit = curproc->p_fsz_ctl;
4950 	rlimit = uio.uio_llimit - args->offset;
4951 	if (rlimit < (u_offset_t)uio.uio_resid)
4952 		uio.uio_resid = (int)rlimit;
4953 
4954 	if (args->stable == UNSTABLE4)
4955 		ioflag = 0;
4956 	else if (args->stable == FILE_SYNC4)
4957 		ioflag = FSYNC;
4958 	else if (args->stable == DATA_SYNC4)
4959 		ioflag = FDSYNC;
4960 	else {
4961 		if (iovp != iov)
4962 			kmem_free(iovp, sizeof (*iovp) * iovcnt);
4963 		*cs->statusp = resp->status = NFS4ERR_INVAL;
4964 		goto out;
4965 	}
4966 
4967 	/*
4968 	 * We're changing creds because VM may fault and we need
4969 	 * the cred of the current thread to be used if quota
4970 	 * checking is enabled.
4971 	 */
4972 	savecred = curthread->t_cred;
4973 	curthread->t_cred = cr;
4974 	error = do_io(FWRITE, vp, &uio, ioflag, cr);
4975 	curthread->t_cred = savecred;
4976 
4977 	if (iovp != iov)
4978 		kmem_free(iovp, sizeof (*iovp) * iovcnt);
4979 
4980 	if (error) {
4981 		*cs->statusp = resp->status = puterrno4(error);
4982 		goto out;
4983 	}
4984 
4985 	*cs->statusp = resp->status = NFS4_OK;
4986 	resp->count = args->data_len - uio.uio_resid;
4987 
4988 	if (ioflag == 0)
4989 		resp->committed = UNSTABLE4;
4990 	else
4991 		resp->committed = FILE_SYNC4;
4992 
4993 	resp->writeverf = Write4verf;
4994 
4995 out:
4996 	if (in_crit)
4997 		nbl_end_crit(vp);
4998 }
4999 
5000 
5001 /* XXX put in a header file */
5002 extern int	sec_svc_getcred(struct svc_req *, cred_t *,  caddr_t *, int *);
5003 
5004 void
5005 rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi,
5006 	struct svc_req *req, cred_t *cr)
5007 {
5008 	uint_t i;
5009 	struct compound_state cs;
5010 
5011 	rfs4_init_compound_state(&cs);
5012 	/*
5013 	 * Form a reply tag by copying over the reqeuest tag.
5014 	 */
5015 	resp->tag.utf8string_val =
5016 				kmem_alloc(args->tag.utf8string_len, KM_SLEEP);
5017 	resp->tag.utf8string_len = args->tag.utf8string_len;
5018 	bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
5019 					resp->tag.utf8string_len);
5020 
5021 	cs.statusp = &resp->status;
5022 
5023 	/*
5024 	 * XXX for now, minorversion should be zero
5025 	 */
5026 	if (args->minorversion != NFS4_MINORVERSION) {
5027 		resp->array_len = 0;
5028 		resp->array = NULL;
5029 		resp->status = NFS4ERR_MINOR_VERS_MISMATCH;
5030 		return;
5031 	}
5032 
5033 	resp->array_len = args->array_len;
5034 	resp->array = kmem_zalloc(args->array_len * sizeof (nfs_resop4),
5035 		KM_SLEEP);
5036 
5037 	ASSERT(exi == NULL);
5038 	ASSERT(cr == NULL);
5039 
5040 	cr = crget();
5041 	ASSERT(cr != NULL);
5042 
5043 	if (sec_svc_getcred(req, cr, &cs.principal, &cs.nfsflavor) == 0) {
5044 		crfree(cr);
5045 		return;
5046 	}
5047 
5048 	cs.basecr = cr;
5049 
5050 	cs.req = req;
5051 
5052 	/*
5053 	 * For now, NFS4 compound processing must be protected by
5054 	 * exported_lock because it can access more than one exportinfo
5055 	 * per compound and share/unshare can now change multiple
5056 	 * exinfo structs.  The NFS2/3 code only refs 1 exportinfo
5057 	 * per proc (excluding public exinfo), and exi_count design
5058 	 * is sufficient to protect concurrent execution of NFS2/3
5059 	 * ops along with unexport.  This lock will be removed as
5060 	 * part of the NFSv4 phase 2 namespace redesign work.
5061 	 */
5062 	rw_enter(&exported_lock, RW_READER);
5063 
5064 	/*
5065 	 * If this is the first compound we've seen, we need to start all
5066 	 * new instances' grace periods.
5067 	 */
5068 	if (rfs4_seen_first_compound == 0) {
5069 		rfs4_grace_start_new();
5070 		/*
5071 		 * This must be set after rfs4_grace_start_new(), otherwise
5072 		 * another thread could proceed past here before the former
5073 		 * is finished.
5074 		 */
5075 		rfs4_seen_first_compound = 1;
5076 	}
5077 
5078 	for (i = 0; i < args->array_len && cs.cont; i++) {
5079 		nfs_argop4 *argop;
5080 		nfs_resop4 *resop;
5081 		uint_t op;
5082 
5083 		argop = &args->array[i];
5084 		resop = &resp->array[i];
5085 		resop->resop = argop->argop;
5086 		op = (uint_t)resop->resop;
5087 
5088 		if (op < rfsv4disp_cnt) {
5089 			/*
5090 			 * Count the individual ops here; NULL and COMPOUND
5091 			 * are counted in common_dispatch()
5092 			 */
5093 			rfsproccnt_v4_ptr[op].value.ui64++;
5094 
5095 			NFS4_DEBUG(rfs4_debug > 1,
5096 				(CE_NOTE, "Executing %s", rfs4_op_string[op]));
5097 			(*rfsv4disptab[op].dis_proc)(argop, resop, req, &cs);
5098 			NFS4_DEBUG(rfs4_debug > 1,
5099 				(CE_NOTE, "%s returned %d",
5100 				rfs4_op_string[op], *cs.statusp));
5101 			if (*cs.statusp != NFS4_OK)
5102 				cs.cont = FALSE;
5103 		} else {
5104 			/*
5105 			 * This is effectively dead code since XDR code
5106 			 * will have already returned BADXDR if op doesn't
5107 			 * decode to legal value.  This only done for a
5108 			 * day when XDR code doesn't verify v4 opcodes.
5109 			 */
5110 			op = OP_ILLEGAL;
5111 			rfsproccnt_v4_ptr[OP_ILLEGAL_IDX].value.ui64++;
5112 
5113 			rfs4_op_illegal(argop, resop, req, &cs);
5114 			cs.cont = FALSE;
5115 		}
5116 
5117 		/*
5118 		 * If not at last op, and if we are to stop, then
5119 		 * compact the results array.
5120 		 */
5121 		if ((i + 1) < args->array_len && !cs.cont) {
5122 			nfs_resop4 *new_res = kmem_alloc(
5123 				(i+1) * sizeof (nfs_resop4), KM_SLEEP);
5124 			bcopy(resp->array,
5125 				new_res, (i+1) * sizeof (nfs_resop4));
5126 			kmem_free(resp->array,
5127 				args->array_len * sizeof (nfs_resop4));
5128 
5129 			resp->array_len =  i + 1;
5130 			resp->array = new_res;
5131 		}
5132 	}
5133 
5134 	rw_exit(&exported_lock);
5135 
5136 	if (cs.vp)
5137 		VN_RELE(cs.vp);
5138 	if (cs.saved_vp)
5139 		VN_RELE(cs.saved_vp);
5140 	if (cs.saved_fh.nfs_fh4_val)
5141 		kmem_free(cs.saved_fh.nfs_fh4_val, NFS4_FHSIZE);
5142 
5143 	if (cs.basecr)
5144 		crfree(cs.basecr);
5145 	if (cs.cr)
5146 		crfree(cs.cr);
5147 }
5148 
5149 /*
5150  * XXX because of what appears to be duplicate calls to rfs4_compound_free
5151  * XXX zero out the tag and array values. Need to investigate why the
5152  * XXX calls occur, but at least prevent the panic for now.
5153  */
5154 void
5155 rfs4_compound_free(COMPOUND4res *resp)
5156 {
5157 	uint_t i;
5158 
5159 	if (resp->tag.utf8string_val) {
5160 		UTF8STRING_FREE(resp->tag)
5161 	}
5162 
5163 	for (i = 0; i < resp->array_len; i++) {
5164 		nfs_resop4 *resop;
5165 		uint_t op;
5166 
5167 		resop = &resp->array[i];
5168 		op = (uint_t)resop->resop;
5169 		if (op < rfsv4disp_cnt) {
5170 			(*rfsv4disptab[op].dis_resfree)(resop);
5171 		}
5172 	}
5173 	if (resp->array != NULL) {
5174 		kmem_free(resp->array, resp->array_len * sizeof (nfs_resop4));
5175 	}
5176 }
5177 
5178 /*
5179  * Process the value of the compound request rpc flags, as a bit-AND
5180  * of the individual per-op flags (idempotent, allowork, publicfh_ok)
5181  */
5182 void
5183 rfs4_compound_flagproc(COMPOUND4args *args, int *flagp)
5184 {
5185 	int i;
5186 	int flag = RPC_ALL;
5187 
5188 	for (i = 0; flag && i < args->array_len; i++) {
5189 		uint_t op;
5190 
5191 		op = (uint_t)args->array[i].argop;
5192 
5193 		if (op < rfsv4disp_cnt)
5194 			flag &= rfsv4disptab[op].dis_flags;
5195 		else
5196 			flag = 0;
5197 	}
5198 	*flagp = flag;
5199 }
5200 
5201 nfsstat4
5202 rfs4_client_sysid(rfs4_client_t *cp, sysid_t *sp)
5203 {
5204 	nfsstat4 e;
5205 
5206 	rfs4_dbe_lock(cp->dbe);
5207 
5208 	if (cp->sysidt != LM_NOSYSID) {
5209 		*sp = cp->sysidt;
5210 		e = NFS4_OK;
5211 
5212 	} else if ((cp->sysidt = lm_alloc_sysidt()) != LM_NOSYSID) {
5213 		*sp = cp->sysidt;
5214 		e = NFS4_OK;
5215 
5216 		NFS4_DEBUG(rfs4_debug, (CE_NOTE,
5217 			"rfs4_client_sysid: allocated 0x%x\n", *sp));
5218 	} else
5219 		e = NFS4ERR_DELAY;
5220 
5221 	rfs4_dbe_unlock(cp->dbe);
5222 	return (e);
5223 }
5224 
#if defined(DEBUG) && ! defined(lint)
/*
 * Debug aid: log a one line description of a lock request - the fcntl
 * operation, lock type, offset, length and pid - prefixed with str.
 * A zero l_len is printed as all ones.
 */
static void
lock_print(char *str, int operation, struct flock64 *flk)
{
	char *op;
	char *type;

	if (operation == F_GETLK)
		op = "F_GETLK";
	else if (operation == F_SETLK)
		op = "F_SETLK";
	else
		op = "F_UNKNOWN";

	if (flk->l_type == F_UNLCK)
		type = "F_UNLCK";
	else if (flk->l_type == F_RDLCK)
		type = "F_RDLCK";
	else if (flk->l_type == F_WRLCK)
		type = "F_WRLCK";
	else
		type = "F_UNKNOWN";

	ASSERT(flk->l_whence == 0);
	cmn_err(CE_NOTE, "%s:  %s, type = %s, off = %llx len = %llx pid = %d",
	    str, op, type,
	    (longlong_t)flk->l_start,
	    flk->l_len ? (longlong_t)flk->l_len : ~0LL,
	    flk->l_pid);
}

/* Print the lock only when the debug switch d is set. */
#define	LOCK_PRINT(d, s, t, f) if (d) lock_print(s, t, f)
#else
/* Non-DEBUG and lint builds compile the tracing away. */
#define	LOCK_PRINT(d, s, t, f)
#endif
5261 
5262 /*ARGSUSED*/
5263 static bool_t
5264 creds_ok(cred_set_t cr_set, struct svc_req *req, struct compound_state *cs)
5265 {
5266 	return (TRUE);
5267 }
5268 
5269 /*
5270  * Look up the pathname using the vp in cs as the directory vnode.
5271  * cs->vp will be the vnode for the file on success
5272  */
5273 
5274 static nfsstat4
5275 rfs4_lookup(component4 *component, struct svc_req *req,
5276 	    struct compound_state *cs)
5277 {
5278 	char *nm;
5279 	uint32_t len;
5280 	nfsstat4 status;
5281 
5282 	if (cs->vp == NULL) {
5283 		return (NFS4ERR_NOFILEHANDLE);
5284 	}
5285 	if (cs->vp->v_type != VDIR) {
5286 		return (NFS4ERR_NOTDIR);
5287 	}
5288 
5289 	if (!utf8_dir_verify(component))
5290 		return (NFS4ERR_INVAL);
5291 
5292 	nm = utf8_to_fn(component, &len, NULL);
5293 	if (nm == NULL) {
5294 		return (NFS4ERR_INVAL);
5295 	}
5296 
5297 	if (len > MAXNAMELEN) {
5298 		kmem_free(nm, len);
5299 		return (NFS4ERR_NAMETOOLONG);
5300 	}
5301 
5302 	status = do_rfs4_op_lookup(nm, len, req, cs);
5303 
5304 	kmem_free(nm, len);
5305 
5306 	return (status);
5307 }
5308 
5309 static nfsstat4
5310 rfs4_lookupfile(component4 *component, struct svc_req *req,
5311 		struct compound_state *cs, uint32_t access,
5312 		change_info4 *cinfo)
5313 {
5314 	nfsstat4 status;
5315 	vnode_t *dvp = cs->vp;
5316 	vattr_t bva, ava, fva;
5317 	int error;
5318 
5319 	/* Get "before" change value */
5320 	bva.va_mask = AT_CTIME|AT_SEQ;
5321 	error = VOP_GETATTR(dvp, &bva, 0, cs->cr);
5322 	if (error)
5323 		return (puterrno4(error));
5324 
5325 	/* rfs4_lookup may VN_RELE directory */
5326 	VN_HOLD(dvp);
5327 
5328 	status = rfs4_lookup(component, req, cs);
5329 	if (status != NFS4_OK) {
5330 		VN_RELE(dvp);
5331 		return (status);
5332 	}
5333 
5334 	/*
5335 	 * Get "after" change value, if it fails, simply return the
5336 	 * before value.
5337 	 */
5338 	ava.va_mask = AT_CTIME|AT_SEQ;
5339 	if (VOP_GETATTR(dvp, &ava, 0, cs->cr)) {
5340 		ava.va_ctime = bva.va_ctime;
5341 		ava.va_seq = 0;
5342 	}
5343 	VN_RELE(dvp);
5344 
5345 	/*
5346 	 * Validate the file is a file
5347 	 */
5348 	fva.va_mask = AT_TYPE|AT_MODE;
5349 	error = VOP_GETATTR(cs->vp, &fva, 0, cs->cr);
5350 	if (error)
5351 		return (puterrno4(error));
5352 
5353 	if (fva.va_type != VREG) {
5354 		if (fva.va_type == VDIR)
5355 			return (NFS4ERR_ISDIR);
5356 		if (fva.va_type == VLNK)
5357 			return (NFS4ERR_SYMLINK);
5358 		return (NFS4ERR_INVAL);
5359 	}
5360 
5361 	NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime);
5362 	NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
5363 
5364 	/*
5365 	 * It is undefined if VOP_LOOKUP will change va_seq, so
5366 	 * cinfo.atomic = TRUE only if we have
5367 	 * non-zero va_seq's, and they have not changed.
5368 	 */
5369 	if (bva.va_seq && ava.va_seq && ava.va_seq == bva.va_seq)
5370 		cinfo->atomic = TRUE;
5371 	else
5372 		cinfo->atomic = FALSE;
5373 
5374 	/* Check for mandatory locking */
5375 	cs->mandlock = MANDLOCK(cs->vp, fva.va_mode);
5376 	return (check_open_access(access, cs, req));
5377 }
5378 
5379 static nfsstat4
5380 create_vnode(vnode_t *dvp, char *nm,  vattr_t *vap, createmode4 mode,
5381 	    timespec32_t *mtime, cred_t *cr, vnode_t **vpp, bool_t *created)
5382 {
5383 	int error;
5384 	nfsstat4 status = NFS4_OK;
5385 	vattr_t va;
5386 
5387 tryagain:
5388 
5389 	/*
5390 	 * The file open mode used is VWRITE.  If the client needs
5391 	 * some other semantic, then it should do the access checking
5392 	 * itself.  It would have been nice to have the file open mode
5393 	 * passed as part of the arguments.
5394 	 */
5395 
5396 	*created = TRUE;
5397 	error = VOP_CREATE(dvp, nm, vap, EXCL, VWRITE, vpp, cr, 0);
5398 
5399 	if (error) {
5400 		*created = FALSE;
5401 
5402 		/*
5403 		 * If we got something other than file already exists
5404 		 * then just return this error.  Otherwise, we got
5405 		 * EEXIST.  If we were doing a GUARDED create, then
5406 		 * just return this error.  Otherwise, we need to
5407 		 * make sure that this wasn't a duplicate of an
5408 		 * exclusive create request.
5409 		 *
5410 		 * The assumption is made that a non-exclusive create
5411 		 * request will never return EEXIST.
5412 		 */
5413 
5414 		if (error != EEXIST || mode == GUARDED4) {
5415 			status = puterrno4(error);
5416 			return (status);
5417 		}
5418 		error = VOP_LOOKUP(dvp, nm, vpp, NULL, 0, NULL, cr);
5419 
5420 		if (error) {
5421 			/*
5422 			 * We couldn't find the file that we thought that
5423 			 * we just created.  So, we'll just try creating
5424 			 * it again.
5425 			 */
5426 			if (error == ENOENT)
5427 				goto tryagain;
5428 
5429 			status = puterrno4(error);
5430 			return (status);
5431 		}
5432 
5433 		VN_SETPATH(rootdir, dvp, *vpp, nm, strlen(nm));
5434 
5435 		if (mode == UNCHECKED4) {
5436 			/* existing object must be regular file */
5437 			if ((*vpp)->v_type != VREG) {
5438 				if ((*vpp)->v_type == VDIR)
5439 					status = NFS4ERR_ISDIR;
5440 				else if ((*vpp)->v_type == VLNK)
5441 					status = NFS4ERR_SYMLINK;
5442 				else
5443 					status = NFS4ERR_INVAL;
5444 				VN_RELE(*vpp);
5445 				return (status);
5446 			}
5447 
5448 			return (NFS4_OK);
5449 		}
5450 
5451 		/* Check for duplicate request */
5452 		ASSERT(mtime != 0);
5453 		va.va_mask = AT_MTIME;
5454 		error = VOP_GETATTR(*vpp, &va, 0, cr);
5455 		if (!error) {
5456 			/* We found the file */
5457 			if (va.va_mtime.tv_sec != mtime->tv_sec ||
5458 			    va.va_mtime.tv_nsec != mtime->tv_nsec) {
5459 				/* but its not our creation */
5460 				VN_RELE(*vpp);
5461 				return (NFS4ERR_EXIST);
5462 			}
5463 			*created = TRUE; /* retrans of create == created */
5464 			return (NFS4_OK);
5465 		}
5466 		VN_RELE(*vpp);
5467 		return (NFS4ERR_EXIST);
5468 	}
5469 
5470 	return (NFS4_OK);
5471 }
5472 
5473 static nfsstat4
5474 check_open_access(uint32_t access,
5475 		struct compound_state *cs, struct svc_req *req)
5476 {
5477 	int error;
5478 	vnode_t *vp;
5479 	bool_t readonly;
5480 	cred_t *cr = cs->cr;
5481 
5482 	/* For now we don't allow mandatory locking as per V2/V3 */
5483 	if (cs->access == CS_ACCESS_DENIED || cs->mandlock) {
5484 		return (NFS4ERR_ACCESS);
5485 	}
5486 
5487 	vp = cs->vp;
5488 	ASSERT(cr != NULL && vp->v_type == VREG);
5489 
5490 	/*
5491 	 * If the file system is exported read only and we are trying
5492 	 * to open for write, then return NFS4ERR_ROFS
5493 	 */
5494 
5495 	readonly = rdonly4(cs->exi, cs->vp, req);
5496 
5497 	if ((access & OPEN4_SHARE_ACCESS_WRITE) && readonly)
5498 		return (NFS4ERR_ROFS);
5499 
5500 	if (access & OPEN4_SHARE_ACCESS_READ) {
5501 		if ((VOP_ACCESS(vp, VREAD, 0, cr) != 0) &&
5502 		    (VOP_ACCESS(vp, VEXEC, 0, cr) != 0)) {
5503 			return (NFS4ERR_ACCESS);
5504 		}
5505 	}
5506 
5507 	if (access & OPEN4_SHARE_ACCESS_WRITE) {
5508 		error = VOP_ACCESS(vp, VWRITE, 0, cr);
5509 		if (error)
5510 			return (NFS4ERR_ACCESS);
5511 	}
5512 
5513 	return (NFS4_OK);
5514 }
5515 
5516 static nfsstat4
5517 rfs4_createfile(OPEN4args *args, struct svc_req *req, struct compound_state *cs,
5518 		change_info4 *cinfo, bitmap4 *attrset, clientid4 clientid)
5519 {
5520 	struct nfs4_svgetit_arg sarg;
5521 	struct nfs4_ntov_table ntov;
5522 
5523 	bool_t ntov_table_init = FALSE;
5524 	struct statvfs64 sb;
5525 	nfsstat4 status;
5526 	vnode_t *vp;
5527 	vattr_t bva, ava, iva, cva, *vap;
5528 	vnode_t *dvp;
5529 	timespec32_t *mtime;
5530 	char *nm = NULL;
5531 	uint_t buflen;
5532 	bool_t created;
5533 	bool_t setsize = FALSE;
5534 	len_t reqsize;
5535 	int error;
5536 	bool_t trunc;
5537 	caller_context_t ct;
5538 	component4 *component;
5539 
5540 	sarg.sbp = &sb;
5541 
5542 	dvp = cs->vp;
5543 
5544 	/* Check if the file system is read only */
5545 	if (rdonly4(cs->exi, dvp, req))
5546 		return (NFS4ERR_ROFS);
5547 
5548 	/*
5549 	 * Get the last component of path name in nm. cs will reference
5550 	 * the including directory on success.
5551 	 */
5552 	component = &args->open_claim4_u.file;
5553 	if (!utf8_dir_verify(component))
5554 		return (NFS4ERR_INVAL);
5555 
5556 	nm = utf8_to_fn(component, &buflen, NULL);
5557 
5558 	if (nm == NULL)
5559 		return (NFS4ERR_RESOURCE);
5560 
5561 	if (buflen > MAXNAMELEN) {
5562 		kmem_free(nm, buflen);
5563 		return (NFS4ERR_NAMETOOLONG);
5564 	}
5565 
5566 	bva.va_mask = AT_TYPE|AT_CTIME|AT_SEQ;
5567 	error = VOP_GETATTR(dvp, &bva, 0, cs->cr);
5568 	if (error) {
5569 		kmem_free(nm, buflen);
5570 		return (puterrno4(error));
5571 	}
5572 
5573 	if (bva.va_type != VDIR) {
5574 		kmem_free(nm, buflen);
5575 		return (NFS4ERR_NOTDIR);
5576 	}
5577 
5578 	NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime)
5579 
5580 	switch (args->mode) {
5581 	case GUARDED4:
5582 		/*FALLTHROUGH*/
5583 	case UNCHECKED4:
5584 		nfs4_ntov_table_init(&ntov);
5585 		ntov_table_init = TRUE;
5586 
5587 		*attrset = 0;
5588 		status = do_rfs4_set_attrs(attrset,
5589 					&args->createhow4_u.createattrs,
5590 					cs, &sarg, &ntov, NFS4ATTR_SETIT);
5591 
5592 		if (status == NFS4_OK && (sarg.vap->va_mask & AT_TYPE) &&
5593 		    sarg.vap->va_type != VREG) {
5594 			if (sarg.vap->va_type == VDIR)
5595 				status = NFS4ERR_ISDIR;
5596 			else if (sarg.vap->va_type == VLNK)
5597 				status = NFS4ERR_SYMLINK;
5598 			else
5599 				status = NFS4ERR_INVAL;
5600 		}
5601 
5602 		if (status != NFS4_OK) {
5603 			kmem_free(nm, buflen);
5604 			nfs4_ntov_table_free(&ntov, &sarg);
5605 			*attrset = 0;
5606 			return (status);
5607 		}
5608 
5609 		vap = sarg.vap;
5610 		vap->va_type = VREG;
5611 		vap->va_mask |= AT_TYPE;
5612 
5613 		if ((vap->va_mask & AT_MODE) == 0) {
5614 			vap->va_mask |= AT_MODE;
5615 			vap->va_mode = (mode_t)0600;
5616 		}
5617 
5618 		if (vap->va_mask & AT_SIZE) {
5619 
5620 			/* Disallow create with a non-zero size */
5621 
5622 			if ((reqsize = sarg.vap->va_size) != 0) {
5623 				kmem_free(nm, buflen);
5624 				nfs4_ntov_table_free(&ntov, &sarg);
5625 				*attrset = 0;
5626 				return (NFS4ERR_INVAL);
5627 			}
5628 			setsize = TRUE;
5629 		}
5630 		break;
5631 
5632 	case EXCLUSIVE4:
5633 		/* prohibit EXCL create of named attributes */
5634 		if (dvp->v_flag & V_XATTRDIR) {
5635 			kmem_free(nm, buflen);
5636 			*attrset = 0;
5637 			return (NFS4ERR_INVAL);
5638 		}
5639 
5640 		cva.va_mask = AT_TYPE | AT_MTIME | AT_MODE;
5641 		cva.va_type = VREG;
5642 		/*
5643 		 * Ensure no time overflows. Assumes underlying
5644 		 * filesystem supports at least 32 bits.
5645 		 * Truncate nsec to usec resolution to allow valid
5646 		 * compares even if the underlying filesystem truncates.
5647 		 */
5648 		mtime = (timespec32_t *)&args->createhow4_u.createverf;
5649 		cva.va_mtime.tv_sec = mtime->tv_sec % TIME32_MAX;
5650 		cva.va_mtime.tv_nsec = (mtime->tv_nsec / 1000) * 1000;
5651 		cva.va_mode = (mode_t)0;
5652 		vap = &cva;
5653 		break;
5654 	}
5655 
5656 	status = create_vnode(dvp, nm, vap, args->mode, mtime,
5657 						cs->cr, &vp, &created);
5658 	kmem_free(nm, buflen);
5659 
5660 	if (status != NFS4_OK) {
5661 		if (ntov_table_init)
5662 			nfs4_ntov_table_free(&ntov, &sarg);
5663 		*attrset = 0;
5664 		return (status);
5665 	}
5666 
5667 	trunc = (setsize && !created);
5668 
5669 	if (args->mode != EXCLUSIVE4) {
5670 		bitmap4 createmask = args->createhow4_u.createattrs.attrmask;
5671 
5672 		/*
5673 		 * True verification that object was created with correct
5674 		 * attrs is impossible.  The attrs could have been changed
5675 		 * immediately after object creation.  If attributes did
5676 		 * not verify, the only recourse for the server is to
5677 		 * destroy the object.  Maybe if some attrs (like gid)
5678 		 * are set incorrectly, the object should be destroyed;
5679 		 * however, seems bad as a default policy.  Do we really
5680 		 * want to destroy an object over one of the times not
5681 		 * verifying correctly?  For these reasons, the server
5682 		 * currently sets bits in attrset for createattrs
5683 		 * that were set; however, no verification is done.
5684 		 *
5685 		 * vmask_to_nmask accounts for vattr bits set on create
5686 		 *	[do_rfs4_set_attrs() only sets resp bits for
5687 		 *	 non-vattr/vfs bits.]
5688 		 * Mask off any bits we set by default so as not to return
5689 		 * more attrset bits than were requested in createattrs
5690 		 */
5691 		if (created) {
5692 			nfs4_vmask_to_nmask(sarg.vap->va_mask, attrset);
5693 			*attrset &= createmask;
5694 		} else {
5695 			/*
5696 			 * We did not create the vnode (we tried but it
5697 			 * already existed).  In this case, the only createattr
5698 			 * that the spec allows the server to set is size,
5699 			 * and even then, it can only be set if it is 0.
5700 			 */
5701 			*attrset = 0;
5702 			if (trunc)
5703 				*attrset = FATTR4_SIZE_MASK;
5704 		}
5705 	}
5706 	if (ntov_table_init)
5707 		nfs4_ntov_table_free(&ntov, &sarg);
5708 
5709 	/*
5710 	 * Get the initial "after" sequence number, if it fails,
5711 	 * set to zero, time to before.
5712 	 */
5713 	iva.va_mask = AT_CTIME|AT_SEQ;
5714 	if (VOP_GETATTR(dvp, &iva, 0, cs->cr)) {
5715 		iva.va_seq = 0;
5716 		iva.va_ctime = bva.va_ctime;
5717 	}
5718 
5719 	/*
5720 	 * create_vnode attempts to create the file exclusive,
5721 	 * if it already exists the VOP_CREATE will fail and
5722 	 * may not increase va_seq. It is atomic if
5723 	 * we haven't changed the directory, but if it has changed
5724 	 * we don't know what changed it.
5725 	 */
5726 	if (!created) {
5727 		if (bva.va_seq && iva.va_seq &&
5728 			bva.va_seq == iva.va_seq)
5729 			cinfo->atomic = TRUE;
5730 		else
5731 			cinfo->atomic = FALSE;
5732 		NFS4_SET_FATTR4_CHANGE(cinfo->after, iva.va_ctime);
5733 	} else {
5734 		/*
5735 		 * The entry was created, we need to sync the
5736 		 * directory metadata.
5737 		 */
5738 		(void) VOP_FSYNC(dvp, 0, cs->cr);
5739 
5740 		/*
5741 		 * Get "after" change value, if it fails, simply return the
5742 		 * before value.
5743 		 */
5744 		ava.va_mask = AT_CTIME|AT_SEQ;
5745 		if (VOP_GETATTR(dvp, &ava, 0, cs->cr)) {
5746 			ava.va_ctime = bva.va_ctime;
5747 			ava.va_seq = 0;
5748 		}
5749 
5750 		NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
5751 
5752 		/*
5753 		 * The cinfo->atomic = TRUE only if we have
5754 		 * non-zero va_seq's, and it has incremented by exactly one
5755 		 * during the create_vnode and it didn't
5756 		 * change during the VOP_FSYNC.
5757 		 */
5758 		if (bva.va_seq && iva.va_seq && ava.va_seq &&
5759 				iva.va_seq == (bva.va_seq + 1) &&
5760 				iva.va_seq == ava.va_seq)
5761 			cinfo->atomic = TRUE;
5762 		else
5763 			cinfo->atomic = FALSE;
5764 	}
5765 
5766 	/* Check for mandatory locking and that the size gets set. */
5767 	cva.va_mask = AT_MODE;
5768 	if (setsize)
5769 		cva.va_mask |= AT_SIZE;
5770 
5771 	/* Assume the worst */
5772 	cs->mandlock = TRUE;
5773 
5774 	if (VOP_GETATTR(vp, &cva, 0, cs->cr) == 0) {
5775 		cs->mandlock = MANDLOCK(cs->vp, cva.va_mode);
5776 
5777 		/*
5778 		 * Truncate the file if necessary; this would be
5779 		 * the case for create over an existing file.
5780 		 */
5781 
5782 		if (trunc) {
5783 			int in_crit = 0;
5784 			rfs4_file_t *fp;
5785 			bool_t create = FALSE;
5786 
5787 			/*
5788 			 * We are writing over an existing file.
5789 			 * Check to see if we need to recall a delegation.
5790 			 */
5791 			rfs4_hold_deleg_policy();
5792 			if ((fp = rfs4_findfile(vp, NULL, &create)) != NULL) {
5793 				if (rfs4_check_delegated_byfp(FWRITE, fp,
5794 					(reqsize == 0), FALSE, FALSE,
5795 							&clientid)) {
5796 
5797 					rfs4_file_rele(fp);
5798 					rfs4_rele_deleg_policy();
5799 					VN_RELE(vp);
5800 					*attrset = 0;
5801 					return (NFS4ERR_DELAY);
5802 				}
5803 				rfs4_file_rele(fp);
5804 			}
5805 			rfs4_rele_deleg_policy();
5806 
5807 			if (nbl_need_check(vp)) {
5808 				in_crit = 1;
5809 
5810 				ASSERT(reqsize == 0);
5811 
5812 				nbl_start_crit(vp, RW_READER);
5813 				if (nbl_conflict(vp, NBL_WRITE, 0,
5814 						cva.va_size, 0)) {
5815 					in_crit = 0;
5816 					nbl_end_crit(vp);
5817 					VN_RELE(vp);
5818 					*attrset = 0;
5819 					return (NFS4ERR_ACCESS);
5820 				}
5821 			}
5822 			ct.cc_sysid = 0;
5823 			ct.cc_pid = 0;
5824 			ct.cc_caller_id = nfs4_srv_caller_id;
5825 
5826 			cva.va_mask = AT_SIZE;
5827 			cva.va_size = reqsize;
5828 			(void) VOP_SETATTR(vp, &cva, 0, cs->cr, &ct);
5829 			if (in_crit)
5830 				nbl_end_crit(vp);
5831 		}
5832 	}
5833 
5834 	error = makefh4(&cs->fh, vp, cs->exi);
5835 
5836 	/*
5837 	 * Force modified data and metadata out to stable storage.
5838 	 */
5839 	(void) VOP_FSYNC(vp, FNODSYNC, cs->cr);
5840 
5841 	if (error) {
5842 		VN_RELE(vp);
5843 		*attrset = 0;
5844 		return (puterrno4(error));
5845 	}
5846 
5847 	/* if parent dir is attrdir, set namedattr fh flag */
5848 	if (dvp->v_flag & V_XATTRDIR)
5849 		set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
5850 
5851 	if (cs->vp)
5852 		VN_RELE(cs->vp);
5853 
5854 	cs->vp = vp;
5855 
5856 	/*
5857 	 * if we did not create the file, we will need to check
5858 	 * the access bits on the file
5859 	 */
5860 
5861 	if (!created) {
5862 		if (setsize)
5863 			args->share_access |= OPEN4_SHARE_ACCESS_WRITE;
5864 		status = check_open_access(args->share_access, cs, req);
5865 		if (status != NFS4_OK)
5866 			*attrset = 0;
5867 	}
5868 	return (status);
5869 }
5870 
5871 /*ARGSUSED*/
5872 static void
5873 rfs4_do_open(struct compound_state *cs, struct svc_req *req,
5874 		rfs4_openowner_t *oo, delegreq_t deleg,
5875 		uint32_t access, uint32_t deny,
5876 		OPEN4res *resp)
5877 {
5878 	/* XXX Currently not using req  */
5879 	rfs4_state_t *state;
5880 	rfs4_file_t *file;
5881 	bool_t screate = TRUE;
5882 	bool_t fcreate = TRUE;
5883 	uint32_t amodes;
5884 	uint32_t dmodes;
5885 	rfs4_deleg_state_t *dsp;
5886 	struct shrlock shr;
5887 	struct shr_locowner shr_loco;
5888 	sysid_t sysid;
5889 	nfsstat4 status;
5890 	int fflags = 0;
5891 	int recall = 0;
5892 	int err;
5893 
5894 	/* get the file struct and hold a lock on it during initial open */
5895 	file = rfs4_findfile_withlock(cs->vp, &cs->fh, &fcreate);
5896 	if (file == NULL) {
5897 		NFS4_DEBUG(rfs4_debug,
5898 			(CE_NOTE, "rfs4_do_open: can't find file"));
5899 		resp->status = NFS4ERR_SERVERFAULT;
5900 		return;
5901 	}
5902 
5903 	state = rfs4_findstate_by_owner_file(oo, file, &screate);
5904 	if (state == NULL) {
5905 		NFS4_DEBUG(rfs4_debug,
5906 			(CE_NOTE, "rfs4_do_open: can't find state"));
5907 		resp->status = NFS4ERR_RESOURCE;
5908 		/* No need to keep any reference */
5909 		rfs4_file_rele_withunlock(file);
5910 		return;
5911 	}
5912 
5913 	/*
5914 	 * Check for conflicts in deny and access before checking for
5915 	 * conflicts in delegation.  We don't want to recall a
5916 	 * delegation based on an open that will eventually fail based
5917 	 * on shares modes.
5918 	 */
5919 
5920 	shr.s_access = (short)access;
5921 	shr.s_deny = (short)deny;
5922 	shr.s_pid = rfs4_dbe_getid(oo->dbe);
5923 
5924 	if ((status = rfs4_client_sysid(oo->client, &sysid)) != NFS4_OK) {
5925 		resp->status = status;
5926 		rfs4_file_rele(file);
5927 		/* Not a fully formed open; "close" it */
5928 		if (screate == TRUE)
5929 			rfs4_state_close(state, FALSE, FALSE, cs->cr);
5930 		rfs4_state_rele(state);
5931 		return;
5932 	}
5933 	shr.s_sysid = sysid;
5934 	shr_loco.sl_pid = shr.s_pid;
5935 	shr_loco.sl_id = shr.s_sysid;
5936 	shr.s_owner = (caddr_t)&shr_loco;
5937 	shr.s_own_len = sizeof (shr_loco);
5938 
5939 	fflags = 0;
5940 	if (access & OPEN4_SHARE_ACCESS_READ)
5941 		fflags |= FREAD;
5942 	if (access & OPEN4_SHARE_ACCESS_WRITE)
5943 		fflags |= FWRITE;
5944 
5945 	if ((err = vop_shrlock(cs->vp, F_SHARE, &shr, fflags)) != 0) {
5946 
5947 		resp->status = err == EAGAIN ?
5948 			NFS4ERR_SHARE_DENIED : puterrno4(err);
5949 
5950 		rfs4_file_rele(file);
5951 		/* Not a fully formed open; "close" it */
5952 		if (screate == TRUE)
5953 			rfs4_state_close(state, FALSE, FALSE, cs->cr);
5954 		rfs4_state_rele(state);
5955 		return;
5956 	}
5957 
5958 	rfs4_dbe_lock(state->dbe);
5959 	rfs4_dbe_lock(file->dbe);
5960 
5961 	/*
5962 	 * Calculate the new deny and access mode that this open is adding to
5963 	 * the file for this open owner;
5964 	 */
5965 	dmodes = (deny & ~state->share_deny);
5966 	amodes = (access & ~state->share_access);
5967 
5968 	/*
5969 	 * Check to see if this file is delegated and if so, if a
5970 	 * recall needs to be done.
5971 	 */
5972 	if (rfs4_check_recall(state, access)) {
5973 		rfs4_dbe_unlock(file->dbe);
5974 		rfs4_dbe_unlock(state->dbe);
5975 		rfs4_recall_deleg(file, FALSE, state->owner->client);
5976 		delay(NFS4_DELEGATION_CONFLICT_DELAY);
5977 		rfs4_dbe_lock(state->dbe);
5978 		rfs4_dbe_lock(file->dbe);
5979 		/* Let's see if the delegation was returned */
5980 		if (rfs4_check_recall(state, access)) {
5981 			rfs4_dbe_unlock(file->dbe);
5982 			rfs4_dbe_unlock(state->dbe);
5983 			rfs4_file_rele(file);
5984 			rfs4_update_lease(state->owner->client);
5985 			/* recalculate flags to match what was added */
5986 			fflags = 0;
5987 			if (amodes & OPEN4_SHARE_ACCESS_READ)
5988 				fflags |= FREAD;
5989 			if (amodes & OPEN4_SHARE_ACCESS_WRITE)
5990 				fflags |= FWRITE;
5991 			(void) vop_shrlock(cs->vp, F_UNSHARE, &shr, fflags);
5992 			/* Not a fully formed open; "close" it */
5993 			if (screate == TRUE)
5994 				rfs4_state_close(state, FALSE, FALSE, cs->cr);
5995 			rfs4_state_rele(state);
5996 			resp->status = NFS4ERR_DELAY;
5997 			return;
5998 		}
5999 	}
6000 
6001 	if (dmodes & OPEN4_SHARE_DENY_READ)
6002 		file->deny_read++;
6003 	if (dmodes & OPEN4_SHARE_DENY_WRITE)
6004 		file->deny_write++;
6005 	file->share_deny |= deny;
6006 	state->share_deny |= deny;
6007 
6008 	if (amodes & OPEN4_SHARE_ACCESS_READ)
6009 		file->access_read++;
6010 	if (amodes & OPEN4_SHARE_ACCESS_WRITE)
6011 		file->access_write++;
6012 	file->share_access |= access;
6013 	state->share_access |= access;
6014 
6015 	/*
6016 	 * Check for delegation here. if the deleg argument is not
6017 	 * DELEG_ANY, then this is a reclaim from a client and
6018 	 * we must honor the delegation requested. If necessary we can
6019 	 * set the recall flag.
6020 	 */
6021 
6022 	dsp = rfs4_grant_delegation(deleg, state, &recall);
6023 
6024 	cs->deleg = (file->dinfo->dtype == OPEN_DELEGATE_WRITE);
6025 
6026 	next_stateid(&state->stateid);
6027 
6028 	resp->stateid = state->stateid.stateid;
6029 
6030 	rfs4_dbe_unlock(file->dbe);
6031 	rfs4_dbe_unlock(state->dbe);
6032 
6033 	if (dsp) {
6034 		rfs4_set_deleg_response(dsp, &resp->delegation, NULL, recall);
6035 		rfs4_deleg_state_rele(dsp);
6036 	}
6037 
6038 	rfs4_file_rele(file);
6039 	rfs4_state_rele(state);
6040 
6041 	resp->status = NFS4_OK;
6042 }
6043 
6044 /*ARGSUSED*/
6045 static void
6046 rfs4_do_opennull(struct compound_state *cs, struct svc_req *req,
6047 		OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6048 {
6049 	change_info4 *cinfo = &resp->cinfo;
6050 	bitmap4 *attrset = &resp->attrset;
6051 
6052 	if (args->opentype == OPEN4_NOCREATE)
6053 		resp->status = rfs4_lookupfile(&args->open_claim4_u.file,
6054 					req, cs, args->share_access, cinfo);
6055 	else {
6056 		/* inhibit delegation grants during exclusive create */
6057 
6058 		if (args->mode == EXCLUSIVE4)
6059 			rfs4_disable_delegation();
6060 
6061 		resp->status = rfs4_createfile(args, req, cs, cinfo, attrset,
6062 					oo->client->clientid);
6063 	}
6064 
6065 	if (resp->status == NFS4_OK) {
6066 
6067 		/* cs->vp cs->fh now reference the desired file */
6068 
6069 		rfs4_do_open(cs, req, oo, DELEG_ANY, args->share_access,
6070 						args->share_deny, resp);
6071 
6072 		/*
6073 		 * If rfs4_createfile set attrset, we must
6074 		 * clear this attrset before the response is copied.
6075 		 */
6076 		if (resp->status != NFS4_OK && resp->attrset) {
6077 			resp->attrset = 0;
6078 		}
6079 	}
6080 	else
6081 		*cs->statusp = resp->status;
6082 
6083 	if (args->mode == EXCLUSIVE4)
6084 		rfs4_enable_delegation();
6085 }
6086 
6087 /*ARGSUSED*/
6088 static void
6089 rfs4_do_openprev(struct compound_state *cs, struct svc_req *req,
6090 		OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6091 {
6092 	change_info4 *cinfo = &resp->cinfo;
6093 	vattr_t va;
6094 	vtype_t v_type = cs->vp->v_type;
6095 	int error = 0;
6096 
6097 	/* Verify that we have a regular file */
6098 	if (v_type != VREG) {
6099 		if (v_type == VDIR)
6100 			resp->status = NFS4ERR_ISDIR;
6101 		else if (v_type == VLNK)
6102 			resp->status = NFS4ERR_SYMLINK;
6103 		else
6104 			resp->status = NFS4ERR_INVAL;
6105 		return;
6106 	}
6107 
6108 	va.va_mask = AT_MODE|AT_UID;
6109 	error = VOP_GETATTR(cs->vp, &va, 0, cs->cr);
6110 	if (error) {
6111 		resp->status = puterrno4(error);
6112 		return;
6113 	}
6114 
6115 	cs->mandlock = MANDLOCK(cs->vp, va.va_mode);
6116 
6117 	/*
6118 	 * Check if we have access to the file, Note the the file
6119 	 * could have originally been open UNCHECKED or GUARDED
6120 	 * with mode bits that will now fail, but there is nothing
6121 	 * we can really do about that except in the case that the
6122 	 * owner of the file is the one requesting the open.
6123 	 */
6124 	if (crgetuid(cs->cr) != va.va_uid) {
6125 		resp->status = check_open_access(args->share_access, cs, req);
6126 		if (resp->status != NFS4_OK) {
6127 			return;
6128 		}
6129 	}
6130 
6131 	/*
6132 	 * cinfo on a CLAIM_PREVIOUS is undefined, initialize to zero
6133 	 */
6134 	cinfo->before = 0;
6135 	cinfo->after = 0;
6136 	cinfo->atomic = FALSE;
6137 
6138 	rfs4_do_open(cs, req, oo,
6139 		NFS4_DELEG4TYPE2REQTYPE(args->open_claim4_u.delegate_type),
6140 		args->share_access, args->share_deny, resp);
6141 }
6142 
6143 static void
6144 rfs4_do_opendelcur(struct compound_state *cs, struct svc_req *req,
6145 		OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6146 {
6147 	int error;
6148 	nfsstat4 status;
6149 	stateid4 stateid =
6150 			args->open_claim4_u.delegate_cur_info.delegate_stateid;
6151 	rfs4_deleg_state_t *dsp;
6152 
6153 	/*
6154 	 * Find the state info from the stateid and confirm that the
6155 	 * file is delegated.  If the state openowner is the same as
6156 	 * the supplied openowner we're done. If not, get the file
6157 	 * info from the found state info. Use that file info to
6158 	 * create the state for this lock owner. Note solaris doen't
6159 	 * really need the pathname to find the file. We may want to
6160 	 * lookup the pathname and make sure that the vp exist and
6161 	 * matches the vp in the file structure. However it is
6162 	 * possible that the pathname nolonger exists (local process
6163 	 * unlinks the file), so this may not be that useful.
6164 	 */
6165 
6166 	status = rfs4_get_deleg_state(&stateid, &dsp);
6167 	if (status != NFS4_OK) {
6168 		resp->status = status;
6169 		return;
6170 	}
6171 
6172 	ASSERT(dsp->finfo->dinfo->dtype != OPEN_DELEGATE_NONE);
6173 
6174 	/*
6175 	 * New lock owner, create state. Since this was probably called
6176 	 * in response to a CB_RECALL we set deleg to DELEG_NONE
6177 	 */
6178 
6179 	ASSERT(cs->vp != NULL);
6180 	VN_RELE(cs->vp);
6181 	VN_HOLD(dsp->finfo->vp);
6182 	cs->vp = dsp->finfo->vp;
6183 
6184 	if (error = makefh4(&cs->fh, cs->vp, cs->exi)) {
6185 		rfs4_deleg_state_rele(dsp);
6186 		*cs->statusp = resp->status = puterrno4(error);
6187 		return;
6188 	}
6189 
6190 	/* Mark progress for delegation returns */
6191 	dsp->finfo->dinfo->time_lastwrite = gethrestime_sec();
6192 	rfs4_deleg_state_rele(dsp);
6193 	rfs4_do_open(cs, req, oo, DELEG_NONE,
6194 				args->share_access, args->share_deny, resp);
6195 }
6196 
6197 /*ARGSUSED*/
6198 static void
6199 rfs4_do_opendelprev(struct compound_state *cs, struct svc_req *req,
6200 			OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6201 {
6202 	/*
6203 	 * Lookup the pathname, it must already exist since this file
6204 	 * was delegated.
6205 	 *
6206 	 * Find the file and state info for this vp and open owner pair.
6207 	 *	check that they are in fact delegated.
6208 	 *	check that the state access and deny modes are the same.
6209 	 *
6210 	 * Return the delgation possibly seting the recall flag.
6211 	 */
6212 	rfs4_file_t *file;
6213 	rfs4_state_t *state;
6214 	bool_t create = FALSE;
6215 	bool_t dcreate = FALSE;
6216 	rfs4_deleg_state_t *dsp;
6217 	nfsace4 *ace;
6218 
6219 
6220 	/* Note we ignore oflags */
6221 	resp->status = rfs4_lookupfile(&args->open_claim4_u.file_delegate_prev,
6222 				req, cs, args->share_access, &resp->cinfo);
6223 
6224 	if (resp->status != NFS4_OK) {
6225 		return;
6226 	}
6227 
6228 	/* get the file struct and hold a lock on it during initial open */
6229 	file = rfs4_findfile_withlock(cs->vp, NULL, &create);
6230 	if (file == NULL) {
6231 		NFS4_DEBUG(rfs4_debug,
6232 			(CE_NOTE, "rfs4_do_opendelprev: can't find file"));
6233 		resp->status = NFS4ERR_SERVERFAULT;
6234 		return;
6235 	}
6236 
6237 	state = rfs4_findstate_by_owner_file(oo, file, &create);
6238 	if (state == NULL) {
6239 		NFS4_DEBUG(rfs4_debug,
6240 			(CE_NOTE, "rfs4_do_opendelprev: can't find state"));
6241 		resp->status = NFS4ERR_SERVERFAULT;
6242 		rfs4_file_rele_withunlock(file);
6243 		return;
6244 	}
6245 
6246 	rfs4_dbe_lock(state->dbe);
6247 	rfs4_dbe_lock(file->dbe);
6248 	if (args->share_access != state->share_access ||
6249 			args->share_deny != state->share_deny ||
6250 			state->finfo->dinfo->dtype == OPEN_DELEGATE_NONE) {
6251 		NFS4_DEBUG(rfs4_debug,
6252 			(CE_NOTE, "rfs4_do_opendelprev: state mixup"));
6253 		rfs4_dbe_unlock(file->dbe);
6254 		rfs4_dbe_unlock(state->dbe);
6255 		rfs4_file_rele(file);
6256 		rfs4_state_rele(state);
6257 		resp->status = NFS4ERR_SERVERFAULT;
6258 		return;
6259 	}
6260 	rfs4_dbe_unlock(file->dbe);
6261 	rfs4_dbe_unlock(state->dbe);
6262 
6263 	dsp = rfs4_finddeleg(state, &dcreate);
6264 	if (dsp == NULL) {
6265 		rfs4_state_rele(state);
6266 		rfs4_file_rele(file);
6267 		resp->status = NFS4ERR_SERVERFAULT;
6268 		return;
6269 	}
6270 
6271 	next_stateid(&state->stateid);
6272 
6273 	resp->stateid = state->stateid.stateid;
6274 
6275 	resp->delegation.delegation_type = dsp->dtype;
6276 
6277 	if (dsp->dtype == OPEN_DELEGATE_READ) {
6278 		open_read_delegation4 *rv =
6279 			&resp->delegation.open_delegation4_u.read;
6280 
6281 		rv->stateid = dsp->delegid.stateid;
6282 		rv->recall = FALSE; /* no policy in place to set to TRUE */
6283 		ace = &rv->permissions;
6284 	} else {
6285 		open_write_delegation4 *rv =
6286 			&resp->delegation.open_delegation4_u.write;
6287 
6288 		rv->stateid = dsp->delegid.stateid;
6289 		rv->recall = FALSE;  /* no policy in place to set to TRUE */
6290 		ace = &rv->permissions;
6291 		rv->space_limit.limitby = NFS_LIMIT_SIZE;
6292 		rv->space_limit.nfs_space_limit4_u.filesize = UINT64_MAX;
6293 	}
6294 
6295 	/* XXX For now */
6296 	ace->type = ACE4_ACCESS_ALLOWED_ACE_TYPE;
6297 	ace->flag = 0;
6298 	ace->access_mask = 0;
6299 	ace->who.utf8string_len = 0;
6300 	ace->who.utf8string_val = 0;
6301 
6302 	rfs4_deleg_state_rele(dsp);
6303 	rfs4_state_rele(state);
6304 	rfs4_file_rele(file);
6305 }
6306 
/*
 * Verdict of a sequence id check (see rfs4_check_seqid()).
 */
typedef enum {
	NFS4_CHKSEQ_OKAY = 0,	/* request carries the next expected seqid */
	NFS4_CHKSEQ_REPLAY = 1,	/* same seqid and same op as the last reply */
	NFS4_CHKSEQ_BAD = 2	/* anything else */
} rfs4_chkseq_t;
6312 
6313 /*
6314  * Generic function for sequence number checks.
6315  */
6316 static rfs4_chkseq_t
6317 rfs4_check_seqid(seqid4 seqid, nfs_resop4 *lastop,
6318 		seqid4 rqst_seq, nfs_resop4 *resop, bool_t copyres)
6319 {
6320 	/* Same sequence ids and matching operations? */
6321 	if (seqid == rqst_seq && resop->resop == lastop->resop) {
6322 		if (copyres == TRUE) {
6323 			rfs4_free_reply(resop);
6324 			rfs4_copy_reply(resop, lastop);
6325 		}
6326 		NFS4_DEBUG(rfs4_debug, (CE_NOTE,
6327 			"Replayed SEQID %d\n", seqid));
6328 		return (NFS4_CHKSEQ_REPLAY);
6329 	}
6330 
6331 	/* If the incoming sequence is not the next expected then it is bad */
6332 	if (rqst_seq != seqid + 1) {
6333 		if (rqst_seq == seqid) {
6334 			NFS4_DEBUG(rfs4_debug,
6335 				(CE_NOTE, "BAD SEQID: Replayed sequence id "
6336 				"but last op was %d current op is %d\n",
6337 				lastop->resop, resop->resop));
6338 			return (NFS4_CHKSEQ_BAD);
6339 		}
6340 		NFS4_DEBUG(rfs4_debug,
6341 			(CE_NOTE, "BAD SEQID: got %u expecting %u\n",
6342 				rqst_seq, seqid));
6343 		return (NFS4_CHKSEQ_BAD);
6344 	}
6345 
6346 	/* Everything okay -- next expected */
6347 	return (NFS4_CHKSEQ_OKAY);
6348 }
6349 
6350 
6351 static rfs4_chkseq_t
6352 rfs4_check_open_seqid(seqid4 seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
6353 {
6354 	rfs4_chkseq_t rc;
6355 
6356 	rfs4_dbe_lock(op->dbe);
6357 	rc = rfs4_check_seqid(op->open_seqid, op->reply, seqid, resop, TRUE);
6358 	rfs4_dbe_unlock(op->dbe);
6359 
6360 	if (rc == NFS4_CHKSEQ_OKAY)
6361 		rfs4_update_lease(op->client);
6362 
6363 	return (rc);
6364 }
6365 
6366 static rfs4_chkseq_t
6367 rfs4_check_olo_seqid(seqid4 olo_seqid, rfs4_openowner_t *op,
6368 	nfs_resop4 *resop)
6369 {
6370 	rfs4_chkseq_t rc;
6371 
6372 	rfs4_dbe_lock(op->dbe);
6373 	rc = rfs4_check_seqid(op->open_seqid, op->reply,
6374 		olo_seqid, resop, FALSE);
6375 	rfs4_dbe_unlock(op->dbe);
6376 
6377 	return (rc);
6378 }
6379 
6380 static rfs4_chkseq_t
6381 rfs4_check_lock_seqid(seqid4 seqid, rfs4_lo_state_t *lp, nfs_resop4 *resop)
6382 {
6383 	rfs4_chkseq_t rc = NFS4_CHKSEQ_OKAY;
6384 
6385 	rfs4_dbe_lock(lp->dbe);
6386 	if (!lp->skip_seqid_check)
6387 		rc = rfs4_check_seqid(lp->seqid, lp->reply,
6388 			seqid, resop, TRUE);
6389 	rfs4_dbe_unlock(lp->dbe);
6390 
6391 	return (rc);
6392 }
6393 
6394 static void
6395 rfs4_op_open(nfs_argop4 *argop, nfs_resop4 *resop,
6396 	    struct svc_req *req, struct compound_state *cs)
6397 {
6398 	OPEN4args *args = &argop->nfs_argop4_u.opopen;
6399 	OPEN4res *resp = &resop->nfs_resop4_u.opopen;
6400 	open_owner4 *owner = &args->owner;
6401 	open_claim_type4 claim = args->claim;
6402 	rfs4_client_t *cp;
6403 	rfs4_openowner_t *oo;
6404 	bool_t create;
6405 	bool_t replay = FALSE;
6406 	int can_reclaim;
6407 
6408 
6409 	if (cs->vp == NULL) {
6410 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
6411 		return;
6412 	}
6413 
6414 	/*
6415 	 * Need to check clientid and lease expiration first based on
6416 	 * error ordering and incrementing sequence id.
6417 	 */
6418 	cp = rfs4_findclient_by_id(owner->clientid, FALSE);
6419 	if (cp == NULL) {
6420 		*cs->statusp = resp->status =
6421 			rfs4_check_clientid(&owner->clientid, 0);
6422 		return;
6423 	}
6424 
6425 	if (rfs4_lease_expired(cp)) {
6426 		rfs4_client_close(cp);
6427 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
6428 		return;
6429 	}
6430 	can_reclaim = cp->can_reclaim;
6431 
6432 	/*
6433 	 * Find the open_owner for use from this point forward.  Take
6434 	 * care in updating the sequence id based on the type of error
6435 	 * being returned.
6436 	 */
6437 retry:
6438 	create = TRUE;
6439 	oo = rfs4_findopenowner(owner, &create, args->seqid);
6440 	if (oo == NULL) {
6441 		*cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID;
6442 		rfs4_client_rele(cp);
6443 		return;
6444 	}
6445 
6446 	/* Hold off access to the sequence space while the open is done */
6447 	rfs4_sw_enter(&oo->oo_sw);
6448 
6449 	/*
6450 	 * If the open_owner existed before at the server, then check
6451 	 * the sequence id.
6452 	 */
6453 	if (!create && !oo->postpone_confirm) {
6454 		switch (rfs4_check_open_seqid(args->seqid, oo, resop)) {
6455 		case NFS4_CHKSEQ_BAD:
6456 			if ((args->seqid > oo->open_seqid) &&
6457 				oo->need_confirm) {
6458 				rfs4_free_opens(oo, TRUE, FALSE);
6459 				rfs4_sw_exit(&oo->oo_sw);
6460 				rfs4_openowner_rele(oo);
6461 				goto retry;
6462 			}
6463 			resp->status = NFS4ERR_BAD_SEQID;
6464 			goto out;
6465 		case NFS4_CHKSEQ_REPLAY: /* replay of previous request */
6466 			replay = TRUE;
6467 			goto out;
6468 		default:
6469 			break;
6470 		}
6471 
6472 		/*
6473 		 * Sequence was ok and open owner exists
6474 		 * check to see if we have yet to see an
6475 		 * open_confirm.
6476 		 */
6477 		if (oo->need_confirm) {
6478 			rfs4_free_opens(oo, TRUE, FALSE);
6479 			rfs4_sw_exit(&oo->oo_sw);
6480 			rfs4_openowner_rele(oo);
6481 			goto retry;
6482 		}
6483 	}
6484 	/* Grace only applies to regular-type OPENs */
6485 	if (rfs4_clnt_in_grace(cp) &&
6486 	    (claim == CLAIM_NULL || claim == CLAIM_DELEGATE_CUR)) {
6487 		*cs->statusp = resp->status = NFS4ERR_GRACE;
6488 		goto out;
6489 	}
6490 
6491 	/*
6492 	 * If previous state at the server existed then can_reclaim
6493 	 * will be set. If not reply NFS4ERR_NO_GRACE to the
6494 	 * client.
6495 	 */
6496 	if (rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS && !can_reclaim) {
6497 		*cs->statusp = resp->status = NFS4ERR_NO_GRACE;
6498 		goto out;
6499 	}
6500 
6501 
6502 	/*
6503 	 * Reject the open if the client has missed the grace period
6504 	 */
6505 	if (!rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS) {
6506 		*cs->statusp = resp->status = NFS4ERR_NO_GRACE;
6507 		goto out;
6508 	}
6509 
6510 	/* Couple of up-front bookkeeping items */
6511 	if (oo->need_confirm) {
6512 		/*
6513 		 * If this is a reclaim OPEN then we should not ask
6514 		 * for a confirmation of the open_owner per the
6515 		 * protocol specification.
6516 		 */
6517 		if (claim == CLAIM_PREVIOUS)
6518 			oo->need_confirm = FALSE;
6519 		else
6520 			resp->rflags |= OPEN4_RESULT_CONFIRM;
6521 	}
6522 	resp->rflags |= OPEN4_RESULT_LOCKTYPE_POSIX;
6523 
6524 	/*
6525 	 * If there is an unshared filesystem mounted on this vnode,
6526 	 * do not allow to open/create in this directory.
6527 	 */
6528 	if (vn_ismntpt(cs->vp)) {
6529 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
6530 		goto out;
6531 	}
6532 
6533 	/*
6534 	 * access must READ, WRITE, or BOTH.  No access is invalid.
6535 	 * deny can be READ, WRITE, BOTH, or NONE.
6536 	 * bits not defined for access/deny are invalid.
6537 	 */
6538 	if (! (args->share_access & OPEN4_SHARE_ACCESS_BOTH) ||
6539 	    (args->share_access & ~OPEN4_SHARE_ACCESS_BOTH) ||
6540 	    (args->share_deny & ~OPEN4_SHARE_DENY_BOTH)) {
6541 		*cs->statusp = resp->status = NFS4ERR_INVAL;
6542 		goto out;
6543 	}
6544 
6545 
6546 	/*
6547 	 * make sure attrset is zero before response is built.
6548 	 */
6549 	resp->attrset = 0;
6550 
6551 	switch (claim) {
6552 	case CLAIM_NULL:
6553 		rfs4_do_opennull(cs, req, args, oo, resp);
6554 	    break;
6555 	case CLAIM_PREVIOUS:
6556 		rfs4_do_openprev(cs, req, args, oo, resp);
6557 	    break;
6558 	case CLAIM_DELEGATE_CUR:
6559 		rfs4_do_opendelcur(cs, req, args, oo, resp);
6560 	    break;
6561 	case CLAIM_DELEGATE_PREV:
6562 		rfs4_do_opendelprev(cs, req, args, oo, resp);
6563 	    break;
6564 	default:
6565 		resp->status = NFS4ERR_INVAL;
6566 		break;
6567 	}
6568 
6569 out:
6570 	rfs4_client_rele(cp);
6571 
6572 	/* Catch sequence id handling here to make it a little easier */
6573 	switch (resp->status) {
6574 	case NFS4ERR_BADXDR:
6575 	case NFS4ERR_BAD_SEQID:
6576 	case NFS4ERR_BAD_STATEID:
6577 	case NFS4ERR_NOFILEHANDLE:
6578 	case NFS4ERR_RESOURCE:
6579 	case NFS4ERR_STALE_CLIENTID:
6580 	case NFS4ERR_STALE_STATEID:
6581 		/*
6582 		 * The protocol states that if any of these errors are
6583 		 * being returned, the sequence id should not be
6584 		 * incremented.  Any other return requires an
6585 		 * increment.
6586 		 */
6587 		break;
6588 	default:
6589 		/* Always update the lease in this case */
6590 		rfs4_update_lease(oo->client);
6591 
6592 		/* Regular response - copy the result */
6593 		if (!replay)
6594 			rfs4_update_open_resp(oo, resop, &cs->fh);
6595 
6596 		/*
6597 		 * REPLAY case: Only if the previous response was OK
6598 		 * do we copy the filehandle.  If not OK, no
6599 		 * filehandle to copy.
6600 		 */
6601 		if (replay == TRUE &&
6602 		    resp->status == NFS4_OK &&
6603 		    oo->reply_fh.nfs_fh4_val) {
6604 			/*
6605 			 * If this is a replay, we must restore the
6606 			 * current filehandle/vp to that of what was
6607 			 * returned originally.  Try our best to do
6608 			 * it.
6609 			 */
6610 			nfs_fh4_fmt_t *fh_fmtp =
6611 				(nfs_fh4_fmt_t *)oo->reply_fh.nfs_fh4_val;
6612 
6613 			cs->exi = checkexport4(&fh_fmtp->fh4_fsid,
6614 				(fid_t *)&fh_fmtp->fh4_xlen, NULL);
6615 
6616 			if (cs->exi == NULL) {
6617 				resp->status = NFS4ERR_STALE;
6618 				goto finish;
6619 			}
6620 
6621 			VN_RELE(cs->vp);
6622 
6623 			cs->vp = nfs4_fhtovp(&oo->reply_fh, cs->exi,
6624 				&resp->status);
6625 
6626 			if (cs->vp == NULL)
6627 				goto finish;
6628 
6629 			nfs_fh4_copy(&oo->reply_fh, &cs->fh);
6630 		}
6631 
6632 		/*
6633 		 * If this was a replay, no need to update the
6634 		 * sequence id. If the open_owner was not created on
6635 		 * this pass, then update.  The first use of an
6636 		 * open_owner will not bump the sequence id.
6637 		 */
6638 		if (replay == FALSE && !create)
6639 			rfs4_update_open_sequence(oo);
6640 		/*
6641 		 * If the client is receiving an error and the
6642 		 * open_owner needs to be confirmed, there is no way
6643 		 * to notify the client of this fact ignoring the fact
6644 		 * that the server has no method of returning a
6645 		 * stateid to confirm.  Therefore, the server needs to
6646 		 * mark this open_owner in a way as to avoid the
6647 		 * sequence id checking the next time the client uses
6648 		 * this open_owner.
6649 		 */
6650 		if (resp->status != NFS4_OK && oo->need_confirm)
6651 			oo->postpone_confirm = TRUE;
6652 		/*
6653 		 * If OK response then clear the postpone flag and
6654 		 * reset the sequence id to keep in sync with the
6655 		 * client.
6656 		 */
6657 		if (resp->status == NFS4_OK && oo->postpone_confirm) {
6658 			oo->postpone_confirm = FALSE;
6659 			oo->open_seqid = args->seqid;
6660 		}
6661 		break;
6662 	}
6663 
6664 finish:
6665 	*cs->statusp = resp->status;
6666 
6667 	rfs4_sw_exit(&oo->oo_sw);
6668 	rfs4_openowner_rele(oo);
6669 }
6670 
6671 /*ARGSUSED*/
6672 void
6673 rfs4_op_open_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
6674 		    struct svc_req *req, struct compound_state *cs)
6675 {
6676 	OPEN_CONFIRM4args *args = &argop->nfs_argop4_u.opopen_confirm;
6677 	OPEN_CONFIRM4res *resp = &resop->nfs_resop4_u.opopen_confirm;
6678 	rfs4_state_t *sp;
6679 	nfsstat4 status;
6680 
6681 	if (cs->vp == NULL) {
6682 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
6683 		return;
6684 	}
6685 
6686 	status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
6687 	if (status != NFS4_OK) {
6688 		*cs->statusp = resp->status = status;
6689 		return;
6690 	}
6691 
6692 	/* Ensure specified filehandle matches */
6693 	if (cs->vp != sp->finfo->vp) {
6694 		rfs4_state_rele(sp);
6695 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
6696 		return;
6697 	}
6698 
6699 	/* hold off other access to open_owner while we tinker */
6700 	rfs4_sw_enter(&sp->owner->oo_sw);
6701 
6702 	switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
6703 	case NFS4_CHECK_STATEID_OKAY:
6704 		if (rfs4_check_open_seqid(args->seqid, sp->owner,
6705 			resop) != 0) {
6706 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
6707 			break;
6708 		}
6709 		/*
6710 		 * If it is the appropriate stateid and determined to
6711 		 * be "OKAY" then this means that the stateid does not
6712 		 * need to be confirmed and the client is in error for
6713 		 * sending an OPEN_CONFIRM.
6714 		 */
6715 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
6716 		break;
6717 	case NFS4_CHECK_STATEID_OLD:
6718 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
6719 		break;
6720 	case NFS4_CHECK_STATEID_BAD:
6721 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
6722 		break;
6723 	case NFS4_CHECK_STATEID_EXPIRED:
6724 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
6725 		break;
6726 	case NFS4_CHECK_STATEID_CLOSED:
6727 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
6728 		break;
6729 	case NFS4_CHECK_STATEID_REPLAY:
6730 		switch (rfs4_check_open_seqid(args->seqid, sp->owner, resop)) {
6731 		case NFS4_CHKSEQ_OKAY:
6732 			/*
6733 			 * This is replayed stateid; if seqid matches
6734 			 * next expected, then client is using wrong seqid.
6735 			 */
6736 			/* fall through */
6737 		case NFS4_CHKSEQ_BAD:
6738 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
6739 			break;
6740 		case NFS4_CHKSEQ_REPLAY:
6741 			/*
6742 			 * Note this case is the duplicate case so
6743 			 * resp->status is already set.
6744 			 */
6745 			*cs->statusp = resp->status;
6746 			rfs4_update_lease(sp->owner->client);
6747 			break;
6748 		}
6749 		break;
6750 	case NFS4_CHECK_STATEID_UNCONFIRMED:
6751 		if (rfs4_check_open_seqid(args->seqid, sp->owner,
6752 			resop) != NFS4_CHKSEQ_OKAY) {
6753 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
6754 			break;
6755 		}
6756 		*cs->statusp = resp->status = NFS4_OK;
6757 
6758 		next_stateid(&sp->stateid);
6759 		resp->open_stateid = sp->stateid.stateid;
6760 		sp->owner->need_confirm = FALSE;
6761 		rfs4_update_lease(sp->owner->client);
6762 		rfs4_update_open_sequence(sp->owner);
6763 		rfs4_update_open_resp(sp->owner, resop, NULL);
6764 		break;
6765 	default:
6766 		ASSERT(FALSE);
6767 		*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
6768 		break;
6769 	}
6770 	rfs4_sw_exit(&sp->owner->oo_sw);
6771 	rfs4_state_rele(sp);
6772 }
6773 
6774 /*ARGSUSED*/
6775 void
6776 rfs4_op_open_downgrade(nfs_argop4 *argop, nfs_resop4 *resop,
6777 		    struct svc_req *req, struct compound_state *cs)
6778 {
6779 	OPEN_DOWNGRADE4args *args = &argop->nfs_argop4_u.opopen_downgrade;
6780 	OPEN_DOWNGRADE4res *resp = &resop->nfs_resop4_u.opopen_downgrade;
6781 	uint32_t access = args->share_access;
6782 	uint32_t deny = args->share_deny;
6783 	nfsstat4 status;
6784 	rfs4_state_t *sp;
6785 	rfs4_file_t *fp;
6786 
6787 	if (cs->vp == NULL) {
6788 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
6789 		return;
6790 	}
6791 
6792 	status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
6793 	if (status != NFS4_OK) {
6794 		*cs->statusp = resp->status = status;
6795 		return;
6796 	}
6797 
6798 	/* Ensure specified filehandle matches */
6799 	if (cs->vp != sp->finfo->vp) {
6800 		rfs4_state_rele(sp);
6801 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
6802 		return;
6803 	}
6804 
6805 	/* hold off other access to open_owner while we tinker */
6806 	rfs4_sw_enter(&sp->owner->oo_sw);
6807 
6808 	switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
6809 	case NFS4_CHECK_STATEID_OKAY:
6810 		if (rfs4_check_open_seqid(args->seqid, sp->owner,
6811 			resop) != NFS4_CHKSEQ_OKAY) {
6812 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
6813 			goto end;
6814 		}
6815 		break;
6816 	case NFS4_CHECK_STATEID_OLD:
6817 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
6818 		goto end;
6819 	case NFS4_CHECK_STATEID_BAD:
6820 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
6821 		goto end;
6822 	case NFS4_CHECK_STATEID_EXPIRED:
6823 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
6824 		goto end;
6825 	case NFS4_CHECK_STATEID_CLOSED:
6826 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
6827 		goto end;
6828 	case NFS4_CHECK_STATEID_UNCONFIRMED:
6829 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
6830 		goto end;
6831 	case NFS4_CHECK_STATEID_REPLAY:
6832 		/* Check the sequence id for the open owner */
6833 		switch (rfs4_check_open_seqid(args->seqid, sp->owner, resop)) {
6834 		case NFS4_CHKSEQ_OKAY:
6835 			/*
6836 			 * This is replayed stateid; if seqid matches
6837 			 * next expected, then client is using wrong seqid.
6838 			 */
6839 			/* fall through */
6840 		case NFS4_CHKSEQ_BAD:
6841 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
6842 			goto end;
6843 		case NFS4_CHKSEQ_REPLAY:
6844 			/*
6845 			 * Note this case is the duplicate case so
6846 			 * resp->status is already set.
6847 			 */
6848 			*cs->statusp = resp->status;
6849 			rfs4_update_lease(sp->owner->client);
6850 			goto end;
6851 		}
6852 		break;
6853 	default:
6854 		ASSERT(FALSE);
6855 		break;
6856 	}
6857 
6858 	rfs4_dbe_lock(sp->dbe);
6859 	/*
6860 	 * Check that the new access modes and deny modes are valid.
6861 	 * Check that no invalid bits are set.
6862 	 */
6863 	if ((access & ~(OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) ||
6864 	    (deny & ~(OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_READ))) {
6865 		*cs->statusp = resp->status = NFS4ERR_INVAL;
6866 		rfs4_update_open_sequence(sp->owner);
6867 		rfs4_dbe_unlock(sp->dbe);
6868 		goto end;
6869 	}
6870 
6871 	/*
6872 	 * The new modes must be a subset of the current modes and
6873 	 * the access must specify at least one mode. To test that
6874 	 * the new mode is a subset of the current modes we bitwise
6875 	 * AND them together and check that the result equals the new
6876 	 * mode. For example:
6877 	 * New mode, access == R and current mode, sp->share_access  == RW
6878 	 * access & sp->share_access == R == access, so the new access mode
6879 	 * is valid. Consider access == RW, sp->share_access = R
6880 	 * access & sp->share_access == R != access, so the new access mode
6881 	 * is invalid.
6882 	 */
6883 	if ((access & sp->share_access) != access ||
6884 	    (deny & sp->share_deny) != deny ||
6885 	    (access &
6886 	    (OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) == 0) {
6887 		*cs->statusp = resp->status = NFS4ERR_INVAL;
6888 		rfs4_update_open_sequence(sp->owner);
6889 		rfs4_dbe_unlock(sp->dbe);
6890 		goto end;
6891 	}
6892 
6893 	/*
6894 	 * Release any share locks associated with this stateID.
6895 	 * Strictly speaking, this violates the spec because the
6896 	 * spec effectively requires that open downgrade be atomic.
6897 	 * At present, fs_shrlock does not have this capability.
6898 	 */
6899 	rfs4_dbe_unlock(sp->dbe);
6900 	rfs4_unshare(sp);
6901 	rfs4_dbe_lock(sp->dbe);
6902 
6903 	fp = sp->finfo;
6904 	rfs4_dbe_lock(fp->dbe);
6905 
6906 	/*
6907 	 * If the current mode has deny read and the new mode
6908 	 * does not, decrement the number of deny read mode bits
6909 	 * and if it goes to zero turn off the deny read bit
6910 	 * on the file.
6911 	 */
6912 	if ((sp->share_deny & OPEN4_SHARE_DENY_READ) &&
6913 	    (deny & OPEN4_SHARE_DENY_READ) == 0) {
6914 		fp->deny_read--;
6915 		if (fp->deny_read == 0)
6916 			fp->share_deny &= ~OPEN4_SHARE_DENY_READ;
6917 	}
6918 
6919 	/*
6920 	 * If the current mode has deny write and the new mode
6921 	 * does not, decrement the number of deny write mode bits
6922 	 * and if it goes to zero turn off the deny write bit
6923 	 * on the file.
6924 	 */
6925 	if ((sp->share_deny & OPEN4_SHARE_DENY_WRITE) &&
6926 	    (deny & OPEN4_SHARE_DENY_WRITE) == 0) {
6927 		fp->deny_write--;
6928 		if (fp->deny_write == 0)
6929 			fp->share_deny &= ~OPEN4_SHARE_DENY_WRITE;
6930 	}
6931 
6932 	/*
6933 	 * If the current mode has access read and the new mode
6934 	 * does not, decrement the number of access read mode bits
6935 	 * and if it goes to zero turn off the access read bit
6936 	 * on the file.
6937 	 */
6938 	if ((sp->share_access & OPEN4_SHARE_ACCESS_READ) &&
6939 	    (access & OPEN4_SHARE_ACCESS_READ) == 0) {
6940 		fp->access_read--;
6941 		if (fp->access_read == 0)
6942 			fp->share_access &= ~OPEN4_SHARE_ACCESS_READ;
6943 	}
6944 
6945 	/*
6946 	 * If the current mode has access write and the new mode
6947 	 * does not, decrement the number of access write mode bits
6948 	 * and if it goes to zero turn off the access write bit
6949 	 * on the file.
6950 	 */
6951 	if ((sp->share_access & OPEN4_SHARE_ACCESS_WRITE) &&
6952 	    (access & OPEN4_SHARE_ACCESS_WRITE) == 0) {
6953 		fp->access_write--;
6954 		if (fp->access_write == 0)
6955 			fp->share_deny &= ~OPEN4_SHARE_ACCESS_WRITE;
6956 	}
6957 
6958 	/* Set the new access and deny modes */
6959 	sp->share_access = access;
6960 	sp->share_deny = deny;
6961 	/* Check that the file is still accessible */
6962 	ASSERT(fp->share_access);
6963 
6964 	rfs4_dbe_unlock(fp->dbe);
6965 
6966 	rfs4_dbe_unlock(sp->dbe);
6967 	if ((status = rfs4_share(sp)) != NFS4_OK) {
6968 		*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
6969 		rfs4_update_open_sequence(sp->owner);
6970 		goto end;
6971 	}
6972 
6973 	rfs4_dbe_lock(sp->dbe);
6974 
6975 	/* Update the stateid */
6976 	next_stateid(&sp->stateid);
6977 	resp->open_stateid = sp->stateid.stateid;
6978 
6979 	rfs4_dbe_unlock(sp->dbe);
6980 
6981 	*cs->statusp = resp->status = NFS4_OK;
6982 	/* Update the lease */
6983 	rfs4_update_lease(sp->owner->client);
6984 	/* And the sequence */
6985 	rfs4_update_open_sequence(sp->owner);
6986 	rfs4_update_open_resp(sp->owner, resop, NULL);
6987 
6988 end:
6989 	rfs4_sw_exit(&sp->owner->oo_sw);
6990 	rfs4_state_rele(sp);
6991 }
6992 
6993 /*
6994  * The logic behind this function is detailed in the NFSv4 RFC in the
6995  * SETCLIENTID operation description under IMPLEMENTATION.  Refer to
6996  * that section for explicit guidance to server behavior for
6997  * SETCLIENTID.
6998  */
6999 void
7000 rfs4_op_setclientid(nfs_argop4 *argop, nfs_resop4 *resop,
7001 		    struct svc_req *req, struct compound_state *cs)
7002 {
7003 	SETCLIENTID4args *args = &argop->nfs_argop4_u.opsetclientid;
7004 	SETCLIENTID4res *res = &resop->nfs_resop4_u.opsetclientid;
7005 	rfs4_client_t *cp, *newcp, *cp_confirmed, *cp_unconfirmed;
7006 	bool_t create = TRUE;
7007 	char *addr, *netid;
7008 	int len;
7009 
7010 retry:
7011 	newcp = cp_confirmed = cp_unconfirmed = NULL;
7012 
7013 	/*
7014 	 * In search of an EXISTING client matching the incoming
7015 	 * request to establish a new client identifier at the server
7016 	 */
7017 	create = TRUE;
7018 	cp = rfs4_findclient(&args->client, &create, NULL);
7019 
7020 	/* Should never happen */
7021 	ASSERT(cp != NULL);
7022 
7023 	if (cp == NULL) {
7024 		*cs->statusp = res->status = NFS4ERR_SERVERFAULT;
7025 		return;
7026 	}
7027 
7028 	/*
7029 	 * Easiest case. Client identifier is newly created and is
7030 	 * unconfirmed.  Also note that for this case, no other
7031 	 * entries exist for the client identifier.  Nothing else to
7032 	 * check.  Just setup the response and respond.
7033 	 */
7034 	if (create) {
7035 		*cs->statusp = res->status = NFS4_OK;
7036 		res->SETCLIENTID4res_u.resok4.clientid = cp->clientid;
7037 		res->SETCLIENTID4res_u.resok4.setclientid_confirm =
7038 							cp->confirm_verf;
7039 		/* Setup callback information; CB_NULL confirmation later */
7040 		rfs4_client_setcb(cp, &args->callback, args->callback_ident);
7041 
7042 		rfs4_client_rele(cp);
7043 		return;
7044 	}
7045 
7046 	/*
7047 	 * An existing, confirmed client may exist but it may not have
7048 	 * been active for at least one lease period.  If so, then
7049 	 * "close" the client and create a new client identifier
7050 	 */
7051 	if (rfs4_lease_expired(cp)) {
7052 		rfs4_client_close(cp);
7053 		goto retry;
7054 	}
7055 
7056 	if (cp->need_confirm == TRUE)
7057 		cp_unconfirmed = cp;
7058 	else
7059 		cp_confirmed = cp;
7060 
7061 	cp = NULL;
7062 
7063 	/*
7064 	 * We have a confirmed client, now check for an
7065 	 * unconfimred entry
7066 	 */
7067 	if (cp_confirmed) {
7068 		/* If creds don't match then client identifier is inuse */
7069 		if (!creds_ok(cp_confirmed->cr_set, req, cs)) {
7070 			rfs4_cbinfo_t *cbp;
7071 			/*
7072 			 * Some one else has established this client
7073 			 * id. Try and say * who they are. We will use
7074 			 * the call back address supplied by * the
7075 			 * first client.
7076 			 */
7077 			*cs->statusp = res->status = NFS4ERR_CLID_INUSE;
7078 
7079 			addr = netid = NULL;
7080 
7081 			cbp = &cp_confirmed->cbinfo;
7082 			if (cbp->cb_callback.cb_location.r_addr &&
7083 			    cbp->cb_callback.cb_location.r_netid) {
7084 				cb_client4 *cbcp = &cbp->cb_callback;
7085 
7086 				len = strlen(cbcp->cb_location.r_addr)+1;
7087 				addr = kmem_alloc(len, KM_SLEEP);
7088 				bcopy(cbcp->cb_location.r_addr, addr, len);
7089 				len = strlen(cbcp->cb_location.r_netid)+1;
7090 				netid = kmem_alloc(len, KM_SLEEP);
7091 				bcopy(cbcp->cb_location.r_netid, netid, len);
7092 			}
7093 
7094 			res->SETCLIENTID4res_u.client_using.r_addr = addr;
7095 			res->SETCLIENTID4res_u.client_using.r_netid = netid;
7096 
7097 			rfs4_client_rele(cp_confirmed);
7098 		}
7099 
7100 		/*
7101 		 * Confirmed, creds match, and verifier matches; must
7102 		 * be an update of the callback info
7103 		 */
7104 		if (cp_confirmed->nfs_client.verifier ==
7105 						args->client.verifier) {
7106 			/* Setup callback information */
7107 			rfs4_client_setcb(cp_confirmed, &args->callback,
7108 						args->callback_ident);
7109 
7110 			/* everything okay -- move ahead */
7111 			*cs->statusp = res->status = NFS4_OK;
7112 			res->SETCLIENTID4res_u.resok4.clientid =
7113 				cp_confirmed->clientid;
7114 
7115 			/* update the confirm_verifier and return it */
7116 			rfs4_client_scv_next(cp_confirmed);
7117 			res->SETCLIENTID4res_u.resok4.setclientid_confirm =
7118 						cp_confirmed->confirm_verf;
7119 
7120 			rfs4_client_rele(cp_confirmed);
7121 			return;
7122 		}
7123 
7124 		/*
7125 		 * Creds match but the verifier doesn't.  Must search
7126 		 * for an unconfirmed client that would be replaced by
7127 		 * this request.
7128 		 */
7129 		create = FALSE;
7130 		cp_unconfirmed = rfs4_findclient(&args->client, &create,
7131 						cp_confirmed);
7132 	}
7133 
7134 	/*
7135 	 * At this point, we have taken care of the brand new client
7136 	 * struct, INUSE case, update of an existing, and confirmed
7137 	 * client struct.
7138 	 */
7139 
7140 	/*
7141 	 * check to see if things have changed while we originally
7142 	 * picked up the client struct.  If they have, then return and
7143 	 * retry the processing of this SETCLIENTID request.
7144 	 */
7145 	if (cp_unconfirmed) {
7146 		rfs4_dbe_lock(cp_unconfirmed->dbe);
7147 		if (!cp_unconfirmed->need_confirm) {
7148 			rfs4_dbe_unlock(cp_unconfirmed->dbe);
7149 			rfs4_client_rele(cp_unconfirmed);
7150 			if (cp_confirmed)
7151 				rfs4_client_rele(cp_confirmed);
7152 			goto retry;
7153 		}
7154 		/* do away with the old unconfirmed one */
7155 		rfs4_dbe_invalidate(cp_unconfirmed->dbe);
7156 		rfs4_dbe_unlock(cp_unconfirmed->dbe);
7157 		rfs4_client_rele(cp_unconfirmed);
7158 		cp_unconfirmed = NULL;
7159 	}
7160 
7161 	/*
7162 	 * This search will temporarily hide the confirmed client
7163 	 * struct while a new client struct is created as the
7164 	 * unconfirmed one.
7165 	 */
7166 	create = TRUE;
7167 	newcp = rfs4_findclient(&args->client, &create, cp_confirmed);
7168 
7169 	ASSERT(newcp != NULL);
7170 
7171 	if (newcp == NULL) {
7172 		*cs->statusp = res->status = NFS4ERR_SERVERFAULT;
7173 		rfs4_client_rele(cp_confirmed);
7174 		return;
7175 	}
7176 
7177 	/*
7178 	 * If one was not created, then a similar request must be in
7179 	 * process so release and start over with this one
7180 	 */
7181 	if (create != TRUE) {
7182 		rfs4_client_rele(newcp);
7183 		if (cp_confirmed)
7184 			rfs4_client_rele(cp_confirmed);
7185 		goto retry;
7186 	}
7187 
7188 	*cs->statusp = res->status = NFS4_OK;
7189 	res->SETCLIENTID4res_u.resok4.clientid = newcp->clientid;
7190 	res->SETCLIENTID4res_u.resok4.setclientid_confirm =
7191 							newcp->confirm_verf;
7192 	/* Setup callback information; CB_NULL confirmation later */
7193 	rfs4_client_setcb(newcp, &args->callback,
7194 				args->callback_ident);
7195 
7196 	newcp->cp_confirmed = cp_confirmed;
7197 
7198 	rfs4_client_rele(newcp);
7199 }
7200 
7201 /*ARGSUSED*/
7202 void
7203 rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
7204 			    struct svc_req *req, struct compound_state *cs)
7205 {
7206 	SETCLIENTID_CONFIRM4args *args =
7207 		&argop->nfs_argop4_u.opsetclientid_confirm;
7208 	SETCLIENTID_CONFIRM4res *res =
7209 		&resop->nfs_resop4_u.opsetclientid_confirm;
7210 	rfs4_client_t *cp, *cptoclose = NULL;
7211 
7212 	*cs->statusp = res->status = NFS4_OK;
7213 
7214 	cp = rfs4_findclient_by_id(args->clientid, TRUE);
7215 
7216 	if (cp == NULL) {
7217 		*cs->statusp = res->status =
7218 			rfs4_check_clientid(&args->clientid, 1);
7219 		return;
7220 	}
7221 
7222 	if (!creds_ok(cp, req, cs)) {
7223 		*cs->statusp = res->status = NFS4ERR_CLID_INUSE;
7224 		rfs4_client_rele(cp);
7225 		return;
7226 	}
7227 
7228 	/* If the verifier doesn't match, the record doesn't match */
7229 	if (cp->confirm_verf != args->setclientid_confirm) {
7230 		*cs->statusp = res->status = NFS4ERR_STALE_CLIENTID;
7231 		rfs4_client_rele(cp);
7232 		return;
7233 	}
7234 
7235 	rfs4_dbe_lock(cp->dbe);
7236 	cp->need_confirm = FALSE;
7237 	if (cp->cp_confirmed) {
7238 		cptoclose = cp->cp_confirmed;
7239 		cptoclose->ss_remove = 1;
7240 		cp->cp_confirmed = NULL;
7241 	}
7242 
7243 	/*
7244 	 * Record clientid in stable storage
7245 	 */
7246 	rfs4_ss_clid(cp, req);
7247 
7248 	rfs4_dbe_unlock(cp->dbe);
7249 
7250 	if (cptoclose)
7251 		/* don't need to rele, client_close does it */
7252 		rfs4_client_close(cptoclose);
7253 
7254 	/* If needed, initiate CB_NULL call for callback path */
7255 	rfs4_deleg_cb_check(cp);
7256 	rfs4_update_lease(cp);
7257 
7258 	/*
7259 	 * Update the client's associated server instance, if it's changed
7260 	 * since the client was created.
7261 	 */
7262 	if (rfs4_servinst(cp) != rfs4_cur_servinst)
7263 		rfs4_servinst_assign(cp, rfs4_cur_servinst);
7264 
7265 	/*
7266 	 * Check to see if client can perform reclaims
7267 	 */
7268 	rfs4_ss_chkclid(cp);
7269 
7270 	rfs4_client_rele(cp);
7271 }
7272 
7273 
7274 /*ARGSUSED*/
7275 void
7276 rfs4_op_close(nfs_argop4 *argop, nfs_resop4 *resop,
7277 	    struct svc_req *req, struct compound_state *cs)
7278 {
7279 	/* XXX Currently not using req arg */
7280 	CLOSE4args *args = &argop->nfs_argop4_u.opclose;
7281 	CLOSE4res *resp = &resop->nfs_resop4_u.opclose;
7282 	rfs4_state_t *sp;
7283 	nfsstat4 status;
7284 
7285 	if (cs->vp == NULL) {
7286 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7287 		return;
7288 	}
7289 
7290 	status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_INVALID);
7291 	if (status != NFS4_OK) {
7292 		*cs->statusp = resp->status = status;
7293 		return;
7294 	}
7295 
7296 	/* Ensure specified filehandle matches */
7297 	if (cs->vp != sp->finfo->vp) {
7298 		rfs4_state_rele(sp);
7299 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7300 		return;
7301 	}
7302 
7303 	/* hold off other access to open_owner while we tinker */
7304 	rfs4_sw_enter(&sp->owner->oo_sw);
7305 
7306 	switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7307 	case NFS4_CHECK_STATEID_OKAY:
7308 		if (rfs4_check_open_seqid(args->seqid, sp->owner,
7309 			resop) != NFS4_CHKSEQ_OKAY) {
7310 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7311 			goto end;
7312 		}
7313 		break;
7314 	case NFS4_CHECK_STATEID_OLD:
7315 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7316 		goto end;
7317 	case NFS4_CHECK_STATEID_BAD:
7318 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7319 		goto end;
7320 	case NFS4_CHECK_STATEID_EXPIRED:
7321 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
7322 		goto end;
7323 	case NFS4_CHECK_STATEID_CLOSED:
7324 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7325 		goto end;
7326 	case NFS4_CHECK_STATEID_UNCONFIRMED:
7327 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7328 		goto end;
7329 	case NFS4_CHECK_STATEID_REPLAY:
7330 		/* Check the sequence id for the open owner */
7331 		switch (rfs4_check_open_seqid(args->seqid, sp->owner, resop)) {
7332 		case NFS4_CHKSEQ_OKAY:
7333 			/*
7334 			 * This is replayed stateid; if seqid matches
7335 			 * next expected, then client is using wrong seqid.
7336 			 */
7337 			/* FALL THROUGH */
7338 		case NFS4_CHKSEQ_BAD:
7339 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7340 			goto end;
7341 		case NFS4_CHKSEQ_REPLAY:
7342 			/*
7343 			 * Note this case is the duplicate case so
7344 			 * resp->status is already set.
7345 			 */
7346 			*cs->statusp = resp->status;
7347 			rfs4_update_lease(sp->owner->client);
7348 			goto end;
7349 		}
7350 		break;
7351 	default:
7352 		ASSERT(FALSE);
7353 		break;
7354 	}
7355 
7356 	rfs4_dbe_lock(sp->dbe);
7357 
7358 	/* Update the stateid. */
7359 	next_stateid(&sp->stateid);
7360 	resp->open_stateid = sp->stateid.stateid;
7361 
7362 	rfs4_dbe_unlock(sp->dbe);
7363 
7364 	rfs4_update_lease(sp->owner->client);
7365 	rfs4_update_open_sequence(sp->owner);
7366 	rfs4_update_open_resp(sp->owner, resop, NULL);
7367 
7368 	rfs4_state_close(sp, FALSE, FALSE, cs->cr);
7369 
7370 	*cs->statusp = resp->status = status;
7371 
7372 end:
7373 	rfs4_sw_exit(&sp->owner->oo_sw);
7374 	rfs4_state_rele(sp);
7375 }
7376 
7377 /*
7378  * Manage the counts on the file struct and close all file locks
7379  */
7380 /*ARGSUSED*/
7381 void
7382 rfs4_release_share_lock_state(rfs4_state_t *sp, cred_t *cr,
7383 	bool_t close_of_client)
7384 {
7385 	rfs4_file_t *fp = sp->finfo;
7386 	rfs4_lo_state_t *lsp;
7387 	struct shrlock shr;
7388 	struct shr_locowner shr_loco;
7389 	int fflags, s_access, s_deny;
7390 
7391 	fflags = s_access = s_deny = 0;
7392 	/*
7393 	 * Decrement the count for each access and deny bit that this
7394 	 * state has contributed to the file. If the file counts go to zero
7395 	 * clear the appropriate bit in the appropriate mask.
7396 	 */
7397 
7398 	if (sp->share_access & OPEN4_SHARE_ACCESS_READ) {
7399 		fp->access_read--;
7400 		fflags |= FREAD;
7401 		s_access |= F_RDACC;
7402 		if (fp->access_read == 0)
7403 			fp->share_access &= ~OPEN4_SHARE_ACCESS_READ;
7404 	}
7405 	if (sp->share_access & OPEN4_SHARE_ACCESS_WRITE) {
7406 		fp->access_write--;
7407 		fflags |= FWRITE;
7408 		s_access |= F_WRACC;
7409 		if (fp->access_write == 0)
7410 			fp->share_access &= ~OPEN4_SHARE_ACCESS_WRITE;
7411 	}
7412 	if (sp->share_deny & OPEN4_SHARE_DENY_READ) {
7413 		fp->deny_read--;
7414 		s_deny |= F_RDDNY;
7415 		if (fp->deny_read == 0)
7416 			fp->share_deny &= ~OPEN4_SHARE_DENY_READ;
7417 	}
7418 	if (sp->share_deny & OPEN4_SHARE_DENY_WRITE) {
7419 		fp->deny_write--;
7420 		s_deny |= F_WRDNY;
7421 		if (fp->deny_write == 0)
7422 			fp->share_deny &= ~OPEN4_SHARE_DENY_WRITE;
7423 	}
7424 
7425 	/*
7426 	 * If this call is part of the larger closing down of client
7427 	 * state then it is just easier to release all locks
7428 	 * associated with this client instead of going through each
7429 	 * individual file and cleaning locks there.
7430 	 */
7431 	if (close_of_client) {
7432 		if (sp->owner->client->unlksys_completed == FALSE &&
7433 		    sp->lockownerlist.next->lsp != NULL &&
7434 			sp->owner->client->sysidt != LM_NOSYSID) {
7435 			/* Is the PxFS kernel module loaded? */
7436 			if (lm_remove_file_locks != NULL) {
7437 				int new_sysid;
7438 
7439 				/* Encode the cluster nodeid in new sysid */
7440 				new_sysid = sp->owner->client->sysidt;
7441 				lm_set_nlmid_flk(&new_sysid);
7442 
7443 				/*
7444 				 * This PxFS routine removes file locks for a
7445 				 * client over all nodes of a cluster.
7446 				 */
7447 				NFS4_DEBUG(rfs4_debug, (CE_NOTE,
7448 				    "lm_remove_file_locks(sysid=0x%x)\n",
7449 				    new_sysid));
7450 				(*lm_remove_file_locks)(new_sysid);
7451 			} else {
7452 				struct flock64 flk;
7453 
7454 				/* Release all locks for this client */
7455 				flk.l_type = F_UNLKSYS;
7456 				flk.l_whence = 0;
7457 				flk.l_start = 0;
7458 				flk.l_len = 0;
7459 				flk.l_sysid = sp->owner->client->sysidt;
7460 				flk.l_pid = 0;
7461 				(void) VOP_FRLOCK(sp->finfo->vp, F_SETLK, &flk,
7462 				    F_REMOTELOCK | FREAD | FWRITE,
7463 				    (u_offset_t)0, NULL, CRED());
7464 			}
7465 
7466 			sp->owner->client->unlksys_completed = TRUE;
7467 		}
7468 	}
7469 
7470 	/*
7471 	 * Release all locks on this file by this lock owner or at
7472 	 * least mark the locks as having been released
7473 	 */
7474 	for (lsp = sp->lockownerlist.next->lsp; lsp != NULL;
7475 		lsp = lsp->lockownerlist.next->lsp) {
7476 
7477 		lsp->locks_cleaned = TRUE;
7478 
7479 		/* Was this already taken care of above? */
7480 		if (!close_of_client &&
7481 		    sp->owner->client->sysidt != LM_NOSYSID)
7482 			(void) cleanlocks(sp->finfo->vp, lsp->locker->pid,
7483 				lsp->locker->client->sysidt);
7484 	}
7485 
7486 	/*
7487 	 * Release any shrlocks associated with this open state ID.
7488 	 * This must be done before the rfs4_state gets marked closed.
7489 	 */
7490 	if (sp->owner->client->sysidt != LM_NOSYSID) {
7491 		shr.s_access = s_access;
7492 		shr.s_deny = s_deny;
7493 		shr.s_pid = rfs4_dbe_getid(sp->owner->dbe);
7494 		shr.s_sysid = sp->owner->client->sysidt;
7495 		shr_loco.sl_pid = shr.s_pid;
7496 		shr_loco.sl_id = shr.s_sysid;
7497 		shr.s_owner = (caddr_t)&shr_loco;
7498 		shr.s_own_len = sizeof (shr_loco);
7499 		(void) vop_shrlock(sp->finfo->vp, F_UNSHARE, &shr, fflags);
7500 	}
7501 }
7502 
7503 /*
7504  * lock_denied: Fill in a LOCK4deneid structure given an flock64 structure.
7505  */
7506 static nfsstat4
7507 lock_denied(LOCK4denied *dp, struct flock64 *flk)
7508 {
7509 	rfs4_lockowner_t *lo;
7510 	rfs4_client_t *cp;
7511 	uint32_t len;
7512 
7513 	lo = rfs4_findlockowner_by_pid(flk->l_pid);
7514 	if (lo != NULL) {
7515 		cp = lo->client;
7516 		if (rfs4_lease_expired(cp)) {
7517 			rfs4_lockowner_rele(lo);
7518 			rfs4_dbe_hold(cp->dbe);
7519 			rfs4_client_close(cp);
7520 			return (NFS4ERR_EXPIRED);
7521 		}
7522 		dp->owner.clientid = lo->owner.clientid;
7523 		len = lo->owner.owner_len;
7524 		dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
7525 		bcopy(lo->owner.owner_val, dp->owner.owner_val, len);
7526 		dp->owner.owner_len = len;
7527 		rfs4_lockowner_rele(lo);
7528 		goto finish;
7529 	}
7530 
7531 	/*
7532 	 * Its not a NFS4 lock. We take advantage that the upper 32 bits
7533 	 * of the client id contain the boot time for a NFS4 lock. So we
7534 	 * fabricate and identity by setting clientid to the sysid, and
7535 	 * the lock owner to the pid.
7536 	 */
7537 	dp->owner.clientid = flk->l_sysid;
7538 	len = sizeof (pid_t);
7539 	dp->owner.owner_len = len;
7540 	dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
7541 	bcopy(&flk->l_pid, dp->owner.owner_val, len);
7542 finish:
7543 	dp->offset = flk->l_start;
7544 	dp->length = flk->l_len;
7545 
7546 	if (flk->l_type == F_RDLCK)
7547 		dp->locktype = READ_LT;
7548 	else if (flk->l_type == F_WRLCK)
7549 		dp->locktype = WRITE_LT;
7550 	else
7551 		return (NFS4ERR_INVAL);	/* no mapping from POSIX ltype to v4 */
7552 
7553 	return (NFS4_OK);
7554 }
7555 
7556 static int
7557 setlock(vnode_t *vp, struct flock64 *flock, int flag, cred_t *cred)
7558 {
7559 	int error;
7560 	struct flock64 flk;
7561 	int i;
7562 	clock_t delaytime;
7563 
7564 retry:
7565 	delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
7566 
7567 	for (i = 0; i < rfs4_maxlock_tries; i++) {
7568 		LOCK_PRINT(rfs4_debug, "setlock", F_SETLK, flock);
7569 		error = VOP_FRLOCK(vp, F_SETLK,
7570 				flock, flag, (u_offset_t)0, NULL, cred);
7571 
7572 		if (error != EAGAIN && error != EACCES)
7573 			break;
7574 
7575 		if (i < rfs4_maxlock_tries - 1) {
7576 			delay(delaytime);
7577 			delaytime *= 2;
7578 		}
7579 	}
7580 
7581 	if (error == EAGAIN || error == EACCES) {
7582 		/* Get the owner of the lock */
7583 		flk = *flock;
7584 		LOCK_PRINT(rfs4_debug, "setlock", F_GETLK, &flk);
7585 		if (VOP_FRLOCK(vp, F_GETLK,
7586 			    &flk,  flag, (u_offset_t)0, NULL, cred) == 0) {
7587 			if (flk.l_type == F_UNLCK) {
7588 				/* No longer locked, retry */
7589 				goto retry;
7590 			}
7591 			*flock = flk;
7592 			LOCK_PRINT(rfs4_debug, "setlock(blocking lock)",
7593 				F_GETLK, &flk);
7594 		}
7595 	}
7596 
7597 	return (error);
7598 }
7599 
7600 /*ARGSUSED*/
7601 static nfsstat4
7602 rfs4_do_lock(rfs4_lo_state_t *lp, nfs_lock_type4 locktype,
7603 	    seqid4 seqid, offset4 offset,
7604 	    length4 length, cred_t *cred, nfs_resop4 *resop)
7605 {
7606 	nfsstat4 status;
7607 	rfs4_lockowner_t *lo = lp->locker;
7608 	rfs4_state_t *sp = lp->state;
7609 	struct flock64 flock;
7610 	int16_t ltype;
7611 	int flag;
7612 	int error;
7613 	sysid_t sysid;
7614 	LOCK4res *lres;
7615 
7616 	if (rfs4_lease_expired(lo->client)) {
7617 		return (NFS4ERR_EXPIRED);
7618 	}
7619 
7620 	if ((status = rfs4_client_sysid(lo->client, &sysid)) != NFS4_OK)
7621 		return (status);
7622 
7623 	/* Check for zero length. To lock to end of file use all ones for V4 */
7624 	if (length == 0)
7625 		return (NFS4ERR_INVAL);
7626 	else if (length == (length4)(~0))
7627 		length = 0;		/* Posix to end of file  */
7628 
7629 retry:
7630 	rfs4_dbe_lock(sp->dbe);
7631 
7632 
7633 	if (resop->resop != OP_LOCKU) {
7634 		switch (locktype) {
7635 		case READ_LT:
7636 		case READW_LT:
7637 			if ((sp->share_access
7638 			    & OPEN4_SHARE_ACCESS_READ) == 0) {
7639 				rfs4_dbe_unlock(sp->dbe);
7640 
7641 				return (NFS4ERR_OPENMODE);
7642 			}
7643 			ltype = F_RDLCK;
7644 			break;
7645 		case WRITE_LT:
7646 		case WRITEW_LT:
7647 			if ((sp->share_access
7648 			    & OPEN4_SHARE_ACCESS_WRITE) == 0) {
7649 				rfs4_dbe_unlock(sp->dbe);
7650 
7651 				return (NFS4ERR_OPENMODE);
7652 			}
7653 			ltype = F_WRLCK;
7654 			break;
7655 		}
7656 	} else
7657 		ltype = F_UNLCK;
7658 
7659 	flock.l_type = ltype;
7660 	flock.l_whence = 0;		/* SEEK_SET */
7661 	flock.l_start = offset;
7662 	flock.l_len = length;
7663 	flock.l_sysid = sysid;
7664 	flock.l_pid = lp->locker->pid;
7665 
7666 	/* Note that length4 is uint64_t but l_len and l_start are off64_t */
7667 	if (flock.l_len < 0 || flock.l_start < 0) {
7668 		rfs4_dbe_unlock(sp->dbe);
7669 		return (NFS4ERR_INVAL);
7670 	}
7671 
7672 	/*
7673 	 * N.B. FREAD has the same value as OPEN4_SHARE_ACCESS_READ and
7674 	 * FWRITE has the same value as OPEN4_SHARE_ACCESS_WRITE.
7675 	 */
7676 	flag = (int)sp->share_access | F_REMOTELOCK;
7677 
7678 	error = setlock(sp->finfo->vp, &flock, flag, cred);
7679 	if (error == 0) {
7680 		rfs4_dbe_lock(lp->dbe);
7681 		next_stateid(&lp->lockid);
7682 		rfs4_dbe_unlock(lp->dbe);
7683 	}
7684 
7685 	rfs4_dbe_unlock(sp->dbe);
7686 
7687 	/*
7688 	 * N.B. We map error values to nfsv4 errors. This is differrent
7689 	 * than puterrno4 routine.
7690 	 */
7691 	switch (error) {
7692 	case 0:
7693 		status = NFS4_OK;
7694 		break;
7695 	case EAGAIN:
7696 	case EACCES:		/* Old value */
7697 		/* Can only get here if op is OP_LOCK */
7698 		ASSERT(resop->resop == OP_LOCK);
7699 		lres = &resop->nfs_resop4_u.oplock;
7700 		status = NFS4ERR_DENIED;
7701 		if (lock_denied(&lres->LOCK4res_u.denied, &flock)
7702 			== NFS4ERR_EXPIRED)
7703 			goto retry;
7704 		break;
7705 	case ENOLCK:
7706 		status = NFS4ERR_DELAY;
7707 		break;
7708 	case EOVERFLOW:
7709 		status = NFS4ERR_INVAL;
7710 		break;
7711 	case EINVAL:
7712 		status = NFS4ERR_NOTSUPP;
7713 		break;
7714 	default:
7715 		cmn_err(CE_WARN, "rfs4_do_lock: unexpected errno (%d)",
7716 			error);
7717 		status = NFS4ERR_SERVERFAULT;
7718 		break;
7719 	}
7720 
7721 	return (status);
7722 }
7723 
/*
 * rfs4_op_lock: handler for the NFSv4 LOCK operation.
 *
 * Two forms of request are handled:
 *
 *  - new_lock_owner is set: the supplied open stateid is looked up and
 *    checked, the open-owner seqid is checked, and a lockowner and lock
 *    state (rfs4_lo_state_t) are found or created for it.  A duplicate
 *    request is answered from the reply saved in the lock state.
 *
 *  - otherwise: the lock state is looked up from the supplied lock
 *    stateid, and the stateid and lock seqid are checked, including the
 *    replay cases.
 *
 * Once the request has been validated, the object type and the
 * grace/reclaim conditions are checked and the lock is attempted via
 * rfs4_do_lock().
 *
 * The result is stored in both resp->status and *cs->statusp.  The "out"
 * label records the reply (rfs4_update_open_resp() when an open state was
 * obtained, and rfs4_update_lock_resp()); the "end" label drops whatever
 * holds and serialization were taken along the way.
 */
/*ARGSUSED*/
void
rfs4_op_lock(nfs_argop4 *argop, nfs_resop4 *resop,
	    struct svc_req *req, struct compound_state *cs)
{
	/* XXX Currently not using req arg */
	LOCK4args *args = &argop->nfs_argop4_u.oplock;
	LOCK4res *resp = &resop->nfs_resop4_u.oplock;
	nfsstat4 status;
	stateid4 *stateid;
	rfs4_lockowner_t *lo;
	rfs4_client_t *cp;
	rfs4_state_t *sp = NULL;	/* only set for new_lock_owner */
	rfs4_lo_state_t *lsp = NULL;
	bool_t ls_sw_held = FALSE;	/* TRUE once lsp->ls_sw is entered */
	bool_t create = TRUE;
	bool_t lcreate = TRUE;
	bool_t dup_lock = FALSE;	/* TRUE for a replayed LOCK request */
	int rc;

	if (cs->vp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		return;
	}

	if (args->locker.new_lock_owner) {
		/* Create a new lockowner for this instance */
		open_to_lock_owner4 *olo = &args->locker.locker4_u.open_owner;

		NFS4_DEBUG(rfs4_debug, (CE_NOTE, "Creating new lock owner"));

		stateid = &olo->open_stateid;
		status = rfs4_get_state(stateid, &sp, RFS4_DBS_VALID);
		if (status != NFS4_OK) {
			NFS4_DEBUG(rfs4_debug,
				(CE_NOTE, "Get state failed in lock %d",
				status));
			*cs->statusp = resp->status = status;
			return;
		}

		/* Ensure specified filehandle matches */
		if (cs->vp != sp->finfo->vp) {
			rfs4_state_rele(sp);
			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
			return;
		}

		/* hold off other access to open_owner while we tinker */
		rfs4_sw_enter(&sp->owner->oo_sw);

		switch (rc = rfs4_check_stateid_seqid(sp, stateid)) {
		case NFS4_CHECK_STATEID_OLD:
			*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
			goto end;
		case NFS4_CHECK_STATEID_BAD:
			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
			goto end;
		case NFS4_CHECK_STATEID_EXPIRED:
			*cs->statusp = resp->status = NFS4ERR_EXPIRED;
			goto end;
		case NFS4_CHECK_STATEID_UNCONFIRMED:
			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
			goto end;
		case NFS4_CHECK_STATEID_CLOSED:
			*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
			goto end;
		case NFS4_CHECK_STATEID_OKAY:
		case NFS4_CHECK_STATEID_REPLAY:
			switch (rfs4_check_olo_seqid(olo->open_seqid,
				sp->owner, resop)) {
			case NFS4_CHKSEQ_OKAY:
				if (rc == NFS4_CHECK_STATEID_OKAY)
					break;
				/*
				 * This is replayed stateid; if seqid
				 * matches next expected, then client
				 * is using wrong seqid.
				 */
				/* FALLTHROUGH */
			case NFS4_CHKSEQ_BAD:
				*cs->statusp = resp->status =
					NFS4ERR_BAD_SEQID;
				goto end;
			case NFS4_CHKSEQ_REPLAY:
				/* This is a duplicate LOCK request */
				dup_lock = TRUE;

				/*
				 * For a duplicate we do not want to
				 * create a new lockowner as it should
				 * already exist.
				 * Turn off the lockowner create flag.
				 */
				lcreate = FALSE;
			}
			break;
		}

		lo = rfs4_findlockowner(&olo->lock_owner, &lcreate);
		if (lo == NULL) {
			NFS4_DEBUG(rfs4_debug,
				(CE_NOTE, "rfs4_op_lock: no lock owner"));
			*cs->statusp = resp->status = NFS4ERR_RESOURCE;
			goto end;
		}

		lsp = rfs4_findlo_state_by_owner(lo, sp, &create);
		if (lsp == NULL) {
			rfs4_update_lease(sp->owner->client);
			/*
			 * Only update the open_seqid if this is not
			 * a duplicate request
			 */
			if (dup_lock == FALSE) {
				rfs4_update_open_sequence(sp->owner);
			}

			NFS4_DEBUG(rfs4_debug,
				(CE_NOTE, "rfs4_op_lock: no state"));
			*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
			rfs4_update_open_resp(sp->owner, resop, NULL);
			rfs4_lockowner_rele(lo);
			goto end;
		}

		/*
		 * This is the new_lock_owner branch and the client is
		 * supposed to be associating a new lock_owner with
		 * the open file at this point.  If we find that a
		 * lock_owner/state association already exists and a
		 * successful LOCK request was returned to the client,
		 * an error is returned to the client since this is
		 * not appropriate.  The client should be using the
		 * existing lock_owner branch.
		 */
		if (dup_lock == FALSE && create == FALSE) {
			if (lsp->lock_completed == TRUE) {
				*cs->statusp =
					resp->status = NFS4ERR_BAD_SEQID;
				rfs4_lockowner_rele(lo);
				goto end;
			}
		}

		rfs4_update_lease(sp->owner->client);

		/*
		 * Only update the open_seqid if this is not
		 * a duplicate request
		 */
		if (dup_lock == FALSE) {
			rfs4_update_open_sequence(sp->owner);
		}

		/*
		 * If this is a duplicate lock request, just copy the
		 * previously saved reply and return.
		 */
		if (dup_lock == TRUE) {
			/* verify that lock_seqid's match */
			if (lsp->seqid != olo->lock_seqid) {
				NFS4_DEBUG(rfs4_debug,
				(CE_NOTE, "rfs4_op_lock: Dup-Lock seqid bad"
				"lsp->seqid=%d old->seqid=%d",
				lsp->seqid, olo->lock_seqid));
				*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
			} else {
				rfs4_copy_reply(resop, lsp->reply);
				/*
				 * Make sure to copy the just
				 * retrieved reply status into the
				 * overall compound status
				 */
				*cs->statusp = resp->status;
			}
			rfs4_lockowner_rele(lo);
			goto end;
		}

		rfs4_dbe_lock(lsp->dbe);

		/* Make sure to update the lock sequence id */
		lsp->seqid = olo->lock_seqid;

		NFS4_DEBUG(rfs4_debug,
			(CE_NOTE, "Lock seqid established as %d", lsp->seqid));

		/*
		 * This is used to signify the newly created lockowner
		 * stateid and its sequence number.  The checks for
		 * sequence number and increment don't occur on the
		 * very first lock request for a lockowner.
		 */
		lsp->skip_seqid_check = TRUE;

		/* hold off other access to lsp while we tinker */
		rfs4_sw_enter(&lsp->ls_sw);
		ls_sw_held = TRUE;

		rfs4_dbe_unlock(lsp->dbe);

		rfs4_lockowner_rele(lo);
	} else {
		stateid = &args->locker.locker4_u.lock_owner.lock_stateid;
		/* get lsp and hold the lock on the underlying file struct */
		if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE))
		    != NFS4_OK) {
			*cs->statusp = resp->status = status;
			return;
		}
		create = FALSE;	/* We didn't create lsp */

		/* Ensure specified filehandle matches */
		if (cs->vp != lsp->state->finfo->vp) {
			rfs4_lo_state_rele(lsp, TRUE);
			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
			return;
		}

		/* hold off other access to lsp while we tinker */
		rfs4_sw_enter(&lsp->ls_sw);
		ls_sw_held = TRUE;

		switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
		/*
		 * The stateid looks like it was okay (expected to be
		 * the next one)
		 */
		case NFS4_CHECK_STATEID_OKAY:
			/*
			 * The sequence id is now checked.  Determine
			 * if this is a replay or if it is in the
			 * expected (next) sequence.  In the case of a
			 * replay, there are two replay conditions
			 * that may occur.  The first is the normal
			 * condition where a LOCK is done with a
			 * NFS4_OK response and the stateid is
			 * updated.  That case is handled below when
			 * the stateid is identified as a REPLAY.  The
			 * second is the case where an error is
			 * returned, like NFS4ERR_DENIED, and the
			 * sequence number is updated but the stateid
			 * is not updated.  This second case is dealt
			 * with here.  So it may seem odd that the
			 * stateid is okay but the sequence id is a
			 * replay but it is okay.
			 */
			switch (rfs4_check_lock_seqid(
				args->locker.locker4_u.lock_owner.lock_seqid,
				lsp, resop)) {
			case NFS4_CHKSEQ_REPLAY:
				if (resp->status != NFS4_OK) {
					/*
					 * Here is our replay and need
					 * to verify that the last
					 * response was an error.
					 */
					*cs->statusp = resp->status;
					goto end;
				}
				/*
				 * This is done since the sequence id
				 * looked like a replay but it didn't
				 * pass our check so a BAD_SEQID is
				 * returned as a result.
				 */
				/*FALLTHROUGH*/
			case NFS4_CHKSEQ_BAD:
				*cs->statusp = resp->status =
					NFS4ERR_BAD_SEQID;
				goto end;
			case NFS4_CHKSEQ_OKAY:
				/* Everything looks okay move ahead */
				break;
			}
			break;
		case NFS4_CHECK_STATEID_OLD:
			*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
			goto end;
		case NFS4_CHECK_STATEID_BAD:
			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
			goto end;
		case NFS4_CHECK_STATEID_EXPIRED:
			*cs->statusp = resp->status = NFS4ERR_EXPIRED;
			goto end;
		case NFS4_CHECK_STATEID_CLOSED:
			*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
			goto end;
		case NFS4_CHECK_STATEID_REPLAY:
			switch (rfs4_check_lock_seqid(
				args->locker.locker4_u.lock_owner.lock_seqid,
				lsp, resop)) {
			case NFS4_CHKSEQ_OKAY:
				/*
				 * This is a replayed stateid; if
				 * seqid matches the next expected,
				 * then client is using wrong seqid.
				 */
				/* FALLTHROUGH */
			case NFS4_CHKSEQ_BAD:
				*cs->statusp = resp->status =
					NFS4ERR_BAD_SEQID;
				goto end;
			case NFS4_CHKSEQ_REPLAY:
				/*
				 * Replayed request: resp->status is returned
				 * as is (presumably filled in from the saved
				 * reply by rfs4_check_lock_seqid - confirm).
				 */
				rfs4_update_lease(lsp->locker->client);
				*cs->statusp = status = resp->status;
				goto end;
			}
			break;
		default:
			ASSERT(FALSE);
			break;
		}

		rfs4_update_lock_sequence(lsp);
		rfs4_update_lease(lsp->locker->client);
	}

	/*
	 * NFS4 only allows locking on regular files, so
	 * verify type of object.
	 */
	if (cs->vp->v_type != VREG) {
		if (cs->vp->v_type == VDIR)
			status = NFS4ERR_ISDIR;
		else
			status = NFS4ERR_INVAL;
		goto out;
	}

	cp = lsp->state->owner->client;

	/* Only reclaims are allowed while the client is in grace */
	if (rfs4_clnt_in_grace(cp) && !args->reclaim) {
		status = NFS4ERR_GRACE;
		goto out;
	}

	/* A reclaim in grace also requires that the client may reclaim */
	if (rfs4_clnt_in_grace(cp) && args->reclaim && !cp->can_reclaim) {
		status = NFS4ERR_NO_GRACE;
		goto out;
	}

	/* Reclaims are refused once the client is out of grace */
	if (!rfs4_clnt_in_grace(cp) && args->reclaim) {
		status = NFS4ERR_NO_GRACE;
		goto out;
	}

	if (lsp->state->finfo->dinfo->dtype == OPEN_DELEGATE_WRITE)
		cs->deleg = TRUE;

	status = rfs4_do_lock(lsp, args->locktype,
				args->locker.locker4_u.lock_owner.lock_seqid,
				args->offset,
				args->length, cs->cr, resop);

out:
	/* Reached only with a valid lsp and with status set */
	lsp->skip_seqid_check = FALSE;

	*cs->statusp = resp->status = status;

	if (status == NFS4_OK) {
		resp->LOCK4res_u.lock_stateid = lsp->lockid.stateid;
		lsp->lock_completed = TRUE;
	}
	/*
	 * Only update the "OPEN" response here if this was a new
	 * lock_owner
	 */
	if (sp)
		rfs4_update_open_resp(sp->owner, resop, NULL);

	rfs4_update_lock_resp(lsp, resop);

end:
	/* Drop whatever serialization and holds were obtained above */
	if (lsp) {
		if (ls_sw_held)
			rfs4_sw_exit(&lsp->ls_sw);
		/*
		 * If an sp obtained, then the lsp does not represent
		 * a lock on the file struct.
		 */
		if (sp != NULL)
			rfs4_lo_state_rele(lsp, FALSE);
		else
			rfs4_lo_state_rele(lsp, TRUE);
	}
	if (sp) {
		rfs4_sw_exit(&sp->owner->oo_sw);
		rfs4_state_rele(sp);
	}
}
8115 
8116 /* free function for LOCK/LOCKT */
8117 static void
8118 lock_denied_free(nfs_resop4 *resop)
8119 {
8120 	LOCK4denied *dp = NULL;
8121 
8122 	switch (resop->resop) {
8123 	case OP_LOCK:
8124 		if (resop->nfs_resop4_u.oplock.status == NFS4ERR_DENIED)
8125 			dp = &resop->nfs_resop4_u.oplock.LOCK4res_u.denied;
8126 		break;
8127 	case OP_LOCKT:
8128 		if (resop->nfs_resop4_u.oplockt.status == NFS4ERR_DENIED)
8129 			dp = &resop->nfs_resop4_u.oplockt.denied;
8130 		break;
8131 	default:
8132 		break;
8133 	}
8134 
8135 	if (dp)
8136 		kmem_free(dp->owner.owner_val, dp->owner.owner_len);
8137 }
8138 
8139 /*ARGSUSED*/
8140 void
8141 rfs4_op_locku(nfs_argop4 *argop, nfs_resop4 *resop,
8142 	    struct svc_req *req, struct compound_state *cs)
8143 {
8144 	/* XXX Currently not using req arg */
8145 	LOCKU4args *args = &argop->nfs_argop4_u.oplocku;
8146 	LOCKU4res *resp = &resop->nfs_resop4_u.oplocku;
8147 	nfsstat4 status;
8148 	stateid4 *stateid = &args->lock_stateid;
8149 	rfs4_lo_state_t *lsp;
8150 
8151 	if (cs->vp == NULL) {
8152 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8153 		return;
8154 	}
8155 
8156 	if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE)) != NFS4_OK) {
8157 		*cs->statusp = resp->status = status;
8158 		return;
8159 	}
8160 
8161 	/* Ensure specified filehandle matches */
8162 	if (cs->vp != lsp->state->finfo->vp) {
8163 		rfs4_lo_state_rele(lsp, TRUE);
8164 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8165 		return;
8166 	}
8167 
8168 	/* hold off other access to lsp while we tinker */
8169 	rfs4_sw_enter(&lsp->ls_sw);
8170 
8171 	switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
8172 	case NFS4_CHECK_STATEID_OKAY:
8173 		if (rfs4_check_lock_seqid(args->seqid, lsp, resop)
8174 		    != NFS4_CHKSEQ_OKAY) {
8175 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8176 			goto end;
8177 		}
8178 		break;
8179 	case NFS4_CHECK_STATEID_OLD:
8180 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8181 		goto end;
8182 	case NFS4_CHECK_STATEID_BAD:
8183 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8184 		goto end;
8185 	case NFS4_CHECK_STATEID_EXPIRED:
8186 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
8187 		goto end;
8188 	case NFS4_CHECK_STATEID_CLOSED:
8189 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8190 		goto end;
8191 	case NFS4_CHECK_STATEID_REPLAY:
8192 		switch (rfs4_check_lock_seqid(args->seqid, lsp, resop)) {
8193 		case NFS4_CHKSEQ_OKAY:
8194 				/*
8195 				 * This is a replayed stateid; if
8196 				 * seqid matches the next expected,
8197 				 * then client is using wrong seqid.
8198 				 */
8199 		case NFS4_CHKSEQ_BAD:
8200 			*cs->statusp = resp->status =
8201 				NFS4ERR_BAD_SEQID;
8202 			goto end;
8203 		case NFS4_CHKSEQ_REPLAY:
8204 			rfs4_update_lease(lsp->locker->client);
8205 			*cs->statusp = status = resp->status;
8206 			goto end;
8207 		}
8208 		break;
8209 	default:
8210 		ASSERT(FALSE);
8211 		break;
8212 	}
8213 
8214 	rfs4_update_lock_sequence(lsp);
8215 	rfs4_update_lease(lsp->locker->client);
8216 
8217 	/*
8218 	 * NFS4 only allows locking on regular files, so
8219 	 * verify type of object.
8220 	 */
8221 	if (cs->vp->v_type != VREG) {
8222 		if (cs->vp->v_type == VDIR)
8223 			status = NFS4ERR_ISDIR;
8224 		else
8225 			status = NFS4ERR_INVAL;
8226 		goto out;
8227 	}
8228 
8229 	if (rfs4_clnt_in_grace(lsp->state->owner->client)) {
8230 		status = NFS4ERR_GRACE;
8231 		goto out;
8232 	}
8233 
8234 	status = rfs4_do_lock(lsp, args->locktype,
8235 			    args->seqid, args->offset,
8236 			    args->length, cs->cr, resop);
8237 
8238 out:
8239 	*cs->statusp = resp->status = status;
8240 
8241 	if (status == NFS4_OK)
8242 		resp->lock_stateid = lsp->lockid.stateid;
8243 
8244 	rfs4_update_lock_resp(lsp, resop);
8245 
8246 end:
8247 	rfs4_sw_exit(&lsp->ls_sw);
8248 	rfs4_lo_state_rele(lsp, TRUE);
8249 }
8250 
8251 /*
8252  * LOCKT is a best effort routine, the client can not be guaranteed that
8253  * the status return is still in effect by the time the reply is received.
8254  * They are numerous race conditions in this routine, but we are not required
8255  * and can not be accurate.
8256  */
8257 /*ARGSUSED*/
8258 void
8259 rfs4_op_lockt(nfs_argop4 *argop, nfs_resop4 *resop,
8260 	    struct svc_req *req, struct compound_state *cs)
8261 {
8262 	LOCKT4args *args = &argop->nfs_argop4_u.oplockt;
8263 	LOCKT4res *resp = &resop->nfs_resop4_u.oplockt;
8264 	rfs4_lockowner_t *lo;
8265 	rfs4_client_t *cp;
8266 	bool_t create = FALSE;
8267 	struct flock64 flk;
8268 	int error;
8269 	int flag = FREAD | FWRITE;
8270 	int ltype;
8271 	length4 posix_length;
8272 	sysid_t sysid;
8273 	pid_t pid;
8274 
8275 	if (cs->vp == NULL) {
8276 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8277 		return;
8278 	}
8279 
8280 	/*
8281 	 * NFS4 only allows locking on regular files, so
8282 	 * verify type of object.
8283 	 */
8284 	if (cs->vp->v_type != VREG) {
8285 		if (cs->vp->v_type == VDIR)
8286 			*cs->statusp = resp->status = NFS4ERR_ISDIR;
8287 		else
8288 			*cs->statusp = resp->status =  NFS4ERR_INVAL;
8289 		return;
8290 	}
8291 
8292 	/*
8293 	 * Check out the clientid to ensure the server knows about it
8294 	 * so that we correctly inform the client of a server reboot.
8295 	 */
8296 	if ((cp = rfs4_findclient_by_id(args->owner.clientid, FALSE))
8297 	    == NULL) {
8298 		*cs->statusp = resp->status =
8299 			rfs4_check_clientid(&args->owner.clientid, 0);
8300 		return;
8301 	}
8302 	if (rfs4_lease_expired(cp)) {
8303 		rfs4_client_close(cp);
8304 		/*
8305 		 * Protocol doesn't allow returning NFS4ERR_STALE as
8306 		 * other operations do on this check so STALE_CLIENTID
8307 		 * is returned instead
8308 		 */
8309 		*cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID;
8310 		return;
8311 	}
8312 
8313 	if (rfs4_clnt_in_grace(cp)) {
8314 		*cs->statusp = resp->status = NFS4ERR_GRACE;
8315 		return;
8316 	}
8317 	rfs4_client_rele(cp);
8318 
8319 	resp->status = NFS4_OK;
8320 
8321 	switch (args->locktype) {
8322 	case READ_LT:
8323 	case READW_LT:
8324 		ltype = F_RDLCK;
8325 		break;
8326 	case WRITE_LT:
8327 	case WRITEW_LT:
8328 		ltype = F_WRLCK;
8329 		break;
8330 	}
8331 
8332 	posix_length = args->length;
8333 	/* Check for zero length. To lock to end of file use all ones for V4 */
8334 	if (posix_length == 0) {
8335 		*cs->statusp = resp->status = NFS4ERR_INVAL;
8336 		return;
8337 	} else if (posix_length == (length4)(~0)) {
8338 		posix_length = 0;	/* Posix to end of file  */
8339 	}
8340 
8341 	/* Find or create a lockowner */
8342 	lo = rfs4_findlockowner(&args->owner, &create);
8343 
8344 	if (lo) {
8345 		pid = lo->pid;
8346 		if ((resp->status =
8347 			rfs4_client_sysid(lo->client, &sysid)) != NFS4_OK)
8348 		goto out;
8349 	} else {
8350 		pid = 0;
8351 		sysid = lockt_sysid;
8352 	}
8353 retry:
8354 	flk.l_type = ltype;
8355 	flk.l_whence = 0;		/* SEEK_SET */
8356 	flk.l_start = args->offset;
8357 	flk.l_len = posix_length;
8358 	flk.l_sysid = sysid;
8359 	flk.l_pid = pid;
8360 	flag |= F_REMOTELOCK;
8361 
8362 	LOCK_PRINT(rfs4_debug, "rfs4_op_lockt", F_GETLK, &flk);
8363 
8364 	/* Note that length4 is uint64_t but l_len and l_start are off64_t */
8365 	if (flk.l_len < 0 || flk.l_start < 0) {
8366 		resp->status = NFS4ERR_INVAL;
8367 		goto out;
8368 	}
8369 	error = VOP_FRLOCK(cs->vp, F_GETLK, &flk, flag, (u_offset_t)0,
8370 	    NULL, cs->cr);
8371 
8372 	/*
8373 	 * N.B. We map error values to nfsv4 errors. This is differrent
8374 	 * than puterrno4 routine.
8375 	 */
8376 	switch (error) {
8377 	case 0:
8378 		if (flk.l_type == F_UNLCK)
8379 			resp->status = NFS4_OK;
8380 		else {
8381 			if (lock_denied(&resp->denied, &flk) == NFS4ERR_EXPIRED)
8382 				goto retry;
8383 			resp->status = NFS4ERR_DENIED;
8384 		}
8385 		break;
8386 	case EOVERFLOW:
8387 		resp->status = NFS4ERR_INVAL;
8388 		break;
8389 	case EINVAL:
8390 		resp->status = NFS4ERR_NOTSUPP;
8391 		break;
8392 	default:
8393 		cmn_err(CE_WARN, "rfs4_op_lockt: unexpected errno (%d)",
8394 			error);
8395 		resp->status = NFS4ERR_SERVERFAULT;
8396 		break;
8397 	}
8398 
8399 out:
8400 	if (lo)
8401 		rfs4_lockowner_rele(lo);
8402 	*cs->statusp = resp->status;
8403 }
8404 
8405 static int
8406 vop_shrlock(vnode_t *vp, int cmd, struct shrlock *sp, int fflags)
8407 {
8408 	int err;
8409 
8410 	if (cmd == F_UNSHARE && sp->s_deny == 0 && sp->s_access == 0)
8411 		return (0);
8412 
8413 	err = VOP_SHRLOCK(vp, cmd, sp, fflags, CRED());
8414 
8415 	NFS4_DEBUG(rfs4_shrlock_debug,
8416 		(CE_NOTE, "rfs4_shrlock %s vp=%p acc=%d dny=%d sysid=%d "
8417 		"pid=%d err=%d\n", cmd == F_SHARE ? "SHARE" : "UNSHR",
8418 		(void *) vp, sp->s_access, sp->s_deny, sp->s_sysid, sp->s_pid,
8419 		err));
8420 
8421 	return (err);
8422 }
8423 
8424 static int
8425 rfs4_shrlock(rfs4_state_t *sp, int cmd)
8426 {
8427 	struct shrlock shr;
8428 	struct shr_locowner shr_loco;
8429 	int fflags;
8430 
8431 	fflags = shr.s_access = shr.s_deny = 0;
8432 
8433 	if (sp->share_access & OPEN4_SHARE_ACCESS_READ) {
8434 		fflags |= FREAD;
8435 		shr.s_access |= F_RDACC;
8436 	}
8437 	if (sp->share_access & OPEN4_SHARE_ACCESS_WRITE) {
8438 		fflags |= FWRITE;
8439 		shr.s_access |= F_WRACC;
8440 	}
8441 	if (sp->share_deny & OPEN4_SHARE_DENY_READ)
8442 		shr.s_deny |= F_RDDNY;
8443 	if (sp->share_deny & OPEN4_SHARE_DENY_WRITE)
8444 		shr.s_deny |= F_WRDNY;
8445 
8446 	shr.s_pid = rfs4_dbe_getid(sp->owner->dbe);
8447 	shr.s_sysid = sp->owner->client->sysidt;
8448 	shr_loco.sl_pid = shr.s_pid;
8449 	shr_loco.sl_id = shr.s_sysid;
8450 	shr.s_owner = (caddr_t)&shr_loco;
8451 	shr.s_own_len = sizeof (shr_loco);
8452 	return (vop_shrlock(sp->finfo->vp, cmd, &shr, fflags));
8453 }
8454 
8455 static int
8456 rfs4_share(rfs4_state_t *sp)
8457 {
8458 	return (rfs4_shrlock(sp, F_SHARE));
8459 }
8460 
8461 void
8462 rfs4_unshare(rfs4_state_t *sp)
8463 {
8464 	(void) rfs4_shrlock(sp, F_UNSHARE);
8465 }
8466