xref: /illumos-gate/usr/src/uts/common/fs/nfs/nfs4_srv.c (revision 55fea89dcaa64928bed4327112404dcb3e07b79f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 /*
27  *	Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
28  *	All Rights Reserved
29  */
30 
31 /*
32  * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
33  * Copyright 2019 Nexenta Systems, Inc.
34  * Copyright 2019 Nexenta by DDN, Inc.
35  * Copyright 2021 Racktop Systems, Inc.
36  */
37 
38 #include <sys/param.h>
39 #include <sys/types.h>
40 #include <sys/systm.h>
41 #include <sys/cred.h>
42 #include <sys/buf.h>
43 #include <sys/vfs.h>
44 #include <sys/vfs_opreg.h>
45 #include <sys/vnode.h>
46 #include <sys/uio.h>
47 #include <sys/errno.h>
48 #include <sys/sysmacros.h>
49 #include <sys/statvfs.h>
50 #include <sys/kmem.h>
51 #include <sys/dirent.h>
52 #include <sys/cmn_err.h>
53 #include <sys/debug.h>
54 #include <sys/systeminfo.h>
55 #include <sys/flock.h>
56 #include <sys/pathname.h>
57 #include <sys/nbmlock.h>
58 #include <sys/share.h>
59 #include <sys/atomic.h>
60 #include <sys/policy.h>
61 #include <sys/fem.h>
62 #include <sys/sdt.h>
63 #include <sys/ddi.h>
64 #include <sys/zone.h>
65 
66 #include <fs/fs_reparse.h>
67 
68 #include <rpc/types.h>
69 #include <rpc/auth.h>
70 #include <rpc/rpcsec_gss.h>
71 #include <rpc/svc.h>
72 
73 #include <nfs/nfs.h>
74 #include <nfs/nfssys.h>
75 #include <nfs/export.h>
76 #include <nfs/nfs_cmd.h>
77 #include <nfs/lm.h>
78 #include <nfs/nfs4.h>
79 #include <nfs/nfs4_drc.h>
80 
81 #include <sys/strsubr.h>
82 #include <sys/strsun.h>
83 
84 #include <inet/common.h>
85 #include <inet/ip.h>
86 #include <inet/ip6.h>
87 
88 #include <sys/tsol/label.h>
89 #include <sys/tsol/tndb.h>
90 
91 #define	RFS4_MAXLOCK_TRIES 4	/* Try to get the lock this many times */
92 static int rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES;
93 #define	RFS4_LOCK_DELAY 10	/* Milliseconds */
94 static clock_t  rfs4_lock_delay = RFS4_LOCK_DELAY;
95 extern struct svc_ops rdma_svc_ops;
96 extern int nfs_loaned_buffers;
97 /* End of Tunables */
98 
99 static int rdma_setup_read_data4(READ4args *, READ4res *);
100 
101 /*
102  * Used to bump the stateid4.seqid value and show changes in the stateid
103  */
104 #define	next_stateid(sp) (++(sp)->bits.chgseq)
105 
106 /*
107  * RFS4_MINLEN_ENTRY4: XDR-encoded size of smallest possible dirent.
108  *	This is used to return NFS4ERR_TOOSMALL when clients specify
109  *	maxcount that isn't large enough to hold the smallest possible
110  *	XDR encoded dirent.
111  *
112  *	    sizeof cookie (8 bytes) +
113  *	    sizeof name_len (4 bytes) +
114  *	    sizeof smallest (padded) name (4 bytes) +
115  *	    sizeof bitmap4_len (12 bytes) +   NOTE: we always encode len=2 bm4
116  *	    sizeof attrlist4_len (4 bytes) +
117  *	    sizeof next boolean (4 bytes)
118  *
119  * RFS4_MINLEN_RDDIR4: XDR-encoded size of READDIR op reply containing
120  * the smallest possible entry4 (assumes no attrs requested).
121  *	sizeof nfsstat4 (4 bytes) +
122  *	sizeof verifier4 (8 bytes) +
123  *	sizeof entry4list bool (4 bytes) +
124  *	sizeof entry4 (36 bytes) +
125  *	sizeof eof bool (4 bytes)
126  *
127  * RFS4_MINLEN_RDDIR_BUF: minimum length of buffer server will provide to
128  *	VOP_READDIR.  Its value is the size of the maximum possible dirent
129  *	for solaris.  The DIRENT64_RECLEN macro returns	the size of dirent
130  *	required for a given name length.  MAXNAMELEN is the maximum
131  *	filename length allowed in Solaris.  The first two DIRENT64_RECLEN()
132  *	macros are to allow for . and .. entries -- just a minor tweak to try
133  *	and guarantee that buffer we give to VOP_READDIR will be large enough
134  *	to hold ., .., and the largest possible solaris dirent64.
135  */
136 #define	RFS4_MINLEN_ENTRY4 36
137 #define	RFS4_MINLEN_RDDIR4 (4 + NFS4_VERIFIER_SIZE + 4 + RFS4_MINLEN_ENTRY4 + 4)
138 #define	RFS4_MINLEN_RDDIR_BUF \
139 	(DIRENT64_RECLEN(1) + DIRENT64_RECLEN(2) + DIRENT64_RECLEN(MAXNAMELEN))
140 
141 /*
142  * It would be better to pad to 4 bytes since that's what XDR would do,
143  * but the dirents UFS gives us are already padded to 8, so just take
144  * what we're given.  Dircount is only a hint anyway.  Currently the
145  * solaris kernel is ASCII only, so there's no point in calling the
146  * UTF8 functions.
147  *
148  * dirent64: name padded to provide 8 byte struct alignment
149  *	d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad)
150  *
151  * cookie: uint64_t   +  utf8namelen: uint_t  +   utf8name padded to 8 bytes
152  *
153  */
154 #define	DIRENT64_TO_DIRCOUNT(dp) \
155 	(3 * BYTES_PER_XDR_UNIT + DIRENT64_NAMELEN((dp)->d_reclen))
156 
157 
158 static sysid_t		lockt_sysid;	/* dummy sysid for all LOCKT calls */
159 
160 u_longlong_t	nfs4_srv_caller_id;
161 uint_t		nfs4_srv_vkey = 0;
162 
163 void	rfs4_init_compound_state(struct compound_state *);
164 
165 static void	nullfree(caddr_t);
166 static void	rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
167 		    struct compound_state *);
168 static void	rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
169 		    struct compound_state *);
170 static void	rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
171 		    struct compound_state *);
172 static void	rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
173 		    struct compound_state *);
174 static void	rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
175 		    struct compound_state *);
176 static void	rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *,
177 		    struct svc_req *, struct compound_state *);
178 static void	rfs4_op_delegpurge(nfs_argop4 *, nfs_resop4 *,
179 		    struct svc_req *, struct compound_state *);
180 static void	rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
181 		    struct compound_state *);
182 static void	rfs4_op_getattr_free(nfs_resop4 *);
183 static void	rfs4_op_getfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
184 		    struct compound_state *);
185 static void	rfs4_op_getfh_free(nfs_resop4 *);
186 static void	rfs4_op_illegal(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
187 		    struct compound_state *);
188 static void	rfs4_op_notsup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
189 		    struct compound_state *);
190 static void	rfs4_op_link(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
191 		    struct compound_state *);
192 static void	rfs4_op_lock(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
193 		    struct compound_state *);
194 static void	lock_denied_free(nfs_resop4 *);
195 static void	rfs4_op_locku(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
196 		    struct compound_state *);
197 static void	rfs4_op_lockt(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
198 		    struct compound_state *);
199 static void	rfs4_op_lookup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
200 		    struct compound_state *);
201 static void	rfs4_op_lookupp(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
202 		    struct compound_state *);
203 static void	rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop,
204 		    struct svc_req *req, struct compound_state *cs);
205 static void	rfs4_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
206 		    struct compound_state *);
207 static void	rfs4_op_open(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
208 		    struct compound_state *);
209 static void	rfs4_op_open_confirm(nfs_argop4 *, nfs_resop4 *,
210 		    struct svc_req *, struct compound_state *);
211 static void	rfs4_op_open_downgrade(nfs_argop4 *, nfs_resop4 *,
212 		    struct svc_req *, struct compound_state *);
213 static void	rfs4_op_putfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
214 		    struct compound_state *);
215 static void	rfs4_op_putpubfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
216 		    struct compound_state *);
217 static void	rfs4_op_putrootfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
218 		    struct compound_state *);
219 static void	rfs4_op_read(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
220 		    struct compound_state *);
221 static void	rfs4_op_read_free(nfs_resop4 *);
222 static void	rfs4_op_readdir_free(nfs_resop4 *resop);
223 static void	rfs4_op_readlink(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
224 		    struct compound_state *);
225 static void	rfs4_op_readlink_free(nfs_resop4 *);
226 static void	rfs4_op_release_lockowner(nfs_argop4 *, nfs_resop4 *,
227 		    struct svc_req *, struct compound_state *);
228 static void	rfs4_op_remove(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
229 		    struct compound_state *);
230 static void	rfs4_op_rename(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
231 		    struct compound_state *);
232 static void	rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
233 		    struct compound_state *);
234 static void	rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
235 		    struct compound_state *);
236 static void	rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
237 		    struct compound_state *);
238 static void	rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
239 		    struct compound_state *);
240 static void	rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
241 		    struct compound_state *);
242 static void	rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
243 		    struct compound_state *);
244 static void	rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *,
245 		    struct svc_req *, struct compound_state *);
246 static void	rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *,
247 		    struct svc_req *req, struct compound_state *);
248 static void	rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
249 		    struct compound_state *);
250 static void	rfs4_op_secinfo_free(nfs_resop4 *);
251 
252 void rfs4x_op_exchange_id(nfs_argop4 *argop, nfs_resop4 *resop,
253     struct svc_req *req, struct compound_state *cs);
254 void rfs4x_exchange_id_free(nfs_resop4 *);
255 
256 void rfs4x_op_create_session(nfs_argop4 *argop, nfs_resop4 *resop,
257     struct svc_req *req, struct compound_state *cs);
258 
259 void rfs4x_op_destroy_session(nfs_argop4 *argop, nfs_resop4 *resop,
260     struct svc_req *req, compound_state_t *cs);
261 
262 void rfs4x_op_sequence(nfs_argop4 *argop, nfs_resop4 *resop,
263     struct svc_req *req, struct compound_state *cs);
264 
265 void rfs4x_op_reclaim_complete(nfs_argop4 *argop, nfs_resop4 *resop,
266     struct svc_req *req, compound_state_t *cs);
267 
268 void rfs4x_op_destroy_clientid(nfs_argop4 *argop, nfs_resop4 *resop,
269     struct svc_req *req, compound_state_t *cs);
270 
271 void rfs4x_op_bind_conn_to_session(nfs_argop4 *argop, nfs_resop4 *resop,
272     struct svc_req *req, compound_state_t *cs);
273 
274 void rfs4x_op_secinfo_noname(nfs_argop4 *argop, nfs_resop4 *resop,
275     struct svc_req *req, compound_state_t *cs);
276 
277 static nfsstat4 check_open_access(uint32_t, struct compound_state *,
278 		    struct svc_req *);
279 nfsstat4	rfs4_client_sysid(rfs4_client_t *, sysid_t *);
280 void		rfs4_ss_clid(nfs4_srv_t *, rfs4_client_t *);
281 
282 /*
283  * translation table for attrs
284  */
285 struct nfs4_ntov_table {
286 	union nfs4_attr_u *na;
287 	uint8_t amap[NFS4_MAXNUM_ATTRS];
288 	int attrcnt;
289 	bool_t vfsstat;
290 };
291 
292 static void	nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp);
293 static void	nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
294 		    struct nfs4_svgetit_arg *sargp);
295 
296 static nfsstat4	do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp,
297 		    struct compound_state *cs, struct nfs4_svgetit_arg *sargp,
298 		    struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd);
299 
300 static void	hanfsv4_failover(nfs4_srv_t *);
301 
302 fem_t		*deleg_rdops;
303 fem_t		*deleg_wrops;
304 
305 /*
306  * NFS4 op dispatch table; entries appear in operation-number order (see the per-entry comments)
307  */
308 
309 struct rfsv4disp {
310 	void	(*dis_proc)();		/* proc to call */
311 	void	(*dis_resfree)();	/* frees space allocated by proc */
312 	int	dis_flags;		/* RPC_IDEMPOTENT, etc... */
313 };
314 
315 static struct rfsv4disp rfsv4disptab[] = {
316 	/*
317 	 * NFS VERSION 4
318 	 */
319 
320 	/* RFS_NULL = 0 */
321 	{rfs4_op_illegal, nullfree, 0},
322 
323 	/* UNUSED = 1 */
324 	{rfs4_op_illegal, nullfree, 0},
325 
326 	/* UNUSED = 2 */
327 	{rfs4_op_illegal, nullfree, 0},
328 
329 	/* OP_ACCESS = 3 */
330 	{rfs4_op_access, nullfree, RPC_IDEMPOTENT},
331 
332 	/* OP_CLOSE = 4 */
333 	{rfs4_op_close, nullfree, 0},
334 
335 	/* OP_COMMIT = 5 */
336 	{rfs4_op_commit, nullfree, RPC_IDEMPOTENT},
337 
338 	/* OP_CREATE = 6 */
339 	{rfs4_op_create, nullfree, 0},
340 
341 	/* OP_DELEGPURGE = 7 */
342 	{rfs4_op_delegpurge, nullfree, 0},
343 
344 	/* OP_DELEGRETURN = 8 */
345 	{rfs4_op_delegreturn, nullfree, 0},
346 
347 	/* OP_GETATTR = 9 */
348 	{rfs4_op_getattr, rfs4_op_getattr_free, RPC_IDEMPOTENT},
349 
350 	/* OP_GETFH = 10 */
351 	{rfs4_op_getfh, rfs4_op_getfh_free, RPC_ALL},
352 
353 	/* OP_LINK = 11 */
354 	{rfs4_op_link, nullfree, 0},
355 
356 	/* OP_LOCK = 12 */
357 	{rfs4_op_lock, lock_denied_free, 0},
358 
359 	/* OP_LOCKT = 13 */
360 	{rfs4_op_lockt, lock_denied_free, 0},
361 
362 	/* OP_LOCKU = 14 */
363 	{rfs4_op_locku, nullfree, 0},
364 
365 	/* OP_LOOKUP = 15 */
366 	{rfs4_op_lookup, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},
367 
368 	/* OP_LOOKUPP = 16 */
369 	{rfs4_op_lookupp, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},
370 
371 	/* OP_NVERIFY = 17 */
372 	{rfs4_op_nverify, nullfree, RPC_IDEMPOTENT},
373 
374 	/* OP_OPEN = 18 */
375 	{rfs4_op_open, rfs4_free_reply, 0},
376 
377 	/* OP_OPENATTR = 19 */
378 	{rfs4_op_openattr, nullfree, 0},
379 
380 	/* OP_OPEN_CONFIRM = 20 */
381 	{rfs4_op_open_confirm, nullfree, 0},
382 
383 	/* OP_OPEN_DOWNGRADE = 21 */
384 	{rfs4_op_open_downgrade, nullfree, 0},
385 
386 	/* OP_PUTFH = 22 */
387 	{rfs4_op_putfh, nullfree, RPC_ALL},
388 
389 	/* OP_PUTPUBFH = 23 */
390 	{rfs4_op_putpubfh, nullfree, RPC_ALL},
391 
392 	/* OP_PUTROOTFH = 24 */
393 	{rfs4_op_putrootfh, nullfree, RPC_ALL},
394 
395 	/* OP_READ = 25 */
396 	{rfs4_op_read, rfs4_op_read_free, RPC_IDEMPOTENT},
397 
398 	/* OP_READDIR = 26 */
399 	{rfs4_op_readdir, rfs4_op_readdir_free, RPC_IDEMPOTENT},
400 
401 	/* OP_READLINK = 27 */
402 	{rfs4_op_readlink, rfs4_op_readlink_free, RPC_IDEMPOTENT},
403 
404 	/* OP_REMOVE = 28 */
405 	{rfs4_op_remove, nullfree, 0},
406 
407 	/* OP_RENAME = 29 */
408 	{rfs4_op_rename, nullfree, 0},
409 
410 	/* OP_RENEW = 30 */
411 	{rfs4_op_renew, nullfree, 0},
412 
413 	/* OP_RESTOREFH = 31 */
414 	{rfs4_op_restorefh, nullfree, RPC_ALL},
415 
416 	/* OP_SAVEFH = 32 */
417 	{rfs4_op_savefh, nullfree, RPC_ALL},
418 
419 	/* OP_SECINFO = 33 */
420 	{rfs4_op_secinfo, rfs4_op_secinfo_free, 0},
421 
422 	/* OP_SETATTR = 34 */
423 	{rfs4_op_setattr, nullfree, 0},
424 
425 	/* OP_SETCLIENTID = 35 */
426 	{rfs4_op_setclientid, nullfree, 0},
427 
428 	/* OP_SETCLIENTID_CONFIRM = 36 */
429 	{rfs4_op_setclientid_confirm, nullfree, 0},
430 
431 	/* OP_VERIFY = 37 */
432 	{rfs4_op_verify, nullfree, RPC_IDEMPOTENT},
433 
434 	/* OP_WRITE = 38 */
435 	{rfs4_op_write, nullfree, 0},
436 
437 	/* OP_RELEASE_LOCKOWNER = 39 */
438 	{rfs4_op_release_lockowner, nullfree, 0},
439 
440 	/*
441 	 * NFSv4.1 operations
442 	 */
443 
444 	/* OP_BACKCHANNEL_CTL = 40 */
445 	{rfs4_op_notsup,  nullfree,  0},
446 
447 	/*  OP_BIND_CONN_TO_SESSION = 41 */
448 	{rfs4x_op_bind_conn_to_session,  nullfree,  0},
449 
450 	/* OP_EXCHANGE_ID  = 42 */
451 	{rfs4x_op_exchange_id,  rfs4x_exchange_id_free,  0},
452 
453 	/* OP_CREATE_SESSION = 43 */
454 	{rfs4x_op_create_session,  nullfree,  0},
455 
456 	/* OP_DESTROY_SESSION = 44 */
457 	{rfs4x_op_destroy_session,  nullfree,  0},
458 
459 	/* OP_FREE_STATEID = 45 */
460 	{rfs4_op_notsup,  nullfree,  0},
461 
462 	/* OP_GET_DIR_DELEGATION = 46 */
463 	{rfs4_op_notsup,  nullfree,  0},
464 
465 	/* OP_GETDEVICEINFO = 47 */
466 	{rfs4_op_notsup,  nullfree,  0},
467 
468 	/* OP_GETDEVICELIST = 48 */
469 	{rfs4_op_notsup,  nullfree,  0},
470 
471 	/* OP_LAYOUTCOMMIT = 49 */
472 	{rfs4_op_notsup,  nullfree,  0},
473 
474 	/* OP_LAYOUTGET = 50 */
475 	{rfs4_op_notsup,  nullfree,  0},
476 
477 	/* OP_LAYOUTRETURN = 51 */
478 	{rfs4_op_notsup,  nullfree,  0},
479 
480 	/* OP_SECINFO_NO_NAME = 52 */
481 	{rfs4x_op_secinfo_noname, rfs4_op_secinfo_free, 0},
482 
483 	/* OP_SEQUENCE = 53 */
484 	{rfs4x_op_sequence,  nullfree,  0},
485 
486 	/* OP_SET_SSV = 54 */
487 	{rfs4_op_notsup,  nullfree,  0},
488 
489 	/* OP_TEST_STATEID = 55 */
490 	{rfs4_op_notsup,  nullfree,  0},
491 
492 	/* OP_WANT_DELEGATION = 56 */
493 	{rfs4_op_notsup,  nullfree,  0},
494 
495 	/* OP_DESTROY_CLIENTID = 57 */
496 	{rfs4x_op_destroy_clientid,  nullfree,  0},
497 
498 	/* OP_RECLAIM_COMPLETE = 58 */
499 	{rfs4x_op_reclaim_complete,  nullfree,  0},
500 };
501 
502 static uint_t rfsv4disp_cnt = sizeof (rfsv4disptab) / sizeof (rfsv4disptab[0]);
503 
504 #define	OP_ILLEGAL_IDX (rfsv4disp_cnt)	/* equals the entry count, i.e. one past the last table slot */
505 
506 #ifdef DEBUG
507 
508 int		rfs4_fillone_debug = 0;
509 int		rfs4_no_stub_access = 1;
510 int		rfs4_rddir_debug = 0;
511 
512 static char    *rfs4_op_string[] = {
513 	"rfs4_op_null",
514 	"rfs4_op_1 unused",
515 	"rfs4_op_2 unused",
516 	"rfs4_op_access",
517 	"rfs4_op_close",
518 	"rfs4_op_commit",
519 	"rfs4_op_create",
520 	"rfs4_op_delegpurge",
521 	"rfs4_op_delegreturn",
522 	"rfs4_op_getattr",
523 	"rfs4_op_getfh",
524 	"rfs4_op_link",
525 	"rfs4_op_lock",
526 	"rfs4_op_lockt",
527 	"rfs4_op_locku",
528 	"rfs4_op_lookup",
529 	"rfs4_op_lookupp",
530 	"rfs4_op_nverify",
531 	"rfs4_op_open",
532 	"rfs4_op_openattr",
533 	"rfs4_op_open_confirm",
534 	"rfs4_op_open_downgrade",
535 	"rfs4_op_putfh",
536 	"rfs4_op_putpubfh",
537 	"rfs4_op_putrootfh",
538 	"rfs4_op_read",
539 	"rfs4_op_readdir",
540 	"rfs4_op_readlink",
541 	"rfs4_op_remove",
542 	"rfs4_op_rename",
543 	"rfs4_op_renew",
544 	"rfs4_op_restorefh",
545 	"rfs4_op_savefh",
546 	"rfs4_op_secinfo",
547 	"rfs4_op_setattr",
548 	"rfs4_op_setclientid",
549 	"rfs4_op_setclient_confirm",
550 	"rfs4_op_verify",
551 	"rfs4_op_write",
552 	"rfs4_op_release_lockowner",
553 	/* NFSv4.1 */
554 	"backchannel_ctl",
555 	"bind_conn_to_session",
556 	"exchange_id",
557 	"create_session",
558 	"destroy_session",
559 	"free_stateid",
560 	"get_dir_delegation",
561 	"getdeviceinfo",
562 	"getdevicelist",
563 	"layoutcommit",
564 	"layoutget",
565 	"layoutreturn",
566 	"secinfo_no_name",
567 	"sequence",
568 	"set_ssv",
569 	"test_stateid",
570 	"want_delegation",
571 	"destroy_clientid",
572 	"reclaim_complete",
573 	"rfs4_op_illegal"
574 };
575 
576 #endif
577 
578 void	rfs4_ss_chkclid(nfs4_srv_t *, rfs4_client_t *);
579 
580 extern size_t   strlcpy(char *dst, const char *src, size_t dstsize);
581 
582 extern void	rfs4_free_fs_locations4(fs_locations4 *);
583 
584 #ifdef	nextdp
585 #undef nextdp
586 #endif
587 #define	nextdp(dp)	((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
588 
589 static const fs_operation_def_t nfs4_rd_deleg_tmpl[] = {
590 	VOPNAME_OPEN,		{ .femop_open = deleg_rd_open },
591 	VOPNAME_WRITE,		{ .femop_write = deleg_rd_write },
592 	VOPNAME_SETATTR,	{ .femop_setattr = deleg_rd_setattr },
593 	VOPNAME_RWLOCK,		{ .femop_rwlock = deleg_rd_rwlock },
594 	VOPNAME_SPACE,		{ .femop_space = deleg_rd_space },
595 	VOPNAME_SETSECATTR,	{ .femop_setsecattr = deleg_rd_setsecattr },
596 	VOPNAME_VNEVENT,	{ .femop_vnevent = deleg_rd_vnevent },
597 	NULL,			NULL
598 };
599 static const fs_operation_def_t nfs4_wr_deleg_tmpl[] = {
600 	VOPNAME_OPEN,		{ .femop_open = deleg_wr_open },
601 	VOPNAME_READ,		{ .femop_read = deleg_wr_read },
602 	VOPNAME_WRITE,		{ .femop_write = deleg_wr_write },
603 	VOPNAME_SETATTR,	{ .femop_setattr = deleg_wr_setattr },
604 	VOPNAME_RWLOCK,		{ .femop_rwlock = deleg_wr_rwlock },
605 	VOPNAME_SPACE,		{ .femop_space = deleg_wr_space },
606 	VOPNAME_SETSECATTR,	{ .femop_setsecattr = deleg_wr_setsecattr },
607 	VOPNAME_VNEVENT,	{ .femop_vnevent = deleg_wr_vnevent },
608 	NULL,			NULL
609 };
610 
611 nfs4_srv_t *
612 nfs4_get_srv(void)
613 {
614 	nfs_globals_t *ng = nfs_srv_getzg();
615 	nfs4_srv_t *srv = ng->nfs4_srv;
616 	ASSERT(srv != NULL);
617 	return (srv);
618 }
619 
620 void
621 rfs4_srv_zone_init(nfs_globals_t *ng)
622 {
623 	nfs4_srv_t *nsrv4;
624 	timespec32_t verf;
625 
626 	nsrv4 = kmem_zalloc(sizeof (*nsrv4), KM_SLEEP);
627 
628 	/*
629 	 * The following algorithm attempts to find a unique verifier
630 	 * to be used as the write verifier returned from the server
631 	 * to the client.  It is important that this verifier change
632 	 * whenever the server reboots.  Of secondary importance, it
633 	 * is important for the verifier to be unique between two
634 	 * different servers.
635 	 *
636 	 * Thus, an attempt is made to use the system hostid and the
637 	 * current time in seconds when the nfssrv kernel module is
638 	 * loaded.  It is assumed that an NFS server will not be able
639 	 * to boot and then to reboot in less than a second.  If the
640 	 * hostid has not been set, then the current high resolution
641 	 * time is used.  This will ensure different verifiers each
642 	 * time the server reboots and minimize the chances that two
643 	 * different servers will have the same verifier.
644 	 * XXX - this is broken on LP64 kernels.
645 	 */
646 	verf.tv_sec = (time_t)zone_get_hostid(NULL);
647 	if (verf.tv_sec != 0) {
648 		verf.tv_nsec = gethrestime_sec();
649 	} else {
650 		timespec_t tverf;
651 
652 		gethrestime(&tverf);
653 		verf.tv_sec = (time_t)tverf.tv_sec;
654 		verf.tv_nsec = tverf.tv_nsec;
655 	}
656 	nsrv4->write4verf = *(uint64_t *)&verf;
657 
658 	/* Used to manage create/destroy of server state */
659 	nsrv4->nfs4_server_state = NULL;
660 	nsrv4->nfs4_cur_servinst = NULL;
661 	nsrv4->nfs4_deleg_policy = SRV_NEVER_DELEGATE;
662 	mutex_init(&nsrv4->deleg_lock, NULL, MUTEX_DEFAULT, NULL);
663 	mutex_init(&nsrv4->state_lock, NULL, MUTEX_DEFAULT, NULL);
664 	mutex_init(&nsrv4->servinst_lock, NULL, MUTEX_DEFAULT, NULL);
665 	rw_init(&nsrv4->deleg_policy_lock, NULL, RW_DEFAULT, NULL);
666 
667 	ng->nfs4_srv = nsrv4;
668 }
669 
670 void
671 rfs4_srv_zone_fini(nfs_globals_t *ng)
672 {
673 	nfs4_srv_t *nsrv4 = ng->nfs4_srv;
674 
675 	ng->nfs4_srv = NULL;
676 
677 	mutex_destroy(&nsrv4->deleg_lock);
678 	mutex_destroy(&nsrv4->state_lock);
679 	mutex_destroy(&nsrv4->servinst_lock);
680 	rw_destroy(&nsrv4->deleg_policy_lock);
681 
682 	kmem_free(nsrv4, sizeof (*nsrv4));
683 }
684 
685 void
686 rfs4_srvrinit(void)
687 {
688 	extern void rfs4_attr_init();
689 
690 	rfs4_attr_init();
691 
692 	if (fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops) != 0) {
693 		rfs4_disable_delegation();
694 	} else if (fem_create("deleg_wrops", nfs4_wr_deleg_tmpl,
695 	    &deleg_wrops) != 0) {
696 		rfs4_disable_delegation();
697 		fem_free(deleg_rdops);
698 	}
699 
700 	nfs4_srv_caller_id = fs_new_caller_id();
701 	lockt_sysid = lm_alloc_sysidt();
702 	vsd_create(&nfs4_srv_vkey, NULL);
703 	rfs4_state_g_init();
704 }
705 
706 void
707 rfs4_srvrfini(void)
708 {
709 	if (lockt_sysid != LM_NOSYSID) {
710 		lm_free_sysidt(lockt_sysid);
711 		lockt_sysid = LM_NOSYSID;
712 	}
713 
714 	rfs4_state_g_fini();
715 
716 	fem_free(deleg_rdops);
717 	fem_free(deleg_wrops);
718 }
719 
720 void
721 rfs4_do_server_start(int server_upordown,
722     int srv_delegation, int cluster_booted)
723 {
724 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
725 
726 	/* Is this a warm start? */
727 	if (server_upordown == NFS_SERVER_QUIESCED) {
728 		cmn_err(CE_NOTE, "nfs4_srv: "
729 		    "server was previously quiesced; "
730 		    "existing NFSv4 state will be re-used");
731 
732 		/*
733 		 * HA-NFSv4: this is also the signal
734 		 * that a Resource Group failover has
735 		 * occurred.
736 		 */
737 		if (cluster_booted)
738 			hanfsv4_failover(nsrv4);
739 	} else {
740 		/* Cold start */
741 		nsrv4->rfs4_start_time = 0;
742 		rfs4_state_zone_init(nsrv4);
743 		nsrv4->nfs4_drc = rfs4_init_drc(nfs4_drc_max,
744 		    nfs4_drc_hash);
745 
746 		/*
747 		 * The nfsd service was started with the -s option
748 		 * we need to pull in any state from the paths indicated.
749 		 */
750 		if (curzone == global_zone && rfs4_dss_numnewpaths > 0) {
751 			/* read in the stable storage state from these paths */
752 			rfs4_dss_readstate(nsrv4, rfs4_dss_numnewpaths,
753 			    rfs4_dss_newpaths);
754 		}
755 	}
756 
757 	/* Check if delegation is to be enabled */
758 	if (srv_delegation != FALSE)
759 		rfs4_set_deleg_policy(nsrv4, SRV_NORMAL_DELEGATE);
760 }
761 
762 void
763 rfs4_init_compound_state(struct compound_state *cs)
764 {
765 	bzero(cs, sizeof (*cs));
766 	cs->cont = TRUE;
767 	cs->access = CS_ACCESS_DENIED;
768 	cs->deleg = FALSE;
769 	cs->mandlock = FALSE;
770 	cs->fh.nfs_fh4_val = cs->fhbuf;
771 }
772 
773 /* Do cleanup of the compound_state */
774 void
775 rfs4_fini_compound_state(struct compound_state *cs)
776 {
777 	if (cs->vp) {
778 		VN_RELE(cs->vp);
779 	}
780 	if (cs->saved_vp) {
781 		VN_RELE(cs->saved_vp);
782 	}
783 	if (cs->cr) {
784 		crfree(cs->cr);
785 	}
786 	if (cs->saved_fh.nfs_fh4_val) {
787 		kmem_free(cs->saved_fh.nfs_fh4_val, NFS4_FHSIZE);
788 	}
789 	if (cs->sp) {
790 		rfs4x_session_rele(cs->sp);
791 	}
792 }
793 
794 void
795 rfs4_grace_start(rfs4_servinst_t *sip)
796 {
797 	rw_enter(&sip->rwlock, RW_WRITER);
798 	sip->start_time = nfs_sys_uptime();
799 	sip->grace_period = rfs4_grace_period;
800 	rw_exit(&sip->rwlock);
801 }
802 
803 /*
804  * returns true if the instance's grace period has never been started
805  */
806 int
807 rfs4_servinst_grace_new(rfs4_servinst_t *sip)
808 {
809 	time_t start_time;
810 
811 	rw_enter(&sip->rwlock, RW_READER);
812 	start_time = sip->start_time;
813 	rw_exit(&sip->rwlock);
814 
815 	return (start_time == 0);
816 }
817 
818 /*
819  * Indicates if server instance is within the
820  * grace period.
821  */
822 int
823 rfs4_servinst_in_grace(rfs4_servinst_t *sip)
824 {
825 	time_t grace_expiry;
826 
827 	/* All clients called reclaim-complete */
828 	if (sip->nreclaim == 0 || sip->grace_period == 0)
829 		return (0);
830 
831 	rw_enter(&sip->rwlock, RW_READER);
832 	grace_expiry = sip->start_time + sip->grace_period;
833 	rw_exit(&sip->rwlock);
834 
835 	if (nfs_sys_uptime() < grace_expiry)
836 		return (1);
837 
838 	/* Once grace period ends, optimize next calls */
839 	sip->grace_period = 0;
840 	return (0);
841 }
842 
843 int
844 rfs4_clnt_in_grace(rfs4_client_t *cp)
845 {
846 	ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
847 
848 	return (rfs4_servinst_in_grace(cp->rc_server_instance));
849 }
850 
851 /*
852  * reset all currently active grace periods
853  */
854 void
855 rfs4_grace_reset_all(nfs4_srv_t *nsrv4)
856 {
857 	rfs4_servinst_t *sip;
858 
859 	mutex_enter(&nsrv4->servinst_lock);
860 	for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
861 		if (rfs4_servinst_in_grace(sip))
862 			rfs4_grace_start(sip);
863 	mutex_exit(&nsrv4->servinst_lock);
864 }
865 
866 /*
867  * start any new instances' grace periods
868  */
869 void
870 rfs4_grace_start_new(nfs4_srv_t *nsrv4)
871 {
872 	rfs4_servinst_t *sip;
873 
874 	mutex_enter(&nsrv4->servinst_lock);
875 	for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
876 		if (rfs4_servinst_grace_new(sip))
877 			rfs4_grace_start(sip);
878 	mutex_exit(&nsrv4->servinst_lock);
879 }
880 
881 static rfs4_dss_path_t *
882 rfs4_dss_newpath(nfs4_srv_t *nsrv4, rfs4_servinst_t *sip,
883     char *path, unsigned index)
884 {
885 	size_t len;
886 	rfs4_dss_path_t *dss_path;
887 
888 	dss_path = kmem_alloc(sizeof (rfs4_dss_path_t), KM_SLEEP);
889 
890 	/*
891 	 * Take a copy of the string, since the original may be overwritten.
892 	 * Sadly, no strdup() in the kernel.
893 	 */
894 	/* allow for NUL */
895 	len = strlen(path) + 1;
896 	dss_path->path = kmem_alloc(len, KM_SLEEP);
897 	(void) strlcpy(dss_path->path, path, len);
898 
899 	/* associate with servinst */
900 	dss_path->sip = sip;
901 	dss_path->index = index;
902 
903 	/*
904 	 * Add to list of served paths.
905 	 * No locking required, as we're only ever called at startup.
906 	 */
907 	if (nsrv4->dss_pathlist == NULL) {
908 		/* this is the first dss_path_t */
909 
910 		/* needed for insque/remque */
911 		dss_path->next = dss_path->prev = dss_path;
912 
913 		nsrv4->dss_pathlist = dss_path;
914 	} else {
915 		insque(dss_path, nsrv4->dss_pathlist);
916 	}
917 
918 	return (dss_path);
919 }
920 
921 /*
922  * Create a new server instance, and make it the currently active instance.
923  * Note that starting the grace period too early will reduce the clients'
924  * recovery window.
925  */
926 void
927 rfs4_servinst_create(nfs4_srv_t *nsrv4, int start_grace,
928     int dss_npaths, char **dss_paths)
929 {
930 	unsigned i;
931 	rfs4_servinst_t *sip;
932 	rfs4_oldstate_t *oldstate;
933 
934 	sip = kmem_alloc(sizeof (rfs4_servinst_t), KM_SLEEP);
935 	rw_init(&sip->rwlock, NULL, RW_DEFAULT, NULL);
936 
937 	sip->nreclaim = 0;
938 	sip->start_time = (time_t)0;
939 	sip->grace_period = (time_t)0;
940 	sip->next = NULL;
941 	sip->prev = NULL;
942 
943 	rw_init(&sip->oldstate_lock, NULL, RW_DEFAULT, NULL);
944 	/*
945 	 * This initial dummy entry is required to setup for insque/remque.
946 	 * It must be skipped over whenever the list is traversed.
947 	 */
948 	oldstate = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP);
949 	/* insque/remque require initial list entry to be self-terminated */
950 	oldstate->next = oldstate;
951 	oldstate->prev = oldstate;
952 	sip->oldstate = oldstate;
953 
954 
955 	sip->dss_npaths = dss_npaths;
956 	sip->dss_paths = kmem_alloc(dss_npaths *
957 	    sizeof (rfs4_dss_path_t *), KM_SLEEP);
958 
959 	for (i = 0; i < dss_npaths; i++) {
960 		sip->dss_paths[i] =
961 		    rfs4_dss_newpath(nsrv4, sip, dss_paths[i], i);
962 	}
963 
964 	mutex_enter(&nsrv4->servinst_lock);
965 	if (nsrv4->nfs4_cur_servinst != NULL) {
966 		/* add to linked list */
967 		sip->prev = nsrv4->nfs4_cur_servinst;
968 		nsrv4->nfs4_cur_servinst->next = sip;
969 	}
970 	if (start_grace)
971 		rfs4_grace_start(sip);
972 	/* make the new instance "current" */
973 	nsrv4->nfs4_cur_servinst = sip;
974 
975 	mutex_exit(&nsrv4->servinst_lock);
976 }
977 
978 /*
979  * In future, we might add a rfs4_servinst_destroy(sip) but, for now, destroy
980  * all instances directly.
981  */
982 void
983 rfs4_servinst_destroy_all(nfs4_srv_t *nsrv4)
984 {
985 	rfs4_servinst_t *sip, *prev, *current;
986 #ifdef DEBUG
987 	int n = 0;
988 #endif
989 
990 	mutex_enter(&nsrv4->servinst_lock);
991 	ASSERT(nsrv4->nfs4_cur_servinst != NULL);
992 	current = nsrv4->nfs4_cur_servinst;
993 	nsrv4->nfs4_cur_servinst = NULL;
994 	for (sip = current; sip != NULL; sip = prev) {
995 		prev = sip->prev;
996 		rw_destroy(&sip->rwlock);
997 		if (sip->oldstate)
998 			kmem_free(sip->oldstate, sizeof (rfs4_oldstate_t));
999 		if (sip->dss_paths) {
1000 			int i = sip->dss_npaths;
1001 
1002 			while (i > 0) {
1003 				i--;
1004 				if (sip->dss_paths[i] != NULL) {
1005 					char *path = sip->dss_paths[i]->path;
1006 
1007 					if (path != NULL) {
1008 						kmem_free(path,
1009 						    strlen(path) + 1);
1010 					}
1011 					kmem_free(sip->dss_paths[i],
1012 					    sizeof (rfs4_dss_path_t));
1013 				}
1014 			}
1015 			kmem_free(sip->dss_paths,
1016 			    sip->dss_npaths * sizeof (rfs4_dss_path_t *));
1017 		}
1018 		kmem_free(sip, sizeof (rfs4_servinst_t));
1019 #ifdef DEBUG
1020 		n++;
1021 #endif
1022 	}
1023 	mutex_exit(&nsrv4->servinst_lock);
1024 }
1025 
1026 /*
1027  * Assign the current server instance to a client_t.
1028  * Should be called with cp->rc_dbe held.
1029  */
1030 void
1031 rfs4_servinst_assign(nfs4_srv_t *nsrv4, rfs4_client_t *cp,
1032     rfs4_servinst_t *sip)
1033 {
1034 	ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
1035 
1036 	/*
1037 	 * The lock ensures that if the current instance is in the process
1038 	 * of changing, we will see the new one.
1039 	 */
1040 	mutex_enter(&nsrv4->servinst_lock);
1041 	cp->rc_server_instance = sip;
1042 	mutex_exit(&nsrv4->servinst_lock);
1043 }
1044 
1045 rfs4_servinst_t *
1046 rfs4_servinst(rfs4_client_t *cp)
1047 {
1048 	ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
1049 
1050 	return (cp->rc_server_instance);
1051 }
1052 
1053 /* ARGSUSED */
1054 static void
1055 nullfree(caddr_t resop)
1056 {
1057 }
1058 
1059 /*
1060  * This is a fall-through for invalid or not implemented (yet) ops
1061  */
1062 /* ARGSUSED */
1063 static void
1064 rfs4_op_inval(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1065     struct compound_state *cs)
1066 {
1067 	*cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_INVAL;
1068 }
1069 
1070 /*
1071  * Check if the security flavor, nfsnum, is in the flavor_list.
1072  */
1073 bool_t
1074 in_flavor_list(int nfsnum, int *flavor_list, int count)
1075 {
1076 	int i;
1077 
1078 	for (i = 0; i < count; i++) {
1079 		if (nfsnum == flavor_list[i])
1080 			return (TRUE);
1081 	}
1082 	return (FALSE);
1083 }
1084 
1085 /*
1086  * Used by rfs4_op_secinfo to get the security information from the
1087  * export structure associated with the component.
1088  */
1089 /* ARGSUSED */
1090 nfsstat4
1091 do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
1092 {
1093 	int error, different_export = 0;
1094 	vnode_t *dvp, *vp;
1095 	struct exportinfo *exi;
1096 	fid_t fid;
1097 	uint_t count, i;
1098 	secinfo4 *resok_val;
1099 	struct secinfo *secp;
1100 	seconfig_t *si;
1101 	bool_t did_traverse = FALSE;
1102 	int dotdot, walk;
1103 	nfs_export_t *ne = nfs_get_export();
1104 
1105 	dvp = cs->vp;
1106 	exi = cs->exi;
1107 	ASSERT(exi != NULL);
1108 	dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
1109 
1110 	/*
1111 	 * If dotdotting, then need to check whether it's above the
1112 	 * root of a filesystem, or above an export point.
1113 	 */
1114 	if (dotdot) {
1115 		vnode_t *zone_rootvp = ne->exi_root->exi_vp;
1116 
1117 		ASSERT3U(exi->exi_zoneid, ==, ne->exi_root->exi_zoneid);
1118 		/*
1119 		 * If dotdotting at the root of a filesystem, then
1120 		 * need to traverse back to the mounted-on filesystem
1121 		 * and do the dotdot lookup there.
1122 		 */
1123 		if ((dvp->v_flag & VROOT) || VN_CMP(dvp, zone_rootvp)) {
1124 
1125 			/*
1126 			 * If at the system root, then can
1127 			 * go up no further.
1128 			 */
1129 			if (VN_CMP(dvp, zone_rootvp))
1130 				return (puterrno4(ENOENT));
1131 
1132 			/*
1133 			 * Traverse back to the mounted-on filesystem
1134 			 */
1135 			dvp = untraverse(dvp, zone_rootvp);
1136 
1137 			/*
1138 			 * Set the different_export flag so we remember
1139 			 * to pick up a new exportinfo entry for
1140 			 * this new filesystem.
1141 			 */
1142 			different_export = 1;
1143 		} else {
1144 
1145 			/*
1146 			 * If dotdotting above an export point then set
1147 			 * the different_export to get new export info.
1148 			 */
1149 			different_export = nfs_exported(exi, dvp);
1150 		}
1151 	}
1152 
1153 	/*
1154 	 * Get the vnode for the component "nm".
1155 	 */
1156 	error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cs->cr,
1157 	    NULL, NULL, NULL);
1158 	if (error)
1159 		return (puterrno4(error));
1160 
1161 	/*
1162 	 * If the vnode is in a pseudo filesystem, or if the security flavor
1163 	 * used in the request is valid but not an explicitly shared flavor,
1164 	 * or the access bit indicates that this is a limited access,
1165 	 * check whether this vnode is visible.
1166 	 */
1167 	if (!different_export &&
1168 	    (PSEUDO(exi) || !is_exported_sec(cs->nfsflavor, exi) ||
1169 	    cs->access & CS_ACCESS_LIMITED)) {
1170 		if (! nfs_visible(exi, vp, &different_export)) {
1171 			VN_RELE(vp);
1172 			return (puterrno4(ENOENT));
1173 		}
1174 	}
1175 
1176 	/*
1177 	 * If it's a mountpoint, then traverse it.
1178 	 */
1179 	if (vn_ismntpt(vp)) {
1180 		if ((error = traverse(&vp)) != 0) {
1181 			VN_RELE(vp);
1182 			return (puterrno4(error));
1183 		}
1184 		/* remember that we had to traverse mountpoint */
1185 		did_traverse = TRUE;
1186 		different_export = 1;
1187 	} else if (vp->v_vfsp != dvp->v_vfsp) {
1188 		/*
1189 		 * If vp isn't a mountpoint and the vfs ptrs aren't the same,
1190 		 * then vp is probably an LOFS object.  We don't need the
1191 		 * realvp, we just need to know that we might have crossed
1192 		 * a server fs boundary and need to call checkexport4.
1193 		 * (LOFS lookup hides server fs mountpoints, and actually calls
1194 		 * traverse)
1195 		 */
1196 		different_export = 1;
1197 	}
1198 
1199 	/*
1200 	 * Get the export information for it.
1201 	 */
1202 	if (different_export) {
1203 
1204 		bzero(&fid, sizeof (fid));
1205 		fid.fid_len = MAXFIDSZ;
1206 		error = vop_fid_pseudo(vp, &fid);
1207 		if (error) {
1208 			VN_RELE(vp);
1209 			return (puterrno4(error));
1210 		}
1211 
1212 		/* We'll need to reassign "exi". */
1213 		if (dotdot)
1214 			exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
1215 		else
1216 			exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
1217 
1218 		if (exi == NULL) {
1219 			if (did_traverse == TRUE) {
1220 				/*
1221 				 * If this vnode is a mounted-on vnode,
1222 				 * but the mounted-on file system is not
1223 				 * exported, send back the secinfo for
1224 				 * the exported node that the mounted-on
1225 				 * vnode lives in.
1226 				 */
1227 				exi = cs->exi;
1228 			} else {
1229 				VN_RELE(vp);
1230 				return (puterrno4(EACCES));
1231 			}
1232 		}
1233 	}
1234 	ASSERT(exi != NULL);
1235 
1236 
1237 	/*
1238 	 * Create the secinfo result based on the security information
1239 	 * from the exportinfo structure (exi).
1240 	 *
1241 	 * Return all flavors for a pseudo node.
1242 	 * For a real export node, return the flavor that the client
1243 	 * has access with.
1244 	 */
1245 	ASSERT(RW_LOCK_HELD(&ne->exported_lock));
1246 	if (PSEUDO(exi)) {
1247 		count = exi->exi_export.ex_seccnt; /* total sec count */
1248 		resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP);
1249 		secp = exi->exi_export.ex_secinfo;
1250 
1251 		for (i = 0; i < count; i++) {
1252 			si = &secp[i].s_secinfo;
1253 			resok_val[i].flavor = si->sc_rpcnum;
1254 			if (resok_val[i].flavor == RPCSEC_GSS) {
1255 				rpcsec_gss_info *info;
1256 
1257 				info = &resok_val[i].flavor_info;
1258 				info->qop = si->sc_qop;
1259 				info->service = (rpc_gss_svc_t)si->sc_service;
1260 
1261 				/* get oid opaque data */
1262 				info->oid.sec_oid4_len =
1263 				    si->sc_gss_mech_type->length;
1264 				info->oid.sec_oid4_val = kmem_alloc(
1265 				    si->sc_gss_mech_type->length, KM_SLEEP);
1266 				bcopy(
1267 				    si->sc_gss_mech_type->elements,
1268 				    info->oid.sec_oid4_val,
1269 				    info->oid.sec_oid4_len);
1270 			}
1271 		}
1272 		resp->SECINFO4resok_len = count;
1273 		resp->SECINFO4resok_val = resok_val;
1274 	} else {
1275 		int ret_cnt = 0, k = 0;
1276 		int *flavor_list;
1277 
1278 		count = exi->exi_export.ex_seccnt; /* total sec count */
1279 		secp = exi->exi_export.ex_secinfo;
1280 
1281 		flavor_list = kmem_alloc(count * sizeof (int), KM_SLEEP);
1282 		/* find out which flavors to return */
1283 		for (i = 0; i < count; i ++) {
1284 			int access, flavor, perm;
1285 
1286 			flavor = secp[i].s_secinfo.sc_nfsnum;
1287 			perm = secp[i].s_flags;
1288 
1289 			access = nfsauth4_secinfo_access(exi, cs->req,
1290 			    flavor, perm, cs->basecr);
1291 
1292 			if (! (access & NFSAUTH_DENIED) &&
1293 			    ! (access & NFSAUTH_WRONGSEC)) {
1294 				flavor_list[ret_cnt] = flavor;
1295 				ret_cnt++;
1296 			}
1297 		}
1298 
1299 		/* Create the returning SECINFO value */
1300 		resok_val = kmem_alloc(ret_cnt * sizeof (secinfo4), KM_SLEEP);
1301 
1302 		for (i = 0; i < count; i++) {
1303 			/*
1304 			 * If the flavor is in the flavor list,
1305 			 * fill in resok_val.
1306 			 */
1307 			si = &secp[i].s_secinfo;
1308 			if (in_flavor_list(si->sc_nfsnum,
1309 			    flavor_list, ret_cnt)) {
1310 				resok_val[k].flavor = si->sc_rpcnum;
1311 				if (resok_val[k].flavor == RPCSEC_GSS) {
1312 					rpcsec_gss_info *info;
1313 
1314 					info = &resok_val[k].flavor_info;
1315 					info->qop = si->sc_qop;
1316 					info->service = (rpc_gss_svc_t)
1317 					    si->sc_service;
1318 
1319 					/* get oid opaque data */
1320 					info->oid.sec_oid4_len =
1321 					    si->sc_gss_mech_type->length;
1322 					info->oid.sec_oid4_val = kmem_alloc(
1323 					    si->sc_gss_mech_type->length,
1324 					    KM_SLEEP);
1325 					bcopy(si->sc_gss_mech_type->elements,
1326 					    info->oid.sec_oid4_val,
1327 					    info->oid.sec_oid4_len);
1328 				}
1329 				k++;
1330 			}
1331 			if (k >= ret_cnt)
1332 				break;
1333 		}
1334 		resp->SECINFO4resok_len = ret_cnt;
1335 		resp->SECINFO4resok_val = resok_val;
1336 		kmem_free(flavor_list, count * sizeof (int));
1337 	}
1338 
1339 	VN_RELE(vp);
1340 	return (NFS4_OK);
1341 }
1342 
1343 /*
1344  * SECINFO (Operation 33): Obtain required security information on
1345  * the component name in the format of (security-mechanism-oid, qop, service)
1346  * triplets.
1347  */
1348 /* ARGSUSED */
1349 static void
1350 rfs4_op_secinfo(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1351     struct compound_state *cs)
1352 {
1353 	SECINFO4args *args = &argop->nfs_argop4_u.opsecinfo;
1354 	SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1355 	utf8string *utfnm = &args->name;
1356 	uint_t len;
1357 	char *nm;
1358 	struct sockaddr *ca;
1359 	char *name = NULL;
1360 	nfsstat4 status = NFS4_OK;
1361 
1362 	DTRACE_NFSV4_2(op__secinfo__start, struct compound_state *, cs,
1363 	    SECINFO4args *, args);
1364 
1365 	/*
1366 	 * Current file handle (cfh) should have been set before getting
1367 	 * into this function. If not, return error.
1368 	 */
1369 	if (cs->vp == NULL) {
1370 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1371 		goto out;
1372 	}
1373 
1374 	if (cs->vp->v_type != VDIR) {
1375 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
1376 		goto out;
1377 	}
1378 
1379 	/*
1380 	 * Verify the component name. If failed, error out, but
1381 	 * do not error out if the component name is a "..".
1382 	 * SECINFO will return its parents secinfo data for SECINFO "..".
1383 	 */
1384 	status = utf8_dir_verify(utfnm);
1385 	if (status != NFS4_OK) {
1386 		if (utfnm->utf8string_len != 2 ||
1387 		    utfnm->utf8string_val[0] != '.' ||
1388 		    utfnm->utf8string_val[1] != '.') {
1389 			*cs->statusp = resp->status = status;
1390 			goto out;
1391 		}
1392 	}
1393 
1394 	nm = utf8_to_str(utfnm, &len, NULL);
1395 	if (nm == NULL) {
1396 		*cs->statusp = resp->status = NFS4ERR_INVAL;
1397 		goto out;
1398 	}
1399 
1400 	if (len > MAXNAMELEN) {
1401 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1402 		kmem_free(nm, len);
1403 		goto out;
1404 	}
1405 
1406 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1407 	name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1408 	    MAXPATHLEN  + 1);
1409 
1410 	if (name == NULL) {
1411 		*cs->statusp = resp->status = NFS4ERR_INVAL;
1412 		kmem_free(nm, len);
1413 		goto out;
1414 	}
1415 
1416 	*cs->statusp = resp->status = do_rfs4_op_secinfo(cs, name, resp);
1417 
1418 	if (resp->status == NFS4_OK && rfs4_has_session(cs)) {
1419 		/*
1420 		 * See rfc 5661 section 2.6.3.1.1.8 and 18.29.3
1421 		 *
1422 		 * 2.6.3.1.1.8
1423 		 *	SECINFO and SECINFO_NO_NAME consume the current
1424 		 *	filehandle (note that this is a change from NFSv4.0).
1425 		 *
1426 		 * 18.29.3
1427 		 *	On success, the current filehandle is consumed (see
1428 		 *	Section 2.6.3.1.1.8), and if the next operation after
1429 		 *	SECINFO tries to use the current filehandle, that
1430 		 *	operation will fail with the status
1431 		 *	NFS4ERR_NOFILEHANDLE.
1432 		 */
1433 		VN_RELE(cs->vp);
1434 		cs->vp = NULL;
1435 	}
1436 
1437 	if (name != nm)
1438 		kmem_free(name, MAXPATHLEN + 1);
1439 	kmem_free(nm, len);
1440 
1441 out:
1442 	DTRACE_NFSV4_2(op__secinfo__done, struct compound_state *, cs,
1443 	    SECINFO4res *, resp);
1444 }
1445 
1446 /*
1447  * Free SECINFO result.
1448  */
1449 /* ARGSUSED */
1450 static void
1451 rfs4_op_secinfo_free(nfs_resop4 *resop)
1452 {
1453 	SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1454 	int count, i;
1455 	secinfo4 *resok_val;
1456 
1457 	/* If this is not an Ok result, nothing to free. */
1458 	if (resp->status != NFS4_OK) {
1459 		return;
1460 	}
1461 
1462 	count = resp->SECINFO4resok_len;
1463 	resok_val = resp->SECINFO4resok_val;
1464 
1465 	for (i = 0; i < count; i++) {
1466 		if (resok_val[i].flavor == RPCSEC_GSS) {
1467 			rpcsec_gss_info *info;
1468 
1469 			info = &resok_val[i].flavor_info;
1470 			kmem_free(info->oid.sec_oid4_val,
1471 			    info->oid.sec_oid4_len);
1472 		}
1473 	}
1474 	kmem_free(resok_val, count * sizeof (secinfo4));
1475 	resp->SECINFO4resok_len = 0;
1476 	resp->SECINFO4resok_val = NULL;
1477 }
1478 
1479 /* ARGSUSED */
1480 static void
1481 rfs4_op_access(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1482     struct compound_state *cs)
1483 {
1484 	ACCESS4args *args = &argop->nfs_argop4_u.opaccess;
1485 	ACCESS4res *resp = &resop->nfs_resop4_u.opaccess;
1486 	int error;
1487 	vnode_t *vp;
1488 	struct vattr va;
1489 	int checkwriteperm;
1490 	cred_t *cr = cs->cr;
1491 	bslabel_t *clabel, *slabel;
1492 	ts_label_t *tslabel;
1493 	boolean_t admin_low_client;
1494 
1495 	DTRACE_NFSV4_2(op__access__start, struct compound_state *, cs,
1496 	    ACCESS4args *, args);
1497 
1498 #if 0	/* XXX allow access even if !cs->access. Eventually only pseudo fs */
1499 	if (cs->access == CS_ACCESS_DENIED) {
1500 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
1501 		goto out;
1502 	}
1503 #endif
1504 	if (cs->vp == NULL) {
1505 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1506 		goto out;
1507 	}
1508 
1509 	ASSERT(cr != NULL);
1510 
1511 	vp = cs->vp;
1512 
1513 	/*
1514 	 * If the file system is exported read only, it is not appropriate
1515 	 * to check write permissions for regular files and directories.
1516 	 * Special files are interpreted by the client, so the underlying
1517 	 * permissions are sent back to the client for interpretation.
1518 	 */
1519 	if (rdonly4(req, cs) &&
1520 	    (vp->v_type == VREG || vp->v_type == VDIR))
1521 		checkwriteperm = 0;
1522 	else
1523 		checkwriteperm = 1;
1524 
1525 	/*
1526 	 * XXX
1527 	 * We need the mode so that we can correctly determine access
1528 	 * permissions relative to a mandatory lock file.  Access to
1529 	 * mandatory lock files is denied on the server, so it might
1530 	 * as well be reflected to the server during the open.
1531 	 */
1532 	va.va_mask = AT_MODE;
1533 	error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1534 	if (error) {
1535 		*cs->statusp = resp->status = puterrno4(error);
1536 		goto out;
1537 	}
1538 	resp->access = 0;
1539 	resp->supported = 0;
1540 
1541 	if (is_system_labeled()) {
1542 		ASSERT(req->rq_label != NULL);
1543 		clabel = req->rq_label;
1544 		DTRACE_PROBE2(tx__rfs4__log__info__opaccess__clabel, char *,
1545 		    "got client label from request(1)",
1546 		    struct svc_req *, req);
1547 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
1548 			if ((tslabel = nfs_getflabel(vp, cs->exi)) == NULL) {
1549 				*cs->statusp = resp->status = puterrno4(EACCES);
1550 				goto out;
1551 			}
1552 			slabel = label2bslabel(tslabel);
1553 			DTRACE_PROBE3(tx__rfs4__log__info__opaccess__slabel,
1554 			    char *, "got server label(1) for vp(2)",
1555 			    bslabel_t *, slabel, vnode_t *, vp);
1556 
1557 			admin_low_client = B_FALSE;
1558 		} else
1559 			admin_low_client = B_TRUE;
1560 	}
1561 
1562 	if (args->access & ACCESS4_READ) {
1563 		error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
1564 		if (!error && !MANDLOCK(vp, va.va_mode) &&
1565 		    (!is_system_labeled() || admin_low_client ||
1566 		    bldominates(clabel, slabel)))
1567 			resp->access |= ACCESS4_READ;
1568 		resp->supported |= ACCESS4_READ;
1569 	}
1570 	if ((args->access & ACCESS4_LOOKUP) && vp->v_type == VDIR) {
1571 		error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1572 		if (!error && (!is_system_labeled() || admin_low_client ||
1573 		    bldominates(clabel, slabel)))
1574 			resp->access |= ACCESS4_LOOKUP;
1575 		resp->supported |= ACCESS4_LOOKUP;
1576 	}
1577 	if (checkwriteperm &&
1578 	    (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND))) {
1579 		error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1580 		if (!error && !MANDLOCK(vp, va.va_mode) &&
1581 		    (!is_system_labeled() || admin_low_client ||
1582 		    blequal(clabel, slabel)))
1583 			resp->access |=
1584 			    (args->access & (ACCESS4_MODIFY | ACCESS4_EXTEND));
1585 		resp->supported |=
1586 		    resp->access & (ACCESS4_MODIFY | ACCESS4_EXTEND);
1587 	}
1588 
1589 	if (checkwriteperm &&
1590 	    (args->access & ACCESS4_DELETE) && vp->v_type == VDIR) {
1591 		error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1592 		if (!error && (!is_system_labeled() || admin_low_client ||
1593 		    blequal(clabel, slabel)))
1594 			resp->access |= ACCESS4_DELETE;
1595 		resp->supported |= ACCESS4_DELETE;
1596 	}
1597 	if (args->access & ACCESS4_EXECUTE && vp->v_type != VDIR) {
1598 		error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1599 		if (!error && !MANDLOCK(vp, va.va_mode) &&
1600 		    (!is_system_labeled() || admin_low_client ||
1601 		    bldominates(clabel, slabel)))
1602 			resp->access |= ACCESS4_EXECUTE;
1603 		resp->supported |= ACCESS4_EXECUTE;
1604 	}
1605 
1606 	if (is_system_labeled() && !admin_low_client)
1607 		label_rele(tslabel);
1608 
1609 	*cs->statusp = resp->status = NFS4_OK;
1610 out:
1611 	DTRACE_NFSV4_2(op__access__done, struct compound_state *, cs,
1612 	    ACCESS4res *, resp);
1613 }
1614 
1615 /* ARGSUSED */
1616 static void
1617 rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1618     struct compound_state *cs)
1619 {
1620 	COMMIT4args *args = &argop->nfs_argop4_u.opcommit;
1621 	COMMIT4res *resp = &resop->nfs_resop4_u.opcommit;
1622 	int error;
1623 	vnode_t *vp = cs->vp;
1624 	cred_t *cr = cs->cr;
1625 	vattr_t va;
1626 	nfs4_srv_t *nsrv4;
1627 
1628 	DTRACE_NFSV4_2(op__commit__start, struct compound_state *, cs,
1629 	    COMMIT4args *, args);
1630 
1631 	if (vp == NULL) {
1632 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1633 		goto out;
1634 	}
1635 	if (cs->access == CS_ACCESS_DENIED) {
1636 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
1637 		goto out;
1638 	}
1639 
1640 	if (args->offset + args->count < args->offset) {
1641 		*cs->statusp = resp->status = NFS4ERR_INVAL;
1642 		goto out;
1643 	}
1644 
1645 	va.va_mask = AT_UID;
1646 	error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1647 
1648 	/*
1649 	 * If we can't get the attributes, then we can't do the
1650 	 * right access checking.  So, we'll fail the request.
1651 	 */
1652 	if (error) {
1653 		*cs->statusp = resp->status = puterrno4(error);
1654 		goto out;
1655 	}
1656 	if (rdonly4(req, cs)) {
1657 		*cs->statusp = resp->status = NFS4ERR_ROFS;
1658 		goto out;
1659 	}
1660 
1661 	if (vp->v_type != VREG) {
1662 		if (vp->v_type == VDIR)
1663 			resp->status = NFS4ERR_ISDIR;
1664 		else
1665 			resp->status = NFS4ERR_INVAL;
1666 		*cs->statusp = resp->status;
1667 		goto out;
1668 	}
1669 
1670 	if (crgetuid(cr) != va.va_uid &&
1671 	    (error = VOP_ACCESS(vp, VWRITE, 0, cs->cr, NULL))) {
1672 		*cs->statusp = resp->status = puterrno4(error);
1673 		goto out;
1674 	}
1675 
1676 	error = VOP_FSYNC(vp, FSYNC, cr, NULL);
1677 
1678 	if (error) {
1679 		*cs->statusp = resp->status = puterrno4(error);
1680 		goto out;
1681 	}
1682 
1683 	nsrv4 = nfs4_get_srv();
1684 	*cs->statusp = resp->status = NFS4_OK;
1685 	resp->writeverf = nsrv4->write4verf;
1686 out:
1687 	DTRACE_NFSV4_2(op__commit__done, struct compound_state *, cs,
1688 	    COMMIT4res *, resp);
1689 }
1690 
1691 /*
1692  * rfs4_op_mknod is called from rfs4_op_create after all initial verification
1693  * was completed. It does the nfsv4 create for special files.
1694  */
1695 /* ARGSUSED */
1696 static vnode_t *
1697 do_rfs4_op_mknod(CREATE4args *args, CREATE4res *resp, struct svc_req *req,
1698     struct compound_state *cs, vattr_t *vap, char *nm)
1699 {
1700 	int error;
1701 	cred_t *cr = cs->cr;
1702 	vnode_t *dvp = cs->vp;
1703 	vnode_t *vp = NULL;
1704 	int mode;
1705 	enum vcexcl excl;
1706 
1707 	switch (args->type) {
1708 	case NF4CHR:
1709 	case NF4BLK:
1710 		if (secpolicy_sys_devices(cr) != 0) {
1711 			*cs->statusp = resp->status = NFS4ERR_PERM;
1712 			return (NULL);
1713 		}
1714 		if (args->type == NF4CHR)
1715 			vap->va_type = VCHR;
1716 		else
1717 			vap->va_type = VBLK;
1718 		vap->va_rdev = makedevice(args->ftype4_u.devdata.specdata1,
1719 		    args->ftype4_u.devdata.specdata2);
1720 		vap->va_mask |= AT_RDEV;
1721 		break;
1722 	case NF4SOCK:
1723 		vap->va_type = VSOCK;
1724 		break;
1725 	case NF4FIFO:
1726 		vap->va_type = VFIFO;
1727 		break;
1728 	default:
1729 		*cs->statusp = resp->status = NFS4ERR_BADTYPE;
1730 		return (NULL);
1731 	}
1732 
1733 	/*
1734 	 * Must specify the mode.
1735 	 */
1736 	if (!(vap->va_mask & AT_MODE)) {
1737 		*cs->statusp = resp->status = NFS4ERR_INVAL;
1738 		return (NULL);
1739 	}
1740 
1741 	excl = EXCL;
1742 
1743 	mode = 0;
1744 
1745 	error = VOP_CREATE(dvp, nm, vap, excl, mode, &vp, cr, 0, NULL, NULL);
1746 	if (error) {
1747 		*cs->statusp = resp->status = puterrno4(error);
1748 		return (NULL);
1749 	}
1750 	return (vp);
1751 }
1752 
1753 /*
1754  * nfsv4 create is used to create non-regular files. For regular files,
1755  * use nfsv4 open.
1756  */
1757 /* ARGSUSED */
1758 static void
1759 rfs4_op_create(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1760     struct compound_state *cs)
1761 {
1762 	CREATE4args *args = &argop->nfs_argop4_u.opcreate;
1763 	CREATE4res *resp = &resop->nfs_resop4_u.opcreate;
1764 	int error;
1765 	struct vattr bva, iva, iva2, ava, *vap;
1766 	cred_t *cr = cs->cr;
1767 	vnode_t *dvp = cs->vp;
1768 	vnode_t *vp = NULL;
1769 	vnode_t *realvp;
1770 	char *nm, *lnm;
1771 	uint_t len, llen;
1772 	int syncval = 0;
1773 	struct nfs4_svgetit_arg sarg;
1774 	struct nfs4_ntov_table ntov;
1775 	struct statvfs64 sb;
1776 	nfsstat4 status;
1777 	struct sockaddr *ca;
1778 	char *name = NULL;
1779 	char *lname = NULL;
1780 
1781 	DTRACE_NFSV4_2(op__create__start, struct compound_state *, cs,
1782 	    CREATE4args *, args);
1783 
1784 	resp->attrset = 0;
1785 
1786 	if (dvp == NULL) {
1787 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1788 		goto out;
1789 	}
1790 
1791 	/*
1792 	 * If there is an unshared filesystem mounted on this vnode,
1793 	 * do not allow to create an object in this directory.
1794 	 */
1795 	if (vn_ismntpt(dvp)) {
1796 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
1797 		goto out;
1798 	}
1799 
1800 	/* Verify that type is correct */
1801 	switch (args->type) {
1802 	case NF4LNK:
1803 	case NF4BLK:
1804 	case NF4CHR:
1805 	case NF4SOCK:
1806 	case NF4FIFO:
1807 	case NF4DIR:
1808 		break;
1809 	default:
1810 		*cs->statusp = resp->status = NFS4ERR_BADTYPE;
1811 		goto out;
1812 	};
1813 
1814 	if (cs->access == CS_ACCESS_DENIED) {
1815 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
1816 		goto out;
1817 	}
1818 	if (dvp->v_type != VDIR) {
1819 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
1820 		goto out;
1821 	}
1822 	status = utf8_dir_verify(&args->objname);
1823 	if (status != NFS4_OK) {
1824 		*cs->statusp = resp->status = status;
1825 		goto out;
1826 	}
1827 
1828 	if (rdonly4(req, cs)) {
1829 		*cs->statusp = resp->status = NFS4ERR_ROFS;
1830 		goto out;
1831 	}
1832 
1833 	/*
1834 	 * Name of newly created object
1835 	 */
1836 	nm = utf8_to_fn(&args->objname, &len, NULL);
1837 	if (nm == NULL) {
1838 		*cs->statusp = resp->status = NFS4ERR_INVAL;
1839 		goto out;
1840 	}
1841 
1842 	if (len > MAXNAMELEN) {
1843 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1844 		kmem_free(nm, len);
1845 		goto out;
1846 	}
1847 
1848 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1849 	name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1850 	    MAXPATHLEN  + 1);
1851 
1852 	if (name == NULL) {
1853 		*cs->statusp = resp->status = NFS4ERR_INVAL;
1854 		kmem_free(nm, len);
1855 		goto out;
1856 	}
1857 
1858 	resp->attrset = 0;
1859 
1860 	sarg.sbp = &sb;
1861 	sarg.is_referral = B_FALSE;
1862 	nfs4_ntov_table_init(&ntov);
1863 
1864 	status = do_rfs4_set_attrs(&resp->attrset,
1865 	    &args->createattrs, cs, &sarg, &ntov, NFS4ATTR_SETIT);
1866 
1867 	if (sarg.vap->va_mask == 0 && status == NFS4_OK)
1868 		status = NFS4ERR_INVAL;
1869 
1870 	if (status != NFS4_OK) {
1871 		*cs->statusp = resp->status = status;
1872 		if (name != nm)
1873 			kmem_free(name, MAXPATHLEN + 1);
1874 		kmem_free(nm, len);
1875 		nfs4_ntov_table_free(&ntov, &sarg);
1876 		resp->attrset = 0;
1877 		goto out;
1878 	}
1879 
1880 	/* Get "before" change value */
1881 	bva.va_mask = AT_CTIME|AT_SEQ|AT_MODE;
1882 	error = VOP_GETATTR(dvp, &bva, 0, cr, NULL);
1883 	if (error) {
1884 		*cs->statusp = resp->status = puterrno4(error);
1885 		if (name != nm)
1886 			kmem_free(name, MAXPATHLEN + 1);
1887 		kmem_free(nm, len);
1888 		nfs4_ntov_table_free(&ntov, &sarg);
1889 		resp->attrset = 0;
1890 		goto out;
1891 	}
1892 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bva.va_ctime)
1893 
1894 	vap = sarg.vap;
1895 
1896 	/*
1897 	 * Set the default initial values for attributes when the parent
1898 	 * directory does not have the VSUID/VSGID bit set and they have
1899 	 * not been specified in createattrs.
1900 	 */
1901 	if (!(bva.va_mode & VSUID) && (vap->va_mask & AT_UID) == 0) {
1902 		vap->va_uid = crgetuid(cr);
1903 		vap->va_mask |= AT_UID;
1904 	}
1905 	if (!(bva.va_mode & VSGID) && (vap->va_mask & AT_GID) == 0) {
1906 		vap->va_gid = crgetgid(cr);
1907 		vap->va_mask |= AT_GID;
1908 	}
1909 
1910 	vap->va_mask |= AT_TYPE;
1911 	switch (args->type) {
1912 	case NF4DIR:
1913 		vap->va_type = VDIR;
1914 		if ((vap->va_mask & AT_MODE) == 0) {
1915 			vap->va_mode = 0700;	/* default: owner rwx only */
1916 			vap->va_mask |= AT_MODE;
1917 		}
1918 		error = VOP_MKDIR(dvp, name, vap, &vp, cr, NULL, 0, NULL);
1919 		if (error)
1920 			break;
1921 
1922 		/*
1923 		 * Get the initial "after" sequence number, if it fails,
1924 		 * set to zero
1925 		 */
1926 		iva.va_mask = AT_SEQ;
1927 		if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1928 			iva.va_seq = 0;
1929 		break;
1930 	case NF4LNK:
1931 		vap->va_type = VLNK;
1932 		if ((vap->va_mask & AT_MODE) == 0) {
1933 			vap->va_mode = 0700;	/* default: owner rwx only */
1934 			vap->va_mask |= AT_MODE;
1935 		}
1936 
1937 		/*
1938 		 * symlink names must be treated as data
1939 		 */
1940 		lnm = utf8_to_str((utf8string *)&args->ftype4_u.linkdata,
1941 		    &llen, NULL);
1942 
1943 		if (lnm == NULL) {
1944 			*cs->statusp = resp->status = NFS4ERR_INVAL;
1945 			if (name != nm)
1946 				kmem_free(name, MAXPATHLEN + 1);
1947 			kmem_free(nm, len);
1948 			nfs4_ntov_table_free(&ntov, &sarg);
1949 			resp->attrset = 0;
1950 			goto out;
1951 		}
1952 
1953 		if (llen > MAXPATHLEN) {
1954 			*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1955 			if (name != nm)
1956 				kmem_free(name, MAXPATHLEN + 1);
1957 			kmem_free(nm, len);
1958 			kmem_free(lnm, llen);
1959 			nfs4_ntov_table_free(&ntov, &sarg);
1960 			resp->attrset = 0;
1961 			goto out;
1962 		}
1963 
1964 		lname = nfscmd_convname(ca, cs->exi, lnm,
1965 		    NFSCMD_CONV_INBOUND, MAXPATHLEN  + 1);
1966 
1967 		if (lname == NULL) {
1968 			*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
1969 			if (name != nm)
1970 				kmem_free(name, MAXPATHLEN + 1);
1971 			kmem_free(nm, len);
1972 			kmem_free(lnm, llen);
1973 			nfs4_ntov_table_free(&ntov, &sarg);
1974 			resp->attrset = 0;
1975 			goto out;
1976 		}
1977 
1978 		error = VOP_SYMLINK(dvp, name, vap, lname, cr, NULL, 0);
1979 		if (lname != lnm)
1980 			kmem_free(lname, MAXPATHLEN + 1);
1981 		kmem_free(lnm, llen);
1982 		if (error)
1983 			break;
1984 
1985 		/*
1986 		 * Get the initial "after" sequence number, if it fails,
1987 		 * set to zero
1988 		 */
1989 		iva.va_mask = AT_SEQ;
1990 		if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1991 			iva.va_seq = 0;
1992 
1993 		error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
1994 		    NULL, NULL, NULL);
1995 		if (error)
1996 			break;
1997 
1998 		/*
1999 		 * va_seq is not safe over VOP calls, check it again
2000 		 * if it has changed zero out iva to force atomic = FALSE.
2001 		 */
2002 		iva2.va_mask = AT_SEQ;
2003 		if (VOP_GETATTR(dvp, &iva2, 0, cs->cr, NULL) ||
2004 		    iva2.va_seq != iva.va_seq)
2005 			iva.va_seq = 0;
2006 		break;
2007 	default:
2008 		/*
2009 		 * probably a special file.
2010 		 */
2011 		if ((vap->va_mask & AT_MODE) == 0) {
2012 			vap->va_mode = 0600;	/* default: owner rw only */
2013 			vap->va_mask |= AT_MODE;
2014 		}
2015 		syncval = FNODSYNC;
2016 		/*
2017 		 * We know this will only generate one VOP call
2018 		 */
2019 		vp = do_rfs4_op_mknod(args, resp, req, cs, vap, name);
2020 
2021 		if (vp == NULL) {
2022 			if (name != nm)
2023 				kmem_free(name, MAXPATHLEN + 1);
2024 			kmem_free(nm, len);
2025 			nfs4_ntov_table_free(&ntov, &sarg);
2026 			resp->attrset = 0;
2027 			goto out;
2028 		}
2029 
2030 		/*
2031 		 * Get the initial "after" sequence number, if it fails,
2032 		 * set to zero
2033 		 */
2034 		iva.va_mask = AT_SEQ;
2035 		if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
2036 			iva.va_seq = 0;
2037 
2038 		break;
2039 	}
2040 	if (name != nm)
2041 		kmem_free(name, MAXPATHLEN + 1);
2042 	kmem_free(nm, len);
2043 
2044 	if (error) {
2045 		*cs->statusp = resp->status = puterrno4(error);
2046 	}
2047 
2048 	/*
2049 	 * Force modified data and metadata out to stable storage.
2050 	 */
2051 	(void) VOP_FSYNC(dvp, 0, cr, NULL);
2052 
2053 	if (resp->status != NFS4_OK) {
2054 		if (vp != NULL)
2055 			VN_RELE(vp);
2056 		nfs4_ntov_table_free(&ntov, &sarg);
2057 		resp->attrset = 0;
2058 		goto out;
2059 	}
2060 
2061 	/*
2062 	 * Finish setup of cinfo response, "before" value already set.
2063 	 * Get "after" change value, if it fails, simply return the
2064 	 * before value.
2065 	 */
2066 	ava.va_mask = AT_CTIME|AT_SEQ;
2067 	if (VOP_GETATTR(dvp, &ava, 0, cr, NULL)) {
2068 		ava.va_ctime = bva.va_ctime;
2069 		ava.va_seq = 0;
2070 	}
2071 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, ava.va_ctime);
2072 
2073 	/*
2074 	 * True verification that object was created with correct
2075 	 * attrs is impossible.  The attrs could have been changed
2076 	 * immediately after object creation.  If attributes did
2077 	 * not verify, the only recourse for the server is to
2078 	 * destroy the object.  Maybe if some attrs (like gid)
2079 	 * are set incorrectly, the object should be destroyed;
2080 	 * however, seems bad as a default policy.  Do we really
2081 	 * want to destroy an object over one of the times not
2082 	 * verifying correctly?  For these reasons, the server
2083 	 * currently sets bits in attrset for createattrs
2084 	 * that were set; however, no verification is done.
2085 	 *
2086 	 * vmask_to_nmask accounts for vattr bits set on create
2087 	 *	[do_rfs4_set_attrs() only sets resp bits for
2088 	 *	 non-vattr/vfs bits.]
2089 	 * Mask off any bits set by default so as not to return
2090 	 * more attrset bits than were requested in createattrs
2091 	 */
2092 	nfs4_vmask_to_nmask(sarg.vap->va_mask, &resp->attrset);
2093 	resp->attrset &= args->createattrs.attrmask;
2094 	nfs4_ntov_table_free(&ntov, &sarg);
2095 
2096 	error = makefh4(&cs->fh, vp, cs->exi);
2097 	if (error) {
2098 		*cs->statusp = resp->status = puterrno4(error);
2099 	}
2100 
2101 	/*
2102 	 * The cinfo.atomic = TRUE only if we got no errors, we have
2103 	 * non-zero va_seq's, and it has incremented by exactly one
2104 	 * during the creation and it didn't change during the VOP_LOOKUP
2105 	 * or VOP_FSYNC.
2106 	 */
2107 	if (!error && bva.va_seq && iva.va_seq && ava.va_seq &&
2108 	    iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
2109 		resp->cinfo.atomic = TRUE;
2110 	else
2111 		resp->cinfo.atomic = FALSE;
2112 
2113 	/*
2114 	 * Force modified metadata out to stable storage.
2115 	 *
2116 	 * if a underlying vp exists, pass it to VOP_FSYNC
2117 	 */
2118 	if (VOP_REALVP(vp, &realvp, NULL) == 0)
2119 		(void) VOP_FSYNC(realvp, syncval, cr, NULL);
2120 	else
2121 		(void) VOP_FSYNC(vp, syncval, cr, NULL);
2122 
2123 	if (resp->status != NFS4_OK) {
2124 		VN_RELE(vp);
2125 		goto out;
2126 	}
2127 	if (cs->vp)
2128 		VN_RELE(cs->vp);
2129 
2130 	cs->vp = vp;
2131 	*cs->statusp = resp->status = NFS4_OK;
2132 out:
2133 	DTRACE_NFSV4_2(op__create__done, struct compound_state *, cs,
2134 	    CREATE4res *, resp);
2135 }
2136 
2137 /*ARGSUSED*/
2138 static void
2139 rfs4_op_delegpurge(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2140     struct compound_state *cs)
2141 {
2142 	DTRACE_NFSV4_2(op__delegpurge__start, struct compound_state *, cs,
2143 	    DELEGPURGE4args *, &argop->nfs_argop4_u.opdelegpurge);
2144 
2145 	rfs4_op_inval(argop, resop, req, cs);
2146 
2147 	DTRACE_NFSV4_2(op__delegpurge__done, struct compound_state *, cs,
2148 	    DELEGPURGE4res *, &resop->nfs_resop4_u.opdelegpurge);
2149 }
2150 
2151 /*ARGSUSED*/
2152 static void
2153 rfs4_op_delegreturn(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2154     struct compound_state *cs)
2155 {
2156 	DELEGRETURN4args *args = &argop->nfs_argop4_u.opdelegreturn;
2157 	DELEGRETURN4res *resp = &resop->nfs_resop4_u.opdelegreturn;
2158 	rfs4_deleg_state_t *dsp;
2159 	nfsstat4 status;
2160 
2161 	DTRACE_NFSV4_2(op__delegreturn__start, struct compound_state *, cs,
2162 	    DELEGRETURN4args *, args);
2163 
2164 	status = rfs4_get_deleg_state(&args->deleg_stateid, &dsp);
2165 	resp->status = *cs->statusp = status;
2166 	if (status != NFS4_OK)
2167 		goto out;
2168 
2169 	/* Ensure specified filehandle matches */
2170 	if (cs->vp != dsp->rds_finfo->rf_vp) {
2171 		resp->status = *cs->statusp = NFS4ERR_BAD_STATEID;
2172 	} else
2173 		rfs4_return_deleg(dsp, FALSE);
2174 
2175 	rfs4_update_lease(dsp->rds_client);
2176 
2177 	rfs4_deleg_state_rele(dsp);
2178 out:
2179 	DTRACE_NFSV4_2(op__delegreturn__done, struct compound_state *, cs,
2180 	    DELEGRETURN4res *, resp);
2181 }
2182 
2183 /*
2184  * Check to see if a given "flavor" is an explicitly shared flavor.
2185  * The assumption of this routine is the "flavor" is already a valid
2186  * flavor in the secinfo list of "exi".
2187  *
2188  *	e.g.
2189  *		# share -o sec=flavor1 /export
2190  *		# share -o sec=flavor2 /export/home
2191  *
2192  *		flavor2 is not an explicitly shared flavor for /export,
2193  *		however it is in the secinfo list for /export thru the
2194  *		server namespace setup.
2195  */
2196 int
2197 is_exported_sec(int flavor, struct exportinfo *exi)
2198 {
2199 	int	i;
2200 	struct secinfo *sp;
2201 
2202 	sp = exi->exi_export.ex_secinfo;
2203 	for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
2204 		if (flavor == sp[i].s_secinfo.sc_nfsnum ||
2205 		    sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) {
2206 			return (SEC_REF_EXPORTED(&sp[i]));
2207 		}
2208 	}
2209 
2210 	/* Should not reach this point based on the assumption */
2211 	return (0);
2212 }
2213 
2214 /*
2215  * Check if the security flavor used in the request matches what is
2216  * required at the export point or at the root pseudo node (exi_root).
2217  *
2218  * returns 1 if there's a match or if exported with AUTH_NONE; 0 otherwise.
2219  *
2220  */
2221 static int
2222 secinfo_match_or_authnone(struct compound_state *cs)
2223 {
2224 	int	i;
2225 	struct secinfo *sp;
2226 
2227 	/*
2228 	 * Check cs->nfsflavor (from the request) against
2229 	 * the current export data in cs->exi.
2230 	 */
2231 	sp = cs->exi->exi_export.ex_secinfo;
2232 	for (i = 0; i < cs->exi->exi_export.ex_seccnt; i++) {
2233 		if (cs->nfsflavor == sp[i].s_secinfo.sc_nfsnum ||
2234 		    sp[i].s_secinfo.sc_nfsnum == AUTH_NONE)
2235 			return (1);
2236 	}
2237 
2238 	return (0);
2239 }
2240 
2241 /*
2242  * Check the access authority for the client and return the correct error.
2243  */
2244 nfsstat4
2245 call_checkauth4(struct compound_state *cs, struct svc_req *req)
2246 {
2247 	int	authres;
2248 
2249 	/*
2250 	 * First, check if the security flavor used in the request
2251 	 * are among the flavors set in the server namespace.
2252 	 */
2253 	if (!secinfo_match_or_authnone(cs)) {
2254 		*cs->statusp = NFS4ERR_WRONGSEC;
2255 		return (*cs->statusp);
2256 	}
2257 
2258 	authres = checkauth4(cs, req);
2259 
2260 	if (authres > 0) {
2261 		*cs->statusp = NFS4_OK;
2262 		if (! (cs->access & CS_ACCESS_LIMITED))
2263 			cs->access = CS_ACCESS_OK;
2264 	} else if (authres == 0) {
2265 		*cs->statusp = NFS4ERR_ACCESS;
2266 	} else if (authres == -2) {
2267 		*cs->statusp = NFS4ERR_WRONGSEC;
2268 	} else {
2269 		*cs->statusp = NFS4ERR_DELAY;
2270 	}
2271 	return (*cs->statusp);
2272 }
2273 
2274 /*
2275  * bitmap4_to_attrmask is called by getattr and readdir.
2276  * It sets up the vattr mask and determines whether vfsstat call is needed
2277  * based on the input bitmap.
2278  * Returns nfsv4 status.
2279  */
2280 static nfsstat4
2281 bitmap4_to_attrmask(bitmap4 breq, struct nfs4_svgetit_arg *sargp)
2282 {
2283 	int i;
2284 	uint_t	va_mask;
2285 	struct statvfs64 *sbp = sargp->sbp;
2286 
2287 	sargp->sbp = NULL;
2288 	sargp->flag = 0;
2289 	sargp->rdattr_error = NFS4_OK;
2290 	sargp->mntdfid_set = FALSE;
2291 	if (sargp->cs->vp)
2292 		sargp->xattr = get_fh4_flag(&sargp->cs->fh,
2293 		    FH4_ATTRDIR | FH4_NAMEDATTR);
2294 	else
2295 		sargp->xattr = 0;
2296 
2297 	/*
2298 	 * Set rdattr_error_req to true if return error per
2299 	 * failed entry rather than fail the readdir.
2300 	 */
2301 	if (breq & FATTR4_RDATTR_ERROR_MASK)
2302 		sargp->rdattr_error_req = 1;
2303 	else
2304 		sargp->rdattr_error_req = 0;
2305 
2306 	/*
2307 	 * generate the va_mask
2308 	 * Handle the easy cases first
2309 	 */
2310 	switch (breq) {
2311 	case NFS4_NTOV_ATTR_MASK:
2312 		sargp->vap->va_mask = NFS4_NTOV_ATTR_AT_MASK;
2313 		return (NFS4_OK);
2314 
2315 	case NFS4_FS_ATTR_MASK:
2316 		sargp->vap->va_mask = NFS4_FS_ATTR_AT_MASK;
2317 		sargp->sbp = sbp;
2318 		return (NFS4_OK);
2319 
2320 	case NFS4_NTOV_ATTR_CACHE_MASK:
2321 		sargp->vap->va_mask = NFS4_NTOV_ATTR_CACHE_AT_MASK;
2322 		return (NFS4_OK);
2323 
2324 	case FATTR4_LEASE_TIME_MASK:
2325 		sargp->vap->va_mask = 0;
2326 		return (NFS4_OK);
2327 
2328 	default:
2329 		va_mask = 0;
2330 		for (i = 0; i < nfs4_ntov_map_size; i++) {
2331 			if ((breq & nfs4_ntov_map[i].fbit) &&
2332 			    nfs4_ntov_map[i].vbit)
2333 				va_mask |= nfs4_ntov_map[i].vbit;
2334 		}
2335 
2336 		/*
2337 		 * Check is vfsstat is needed
2338 		 */
2339 		if (breq & NFS4_FS_ATTR_MASK)
2340 			sargp->sbp = sbp;
2341 
2342 		sargp->vap->va_mask = va_mask;
2343 		return (NFS4_OK);
2344 	}
2345 	/* NOTREACHED */
2346 }
2347 
2348 /*
2349  * bitmap4_get_sysattrs is called by getattr and readdir.
2350  * It calls both VOP_GETATTR and VFS_STATVFS calls to get the attrs.
2351  * Returns nfsv4 status.
2352  */
2353 static nfsstat4
2354 bitmap4_get_sysattrs(struct nfs4_svgetit_arg *sargp)
2355 {
2356 	int error;
2357 	struct compound_state *cs = sargp->cs;
2358 	vnode_t *vp = cs->vp;
2359 
2360 	if (sargp->sbp != NULL) {
2361 		if (error = VFS_STATVFS(vp->v_vfsp, sargp->sbp)) {
2362 			sargp->sbp = NULL;	/* to identify error */
2363 			return (puterrno4(error));
2364 		}
2365 	}
2366 
2367 	return (rfs4_vop_getattr(vp, sargp->vap, 0, cs->cr));
2368 }
2369 
2370 static void
2371 nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp)
2372 {
2373 	ntovp->na = kmem_zalloc(sizeof (union nfs4_attr_u) * nfs4_ntov_map_size,
2374 	    KM_SLEEP);
2375 	ntovp->attrcnt = 0;
2376 	ntovp->vfsstat = FALSE;
2377 }
2378 
2379 static void
2380 nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
2381     struct nfs4_svgetit_arg *sargp)
2382 {
2383 	int i;
2384 	union nfs4_attr_u *na;
2385 	uint8_t *amap;
2386 
2387 	/*
2388 	 * XXX Should do the same checks for whether the bit is set
2389 	 */
2390 	for (i = 0, na = ntovp->na, amap = ntovp->amap;
2391 	    i < ntovp->attrcnt; i++, na++, amap++) {
2392 		(void) (*nfs4_ntov_map[*amap].sv_getit)(
2393 		    NFS4ATTR_FREEIT, sargp, na);
2394 	}
2395 	if ((sargp->op == NFS4ATTR_SETIT) || (sargp->op == NFS4ATTR_VERIT)) {
2396 		/*
2397 		 * xdr_free for getattr will be done later
2398 		 */
2399 		for (i = 0, na = ntovp->na, amap = ntovp->amap;
2400 		    i < ntovp->attrcnt; i++, na++, amap++) {
2401 			xdr_free(nfs4_ntov_map[*amap].xfunc, (caddr_t)na);
2402 		}
2403 	}
2404 	kmem_free(ntovp->na, sizeof (union nfs4_attr_u) * nfs4_ntov_map_size);
2405 }
2406 
/*
 * do_rfs4_op_getattr gets the system attrs and converts into fattr4.
 *
 * breq is the bitmap of requested attributes, fattrp is the fattr4 to fill
 * in and sargp is the per-request state handed to every attribute's
 * sv_getit routine (its op and flag fields are reset here).
 *
 * On the normal path fattrp->attrmask holds the bits of the attributes
 * that were fetched, and fattrp->attrlist4 / attrlist4_len describe a
 * kmem_zalloc'ed buffer with their XDR encoding (NULL / 0 when nothing was
 * encoded).  That buffer is freed by rfs4_op_getattr_free().
 *
 * Returns:
 *	NFS4_OK			success (including an empty request)
 *	NFS4ERR_INVAL		a write-only attribute was requested
 *	NFS4ERR_SERVERFAULT	XDR encoding of an attribute failed
 *	puterrno4(error)	an sv_getit routine failed and per-entry
 *				rdattr_error reporting was not requested;
 *				on this path attrlist4 and attrlist4_len
 *				are not written by this function
 */
static nfsstat4
do_rfs4_op_getattr(bitmap4 breq, fattr4 *fattrp,
    struct nfs4_svgetit_arg *sargp)
{
	int error = 0;
	int i, k;
	struct nfs4_ntov_table ntov;
	XDR xdr;
	ulong_t xdr_size;
	char *xdr_attrs;
	nfsstat4 status = NFS4_OK;
	/* remembered so a change made during the fetch loop can be detected */
	nfsstat4 prev_rdattr_error = sargp->rdattr_error;
	union nfs4_attr_u *na;
	uint8_t *amap;

	sargp->op = NFS4ATTR_GETIT;
	sargp->flag = 0;

	fattrp->attrmask = 0;
	/* if no bits requested, then return empty fattr4 */
	if (breq == 0) {
		fattrp->attrlist4_len = 0;
		fattrp->attrlist4 = NULL;
		return (NFS4_OK);
	}

	/*
	 * return NFS4ERR_INVAL when client requests write-only attrs
	 */
	if (breq & (FATTR4_TIME_ACCESS_SET_MASK | FATTR4_TIME_MODIFY_SET_MASK))
		return (NFS4ERR_INVAL);

	/*
	 * na and amap advance together: na is the next free value slot and
	 * amap records which nfs4_ntov_map entry produced that slot.
	 */
	nfs4_ntov_table_init(&ntov);
	na = ntov.na;
	amap = ntov.amap;

	/*
	 * Now loop to get or verify the attrs
	 */
	for (i = 0; i < nfs4_ntov_map_size; i++) {
		if (breq & nfs4_ntov_map[i].fbit) {
			/* requested attrs that are not supported are skipped */
			if ((*nfs4_ntov_map[i].sv_getit)(
			    NFS4ATTR_SUPPORTED, sargp, NULL) == 0) {

				error = (*nfs4_ntov_map[i].sv_getit)(
				    NFS4ATTR_GETIT, sargp, na);

				/*
				 * Possible error values:
				 * >0 if sv_getit failed to
				 * get the attr; 0 if succeeded;
				 * <0 if rdattr_error and the
				 * attribute cannot be returned.
				 */
				if (error && !(sargp->rdattr_error_req))
					goto done;
				/*
				 * If error then just for entry
				 */
				if (error == 0) {
					/* keep the value: claim the slot */
					fattrp->attrmask |=
					    nfs4_ntov_map[i].fbit;
					*amap++ =
					    (uint8_t)nfs4_ntov_map[i].nval;
					na++;
					(ntov.attrcnt)++;
				} else if ((error > 0) &&
				    (sargp->rdattr_error == NFS4_OK)) {
					/* only the first failure is recorded */
					sargp->rdattr_error = puterrno4(error);
				}
				/* the failure was absorbed into rdattr_error */
				error = 0;
			}
		}
	}

	/*
	 * If rdattr_error was set after the return value for it was assigned,
	 * update it.
	 */
	if (prev_rdattr_error != sargp->rdattr_error) {
		na = ntov.na;
		amap = ntov.amap;
		for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
			k = *amap;
			/* skip the slots recorded ahead of rdattr_error */
			if (k < FATTR4_RDATTR_ERROR) {
				continue;
			}
			/* re-fetch rdattr_error into its existing slot */
			if ((k == FATTR4_RDATTR_ERROR) &&
			    ((*nfs4_ntov_map[k].sv_getit)(
			    NFS4ATTR_SUPPORTED, sargp, NULL) == 0)) {

				(void) (*nfs4_ntov_map[k].sv_getit)(
				    NFS4ATTR_GETIT, sargp, na);
			}
			/* first slot at or past rdattr_error ends the scan */
			break;
		}
	}

	/* First pass: total encoded size of everything that was fetched */
	xdr_size = 0;
	na = ntov.na;
	amap = ntov.amap;
	for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
		xdr_size += xdr_sizeof(nfs4_ntov_map[*amap].xfunc, na);
	}

	fattrp->attrlist4_len = xdr_size;
	if (xdr_size) {
		/* freed by rfs4_op_getattr_free() */
		fattrp->attrlist4 = xdr_attrs = kmem_zalloc(xdr_size, KM_SLEEP);

		xdrmem_create(&xdr, xdr_attrs, xdr_size, XDR_ENCODE);

		/* Second pass: encode each value into the buffer in order */
		na = ntov.na;
		amap = ntov.amap;
		for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
			if (!(*nfs4_ntov_map[*amap].xfunc)(&xdr, na)) {
				DTRACE_PROBE1(nfss__e__getattr4_encfail,
				    int, *amap);
				status = NFS4ERR_SERVERFAULT;
				break;
			}
		}
		/* xdrmem_destroy(&xdrs); */	/* NO-OP */
	} else {
		fattrp->attrlist4 = NULL;
	}
done:

	/* common exit: the conversion table is released on every path */
	nfs4_ntov_table_free(&ntov, sargp);

	/* error is only still set when the fetch loop bailed out early */
	if (error != 0)
		status = puterrno4(error);

	return (status);
}
2545 
2546 /* ARGSUSED */
2547 static void
2548 rfs4_op_getattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2549     struct compound_state *cs)
2550 {
2551 	GETATTR4args *args = &argop->nfs_argop4_u.opgetattr;
2552 	GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2553 	struct nfs4_svgetit_arg sarg;
2554 	struct statvfs64 sb;
2555 	nfsstat4 status;
2556 
2557 	DTRACE_NFSV4_2(op__getattr__start, struct compound_state *, cs,
2558 	    GETATTR4args *, args);
2559 
2560 	if (cs->vp == NULL) {
2561 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2562 		goto out;
2563 	}
2564 
2565 	if (cs->access == CS_ACCESS_DENIED) {
2566 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
2567 		goto out;
2568 	}
2569 
2570 	sarg.sbp = &sb;
2571 	sarg.cs = cs;
2572 	sarg.is_referral = B_FALSE;
2573 
2574 	status = bitmap4_to_attrmask(args->attr_request, &sarg);
2575 	if (status == NFS4_OK) {
2576 
2577 		status = bitmap4_get_sysattrs(&sarg);
2578 		if (status == NFS4_OK) {
2579 
2580 			/* Is this a referral? */
2581 			if (vn_is_nfs_reparse(cs->vp, cs->cr)) {
2582 				/* Older V4 Solaris client sees a link */
2583 				if (client_is_downrev(req))
2584 					sarg.vap->va_type = VLNK;
2585 				else
2586 					sarg.is_referral = B_TRUE;
2587 			}
2588 
2589 			status = do_rfs4_op_getattr(args->attr_request,
2590 			    &resp->obj_attributes, &sarg);
2591 		}
2592 	}
2593 	*cs->statusp = resp->status = status;
2594 out:
2595 	DTRACE_NFSV4_2(op__getattr__done, struct compound_state *, cs,
2596 	    GETATTR4res *, resp);
2597 }
2598 
2599 static void
2600 rfs4_op_getattr_free(nfs_resop4 *resop)
2601 {
2602 	GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2603 
2604 	nfs4_fattr4_free(&resp->obj_attributes);
2605 }
2606 
2607 /* ARGSUSED */
2608 static void
2609 rfs4_op_getfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2610     struct compound_state *cs)
2611 {
2612 	GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2613 
2614 	DTRACE_NFSV4_1(op__getfh__start, struct compound_state *, cs);
2615 
2616 	if (cs->vp == NULL) {
2617 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2618 		goto out;
2619 	}
2620 	if (cs->access == CS_ACCESS_DENIED) {
2621 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
2622 		goto out;
2623 	}
2624 
2625 	/* check for reparse point at the share point */
2626 	if (cs->exi->exi_moved || vn_is_nfs_reparse(cs->exi->exi_vp, cs->cr)) {
2627 		/* it's all bad */
2628 		cs->exi->exi_moved = 1;
2629 		*cs->statusp = resp->status = NFS4ERR_MOVED;
2630 		DTRACE_PROBE2(nfs4serv__func__referral__shared__moved,
2631 		    vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2632 		return;
2633 	}
2634 
2635 	/* check for reparse point at vp */
2636 	if (vn_is_nfs_reparse(cs->vp, cs->cr) && !client_is_downrev(req)) {
2637 		/* it's not all bad */
2638 		*cs->statusp = resp->status = NFS4ERR_MOVED;
2639 		DTRACE_PROBE2(nfs4serv__func__referral__moved,
2640 		    vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2641 		return;
2642 	}
2643 
2644 	resp->object.nfs_fh4_val =
2645 	    kmem_alloc(cs->fh.nfs_fh4_len, KM_SLEEP);
2646 	nfs_fh4_copy(&cs->fh, &resp->object);
2647 	*cs->statusp = resp->status = NFS4_OK;
2648 out:
2649 	DTRACE_NFSV4_2(op__getfh__done, struct compound_state *, cs,
2650 	    GETFH4res *, resp);
2651 }
2652 
2653 static void
2654 rfs4_op_getfh_free(nfs_resop4 *resop)
2655 {
2656 	GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2657 
2658 	if (resp->status == NFS4_OK &&
2659 	    resp->object.nfs_fh4_val != NULL) {
2660 		kmem_free(resp->object.nfs_fh4_val, resp->object.nfs_fh4_len);
2661 		resp->object.nfs_fh4_val = NULL;
2662 		resp->object.nfs_fh4_len = 0;
2663 	}
2664 }
2665 
2666 /*
2667  * illegal: args: void
2668  *	    res : status (NFS4ERR_OP_ILLEGAL)
2669  */
2670 /* ARGSUSED */
2671 static void
2672 rfs4_op_illegal(nfs_argop4 *argop, nfs_resop4 *resop,
2673     struct svc_req *req, struct compound_state *cs)
2674 {
2675 	ILLEGAL4res *resp = &resop->nfs_resop4_u.opillegal;
2676 
2677 	resop->resop = OP_ILLEGAL;
2678 	*cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL;
2679 }
2680 
2681 /* ARGSUSED */
2682 static void
2683 rfs4_op_notsup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2684     struct compound_state *cs)
2685 {
2686 	*cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_NOTSUPP;
2687 }
2688 
/*
 * link: args: SAVED_FH: file, CURRENT_FH: target directory
 *	 res: status. If success - CURRENT_FH unchanged, return change_info
 *
 * Creates args->newname in the current-filehandle directory as a link to
 * the saved-filehandle object via VOP_LINK(), then fills in resp->cinfo
 * with the directory's change value before and after the operation.
 *
 * The status is stored in both resp->status and *cs->statusp:
 *	NFS4ERR_NOFILEHANDLE	saved or current filehandle is not set
 *	NFS4ERR_ACCESS		the target is a mount point, or access has
 *				been denied
 *	NFS4ERR_ISDIR		the source object is a directory
 *	NFS4ERR_NOTDIR		the target is not a directory
 *	NFS4ERR_XDEV		source and target belong to different exports
 *	NFS4ERR_INVAL		the name could not be converted
 *	NFS4ERR_NAMETOOLONG	the converted name exceeds MAXNAMELEN
 *	NFS4ERR_ROFS		rdonly4() returned non-zero
 *	other errors		whatever utf8_dir_verify() returned, or
 *				puterrno4() of a failing VOP_GETATTR/VOP_LINK
 *	NFS4_OK			the link was created
 */
/* ARGSUSED */
static void
rfs4_op_link(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
    struct compound_state *cs)
{
	LINK4args *args = &argop->nfs_argop4_u.oplink;
	LINK4res *resp = &resop->nfs_resop4_u.oplink;
	int error;
	vnode_t *vp;
	vnode_t *dvp;
	/* directory attributes: before, right after the link, after fsync */
	struct vattr bdva, idva, adva;
	char *nm;
	uint_t  len;
	struct sockaddr *ca;
	char *name = NULL;
	nfsstat4 status;

	DTRACE_NFSV4_2(op__link__start, struct compound_state *, cs,
	    LINK4args *, args);

	/* SAVED_FH: source object */
	vp = cs->saved_vp;
	if (vp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		goto out;
	}

	/* CURRENT_FH: target directory */
	dvp = cs->vp;
	if (dvp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		goto out;
	}

	/*
	 * If there is a non-shared filesystem mounted on this vnode,
	 * do not allow to link any file in this directory.
	 */
	if (vn_ismntpt(dvp)) {
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		goto out;
	}

	if (cs->access == CS_ACCESS_DENIED) {
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		goto out;
	}

	/* Check source object's type validity */
	if (vp->v_type == VDIR) {
		*cs->statusp = resp->status = NFS4ERR_ISDIR;
		goto out;
	}

	/* Check target directory's type */
	if (dvp->v_type != VDIR) {
		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
		goto out;
	}

	/* Both filehandles must come from the same export */
	if (cs->saved_exi != cs->exi) {
		*cs->statusp = resp->status = NFS4ERR_XDEV;
		goto out;
	}

	status = utf8_dir_verify(&args->newname);
	if (status != NFS4_OK) {
		*cs->statusp = resp->status = status;
		goto out;
	}

	/*
	 * nm is an allocated copy of the new name; len is the size that
	 * has to be handed back to kmem_free() for it.  From here on every
	 * early exit must free nm.
	 */
	nm = utf8_to_fn(&args->newname, &len, NULL);
	if (nm == NULL) {
		*cs->statusp = resp->status = NFS4ERR_INVAL;
		goto out;
	}

	if (len > MAXNAMELEN) {
		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
		kmem_free(nm, len);
		goto out;
	}

	if (rdonly4(req, cs)) {
		*cs->statusp = resp->status = NFS4ERR_ROFS;
		kmem_free(nm, len);
		goto out;
	}

	/* Get "before" change value */
	bdva.va_mask = AT_CTIME|AT_SEQ;
	error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
	if (error) {
		*cs->statusp = resp->status = puterrno4(error);
		kmem_free(nm, len);
		goto out;
	}

	/*
	 * Convert the name for this client/export.  The result is either
	 * nm itself or a separate MAXPATHLEN + 1 byte buffer, which is why
	 * the free below is conditional on name != nm.
	 */
	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
	name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
	    MAXPATHLEN  + 1);

	if (name == NULL) {
		*cs->statusp = resp->status = NFS4ERR_INVAL;
		kmem_free(nm, len);
		goto out;
	}

	NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)

	error = VOP_LINK(dvp, vp, name, cs->cr, NULL, 0);

	/* The name buffers are no longer needed, whatever the outcome */
	if (nm != name)
		kmem_free(name, MAXPATHLEN + 1);
	kmem_free(nm, len);

	/*
	 * Get the initial "after" sequence number, if it fails, set to zero
	 */
	idva.va_mask = AT_SEQ;
	if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
		idva.va_seq = 0;

	/*
	 * Force modified data and metadata out to stable storage.
	 * This is done even when VOP_LINK failed; the error is only
	 * examined afterwards.
	 */
	(void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
	(void) VOP_FSYNC(dvp, 0, cs->cr, NULL);

	if (error) {
		*cs->statusp = resp->status = puterrno4(error);
		goto out;
	}

	/*
	 * Get "after" change value, if it fails, simply return the
	 * before value.
	 */
	adva.va_mask = AT_CTIME|AT_SEQ;
	if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
		adva.va_ctime = bdva.va_ctime;
		adva.va_seq = 0;
	}

	NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)

	/*
	 * The cinfo.atomic = TRUE only if we have
	 * non-zero va_seq's, and it has incremented by exactly one
	 * during the VOP_LINK and it didn't change during the VOP_FSYNC.
	 */
	if (bdva.va_seq && idva.va_seq && adva.va_seq &&
	    idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
		resp->cinfo.atomic = TRUE;
	else
		resp->cinfo.atomic = FALSE;

	*cs->statusp = resp->status = NFS4_OK;
out:
	DTRACE_NFSV4_2(op__link__done, struct compound_state *, cs,
	    LINK4res *, resp);
}
2855 
2856 /*
2857  * Used by rfs4_op_lookup and rfs4_op_lookupp to do the actual work.
2858  */
2859 
2860 /* ARGSUSED */
2861 static nfsstat4
2862 do_rfs4_op_lookup(char *nm, struct svc_req *req, struct compound_state *cs)
2863 {
2864 	int error;
2865 	int different_export = 0;
2866 	vnode_t *vp, *pre_tvp = NULL, *oldvp = NULL;
2867 	struct exportinfo *exi = NULL, *pre_exi = NULL;
2868 	nfsstat4 stat;
2869 	fid_t fid;
2870 	int attrdir, dotdot, walk;
2871 	bool_t is_newvp = FALSE;
2872 
2873 	if (cs->vp->v_flag & V_XATTRDIR) {
2874 		attrdir = 1;
2875 		ASSERT(get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2876 	} else {
2877 		attrdir = 0;
2878 		ASSERT(! get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2879 	}
2880 
2881 	dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
2882 
2883 	/*
2884 	 * If dotdotting, then need to check whether it's
2885 	 * above the root of a filesystem, or above an
2886 	 * export point.
2887 	 */
2888 	if (dotdot) {
2889 		vnode_t *zone_rootvp;
2890 
2891 		ASSERT(cs->exi != NULL);
2892 		zone_rootvp = cs->exi->exi_ne->exi_root->exi_vp;
2893 		/*
2894 		 * If dotdotting at the root of a filesystem, then
2895 		 * need to traverse back to the mounted-on filesystem
2896 		 * and do the dotdot lookup there.
2897 		 */
2898 		if ((cs->vp->v_flag & VROOT) || VN_CMP(cs->vp, zone_rootvp)) {
2899 
2900 			/*
2901 			 * If at the system root, then can
2902 			 * go up no further.
2903 			 */
2904 			if (VN_CMP(cs->vp, zone_rootvp))
2905 				return (puterrno4(ENOENT));
2906 
2907 			/*
2908 			 * Traverse back to the mounted-on filesystem
2909 			 */
2910 			cs->vp = untraverse(cs->vp, zone_rootvp);
2911 
2912 			/*
2913 			 * Set the different_export flag so we remember
2914 			 * to pick up a new exportinfo entry for
2915 			 * this new filesystem.
2916 			 */
2917 			different_export = 1;
2918 		} else {
2919 
2920 			/*
2921 			 * If dotdotting above an export point then set
2922 			 * the different_export to get new export info.
2923 			 */
2924 			different_export = nfs_exported(cs->exi, cs->vp);
2925 		}
2926 	}
2927 
2928 	error = VOP_LOOKUP(cs->vp, nm, &vp, NULL, 0, NULL, cs->cr,
2929 	    NULL, NULL, NULL);
2930 	if (error)
2931 		return (puterrno4(error));
2932 
2933 	/*
2934 	 * If the vnode is in a pseudo filesystem, check whether it is visible.
2935 	 *
2936 	 * XXX if the vnode is a symlink and it is not visible in
2937 	 * a pseudo filesystem, return ENOENT (not following symlink).
2938 	 * V4 client can not mount such symlink. This is a regression
2939 	 * from V2/V3.
2940 	 *
2941 	 * In the same exported filesystem, if the security flavor used
2942 	 * is not an explicitly shared flavor, limit the view to the visible
2943 	 * list entries only. This is not a WRONGSEC case because it's already
2944 	 * checked via PUTROOTFH/PUTPUBFH or PUTFH.
2945 	 */
2946 	if (!different_export &&
2947 	    (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
2948 	    cs->access & CS_ACCESS_LIMITED)) {
2949 		if (! nfs_visible(cs->exi, vp, &different_export)) {
2950 			VN_RELE(vp);
2951 			return (puterrno4(ENOENT));
2952 		}
2953 	}
2954 
2955 	/*
2956 	 * If it's a mountpoint, then traverse it.
2957 	 */
2958 	if (vn_ismntpt(vp)) {
2959 		pre_exi = cs->exi;	/* save pre-traversed exportinfo */
2960 		pre_tvp = vp;		/* save pre-traversed vnode	*/
2961 
2962 		/*
2963 		 * hold pre_tvp to counteract rele by traverse.  We will
2964 		 * need pre_tvp below if checkexport4 fails
2965 		 */
2966 		VN_HOLD(pre_tvp);
2967 		if ((error = traverse(&vp)) != 0) {
2968 			VN_RELE(vp);
2969 			VN_RELE(pre_tvp);
2970 			return (puterrno4(error));
2971 		}
2972 		different_export = 1;
2973 	} else if (vp->v_vfsp != cs->vp->v_vfsp) {
2974 		/*
2975 		 * The vfsp comparison is to handle the case where
2976 		 * a LOFS mount is shared.  lo_lookup traverses mount points,
2977 		 * and NFS is unaware of local fs transistions because
2978 		 * v_vfsmountedhere isn't set.  For this special LOFS case,
2979 		 * the dir and the obj returned by lookup will have different
2980 		 * vfs ptrs.
2981 		 */
2982 		different_export = 1;
2983 	}
2984 
2985 	if (different_export) {
2986 
2987 		bzero(&fid, sizeof (fid));
2988 		fid.fid_len = MAXFIDSZ;
2989 		error = vop_fid_pseudo(vp, &fid);
2990 		if (error) {
2991 			VN_RELE(vp);
2992 			if (pre_tvp)
2993 				VN_RELE(pre_tvp);
2994 			return (puterrno4(error));
2995 		}
2996 
2997 		if (dotdot)
2998 			exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
2999 		else
3000 			exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
3001 
3002 		if (exi == NULL) {
3003 			if (pre_tvp) {
3004 				/*
3005 				 * If this vnode is a mounted-on vnode,
3006 				 * but the mounted-on file system is not
3007 				 * exported, send back the filehandle for
3008 				 * the mounted-on vnode, not the root of
3009 				 * the mounted-on file system.
3010 				 */
3011 				VN_RELE(vp);
3012 				vp = pre_tvp;
3013 				exi = pre_exi;
3014 			} else {
3015 				VN_RELE(vp);
3016 				return (puterrno4(EACCES));
3017 			}
3018 		} else if (pre_tvp) {
3019 			/* we're done with pre_tvp now. release extra hold */
3020 			VN_RELE(pre_tvp);
3021 		}
3022 
3023 		cs->exi = exi;
3024 
3025 		/*
3026 		 * Now we do a checkauth4. The reason is that
3027 		 * this client/user may not have access to the new
3028 		 * exported file system, and if they do,
3029 		 * the client/user may be mapped to a different uid.
3030 		 *
3031 		 * We start with a new cr, because the checkauth4 done
3032 		 * in the PUT*FH operation over wrote the cred's uid,
3033 		 * gid, etc, and we want the real thing before calling
3034 		 * checkauth4()
3035 		 */
3036 		crfree(cs->cr);
3037 		cs->cr = crdup(cs->basecr);
3038 
3039 		oldvp = cs->vp;
3040 		cs->vp = vp;
3041 		is_newvp = TRUE;
3042 
3043 		stat = call_checkauth4(cs, req);
3044 		if (stat != NFS4_OK) {
3045 			VN_RELE(cs->vp);
3046 			cs->vp = oldvp;
3047 			return (stat);
3048 		}
3049 	}
3050 
3051 	/*
3052 	 * After various NFS checks, do a label check on the path
3053 	 * component. The label on this path should either be the
3054 	 * global zone's label or a zone's label. We are only
3055 	 * interested in the zone's label because exported files
3056 	 * in global zone is accessible (though read-only) to
3057 	 * clients. The exportability/visibility check is already
3058 	 * done before reaching this code.
3059 	 */
3060 	if (is_system_labeled()) {
3061 		bslabel_t *clabel;
3062 
3063 		ASSERT(req->rq_label != NULL);
3064 		clabel = req->rq_label;
3065 		DTRACE_PROBE2(tx__rfs4__log__info__oplookup__clabel, char *,
3066 		    "got client label from request(1)", struct svc_req *, req);
3067 
3068 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
3069 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3070 			    cs->exi)) {
3071 				error = EACCES;
3072 				goto err_out;
3073 			}
3074 		} else {
3075 			/*
3076 			 * We grant access to admin_low label clients
3077 			 * only if the client is trusted, i.e. also
3078 			 * running Solaris Trusted Extension.
3079 			 */
3080 			struct sockaddr	*ca;
3081 			int		addr_type;
3082 			void		*ipaddr;
3083 			tsol_tpc_t	*tp;
3084 
3085 			ca = (struct sockaddr *)svc_getrpccaller(
3086 			    req->rq_xprt)->buf;
3087 			if (ca->sa_family == AF_INET) {
3088 				addr_type = IPV4_VERSION;
3089 				ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
3090 			} else if (ca->sa_family == AF_INET6) {
3091 				addr_type = IPV6_VERSION;
3092 				ipaddr = &((struct sockaddr_in6 *)
3093 				    ca)->sin6_addr;
3094 			}
3095 			tp = find_tpc(ipaddr, addr_type, B_FALSE);
3096 			if (tp == NULL || tp->tpc_tp.tp_doi !=
3097 			    l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
3098 			    SUN_CIPSO) {
3099 				if (tp != NULL)
3100 					TPC_RELE(tp);
3101 				error = EACCES;
3102 				goto err_out;
3103 			}
3104 			TPC_RELE(tp);
3105 		}
3106 	}
3107 
3108 	error = makefh4(&cs->fh, vp, cs->exi);
3109 
3110 err_out:
3111 	if (error) {
3112 		if (is_newvp) {
3113 			VN_RELE(cs->vp);
3114 			cs->vp = oldvp;
3115 		} else
3116 			VN_RELE(vp);
3117 		return (puterrno4(error));
3118 	}
3119 
3120 	if (!is_newvp) {
3121 		if (cs->vp)
3122 			VN_RELE(cs->vp);
3123 		cs->vp = vp;
3124 	} else if (oldvp)
3125 		VN_RELE(oldvp);
3126 
3127 	/*
3128 	 * if did lookup on attrdir and didn't lookup .., set named
3129 	 * attr fh flag
3130 	 */
3131 	if (attrdir && ! dotdot)
3132 		set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
3133 
3134 	/* Assume false for now, open proc will set this */
3135 	cs->mandlock = FALSE;
3136 
3137 	return (NFS4_OK);
3138 }
3139 
3140 /* ARGSUSED */
3141 static void
3142 rfs4_op_lookup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3143     struct compound_state *cs)
3144 {
3145 	LOOKUP4args *args = &argop->nfs_argop4_u.oplookup;
3146 	LOOKUP4res *resp = &resop->nfs_resop4_u.oplookup;
3147 	char *nm;
3148 	uint_t len;
3149 	struct sockaddr *ca;
3150 	char *name = NULL;
3151 	nfsstat4 status;
3152 
3153 	DTRACE_NFSV4_2(op__lookup__start, struct compound_state *, cs,
3154 	    LOOKUP4args *, args);
3155 
3156 	if (cs->vp == NULL) {
3157 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3158 		goto out;
3159 	}
3160 
3161 	if (cs->vp->v_type == VLNK) {
3162 		*cs->statusp = resp->status = NFS4ERR_SYMLINK;
3163 		goto out;
3164 	}
3165 
3166 	if (cs->vp->v_type != VDIR) {
3167 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
3168 		goto out;
3169 	}
3170 
3171 	status = utf8_dir_verify(&args->objname);
3172 	if (status != NFS4_OK) {
3173 		*cs->statusp = resp->status = status;
3174 		goto out;
3175 	}
3176 
3177 	nm = utf8_to_str(&args->objname, &len, NULL);
3178 	if (nm == NULL) {
3179 		*cs->statusp = resp->status = NFS4ERR_INVAL;
3180 		goto out;
3181 	}
3182 
3183 	if (len > MAXNAMELEN) {
3184 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
3185 		kmem_free(nm, len);
3186 		goto out;
3187 	}
3188 
3189 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3190 	name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
3191 	    MAXPATHLEN  + 1);
3192 
3193 	if (name == NULL) {
3194 		*cs->statusp = resp->status = NFS4ERR_INVAL;
3195 		kmem_free(nm, len);
3196 		goto out;
3197 	}
3198 
3199 	*cs->statusp = resp->status = do_rfs4_op_lookup(name, req, cs);
3200 
3201 	if (name != nm)
3202 		kmem_free(name, MAXPATHLEN + 1);
3203 	kmem_free(nm, len);
3204 
3205 out:
3206 	DTRACE_NFSV4_2(op__lookup__done, struct compound_state *, cs,
3207 	    LOOKUP4res *, resp);
3208 }
3209 
3210 /* ARGSUSED */
3211 static void
3212 rfs4_op_lookupp(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3213     struct compound_state *cs)
3214 {
3215 	LOOKUPP4res *resp = &resop->nfs_resop4_u.oplookupp;
3216 
3217 	DTRACE_NFSV4_1(op__lookupp__start, struct compound_state *, cs);
3218 
3219 	if (cs->vp == NULL) {
3220 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3221 		goto out;
3222 	}
3223 
3224 	if (cs->vp->v_type != VDIR) {
3225 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
3226 		goto out;
3227 	}
3228 
3229 	*cs->statusp = resp->status = do_rfs4_op_lookup("..", req, cs);
3230 
3231 	/*
3232 	 * From NFSV4 Specification, LOOKUPP should not check for
3233 	 * NFS4ERR_WRONGSEC. Retrun NFS4_OK instead.
3234 	 */
3235 	if (resp->status == NFS4ERR_WRONGSEC) {
3236 		*cs->statusp = resp->status = NFS4_OK;
3237 	}
3238 
3239 out:
3240 	DTRACE_NFSV4_2(op__lookupp__done, struct compound_state *, cs,
3241 	    LOOKUPP4res *, resp);
3242 }
3243 
3244 
3245 /*ARGSUSED2*/
3246 static void
3247 rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3248     struct compound_state *cs)
3249 {
3250 	OPENATTR4args	*args = &argop->nfs_argop4_u.opopenattr;
3251 	OPENATTR4res	*resp = &resop->nfs_resop4_u.opopenattr;
3252 	vnode_t		*avp = NULL;
3253 	int		lookup_flags = LOOKUP_XATTR, error;
3254 	int		exp_ro = 0;
3255 
3256 	DTRACE_NFSV4_2(op__openattr__start, struct compound_state *, cs,
3257 	    OPENATTR4args *, args);
3258 
3259 	if (cs->vp == NULL) {
3260 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3261 		goto out;
3262 	}
3263 
3264 	if ((cs->vp->v_vfsp->vfs_flag & VFS_XATTR) == 0 &&
3265 	    !vfs_has_feature(cs->vp->v_vfsp, VFSFT_SYSATTR_VIEWS)) {
3266 		*cs->statusp = resp->status = puterrno4(ENOTSUP);
3267 		goto out;
3268 	}
3269 
3270 	/*
3271 	 * If file system supports passing ACE mask to VOP_ACCESS then
3272 	 * check for ACE_READ_NAMED_ATTRS, otherwise do legacy checks
3273 	 */
3274 
3275 	if (vfs_has_feature(cs->vp->v_vfsp, VFSFT_ACEMASKONACCESS))
3276 		error = VOP_ACCESS(cs->vp, ACE_READ_NAMED_ATTRS,
3277 		    V_ACE_MASK, cs->cr, NULL);
3278 	else
3279 		error = ((VOP_ACCESS(cs->vp, VREAD, 0, cs->cr, NULL) != 0) &&
3280 		    (VOP_ACCESS(cs->vp, VWRITE, 0, cs->cr, NULL) != 0) &&
3281 		    (VOP_ACCESS(cs->vp, VEXEC, 0, cs->cr, NULL) != 0));
3282 
3283 	if (error) {
3284 		*cs->statusp = resp->status = puterrno4(EACCES);
3285 		goto out;
3286 	}
3287 
3288 	/*
3289 	 * The CREATE_XATTR_DIR VOP flag cannot be specified if
3290 	 * the file system is exported read-only -- regardless of
3291 	 * createdir flag.  Otherwise the attrdir would be created
3292 	 * (assuming server fs isn't mounted readonly locally).  If
3293 	 * VOP_LOOKUP returns ENOENT in this case, the error will
3294 	 * be translated into EROFS.  ENOSYS is mapped to ENOTSUP
3295 	 * because specfs has no VOP_LOOKUP op, so the macro would
3296 	 * return ENOSYS.  EINVAL is returned by all (current)
3297 	 * Solaris file system implementations when any of their
3298 	 * restrictions are violated (xattr(dir) can't have xattrdir).
3299 	 * Returning NOTSUPP is more appropriate in this case
3300 	 * because the object will never be able to have an attrdir.
3301 	 */
3302 	if (args->createdir && ! (exp_ro = rdonly4(req, cs)))
3303 		lookup_flags |= CREATE_XATTR_DIR;
3304 
3305 	error = VOP_LOOKUP(cs->vp, "", &avp, NULL, lookup_flags, NULL, cs->cr,
3306 	    NULL, NULL, NULL);
3307 
3308 	if (error) {
3309 		if (error == ENOENT && args->createdir && exp_ro)
3310 			*cs->statusp = resp->status = puterrno4(EROFS);
3311 		else if (error == EINVAL || error == ENOSYS)
3312 			*cs->statusp = resp->status = puterrno4(ENOTSUP);
3313 		else
3314 			*cs->statusp = resp->status = puterrno4(error);
3315 		goto out;
3316 	}
3317 
3318 	ASSERT(avp->v_flag & V_XATTRDIR);
3319 
3320 	error = makefh4(&cs->fh, avp, cs->exi);
3321 
3322 	if (error) {
3323 		VN_RELE(avp);
3324 		*cs->statusp = resp->status = puterrno4(error);
3325 		goto out;
3326 	}
3327 
3328 	VN_RELE(cs->vp);
3329 	cs->vp = avp;
3330 
3331 	/*
3332 	 * There is no requirement for an attrdir fh flag
3333 	 * because the attrdir has a vnode flag to distinguish
3334 	 * it from regular (non-xattr) directories.  The
3335 	 * FH4_ATTRDIR flag is set for future sanity checks.
3336 	 */
3337 	set_fh4_flag(&cs->fh, FH4_ATTRDIR);
3338 	*cs->statusp = resp->status = NFS4_OK;
3339 
3340 out:
3341 	DTRACE_NFSV4_2(op__openattr__done, struct compound_state *, cs,
3342 	    OPENATTR4res *, resp);
3343 }
3344 
3345 static int
3346 do_io(int direction, vnode_t *vp, struct uio *uio, int ioflag, cred_t *cred,
3347     caller_context_t *ct)
3348 {
3349 	int error;
3350 	int i;
3351 	clock_t delaytime;
3352 
3353 	delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
3354 
3355 	/*
3356 	 * Don't block on mandatory locks. If this routine returns
3357 	 * EAGAIN, the caller should return NFS4ERR_LOCKED.
3358 	 */
3359 	uio->uio_fmode = FNONBLOCK;
3360 
3361 	for (i = 0; i < rfs4_maxlock_tries; i++) {
3362 
3363 
3364 		if (direction == FREAD) {
3365 			(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, ct);
3366 			error = VOP_READ(vp, uio, ioflag, cred, ct);
3367 			VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, ct);
3368 		} else {
3369 			(void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, ct);
3370 			error = VOP_WRITE(vp, uio, ioflag, cred, ct);
3371 			VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, ct);
3372 		}
3373 
3374 		if (error != EAGAIN)
3375 			break;
3376 
3377 		if (i < rfs4_maxlock_tries - 1) {
3378 			delay(delaytime);
3379 			delaytime *= 2;
3380 		}
3381 	}
3382 
3383 	return (error);
3384 }
3385 
3386 /* ARGSUSED */
3387 static void
3388 rfs4_op_read(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3389     struct compound_state *cs)
3390 {
3391 	READ4args *args = &argop->nfs_argop4_u.opread;
3392 	READ4res *resp = &resop->nfs_resop4_u.opread;
3393 	int error;
3394 	int verror;
3395 	vnode_t *vp;
3396 	struct vattr va;
3397 	struct iovec iov, *iovp = NULL;
3398 	int iovcnt;
3399 	struct uio uio;
3400 	u_offset_t offset;
3401 	bool_t *deleg = &cs->deleg;
3402 	nfsstat4 stat;
3403 	int in_crit = 0;
3404 	mblk_t *mp = NULL;
3405 	int alloc_err = 0;
3406 	int rdma_used = 0;
3407 	int loaned_buffers;
3408 	caller_context_t ct;
3409 	struct uio *uiop;
3410 
3411 	DTRACE_NFSV4_2(op__read__start, struct compound_state *, cs,
3412 	    READ4args, args);
3413 
3414 	vp = cs->vp;
3415 	if (vp == NULL) {
3416 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3417 		goto out;
3418 	}
3419 	if (cs->access == CS_ACCESS_DENIED) {
3420 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
3421 		goto out;
3422 	}
3423 
3424 	if ((stat = rfs4_check_stateid(FREAD, vp, &args->stateid, FALSE,
3425 	    deleg, TRUE, &ct, cs)) != NFS4_OK) {
3426 		*cs->statusp = resp->status = stat;
3427 		goto out;
3428 	}
3429 
3430 	/*
3431 	 * Enter the critical region before calling VOP_RWLOCK
3432 	 * to avoid a deadlock with write requests.
3433 	 */
3434 	if (nbl_need_check(vp)) {
3435 		nbl_start_crit(vp, RW_READER);
3436 		in_crit = 1;
3437 		if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
3438 		    &ct)) {
3439 			*cs->statusp = resp->status = NFS4ERR_LOCKED;
3440 			goto out;
3441 		}
3442 	}
3443 
3444 	if (args->wlist) {
3445 		if (args->count > clist_len(args->wlist)) {
3446 			*cs->statusp = resp->status = NFS4ERR_INVAL;
3447 			goto out;
3448 		}
3449 		rdma_used = 1;
3450 	}
3451 
3452 	/* use loaned buffers for TCP */
3453 	loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
3454 
3455 	va.va_mask = AT_MODE|AT_SIZE|AT_UID;
3456 	verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3457 
3458 	/*
3459 	 * If we can't get the attributes, then we can't do the
3460 	 * right access checking.  So, we'll fail the request.
3461 	 */
3462 	if (verror) {
3463 		*cs->statusp = resp->status = puterrno4(verror);
3464 		goto out;
3465 	}
3466 
3467 	if (vp->v_type != VREG) {
3468 		*cs->statusp = resp->status =
3469 		    ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
3470 		goto out;
3471 	}
3472 
3473 	if (crgetuid(cs->cr) != va.va_uid &&
3474 	    (error = VOP_ACCESS(vp, VREAD, 0, cs->cr, &ct)) &&
3475 	    (error = VOP_ACCESS(vp, VEXEC, 0, cs->cr, &ct))) {
3476 		*cs->statusp = resp->status = puterrno4(error);
3477 		goto out;
3478 	}
3479 
3480 	if (MANDLOCK(vp, va.va_mode)) { /* XXX - V4 supports mand locking */
3481 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
3482 		goto out;
3483 	}
3484 
3485 	offset = args->offset;
3486 	if (offset >= va.va_size) {
3487 		*cs->statusp = resp->status = NFS4_OK;
3488 		resp->eof = TRUE;
3489 		resp->data_len = 0;
3490 		resp->data_val = NULL;
3491 		resp->mblk = NULL;
3492 		/* RDMA */
3493 		resp->wlist = args->wlist;
3494 		resp->wlist_len = resp->data_len;
3495 		*cs->statusp = resp->status = NFS4_OK;
3496 		if (resp->wlist)
3497 			clist_zero_len(resp->wlist);
3498 		goto out;
3499 	}
3500 
3501 	if (args->count == 0) {
3502 		*cs->statusp = resp->status = NFS4_OK;
3503 		resp->eof = FALSE;
3504 		resp->data_len = 0;
3505 		resp->data_val = NULL;
3506 		resp->mblk = NULL;
3507 		/* RDMA */
3508 		resp->wlist = args->wlist;
3509 		resp->wlist_len = resp->data_len;
3510 		if (resp->wlist)
3511 			clist_zero_len(resp->wlist);
3512 		goto out;
3513 	}
3514 
3515 	/*
3516 	 * Do not allocate memory more than maximum allowed
3517 	 * transfer size
3518 	 */
3519 	if (args->count > rfs4_tsize(req))
3520 		args->count = rfs4_tsize(req);
3521 
3522 	if (loaned_buffers) {
3523 		uiop = (uio_t *)rfs_setup_xuio(vp);
3524 		ASSERT(uiop != NULL);
3525 		uiop->uio_segflg = UIO_SYSSPACE;
3526 		uiop->uio_loffset = args->offset;
3527 		uiop->uio_resid = args->count;
3528 
3529 		/* Jump to do the read if successful */
3530 		if (!VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cs->cr, &ct)) {
3531 			/*
3532 			 * Need to hold the vnode until after VOP_RETZCBUF()
3533 			 * is called.
3534 			 */
3535 			VN_HOLD(vp);
3536 			goto doio_read;
3537 		}
3538 
3539 		DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
3540 		    uiop->uio_loffset, int, uiop->uio_resid);
3541 
3542 		uiop->uio_extflg = 0;
3543 
3544 		/* failure to setup for zero copy */
3545 		rfs_free_xuio((void *)uiop);
3546 		loaned_buffers = 0;
3547 	}
3548 
3549 	/*
3550 	 * If returning data via RDMA Write, then grab the chunk list. If we
3551 	 * aren't returning READ data w/RDMA_WRITE, then grab a mblk.
3552 	 */
3553 	if (rdma_used) {
3554 		mp = NULL;
3555 		(void) rdma_get_wchunk(req, &iov, args->wlist);
3556 		uio.uio_iov = &iov;
3557 		uio.uio_iovcnt = 1;
3558 	} else {
3559 		/*
3560 		 * mp will contain the data to be sent out in the read reply.
3561 		 * It will be freed after the reply has been sent.
3562 		 */
3563 		mp = rfs_read_alloc(args->count, &iovp, &iovcnt);
3564 		ASSERT(mp != NULL);
3565 		ASSERT(alloc_err == 0);
3566 		uio.uio_iov = iovp;
3567 		uio.uio_iovcnt = iovcnt;
3568 	}
3569 
3570 	uio.uio_segflg = UIO_SYSSPACE;
3571 	uio.uio_extflg = UIO_COPY_CACHED;
3572 	uio.uio_loffset = args->offset;
3573 	uio.uio_resid = args->count;
3574 	uiop = &uio;
3575 
3576 doio_read:
3577 	error = do_io(FREAD, vp, uiop, 0, cs->cr, &ct);
3578 
3579 	va.va_mask = AT_SIZE;
3580 	verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3581 
3582 	if (error) {
3583 		if (mp)
3584 			freemsg(mp);
3585 		*cs->statusp = resp->status = puterrno4(error);
3586 		goto out;
3587 	}
3588 
3589 	/* make mblk using zc buffers */
3590 	if (loaned_buffers) {
3591 		mp = uio_to_mblk(uiop);
3592 		ASSERT(mp != NULL);
3593 	}
3594 
3595 	*cs->statusp = resp->status = NFS4_OK;
3596 
3597 	ASSERT(uiop->uio_resid >= 0);
3598 	resp->data_len = args->count - uiop->uio_resid;
3599 	if (mp) {
3600 		resp->data_val = (char *)mp->b_datap->db_base;
3601 		rfs_rndup_mblks(mp, resp->data_len, loaned_buffers);
3602 	} else {
3603 		resp->data_val = (caddr_t)iov.iov_base;
3604 	}
3605 
3606 	resp->mblk = mp;
3607 
3608 	if (!verror && offset + resp->data_len == va.va_size)
3609 		resp->eof = TRUE;
3610 	else
3611 		resp->eof = FALSE;
3612 
3613 	if (rdma_used) {
3614 		if (!rdma_setup_read_data4(args, resp)) {
3615 			*cs->statusp = resp->status = NFS4ERR_INVAL;
3616 		}
3617 	} else {
3618 		resp->wlist = NULL;
3619 	}
3620 
3621 out:
3622 	if (in_crit)
3623 		nbl_end_crit(vp);
3624 
3625 	if (iovp != NULL)
3626 		kmem_free(iovp, iovcnt * sizeof (struct iovec));
3627 
3628 	DTRACE_NFSV4_2(op__read__done, struct compound_state *, cs,
3629 	    READ4res *, resp);
3630 }
3631 
3632 static void
3633 rfs4_op_read_free(nfs_resop4 *resop)
3634 {
3635 	READ4res	*resp = &resop->nfs_resop4_u.opread;
3636 
3637 	if (resp->status == NFS4_OK && resp->mblk != NULL) {
3638 		freemsg(resp->mblk);
3639 		resp->mblk = NULL;
3640 		resp->data_val = NULL;
3641 		resp->data_len = 0;
3642 	}
3643 }
3644 
3645 static void
3646 rfs4_op_readdir_free(nfs_resop4 * resop)
3647 {
3648 	READDIR4res    *resp = &resop->nfs_resop4_u.opreaddir;
3649 
3650 	if (resp->status == NFS4_OK && resp->mblk != NULL) {
3651 		freeb(resp->mblk);
3652 		resp->mblk = NULL;
3653 		resp->data_len = 0;
3654 	}
3655 }
3656 
3657 
3658 /* ARGSUSED */
3659 static void
3660 rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3661     struct compound_state *cs)
3662 {
3663 	PUTPUBFH4res	*resp = &resop->nfs_resop4_u.opputpubfh;
3664 	int		error;
3665 	vnode_t		*vp;
3666 	struct exportinfo *exi, *sav_exi;
3667 	nfs_fh4_fmt_t	*fh_fmtp;
3668 	nfs_export_t *ne = nfs_get_export();
3669 
3670 	DTRACE_NFSV4_1(op__putpubfh__start, struct compound_state *, cs);
3671 
3672 	if (cs->vp) {
3673 		VN_RELE(cs->vp);
3674 		cs->vp = NULL;
3675 	}
3676 
3677 	if (cs->cr)
3678 		crfree(cs->cr);
3679 
3680 	cs->cr = crdup(cs->basecr);
3681 
3682 	vp = ne->exi_public->exi_vp;
3683 	if (vp == NULL) {
3684 		*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3685 		goto out;
3686 	}
3687 
3688 	error = makefh4(&cs->fh, vp, ne->exi_public);
3689 	if (error != 0) {
3690 		*cs->statusp = resp->status = puterrno4(error);
3691 		goto out;
3692 	}
3693 	sav_exi = cs->exi;
3694 	if (ne->exi_public == ne->exi_root) {
3695 		/*
3696 		 * No filesystem is actually shared public, so we default
3697 		 * to exi_root. In this case, we must check whether root
3698 		 * is exported.
3699 		 */
3700 		fh_fmtp = (nfs_fh4_fmt_t *)cs->fh.nfs_fh4_val;
3701 
3702 		/*
3703 		 * if root filesystem is exported, the exportinfo struct that we
3704 		 * should use is what checkexport4 returns, because root_exi is
3705 		 * actually a mostly empty struct.
3706 		 */
3707 		exi = checkexport4(&fh_fmtp->fh4_fsid,
3708 		    (fid_t *)&fh_fmtp->fh4_xlen, NULL);
3709 		cs->exi = ((exi != NULL) ? exi : ne->exi_public);
3710 	} else {
3711 		/*
3712 		 * it's a properly shared filesystem
3713 		 */
3714 		cs->exi = ne->exi_public;
3715 	}
3716 
3717 	if (is_system_labeled()) {
3718 		bslabel_t *clabel;
3719 
3720 		ASSERT(req->rq_label != NULL);
3721 		clabel = req->rq_label;
3722 		DTRACE_PROBE2(tx__rfs4__log__info__opputpubfh__clabel, char *,
3723 		    "got client label from request(1)",
3724 		    struct svc_req *, req);
3725 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
3726 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3727 			    cs->exi)) {
3728 				*cs->statusp = resp->status =
3729 				    NFS4ERR_SERVERFAULT;
3730 				goto out;
3731 			}
3732 		}
3733 	}
3734 
3735 	VN_HOLD(vp);
3736 	cs->vp = vp;
3737 
3738 	if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3739 		VN_RELE(cs->vp);
3740 		cs->vp = NULL;
3741 		cs->exi = sav_exi;
3742 		goto out;
3743 	}
3744 
3745 	*cs->statusp = resp->status = NFS4_OK;
3746 out:
3747 	DTRACE_NFSV4_2(op__putpubfh__done, struct compound_state *, cs,
3748 	    PUTPUBFH4res *, resp);
3749 }
3750 
3751 /*
3752  * XXX - issue with put*fh operations. Suppose /export/home is exported.
3753  * Suppose an NFS client goes to mount /export/home/joe. If /export, home,
3754  * or joe have restrictive search permissions, then we shouldn't let
3755  * the client get a file handle. This is easy to enforce. However, we
3756  * don't know what security flavor should be used until we resolve the
3757  * path name. Another complication is uid mapping. If root is
3758  * the user, then it will be mapped to the anonymous user by default,
3759  * but we won't know that till we've resolved the path name. And we won't
3760  * know what the anonymous user is.
3761  * Luckily, SECINFO is specified to take a full filename.
3762  * So what we will have to in rfs4_op_lookup is check that flavor of
3763  * the target object matches that of the request, and if root was the
3764  * caller, check for the root= and anon= options, and if necessary,
3765  * repeat the lookup using the right cred_t. But that's not done yet.
3766  */
3767 /* ARGSUSED */
3768 static void
3769 rfs4_op_putfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3770     struct compound_state *cs)
3771 {
3772 	PUTFH4args *args = &argop->nfs_argop4_u.opputfh;
3773 	PUTFH4res *resp = &resop->nfs_resop4_u.opputfh;
3774 	nfs_fh4_fmt_t *fh_fmtp;
3775 
3776 	DTRACE_NFSV4_2(op__putfh__start, struct compound_state *, cs,
3777 	    PUTFH4args *, args);
3778 
3779 	if (cs->vp) {
3780 		VN_RELE(cs->vp);
3781 		cs->vp = NULL;
3782 	}
3783 
3784 	if (cs->cr) {
3785 		crfree(cs->cr);
3786 		cs->cr = NULL;
3787 	}
3788 
3789 
3790 	if (args->object.nfs_fh4_len < NFS_FH4_LEN) {
3791 		*cs->statusp = resp->status = NFS4ERR_BADHANDLE;
3792 		goto out;
3793 	}
3794 
3795 	fh_fmtp = (nfs_fh4_fmt_t *)args->object.nfs_fh4_val;
3796 	cs->exi = checkexport4(&fh_fmtp->fh4_fsid, (fid_t *)&fh_fmtp->fh4_xlen,
3797 	    NULL);
3798 
3799 	if (cs->exi == NULL) {
3800 		*cs->statusp = resp->status = NFS4ERR_STALE;
3801 		goto out;
3802 	}
3803 
3804 	cs->cr = crdup(cs->basecr);
3805 
3806 	ASSERT(cs->cr != NULL);
3807 
3808 	if (! (cs->vp = nfs4_fhtovp(&args->object, cs->exi, &resp->status))) {
3809 		*cs->statusp = resp->status;
3810 		goto out;
3811 	}
3812 
3813 	if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3814 		VN_RELE(cs->vp);
3815 		cs->vp = NULL;
3816 		goto out;
3817 	}
3818 
3819 	nfs_fh4_copy(&args->object, &cs->fh);
3820 	*cs->statusp = resp->status = NFS4_OK;
3821 	cs->deleg = FALSE;
3822 
3823 out:
3824 	DTRACE_NFSV4_2(op__putfh__done, struct compound_state *, cs,
3825 	    PUTFH4res *, resp);
3826 }
3827 
3828 /* ARGSUSED */
3829 static void
3830 rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3831     struct compound_state *cs)
3832 {
3833 	PUTROOTFH4res *resp = &resop->nfs_resop4_u.opputrootfh;
3834 	int error;
3835 	fid_t fid;
3836 	struct exportinfo *exi, *sav_exi;
3837 
3838 	DTRACE_NFSV4_1(op__putrootfh__start, struct compound_state *, cs);
3839 
3840 	if (cs->vp) {
3841 		VN_RELE(cs->vp);
3842 		cs->vp = NULL;
3843 	}
3844 
3845 	if (cs->cr)
3846 		crfree(cs->cr);
3847 
3848 	cs->cr = crdup(cs->basecr);
3849 
3850 	/*
3851 	 * Using rootdir, the system root vnode,
3852 	 * get its fid.
3853 	 */
3854 	bzero(&fid, sizeof (fid));
3855 	fid.fid_len = MAXFIDSZ;
3856 	error = vop_fid_pseudo(ZONE_ROOTVP(), &fid);
3857 	if (error != 0) {
3858 		*cs->statusp = resp->status = puterrno4(error);
3859 		goto out;
3860 	}
3861 
3862 	/*
3863 	 * Then use the root fsid & fid it to find out if it's exported
3864 	 *
3865 	 * If the server root isn't exported directly, then
3866 	 * it should at least be a pseudo export based on
3867 	 * one or more exports further down in the server's
3868 	 * file tree.
3869 	 */
3870 	exi = checkexport4(&ZONE_ROOTVP()->v_vfsp->vfs_fsid, &fid, NULL);
3871 	if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) {
3872 		NFS4_DEBUG(rfs4_debug,
3873 		    (CE_WARN, "rfs4_op_putrootfh: export check failure"));
3874 		*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3875 		goto out;
3876 	}
3877 
3878 	/*
3879 	 * Now make a filehandle based on the root
3880 	 * export and root vnode.
3881 	 */
3882 	error = makefh4(&cs->fh, ZONE_ROOTVP(), exi);
3883 	if (error != 0) {
3884 		*cs->statusp = resp->status = puterrno4(error);
3885 		goto out;
3886 	}
3887 
3888 	sav_exi = cs->exi;
3889 	cs->exi = exi;
3890 
3891 	VN_HOLD(ZONE_ROOTVP());
3892 	cs->vp = ZONE_ROOTVP();
3893 
3894 	if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3895 		VN_RELE(cs->vp);
3896 		cs->vp = NULL;
3897 		cs->exi = sav_exi;
3898 		goto out;
3899 	}
3900 
3901 	*cs->statusp = resp->status = NFS4_OK;
3902 	cs->deleg = FALSE;
3903 out:
3904 	DTRACE_NFSV4_2(op__putrootfh__done, struct compound_state *, cs,
3905 	    PUTROOTFH4res *, resp);
3906 }
3907 
3908 /*
3909  * readlink: args: CURRENT_FH.
3910  *	res: status. If success - CURRENT_FH unchanged, return linktext.
3911  */
3912 
3913 /* ARGSUSED */
3914 static void
3915 rfs4_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3916     struct compound_state *cs)
3917 {
3918 	READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3919 	int error;
3920 	vnode_t *vp;
3921 	struct iovec iov;
3922 	struct vattr va;
3923 	struct uio uio;
3924 	char *data;
3925 	struct sockaddr *ca;
3926 	char *name = NULL;
3927 	int is_referral;
3928 
3929 	DTRACE_NFSV4_1(op__readlink__start, struct compound_state *, cs);
3930 
3931 	/* CURRENT_FH: directory */
3932 	vp = cs->vp;
3933 	if (vp == NULL) {
3934 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3935 		goto out;
3936 	}
3937 
3938 	if (cs->access == CS_ACCESS_DENIED) {
3939 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
3940 		goto out;
3941 	}
3942 
3943 	/* Is it a referral? */
3944 	if (vn_is_nfs_reparse(vp, cs->cr) && client_is_downrev(req)) {
3945 
3946 		is_referral = 1;
3947 
3948 	} else {
3949 
3950 		is_referral = 0;
3951 
3952 		if (vp->v_type == VDIR) {
3953 			*cs->statusp = resp->status = NFS4ERR_ISDIR;
3954 			goto out;
3955 		}
3956 
3957 		if (vp->v_type != VLNK) {
3958 			*cs->statusp = resp->status = NFS4ERR_INVAL;
3959 			goto out;
3960 		}
3961 
3962 	}
3963 
3964 	va.va_mask = AT_MODE;
3965 	error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
3966 	if (error) {
3967 		*cs->statusp = resp->status = puterrno4(error);
3968 		goto out;
3969 	}
3970 
3971 	if (MANDLOCK(vp, va.va_mode)) {
3972 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
3973 		goto out;
3974 	}
3975 
3976 	data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
3977 
3978 	if (is_referral) {
3979 		char *s;
3980 		size_t strsz;
3981 		kstat_named_t *stat =
3982 		    cs->exi->exi_ne->ne_globals->svstat[NFS_V4];
3983 
3984 		/* Get an artificial symlink based on a referral */
3985 		s = build_symlink(vp, cs->cr, &strsz);
3986 		stat[NFS_REFERLINKS].value.ui64++;
3987 		DTRACE_PROBE2(nfs4serv__func__referral__reflink,
3988 		    vnode_t *, vp, char *, s);
3989 		if (s == NULL)
3990 			error = EINVAL;
3991 		else {
3992 			error = 0;
3993 			(void) strlcpy(data, s, MAXPATHLEN + 1);
3994 			kmem_free(s, strsz);
3995 		}
3996 
3997 	} else {
3998 
3999 		iov.iov_base = data;
4000 		iov.iov_len = MAXPATHLEN;
4001 		uio.uio_iov = &iov;
4002 		uio.uio_iovcnt = 1;
4003 		uio.uio_segflg = UIO_SYSSPACE;
4004 		uio.uio_extflg = UIO_COPY_CACHED;
4005 		uio.uio_loffset = 0;
4006 		uio.uio_resid = MAXPATHLEN;
4007 
4008 		error = VOP_READLINK(vp, &uio, cs->cr, NULL);
4009 
4010 		if (!error)
4011 			*(data + MAXPATHLEN - uio.uio_resid) = '\0';
4012 	}
4013 
4014 	if (error) {
4015 		kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
4016 		*cs->statusp = resp->status = puterrno4(error);
4017 		goto out;
4018 	}
4019 
4020 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4021 	name = nfscmd_convname(ca, cs->exi, data, NFSCMD_CONV_OUTBOUND,
4022 	    MAXPATHLEN  + 1);
4023 
4024 	if (name == NULL) {
4025 		/*
4026 		 * Even though the conversion failed, we return
4027 		 * something. We just don't translate it.
4028 		 */
4029 		name = data;
4030 	}
4031 
4032 	/*
4033 	 * treat link name as data
4034 	 */
4035 	(void) str_to_utf8(name, (utf8string *)&resp->link);
4036 
4037 	if (name != data)
4038 		kmem_free(name, MAXPATHLEN + 1);
4039 	kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
4040 	*cs->statusp = resp->status = NFS4_OK;
4041 
4042 out:
4043 	DTRACE_NFSV4_2(op__readlink__done, struct compound_state *, cs,
4044 	    READLINK4res *, resp);
4045 }
4046 
4047 static void
4048 rfs4_op_readlink_free(nfs_resop4 *resop)
4049 {
4050 	READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
4051 	utf8string *symlink = (utf8string *)&resp->link;
4052 
4053 	if (symlink->utf8string_val) {
4054 		UTF8STRING_FREE(*symlink)
4055 	}
4056 }
4057 
4058 /*
4059  * release_lockowner:
4060  *	Release any state associated with the supplied
4061  *	lockowner. Note if any lo_state is holding locks we will not
4062  *	rele that lo_state and thus the lockowner will not be destroyed.
4063  *	A client using lock after the lock owner stateid has been released
4064  *	will suffer the consequence of NFS4ERR_BAD_STATEID and would have
4065  *	to reissue the lock with new_lock_owner set to TRUE.
4066  *	args: lock_owner
4067  *	res:  status
4068  */
4069 /* ARGSUSED */
4070 static void
4071 rfs4_op_release_lockowner(nfs_argop4 *argop, nfs_resop4 *resop,
4072     struct svc_req *req, struct compound_state *cs)
4073 {
4074 	RELEASE_LOCKOWNER4args *ap = &argop->nfs_argop4_u.oprelease_lockowner;
4075 	RELEASE_LOCKOWNER4res *resp = &resop->nfs_resop4_u.oprelease_lockowner;
4076 	rfs4_lockowner_t *lo;
4077 	rfs4_openowner_t *oo;
4078 	rfs4_state_t *sp;
4079 	rfs4_lo_state_t *lsp;
4080 	rfs4_client_t *cp;
4081 	bool_t create = FALSE;
4082 	locklist_t *llist;
4083 	sysid_t sysid;
4084 
4085 	DTRACE_NFSV4_2(op__release__lockowner__start, struct compound_state *,
4086 	    cs, RELEASE_LOCKOWNER4args *, ap);
4087 
4088 	/* Make sure there is a clientid around for this request */
4089 	cp = rfs4_findclient_by_id(ap->lock_owner.clientid, FALSE);
4090 
4091 	if (cp == NULL) {
4092 		*cs->statusp = resp->status =
4093 		    rfs4_check_clientid(&ap->lock_owner.clientid, 0);
4094 		goto out;
4095 	}
4096 	rfs4_client_rele(cp);
4097 
4098 	lo = rfs4_findlockowner(&ap->lock_owner, &create);
4099 	if (lo == NULL) {
4100 		*cs->statusp = resp->status = NFS4_OK;
4101 		goto out;
4102 	}
4103 	ASSERT(lo->rl_client != NULL);
4104 
4105 	/*
4106 	 * Check for EXPIRED client. If so will reap state with in a lease
4107 	 * period or on next set_clientid_confirm step
4108 	 */
4109 	if (rfs4_lease_expired(lo->rl_client)) {
4110 		rfs4_lockowner_rele(lo);
4111 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
4112 		goto out;
4113 	}
4114 
4115 	/*
4116 	 * If no sysid has been assigned, then no locks exist; just return.
4117 	 */
4118 	rfs4_dbe_lock(lo->rl_client->rc_dbe);
4119 	if (lo->rl_client->rc_sysidt == LM_NOSYSID) {
4120 		rfs4_lockowner_rele(lo);
4121 		rfs4_dbe_unlock(lo->rl_client->rc_dbe);
4122 		goto out;
4123 	}
4124 
4125 	sysid = lo->rl_client->rc_sysidt;
4126 	rfs4_dbe_unlock(lo->rl_client->rc_dbe);
4127 
4128 	/*
4129 	 * Mark the lockowner invalid.
4130 	 */
4131 	rfs4_dbe_hide(lo->rl_dbe);
4132 
4133 	/*
4134 	 * sysid-pid pair should now not be used since the lockowner is
4135 	 * invalid. If the client were to instantiate the lockowner again
4136 	 * it would be assigned a new pid. Thus we can get the list of
4137 	 * current locks.
4138 	 */
4139 
4140 	llist = flk_get_active_locks(sysid, lo->rl_pid);
4141 	/* If we are still holding locks fail */
4142 	if (llist != NULL) {
4143 
4144 		*cs->statusp = resp->status = NFS4ERR_LOCKS_HELD;
4145 
4146 		flk_free_locklist(llist);
4147 		/*
4148 		 * We need to unhide the lockowner so the client can
4149 		 * try it again. The bad thing here is if the client
4150 		 * has a logic error that took it here in the first place
4151 		 * they probably have lost accounting of the locks that it
4152 		 * is holding. So we may have dangling state until the
4153 		 * open owner state is reaped via close. One scenario
4154 		 * that could possibly occur is that the client has
4155 		 * sent the unlock request(s) in separate threads
4156 		 * and has not waited for the replies before sending the
4157 		 * RELEASE_LOCKOWNER request. Presumably, it would expect
4158 		 * and deal appropriately with NFS4ERR_LOCKS_HELD, by
4159 		 * reissuing the request.
4160 		 */
4161 		rfs4_dbe_unhide(lo->rl_dbe);
4162 		rfs4_lockowner_rele(lo);
4163 		goto out;
4164 	}
4165 
4166 	/*
4167 	 * For the corresponding client we need to check each open
4168 	 * owner for any opens that have lockowner state associated
4169 	 * with this lockowner.
4170 	 */
4171 
4172 	rfs4_dbe_lock(lo->rl_client->rc_dbe);
4173 	for (oo = list_head(&lo->rl_client->rc_openownerlist); oo != NULL;
4174 	    oo = list_next(&lo->rl_client->rc_openownerlist, oo)) {
4175 
4176 		rfs4_dbe_lock(oo->ro_dbe);
4177 		for (sp = list_head(&oo->ro_statelist); sp != NULL;
4178 		    sp = list_next(&oo->ro_statelist, sp)) {
4179 
4180 			rfs4_dbe_lock(sp->rs_dbe);
4181 			for (lsp = list_head(&sp->rs_lostatelist);
4182 			    lsp != NULL;
4183 			    lsp = list_next(&sp->rs_lostatelist, lsp)) {
4184 				if (lsp->rls_locker == lo) {
4185 					rfs4_dbe_lock(lsp->rls_dbe);
4186 					rfs4_dbe_invalidate(lsp->rls_dbe);
4187 					rfs4_dbe_unlock(lsp->rls_dbe);
4188 				}
4189 			}
4190 			rfs4_dbe_unlock(sp->rs_dbe);
4191 		}
4192 		rfs4_dbe_unlock(oo->ro_dbe);
4193 	}
4194 	rfs4_dbe_unlock(lo->rl_client->rc_dbe);
4195 
4196 	rfs4_lockowner_rele(lo);
4197 
4198 	*cs->statusp = resp->status = NFS4_OK;
4199 
4200 out:
4201 	DTRACE_NFSV4_2(op__release__lockowner__done, struct compound_state *,
4202 	    cs, RELEASE_LOCKOWNER4res *, resp);
4203 }
4204 
4205 /*
4206  * short utility function to lookup a file and recall the delegation
4207  */
4208 static rfs4_file_t *
4209 rfs4_lookup_and_findfile(vnode_t *dvp, char *nm, vnode_t **vpp,
4210     int *lkup_error, cred_t *cr)
4211 {
4212 	vnode_t *vp;
4213 	rfs4_file_t *fp = NULL;
4214 	bool_t fcreate = FALSE;
4215 	int error;
4216 
4217 	if (vpp)
4218 		*vpp = NULL;
4219 
4220 	if ((error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr, NULL, NULL,
4221 	    NULL)) == 0) {
4222 		if (vp->v_type == VREG)
4223 			fp = rfs4_findfile(vp, NULL, &fcreate);
4224 		if (vpp)
4225 			*vpp = vp;
4226 		else
4227 			VN_RELE(vp);
4228 	}
4229 
4230 	if (lkup_error)
4231 		*lkup_error = error;
4232 
4233 	return (fp);
4234 }
4235 
4236 /*
4237  * remove: args: CURRENT_FH: directory; name.
4238  *	res: status. If success - CURRENT_FH unchanged, return change_info
4239  *		for directory.
4240  */
4241 /* ARGSUSED */
4242 static void
4243 rfs4_op_remove(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4244     struct compound_state *cs)
4245 {
4246 	REMOVE4args *args = &argop->nfs_argop4_u.opremove;
4247 	REMOVE4res *resp = &resop->nfs_resop4_u.opremove;
4248 	int error;
4249 	vnode_t *dvp, *vp;
4250 	struct vattr bdva, idva, adva;
4251 	char *nm;
4252 	uint_t len;
4253 	rfs4_file_t *fp;
4254 	int in_crit = 0;
4255 	bslabel_t *clabel;
4256 	struct sockaddr *ca;
4257 	char *name = NULL;
4258 	nfsstat4 status;
4259 
4260 	DTRACE_NFSV4_2(op__remove__start, struct compound_state *, cs,
4261 	    REMOVE4args *, args);
4262 
4263 	/* CURRENT_FH: directory */
4264 	dvp = cs->vp;
4265 	if (dvp == NULL) {
4266 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4267 		goto out;
4268 	}
4269 
4270 	if (cs->access == CS_ACCESS_DENIED) {
4271 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
4272 		goto out;
4273 	}
4274 
4275 	/*
4276 	 * If there is an unshared filesystem mounted on this vnode,
4277 	 * Do not allow to remove anything in this directory.
4278 	 */
4279 	if (vn_ismntpt(dvp)) {
4280 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
4281 		goto out;
4282 	}
4283 
4284 	if (dvp->v_type != VDIR) {
4285 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
4286 		goto out;
4287 	}
4288 
4289 	status = utf8_dir_verify(&args->target);
4290 	if (status != NFS4_OK) {
4291 		*cs->statusp = resp->status = status;
4292 		goto out;
4293 	}
4294 
4295 	/*
4296 	 * Lookup the file so that we can check if it's a directory
4297 	 */
4298 	nm = utf8_to_fn(&args->target, &len, NULL);
4299 	if (nm == NULL) {
4300 		*cs->statusp = resp->status = NFS4ERR_INVAL;
4301 		goto out;
4302 	}
4303 
4304 	if (len > MAXNAMELEN) {
4305 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4306 		kmem_free(nm, len);
4307 		goto out;
4308 	}
4309 
4310 	if (rdonly4(req, cs)) {
4311 		*cs->statusp = resp->status = NFS4ERR_ROFS;
4312 		kmem_free(nm, len);
4313 		goto out;
4314 	}
4315 
4316 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4317 	name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
4318 	    MAXPATHLEN  + 1);
4319 
4320 	if (name == NULL) {
4321 		*cs->statusp = resp->status = NFS4ERR_INVAL;
4322 		kmem_free(nm, len);
4323 		goto out;
4324 	}
4325 
4326 	/*
4327 	 * Lookup the file to determine type and while we are see if
4328 	 * there is a file struct around and check for delegation.
4329 	 * We don't need to acquire va_seq before this lookup, if
4330 	 * it causes an update, cinfo.before will not match, which will
4331 	 * trigger a cache flush even if atomic is TRUE.
4332 	 */
4333 	if (fp = rfs4_lookup_and_findfile(dvp, name, &vp, &error, cs->cr)) {
4334 		if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4335 		    NULL)) {
4336 			VN_RELE(vp);
4337 			rfs4_file_rele(fp);
4338 			*cs->statusp = resp->status = NFS4ERR_DELAY;
4339 			if (nm != name)
4340 				kmem_free(name, MAXPATHLEN + 1);
4341 			kmem_free(nm, len);
4342 			goto out;
4343 		}
4344 	}
4345 
4346 	/* Didn't find anything to remove */
4347 	if (vp == NULL) {
4348 		*cs->statusp = resp->status = error;
4349 		if (nm != name)
4350 			kmem_free(name, MAXPATHLEN + 1);
4351 		kmem_free(nm, len);
4352 		goto out;
4353 	}
4354 
4355 	if (nbl_need_check(vp)) {
4356 		nbl_start_crit(vp, RW_READER);
4357 		in_crit = 1;
4358 		if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0, NULL)) {
4359 			*cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4360 			if (nm != name)
4361 				kmem_free(name, MAXPATHLEN + 1);
4362 			kmem_free(nm, len);
4363 			nbl_end_crit(vp);
4364 			VN_RELE(vp);
4365 			if (fp) {
4366 				rfs4_clear_dont_grant(fp);
4367 				rfs4_file_rele(fp);
4368 			}
4369 			goto out;
4370 		}
4371 	}
4372 
4373 	/* check label before allowing removal */
4374 	if (is_system_labeled()) {
4375 		ASSERT(req->rq_label != NULL);
4376 		clabel = req->rq_label;
4377 		DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
4378 		    "got client label from request(1)",
4379 		    struct svc_req *, req);
4380 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
4381 			if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4382 			    cs->exi)) {
4383 				*cs->statusp = resp->status = NFS4ERR_ACCESS;
4384 				if (name != nm)
4385 					kmem_free(name, MAXPATHLEN + 1);
4386 				kmem_free(nm, len);
4387 				if (in_crit)
4388 					nbl_end_crit(vp);
4389 				VN_RELE(vp);
4390 				if (fp) {
4391 					rfs4_clear_dont_grant(fp);
4392 					rfs4_file_rele(fp);
4393 				}
4394 				goto out;
4395 			}
4396 		}
4397 	}
4398 
4399 	/* Get dir "before" change value */
4400 	bdva.va_mask = AT_CTIME|AT_SEQ;
4401 	error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
4402 	if (error) {
4403 		*cs->statusp = resp->status = puterrno4(error);
4404 		if (nm != name)
4405 			kmem_free(name, MAXPATHLEN + 1);
4406 		kmem_free(nm, len);
4407 		if (in_crit)
4408 			nbl_end_crit(vp);
4409 		VN_RELE(vp);
4410 		if (fp) {
4411 			rfs4_clear_dont_grant(fp);
4412 			rfs4_file_rele(fp);
4413 		}
4414 		goto out;
4415 	}
4416 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
4417 
4418 	/* Actually do the REMOVE operation */
4419 	if (vp->v_type == VDIR) {
4420 		/*
4421 		 * Can't remove a directory that has a mounted-on filesystem.
4422 		 */
4423 		if (vn_ismntpt(vp)) {
4424 			error = EACCES;
4425 		} else {
4426 			/*
4427 			 * System V defines rmdir to return EEXIST,
4428 			 * not ENOTEMPTY, if the directory is not
4429 			 * empty.  A System V NFS server needs to map
4430 			 * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to
4431 			 * transmit over the wire.
4432 			 */
4433 			if ((error = VOP_RMDIR(dvp, name, ZONE_ROOTVP(), cs->cr,
4434 			    NULL, 0)) == EEXIST)
4435 				error = ENOTEMPTY;
4436 		}
4437 	} else {
4438 		if ((error = VOP_REMOVE(dvp, name, cs->cr, NULL, 0)) == 0 &&
4439 		    fp != NULL) {
4440 			struct vattr va;
4441 			vnode_t *tvp;
4442 
4443 			rfs4_dbe_lock(fp->rf_dbe);
4444 			tvp = fp->rf_vp;
4445 			if (tvp)
4446 				VN_HOLD(tvp);
4447 			rfs4_dbe_unlock(fp->rf_dbe);
4448 
4449 			if (tvp) {
4450 				/*
4451 				 * This is va_seq safe because we are not
4452 				 * manipulating dvp.
4453 				 */
4454 				va.va_mask = AT_NLINK;
4455 				if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4456 				    va.va_nlink == 0) {
4457 					/* Remove state on file remove */
4458 					if (in_crit) {
4459 						nbl_end_crit(vp);
4460 						in_crit = 0;
4461 					}
4462 					rfs4_close_all_state(fp);
4463 				}
4464 				VN_RELE(tvp);
4465 			}
4466 		}
4467 	}
4468 
4469 	if (in_crit)
4470 		nbl_end_crit(vp);
4471 	VN_RELE(vp);
4472 
4473 	if (fp) {
4474 		rfs4_clear_dont_grant(fp);
4475 		rfs4_file_rele(fp);
4476 	}
4477 	if (nm != name)
4478 		kmem_free(name, MAXPATHLEN + 1);
4479 	kmem_free(nm, len);
4480 
4481 	if (error) {
4482 		*cs->statusp = resp->status = puterrno4(error);
4483 		goto out;
4484 	}
4485 
4486 	/*
4487 	 * Get the initial "after" sequence number, if it fails, set to zero
4488 	 */
4489 	idva.va_mask = AT_SEQ;
4490 	if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
4491 		idva.va_seq = 0;
4492 
4493 	/*
4494 	 * Force modified data and metadata out to stable storage.
4495 	 */
4496 	(void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
4497 
4498 	/*
4499 	 * Get "after" change value, if it fails, simply return the
4500 	 * before value.
4501 	 */
4502 	adva.va_mask = AT_CTIME|AT_SEQ;
4503 	if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
4504 		adva.va_ctime = bdva.va_ctime;
4505 		adva.va_seq = 0;
4506 	}
4507 
4508 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
4509 
4510 	/*
4511 	 * The cinfo.atomic = TRUE only if we have
4512 	 * non-zero va_seq's, and it has incremented by exactly one
4513 	 * during the VOP_REMOVE/RMDIR and it didn't change during
4514 	 * the VOP_FSYNC.
4515 	 */
4516 	if (bdva.va_seq && idva.va_seq && adva.va_seq &&
4517 	    idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
4518 		resp->cinfo.atomic = TRUE;
4519 	else
4520 		resp->cinfo.atomic = FALSE;
4521 
4522 	*cs->statusp = resp->status = NFS4_OK;
4523 
4524 out:
4525 	DTRACE_NFSV4_2(op__remove__done, struct compound_state *, cs,
4526 	    REMOVE4res *, resp);
4527 }
4528 
4529 /*
4530  * rename: args: SAVED_FH: from directory, CURRENT_FH: target directory,
4531  *		oldname and newname.
4532  *	res: status. If success - CURRENT_FH unchanged, return change_info
4533  *		for both from and target directories.
4534  */
4535 /* ARGSUSED */
4536 static void
4537 rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4538     struct compound_state *cs)
4539 {
4540 	RENAME4args *args = &argop->nfs_argop4_u.oprename;
4541 	RENAME4res *resp = &resop->nfs_resop4_u.oprename;
4542 	int error;
4543 	vnode_t *odvp;
4544 	vnode_t *ndvp;
4545 	vnode_t *srcvp, *targvp, *tvp;
4546 	struct vattr obdva, oidva, oadva;
4547 	struct vattr nbdva, nidva, nadva;
4548 	char *onm, *nnm;
4549 	uint_t olen, nlen;
4550 	rfs4_file_t *fp, *sfp;
4551 	int in_crit_src, in_crit_targ;
4552 	int fp_rele_grant_hold, sfp_rele_grant_hold;
4553 	int unlinked;
4554 	bslabel_t *clabel;
4555 	struct sockaddr *ca;
4556 	char *converted_onm = NULL;
4557 	char *converted_nnm = NULL;
4558 	nfsstat4 status;
4559 
4560 	DTRACE_NFSV4_2(op__rename__start, struct compound_state *, cs,
4561 	    RENAME4args *, args);
4562 
4563 	fp = sfp = NULL;
4564 	srcvp = targvp = tvp = NULL;
4565 	in_crit_src = in_crit_targ = 0;
4566 	fp_rele_grant_hold = sfp_rele_grant_hold = 0;
4567 	unlinked = 0;
4568 
4569 	/* CURRENT_FH: target directory */
4570 	ndvp = cs->vp;
4571 	if (ndvp == NULL) {
4572 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4573 		goto out;
4574 	}
4575 
4576 	/* SAVED_FH: from directory */
4577 	odvp = cs->saved_vp;
4578 	if (odvp == NULL) {
4579 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4580 		goto out;
4581 	}
4582 
4583 	if (cs->access == CS_ACCESS_DENIED) {
4584 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
4585 		goto out;
4586 	}
4587 
4588 	/*
4589 	 * If there is an unshared filesystem mounted on this vnode,
4590 	 * do not allow to rename objects in this directory.
4591 	 */
4592 	if (vn_ismntpt(odvp)) {
4593 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
4594 		goto out;
4595 	}
4596 
4597 	/*
4598 	 * If there is an unshared filesystem mounted on this vnode,
4599 	 * do not allow to rename to this directory.
4600 	 */
4601 	if (vn_ismntpt(ndvp)) {
4602 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
4603 		goto out;
4604 	}
4605 
4606 	if (odvp->v_type != VDIR || ndvp->v_type != VDIR) {
4607 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
4608 		goto out;
4609 	}
4610 
4611 	if (cs->saved_exi != cs->exi) {
4612 		*cs->statusp = resp->status = NFS4ERR_XDEV;
4613 		goto out;
4614 	}
4615 
4616 	status = utf8_dir_verify(&args->oldname);
4617 	if (status != NFS4_OK) {
4618 		*cs->statusp = resp->status = status;
4619 		goto out;
4620 	}
4621 
4622 	status = utf8_dir_verify(&args->newname);
4623 	if (status != NFS4_OK) {
4624 		*cs->statusp = resp->status = status;
4625 		goto out;
4626 	}
4627 
4628 	onm = utf8_to_fn(&args->oldname, &olen, NULL);
4629 	if (onm == NULL) {
4630 		*cs->statusp = resp->status = NFS4ERR_INVAL;
4631 		goto out;
4632 	}
4633 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4634 	nlen = MAXPATHLEN + 1;
4635 	converted_onm = nfscmd_convname(ca, cs->exi, onm, NFSCMD_CONV_INBOUND,
4636 	    nlen);
4637 
4638 	if (converted_onm == NULL) {
4639 		*cs->statusp = resp->status = NFS4ERR_INVAL;
4640 		kmem_free(onm, olen);
4641 		goto out;
4642 	}
4643 
4644 	nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4645 	if (nnm == NULL) {
4646 		*cs->statusp = resp->status = NFS4ERR_INVAL;
4647 		if (onm != converted_onm)
4648 			kmem_free(converted_onm, MAXPATHLEN + 1);
4649 		kmem_free(onm, olen);
4650 		goto out;
4651 	}
4652 	converted_nnm = nfscmd_convname(ca, cs->exi, nnm, NFSCMD_CONV_INBOUND,
4653 	    MAXPATHLEN  + 1);
4654 
4655 	if (converted_nnm == NULL) {
4656 		*cs->statusp = resp->status = NFS4ERR_INVAL;
4657 		kmem_free(nnm, nlen);
4658 		nnm = NULL;
4659 		if (onm != converted_onm)
4660 			kmem_free(converted_onm, MAXPATHLEN + 1);
4661 		kmem_free(onm, olen);
4662 		goto out;
4663 	}
4664 
4665 
4666 	if (olen > MAXNAMELEN || nlen > MAXNAMELEN) {
4667 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4668 		kmem_free(onm, olen);
4669 		kmem_free(nnm, nlen);
4670 		goto out;
4671 	}
4672 
4673 
4674 	if (rdonly4(req, cs)) {
4675 		*cs->statusp = resp->status = NFS4ERR_ROFS;
4676 		if (onm != converted_onm)
4677 			kmem_free(converted_onm, MAXPATHLEN + 1);
4678 		kmem_free(onm, olen);
4679 		if (nnm != converted_nnm)
4680 			kmem_free(converted_nnm, MAXPATHLEN + 1);
4681 		kmem_free(nnm, nlen);
4682 		goto out;
4683 	}
4684 
4685 	/* check label of the target dir */
4686 	if (is_system_labeled()) {
4687 		ASSERT(req->rq_label != NULL);
4688 		clabel = req->rq_label;
4689 		DTRACE_PROBE2(tx__rfs4__log__info__oprename__clabel, char *,
4690 		    "got client label from request(1)",
4691 		    struct svc_req *, req);
4692 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
4693 			if (!do_rfs_label_check(clabel, ndvp,
4694 			    EQUALITY_CHECK, cs->exi)) {
4695 				*cs->statusp = resp->status = NFS4ERR_ACCESS;
4696 				goto err_out;
4697 			}
4698 		}
4699 	}
4700 
4701 	/*
4702 	 * Is the source a file and have a delegation?
4703 	 * We don't need to acquire va_seq before these lookups, if
4704 	 * it causes an update, cinfo.before will not match, which will
4705 	 * trigger a cache flush even if atomic is TRUE.
4706 	 */
4707 	if (sfp = rfs4_lookup_and_findfile(odvp, converted_onm, &srcvp,
4708 	    &error, cs->cr)) {
4709 		if (rfs4_check_delegated_byfp(FWRITE, sfp, TRUE, TRUE, TRUE,
4710 		    NULL)) {
4711 			*cs->statusp = resp->status = NFS4ERR_DELAY;
4712 			goto err_out;
4713 		}
4714 	}
4715 
4716 	if (srcvp == NULL) {
4717 		*cs->statusp = resp->status = puterrno4(error);
4718 		if (onm != converted_onm)
4719 			kmem_free(converted_onm, MAXPATHLEN + 1);
4720 		kmem_free(onm, olen);
4721 		if (nnm != converted_nnm)
4722 			kmem_free(converted_nnm, MAXPATHLEN + 1);
4723 		kmem_free(nnm, nlen);
4724 		goto out;
4725 	}
4726 
4727 	sfp_rele_grant_hold = 1;
4728 
4729 	/* Does the destination exist and a file and have a delegation? */
4730 	if (fp = rfs4_lookup_and_findfile(ndvp, converted_nnm, &targvp,
4731 	    NULL, cs->cr)) {
4732 		if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4733 		    NULL)) {
4734 			*cs->statusp = resp->status = NFS4ERR_DELAY;
4735 			goto err_out;
4736 		}
4737 	}
4738 	fp_rele_grant_hold = 1;
4739 
4740 	/* Check for NBMAND lock on both source and target */
4741 	if (nbl_need_check(srcvp)) {
4742 		nbl_start_crit(srcvp, RW_READER);
4743 		in_crit_src = 1;
4744 		if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
4745 			*cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4746 			goto err_out;
4747 		}
4748 	}
4749 
4750 	if (targvp && nbl_need_check(targvp)) {
4751 		nbl_start_crit(targvp, RW_READER);
4752 		in_crit_targ = 1;
4753 		if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
4754 			*cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4755 			goto err_out;
4756 		}
4757 	}
4758 
4759 	/* Get source "before" change value */
4760 	obdva.va_mask = AT_CTIME|AT_SEQ;
4761 	error = VOP_GETATTR(odvp, &obdva, 0, cs->cr, NULL);
4762 	if (!error) {
4763 		nbdva.va_mask = AT_CTIME|AT_SEQ;
4764 		error = VOP_GETATTR(ndvp, &nbdva, 0, cs->cr, NULL);
4765 	}
4766 	if (error) {
4767 		*cs->statusp = resp->status = puterrno4(error);
4768 		goto err_out;
4769 	}
4770 
4771 	NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.before, obdva.va_ctime)
4772 	NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.before, nbdva.va_ctime)
4773 
4774 	error = VOP_RENAME(odvp, converted_onm, ndvp, converted_nnm, cs->cr,
4775 	    NULL, 0);
4776 
4777 	/*
4778 	 * If target existed and was unlinked by VOP_RENAME, state will need
4779 	 * closed. To avoid deadlock, rfs4_close_all_state will be done after
4780 	 * any necessary nbl_end_crit on srcvp and tgtvp.
4781 	 */
4782 	if (error == 0 && fp != NULL) {
4783 		rfs4_dbe_lock(fp->rf_dbe);
4784 		tvp = fp->rf_vp;
4785 		if (tvp)
4786 			VN_HOLD(tvp);
4787 		rfs4_dbe_unlock(fp->rf_dbe);
4788 
4789 		if (tvp) {
4790 			struct vattr va;
4791 			va.va_mask = AT_NLINK;
4792 
4793 			if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4794 			    va.va_nlink == 0) {
4795 				unlinked = 1;
4796 
4797 				/* DEBUG data */
4798 				if ((srcvp == targvp) || (tvp != targvp)) {
4799 					cmn_err(CE_WARN, "rfs4_op_rename: "
4800 					    "srcvp %p, targvp: %p, tvp: %p",
4801 					    (void *)srcvp, (void *)targvp,
4802 					    (void *)tvp);
4803 				}
4804 			} else {
4805 				VN_RELE(tvp);
4806 			}
4807 		}
4808 	}
4809 	if (error == 0)
4810 		vn_renamepath(ndvp, srcvp, nnm, nlen - 1);
4811 
4812 	if (in_crit_src)
4813 		nbl_end_crit(srcvp);
4814 	if (srcvp)
4815 		VN_RELE(srcvp);
4816 	if (in_crit_targ)
4817 		nbl_end_crit(targvp);
4818 	if (targvp)
4819 		VN_RELE(targvp);
4820 
4821 	if (unlinked) {
4822 		ASSERT(fp != NULL);
4823 		ASSERT(tvp != NULL);
4824 
4825 		/* DEBUG data */
4826 		if (RW_READ_HELD(&tvp->v_nbllock)) {
4827 			cmn_err(CE_WARN, "rfs4_op_rename: "
4828 			    "RW_READ_HELD(%p)", (void *)tvp);
4829 		}
4830 
4831 		/* The file is gone and so should the state */
4832 		rfs4_close_all_state(fp);
4833 		VN_RELE(tvp);
4834 	}
4835 
4836 	if (sfp) {
4837 		rfs4_clear_dont_grant(sfp);
4838 		rfs4_file_rele(sfp);
4839 	}
4840 	if (fp) {
4841 		rfs4_clear_dont_grant(fp);
4842 		rfs4_file_rele(fp);
4843 	}
4844 
4845 	if (converted_onm != onm)
4846 		kmem_free(converted_onm, MAXPATHLEN + 1);
4847 	kmem_free(onm, olen);
4848 	if (converted_nnm != nnm)
4849 		kmem_free(converted_nnm, MAXPATHLEN + 1);
4850 	kmem_free(nnm, nlen);
4851 
4852 	/*
4853 	 * Get the initial "after" sequence number, if it fails, set to zero
4854 	 */
4855 	oidva.va_mask = AT_SEQ;
4856 	if (VOP_GETATTR(odvp, &oidva, 0, cs->cr, NULL))
4857 		oidva.va_seq = 0;
4858 
4859 	nidva.va_mask = AT_SEQ;
4860 	if (VOP_GETATTR(ndvp, &nidva, 0, cs->cr, NULL))
4861 		nidva.va_seq = 0;
4862 
4863 	/*
4864 	 * Force modified data and metadata out to stable storage.
4865 	 */
4866 	(void) VOP_FSYNC(odvp, 0, cs->cr, NULL);
4867 	(void) VOP_FSYNC(ndvp, 0, cs->cr, NULL);
4868 
4869 	if (error) {
4870 		*cs->statusp = resp->status = puterrno4(error);
4871 		goto out;
4872 	}
4873 
4874 	/*
4875 	 * Get "after" change values, if it fails, simply return the
4876 	 * before value.
4877 	 */
4878 	oadva.va_mask = AT_CTIME|AT_SEQ;
4879 	if (VOP_GETATTR(odvp, &oadva, 0, cs->cr, NULL)) {
4880 		oadva.va_ctime = obdva.va_ctime;
4881 		oadva.va_seq = 0;
4882 	}
4883 
4884 	nadva.va_mask = AT_CTIME|AT_SEQ;
4885 	if (VOP_GETATTR(odvp, &nadva, 0, cs->cr, NULL)) {
4886 		nadva.va_ctime = nbdva.va_ctime;
4887 		nadva.va_seq = 0;
4888 	}
4889 
4890 	NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.after, oadva.va_ctime)
4891 	NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.after, nadva.va_ctime)
4892 
4893 	/*
4894 	 * The cinfo.atomic = TRUE only if we have
4895 	 * non-zero va_seq's, and it has incremented by exactly one
4896 	 * during the VOP_RENAME and it didn't change during the VOP_FSYNC.
4897 	 */
4898 	if (obdva.va_seq && oidva.va_seq && oadva.va_seq &&
4899 	    oidva.va_seq == (obdva.va_seq + 1) && oidva.va_seq == oadva.va_seq)
4900 		resp->source_cinfo.atomic = TRUE;
4901 	else
4902 		resp->source_cinfo.atomic = FALSE;
4903 
4904 	if (nbdva.va_seq && nidva.va_seq && nadva.va_seq &&
4905 	    nidva.va_seq == (nbdva.va_seq + 1) && nidva.va_seq == nadva.va_seq)
4906 		resp->target_cinfo.atomic = TRUE;
4907 	else
4908 		resp->target_cinfo.atomic = FALSE;
4909 
4910 #ifdef	VOLATILE_FH_TEST
4911 	{
4912 	extern void add_volrnm_fh(struct exportinfo *, vnode_t *);
4913 
4914 	/*
4915 	 * Add the renamed file handle to the volatile rename list
4916 	 */
4917 	if (cs->exi->exi_export.ex_flags & EX_VOLRNM) {
4918 		/* file handles may expire on rename */
4919 		vnode_t *vp;
4920 
4921 		nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4922 		/*
4923 		 * Already know that nnm will be a valid string
4924 		 */
4925 		error = VOP_LOOKUP(ndvp, nnm, &vp, NULL, 0, NULL, cs->cr,
4926 		    NULL, NULL, NULL);
4927 		kmem_free(nnm, nlen);
4928 		if (!error) {
4929 			add_volrnm_fh(cs->exi, vp);
4930 			VN_RELE(vp);
4931 		}
4932 	}
4933 	}
4934 #endif	/* VOLATILE_FH_TEST */
4935 
4936 	*cs->statusp = resp->status = NFS4_OK;
4937 out:
4938 	DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4939 	    RENAME4res *, resp);
4940 	return;
4941 
4942 err_out:
4943 	if (onm != converted_onm)
4944 		kmem_free(converted_onm, MAXPATHLEN + 1);
4945 	if (onm != NULL)
4946 		kmem_free(onm, olen);
4947 	if (nnm != converted_nnm)
4948 		kmem_free(converted_nnm, MAXPATHLEN + 1);
4949 	if (nnm != NULL)
4950 		kmem_free(nnm, nlen);
4951 
4952 	if (in_crit_src) nbl_end_crit(srcvp);
4953 	if (in_crit_targ) nbl_end_crit(targvp);
4954 	if (targvp) VN_RELE(targvp);
4955 	if (srcvp) VN_RELE(srcvp);
4956 	if (sfp) {
4957 		if (sfp_rele_grant_hold) rfs4_clear_dont_grant(sfp);
4958 		rfs4_file_rele(sfp);
4959 	}
4960 	if (fp) {
4961 		if (fp_rele_grant_hold) rfs4_clear_dont_grant(fp);
4962 		rfs4_file_rele(fp);
4963 	}
4964 
4965 	DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4966 	    RENAME4res *, resp);
4967 }
4968 
4969 /* ARGSUSED */
4970 static void
4971 rfs4_op_renew(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4972     struct compound_state *cs)
4973 {
4974 	RENEW4args *args = &argop->nfs_argop4_u.oprenew;
4975 	RENEW4res *resp = &resop->nfs_resop4_u.oprenew;
4976 	rfs4_client_t *cp;
4977 
4978 	DTRACE_NFSV4_2(op__renew__start, struct compound_state *, cs,
4979 	    RENEW4args *, args);
4980 
4981 	if ((cp = rfs4_findclient_by_id(args->clientid, FALSE)) == NULL) {
4982 		*cs->statusp = resp->status =
4983 		    rfs4_check_clientid(&args->clientid, 0);
4984 		goto out;
4985 	}
4986 
4987 	if (rfs4_lease_expired(cp)) {
4988 		rfs4_client_rele(cp);
4989 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
4990 		goto out;
4991 	}
4992 
4993 	rfs4_update_lease(cp);
4994 
4995 	mutex_enter(cp->rc_cbinfo.cb_lock);
4996 	if (cp->rc_cbinfo.cb_notified_of_cb_path_down == FALSE) {
4997 		cp->rc_cbinfo.cb_notified_of_cb_path_down = TRUE;
4998 		*cs->statusp = resp->status = NFS4ERR_CB_PATH_DOWN;
4999 	} else {
5000 		*cs->statusp = resp->status = NFS4_OK;
5001 	}
5002 	mutex_exit(cp->rc_cbinfo.cb_lock);
5003 
5004 	rfs4_client_rele(cp);
5005 
5006 out:
5007 	DTRACE_NFSV4_2(op__renew__done, struct compound_state *, cs,
5008 	    RENEW4res *, resp);
5009 }
5010 
5011 /* ARGSUSED */
5012 static void
5013 rfs4_op_restorefh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
5014     struct compound_state *cs)
5015 {
5016 	RESTOREFH4res *resp = &resop->nfs_resop4_u.oprestorefh;
5017 
5018 	DTRACE_NFSV4_1(op__restorefh__start, struct compound_state *, cs);
5019 
5020 	/* No need to check cs->access - we are not accessing any object */
5021 	if ((cs->saved_vp == NULL) || (cs->saved_fh.nfs_fh4_val == NULL)) {
5022 		*cs->statusp = resp->status = NFS4ERR_RESTOREFH;
5023 		goto out;
5024 	}
5025 	if (cs->vp != NULL) {
5026 		VN_RELE(cs->vp);
5027 	}
5028 	cs->vp = cs->saved_vp;
5029 	cs->saved_vp = NULL;
5030 	cs->exi = cs->saved_exi;
5031 	nfs_fh4_copy(&cs->saved_fh, &cs->fh);
5032 	*cs->statusp = resp->status = NFS4_OK;
5033 	cs->deleg = FALSE;
5034 
5035 out:
5036 	DTRACE_NFSV4_2(op__restorefh__done, struct compound_state *, cs,
5037 	    RESTOREFH4res *, resp);
5038 }
5039 
5040 /* ARGSUSED */
5041 static void
5042 rfs4_op_savefh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5043     struct compound_state *cs)
5044 {
5045 	SAVEFH4res *resp = &resop->nfs_resop4_u.opsavefh;
5046 
5047 	DTRACE_NFSV4_1(op__savefh__start, struct compound_state *, cs);
5048 
5049 	/* No need to check cs->access - we are not accessing any object */
5050 	if (cs->vp == NULL) {
5051 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5052 		goto out;
5053 	}
5054 	if (cs->saved_vp != NULL) {
5055 		VN_RELE(cs->saved_vp);
5056 	}
5057 	cs->saved_vp = cs->vp;
5058 	VN_HOLD(cs->saved_vp);
5059 	cs->saved_exi = cs->exi;
5060 	/*
5061 	 * since SAVEFH is fairly rare, don't alloc space for its fh
5062 	 * unless necessary.
5063 	 */
5064 	if (cs->saved_fh.nfs_fh4_val == NULL) {
5065 		cs->saved_fh.nfs_fh4_val = kmem_alloc(NFS4_FHSIZE, KM_SLEEP);
5066 	}
5067 	nfs_fh4_copy(&cs->fh, &cs->saved_fh);
5068 	*cs->statusp = resp->status = NFS4_OK;
5069 
5070 out:
5071 	DTRACE_NFSV4_2(op__savefh__done, struct compound_state *, cs,
5072 	    SAVEFH4res *, resp);
5073 }
5074 
5075 /*
5076  * rfs4_verify_attr is called when nfsv4 Setattr failed, but we wish to
5077  * return the bitmap of attrs that were set successfully. It is also
5078  * called by Verify/Nverify to test the vattr/vfsstat attrs. It should
5079  * always be called only after rfs4_do_set_attrs().
5080  *
5081  * Verify that the attributes are same as the expected ones. sargp->vap
5082  * and sargp->sbp contain the input attributes as translated from fattr4.
5083  *
5084  * This function verifies only the attrs that correspond to a vattr or
5085  * vfsstat struct. That is because of the extra step needed to get the
5086  * corresponding system structs. Other attributes have already been set or
5087  * verified by do_rfs4_set_attrs.
5088  *
5089  * Return 0 if all attrs match, -1 if some don't, error if error processing.
5090  */
5091 static int
5092 rfs4_verify_attr(struct nfs4_svgetit_arg *sargp,
5093     bitmap4 *resp, struct nfs4_ntov_table *ntovp)
5094 {
5095 	int error, ret_error = 0;
5096 	int i, k;
5097 	uint_t sva_mask = sargp->vap->va_mask;
5098 	uint_t vbit;
5099 	union nfs4_attr_u *na;
5100 	uint8_t *amap;
5101 	bool_t getsb = ntovp->vfsstat;
5102 
5103 	if (sva_mask != 0) {
5104 		/*
5105 		 * Okay to overwrite sargp->vap because we verify based
5106 		 * on the incoming values.
5107 		 */
5108 		ret_error = VOP_GETATTR(sargp->cs->vp, sargp->vap, 0,
5109 		    sargp->cs->cr, NULL);
5110 		if (ret_error) {
5111 			if (resp == NULL)
5112 				return (ret_error);
5113 			/*
5114 			 * Must return bitmap of successful attrs
5115 			 */
5116 			sva_mask = 0;	/* to prevent checking vap later */
5117 		} else {
5118 			/*
5119 			 * Some file systems clobber va_mask. it is probably
5120 			 * wrong of them to do so, nonethless we practice
5121 			 * defensive coding.
5122 			 * See bug id 4276830.
5123 			 */
5124 			sargp->vap->va_mask = sva_mask;
5125 		}
5126 	}
5127 
5128 	if (getsb) {
5129 		/*
5130 		 * Now get the superblock and loop on the bitmap, as there is
5131 		 * no simple way of translating from superblock to bitmap4.
5132 		 */
5133 		ret_error = VFS_STATVFS(sargp->cs->vp->v_vfsp, sargp->sbp);
5134 		if (ret_error) {
5135 			if (resp == NULL)
5136 				goto errout;
5137 			getsb = FALSE;
5138 		}
5139 	}
5140 
5141 	/*
5142 	 * Now loop and verify each attribute which getattr returned
5143 	 * whether it's the same as the input.
5144 	 */
5145 	if (resp == NULL && !getsb && (sva_mask == 0))
5146 		goto errout;
5147 
5148 	na = ntovp->na;
5149 	amap = ntovp->amap;
5150 	k = 0;
5151 	for (i = 0; i < ntovp->attrcnt; i++, na++, amap++) {
5152 		k = *amap;
5153 		ASSERT(nfs4_ntov_map[k].nval == k);
5154 		vbit = nfs4_ntov_map[k].vbit;
5155 
5156 		/*
5157 		 * If vattr attribute but VOP_GETATTR failed, or it's
5158 		 * superblock attribute but VFS_STATVFS failed, skip
5159 		 */
5160 		if (vbit) {
5161 			if ((vbit & sva_mask) == 0)
5162 				continue;
5163 		} else if (!(getsb && nfs4_ntov_map[k].vfsstat)) {
5164 			continue;
5165 		}
5166 		error = (*nfs4_ntov_map[k].sv_getit)(NFS4ATTR_VERIT, sargp, na);
5167 		if (resp != NULL) {
5168 			if (error)
5169 				ret_error = -1;	/* not all match */
5170 			else	/* update response bitmap */
5171 				*resp |= nfs4_ntov_map[k].fbit;
5172 			continue;
5173 		}
5174 		if (error) {
5175 			ret_error = -1;	/* not all match */
5176 			break;
5177 		}
5178 	}
5179 errout:
5180 	return (ret_error);
5181 }
5182 
5183 /*
5184  * Decode the attribute to be set/verified. If the attr requires a sys op
5185  * (VOP_GETATTR, VFS_VFSSTAT), and the request is to verify, then don't
5186  * call the sv_getit function for it, because the sys op hasn't yet been done.
5187  * Return 0 for success, error code if failed.
5188  *
5189  * Note: the decoded arg is not freed here but in nfs4_ntov_table_free.
5190  */
5191 static int
5192 decode_fattr4_attr(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sargp,
5193     int k, XDR *xdrp, bitmap4 *resp_bval, union nfs4_attr_u *nap)
5194 {
5195 	int error = 0;
5196 	bool_t set_later;
5197 
5198 	sargp->vap->va_mask |= nfs4_ntov_map[k].vbit;
5199 
5200 	if ((*nfs4_ntov_map[k].xfunc)(xdrp, nap)) {
5201 		set_later = nfs4_ntov_map[k].vbit || nfs4_ntov_map[k].vfsstat;
5202 		/*
5203 		 * don't verify yet if a vattr or sb dependent attr,
5204 		 * because we don't have their sys values yet.
5205 		 * Will be done later.
5206 		 */
5207 		if (! (set_later && (cmd == NFS4ATTR_VERIT))) {
5208 			/*
5209 			 * ACLs are a special case, since setting the MODE
5210 			 * conflicts with setting the ACL.  We delay setting
5211 			 * the ACL until all other attributes have been set.
5212 			 * The ACL gets set in do_rfs4_op_setattr().
5213 			 */
5214 			if (nfs4_ntov_map[k].fbit != FATTR4_ACL_MASK) {
5215 				error = (*nfs4_ntov_map[k].sv_getit)(cmd,
5216 				    sargp, nap);
5217 				if (error) {
5218 					xdr_free(nfs4_ntov_map[k].xfunc,
5219 					    (caddr_t)nap);
5220 				}
5221 			}
5222 		}
5223 	} else {
5224 #ifdef  DEBUG
5225 		cmn_err(CE_NOTE, "decode_fattr4_attr: error "
5226 		    "decoding attribute %d\n", k);
5227 #endif
5228 		error = EINVAL;
5229 	}
5230 	if (!error && resp_bval && !set_later) {
5231 		*resp_bval |= nfs4_ntov_map[k].fbit;
5232 	}
5233 
5234 	return (error);
5235 }
5236 
5237 /*
5238  * Set vattr based on incoming fattr4 attrs - used by setattr.
5239  * Set response mask. Ignore any values that are not writable vattr attrs.
5240  */
5241 static nfsstat4
5242 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5243     struct nfs4_svgetit_arg *sargp, struct nfs4_ntov_table *ntovp,
5244     nfs4_attr_cmd_t cmd)
5245 {
5246 	int error = 0;
5247 	int i;
5248 	char *attrs = fattrp->attrlist4;
5249 	uint32_t attrslen = fattrp->attrlist4_len;
5250 	XDR xdr;
5251 	nfsstat4 status = NFS4_OK;
5252 	vnode_t *vp = cs->vp;
5253 	union nfs4_attr_u *na;
5254 	uint8_t *amap;
5255 
5256 #ifndef lint
5257 	/*
5258 	 * Make sure that maximum attribute number can be expressed as an
5259 	 * 8 bit quantity.
5260 	 */
5261 	ASSERT(NFS4_MAXNUM_ATTRS <= (UINT8_MAX + 1));
5262 #endif
5263 
5264 	if (vp == NULL) {
5265 		if (resp)
5266 			*resp = 0;
5267 		return (NFS4ERR_NOFILEHANDLE);
5268 	}
5269 	if (cs->access == CS_ACCESS_DENIED) {
5270 		if (resp)
5271 			*resp = 0;
5272 		return (NFS4ERR_ACCESS);
5273 	}
5274 
5275 	sargp->op = cmd;
5276 	sargp->cs = cs;
5277 	sargp->flag = 0;	/* may be set later */
5278 	sargp->vap->va_mask = 0;
5279 	sargp->rdattr_error = NFS4_OK;
5280 	sargp->rdattr_error_req = FALSE;
5281 	/* sargp->sbp is set by the caller */
5282 
5283 	xdrmem_create(&xdr, attrs, attrslen, XDR_DECODE);
5284 
5285 	na = ntovp->na;
5286 	amap = ntovp->amap;
5287 
5288 	/*
5289 	 * The following loop iterates on the nfs4_ntov_map checking
5290 	 * if the fbit is set in the requested bitmap.
5291 	 * If set then we process the arguments using the
5292 	 * rfs4_fattr4 conversion functions to populate the setattr
5293 	 * vattr and va_mask. Any settable attrs that are not using vattr
5294 	 * will be set in this loop.
5295 	 */
5296 	for (i = 0; i < nfs4_ntov_map_size; i++) {
5297 		if (!(fattrp->attrmask & nfs4_ntov_map[i].fbit)) {
5298 			continue;
5299 		}
5300 		/*
5301 		 * If setattr, must be a writable attr.
5302 		 * If verify/nverify, must be a readable attr.
5303 		 */
5304 		if ((error = (*nfs4_ntov_map[i].sv_getit)(
5305 		    NFS4ATTR_SUPPORTED, sargp, NULL)) != 0) {
5306 			/*
5307 			 * Client tries to set/verify an
5308 			 * unsupported attribute, tries to set
5309 			 * a read only attr or verify a write
5310 			 * only one - error!
5311 			 */
5312 			break;
5313 		}
5314 		/*
5315 		 * Decode the attribute to set/verify
5316 		 */
5317 		error = decode_fattr4_attr(cmd, sargp, nfs4_ntov_map[i].nval,
5318 		    &xdr, resp ? resp : NULL, na);
5319 		if (error)
5320 			break;
5321 		*amap++ = (uint8_t)nfs4_ntov_map[i].nval;
5322 		na++;
5323 		(ntovp->attrcnt)++;
5324 		if (nfs4_ntov_map[i].vfsstat)
5325 			ntovp->vfsstat = TRUE;
5326 	}
5327 
5328 	if (error != 0)
5329 		status = (error == ENOTSUP ? NFS4ERR_ATTRNOTSUPP :
5330 		    puterrno4(error));
5331 	/* xdrmem_destroy(&xdrs); */	/* NO-OP */
5332 	return (status);
5333 }
5334 
5335 static nfsstat4
5336 do_rfs4_op_setattr(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5337     stateid4 *stateid)
5338 {
5339 	int error = 0;
5340 	struct nfs4_svgetit_arg sarg;
5341 	bool_t trunc;
5342 
5343 	nfsstat4 status = NFS4_OK;
5344 	cred_t *cr = cs->cr;
5345 	vnode_t *vp = cs->vp;
5346 	struct nfs4_ntov_table ntov;
5347 	struct statvfs64 sb;
5348 	struct vattr bva;
5349 	struct flock64 bf;
5350 	int in_crit = 0;
5351 	uint_t saved_mask = 0;
5352 	caller_context_t ct;
5353 
5354 	*resp = 0;
5355 	sarg.sbp = &sb;
5356 	sarg.is_referral = B_FALSE;
5357 	nfs4_ntov_table_init(&ntov);
5358 	status = do_rfs4_set_attrs(resp, fattrp, cs, &sarg, &ntov,
5359 	    NFS4ATTR_SETIT);
5360 	if (status != NFS4_OK) {
5361 		/*
5362 		 * failed set attrs
5363 		 */
5364 		goto done;
5365 	}
5366 
5367 	if ((sarg.vap->va_mask == 0) &&
5368 	    (! (fattrp->attrmask & FATTR4_ACL_MASK))) {
5369 		/*
5370 		 * no further work to be done
5371 		 */
5372 		goto done;
5373 	}
5374 
5375 	/*
5376 	 * If we got a request to set the ACL and the MODE, only
5377 	 * allow changing VSUID, VSGID, and VSVTX.  Attempting
5378 	 * to change any other bits, along with setting an ACL,
5379 	 * gives NFS4ERR_INVAL.
5380 	 */
5381 	if ((fattrp->attrmask & FATTR4_ACL_MASK) &&
5382 	    (fattrp->attrmask & FATTR4_MODE_MASK)) {
5383 		vattr_t va;
5384 
5385 		va.va_mask = AT_MODE;
5386 		error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
5387 		if (error) {
5388 			status = puterrno4(error);
5389 			goto done;
5390 		}
5391 		if ((sarg.vap->va_mode ^ va.va_mode) &
5392 		    ~(VSUID | VSGID | VSVTX)) {
5393 			status = NFS4ERR_INVAL;
5394 			goto done;
5395 		}
5396 	}
5397 
5398 	/* Check stateid only if size has been set */
5399 	if (sarg.vap->va_mask & AT_SIZE) {
5400 		trunc = (sarg.vap->va_size == 0);
5401 		status = rfs4_check_stateid(FWRITE, cs->vp, stateid,
5402 		    trunc, &cs->deleg, sarg.vap->va_mask & AT_SIZE, &ct, cs);
5403 		if (status != NFS4_OK)
5404 			goto done;
5405 	} else {
5406 		ct.cc_sysid = 0;
5407 		ct.cc_pid = 0;
5408 		ct.cc_caller_id = nfs4_srv_caller_id;
5409 		ct.cc_flags = CC_DONTBLOCK;
5410 	}
5411 
5412 	/* XXX start of possible race with delegations */
5413 
5414 	/*
5415 	 * We need to specially handle size changes because it is
5416 	 * possible for the client to create a file with read-only
5417 	 * modes, but with the file opened for writing. If the client
5418 	 * then tries to set the file size, e.g. ftruncate(3C),
5419 	 * fcntl(F_FREESP), the normal access checking done in
5420 	 * VOP_SETATTR would prevent the client from doing it even though
5421 	 * it should be allowed to do so.  To get around this, we do the
5422 	 * access checking for ourselves and use VOP_SPACE which doesn't
5423 	 * do the access checking.
5424 	 * Also the client should not be allowed to change the file
5425 	 * size if there is a conflicting non-blocking mandatory lock in
5426 	 * the region of the change.
5427 	 */
5428 	if (vp->v_type == VREG && (sarg.vap->va_mask & AT_SIZE)) {
5429 		u_offset_t offset;
5430 		ssize_t length;
5431 
5432 		/*
5433 		 * ufs_setattr clears AT_SIZE from vap->va_mask, but
5434 		 * before returning, sarg.vap->va_mask is used to
5435 		 * generate the setattr reply bitmap.  We also clear
5436 		 * AT_SIZE below before calling VOP_SPACE.  For both
5437 		 * of these cases, the va_mask needs to be saved here
5438 		 * and restored after calling VOP_SETATTR.
5439 		 */
5440 		saved_mask = sarg.vap->va_mask;
5441 
5442 		/*
5443 		 * Check any possible conflict due to NBMAND locks.
5444 		 * Get into critical region before VOP_GETATTR, so the
5445 		 * size attribute is valid when checking conflicts.
5446 		 */
5447 		if (nbl_need_check(vp)) {
5448 			nbl_start_crit(vp, RW_READER);
5449 			in_crit = 1;
5450 		}
5451 
5452 		bva.va_mask = AT_UID|AT_SIZE;
5453 		if (error = VOP_GETATTR(vp, &bva, 0, cr, &ct)) {
5454 			status = puterrno4(error);
5455 			goto done;
5456 		}
5457 
5458 		if (in_crit) {
5459 			if (sarg.vap->va_size < bva.va_size) {
5460 				offset = sarg.vap->va_size;
5461 				length = bva.va_size - sarg.vap->va_size;
5462 			} else {
5463 				offset = bva.va_size;
5464 				length = sarg.vap->va_size - bva.va_size;
5465 			}
5466 			if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
5467 			    &ct)) {
5468 				status = NFS4ERR_LOCKED;
5469 				goto done;
5470 			}
5471 		}
5472 
5473 		if (crgetuid(cr) == bva.va_uid) {
5474 			sarg.vap->va_mask &= ~AT_SIZE;
5475 			bf.l_type = F_WRLCK;
5476 			bf.l_whence = 0;
5477 			bf.l_start = (off64_t)sarg.vap->va_size;
5478 			bf.l_len = 0;
5479 			bf.l_sysid = 0;
5480 			bf.l_pid = 0;
5481 			error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
5482 			    (offset_t)sarg.vap->va_size, cr, &ct);
5483 		}
5484 	}
5485 
5486 	if (!error && sarg.vap->va_mask != 0)
5487 		error = VOP_SETATTR(vp, sarg.vap, sarg.flag, cr, &ct);
5488 
5489 	/* restore va_mask -- ufs_setattr clears AT_SIZE */
5490 	if (saved_mask & AT_SIZE)
5491 		sarg.vap->va_mask |= AT_SIZE;
5492 
5493 	/*
5494 	 * If an ACL was being set, it has been delayed until now,
5495 	 * in order to set the mode (via the VOP_SETATTR() above) first.
5496 	 */
5497 	if ((! error) && (fattrp->attrmask & FATTR4_ACL_MASK)) {
5498 		int i;
5499 
5500 		for (i = 0; i < NFS4_MAXNUM_ATTRS; i++)
5501 			if (ntov.amap[i] == FATTR4_ACL)
5502 				break;
5503 		if (i < NFS4_MAXNUM_ATTRS) {
5504 			error = (*nfs4_ntov_map[FATTR4_ACL].sv_getit)(
5505 			    NFS4ATTR_SETIT, &sarg, &ntov.na[i]);
5506 			if (error == 0) {
5507 				*resp |= FATTR4_ACL_MASK;
5508 			} else if (error == ENOTSUP) {
5509 				(void) rfs4_verify_attr(&sarg, resp, &ntov);
5510 				status = NFS4ERR_ATTRNOTSUPP;
5511 				goto done;
5512 			}
5513 		} else {
5514 			NFS4_DEBUG(rfs4_debug,
5515 			    (CE_NOTE, "do_rfs4_op_setattr: "
5516 			    "unable to find ACL in fattr4"));
5517 			error = EINVAL;
5518 		}
5519 	}
5520 
5521 	if (error) {
5522 		/* check if a monitor detected a delegation conflict */
5523 		if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
5524 			status = NFS4ERR_DELAY;
5525 		else
5526 			status = puterrno4(error);
5527 
5528 		/*
5529 		 * Set the response bitmap when setattr failed.
5530 		 * If VOP_SETATTR partially succeeded, test by doing a
5531 		 * VOP_GETATTR on the object and comparing the data
5532 		 * to the setattr arguments.
5533 		 */
5534 		(void) rfs4_verify_attr(&sarg, resp, &ntov);
5535 	} else {
5536 		/*
5537 		 * Force modified metadata out to stable storage.
5538 		 */
5539 		(void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
5540 		/*
5541 		 * Set response bitmap
5542 		 */
5543 		nfs4_vmask_to_nmask_set(sarg.vap->va_mask, resp);
5544 	}
5545 
5546 /* Return early and already have a NFSv4 error */
5547 done:
5548 	/*
5549 	 * Except for nfs4_vmask_to_nmask_set(), vattr --> fattr
5550 	 * conversion sets both readable and writeable NFS4 attrs
5551 	 * for AT_MTIME and AT_ATIME.  The line below masks out
5552 	 * unrequested attrs from the setattr result bitmap.  This
5553 	 * is placed after the done: label to catch the ATTRNOTSUP
5554 	 * case.
5555 	 */
5556 	*resp &= fattrp->attrmask;
5557 
5558 	if (in_crit)
5559 		nbl_end_crit(vp);
5560 
5561 	nfs4_ntov_table_free(&ntov, &sarg);
5562 
5563 	return (status);
5564 }
5565 
5566 /* ARGSUSED */
5567 static void
5568 rfs4_op_setattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5569     struct compound_state *cs)
5570 {
5571 	SETATTR4args *args = &argop->nfs_argop4_u.opsetattr;
5572 	SETATTR4res *resp = &resop->nfs_resop4_u.opsetattr;
5573 	bslabel_t *clabel;
5574 
5575 	DTRACE_NFSV4_2(op__setattr__start, struct compound_state *, cs,
5576 	    SETATTR4args *, args);
5577 
5578 	if (cs->vp == NULL) {
5579 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5580 		goto out;
5581 	}
5582 
5583 	/*
5584 	 * If there is an unshared filesystem mounted on this vnode,
5585 	 * do not allow to setattr on this vnode.
5586 	 */
5587 	if (vn_ismntpt(cs->vp)) {
5588 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
5589 		goto out;
5590 	}
5591 
5592 	resp->attrsset = 0;
5593 
5594 	if (rdonly4(req, cs)) {
5595 		*cs->statusp = resp->status = NFS4ERR_ROFS;
5596 		goto out;
5597 	}
5598 
5599 	/* check label before setting attributes */
5600 	if (is_system_labeled()) {
5601 		ASSERT(req->rq_label != NULL);
5602 		clabel = req->rq_label;
5603 		DTRACE_PROBE2(tx__rfs4__log__info__opsetattr__clabel, char *,
5604 		    "got client label from request(1)",
5605 		    struct svc_req *, req);
5606 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
5607 			if (!do_rfs_label_check(clabel, cs->vp,
5608 			    EQUALITY_CHECK, cs->exi)) {
5609 				*cs->statusp = resp->status = NFS4ERR_ACCESS;
5610 				goto out;
5611 			}
5612 		}
5613 	}
5614 
5615 	*cs->statusp = resp->status =
5616 	    do_rfs4_op_setattr(&resp->attrsset, &args->obj_attributes, cs,
5617 	    &args->stateid);
5618 
5619 out:
5620 	DTRACE_NFSV4_2(op__setattr__done, struct compound_state *, cs,
5621 	    SETATTR4res *, resp);
5622 }
5623 
5624 /* ARGSUSED */
5625 static void
5626 rfs4_op_verify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5627     struct compound_state *cs)
5628 {
5629 	/*
5630 	 * verify and nverify are exactly the same, except that nverify
5631 	 * succeeds when some argument changed, and verify succeeds when
5632 	 * when none changed.
5633 	 */
5634 
5635 	VERIFY4args  *args = &argop->nfs_argop4_u.opverify;
5636 	VERIFY4res *resp = &resop->nfs_resop4_u.opverify;
5637 
5638 	int error;
5639 	struct nfs4_svgetit_arg sarg;
5640 	struct statvfs64 sb;
5641 	struct nfs4_ntov_table ntov;
5642 
5643 	DTRACE_NFSV4_2(op__verify__start, struct compound_state *, cs,
5644 	    VERIFY4args *, args);
5645 
5646 	if (cs->vp == NULL) {
5647 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5648 		goto out;
5649 	}
5650 
5651 	sarg.sbp = &sb;
5652 	sarg.is_referral = B_FALSE;
5653 	nfs4_ntov_table_init(&ntov);
5654 	resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5655 	    &sarg, &ntov, NFS4ATTR_VERIT);
5656 	if (resp->status != NFS4_OK) {
5657 		/*
5658 		 * do_rfs4_set_attrs will try to verify systemwide attrs,
5659 		 * so could return -1 for "no match".
5660 		 */
5661 		if (resp->status == -1)
5662 			resp->status = NFS4ERR_NOT_SAME;
5663 		goto done;
5664 	}
5665 	error = rfs4_verify_attr(&sarg, NULL, &ntov);
5666 	switch (error) {
5667 	case 0:
5668 		resp->status = NFS4_OK;
5669 		break;
5670 	case -1:
5671 		resp->status = NFS4ERR_NOT_SAME;
5672 		break;
5673 	default:
5674 		resp->status = puterrno4(error);
5675 		break;
5676 	}
5677 done:
5678 	*cs->statusp = resp->status;
5679 	nfs4_ntov_table_free(&ntov, &sarg);
5680 out:
5681 	DTRACE_NFSV4_2(op__verify__done, struct compound_state *, cs,
5682 	    VERIFY4res *, resp);
5683 }
5684 
5685 /* ARGSUSED */
5686 static void
5687 rfs4_op_nverify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5688     struct compound_state *cs)
5689 {
5690 	/*
5691 	 * verify and nverify are exactly the same, except that nverify
5692 	 * succeeds when some argument changed, and verify succeeds when
5693 	 * when none changed.
5694 	 */
5695 
5696 	NVERIFY4args  *args = &argop->nfs_argop4_u.opnverify;
5697 	NVERIFY4res *resp = &resop->nfs_resop4_u.opnverify;
5698 
5699 	int error;
5700 	struct nfs4_svgetit_arg sarg;
5701 	struct statvfs64 sb;
5702 	struct nfs4_ntov_table ntov;
5703 
5704 	DTRACE_NFSV4_2(op__nverify__start, struct compound_state *, cs,
5705 	    NVERIFY4args *, args);
5706 
5707 	if (cs->vp == NULL) {
5708 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5709 		DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5710 		    NVERIFY4res *, resp);
5711 		return;
5712 	}
5713 	sarg.sbp = &sb;
5714 	sarg.is_referral = B_FALSE;
5715 	nfs4_ntov_table_init(&ntov);
5716 	resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5717 	    &sarg, &ntov, NFS4ATTR_VERIT);
5718 	if (resp->status != NFS4_OK) {
5719 		/*
5720 		 * do_rfs4_set_attrs will try to verify systemwide attrs,
5721 		 * so could return -1 for "no match".
5722 		 */
5723 		if (resp->status == -1)
5724 			resp->status = NFS4_OK;
5725 		goto done;
5726 	}
5727 	error = rfs4_verify_attr(&sarg, NULL, &ntov);
5728 	switch (error) {
5729 	case 0:
5730 		resp->status = NFS4ERR_SAME;
5731 		break;
5732 	case -1:
5733 		resp->status = NFS4_OK;
5734 		break;
5735 	default:
5736 		resp->status = puterrno4(error);
5737 		break;
5738 	}
5739 done:
5740 	*cs->statusp = resp->status;
5741 	nfs4_ntov_table_free(&ntov, &sarg);
5742 
5743 	DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5744 	    NVERIFY4res *, resp);
5745 }
5746 
5747 /*
5748  * XXX - This should live in an NFS header file.
5749  */
5750 #define	MAX_IOVECS	12
5751 
5752 /* ARGSUSED */
5753 static void
5754 rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5755     struct compound_state *cs)
5756 {
5757 	WRITE4args *args = &argop->nfs_argop4_u.opwrite;
5758 	WRITE4res *resp = &resop->nfs_resop4_u.opwrite;
5759 	int error;
5760 	vnode_t *vp;
5761 	struct vattr bva;
5762 	u_offset_t rlimit;
5763 	struct uio uio;
5764 	struct iovec iov[MAX_IOVECS];
5765 	struct iovec *iovp;
5766 	int iovcnt;
5767 	int ioflag;
5768 	cred_t *savecred, *cr;
5769 	bool_t *deleg = &cs->deleg;
5770 	nfsstat4 stat;
5771 	int in_crit = 0;
5772 	caller_context_t ct;
5773 	nfs4_srv_t *nsrv4;
5774 
5775 	DTRACE_NFSV4_2(op__write__start, struct compound_state *, cs,
5776 	    WRITE4args *, args);
5777 
5778 	vp = cs->vp;
5779 	if (vp == NULL) {
5780 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5781 		goto out;
5782 	}
5783 	if (cs->access == CS_ACCESS_DENIED) {
5784 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
5785 		goto out;
5786 	}
5787 
5788 	cr = cs->cr;
5789 
5790 	if ((stat = rfs4_check_stateid(FWRITE, vp, &args->stateid, FALSE,
5791 	    deleg, TRUE, &ct, cs)) != NFS4_OK) {
5792 		*cs->statusp = resp->status = stat;
5793 		goto out;
5794 	}
5795 
5796 	/*
5797 	 * We have to enter the critical region before calling VOP_RWLOCK
5798 	 * to avoid a deadlock with ufs.
5799 	 */
5800 	if (nbl_need_check(vp)) {
5801 		nbl_start_crit(vp, RW_READER);
5802 		in_crit = 1;
5803 		if (nbl_conflict(vp, NBL_WRITE,
5804 		    args->offset, args->data_len, 0, &ct)) {
5805 			*cs->statusp = resp->status = NFS4ERR_LOCKED;
5806 			goto out;
5807 		}
5808 	}
5809 
5810 	bva.va_mask = AT_MODE | AT_UID;
5811 	error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
5812 
5813 	/*
5814 	 * If we can't get the attributes, then we can't do the
5815 	 * right access checking.  So, we'll fail the request.
5816 	 */
5817 	if (error) {
5818 		*cs->statusp = resp->status = puterrno4(error);
5819 		goto out;
5820 	}
5821 
5822 	if (rdonly4(req, cs)) {
5823 		*cs->statusp = resp->status = NFS4ERR_ROFS;
5824 		goto out;
5825 	}
5826 
5827 	if (vp->v_type != VREG) {
5828 		*cs->statusp = resp->status =
5829 		    ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
5830 		goto out;
5831 	}
5832 
5833 	if (crgetuid(cr) != bva.va_uid &&
5834 	    (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct))) {
5835 		*cs->statusp = resp->status = puterrno4(error);
5836 		goto out;
5837 	}
5838 
5839 	if (MANDLOCK(vp, bva.va_mode)) {
5840 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
5841 		goto out;
5842 	}
5843 
5844 	nsrv4 = nfs4_get_srv();
5845 	if (args->data_len == 0) {
5846 		*cs->statusp = resp->status = NFS4_OK;
5847 		resp->count = 0;
5848 		resp->committed = args->stable;
5849 		resp->writeverf = nsrv4->write4verf;
5850 		goto out;
5851 	}
5852 
5853 	if (args->mblk != NULL) {
5854 		mblk_t *m;
5855 		uint_t bytes, round_len;
5856 
5857 		iovcnt = 0;
5858 		bytes = 0;
5859 		round_len = roundup(args->data_len, BYTES_PER_XDR_UNIT);
5860 		for (m = args->mblk;
5861 		    m != NULL && bytes < round_len;
5862 		    m = m->b_cont) {
5863 			iovcnt++;
5864 			bytes += MBLKL(m);
5865 		}
5866 #ifdef DEBUG
5867 		/* should have ended on an mblk boundary */
5868 		if (bytes != round_len) {
5869 			printf("bytes=0x%x, round_len=0x%x, req len=0x%x\n",
5870 			    bytes, round_len, args->data_len);
5871 			printf("args=%p, args->mblk=%p, m=%p", (void *)args,
5872 			    (void *)args->mblk, (void *)m);
5873 			ASSERT(bytes == round_len);
5874 		}
5875 #endif
5876 		if (iovcnt <= MAX_IOVECS) {
5877 			iovp = iov;
5878 		} else {
5879 			iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
5880 		}
5881 		mblk_to_iov(args->mblk, iovcnt, iovp);
5882 	} else if (args->rlist != NULL) {
5883 		iovcnt = 1;
5884 		iovp = iov;
5885 		iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
5886 		iovp->iov_len = args->data_len;
5887 	} else {
5888 		iovcnt = 1;
5889 		iovp = iov;
5890 		iovp->iov_base = args->data_val;
5891 		iovp->iov_len = args->data_len;
5892 	}
5893 
5894 	uio.uio_iov = iovp;
5895 	uio.uio_iovcnt = iovcnt;
5896 
5897 	uio.uio_segflg = UIO_SYSSPACE;
5898 	uio.uio_extflg = UIO_COPY_DEFAULT;
5899 	uio.uio_loffset = args->offset;
5900 	uio.uio_resid = args->data_len;
5901 	uio.uio_llimit = curproc->p_fsz_ctl;
5902 	rlimit = uio.uio_llimit - args->offset;
5903 	if (rlimit < (u_offset_t)uio.uio_resid)
5904 		uio.uio_resid = (int)rlimit;
5905 
5906 	if (args->stable == UNSTABLE4)
5907 		ioflag = 0;
5908 	else if (args->stable == FILE_SYNC4)
5909 		ioflag = FSYNC;
5910 	else if (args->stable == DATA_SYNC4)
5911 		ioflag = FDSYNC;
5912 	else {
5913 		if (iovp != iov)
5914 			kmem_free(iovp, sizeof (*iovp) * iovcnt);
5915 		*cs->statusp = resp->status = NFS4ERR_INVAL;
5916 		goto out;
5917 	}
5918 
5919 	/*
5920 	 * We're changing creds because VM may fault and we need
5921 	 * the cred of the current thread to be used if quota
5922 	 * checking is enabled.
5923 	 */
5924 	savecred = curthread->t_cred;
5925 	curthread->t_cred = cr;
5926 	error = do_io(FWRITE, vp, &uio, ioflag, cr, &ct);
5927 	curthread->t_cred = savecred;
5928 
5929 	if (iovp != iov)
5930 		kmem_free(iovp, sizeof (*iovp) * iovcnt);
5931 
5932 	if (error) {
5933 		*cs->statusp = resp->status = puterrno4(error);
5934 		goto out;
5935 	}
5936 
5937 	*cs->statusp = resp->status = NFS4_OK;
5938 	resp->count = args->data_len - uio.uio_resid;
5939 
5940 	if (ioflag == 0)
5941 		resp->committed = UNSTABLE4;
5942 	else
5943 		resp->committed = FILE_SYNC4;
5944 
5945 	resp->writeverf = nsrv4->write4verf;
5946 
5947 out:
5948 	if (in_crit)
5949 		nbl_end_crit(vp);
5950 
5951 	DTRACE_NFSV4_2(op__write__done, struct compound_state *, cs,
5952 	    WRITE4res *, resp);
5953 }
5954 
5955 static inline int
5956 rfs4_opnum_in_range(const compound_state_t *cs, int opnum)
5957 {
5958 	if (opnum < FIRST_NFS4_OP || opnum > LAST_NFS4_OP)
5959 		return (0);
5960 	else if (cs->minorversion == 0 && opnum > LAST_NFS40_OP)
5961 		return (0);
5962 	else if (cs->minorversion == 1 && opnum > LAST_NFS41_OP)
5963 		return (0);
5964 	else if (cs->minorversion == 2 && opnum > LAST_NFS42_OP)
5965 		return (0);
5966 	return (1);
5967 }
5968 
5969 void
5970 rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, compound_state_t *cs,
5971     struct svc_req *req, int *rv)
5972 {
5973 	uint_t i;
5974 	cred_t *cr;
5975 	nfs4_srv_t *nsrv4;
5976 	nfs_export_t *ne = nfs_get_export();
5977 
5978 	if (rv != NULL)
5979 		*rv = 0;
5980 	/*
5981 	 * Form a reply tag by copying over the request tag.
5982 	 */
5983 	resp->tag.utf8string_len = args->tag.utf8string_len;
5984 	if (args->tag.utf8string_len != 0) {
5985 		resp->tag.utf8string_val =
5986 		    kmem_alloc(args->tag.utf8string_len, KM_SLEEP);
5987 		bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
5988 		    resp->tag.utf8string_len);
5989 	} else {
5990 		resp->tag.utf8string_val = NULL;
5991 	}
5992 
5993 	cs->statusp = &resp->status;
5994 	cs->req = req;
5995 	cs->minorversion = args->minorversion;
5996 	resp->array = NULL;
5997 	resp->array_len = 0;
5998 
5999 	if (args->array_len == 0) {
6000 		resp->status = NFS4_OK;
6001 		return;
6002 	}
6003 
6004 	cr = svc_xprt_cred(req->rq_xprt);
6005 	ASSERT(cr != NULL);
6006 
6007 	if (sec_svc_getcred(req, cr, &cs->principal, &cs->nfsflavor) == 0) {
6008 		DTRACE_NFSV4_2(compound__start, struct compound_state *,
6009 		    cs, COMPOUND4args *, args);
6010 		DTRACE_NFSV4_2(compound__done, struct compound_state *,
6011 		    cs, COMPOUND4res *, resp);
6012 		svcerr_badcred(req->rq_xprt);
6013 		if (rv != NULL)
6014 			*rv = 1;
6015 		return;
6016 	}
6017 
6018 	resp->array_len = args->array_len;
6019 	resp->array = kmem_zalloc(args->array_len * sizeof (nfs_resop4),
6020 	    KM_SLEEP);
6021 
6022 	cs->op_len = args->array_len;
6023 	cs->basecr = cr;
6024 	nsrv4 = nfs4_get_srv();
6025 
6026 	DTRACE_NFSV4_2(compound__start, struct compound_state *, cs,
6027 	    COMPOUND4args *, args);
6028 
6029 	/*
6030 	 * For now, NFS4 compound processing must be protected by
6031 	 * exported_lock because it can access more than one exportinfo
6032 	 * per compound and share/unshare can now change multiple
6033 	 * exinfo structs.  The NFS2/3 code only refs 1 exportinfo
6034 	 * per proc (excluding public exinfo), and exi_count design
6035 	 * is sufficient to protect concurrent execution of NFS2/3
6036 	 * ops along with unexport.  This lock will be removed as
6037 	 * part of the NFSv4 phase 2 namespace redesign work.
6038 	 */
6039 	rw_enter(&ne->exported_lock, RW_READER);
6040 
6041 	/*
6042 	 * If this is the first compound we've seen, we need to start all
6043 	 * new instances' grace periods.
6044 	 */
6045 	if (nsrv4->seen_first_compound == 0) {
6046 		rfs4_grace_start_new(nsrv4);
6047 		/*
6048 		 * This must be set after rfs4_grace_start_new(), otherwise
6049 		 * another thread could proceed past here before the former
6050 		 * is finished.
6051 		 */
6052 		nsrv4->seen_first_compound = 1;
6053 	}
6054 
6055 	for (i = 0; i < args->array_len && cs->cont; i++) {
6056 		nfs_argop4 *argop;
6057 		nfs_resop4 *resop;
6058 		uint_t op;
6059 		kstat_named_t *stat = ne->ne_globals->rfsproccnt[NFS_V4];
6060 
6061 		argop = &args->array[i];
6062 		resop = &resp->array[i];
6063 		resop->resop = argop->argop;
6064 		op = (uint_t)resop->resop;
6065 
6066 		cs->op_pos = i;
6067 		if (op < rfsv4disp_cnt && rfs4_opnum_in_range(cs, op)) {
6068 			/*
6069 			 * Count the individual ops here; NULL and COMPOUND
6070 			 * are counted in common_dispatch()
6071 			 */
6072 			stat[op].value.ui64++;
6073 
6074 			NFS4_DEBUG(rfs4_debug > 1,
6075 			    (CE_NOTE, "Executing %s", rfs4_op_string[op]));
6076 			(*rfsv4disptab[op].dis_proc)(argop, resop, req, cs);
6077 			NFS4_DEBUG(rfs4_debug > 1, (CE_NOTE, "%s returned %d",
6078 			    rfs4_op_string[op], *cs->statusp));
6079 			if (*cs->statusp != NFS4_OK)
6080 				cs->cont = FALSE;
6081 		} else {
6082 			/*
6083 			 * This is effectively dead code since XDR code
6084 			 * will have already returned BADXDR if op doesn't
6085 			 * decode to legal value.  This only done for a
6086 			 * day when XDR code doesn't verify v4 opcodes.
6087 			 */
6088 			op = OP_ILLEGAL;
6089 			stat[OP_ILLEGAL_IDX].value.ui64++;
6090 
6091 			rfs4_op_illegal(argop, resop, req, cs);
6092 			cs->cont = FALSE;
6093 		}
6094 
6095 		/*
6096 		 * If not at last op, and if we are to stop, then
6097 		 * compact the results array.
6098 		 */
6099 		if ((i + 1) < args->array_len && !cs->cont) {
6100 			nfs_resop4 *new_res = kmem_alloc(
6101 			    (i+1) * sizeof (nfs_resop4), KM_SLEEP);
6102 			bcopy(resp->array,
6103 			    new_res, (i+1) * sizeof (nfs_resop4));
6104 			kmem_free(resp->array,
6105 			    args->array_len * sizeof (nfs_resop4));
6106 
6107 			resp->array_len =  i + 1;
6108 			resp->array = new_res;
6109 		}
6110 	}
6111 
6112 	rw_exit(&ne->exported_lock);
6113 
6114 	DTRACE_NFSV4_2(compound__done, struct compound_state *, cs,
6115 	    COMPOUND4res *, resp);
6116 
6117 	/*
6118 	 * done with this compound request, free the label
6119 	 */
6120 
6121 	if (req->rq_label != NULL) {
6122 		kmem_free(req->rq_label, sizeof (bslabel_t));
6123 		req->rq_label = NULL;
6124 	}
6125 }
6126 
6127 /*
6128  * XXX because of what appears to be duplicate calls to rfs4_compound_free
6129  * XXX zero out the tag and array values. Need to investigate why the
6130  * XXX calls occur, but at least prevent the panic for now.
6131  */
6132 void
6133 rfs4_compound_free(COMPOUND4res *resp)
6134 {
6135 	uint_t i;
6136 
6137 	if (resp->tag.utf8string_val) {
6138 		UTF8STRING_FREE(resp->tag)
6139 	}
6140 
6141 	for (i = 0; i < resp->array_len; i++) {
6142 		nfs_resop4 *resop;
6143 		uint_t op;
6144 
6145 		resop = &resp->array[i];
6146 		op = (uint_t)resop->resop;
6147 		if (op < rfsv4disp_cnt) {
6148 			(*rfsv4disptab[op].dis_resfree)(resop);
6149 		}
6150 	}
6151 	if (resp->array != NULL) {
6152 		kmem_free(resp->array, resp->array_len * sizeof (nfs_resop4));
6153 	}
6154 }
6155 
6156 /*
6157  * Process the value of the compound request rpc flags, as a bit-AND
6158  * of the individual per-op flags (idempotent, allowork, publicfh_ok)
6159  */
6160 void
6161 rfs4_compound_flagproc(COMPOUND4args *args, int *flagp)
6162 {
6163 	int i;
6164 	int flag = RPC_ALL;
6165 
6166 	for (i = 0; flag && i < args->array_len; i++) {
6167 		uint_t op;
6168 
6169 		op = (uint_t)args->array[i].argop;
6170 
6171 		if (op < rfsv4disp_cnt)
6172 			flag &= rfsv4disptab[op].dis_flags;
6173 		else
6174 			flag = 0;
6175 	}
6176 	*flagp = flag;
6177 }
6178 
6179 nfsstat4
6180 rfs4_client_sysid(rfs4_client_t *cp, sysid_t *sp)
6181 {
6182 	nfsstat4 e;
6183 
6184 	rfs4_dbe_lock(cp->rc_dbe);
6185 
6186 	if (cp->rc_sysidt != LM_NOSYSID) {
6187 		*sp = cp->rc_sysidt;
6188 		e = NFS4_OK;
6189 
6190 	} else if ((cp->rc_sysidt = lm_alloc_sysidt()) != LM_NOSYSID) {
6191 		*sp = cp->rc_sysidt;
6192 		e = NFS4_OK;
6193 
6194 		NFS4_DEBUG(rfs4_debug, (CE_NOTE,
6195 		    "rfs4_client_sysid: allocated 0x%x\n", *sp));
6196 	} else
6197 		e = NFS4ERR_DELAY;
6198 
6199 	rfs4_dbe_unlock(cp->rc_dbe);
6200 	return (e);
6201 }
6202 
#if defined(DEBUG) && ! defined(lint)
/*
 * Debugging aid: log a one-line description of a lock request.
 *
 *	str		- prefix identifying the caller
 *	operation	- F_GETLK, F_SETLK or F_SETLK_NBMAND; anything
 *			  else is shown as F_UNKNOWN
 *	flk		- the lock; l_whence must be 0
 *
 * A zero l_len is printed as all ones.
 */
static void
lock_print(char *str, int operation, struct flock64 *flk)
{
	char *op, *type;

	switch (operation) {
	case F_GETLK:
		op = "F_GETLK";
		break;
	case F_SETLK:
		op = "F_SETLK";
		break;
	case F_SETLK_NBMAND:
		op = "F_SETLK_NBMAND";
		break;
	default:
		op = "F_UNKNOWN";
		break;
	}

	switch (flk->l_type) {
	case F_UNLCK:
		type = "F_UNLCK";
		break;
	case F_RDLCK:
		type = "F_RDLCK";
		break;
	case F_WRLCK:
		type = "F_WRLCK";
		break;
	default:
		type = "F_UNKNOWN";
		break;
	}

	ASSERT(flk->l_whence == 0);
	cmn_err(CE_NOTE, "%s:  %s, type = %s, off = %llx len = %llx pid = %d",
	    str, op, type, (longlong_t)flk->l_start,
	    flk->l_len ? (longlong_t)flk->l_len : ~0LL, flk->l_pid);
}

/*
 * Call lock_print() when d is non-zero.  Wrapped in do/while so that
 * the macro behaves as a single statement and cannot capture an "else"
 * belonging to the caller.
 */
#define	LOCK_PRINT(d, s, t, f)				\
	do {						\
		if (d)					\
			lock_print((s), (t), (f));	\
	} while (0)
#else
#define	LOCK_PRINT(d, s, t, f)
#endif
6239 
6240 /*ARGSUSED*/
6241 static bool_t
6242 creds_ok(cred_set_t *cr_set, struct svc_req *req, struct compound_state *cs)
6243 {
6244 	return (TRUE);
6245 }
6246 
6247 /*
6248  * Look up the pathname using the vp in cs as the directory vnode.
6249  * cs->vp will be the vnode for the file on success
6250  */
6251 
6252 static nfsstat4
6253 rfs4_lookup(component4 *component, struct svc_req *req,
6254     struct compound_state *cs)
6255 {
6256 	char *nm;
6257 	uint32_t len;
6258 	nfsstat4 status;
6259 	struct sockaddr *ca;
6260 	char *name;
6261 
6262 	if (cs->vp == NULL) {
6263 		return (NFS4ERR_NOFILEHANDLE);
6264 	}
6265 	if (cs->vp->v_type != VDIR) {
6266 		return (NFS4ERR_NOTDIR);
6267 	}
6268 
6269 	status = utf8_dir_verify(component);
6270 	if (status != NFS4_OK)
6271 		return (status);
6272 
6273 	nm = utf8_to_fn(component, &len, NULL);
6274 	if (nm == NULL) {
6275 		return (NFS4ERR_INVAL);
6276 	}
6277 
6278 	if (len > MAXNAMELEN) {
6279 		kmem_free(nm, len);
6280 		return (NFS4ERR_NAMETOOLONG);
6281 	}
6282 
6283 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6284 	name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6285 	    MAXPATHLEN + 1);
6286 
6287 	if (name == NULL) {
6288 		kmem_free(nm, len);
6289 		return (NFS4ERR_INVAL);
6290 	}
6291 
6292 	status = do_rfs4_op_lookup(name, req, cs);
6293 
6294 	if (name != nm)
6295 		kmem_free(name, MAXPATHLEN + 1);
6296 
6297 	kmem_free(nm, len);
6298 
6299 	return (status);
6300 }
6301 
6302 static nfsstat4
6303 rfs4_lookupfile(component4 *component, struct svc_req *req,
6304     struct compound_state *cs, uint32_t access, change_info4 *cinfo)
6305 {
6306 	nfsstat4 status;
6307 	vnode_t *dvp = cs->vp;
6308 	vattr_t bva, ava, fva;
6309 	int error;
6310 
6311 	/* Get "before" change value */
6312 	bva.va_mask = AT_CTIME|AT_SEQ;
6313 	error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6314 	if (error)
6315 		return (puterrno4(error));
6316 
6317 	/* rfs4_lookup may VN_RELE directory */
6318 	VN_HOLD(dvp);
6319 
6320 	status = rfs4_lookup(component, req, cs);
6321 	if (status != NFS4_OK) {
6322 		VN_RELE(dvp);
6323 		return (status);
6324 	}
6325 
6326 	/*
6327 	 * Get "after" change value, if it fails, simply return the
6328 	 * before value.
6329 	 */
6330 	ava.va_mask = AT_CTIME|AT_SEQ;
6331 	if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6332 		ava.va_ctime = bva.va_ctime;
6333 		ava.va_seq = 0;
6334 	}
6335 	VN_RELE(dvp);
6336 
6337 	/*
6338 	 * Validate the file is a file
6339 	 */
6340 	fva.va_mask = AT_TYPE|AT_MODE;
6341 	error = VOP_GETATTR(cs->vp, &fva, 0, cs->cr, NULL);
6342 	if (error)
6343 		return (puterrno4(error));
6344 
6345 	if (fva.va_type != VREG) {
6346 		if (fva.va_type == VDIR)
6347 			return (NFS4ERR_ISDIR);
6348 		if (fva.va_type == VLNK)
6349 			return (NFS4ERR_SYMLINK);
6350 		return (NFS4ERR_INVAL);
6351 	}
6352 
6353 	NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime);
6354 	NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6355 
6356 	/*
6357 	 * It is undefined if VOP_LOOKUP will change va_seq, so
6358 	 * cinfo.atomic = TRUE only if we have
6359 	 * non-zero va_seq's, and they have not changed.
6360 	 */
6361 	if (bva.va_seq && ava.va_seq && ava.va_seq == bva.va_seq)
6362 		cinfo->atomic = TRUE;
6363 	else
6364 		cinfo->atomic = FALSE;
6365 
6366 	/* Check for mandatory locking */
6367 	cs->mandlock = MANDLOCK(cs->vp, fva.va_mode);
6368 	return (check_open_access(access, cs, req));
6369 }
6370 
6371 static nfsstat4
6372 create_vnode(vnode_t *dvp, char *nm,  vattr_t *vap, createmode4 mode,
6373     cred_t *cr, vnode_t **vpp, bool_t *created)
6374 {
6375 	int error;
6376 	nfsstat4 status = NFS4_OK;
6377 	vattr_t va;
6378 
6379 tryagain:
6380 
6381 	/*
6382 	 * The file open mode used is VWRITE.  If the client needs
6383 	 * some other semantic, then it should do the access checking
6384 	 * itself.  It would have been nice to have the file open mode
6385 	 * passed as part of the arguments.
6386 	 */
6387 
6388 	*created = TRUE;
6389 	error = VOP_CREATE(dvp, nm, vap, EXCL, VWRITE, vpp, cr, 0, NULL, NULL);
6390 
6391 	if (error) {
6392 		*created = FALSE;
6393 
6394 		/*
6395 		 * If we got something other than file already exists
6396 		 * then just return this error.  Otherwise, we got
6397 		 * EEXIST.  If we were doing a GUARDED create, then
6398 		 * just return this error.  Otherwise, we need to
6399 		 * make sure that this wasn't a duplicate of an
6400 		 * exclusive create request.
6401 		 *
6402 		 * The assumption is made that a non-exclusive create
6403 		 * request will never return EEXIST.
6404 		 */
6405 
6406 		if (error != EEXIST || mode == GUARDED4) {
6407 			status = puterrno4(error);
6408 			return (status);
6409 		}
6410 		error = VOP_LOOKUP(dvp, nm, vpp, NULL, 0, NULL, cr,
6411 		    NULL, NULL, NULL);
6412 
6413 		if (error) {
6414 			/*
6415 			 * We couldn't find the file that we thought that
6416 			 * we just created.  So, we'll just try creating
6417 			 * it again.
6418 			 */
6419 			if (error == ENOENT)
6420 				goto tryagain;
6421 
6422 			status = puterrno4(error);
6423 			return (status);
6424 		}
6425 
6426 		if (mode == UNCHECKED4) {
6427 			/* existing object must be regular file */
6428 			if ((*vpp)->v_type != VREG) {
6429 				if ((*vpp)->v_type == VDIR)
6430 					status = NFS4ERR_ISDIR;
6431 				else if ((*vpp)->v_type == VLNK)
6432 					status = NFS4ERR_SYMLINK;
6433 				else
6434 					status = NFS4ERR_INVAL;
6435 				VN_RELE(*vpp);
6436 				return (status);
6437 			}
6438 
6439 			return (NFS4_OK);
6440 		}
6441 
6442 		/* Check for duplicate request */
6443 		va.va_mask = AT_MTIME;
6444 		error = VOP_GETATTR(*vpp, &va, 0, cr, NULL);
6445 		if (!error) {
6446 			/* We found the file */
6447 			const timestruc_t *mtime = &vap->va_mtime;
6448 
6449 			if (va.va_mtime.tv_sec != mtime->tv_sec ||
6450 			    va.va_mtime.tv_nsec != mtime->tv_nsec) {
6451 				/* but its not our creation */
6452 				VN_RELE(*vpp);
6453 				return (NFS4ERR_EXIST);
6454 			}
6455 			*created = TRUE; /* retrans of create == created */
6456 			return (NFS4_OK);
6457 		}
6458 		VN_RELE(*vpp);
6459 		return (NFS4ERR_EXIST);
6460 	}
6461 
6462 	return (NFS4_OK);
6463 }
6464 
6465 static nfsstat4
6466 check_open_access(uint32_t access, struct compound_state *cs,
6467     struct svc_req *req)
6468 {
6469 	int error;
6470 	vnode_t *vp;
6471 	bool_t readonly;
6472 	cred_t *cr = cs->cr;
6473 
6474 	/* For now we don't allow mandatory locking as per V2/V3 */
6475 	if (cs->access == CS_ACCESS_DENIED || cs->mandlock) {
6476 		return (NFS4ERR_ACCESS);
6477 	}
6478 
6479 	vp = cs->vp;
6480 	ASSERT(cr != NULL && vp->v_type == VREG);
6481 
6482 	/*
6483 	 * If the file system is exported read only and we are trying
6484 	 * to open for write, then return NFS4ERR_ROFS
6485 	 */
6486 
6487 	readonly = rdonly4(req, cs);
6488 
6489 	if ((access & OPEN4_SHARE_ACCESS_WRITE) && readonly)
6490 		return (NFS4ERR_ROFS);
6491 
6492 	if (access & OPEN4_SHARE_ACCESS_READ) {
6493 		if ((VOP_ACCESS(vp, VREAD, 0, cr, NULL) != 0) &&
6494 		    (VOP_ACCESS(vp, VEXEC, 0, cr, NULL) != 0)) {
6495 			return (NFS4ERR_ACCESS);
6496 		}
6497 	}
6498 
6499 	if (access & OPEN4_SHARE_ACCESS_WRITE) {
6500 		error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
6501 		if (error)
6502 			return (NFS4ERR_ACCESS);
6503 	}
6504 
6505 	return (NFS4_OK);
6506 }
6507 
6508 static void
6509 rfs4_verifier_to_mtime(verifier4 v, timestruc_t *mtime)
6510 {
6511 	timespec32_t *time = (timespec32_t *)&v;
6512 
6513 	/*
6514 	 * Ensure no time overflows. Assumes underlying
6515 	 * filesystem supports at least 32 bits.
6516 	 * Truncate nsec to usec resolution to allow valid
6517 	 * compares even if the underlying filesystem truncates.
6518 	 */
6519 	mtime->tv_sec = time->tv_sec % TIME32_MAX;
6520 	mtime->tv_nsec = (time->tv_nsec / 1000) * 1000;
6521 }
6522 
6523 static nfsstat4
6524 rfs4_createfile(OPEN4args *args, struct svc_req *req, struct compound_state *cs,
6525     change_info4 *cinfo, bitmap4 *attrset, clientid4 clientid)
6526 {
6527 	struct nfs4_svgetit_arg sarg;
6528 	struct nfs4_ntov_table ntov;
6529 
6530 	bool_t ntov_table_init = FALSE;
6531 	struct statvfs64 sb;
6532 	nfsstat4 status;
6533 	vnode_t *vp;
6534 	vattr_t bva, ava, iva, cva, *vap;
6535 	vnode_t *dvp;
6536 	char *nm = NULL;
6537 	uint_t buflen;
6538 	bool_t created;
6539 	bool_t setsize = FALSE;
6540 	len_t reqsize;
6541 	int error;
6542 	bool_t trunc;
6543 	caller_context_t ct;
6544 	component4 *component;
6545 	bslabel_t *clabel;
6546 	struct sockaddr *ca;
6547 	char *name = NULL;
6548 	fattr4 *fattr = NULL;
6549 
6550 	ASSERT(*attrset == 0);
6551 
6552 	sarg.sbp = &sb;
6553 	sarg.is_referral = B_FALSE;
6554 
6555 	dvp = cs->vp;
6556 
6557 	/* Check if the file system is read only */
6558 	if (rdonly4(req, cs))
6559 		return (NFS4ERR_ROFS);
6560 
6561 	/* check the label of including directory */
6562 	if (is_system_labeled()) {
6563 		ASSERT(req->rq_label != NULL);
6564 		clabel = req->rq_label;
6565 		DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
6566 		    "got client label from request(1)",
6567 		    struct svc_req *, req);
6568 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
6569 			if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
6570 			    cs->exi)) {
6571 				return (NFS4ERR_ACCESS);
6572 			}
6573 		}
6574 	}
6575 
6576 	if ((args->mode == EXCLUSIVE4 || args->mode == EXCLUSIVE4_1) &&
6577 	    dvp->v_flag & V_XATTRDIR) {
6578 		/* prohibit EXCL create of named attributes */
6579 		return (NFS4ERR_INVAL);
6580 	}
6581 
6582 	/*
6583 	 * Get the last component of path name in nm. cs will reference
6584 	 * the including directory on success.
6585 	 */
6586 	component = &args->claim.open_claim4_u.file;
6587 	status = utf8_dir_verify(component);
6588 	if (status != NFS4_OK)
6589 		return (status);
6590 
6591 	nm = utf8_to_fn(component, &buflen, NULL);
6592 
6593 	if (nm == NULL)
6594 		return (NFS4ERR_RESOURCE);
6595 
6596 	if (buflen > MAXNAMELEN) {
6597 		kmem_free(nm, buflen);
6598 		return (NFS4ERR_NAMETOOLONG);
6599 	}
6600 
6601 	bva.va_mask = AT_TYPE|AT_CTIME|AT_SEQ;
6602 	error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6603 	if (error) {
6604 		kmem_free(nm, buflen);
6605 		return (puterrno4(error));
6606 	}
6607 
6608 	if (bva.va_type != VDIR) {
6609 		kmem_free(nm, buflen);
6610 		return (NFS4ERR_NOTDIR);
6611 	}
6612 
6613 	NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime)
6614 
6615 	switch (args->mode) {
6616 	case GUARDED4:
6617 		/*FALLTHROUGH*/
6618 	case UNCHECKED4:
6619 	case EXCLUSIVE4_1:
6620 		nfs4_ntov_table_init(&ntov);
6621 		ntov_table_init = TRUE;
6622 
6623 		if (args->mode == EXCLUSIVE4_1)
6624 			fattr = &args->createhow4_u.ch_createboth.cva_attrs;
6625 		else
6626 			fattr = &args->createhow4_u.createattrs;
6627 
6628 		status = do_rfs4_set_attrs(attrset,
6629 		    fattr,
6630 		    cs, &sarg, &ntov, NFS4ATTR_SETIT);
6631 
6632 		if (status == NFS4_OK && (sarg.vap->va_mask & AT_TYPE) &&
6633 		    sarg.vap->va_type != VREG) {
6634 			if (sarg.vap->va_type == VDIR)
6635 				status = NFS4ERR_ISDIR;
6636 			else if (sarg.vap->va_type == VLNK)
6637 				status = NFS4ERR_SYMLINK;
6638 			else
6639 				status = NFS4ERR_INVAL;
6640 		}
6641 
6642 		if (status != NFS4_OK) {
6643 			kmem_free(nm, buflen);
6644 			nfs4_ntov_table_free(&ntov, &sarg);
6645 			*attrset = 0;
6646 			return (status);
6647 		}
6648 
6649 		vap = sarg.vap;
6650 		vap->va_type = VREG;
6651 		vap->va_mask |= AT_TYPE;
6652 
6653 		if ((vap->va_mask & AT_MODE) == 0) {
6654 			vap->va_mask |= AT_MODE;
6655 			vap->va_mode = (mode_t)0600;
6656 		}
6657 
6658 		if (vap->va_mask & AT_SIZE) {
6659 
6660 			/* Disallow create with a non-zero size */
6661 
6662 			if ((reqsize = sarg.vap->va_size) != 0) {
6663 				kmem_free(nm, buflen);
6664 				nfs4_ntov_table_free(&ntov, &sarg);
6665 				*attrset = 0;
6666 				return (NFS4ERR_INVAL);
6667 			}
6668 			setsize = TRUE;
6669 		}
6670 		if (args->mode == EXCLUSIVE4_1) {
6671 			rfs4_verifier_to_mtime(
6672 			    args->createhow4_u.ch_createboth.cva_verf,
6673 			    &vap->va_mtime);
6674 			/* attrset will be set later */
6675 			fattr->attrmask |= FATTR4_TIME_MODIFY_MASK;
6676 			vap->va_mask |= AT_MTIME;
6677 		}
6678 		break;
6679 
6680 	case EXCLUSIVE4:
6681 		cva.va_mask = AT_TYPE | AT_MTIME | AT_MODE;
6682 		cva.va_type = VREG;
6683 		cva.va_mode = (mode_t)0;
6684 
6685 		rfs4_verifier_to_mtime(args->createhow4_u.createverf,
6686 		    &cva.va_mtime);
6687 
6688 		vap = &cva;
6689 
6690 		/*
6691 		 * For EXCL create, attrset is set to the server attr
6692 		 * used to cache the client's verifier.
6693 		 */
6694 		*attrset = FATTR4_TIME_MODIFY_MASK;
6695 		break;
6696 	}
6697 
6698 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6699 	name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6700 	    MAXPATHLEN  + 1);
6701 
6702 	if (name == NULL) {
6703 		kmem_free(nm, buflen);
6704 		return (NFS4ERR_SERVERFAULT);
6705 	}
6706 
6707 	status = create_vnode(dvp, name, vap, args->mode,
6708 	    cs->cr, &vp, &created);
6709 	if (nm != name)
6710 		kmem_free(name, MAXPATHLEN + 1);
6711 	kmem_free(nm, buflen);
6712 
6713 	if (status != NFS4_OK) {
6714 		if (ntov_table_init)
6715 			nfs4_ntov_table_free(&ntov, &sarg);
6716 		*attrset = 0;
6717 		return (status);
6718 	}
6719 
6720 	trunc = (setsize && !created);
6721 
6722 	if (args->mode != EXCLUSIVE4) {
6723 		bitmap4 createmask = fattr->attrmask;
6724 
6725 		/*
6726 		 * True verification that object was created with correct
6727 		 * attrs is impossible.  The attrs could have been changed
6728 		 * immediately after object creation.  If attributes did
6729 		 * not verify, the only recourse for the server is to
6730 		 * destroy the object.  Maybe if some attrs (like gid)
6731 		 * are set incorrectly, the object should be destroyed;
6732 		 * however, seems bad as a default policy.  Do we really
6733 		 * want to destroy an object over one of the times not
6734 		 * verifying correctly?  For these reasons, the server
6735 		 * currently sets bits in attrset for createattrs
6736 		 * that were set; however, no verification is done.
6737 		 *
6738 		 * vmask_to_nmask accounts for vattr bits set on create
6739 		 *	[do_rfs4_set_attrs() only sets resp bits for
6740 		 *	 non-vattr/vfs bits.]
6741 		 * Mask off any bits we set by default so as not to return
6742 		 * more attrset bits than were requested in createattrs
6743 		 */
6744 		if (created) {
6745 			nfs4_vmask_to_nmask(sarg.vap->va_mask, attrset);
6746 			*attrset &= createmask;
6747 		} else {
6748 			/*
6749 			 * We did not create the vnode (we tried but it
6750 			 * already existed).  In this case, the only createattr
6751 			 * that the spec allows the server to set is size,
6752 			 * and even then, it can only be set if it is 0.
6753 			 */
6754 			*attrset = 0;
6755 			if (trunc)
6756 				*attrset = FATTR4_SIZE_MASK;
6757 		}
6758 	}
6759 	if (ntov_table_init)
6760 		nfs4_ntov_table_free(&ntov, &sarg);
6761 
6762 	/*
6763 	 * Get the initial "after" sequence number, if it fails,
6764 	 * set to zero, time to before.
6765 	 */
6766 	iva.va_mask = AT_CTIME|AT_SEQ;
6767 	if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL)) {
6768 		iva.va_seq = 0;
6769 		iva.va_ctime = bva.va_ctime;
6770 	}
6771 
6772 	/*
6773 	 * create_vnode attempts to create the file exclusive,
6774 	 * if it already exists the VOP_CREATE will fail and
6775 	 * may not increase va_seq. It is atomic if
6776 	 * we haven't changed the directory, but if it has changed
6777 	 * we don't know what changed it.
6778 	 */
6779 	if (!created) {
6780 		if (bva.va_seq && iva.va_seq &&
6781 		    bva.va_seq == iva.va_seq)
6782 			cinfo->atomic = TRUE;
6783 		else
6784 			cinfo->atomic = FALSE;
6785 		NFS4_SET_FATTR4_CHANGE(cinfo->after, iva.va_ctime);
6786 	} else {
6787 		/*
6788 		 * The entry was created, we need to sync the
6789 		 * directory metadata.
6790 		 */
6791 		(void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
6792 
6793 		/*
6794 		 * Get "after" change value, if it fails, simply return the
6795 		 * before value.
6796 		 */
6797 		ava.va_mask = AT_CTIME|AT_SEQ;
6798 		if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6799 			ava.va_ctime = bva.va_ctime;
6800 			ava.va_seq = 0;
6801 		}
6802 
6803 		NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6804 
6805 		/*
6806 		 * The cinfo->atomic = TRUE only if we have
6807 		 * non-zero va_seq's, and it has incremented by exactly one
6808 		 * during the create_vnode and it didn't
6809 		 * change during the VOP_FSYNC.
6810 		 */
6811 		if (bva.va_seq && iva.va_seq && ava.va_seq &&
6812 		    iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
6813 			cinfo->atomic = TRUE;
6814 		else
6815 			cinfo->atomic = FALSE;
6816 	}
6817 
6818 	/* Check for mandatory locking and that the size gets set. */
6819 	cva.va_mask = AT_MODE;
6820 	if (setsize)
6821 		cva.va_mask |= AT_SIZE;
6822 
6823 	/* Assume the worst */
6824 	cs->mandlock = TRUE;
6825 
6826 	if (VOP_GETATTR(vp, &cva, 0, cs->cr, NULL) == 0) {
6827 		cs->mandlock = MANDLOCK(cs->vp, cva.va_mode);
6828 
6829 		/*
6830 		 * Truncate the file if necessary; this would be
6831 		 * the case for create over an existing file.
6832 		 */
6833 
6834 		if (trunc) {
6835 			int in_crit = 0;
6836 			rfs4_file_t *fp;
6837 			nfs4_srv_t *nsrv4;
6838 			bool_t create = FALSE;
6839 
6840 			/*
6841 			 * We are writing over an existing file.
6842 			 * Check to see if we need to recall a delegation.
6843 			 */
6844 			nsrv4 = nfs4_get_srv();
6845 			rfs4_hold_deleg_policy(nsrv4);
6846 			if ((fp = rfs4_findfile(vp, NULL, &create)) != NULL) {
6847 				if (rfs4_check_delegated_byfp(FWRITE, fp,
6848 				    (reqsize == 0), FALSE, FALSE, &clientid)) {
6849 					rfs4_file_rele(fp);
6850 					rfs4_rele_deleg_policy(nsrv4);
6851 					VN_RELE(vp);
6852 					*attrset = 0;
6853 					return (NFS4ERR_DELAY);
6854 				}
6855 				rfs4_file_rele(fp);
6856 			}
6857 			rfs4_rele_deleg_policy(nsrv4);
6858 
6859 			if (nbl_need_check(vp)) {
6860 				in_crit = 1;
6861 
6862 				ASSERT(reqsize == 0);
6863 
6864 				nbl_start_crit(vp, RW_READER);
6865 				if (nbl_conflict(vp, NBL_WRITE, 0,
6866 				    cva.va_size, 0, NULL)) {
6867 					in_crit = 0;
6868 					nbl_end_crit(vp);
6869 					VN_RELE(vp);
6870 					*attrset = 0;
6871 					return (NFS4ERR_ACCESS);
6872 				}
6873 			}
6874 			ct.cc_sysid = 0;
6875 			ct.cc_pid = 0;
6876 			ct.cc_caller_id = nfs4_srv_caller_id;
6877 			ct.cc_flags = CC_DONTBLOCK;
6878 
6879 			cva.va_mask = AT_SIZE;
6880 			cva.va_size = reqsize;
6881 			(void) VOP_SETATTR(vp, &cva, 0, cs->cr, &ct);
6882 			if (in_crit)
6883 				nbl_end_crit(vp);
6884 		}
6885 	}
6886 
6887 	error = makefh4(&cs->fh, vp, cs->exi);
6888 
6889 	/*
6890 	 * Force modified data and metadata out to stable storage.
6891 	 */
6892 	(void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
6893 
6894 	if (error) {
6895 		VN_RELE(vp);
6896 		*attrset = 0;
6897 		return (puterrno4(error));
6898 	}
6899 
6900 	/* if parent dir is attrdir, set namedattr fh flag */
6901 	if (dvp->v_flag & V_XATTRDIR)
6902 		set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
6903 
6904 	if (cs->vp)
6905 		VN_RELE(cs->vp);
6906 
6907 	cs->vp = vp;
6908 
6909 	/*
6910 	 * if we did not create the file, we will need to check
6911 	 * the access bits on the file
6912 	 */
6913 
6914 	if (!created) {
6915 		if (setsize)
6916 			args->share_access |= OPEN4_SHARE_ACCESS_WRITE;
6917 		status = check_open_access(args->share_access, cs, req);
6918 		if (status != NFS4_OK)
6919 			*attrset = 0;
6920 	}
6921 	return (status);
6922 }
6923 
6924 /*ARGSUSED*/
6925 static void
6926 rfs4_do_open(struct compound_state *cs, struct svc_req *req,
6927     rfs4_openowner_t *oo, delegreq_t deleg,
6928     uint32_t access, uint32_t deny,
6929     OPEN4res *resp, int deleg_cur)
6930 {
6931 	/* XXX Currently not using req  */
6932 	rfs4_state_t *sp;
6933 	rfs4_file_t *fp;
6934 	bool_t screate = TRUE;
6935 	bool_t fcreate = TRUE;
6936 	uint32_t open_a, share_a;
6937 	uint32_t open_d, share_d;
6938 	rfs4_deleg_state_t *dsp;
6939 	sysid_t sysid;
6940 	nfsstat4 status;
6941 	caller_context_t ct;
6942 	int fflags = 0;
6943 	int recall = 0;
6944 	int err;
6945 	int first_open;
6946 
6947 	/* get the file struct and hold a lock on it during initial open */
6948 	fp = rfs4_findfile_withlock(cs->vp, &cs->fh, &fcreate);
6949 	if (fp == NULL) {
6950 		resp->status = NFS4ERR_RESOURCE;
6951 		DTRACE_PROBE1(nfss__e__do__open1, nfsstat4, resp->status);
6952 		return;
6953 	}
6954 
6955 	sp = rfs4_findstate_by_owner_file(oo, fp, &screate);
6956 	if (sp == NULL) {
6957 		resp->status = NFS4ERR_RESOURCE;
6958 		DTRACE_PROBE1(nfss__e__do__open2, nfsstat4, resp->status);
6959 		/* No need to keep any reference */
6960 		rw_exit(&fp->rf_file_rwlock);
6961 		rfs4_file_rele(fp);
6962 		return;
6963 	}
6964 
6965 	/* try to get the sysid before continuing */
6966 	if ((status = rfs4_client_sysid(oo->ro_client, &sysid)) != NFS4_OK) {
6967 		resp->status = status;
6968 		rfs4_file_rele(fp);
6969 		/* Not a fully formed open; "close" it */
6970 		if (screate == TRUE)
6971 			rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6972 		rfs4_state_rele(sp);
6973 		return;
6974 	}
6975 
6976 	/* Calculate the fflags for this OPEN. */
6977 	if (access & OPEN4_SHARE_ACCESS_READ)
6978 		fflags |= FREAD;
6979 	if (access & OPEN4_SHARE_ACCESS_WRITE)
6980 		fflags |= FWRITE;
6981 
6982 	rfs4_dbe_lock(sp->rs_dbe);
6983 
6984 	/*
6985 	 * Calculate the new deny and access mode that this open is adding to
6986 	 * the file for this open owner;
6987 	 */
6988 	open_d = (deny & ~sp->rs_open_deny);
6989 	open_a = (access & ~sp->rs_open_access);
6990 
6991 	/*
6992 	 * Calculate the new share access and share deny modes that this open
6993 	 * is adding to the file for this open owner;
6994 	 */
6995 	share_a = (access & ~sp->rs_share_access);
6996 	share_d = (deny & ~sp->rs_share_deny);
6997 
6998 	first_open = (sp->rs_open_access & OPEN4_SHARE_ACCESS_BOTH) == 0;
6999 
7000 	/*
7001 	 * Check to see the client has already sent an open for this
7002 	 * open owner on this file with the same share/deny modes.
7003 	 * If so, we don't need to check for a conflict and we don't
7004 	 * need to add another shrlock.  If not, then we need to
7005 	 * check for conflicts in deny and access before checking for
7006 	 * conflicts in delegation.  We don't want to recall a
7007 	 * delegation based on an open that will eventually fail based
7008 	 * on shares modes.
7009 	 */
7010 
7011 	if (share_a || share_d) {
7012 		if ((err = rfs4_share(sp, access, deny)) != 0) {
7013 			rfs4_dbe_unlock(sp->rs_dbe);
7014 			resp->status = err;
7015 
7016 			rfs4_file_rele(fp);
7017 			/* Not a fully formed open; "close" it */
7018 			if (screate == TRUE)
7019 				rfs4_state_close(sp, FALSE, FALSE, cs->cr);
7020 			rfs4_state_rele(sp);
7021 			return;
7022 		}
7023 	}
7024 
7025 	rfs4_dbe_lock(fp->rf_dbe);
7026 
7027 	/*
7028 	 * Check to see if this file is delegated and if so, if a
7029 	 * recall needs to be done.
7030 	 */
7031 	if (rfs4_check_recall(sp, access)) {
7032 		rfs4_dbe_unlock(fp->rf_dbe);
7033 		rfs4_dbe_unlock(sp->rs_dbe);
7034 		rfs4_recall_deleg(fp, FALSE, sp->rs_owner->ro_client);
7035 		delay(NFS4_DELEGATION_CONFLICT_DELAY);
7036 		rfs4_dbe_lock(sp->rs_dbe);
7037 
7038 		/* if state closed while lock was dropped */
7039 		if (sp->rs_closed) {
7040 			if (share_a || share_d)
7041 				(void) rfs4_unshare(sp);
7042 			rfs4_dbe_unlock(sp->rs_dbe);
7043 			rfs4_file_rele(fp);
7044 			/* Not a fully formed open; "close" it */
7045 			if (screate == TRUE)
7046 				rfs4_state_close(sp, FALSE, FALSE, cs->cr);
7047 			rfs4_state_rele(sp);
7048 			resp->status = NFS4ERR_OLD_STATEID;
7049 			return;
7050 		}
7051 
7052 		rfs4_dbe_lock(fp->rf_dbe);
7053 		/* Let's see if the delegation was returned */
7054 		if (rfs4_check_recall(sp, access)) {
7055 			rfs4_dbe_unlock(fp->rf_dbe);
7056 			if (share_a || share_d)
7057 				(void) rfs4_unshare(sp);
7058 			rfs4_dbe_unlock(sp->rs_dbe);
7059 			rfs4_file_rele(fp);
7060 			rfs4_update_lease(sp->rs_owner->ro_client);
7061 
7062 			/* Not a fully formed open; "close" it */
7063 			if (screate == TRUE)
7064 				rfs4_state_close(sp, FALSE, FALSE, cs->cr);
7065 			rfs4_state_rele(sp);
7066 			resp->status = NFS4ERR_DELAY;
7067 			return;
7068 		}
7069 	}
7070 	/*
7071 	 * the share check passed and any delegation conflict has been
7072 	 * taken care of, now call vop_open.
7073 	 * if this is the first open then call vop_open with fflags.
7074 	 * if not, call vn_open_upgrade with just the upgrade flags.
7075 	 *
7076 	 * if the file has been opened already, it will have the current
7077 	 * access mode in the state struct.  if it has no share access, then
7078 	 * this is a new open.
7079 	 *
7080 	 * However, if this is open with CLAIM_DLEGATE_CUR, then don't
7081 	 * call VOP_OPEN(), just do the open upgrade.
7082 	 */
7083 	if (first_open && !deleg_cur) {
7084 		ct.cc_sysid = sysid;
7085 		ct.cc_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
7086 		ct.cc_caller_id = nfs4_srv_caller_id;
7087 		ct.cc_flags = CC_DONTBLOCK;
7088 		err = VOP_OPEN(&cs->vp, fflags, cs->cr, &ct);
7089 		if (err) {
7090 			rfs4_dbe_unlock(fp->rf_dbe);
7091 			if (share_a || share_d)
7092 				(void) rfs4_unshare(sp);
7093 			rfs4_dbe_unlock(sp->rs_dbe);
7094 			rfs4_file_rele(fp);
7095 
7096 			/* Not a fully formed open; "close" it */
7097 			if (screate == TRUE)
7098 				rfs4_state_close(sp, FALSE, FALSE, cs->cr);
7099 			rfs4_state_rele(sp);
7100 			/* check if a monitor detected a delegation conflict */
7101 			if (err == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
7102 				resp->status = NFS4ERR_DELAY;
7103 			else
7104 				resp->status = NFS4ERR_SERVERFAULT;
7105 			return;
7106 		}
7107 	} else { /* open upgrade */
7108 		/*
7109 		 * calculate the fflags for the new mode that is being added
7110 		 * by this upgrade.
7111 		 */
7112 		fflags = 0;
7113 		if (open_a & OPEN4_SHARE_ACCESS_READ)
7114 			fflags |= FREAD;
7115 		if (open_a & OPEN4_SHARE_ACCESS_WRITE)
7116 			fflags |= FWRITE;
7117 		vn_open_upgrade(cs->vp, fflags);
7118 	}
7119 	sp->rs_open_access |= access;
7120 	sp->rs_open_deny |= deny;
7121 
7122 	if (open_d & OPEN4_SHARE_DENY_READ)
7123 		fp->rf_deny_read++;
7124 	if (open_d & OPEN4_SHARE_DENY_WRITE)
7125 		fp->rf_deny_write++;
7126 	fp->rf_share_deny |= deny;
7127 
7128 	if (open_a & OPEN4_SHARE_ACCESS_READ)
7129 		fp->rf_access_read++;
7130 	if (open_a & OPEN4_SHARE_ACCESS_WRITE)
7131 		fp->rf_access_write++;
7132 	fp->rf_share_access |= access;
7133 
7134 	/*
7135 	 * Check for delegation here. if the deleg argument is not
7136 	 * DELEG_ANY, then this is a reclaim from a client and
7137 	 * we must honor the delegation requested. If necessary we can
7138 	 * set the recall flag.
7139 	 */
7140 
7141 	dsp = rfs4_grant_delegation(deleg, sp, &recall);
7142 
7143 	cs->deleg = (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE);
7144 
7145 	next_stateid(&sp->rs_stateid);
7146 
7147 	resp->stateid = sp->rs_stateid.stateid;
7148 
7149 	rfs4_dbe_unlock(fp->rf_dbe);
7150 	rfs4_dbe_unlock(sp->rs_dbe);
7151 
7152 	if (dsp) {
7153 		rfs4_set_deleg_response(dsp, &resp->delegation, NULL, recall);
7154 		rfs4_deleg_state_rele(dsp);
7155 	}
7156 
7157 	rfs4_file_rele(fp);
7158 	rfs4_state_rele(sp);
7159 
7160 	resp->status = NFS4_OK;
7161 }
7162 
7163 /*ARGSUSED*/
7164 static void
7165 rfs4_do_openfh(struct compound_state *cs, struct svc_req *req, OPEN4args *args,
7166     rfs4_openowner_t *oo, OPEN4res *resp)
7167 {
7168 	/* cs->vp and cs->fh have been updated by putfh. */
7169 	rfs4_do_open(cs, req, oo, DELEG_ANY,
7170 	    (args->share_access & 0xff), args->share_deny, resp, 0);
7171 }
7172 
7173 /*ARGSUSED*/
7174 static void
7175 rfs4_do_opennull(struct compound_state *cs, struct svc_req *req,
7176     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7177 {
7178 	change_info4 *cinfo = &resp->cinfo;
7179 	bitmap4 *attrset = &resp->attrset;
7180 
7181 	if (args->opentype == OPEN4_NOCREATE)
7182 		resp->status = rfs4_lookupfile(&args->claim.open_claim4_u.file,
7183 		    req, cs, args->share_access, cinfo);
7184 	else {
7185 		/* inhibit delegation grants during exclusive create */
7186 
7187 		if (args->mode == EXCLUSIVE4)
7188 			rfs4_disable_delegation();
7189 
7190 		resp->status = rfs4_createfile(args, req, cs, cinfo, attrset,
7191 		    oo->ro_client->rc_clientid);
7192 	}
7193 
7194 	if (resp->status == NFS4_OK) {
7195 
7196 		/* cs->vp cs->fh now reference the desired file */
7197 
7198 		rfs4_do_open(cs, req, oo,
7199 		    oo->ro_need_confirm ? DELEG_NONE : DELEG_ANY,
7200 		    args->share_access, args->share_deny, resp, 0);
7201 
7202 		/*
7203 		 * If rfs4_createfile set attrset, we must
7204 		 * clear this attrset before the response is copied.
7205 		 */
7206 		if (resp->status != NFS4_OK && resp->attrset) {
7207 			resp->attrset = 0;
7208 		}
7209 	}
7210 	else
7211 		*cs->statusp = resp->status;
7212 
7213 	if (args->mode == EXCLUSIVE4)
7214 		rfs4_enable_delegation();
7215 }
7216 
7217 /*ARGSUSED*/
7218 static void
7219 rfs4_do_openprev(struct compound_state *cs, struct svc_req *req,
7220     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7221 {
7222 	change_info4 *cinfo = &resp->cinfo;
7223 	vattr_t va;
7224 	vtype_t v_type = cs->vp->v_type;
7225 	int error = 0;
7226 
7227 	/* Verify that we have a regular file */
7228 	if (v_type != VREG) {
7229 		if (v_type == VDIR)
7230 			resp->status = NFS4ERR_ISDIR;
7231 		else if (v_type == VLNK)
7232 			resp->status = NFS4ERR_SYMLINK;
7233 		else
7234 			resp->status = NFS4ERR_INVAL;
7235 		return;
7236 	}
7237 
7238 	va.va_mask = AT_MODE|AT_UID;
7239 	error = VOP_GETATTR(cs->vp, &va, 0, cs->cr, NULL);
7240 	if (error) {
7241 		resp->status = puterrno4(error);
7242 		return;
7243 	}
7244 
7245 	cs->mandlock = MANDLOCK(cs->vp, va.va_mode);
7246 
7247 	/*
7248 	 * Check if we have access to the file, Note the the file
7249 	 * could have originally been open UNCHECKED or GUARDED
7250 	 * with mode bits that will now fail, but there is nothing
7251 	 * we can really do about that except in the case that the
7252 	 * owner of the file is the one requesting the open.
7253 	 */
7254 	if (crgetuid(cs->cr) != va.va_uid) {
7255 		resp->status = check_open_access(args->share_access, cs, req);
7256 		if (resp->status != NFS4_OK) {
7257 			return;
7258 		}
7259 	}
7260 
7261 	/*
7262 	 * cinfo on a CLAIM_PREVIOUS is undefined, initialize to zero
7263 	 */
7264 	cinfo->before = 0;
7265 	cinfo->after = 0;
7266 	cinfo->atomic = FALSE;
7267 
7268 	rfs4_do_open(cs, req, oo,
7269 	    NFS4_DELEG4TYPE2REQTYPE(args->claim.open_claim4_u.delegate_type),
7270 	    args->share_access, args->share_deny, resp, 0);
7271 }
7272 
7273 static void
7274 rfs4_do_opendelcur(struct compound_state *cs, struct svc_req *req,
7275     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7276 {
7277 	int error;
7278 	nfsstat4 status;
7279 	stateid4 stateid =
7280 	    args->claim.open_claim4_u.delegate_cur_info.delegate_stateid;
7281 	rfs4_deleg_state_t *dsp;
7282 
7283 	/*
7284 	 * Find the state info from the stateid and confirm that the
7285 	 * file is delegated.  If the state openowner is the same as
7286 	 * the supplied openowner we're done. If not, get the file
7287 	 * info from the found state info. Use that file info to
7288 	 * create the state for this lock owner. Note solaris doen't
7289 	 * really need the pathname to find the file. We may want to
7290 	 * lookup the pathname and make sure that the vp exist and
7291 	 * matches the vp in the file structure. However it is
7292 	 * possible that the pathname nolonger exists (local process
7293 	 * unlinks the file), so this may not be that useful.
7294 	 */
7295 
7296 	status = rfs4_get_deleg_state(&stateid, &dsp);
7297 	if (status != NFS4_OK) {
7298 		resp->status = status;
7299 		return;
7300 	}
7301 
7302 	ASSERT(dsp->rds_finfo->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE);
7303 
7304 	/*
7305 	 * New lock owner, create state. Since this was probably called
7306 	 * in response to a CB_RECALL we set deleg to DELEG_NONE
7307 	 */
7308 
7309 	ASSERT(cs->vp != NULL);
7310 	VN_RELE(cs->vp);
7311 	VN_HOLD(dsp->rds_finfo->rf_vp);
7312 	cs->vp = dsp->rds_finfo->rf_vp;
7313 
7314 	if (error = makefh4(&cs->fh, cs->vp, cs->exi)) {
7315 		rfs4_deleg_state_rele(dsp);
7316 		*cs->statusp = resp->status = puterrno4(error);
7317 		return;
7318 	}
7319 
7320 	/* Mark progress for delegation returns */
7321 	dsp->rds_finfo->rf_dinfo.rd_time_lastwrite = gethrestime_sec();
7322 	rfs4_deleg_state_rele(dsp);
7323 	rfs4_do_open(cs, req, oo, DELEG_NONE,
7324 	    args->share_access, args->share_deny, resp, 1);
7325 }
7326 
7327 /*ARGSUSED*/
7328 static void
7329 rfs4_do_opendelprev(struct compound_state *cs, struct svc_req *req,
7330     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7331 {
7332 	/*
7333 	 * Lookup the pathname, it must already exist since this file
7334 	 * was delegated.
7335 	 *
7336 	 * Find the file and state info for this vp and open owner pair.
7337 	 *	check that they are in fact delegated.
7338 	 *	check that the state access and deny modes are the same.
7339 	 *
7340 	 * Return the delgation possibly seting the recall flag.
7341 	 */
7342 	rfs4_file_t *fp;
7343 	rfs4_state_t *sp;
7344 	bool_t create = FALSE;
7345 	bool_t dcreate = FALSE;
7346 	rfs4_deleg_state_t *dsp;
7347 	nfsace4 *ace;
7348 
7349 	/* Note we ignore oflags */
7350 	resp->status = rfs4_lookupfile(
7351 	    &args->claim.open_claim4_u.file_delegate_prev,
7352 	    req, cs, args->share_access, &resp->cinfo);
7353 
7354 	if (resp->status != NFS4_OK) {
7355 		return;
7356 	}
7357 
7358 	/* get the file struct and hold a lock on it during initial open */
7359 	fp = rfs4_findfile_withlock(cs->vp, NULL, &create);
7360 	if (fp == NULL) {
7361 		resp->status = NFS4ERR_RESOURCE;
7362 		DTRACE_PROBE1(nfss__e__do_opendelprev1, nfsstat4, resp->status);
7363 		return;
7364 	}
7365 
7366 	sp = rfs4_findstate_by_owner_file(oo, fp, &create);
7367 	if (sp == NULL) {
7368 		resp->status = NFS4ERR_SERVERFAULT;
7369 		DTRACE_PROBE1(nfss__e__do_opendelprev2, nfsstat4, resp->status);
7370 		rw_exit(&fp->rf_file_rwlock);
7371 		rfs4_file_rele(fp);
7372 		return;
7373 	}
7374 
7375 	rfs4_dbe_lock(sp->rs_dbe);
7376 	rfs4_dbe_lock(fp->rf_dbe);
7377 	if (args->share_access != sp->rs_share_access ||
7378 	    args->share_deny != sp->rs_share_deny ||
7379 	    sp->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
7380 		NFS4_DEBUG(rfs4_debug,
7381 		    (CE_NOTE, "rfs4_do_opendelprev: state mixup"));
7382 		rfs4_dbe_unlock(fp->rf_dbe);
7383 		rfs4_dbe_unlock(sp->rs_dbe);
7384 		rfs4_file_rele(fp);
7385 		rfs4_state_rele(sp);
7386 		resp->status = NFS4ERR_SERVERFAULT;
7387 		return;
7388 	}
7389 	rfs4_dbe_unlock(fp->rf_dbe);
7390 	rfs4_dbe_unlock(sp->rs_dbe);
7391 
7392 	dsp = rfs4_finddeleg(sp, &dcreate);
7393 	if (dsp == NULL) {
7394 		rfs4_state_rele(sp);
7395 		rfs4_file_rele(fp);
7396 		resp->status = NFS4ERR_SERVERFAULT;
7397 		return;
7398 	}
7399 
7400 	next_stateid(&sp->rs_stateid);
7401 
7402 	resp->stateid = sp->rs_stateid.stateid;
7403 
7404 	resp->delegation.delegation_type = dsp->rds_dtype;
7405 
7406 	if (dsp->rds_dtype == OPEN_DELEGATE_READ) {
7407 		open_read_delegation4 *rv =
7408 		    &resp->delegation.open_delegation4_u.read;
7409 
7410 		rv->stateid = dsp->rds_delegid.stateid;
7411 		rv->recall = FALSE; /* no policy in place to set to TRUE */
7412 		ace = &rv->permissions;
7413 	} else {
7414 		open_write_delegation4 *rv =
7415 		    &resp->delegation.open_delegation4_u.write;
7416 
7417 		rv->stateid = dsp->rds_delegid.stateid;
7418 		rv->recall = FALSE;  /* no policy in place to set to TRUE */
7419 		ace = &rv->permissions;
7420 		rv->space_limit.limitby = NFS_LIMIT_SIZE;
7421 		rv->space_limit.nfs_space_limit4_u.filesize = UINT64_MAX;
7422 	}
7423 
7424 	/* XXX For now */
7425 	ace->type = ACE4_ACCESS_ALLOWED_ACE_TYPE;
7426 	ace->flag = 0;
7427 	ace->access_mask = 0;
7428 	ace->who.utf8string_len = 0;
7429 	ace->who.utf8string_val = 0;
7430 
7431 	rfs4_deleg_state_rele(dsp);
7432 	rfs4_state_rele(sp);
7433 	rfs4_file_rele(fp);
7434 }
7435 
/* Outcome of a sequence id check. */
typedef enum {
	NFS4_CHKSEQ_OKAY = 0,	/* request carries the next expected seqid */
	NFS4_CHKSEQ_REPLAY,	/* same seqid and same op as the last reply */
	NFS4_CHKSEQ_BAD		/* anything else */
} rfs4_chkseq_t;
7441 
7442 /*
7443  * Generic function for sequence number checks.
7444  */
7445 static rfs4_chkseq_t
7446 rfs4_check_seqid(seqid4 seqid, nfs_resop4 *lastop,
7447     seqid4 rqst_seq, nfs_resop4 *resop, bool_t copyres)
7448 {
7449 	/* Same sequence ids and matching operations? */
7450 	if (seqid == rqst_seq && resop->resop == lastop->resop) {
7451 		if (copyres == TRUE) {
7452 			rfs4_free_reply(resop);
7453 			rfs4_copy_reply(resop, lastop);
7454 		}
7455 		NFS4_DEBUG(rfs4_debug, (CE_NOTE,
7456 		    "Replayed SEQID %d\n", seqid));
7457 		return (NFS4_CHKSEQ_REPLAY);
7458 	}
7459 
7460 	/* If the incoming sequence is not the next expected then it is bad */
7461 	if (rqst_seq != seqid + 1) {
7462 		if (rqst_seq == seqid) {
7463 			NFS4_DEBUG(rfs4_debug,
7464 			    (CE_NOTE, "BAD SEQID: Replayed sequence id "
7465 			    "but last op was %d current op is %d\n",
7466 			    lastop->resop, resop->resop));
7467 			return (NFS4_CHKSEQ_BAD);
7468 		}
7469 		NFS4_DEBUG(rfs4_debug,
7470 		    (CE_NOTE, "BAD SEQID: got %u expecting %u\n",
7471 		    rqst_seq, seqid));
7472 		return (NFS4_CHKSEQ_BAD);
7473 	}
7474 
7475 	/* Everything okay -- next expected */
7476 	return (NFS4_CHKSEQ_OKAY);
7477 }
7478 
7479 
7480 static rfs4_chkseq_t
7481 rfs4_check_open_seqid(seqid4 seqid, rfs4_openowner_t *op, nfs_resop4 *resop,
7482     const compound_state_t *cs)
7483 {
7484 	rfs4_chkseq_t rc;
7485 
7486 	if (rfs4_has_session(cs))
7487 		return (NFS4_CHKSEQ_OKAY);
7488 
7489 	rfs4_dbe_lock(op->ro_dbe);
7490 	rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply, seqid, resop,
7491 	    TRUE);
7492 	rfs4_dbe_unlock(op->ro_dbe);
7493 
7494 	if (rc == NFS4_CHKSEQ_OKAY)
7495 		rfs4_update_lease(op->ro_client);
7496 
7497 	return (rc);
7498 }
7499 
7500 static rfs4_chkseq_t
7501 rfs4_check_olo_seqid(seqid4 olo_seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7502 {
7503 	rfs4_chkseq_t rc;
7504 
7505 	rfs4_dbe_lock(op->ro_dbe);
7506 	rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply,
7507 	    olo_seqid, resop, FALSE);
7508 	rfs4_dbe_unlock(op->ro_dbe);
7509 
7510 	return (rc);
7511 }
7512 
7513 static rfs4_chkseq_t
7514 rfs4_check_lock_seqid(seqid4 seqid, rfs4_lo_state_t *lsp, nfs_resop4 *resop)
7515 {
7516 	rfs4_chkseq_t rc = NFS4_CHKSEQ_OKAY;
7517 
7518 	rfs4_dbe_lock(lsp->rls_dbe);
7519 	if (!lsp->rls_skip_seqid_check)
7520 		rc = rfs4_check_seqid(lsp->rls_seqid, &lsp->rls_reply, seqid,
7521 		    resop, TRUE);
7522 	rfs4_dbe_unlock(lsp->rls_dbe);
7523 
7524 	return (rc);
7525 }
7526 
/*
 * OPEN operation handler.
 *
 * In order, this routine:
 *  - requires a current filehandle (cs->vp);
 *  - looks up the client for the open owner's clientid (taken from the
 *    session's client when the compound has a session) and rejects an
 *    expired lease;
 *  - finds or creates the open owner and, when there is no session,
 *    enters the owner's ro_sw to hold off other users of its sequence
 *    space;
 *  - for a pre-existing owner, checks the open sequence id, handling
 *    bad, replayed and not-yet-confirmed cases;
 *  - applies the grace-period rules for the claim type and validates
 *    share_access/share_deny;
 *  - dispatches to the rfs4_do_open*() routine matching the claim type.
 *
 * On the way out (label "out") resp->status decides whether the open
 * owner's sequence id is advanced and the reply recorded, and a replay
 * restores the current filehandle/vnode from the saved reply
 * filehandle.  *cs->statusp always ends up equal to resp->status.
 */
static void
rfs4_op_open(nfs_argop4 *argop, nfs_resop4 *resop,
    struct svc_req *req, struct compound_state *cs)
{
	OPEN4args *args = &argop->nfs_argop4_u.opopen;
	OPEN4res *resp = &resop->nfs_resop4_u.opopen;
	open_owner4 *owner = &args->owner;
	open_claim_type4 claim = args->claim.claim;
	rfs4_client_t *cp;
	rfs4_openowner_t *oo;
	bool_t create;
	bool_t replay = FALSE;
	int can_reclaim;

	DTRACE_NFSV4_2(op__open__start, struct compound_state *, cs,
	    OPEN4args *, args);

	if (cs->vp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		goto end;
	}

	/* rfc5661 section 18.16.3 */
	if (rfs4_has_session(cs))
		owner->clientid = cs->client->rc_clientid;

	/*
	 * Need to check clientid and lease expiration first based on
	 * error ordering and incrementing sequence id.
	 */
	cp = rfs4_findclient_by_id(owner->clientid, FALSE);
	if (cp == NULL) {
		*cs->statusp = resp->status =
		    rfs4_check_clientid(&owner->clientid, 0);
		goto end;
	}

	if (rfs4_lease_expired(cp)) {
		/* no rfs4_client_rele() on this path; only the close is done */
		rfs4_client_close(cp);
		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
		goto end;
	}
	can_reclaim = cp->rc_can_reclaim;

	/*
	 * Find the open_owner for use from this point forward.  Take
	 * care in updating the sequence id based on the type of error
	 * being returned.
	 */
retry:
	/* create is in/out: reset it on every pass through retry */
	create = TRUE;
	oo = rfs4_findopenowner(owner, &create, args->seqid);
	if (oo == NULL) {
		*cs->statusp = resp->status = NFS4ERR_RESOURCE;
		rfs4_client_rele(cp);
		goto end;
	}

	/*
	 * OPEN_CONFIRM must not be implemented in v4.1
	 */
	if (rfs4_has_session(cs)) {
		oo->ro_need_confirm = FALSE;
	}

	/* Hold off access to the sequence space while the open is done */
	/* Workaround to avoid deadlock */
	if (!rfs4_has_session(cs))
		rfs4_sw_enter(&oo->ro_sw);

	/*
	 * If the open_owner existed before at the server, then check
	 * the sequence id.
	 */
	if (!create && !oo->ro_postpone_confirm) {
		switch (rfs4_check_open_seqid(args->seqid, oo, resop, cs)) {
		case NFS4_CHKSEQ_BAD:
			ASSERT(!rfs4_has_session(cs));
			/*
			 * An unconfirmed owner used with a larger seqid:
			 * drop its opens and look the owner up again.
			 */
			if ((args->seqid > oo->ro_open_seqid) &&
			    oo->ro_need_confirm) {
				rfs4_free_opens(oo, TRUE, FALSE);
				rfs4_sw_exit(&oo->ro_sw);
				rfs4_openowner_rele(oo);
				goto retry;
			}
			resp->status = NFS4ERR_BAD_SEQID;
			goto out;
		case NFS4_CHKSEQ_REPLAY: /* replay of previous request */
			replay = TRUE;
			goto out;
		default:
			break;
		}

		/*
		 * Sequence was ok and open owner exists
		 * check to see if we have yet to see an
		 * open_confirm.
		 */
		if (oo->ro_need_confirm) {
			rfs4_free_opens(oo, TRUE, FALSE);
			ASSERT(!rfs4_has_session(cs));
			rfs4_sw_exit(&oo->ro_sw);
			rfs4_openowner_rele(oo);
			goto retry;
		}
	}
	/* Grace only applies to regular-type OPENs */
	if (rfs4_clnt_in_grace(cp) &&
	    (claim == CLAIM_NULL || claim == CLAIM_DELEGATE_CUR ||
	    claim == CLAIM_FH)) {
		*cs->statusp = resp->status = NFS4ERR_GRACE;
		goto out;
	}

	/*
	 * If previous state at the server existed then can_reclaim
	 * will be set. If not reply NFS4ERR_NO_GRACE to the
	 * client.
	 */
	if (rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS && !can_reclaim) {
		*cs->statusp = resp->status = NFS4ERR_NO_GRACE;
		goto out;
	}


	/*
	 * Reject the open if the client has missed the grace period
	 */
	if (!rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS) {
		*cs->statusp = resp->status = NFS4ERR_NO_GRACE;
		goto out;
	}

	/* Couple of up-front bookkeeping items */
	if (oo->ro_need_confirm) {
		/*
		 * If this is a reclaim OPEN then we should not ask
		 * for a confirmation of the open_owner per the
		 * protocol specification.
		 */
		if (claim == CLAIM_PREVIOUS)
			oo->ro_need_confirm = FALSE;
		else
			resp->rflags |= OPEN4_RESULT_CONFIRM;
	}
	resp->rflags |= OPEN4_RESULT_LOCKTYPE_POSIX;

	/*
	 * If there is an unshared filesystem mounted on this vnode,
	 * do not allow to open/create in this directory.
	 */
	if (vn_ismntpt(cs->vp)) {
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		goto out;
	}

	/*
	 * access must READ, WRITE, or BOTH.  No access is invalid.
	 * deny can be READ, WRITE, BOTH, or NONE.
	 * bits not defined for access/deny are invalid.
	 */
	if (! (args->share_access & OPEN4_SHARE_ACCESS_BOTH) ||
	    (args->share_access & ~OPEN4_SHARE_ACCESS_BOTH) ||
	    (args->share_deny & ~OPEN4_SHARE_DENY_BOTH)) {
		*cs->statusp = resp->status = NFS4ERR_INVAL;
		goto out;
	}


	/*
	 * make sure attrset is zero before response is built.
	 */
	resp->attrset = 0;

	/* Hand off to the per-claim-type worker; it sets resp->status */
	switch (claim) {
	case CLAIM_NULL:
		rfs4_do_opennull(cs, req, args, oo, resp);
		break;
	case CLAIM_PREVIOUS:
		rfs4_do_openprev(cs, req, args, oo, resp);
		break;
	case CLAIM_DELEGATE_CUR:
		rfs4_do_opendelcur(cs, req, args, oo, resp);
		break;
	case CLAIM_DELEGATE_PREV:
		rfs4_do_opendelprev(cs, req, args, oo, resp);
		break;
	case CLAIM_FH:
		rfs4_do_openfh(cs, req, args, oo, resp);
		break;
	default:
		resp->status = NFS4ERR_INVAL;
		break;
	}

out:
	/* From here on the open owner (oo) is held but the client is not */
	rfs4_client_rele(cp);

	/* Catch sequence id handling here to make it a little easier */
	switch (resp->status) {
	case NFS4ERR_BADXDR:
	case NFS4ERR_BAD_SEQID:
	case NFS4ERR_BAD_STATEID:
	case NFS4ERR_NOFILEHANDLE:
	case NFS4ERR_RESOURCE:
	case NFS4ERR_STALE_CLIENTID:
	case NFS4ERR_STALE_STATEID:
		/*
		 * The protocol states that if any of these errors are
		 * being returned, the sequence id should not be
		 * incremented.  Any other return requires an
		 * increment.
		 */
		break;
	default:
		/* Always update the lease in this case */
		rfs4_update_lease(oo->ro_client);

		/* Regular response - copy the result */
		if (!replay)
			rfs4_update_open_resp(oo, resop, &cs->fh);

		/*
		 * REPLAY case: Only if the previous response was OK
		 * do we copy the filehandle.  If not OK, no
		 * filehandle to copy.
		 */
		if (replay == TRUE &&
		    resp->status == NFS4_OK &&
		    oo->ro_reply_fh.nfs_fh4_val) {
			/*
			 * If this is a replay, we must restore the
			 * current filehandle/vp to that of what was
			 * returned originally.  Try our best to do
			 * it.
			 */
			nfs_fh4_fmt_t *fh_fmtp =
			    (nfs_fh4_fmt_t *)oo->ro_reply_fh.nfs_fh4_val;

			cs->exi = checkexport4(&fh_fmtp->fh4_fsid,
			    (fid_t *)&fh_fmtp->fh4_xlen, NULL);

			if (cs->exi == NULL) {
				resp->status = NFS4ERR_STALE;
				goto finish;
			}

			VN_RELE(cs->vp);

			/* on failure nfs4_fhtovp() is handed resp->status to set */
			cs->vp = nfs4_fhtovp(&oo->ro_reply_fh, cs->exi,
			    &resp->status);

			if (cs->vp == NULL)
				goto finish;

			nfs_fh4_copy(&oo->ro_reply_fh, &cs->fh);
		}

		/*
		 * If this was a replay, no need to update the
		 * sequence id. If the open_owner was not created on
		 * this pass, then update.  The first use of an
		 * open_owner will not bump the sequence id.
		 */
		if (replay == FALSE && !create)
			rfs4_update_open_sequence(oo);
		/*
		 * If the client is receiving an error and the
		 * open_owner needs to be confirmed, there is no way
		 * to notify the client of this fact ignoring the fact
		 * that the server has no method of returning a
		 * stateid to confirm.  Therefore, the server needs to
		 * mark this open_owner in a way as to avoid the
		 * sequence id checking the next time the client uses
		 * this open_owner.
		 */
		if (resp->status != NFS4_OK && oo->ro_need_confirm)
			oo->ro_postpone_confirm = TRUE;
		/*
		 * If OK response then clear the postpone flag and
		 * reset the sequence id to keep in sync with the
		 * client.
		 */
		if (resp->status == NFS4_OK && oo->ro_postpone_confirm) {
			oo->ro_postpone_confirm = FALSE;
			oo->ro_open_seqid = args->seqid;
		}
		break;
	}

finish:
	/* Publish the final status and drop what was taken on the owner */
	*cs->statusp = resp->status;

	if (!rfs4_has_session(cs))
		rfs4_sw_exit(&oo->ro_sw);
	rfs4_openowner_rele(oo);

end:
	DTRACE_NFSV4_2(op__open__done, struct compound_state *, cs,
	    OPEN4res *, resp);
}
7829 
7830 /*ARGSUSED*/
7831 void
7832 rfs4_op_open_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
7833     struct svc_req *req, struct compound_state *cs)
7834 {
7835 	OPEN_CONFIRM4args *args = &argop->nfs_argop4_u.opopen_confirm;
7836 	OPEN_CONFIRM4res *resp = &resop->nfs_resop4_u.opopen_confirm;
7837 	rfs4_state_t *sp;
7838 	nfsstat4 status;
7839 
7840 	DTRACE_NFSV4_2(op__open__confirm__start, struct compound_state *, cs,
7841 	    OPEN_CONFIRM4args *, args);
7842 
7843 	ASSERT(!rfs4_has_session(cs));
7844 
7845 	if (cs->vp == NULL) {
7846 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7847 		goto out;
7848 	}
7849 
7850 	if (cs->vp->v_type != VREG) {
7851 		*cs->statusp = resp->status =
7852 		    cs->vp->v_type == VDIR ? NFS4ERR_ISDIR : NFS4ERR_INVAL;
7853 		return;
7854 	}
7855 
7856 	status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7857 	if (status != NFS4_OK) {
7858 		*cs->statusp = resp->status = status;
7859 		goto out;
7860 	}
7861 
7862 	/* Ensure specified filehandle matches */
7863 	if (cs->vp != sp->rs_finfo->rf_vp) {
7864 		rfs4_state_rele(sp);
7865 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7866 		goto out;
7867 	}
7868 
7869 	/* hold off other access to open_owner while we tinker */
7870 	rfs4_sw_enter(&sp->rs_owner->ro_sw);
7871 
7872 	switch (rfs4_check_stateid_seqid(sp, &args->open_stateid, cs)) {
7873 	case NFS4_CHECK_STATEID_OKAY:
7874 		if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7875 		    resop, cs) != 0) {
7876 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7877 			break;
7878 		}
7879 		/*
7880 		 * If it is the appropriate stateid and determined to
7881 		 * be "OKAY" then this means that the stateid does not
7882 		 * need to be confirmed and the client is in error for
7883 		 * sending an OPEN_CONFIRM.
7884 		 */
7885 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7886 		break;
7887 	case NFS4_CHECK_STATEID_OLD:
7888 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7889 		break;
7890 	case NFS4_CHECK_STATEID_BAD:
7891 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7892 		break;
7893 	case NFS4_CHECK_STATEID_EXPIRED:
7894 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
7895 		break;
7896 	case NFS4_CHECK_STATEID_CLOSED:
7897 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7898 		break;
7899 	case NFS4_CHECK_STATEID_REPLAY:
7900 		switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7901 		    resop, cs)) {
7902 		case NFS4_CHKSEQ_OKAY:
7903 			/*
7904 			 * This is replayed stateid; if seqid matches
7905 			 * next expected, then client is using wrong seqid.
7906 			 */
7907 			/* fall through */
7908 		case NFS4_CHKSEQ_BAD:
7909 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7910 			break;
7911 		case NFS4_CHKSEQ_REPLAY:
7912 			/*
7913 			 * Note this case is the duplicate case so
7914 			 * resp->status is already set.
7915 			 */
7916 			*cs->statusp = resp->status;
7917 			rfs4_update_lease(sp->rs_owner->ro_client);
7918 			break;
7919 		}
7920 		break;
7921 	case NFS4_CHECK_STATEID_UNCONFIRMED:
7922 		if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7923 		    resop, cs) != NFS4_CHKSEQ_OKAY) {
7924 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7925 			break;
7926 		}
7927 		*cs->statusp = resp->status = NFS4_OK;
7928 
7929 		next_stateid(&sp->rs_stateid);
7930 		resp->open_stateid = sp->rs_stateid.stateid;
7931 		sp->rs_owner->ro_need_confirm = FALSE;
7932 		rfs4_update_lease(sp->rs_owner->ro_client);
7933 		rfs4_update_open_sequence(sp->rs_owner);
7934 		rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7935 		break;
7936 	default:
7937 		ASSERT(FALSE);
7938 		*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7939 		break;
7940 	}
7941 	rfs4_sw_exit(&sp->rs_owner->ro_sw);
7942 	rfs4_state_rele(sp);
7943 
7944 out:
7945 	DTRACE_NFSV4_2(op__open__confirm__done, struct compound_state *, cs,
7946 	    OPEN_CONFIRM4res *, resp);
7947 }
7948 
7949 /*ARGSUSED*/
7950 void
7951 rfs4_op_open_downgrade(nfs_argop4 *argop, nfs_resop4 *resop,
7952     struct svc_req *req, struct compound_state *cs)
7953 {
7954 	OPEN_DOWNGRADE4args *args = &argop->nfs_argop4_u.opopen_downgrade;
7955 	OPEN_DOWNGRADE4res *resp = &resop->nfs_resop4_u.opopen_downgrade;
7956 	uint32_t access = args->share_access;
7957 	uint32_t deny = args->share_deny;
7958 	nfsstat4 status;
7959 	rfs4_state_t *sp;
7960 	rfs4_file_t *fp;
7961 	int fflags = 0;
7962 
7963 	DTRACE_NFSV4_2(op__open__downgrade__start, struct compound_state *, cs,
7964 	    OPEN_DOWNGRADE4args *, args);
7965 
7966 	if (cs->vp == NULL) {
7967 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7968 		goto out;
7969 	}
7970 
7971 	if (cs->vp->v_type != VREG) {
7972 		*cs->statusp = resp->status = NFS4ERR_INVAL;
7973 		return;
7974 	}
7975 
7976 	status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7977 	if (status != NFS4_OK) {
7978 		*cs->statusp = resp->status = status;
7979 		goto out;
7980 	}
7981 
7982 	/* Ensure specified filehandle matches */
7983 	if (cs->vp != sp->rs_finfo->rf_vp) {
7984 		rfs4_state_rele(sp);
7985 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7986 		goto out;
7987 	}
7988 
7989 	/* hold off other access to open_owner while we tinker */
7990 	rfs4_sw_enter(&sp->rs_owner->ro_sw);
7991 
7992 	switch (rfs4_check_stateid_seqid(sp, &args->open_stateid, cs)) {
7993 	case NFS4_CHECK_STATEID_OKAY:
7994 		if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7995 		    resop, cs) != NFS4_CHKSEQ_OKAY) {
7996 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7997 			goto end;
7998 		}
7999 		break;
8000 	case NFS4_CHECK_STATEID_OLD:
8001 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8002 		goto end;
8003 	case NFS4_CHECK_STATEID_BAD:
8004 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8005 		goto end;
8006 	case NFS4_CHECK_STATEID_EXPIRED:
8007 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
8008 		goto end;
8009 	case NFS4_CHECK_STATEID_CLOSED:
8010 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8011 		goto end;
8012 	case NFS4_CHECK_STATEID_UNCONFIRMED:
8013 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8014 		goto end;
8015 	case NFS4_CHECK_STATEID_REPLAY:
8016 		ASSERT(!rfs4_has_session(cs));
8017 
8018 		/* Check the sequence id for the open owner */
8019 		switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8020 		    resop, cs)) {
8021 		case NFS4_CHKSEQ_OKAY:
8022 			/*
8023 			 * This is replayed stateid; if seqid matches
8024 			 * next expected, then client is using wrong seqid.
8025 			 */
8026 			/* fall through */
8027 		case NFS4_CHKSEQ_BAD:
8028 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8029 			goto end;
8030 		case NFS4_CHKSEQ_REPLAY:
8031 			/*
8032 			 * Note this case is the duplicate case so
8033 			 * resp->status is already set.
8034 			 */
8035 			*cs->statusp = resp->status;
8036 			rfs4_update_lease(sp->rs_owner->ro_client);
8037 			goto end;
8038 		}
8039 		break;
8040 	default:
8041 		ASSERT(FALSE);
8042 		break;
8043 	}
8044 
8045 	rfs4_dbe_lock(sp->rs_dbe);
8046 	/*
8047 	 * Check that the new access modes and deny modes are valid.
8048 	 * Check that no invalid bits are set.
8049 	 */
8050 	if ((access & ~(OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) ||
8051 	    (deny & ~(OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE))) {
8052 		*cs->statusp = resp->status = NFS4ERR_INVAL;
8053 		rfs4_update_open_sequence(sp->rs_owner);
8054 		rfs4_dbe_unlock(sp->rs_dbe);
8055 		goto end;
8056 	}
8057 
8058 	/*
8059 	 * The new modes must be a subset of the current modes and
8060 	 * the access must specify at least one mode. To test that
8061 	 * the new mode is a subset of the current modes we bitwise
8062 	 * AND them together and check that the result equals the new
8063 	 * mode. For example:
8064 	 * New mode, access == R and current mode, sp->rs_open_access  == RW
8065 	 * access & sp->rs_open_access == R == access, so the new access mode
8066 	 * is valid. Consider access == RW, sp->rs_open_access = R
8067 	 * access & sp->rs_open_access == R != access, so the new access mode
8068 	 * is invalid.
8069 	 */
8070 	if ((access & sp->rs_open_access) != access ||
8071 	    (deny & sp->rs_open_deny) != deny ||
8072 	    (access &
8073 	    (OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) == 0) {
8074 		*cs->statusp = resp->status = NFS4ERR_INVAL;
8075 		rfs4_update_open_sequence(sp->rs_owner);
8076 		rfs4_dbe_unlock(sp->rs_dbe);
8077 		goto end;
8078 	}
8079 
8080 	/*
8081 	 * Release any share locks associated with this stateID.
8082 	 * Strictly speaking, this violates the spec because the
8083 	 * spec effectively requires that open downgrade be atomic.
8084 	 * At present, fs_shrlock does not have this capability.
8085 	 */
8086 	(void) rfs4_unshare(sp);
8087 
8088 	status = rfs4_share(sp, access, deny);
8089 	if (status != NFS4_OK) {
8090 		*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
8091 		rfs4_update_open_sequence(sp->rs_owner);
8092 		rfs4_dbe_unlock(sp->rs_dbe);
8093 		goto end;
8094 	}
8095 
8096 	fp = sp->rs_finfo;
8097 	rfs4_dbe_lock(fp->rf_dbe);
8098 
8099 	/*
8100 	 * If the current mode has deny read and the new mode
8101 	 * does not, decrement the number of deny read mode bits
8102 	 * and if it goes to zero turn off the deny read bit
8103 	 * on the file.
8104 	 */
8105 	if ((sp->rs_open_deny & OPEN4_SHARE_DENY_READ) &&
8106 	    (deny & OPEN4_SHARE_DENY_READ) == 0) {
8107 		fp->rf_deny_read--;
8108 		if (fp->rf_deny_read == 0)
8109 			fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
8110 	}
8111 
8112 	/*
8113 	 * If the current mode has deny write and the new mode
8114 	 * does not, decrement the number of deny write mode bits
8115 	 * and if it goes to zero turn off the deny write bit
8116 	 * on the file.
8117 	 */
8118 	if ((sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) &&
8119 	    (deny & OPEN4_SHARE_DENY_WRITE) == 0) {
8120 		fp->rf_deny_write--;
8121 		if (fp->rf_deny_write == 0)
8122 			fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
8123 	}
8124 
8125 	/*
8126 	 * If the current mode has access read and the new mode
8127 	 * does not, decrement the number of access read mode bits
8128 	 * and if it goes to zero turn off the access read bit
8129 	 * on the file.  set fflags to FREAD for the call to
8130 	 * vn_open_downgrade().
8131 	 */
8132 	if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) &&
8133 	    (access & OPEN4_SHARE_ACCESS_READ) == 0) {
8134 		fp->rf_access_read--;
8135 		if (fp->rf_access_read == 0)
8136 			fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
8137 		fflags |= FREAD;
8138 	}
8139 
8140 	/*
8141 	 * If the current mode has access write and the new mode
8142 	 * does not, decrement the number of access write mode bits
8143 	 * and if it goes to zero turn off the access write bit
8144 	 * on the file.  set fflags to FWRITE for the call to
8145 	 * vn_open_downgrade().
8146 	 */
8147 	if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) &&
8148 	    (access & OPEN4_SHARE_ACCESS_WRITE) == 0) {
8149 		fp->rf_access_write--;
8150 		if (fp->rf_access_write == 0)
8151 			fp->rf_share_deny &= ~OPEN4_SHARE_ACCESS_WRITE;
8152 		fflags |= FWRITE;
8153 	}
8154 
8155 	/* Check that the file is still accessible */
8156 	ASSERT(fp->rf_share_access);
8157 
8158 	rfs4_dbe_unlock(fp->rf_dbe);
8159 
8160 	/* now set the new open access and deny modes */
8161 	sp->rs_open_access = access;
8162 	sp->rs_open_deny = deny;
8163 
8164 	/*
8165 	 * we successfully downgraded the share lock, now we need to downgrade
8166 	 * the open. it is possible that the downgrade was only for a deny
8167 	 * mode and we have nothing else to do.
8168 	 */
8169 	if ((fflags & (FREAD|FWRITE)) != 0)
8170 		vn_open_downgrade(cs->vp, fflags);
8171 
8172 	/* Update the stateid */
8173 	next_stateid(&sp->rs_stateid);
8174 	resp->open_stateid = sp->rs_stateid.stateid;
8175 
8176 	rfs4_dbe_unlock(sp->rs_dbe);
8177 
8178 	*cs->statusp = resp->status = NFS4_OK;
8179 	/* Update the lease */
8180 	rfs4_update_lease(sp->rs_owner->ro_client);
8181 	/* And the sequence */
8182 	rfs4_update_open_sequence(sp->rs_owner);
8183 	rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8184 
8185 end:
8186 	rfs4_sw_exit(&sp->rs_owner->ro_sw);
8187 	rfs4_state_rele(sp);
8188 out:
8189 	DTRACE_NFSV4_2(op__open__downgrade__done, struct compound_state *, cs,
8190 	    OPEN_DOWNGRADE4res *, resp);
8191 }
8192 
/*
 * Search the first n bytes of the buffer s1 (which need not be
 * NUL-terminated) for the NUL-terminated string s2.
 *
 * Returns a pointer to the first position in s1 at which all of s2
 * matches within the n-byte window, or NULL if there is none.  An empty
 * s2 matches at s1 itself.
 */
static void *
memstr(const void *s1, const char *s2, size_t n)
{
	const size_t patlen = strlen(s2);
	char *cursor;
	size_t remaining;

	/* Slide over the buffer while a whole pattern can still fit. */
	for (cursor = (char *)s1, remaining = n; remaining >= patlen;
	    cursor++, remaining--) {
		if (bcmp(cursor, s2, patlen) == 0)
			return (cursor);
	}

	return (NULL);
}
8208 
8209 /*
8210  * The logic behind this function is detailed in the NFSv4 RFC in the
8211  * SETCLIENTID operation description under IMPLEMENTATION.  Refer to
8212  * that section for explicit guidance to server behavior for
8213  * SETCLIENTID.
8214  */
8215 void
8216 rfs4_op_setclientid(nfs_argop4 *argop, nfs_resop4 *resop,
8217     struct svc_req *req, struct compound_state *cs)
8218 {
8219 	SETCLIENTID4args *args = &argop->nfs_argop4_u.opsetclientid;
8220 	SETCLIENTID4res *res = &resop->nfs_resop4_u.opsetclientid;
8221 	rfs4_client_t *cp, *newcp, *cp_confirmed, *cp_unconfirmed;
8222 	rfs4_clntip_t *ci;
8223 	bool_t create;
8224 	char *addr, *netid;
8225 	int len;
8226 
8227 	DTRACE_NFSV4_2(op__setclientid__start, struct compound_state *, cs,
8228 	    SETCLIENTID4args *, args);
8229 retry:
8230 	newcp = cp_confirmed = cp_unconfirmed = NULL;
8231 
8232 	/*
8233 	 * Save the caller's IP address
8234 	 */
8235 	args->client.cl_addr =
8236 	    (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
8237 
8238 	/*
8239 	 * Record if it is a Solaris client that cannot handle referrals.
8240 	 */
8241 	if (memstr(args->client.id_val, "Solaris", args->client.id_len) &&
8242 	    !memstr(args->client.id_val, "+referrals", args->client.id_len)) {
8243 		/* Add a "yes, it's downrev" record */
8244 		create = TRUE;
8245 		ci = rfs4_find_clntip(args->client.cl_addr, &create);
8246 		ASSERT(ci != NULL);
8247 		rfs4_dbe_rele(ci->ri_dbe);
8248 	} else {
8249 		/* Remove any previous record */
8250 		rfs4_invalidate_clntip(args->client.cl_addr);
8251 	}
8252 
8253 	/*
8254 	 * In search of an EXISTING client matching the incoming
8255 	 * request to establish a new client identifier at the server
8256 	 */
8257 	create = TRUE;
8258 	cp = rfs4_findclient(&args->client, &create, NULL);
8259 
8260 	/* Should never happen */
8261 	ASSERT(cp != NULL);
8262 
8263 	if (cp == NULL) {
8264 		*cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8265 		goto out;
8266 	}
8267 
8268 	/*
8269 	 * Easiest case. Client identifier is newly created and is
8270 	 * unconfirmed.  Also note that for this case, no other
8271 	 * entries exist for the client identifier.  Nothing else to
8272 	 * check.  Just setup the response and respond.
8273 	 */
8274 	if (create) {
8275 		*cs->statusp = res->status = NFS4_OK;
8276 		res->SETCLIENTID4res_u.resok4.clientid = cp->rc_clientid;
8277 		res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8278 		    cp->rc_confirm_verf;
8279 		/* Setup callback information; CB_NULL confirmation later */
8280 		rfs4_client_setcb(cp, &args->callback, args->callback_ident);
8281 
8282 		rfs4_client_rele(cp);
8283 		goto out;
8284 	}
8285 
8286 	/*
8287 	 * An existing, confirmed client may exist but it may not have
8288 	 * been active for at least one lease period.  If so, then
8289 	 * "close" the client and create a new client identifier
8290 	 */
8291 	if (rfs4_lease_expired(cp)) {
8292 		rfs4_client_close(cp);
8293 		goto retry;
8294 	}
8295 
8296 	if (cp->rc_need_confirm == TRUE)
8297 		cp_unconfirmed = cp;
8298 	else
8299 		cp_confirmed = cp;
8300 
8301 	cp = NULL;
8302 
8303 	/*
8304 	 * We have a confirmed client, now check for an
8305 	 * unconfimred entry
8306 	 */
8307 	if (cp_confirmed) {
8308 		/* If creds don't match then client identifier is inuse */
8309 		if (!creds_ok(&cp_confirmed->rc_cr_set, req, cs)) {
8310 			rfs4_cbinfo_t *cbp;
8311 			/*
8312 			 * Some one else has established this client
8313 			 * id. Try and say * who they are. We will use
8314 			 * the call back address supplied by * the
8315 			 * first client.
8316 			 */
8317 			*cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8318 
8319 			addr = netid = NULL;
8320 
8321 			cbp = &cp_confirmed->rc_cbinfo;
8322 			if (cbp->cb_callback.cb_location.r_addr &&
8323 			    cbp->cb_callback.cb_location.r_netid) {
8324 				cb_client4 *cbcp = &cbp->cb_callback;
8325 
8326 				len = strlen(cbcp->cb_location.r_addr)+1;
8327 				addr = kmem_alloc(len, KM_SLEEP);
8328 				bcopy(cbcp->cb_location.r_addr, addr, len);
8329 				len = strlen(cbcp->cb_location.r_netid)+1;
8330 				netid = kmem_alloc(len, KM_SLEEP);
8331 				bcopy(cbcp->cb_location.r_netid, netid, len);
8332 			}
8333 
8334 			res->SETCLIENTID4res_u.client_using.r_addr = addr;
8335 			res->SETCLIENTID4res_u.client_using.r_netid = netid;
8336 
8337 			rfs4_client_rele(cp_confirmed);
8338 		}
8339 
8340 		/*
8341 		 * Confirmed, creds match, and verifier matches; must
8342 		 * be an update of the callback info
8343 		 */
8344 		if (cp_confirmed->rc_nfs_client.verifier ==
8345 		    args->client.verifier) {
8346 			/* Setup callback information */
8347 			rfs4_client_setcb(cp_confirmed, &args->callback,
8348 			    args->callback_ident);
8349 
8350 			/* everything okay -- move ahead */
8351 			*cs->statusp = res->status = NFS4_OK;
8352 			res->SETCLIENTID4res_u.resok4.clientid =
8353 			    cp_confirmed->rc_clientid;
8354 
8355 			/* update the confirm_verifier and return it */
8356 			rfs4_client_scv_next(cp_confirmed);
8357 			res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8358 			    cp_confirmed->rc_confirm_verf;
8359 
8360 			rfs4_client_rele(cp_confirmed);
8361 			goto out;
8362 		}
8363 
8364 		/*
8365 		 * Creds match but the verifier doesn't.  Must search
8366 		 * for an unconfirmed client that would be replaced by
8367 		 * this request.
8368 		 */
8369 		create = FALSE;
8370 		cp_unconfirmed = rfs4_findclient(&args->client, &create,
8371 		    cp_confirmed);
8372 	}
8373 
8374 	/*
8375 	 * At this point, we have taken care of the brand new client
8376 	 * struct, INUSE case, update of an existing, and confirmed
8377 	 * client struct.
8378 	 */
8379 
8380 	/*
8381 	 * check to see if things have changed while we originally
8382 	 * picked up the client struct.  If they have, then return and
8383 	 * retry the processing of this SETCLIENTID request.
8384 	 */
8385 	if (cp_unconfirmed) {
8386 		rfs4_dbe_lock(cp_unconfirmed->rc_dbe);
8387 		if (!cp_unconfirmed->rc_need_confirm) {
8388 			rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8389 			rfs4_client_rele(cp_unconfirmed);
8390 			if (cp_confirmed)
8391 				rfs4_client_rele(cp_confirmed);
8392 			goto retry;
8393 		}
8394 		/* do away with the old unconfirmed one */
8395 		rfs4_dbe_invalidate(cp_unconfirmed->rc_dbe);
8396 		rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8397 		rfs4_client_rele(cp_unconfirmed);
8398 		cp_unconfirmed = NULL;
8399 	}
8400 
8401 	/*
8402 	 * This search will temporarily hide the confirmed client
8403 	 * struct while a new client struct is created as the
8404 	 * unconfirmed one.
8405 	 */
8406 	create = TRUE;
8407 	newcp = rfs4_findclient(&args->client, &create, cp_confirmed);
8408 
8409 	ASSERT(newcp != NULL);
8410 
8411 	if (newcp == NULL) {
8412 		*cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8413 		rfs4_client_rele(cp_confirmed);
8414 		goto out;
8415 	}
8416 
8417 	/*
8418 	 * If one was not created, then a similar request must be in
8419 	 * process so release and start over with this one
8420 	 */
8421 	if (create != TRUE) {
8422 		rfs4_client_rele(newcp);
8423 		if (cp_confirmed)
8424 			rfs4_client_rele(cp_confirmed);
8425 		goto retry;
8426 	}
8427 
8428 	*cs->statusp = res->status = NFS4_OK;
8429 	res->SETCLIENTID4res_u.resok4.clientid = newcp->rc_clientid;
8430 	res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8431 	    newcp->rc_confirm_verf;
8432 	/* Setup callback information; CB_NULL confirmation later */
8433 	rfs4_client_setcb(newcp, &args->callback, args->callback_ident);
8434 
8435 	newcp->rc_cp_confirmed = cp_confirmed;
8436 
8437 	rfs4_client_rele(newcp);
8438 
8439 out:
8440 	DTRACE_NFSV4_2(op__setclientid__done, struct compound_state *, cs,
8441 	    SETCLIENTID4res *, res);
8442 }
8443 
8444 /*ARGSUSED*/
8445 void
8446 rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
8447     struct svc_req *req, struct compound_state *cs)
8448 {
8449 	SETCLIENTID_CONFIRM4args *args =
8450 	    &argop->nfs_argop4_u.opsetclientid_confirm;
8451 	SETCLIENTID_CONFIRM4res *res =
8452 	    &resop->nfs_resop4_u.opsetclientid_confirm;
8453 	rfs4_client_t *cp, *cptoclose = NULL;
8454 	nfs4_srv_t *nsrv4;
8455 
8456 	DTRACE_NFSV4_2(op__setclientid__confirm__start,
8457 	    struct compound_state *, cs,
8458 	    SETCLIENTID_CONFIRM4args *, args);
8459 
8460 	nsrv4 = nfs4_get_srv();
8461 	*cs->statusp = res->status = NFS4_OK;
8462 
8463 	cp = rfs4_findclient_by_id(args->clientid, TRUE);
8464 
8465 	if (cp == NULL) {
8466 		*cs->statusp = res->status =
8467 		    rfs4_check_clientid(&args->clientid, 1);
8468 		goto out;
8469 	}
8470 
8471 	if (!creds_ok(&cp->rc_cr_set, req, cs)) {
8472 		*cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8473 		rfs4_client_rele(cp);
8474 		goto out;
8475 	}
8476 
8477 	/* If the verifier doesn't match, the record doesn't match */
8478 	if (cp->rc_confirm_verf != args->setclientid_confirm) {
8479 		*cs->statusp = res->status = NFS4ERR_STALE_CLIENTID;
8480 		rfs4_client_rele(cp);
8481 		goto out;
8482 	}
8483 
8484 	rfs4_dbe_lock(cp->rc_dbe);
8485 	cp->rc_need_confirm = FALSE;
8486 	if (cp->rc_cp_confirmed) {
8487 		cptoclose = cp->rc_cp_confirmed;
8488 		cptoclose->rc_ss_remove = 1;
8489 		cp->rc_cp_confirmed = NULL;
8490 	}
8491 
8492 	/*
8493 	 * Update the client's associated server instance, if it's changed
8494 	 * since the client was created.
8495 	 */
8496 	if (rfs4_servinst(cp) != nsrv4->nfs4_cur_servinst)
8497 		rfs4_servinst_assign(nsrv4, cp, nsrv4->nfs4_cur_servinst);
8498 
8499 	/*
8500 	 * Record clientid in stable storage.
8501 	 * Must be done after server instance has been assigned.
8502 	 */
8503 	rfs4_ss_clid(nsrv4, cp);
8504 
8505 	rfs4_dbe_unlock(cp->rc_dbe);
8506 
8507 	if (cptoclose)
8508 		/* don't need to rele, client_close does it */
8509 		rfs4_client_close(cptoclose);
8510 
8511 	/* If needed, initiate CB_NULL call for callback path */
8512 	rfs4_deleg_cb_check(cp);
8513 	rfs4_update_lease(cp);
8514 
8515 	/*
8516 	 * Check to see if client can perform reclaims
8517 	 */
8518 	rfs4_ss_chkclid(nsrv4, cp);
8519 
8520 	rfs4_client_rele(cp);
8521 
8522 out:
8523 	DTRACE_NFSV4_2(op__setclientid__confirm__done,
8524 	    struct compound_state *, cs,
8525 	    SETCLIENTID_CONFIRM4 *, res);
8526 }
8527 
8528 
8529 /*ARGSUSED*/
8530 void
8531 rfs4_op_close(nfs_argop4 *argop, nfs_resop4 *resop,
8532     struct svc_req *req, struct compound_state *cs)
8533 {
8534 	CLOSE4args *args = &argop->nfs_argop4_u.opclose;
8535 	CLOSE4res *resp = &resop->nfs_resop4_u.opclose;
8536 	rfs4_state_t *sp;
8537 	nfsstat4 status;
8538 
8539 	DTRACE_NFSV4_2(op__close__start, struct compound_state *, cs,
8540 	    CLOSE4args *, args);
8541 
8542 	if (cs->vp == NULL) {
8543 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8544 		goto out;
8545 	}
8546 
8547 	status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_INVALID);
8548 	if (status != NFS4_OK) {
8549 		*cs->statusp = resp->status = status;
8550 		goto out;
8551 	}
8552 
8553 	/* Ensure specified filehandle matches */
8554 	if (cs->vp != sp->rs_finfo->rf_vp) {
8555 		rfs4_state_rele(sp);
8556 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8557 		goto out;
8558 	}
8559 
8560 	/* hold off other access to open_owner while we tinker */
8561 	rfs4_sw_enter(&sp->rs_owner->ro_sw);
8562 
8563 	switch (rfs4_check_stateid_seqid(sp, &args->open_stateid, cs)) {
8564 	case NFS4_CHECK_STATEID_OKAY:
8565 		if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8566 		    resop, cs) != NFS4_CHKSEQ_OKAY) {
8567 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8568 			goto end;
8569 		}
8570 		break;
8571 	case NFS4_CHECK_STATEID_OLD:
8572 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8573 		goto end;
8574 	case NFS4_CHECK_STATEID_BAD:
8575 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8576 		goto end;
8577 	case NFS4_CHECK_STATEID_EXPIRED:
8578 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
8579 		goto end;
8580 	case NFS4_CHECK_STATEID_CLOSED:
8581 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8582 		goto end;
8583 	case NFS4_CHECK_STATEID_UNCONFIRMED:
8584 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8585 		goto end;
8586 	case NFS4_CHECK_STATEID_REPLAY:
8587 		ASSERT(!rfs4_has_session(cs));
8588 
8589 		/* Check the sequence id for the open owner */
8590 		switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8591 		    resop, cs)) {
8592 		case NFS4_CHKSEQ_OKAY:
8593 			/*
8594 			 * This is replayed stateid; if seqid matches
8595 			 * next expected, then client is using wrong seqid.
8596 			 */
8597 			/* FALL THROUGH */
8598 		case NFS4_CHKSEQ_BAD:
8599 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8600 			goto end;
8601 		case NFS4_CHKSEQ_REPLAY:
8602 			/*
8603 			 * Note this case is the duplicate case so
8604 			 * resp->status is already set.
8605 			 */
8606 			*cs->statusp = resp->status;
8607 			rfs4_update_lease(sp->rs_owner->ro_client);
8608 			goto end;
8609 		}
8610 		break;
8611 	default:
8612 		ASSERT(FALSE);
8613 		break;
8614 	}
8615 
8616 	rfs4_dbe_lock(sp->rs_dbe);
8617 
8618 	/* Update the stateid. */
8619 	next_stateid(&sp->rs_stateid);
8620 	resp->open_stateid = sp->rs_stateid.stateid;
8621 
8622 	rfs4_dbe_unlock(sp->rs_dbe);
8623 
8624 	rfs4_update_lease(sp->rs_owner->ro_client);
8625 	rfs4_update_open_sequence(sp->rs_owner);
8626 	rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8627 
8628 	rfs4_state_close(sp, FALSE, FALSE, cs->cr);
8629 
8630 	*cs->statusp = resp->status = status;
8631 
8632 end:
8633 	rfs4_sw_exit(&sp->rs_owner->ro_sw);
8634 	rfs4_state_rele(sp);
8635 out:
8636 	DTRACE_NFSV4_2(op__close__done, struct compound_state *, cs,
8637 	    CLOSE4res *, resp);
8638 }
8639 
8640 /*
8641  * Manage the counts on the file struct and close all file locks
8642  */
8643 /*ARGSUSED*/
8644 void
8645 rfs4_release_share_lock_state(rfs4_state_t *sp, cred_t *cr,
8646     bool_t close_of_client)
8647 {
8648 	rfs4_file_t *fp = sp->rs_finfo;
8649 	rfs4_lo_state_t *lsp;
8650 	int fflags = 0;
8651 
8652 	/*
8653 	 * If this call is part of the larger closing down of client
8654 	 * state then it is just easier to release all locks
8655 	 * associated with this client instead of going through each
8656 	 * individual file and cleaning locks there.
8657 	 */
8658 	if (close_of_client) {
8659 		if (sp->rs_owner->ro_client->rc_unlksys_completed == FALSE &&
8660 		    !list_is_empty(&sp->rs_lostatelist) &&
8661 		    sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID) {
8662 			/* Is the PxFS kernel module loaded? */
8663 			if (lm_remove_file_locks != NULL) {
8664 				int new_sysid;
8665 
8666 				/* Encode the cluster nodeid in new sysid */
8667 				new_sysid = sp->rs_owner->ro_client->rc_sysidt;
8668 				lm_set_nlmid_flk(&new_sysid);
8669 
8670 				/*
8671 				 * This PxFS routine removes file locks for a
8672 				 * client over all nodes of a cluster.
8673 				 */
8674 				NFS4_DEBUG(rfs4_debug, (CE_NOTE,
8675 				    "lm_remove_file_locks(sysid=0x%x)\n",
8676 				    new_sysid));
8677 				(*lm_remove_file_locks)(new_sysid);
8678 			} else {
8679 				struct flock64 flk;
8680 
8681 				/* Release all locks for this client */
8682 				flk.l_type = F_UNLKSYS;
8683 				flk.l_whence = 0;
8684 				flk.l_start = 0;
8685 				flk.l_len = 0;
8686 				flk.l_sysid =
8687 				    sp->rs_owner->ro_client->rc_sysidt;
8688 				flk.l_pid = 0;
8689 				(void) VOP_FRLOCK(sp->rs_finfo->rf_vp, F_SETLK,
8690 				    &flk, F_REMOTELOCK | FREAD | FWRITE,
8691 				    (u_offset_t)0, NULL, CRED(), NULL);
8692 			}
8693 
8694 			sp->rs_owner->ro_client->rc_unlksys_completed = TRUE;
8695 		}
8696 	}
8697 
8698 	/*
8699 	 * Release all locks on this file by this lock owner or at
8700 	 * least mark the locks as having been released
8701 	 */
8702 	for (lsp = list_head(&sp->rs_lostatelist); lsp != NULL;
8703 	    lsp = list_next(&sp->rs_lostatelist, lsp)) {
8704 		lsp->rls_locks_cleaned = TRUE;
8705 
8706 		/* Was this already taken care of above? */
8707 		if (!close_of_client &&
8708 		    sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8709 			(void) cleanlocks(sp->rs_finfo->rf_vp,
8710 			    lsp->rls_locker->rl_pid,
8711 			    lsp->rls_locker->rl_client->rc_sysidt);
8712 	}
8713 
8714 	/*
8715 	 * Release any shrlocks associated with this open state ID.
8716 	 * This must be done before the rfs4_state gets marked closed.
8717 	 */
8718 	if (sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8719 		(void) rfs4_unshare(sp);
8720 
8721 	if (sp->rs_open_access) {
8722 		rfs4_dbe_lock(fp->rf_dbe);
8723 
8724 		/*
8725 		 * Decrement the count for each access and deny bit that this
8726 		 * state has contributed to the file.
8727 		 * If the file counts go to zero
8728 		 * clear the appropriate bit in the appropriate mask.
8729 		 */
8730 		if (sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) {
8731 			fp->rf_access_read--;
8732 			fflags |= FREAD;
8733 			if (fp->rf_access_read == 0)
8734 				fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
8735 		}
8736 		if (sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) {
8737 			fp->rf_access_write--;
8738 			fflags |= FWRITE;
8739 			if (fp->rf_access_write == 0)
8740 				fp->rf_share_access &=
8741 				    ~OPEN4_SHARE_ACCESS_WRITE;
8742 		}
8743 		if (sp->rs_open_deny & OPEN4_SHARE_DENY_READ) {
8744 			fp->rf_deny_read--;
8745 			if (fp->rf_deny_read == 0)
8746 				fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
8747 		}
8748 		if (sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) {
8749 			fp->rf_deny_write--;
8750 			if (fp->rf_deny_write == 0)
8751 				fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
8752 		}
8753 
8754 		(void) VOP_CLOSE(fp->rf_vp, fflags, 1, (offset_t)0, cr, NULL);
8755 
8756 		rfs4_dbe_unlock(fp->rf_dbe);
8757 
8758 		sp->rs_open_access = 0;
8759 		sp->rs_open_deny = 0;
8760 	}
8761 }
8762 
8763 /*
8764  * lock_denied: Fill in a LOCK4deneid structure given an flock64 structure.
8765  */
8766 static nfsstat4
8767 lock_denied(LOCK4denied *dp, struct flock64 *flk)
8768 {
8769 	rfs4_lockowner_t *lo;
8770 	rfs4_client_t *cp;
8771 	uint32_t len;
8772 
8773 	lo = rfs4_findlockowner_by_pid(flk->l_pid);
8774 	if (lo != NULL) {
8775 		cp = lo->rl_client;
8776 		if (rfs4_lease_expired(cp)) {
8777 			rfs4_lockowner_rele(lo);
8778 			rfs4_dbe_hold(cp->rc_dbe);
8779 			rfs4_client_close(cp);
8780 			return (NFS4ERR_EXPIRED);
8781 		}
8782 		dp->owner.clientid = lo->rl_owner.clientid;
8783 		len = lo->rl_owner.owner_len;
8784 		dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8785 		bcopy(lo->rl_owner.owner_val, dp->owner.owner_val, len);
8786 		dp->owner.owner_len = len;
8787 		rfs4_lockowner_rele(lo);
8788 		goto finish;
8789 	}
8790 
8791 	/*
8792 	 * Its not a NFS4 lock. We take advantage that the upper 32 bits
8793 	 * of the client id contain the boot time for a NFS4 lock. So we
8794 	 * fabricate and identity by setting clientid to the sysid, and
8795 	 * the lock owner to the pid.
8796 	 */
8797 	dp->owner.clientid = flk->l_sysid;
8798 	len = sizeof (pid_t);
8799 	dp->owner.owner_len = len;
8800 	dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8801 	bcopy(&flk->l_pid, dp->owner.owner_val, len);
8802 finish:
8803 	dp->offset = flk->l_start;
8804 	dp->length = flk->l_len;
8805 
8806 	if (flk->l_type == F_RDLCK)
8807 		dp->locktype = READ_LT;
8808 	else if (flk->l_type == F_WRLCK)
8809 		dp->locktype = WRITE_LT;
8810 	else
8811 		return (NFS4ERR_INVAL);	/* no mapping from POSIX ltype to v4 */
8812 
8813 	return (NFS4_OK);
8814 }
8815 
8816 /*
8817  * The NFSv4.0 LOCK operation does not support the blocking lock (at the
8818  * NFSv4.0 protocol level) so the client needs to resend the LOCK request in a
8819  * case the lock is denied by the NFSv4.0 server.  NFSv4.0 clients are prepared
8820  * for that (obviously); they are sending the LOCK requests with some delays
8821  * between the attempts.  See nfs4frlock() and nfs4_block_and_wait() for the
8822  * locking and delay implementation at the client side.
8823  *
8824  * To make the life of the clients easier, the NFSv4.0 server tries to do some
8825  * fast retries on its own (the for loop below) in a hope the lock will be
8826  * available soon.  And if not, the client won't need to resend the LOCK
8827  * requests so fast to check the lock availability.  This basically saves some
8828  * network traffic and tries to make sure the client gets the lock ASAP.
8829  */
8830 static int
8831 setlock(vnode_t *vp, struct flock64 *flock, int flag, cred_t *cred)
8832 {
8833 	int error;
8834 	struct flock64 flk;
8835 	int i;
8836 	clock_t delaytime;
8837 	int cmd;
8838 	int spin_cnt = 0;
8839 
8840 	cmd = nbl_need_check(vp) ? F_SETLK_NBMAND : F_SETLK;
8841 retry:
8842 	delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
8843 
8844 	for (i = 0; i < rfs4_maxlock_tries; i++) {
8845 		LOCK_PRINT(rfs4_debug, "setlock", cmd, flock);
8846 		error = VOP_FRLOCK(vp, cmd,
8847 		    flock, flag, (u_offset_t)0, NULL, cred, NULL);
8848 
8849 		if (error != EAGAIN && error != EACCES)
8850 			break;
8851 
8852 		if (i < rfs4_maxlock_tries - 1) {
8853 			delay(delaytime);
8854 			delaytime *= 2;
8855 		}
8856 	}
8857 
8858 	if (error == EAGAIN || error == EACCES) {
8859 		/* Get the owner of the lock */
8860 		flk = *flock;
8861 		LOCK_PRINT(rfs4_debug, "setlock", F_GETLK, &flk);
8862 		if (VOP_FRLOCK(vp, F_GETLK, &flk, flag, 0, NULL, cred,
8863 		    NULL) == 0) {
8864 			/*
8865 			 * There's a race inherent in the current VOP_FRLOCK
8866 			 * design where:
8867 			 * a: "other guy" takes a lock that conflicts with a
8868 			 * lock we want
8869 			 * b: we attempt to take our lock (non-blocking) and
8870 			 * the attempt fails.
8871 			 * c: "other guy" releases the conflicting lock
8872 			 * d: we ask what lock conflicts with the lock we want,
8873 			 * getting F_UNLCK (no lock blocks us)
8874 			 *
8875 			 * If we retry the non-blocking lock attempt in this
8876 			 * case (restart at step 'b') there's some possibility
8877 			 * that many such attempts might fail.  However a test
8878 			 * designed to actually provoke this race shows that
8879 			 * the vast majority of cases require no retry, and
8880 			 * only a few took as many as three retries.  Here's
8881 			 * the test outcome:
8882 			 *
8883 			 *	   number of retries    how many times we needed
8884 			 *				that many retries
8885 			 *	   0			79461
8886 			 *	   1			  862
8887 			 *	   2			   49
8888 			 *	   3			    5
8889 			 *
8890 			 * Given those empirical results, we arbitrarily limit
8891 			 * the retry count to ten.
8892 			 *
8893 			 * If we actually make to ten retries and give up,
8894 			 * nothing catastrophic happens, but we're unable to
8895 			 * return the information about the conflicting lock to
8896 			 * the NFS client.  That's an acceptable trade off vs.
8897 			 * letting this retry loop run forever.
8898 			 */
8899 			if (flk.l_type == F_UNLCK) {
8900 				if (spin_cnt++ < 10) {
8901 					/* No longer locked, retry */
8902 					goto retry;
8903 				}
8904 			} else {
8905 				*flock = flk;
8906 				LOCK_PRINT(rfs4_debug, "setlock(blocking lock)",
8907 				    F_GETLK, &flk);
8908 			}
8909 		}
8910 	}
8911 
8912 	return (error);
8913 }
8914 
8915 /*ARGSUSED*/
8916 static nfsstat4
8917 rfs4_do_lock(rfs4_lo_state_t *lsp, nfs_lock_type4 locktype,
8918     offset4 offset, length4 length, cred_t *cred, nfs_resop4 *resop)
8919 {
8920 	nfsstat4 status;
8921 	rfs4_lockowner_t *lo = lsp->rls_locker;
8922 	rfs4_state_t *sp = lsp->rls_state;
8923 	struct flock64 flock;
8924 	int16_t ltype;
8925 	int flag;
8926 	int error;
8927 	sysid_t sysid;
8928 	LOCK4res *lres;
8929 	vnode_t *vp;
8930 
8931 	if (rfs4_lease_expired(lo->rl_client)) {
8932 		return (NFS4ERR_EXPIRED);
8933 	}
8934 
8935 	if ((status = rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
8936 		return (status);
8937 
8938 	/* Check for zero length. To lock to end of file use all ones for V4 */
8939 	if (length == 0)
8940 		return (NFS4ERR_INVAL);
8941 	else if (length == (length4)(~0))
8942 		length = 0;		/* Posix to end of file  */
8943 
8944 retry:
8945 	rfs4_dbe_lock(sp->rs_dbe);
8946 	if (sp->rs_closed == TRUE) {
8947 		rfs4_dbe_unlock(sp->rs_dbe);
8948 		return (NFS4ERR_OLD_STATEID);
8949 	}
8950 
8951 	if (resop->resop != OP_LOCKU) {
8952 		switch (locktype) {
8953 		case READ_LT:
8954 		case READW_LT:
8955 			if ((sp->rs_share_access
8956 			    & OPEN4_SHARE_ACCESS_READ) == 0) {
8957 				rfs4_dbe_unlock(sp->rs_dbe);
8958 
8959 				return (NFS4ERR_OPENMODE);
8960 			}
8961 			ltype = F_RDLCK;
8962 			break;
8963 		case WRITE_LT:
8964 		case WRITEW_LT:
8965 			if ((sp->rs_share_access
8966 			    & OPEN4_SHARE_ACCESS_WRITE) == 0) {
8967 				rfs4_dbe_unlock(sp->rs_dbe);
8968 
8969 				return (NFS4ERR_OPENMODE);
8970 			}
8971 			ltype = F_WRLCK;
8972 			break;
8973 		}
8974 	} else
8975 		ltype = F_UNLCK;
8976 
8977 	flock.l_type = ltype;
8978 	flock.l_whence = 0;		/* SEEK_SET */
8979 	flock.l_start = offset;
8980 	flock.l_len = length;
8981 	flock.l_sysid = sysid;
8982 	flock.l_pid = lsp->rls_locker->rl_pid;
8983 
8984 	/* Note that length4 is uint64_t but l_len and l_start are off64_t */
8985 	if (flock.l_len < 0 || flock.l_start < 0) {
8986 		rfs4_dbe_unlock(sp->rs_dbe);
8987 		return (NFS4ERR_INVAL);
8988 	}
8989 
8990 	/*
8991 	 * N.B. FREAD has the same value as OPEN4_SHARE_ACCESS_READ and
8992 	 * FWRITE has the same value as OPEN4_SHARE_ACCESS_WRITE.
8993 	 */
8994 	flag = (int)sp->rs_share_access | F_REMOTELOCK;
8995 
8996 	vp = sp->rs_finfo->rf_vp;
8997 	VN_HOLD(vp);
8998 
8999 	/*
9000 	 * We need to unlock sp before we call the underlying filesystem to
9001 	 * acquire the file lock.
9002 	 */
9003 	rfs4_dbe_unlock(sp->rs_dbe);
9004 
9005 	error = setlock(vp, &flock, flag, cred);
9006 
9007 	/*
9008 	 * Make sure the file is still open.  In a case the file was closed in
9009 	 * the meantime, clean the lock we acquired using the setlock() call
9010 	 * above, and return the appropriate error.
9011 	 */
9012 	rfs4_dbe_lock(sp->rs_dbe);
9013 	if (sp->rs_closed == TRUE) {
9014 		cleanlocks(vp, lsp->rls_locker->rl_pid, sysid);
9015 		rfs4_dbe_unlock(sp->rs_dbe);
9016 
9017 		VN_RELE(vp);
9018 
9019 		return (NFS4ERR_OLD_STATEID);
9020 	}
9021 	rfs4_dbe_unlock(sp->rs_dbe);
9022 
9023 	VN_RELE(vp);
9024 
9025 	if (error == 0) {
9026 		rfs4_dbe_lock(lsp->rls_dbe);
9027 		next_stateid(&lsp->rls_lockid);
9028 		rfs4_dbe_unlock(lsp->rls_dbe);
9029 	}
9030 
9031 	/*
9032 	 * N.B. We map error values to nfsv4 errors. This is differrent
9033 	 * than puterrno4 routine.
9034 	 */
9035 	switch (error) {
9036 	case 0:
9037 		status = NFS4_OK;
9038 		break;
9039 	case EAGAIN:
9040 	case EACCES:		/* Old value */
9041 		/* Can only get here if op is OP_LOCK */
9042 		ASSERT(resop->resop == OP_LOCK);
9043 		lres = &resop->nfs_resop4_u.oplock;
9044 		status = NFS4ERR_DENIED;
9045 		if (lock_denied(&lres->LOCK4res_u.denied, &flock)
9046 		    == NFS4ERR_EXPIRED)
9047 			goto retry;
9048 		break;
9049 	case ENOLCK:
9050 		status = NFS4ERR_DELAY;
9051 		break;
9052 	case EOVERFLOW:
9053 		status = NFS4ERR_INVAL;
9054 		break;
9055 	case EINVAL:
9056 		status = NFS4ERR_NOTSUPP;
9057 		break;
9058 	default:
9059 		status = NFS4ERR_SERVERFAULT;
9060 		break;
9061 	}
9062 
9063 	return (status);
9064 }
9065 
/*
 * LOCK operation handler.
 *
 * Two paths are taken depending on args->locker.new_lock_owner:
 *
 *  - new lock owner: the open stateid is looked up and checked while
 *    holding the open owner's ro_sw, and the lock owner and its
 *    lock-owner state are found (or created);
 *  - existing lock owner: the lock stateid is looked up and its
 *    stateid and lock seqid are checked.
 *
 * On both paths lsp->rls_sw is entered before the lock itself is
 * attempted through rfs4_do_lock().  The result is stored in both
 * *cs->statusp and resp->status.
 */
9066 /*ARGSUSED*/
9067 void
9068 rfs4_op_lock(nfs_argop4 *argop, nfs_resop4 *resop,
9069     struct svc_req *req, struct compound_state *cs)
9070 {
9071 	LOCK4args *args = &argop->nfs_argop4_u.oplock;
9072 	LOCK4res *resp = &resop->nfs_resop4_u.oplock;
9073 	nfsstat4 status;
9074 	stateid4 *stateid;
9075 	rfs4_lockowner_t *lo;
9076 	rfs4_client_t *cp;
9077 	rfs4_state_t *sp = NULL;
9078 	rfs4_lo_state_t *lsp = NULL;
9079 	bool_t ls_sw_held = FALSE;
9080 	bool_t create = TRUE;
9081 	bool_t lcreate = TRUE;
9082 	bool_t dup_lock = FALSE;
9083 	int rc;
9084 
9085 	DTRACE_NFSV4_2(op__lock__start, struct compound_state *, cs,
9086 	    LOCK4args *, args);
9087 
9088 	if (cs->vp == NULL) {
9089 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9090 		DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9091 		    cs, LOCK4res *, resp);
9092 		return;
9093 	}
9094 
9095 	if (args->locker.new_lock_owner) {
9096 		/* Create a new lockowner for this instance */
9097 		open_to_lock_owner4 *olo = &args->locker.locker4_u.open_owner;
9098 
9099 		NFS4_DEBUG(rfs4_debug, (CE_NOTE, "Creating new lock owner"));
9100 
9101 		stateid = &olo->open_stateid;
9102 		status = rfs4_get_state(stateid, &sp, RFS4_DBS_VALID);
9103 		if (status != NFS4_OK) {
9104 			NFS4_DEBUG(rfs4_debug,
9105 			    (CE_NOTE, "Get state failed in lock %d", status));
9106 			*cs->statusp = resp->status = status;
9107 			DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9108 			    cs, LOCK4res *, resp);
9109 			return;
9110 		}
9111 
9112 		/* Ensure specified filehandle matches */
9113 		if (cs->vp != sp->rs_finfo->rf_vp) {
9114 			rfs4_state_rele(sp);
9115 			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9116 			DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9117 			    cs, LOCK4res *, resp);
9118 			return;
9119 		}
9120 
9121 		/* hold off other access to open_owner while we tinker */
9122 		rfs4_sw_enter(&sp->rs_owner->ro_sw);
9123 
		/* rc is kept so the seqid check below can tell OKAY from REPLAY */
9124 		switch (rc = rfs4_check_stateid_seqid(sp, stateid, cs)) {
9125 		case NFS4_CHECK_STATEID_OLD:
9126 			*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9127 			goto end;
9128 		case NFS4_CHECK_STATEID_BAD:
9129 			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9130 			goto end;
9131 		case NFS4_CHECK_STATEID_EXPIRED:
9132 			*cs->statusp = resp->status = NFS4ERR_EXPIRED;
9133 			goto end;
9134 		case NFS4_CHECK_STATEID_UNCONFIRMED:
9135 			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9136 			goto end;
9137 		case NFS4_CHECK_STATEID_CLOSED:
9138 			*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9139 			goto end;
9140 		case NFS4_CHECK_STATEID_OKAY:
9141 			if (rfs4_has_session(cs))
9142 				break;
9143 			/* FALLTHROUGH */
9144 		case NFS4_CHECK_STATEID_REPLAY:
9145 			ASSERT(!rfs4_has_session(cs));
9146 
9147 			switch (rfs4_check_olo_seqid(olo->open_seqid,
9148 			    sp->rs_owner, resop)) {
9149 			case NFS4_CHKSEQ_OKAY:
9150 				if (rc == NFS4_CHECK_STATEID_OKAY)
9151 					break;
9152 				/*
9153 				 * This is replayed stateid; if seqid
9154 				 * matches next expected, then client
9155 				 * is using wrong seqid.
9156 				 */
9157 				/* FALLTHROUGH */
9158 			case NFS4_CHKSEQ_BAD:
9159 				*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9160 				goto end;
9161 			case NFS4_CHKSEQ_REPLAY:
9162 				/* This is a duplicate LOCK request */
9163 				dup_lock = TRUE;
9164 
9165 				/*
9166 				 * For a duplicate we do not want to
9167 				 * create a new lockowner as it should
9168 				 * already exist.
9169 				 * Turn off the lockowner create flag.
9170 				 */
9171 				lcreate = FALSE;
9172 			}
9173 			break;
9174 		}
9175 
		/* lcreate is still TRUE here unless a duplicate was detected */
9176 		lo = rfs4_findlockowner(&olo->lock_owner, &lcreate);
9177 		if (lo == NULL) {
9178 			NFS4_DEBUG(rfs4_debug,
9179 			    (CE_NOTE, "rfs4_op_lock: no lock owner"));
9180 			*cs->statusp = resp->status = NFS4ERR_RESOURCE;
9181 			goto end;
9182 		}
9183 
9184 		lsp = rfs4_findlo_state_by_owner(lo, sp, &create);
9185 		if (lsp == NULL) {
9186 			rfs4_update_lease(sp->rs_owner->ro_client);
9187 			/*
9188 			 * Only update the open_seqid if this is not
9189 			 * a duplicate request
9190 			 */
9191 			if (dup_lock == FALSE) {
9192 				rfs4_update_open_sequence(sp->rs_owner);
9193 			}
9194 
9195 			NFS4_DEBUG(rfs4_debug,
9196 			    (CE_NOTE, "rfs4_op_lock: no state"));
9197 			*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
9198 			rfs4_update_open_resp(sp->rs_owner, resop, NULL);
9199 			rfs4_lockowner_rele(lo);
9200 			goto end;
9201 		}
9202 
9203 		/*
9204 		 * This is the new_lock_owner branch and the client is
9205 		 * supposed to be associating a new lock_owner with
9206 		 * the open file at this point.  If we find that a
9207 		 * lock_owner/state association already exists and a
9208 		 * successful LOCK request was returned to the client,
9209 		 * an error is returned to the client since this is
9210 		 * not appropriate.  The client should be using the
9211 		 * existing lock_owner branch.
9212 		 */
9213 		if (!rfs4_has_session(cs) && !dup_lock && !create) {
9214 			if (lsp->rls_lock_completed == TRUE) {
9215 				*cs->statusp =
9216 				    resp->status = NFS4ERR_BAD_SEQID;
9217 				rfs4_lockowner_rele(lo);
9218 				goto end;
9219 			}
9220 		}
9221 
9222 		rfs4_update_lease(sp->rs_owner->ro_client);
9223 
9224 		/*
9225 		 * Only update the open_seqid if this is not
9226 		 * a duplicate request
9227 		 */
9228 		if (dup_lock == FALSE) {
9229 			rfs4_update_open_sequence(sp->rs_owner);
9230 		}
9231 
9232 		/*
9233 		 * If this is a duplicate lock request, just copy the
9234 		 * previously saved reply and return.
9235 		 */
9236 		if (dup_lock == TRUE) {
9237 			/* verify that lock_seqid's match */
9238 			if (lsp->rls_seqid != olo->lock_seqid) {
9239 				NFS4_DEBUG(rfs4_debug,
9240 				    (CE_NOTE, "rfs4_op_lock: Dup-Lock seqid bad"
9241 				    "lsp->seqid=%d old->seqid=%d",
9242 				    lsp->rls_seqid, olo->lock_seqid));
9243 				*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9244 			} else {
9245 				rfs4_copy_reply(resop, &lsp->rls_reply);
9246 				/*
9247 				 * Make sure to copy the just
9248 				 * retrieved reply status into the
9249 				 * overall compound status
9250 				 */
9251 				*cs->statusp = resp->status;
9252 			}
9253 			rfs4_lockowner_rele(lo);
9254 			goto end;
9255 		}
9256 
9257 		rfs4_dbe_lock(lsp->rls_dbe);
9258 
9259 		/* Make sure to update the lock sequence id */
9260 		lsp->rls_seqid = olo->lock_seqid;
9261 
9262 		NFS4_DEBUG(rfs4_debug,
9263 		    (CE_NOTE, "Lock seqid established as %d", lsp->rls_seqid));
9264 
9265 		/*
9266 		 * This is used to signify the newly created lockowner
9267 		 * stateid and its sequence number.  The checks for
9268 		 * sequence number and increment don't occur on the
9269 		 * very first lock request for a lockowner.
9270 		 */
9271 		lsp->rls_skip_seqid_check = TRUE;
9272 
9273 		/* hold off other access to lsp while we tinker */
9274 		rfs4_sw_enter(&lsp->rls_sw);
9275 		ls_sw_held = TRUE;
9276 
9277 		rfs4_dbe_unlock(lsp->rls_dbe);
9278 
9279 		rfs4_lockowner_rele(lo);
9280 	} else {
9281 		stateid = &args->locker.locker4_u.lock_owner.lock_stateid;
9282 		/* get lsp and hold the lock on the underlying file struct */
9283 		if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE))
9284 		    != NFS4_OK) {
9285 			*cs->statusp = resp->status = status;
9286 			DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9287 			    cs, LOCK4res *, resp);
9288 			return;
9289 		}
9290 		create = FALSE;	/* We didn't create lsp */
9291 
9292 		/* Ensure specified filehandle matches */
9293 		if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9294 			rfs4_lo_state_rele(lsp, TRUE);
9295 			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9296 			DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9297 			    cs, LOCK4res *, resp);
9298 			return;
9299 		}
9300 
9301 		/* hold off other access to lsp while we tinker */
9302 		rfs4_sw_enter(&lsp->rls_sw);
9303 		ls_sw_held = TRUE;
9304 
9305 		switch (rfs4_check_lo_stateid_seqid(lsp, stateid, cs)) {
9306 		/*
9307 		 * The stateid looks like it was okay (expected to be
9308 		 * the next one)
9309 		 */
9310 		case NFS4_CHECK_STATEID_OKAY:
9311 			if (rfs4_has_session(cs))
9312 				break;
9313 
9314 			/*
9315 			 * The sequence id is now checked.  Determine
9316 			 * if this is a replay or if it is in the
9317 			 * expected (next) sequence.  In the case of a
9318 			 * replay, there are two replay conditions
9319 			 * that may occur.  The first is the normal
9320 			 * condition where a LOCK is done with a
9321 			 * NFS4_OK response and the stateid is
9322 			 * updated.  That case is handled below when
9323 			 * the stateid is identified as a REPLAY.  The
9324 			 * second is the case where an error is
9325 			 * returned, like NFS4ERR_DENIED, and the
9326 			 * sequence number is updated but the stateid
9327 			 * is not updated.  This second case is dealt
9328 			 * with here.  So it may seem odd that the
9329 			 * stateid is okay but the sequence id is a
9330 			 * replay but it is okay.
9331 			 */
9332 			switch (rfs4_check_lock_seqid(
9333 			    args->locker.locker4_u.lock_owner.lock_seqid,
9334 			    lsp, resop)) {
9335 			case NFS4_CHKSEQ_REPLAY:
9336 				if (resp->status != NFS4_OK) {
9337 					/*
9338 					 * Here is our replay and need
9339 					 * to verify that the last
9340 					 * response was an error.
9341 					 */
9342 					*cs->statusp = resp->status;
9343 					goto end;
9344 				}
9345 				/*
9346 				 * This is done since the sequence id
9347 				 * looked like a replay but it didn't
9348 				 * pass our check so a BAD_SEQID is
9349 				 * returned as a result.
9350 				 */
9351 				/*FALLTHROUGH*/
9352 			case NFS4_CHKSEQ_BAD:
9353 				*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9354 				goto end;
9355 			case NFS4_CHKSEQ_OKAY:
9356 				/* Everything looks okay move ahead */
9357 				break;
9358 			}
9359 			break;
9360 		case NFS4_CHECK_STATEID_OLD:
9361 			*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9362 			goto end;
9363 		case NFS4_CHECK_STATEID_BAD:
9364 			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9365 			goto end;
9366 		case NFS4_CHECK_STATEID_EXPIRED:
9367 			*cs->statusp = resp->status = NFS4ERR_EXPIRED;
9368 			goto end;
9369 		case NFS4_CHECK_STATEID_CLOSED:
9370 			*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9371 			goto end;
9372 		case NFS4_CHECK_STATEID_REPLAY:
9373 			ASSERT(!rfs4_has_session(cs));
9374 
9375 			switch (rfs4_check_lock_seqid(
9376 			    args->locker.locker4_u.lock_owner.lock_seqid,
9377 			    lsp, resop)) {
9378 			case NFS4_CHKSEQ_OKAY:
9379 				/*
9380 				 * This is a replayed stateid; if
9381 				 * seqid matches the next expected,
9382 				 * then client is using wrong seqid.
9383 				 */
				/* FALLTHROUGH */
9384 			case NFS4_CHKSEQ_BAD:
9385 				*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9386 				goto end;
9387 			case NFS4_CHKSEQ_REPLAY:
9388 				rfs4_update_lease(lsp->rls_locker->rl_client);
9389 				*cs->statusp = status = resp->status;
9390 				goto end;
9391 			}
9392 			break;
9393 		default:
9394 			ASSERT(FALSE);
9395 			break;
9396 		}
9397 
9398 		rfs4_update_lock_sequence(lsp);
9399 		rfs4_update_lease(lsp->rls_locker->rl_client);
9400 	}
9401 
9402 	/*
9403 	 * NFS4 only allows locking on regular files, so
9404 	 * verify type of object.
9405 	 */
9406 	if (cs->vp->v_type != VREG) {
9407 		if (cs->vp->v_type == VDIR)
9408 			status = NFS4ERR_ISDIR;
9409 		else
9410 			status = NFS4ERR_INVAL;
9411 		goto out;
9412 	}
9413 
9414 	cp = lsp->rls_state->rs_owner->ro_client;
9415 
	/*
	 * Grace period rules: while the client is in grace only reclaims
	 * are accepted, and only if the client is allowed to reclaim;
	 * outside of grace a reclaim is refused.
	 */
9416 	if (rfs4_clnt_in_grace(cp) && !args->reclaim) {
9417 		status = NFS4ERR_GRACE;
9418 		goto out;
9419 	}
9420 
9421 	if (rfs4_clnt_in_grace(cp) && args->reclaim && !cp->rc_can_reclaim) {
9422 		status = NFS4ERR_NO_GRACE;
9423 		goto out;
9424 	}
9425 
9426 	if (!rfs4_clnt_in_grace(cp) && args->reclaim) {
9427 		status = NFS4ERR_NO_GRACE;
9428 		goto out;
9429 	}
9430 
9431 	if (lsp->rls_state->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE)
9432 		cs->deleg = TRUE;
9433 
9434 	status = rfs4_do_lock(lsp, args->locktype,
9435 	    args->offset, args->length, cs->cr, resop);
9436 
	/*
	 * Reached with lsp set and 'status' holding either the result of
	 * rfs4_do_lock() or one of the errors chosen just above.
	 */
9437 out:
9438 	lsp->rls_skip_seqid_check = FALSE;
9439 
9440 	*cs->statusp = resp->status = status;
9441 
9442 	if (status == NFS4_OK) {
9443 		resp->LOCK4res_u.lock_stateid = lsp->rls_lockid.stateid;
9444 		lsp->rls_lock_completed = TRUE;
9445 	}
9446 	/*
9447 	 * Only update the "OPEN" response here if this was a new
9448 	 * lock_owner
9449 	 */
9450 	if (sp)
9451 		rfs4_update_open_resp(sp->rs_owner, resop, NULL);
9452 
9453 	rfs4_update_lock_resp(lsp, resop);
9454 
	/*
	 * Common exit: undo whatever was taken above.  lsp is NULL when
	 * we bailed out before a lock-owner state was found, and sp is
	 * NULL on the existing lock owner path.
	 */
9455 end:
9456 	if (lsp) {
9457 		if (ls_sw_held)
9458 			rfs4_sw_exit(&lsp->rls_sw);
9459 		/*
9460 		 * If an sp obtained, then the lsp does not represent
9461 		 * a lock on the file struct.
9462 		 */
9463 		if (sp != NULL)
9464 			rfs4_lo_state_rele(lsp, FALSE);
9465 		else
9466 			rfs4_lo_state_rele(lsp, TRUE);
9467 	}
9468 	if (sp) {
9469 		rfs4_sw_exit(&sp->rs_owner->ro_sw);
9470 		rfs4_state_rele(sp);
9471 	}
9472 
9473 	DTRACE_NFSV4_2(op__lock__done, struct compound_state *, cs,
9474 	    LOCK4res *, resp);
9475 }
9476 
9477 /* free function for LOCK/LOCKT */
9478 static void
9479 lock_denied_free(nfs_resop4 *resop)
9480 {
9481 	LOCK4denied *dp = NULL;
9482 
9483 	switch (resop->resop) {
9484 	case OP_LOCK:
9485 		if (resop->nfs_resop4_u.oplock.status == NFS4ERR_DENIED)
9486 			dp = &resop->nfs_resop4_u.oplock.LOCK4res_u.denied;
9487 		break;
9488 	case OP_LOCKT:
9489 		if (resop->nfs_resop4_u.oplockt.status == NFS4ERR_DENIED)
9490 			dp = &resop->nfs_resop4_u.oplockt.denied;
9491 		break;
9492 	default:
9493 		break;
9494 	}
9495 
9496 	if (dp)
9497 		kmem_free(dp->owner.owner_val, dp->owner.owner_len);
9498 }
9499 
9500 /*ARGSUSED*/
9501 void
9502 rfs4_op_locku(nfs_argop4 *argop, nfs_resop4 *resop,
9503     struct svc_req *req, struct compound_state *cs)
9504 {
9505 	LOCKU4args *args = &argop->nfs_argop4_u.oplocku;
9506 	LOCKU4res *resp = &resop->nfs_resop4_u.oplocku;
9507 	nfsstat4 status;
9508 	stateid4 *stateid = &args->lock_stateid;
9509 	rfs4_lo_state_t *lsp;
9510 
9511 	DTRACE_NFSV4_2(op__locku__start, struct compound_state *, cs,
9512 	    LOCKU4args *, args);
9513 
9514 	if (cs->vp == NULL) {
9515 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9516 		DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9517 		    LOCKU4res *, resp);
9518 		return;
9519 	}
9520 
9521 	if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE)) != NFS4_OK) {
9522 		*cs->statusp = resp->status = status;
9523 		DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9524 		    LOCKU4res *, resp);
9525 		return;
9526 	}
9527 
9528 	/* Ensure specified filehandle matches */
9529 	if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9530 		rfs4_lo_state_rele(lsp, TRUE);
9531 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9532 		DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9533 		    LOCKU4res *, resp);
9534 		return;
9535 	}
9536 
9537 	/* hold off other access to lsp while we tinker */
9538 	rfs4_sw_enter(&lsp->rls_sw);
9539 
9540 	switch (rfs4_check_lo_stateid_seqid(lsp, stateid, cs)) {
9541 	case NFS4_CHECK_STATEID_OKAY:
9542 		if (rfs4_has_session(cs))
9543 			break;
9544 
9545 		if (rfs4_check_lock_seqid(args->seqid, lsp, resop)
9546 		    != NFS4_CHKSEQ_OKAY) {
9547 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9548 			goto end;
9549 		}
9550 		break;
9551 	case NFS4_CHECK_STATEID_OLD:
9552 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9553 		goto end;
9554 	case NFS4_CHECK_STATEID_BAD:
9555 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9556 		goto end;
9557 	case NFS4_CHECK_STATEID_EXPIRED:
9558 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
9559 		goto end;
9560 	case NFS4_CHECK_STATEID_CLOSED:
9561 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9562 		goto end;
9563 	case NFS4_CHECK_STATEID_REPLAY:
9564 		ASSERT(!rfs4_has_session(cs));
9565 
9566 		switch (rfs4_check_lock_seqid(args->seqid, lsp, resop)) {
9567 		case NFS4_CHKSEQ_OKAY:
9568 				/*
9569 				 * This is a replayed stateid; if
9570 				 * seqid matches the next expected,
9571 				 * then client is using wrong seqid.
9572 				 */
9573 		case NFS4_CHKSEQ_BAD:
9574 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9575 			goto end;
9576 		case NFS4_CHKSEQ_REPLAY:
9577 			rfs4_update_lease(lsp->rls_locker->rl_client);
9578 			*cs->statusp = status = resp->status;
9579 			goto end;
9580 		}
9581 		break;
9582 	default:
9583 		ASSERT(FALSE);
9584 		break;
9585 	}
9586 
9587 	rfs4_update_lock_sequence(lsp);
9588 	rfs4_update_lease(lsp->rls_locker->rl_client);
9589 
9590 	/*
9591 	 * NFS4 only allows locking on regular files, so
9592 	 * verify type of object.
9593 	 */
9594 	if (cs->vp->v_type != VREG) {
9595 		if (cs->vp->v_type == VDIR)
9596 			status = NFS4ERR_ISDIR;
9597 		else
9598 			status = NFS4ERR_INVAL;
9599 		goto out;
9600 	}
9601 
9602 	if (rfs4_clnt_in_grace(lsp->rls_state->rs_owner->ro_client)) {
9603 		status = NFS4ERR_GRACE;
9604 		goto out;
9605 	}
9606 
9607 	status = rfs4_do_lock(lsp, args->locktype,
9608 	    args->offset, args->length, cs->cr, resop);
9609 
9610 out:
9611 	*cs->statusp = resp->status = status;
9612 
9613 	if (status == NFS4_OK)
9614 		resp->lock_stateid = lsp->rls_lockid.stateid;
9615 
9616 	rfs4_update_lock_resp(lsp, resop);
9617 
9618 end:
9619 	rfs4_sw_exit(&lsp->rls_sw);
9620 	rfs4_lo_state_rele(lsp, TRUE);
9621 
9622 	DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9623 	    LOCKU4res *, resp);
9624 }
9625 
9626 /*
9627  * LOCKT is a best effort routine, the client can not be guaranteed that
9628  * the status return is still in effect by the time the reply is received.
9629  * They are numerous race conditions in this routine, but we are not required
9630  * and can not be accurate.
9631  */
9632 /*ARGSUSED*/
9633 void
9634 rfs4_op_lockt(nfs_argop4 *argop, nfs_resop4 *resop,
9635     struct svc_req *req, struct compound_state *cs)
9636 {
9637 	LOCKT4args *args = &argop->nfs_argop4_u.oplockt;
9638 	LOCKT4res *resp = &resop->nfs_resop4_u.oplockt;
9639 	rfs4_lockowner_t *lo;
9640 	rfs4_client_t *cp;
9641 	bool_t create = FALSE;
9642 	struct flock64 flk;
9643 	int error;
9644 	int flag = FREAD | FWRITE;
9645 	int ltype;
9646 	length4 posix_length;
9647 	sysid_t sysid;
9648 	pid_t pid;
9649 
9650 	DTRACE_NFSV4_2(op__lockt__start, struct compound_state *, cs,
9651 	    LOCKT4args *, args);
9652 
9653 	if (cs->vp == NULL) {
9654 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9655 		goto out;
9656 	}
9657 
9658 	/*
9659 	 * NFS4 only allows locking on regular files, so
9660 	 * verify type of object.
9661 	 */
9662 	if (cs->vp->v_type != VREG) {
9663 		if (cs->vp->v_type == VDIR)
9664 			*cs->statusp = resp->status = NFS4ERR_ISDIR;
9665 		else
9666 			*cs->statusp = resp->status =  NFS4ERR_INVAL;
9667 		goto out;
9668 	}
9669 
9670 	/*
9671 	 * Check out the clientid to ensure the server knows about it
9672 	 * so that we correctly inform the client of a server reboot.
9673 	 */
9674 	if ((cp = rfs4_findclient_by_id(args->owner.clientid, FALSE))
9675 	    == NULL) {
9676 		*cs->statusp = resp->status =
9677 		    rfs4_check_clientid(&args->owner.clientid, 0);
9678 		goto out;
9679 	}
9680 	if (rfs4_lease_expired(cp)) {
9681 		rfs4_client_close(cp);
9682 		/*
9683 		 * Protocol doesn't allow returning NFS4ERR_STALE as
9684 		 * other operations do on this check so STALE_CLIENTID
9685 		 * is returned instead
9686 		 */
9687 		*cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID;
9688 		goto out;
9689 	}
9690 
9691 	if (rfs4_clnt_in_grace(cp) && !(cp->rc_can_reclaim)) {
9692 		*cs->statusp = resp->status = NFS4ERR_GRACE;
9693 		rfs4_client_rele(cp);
9694 		goto out;
9695 	}
9696 	rfs4_client_rele(cp);
9697 
9698 	resp->status = NFS4_OK;
9699 
9700 	switch (args->locktype) {
9701 	case READ_LT:
9702 	case READW_LT:
9703 		ltype = F_RDLCK;
9704 		break;
9705 	case WRITE_LT:
9706 	case WRITEW_LT:
9707 		ltype = F_WRLCK;
9708 		break;
9709 	}
9710 
9711 	posix_length = args->length;
9712 	/* Check for zero length. To lock to end of file use all ones for V4 */
9713 	if (posix_length == 0) {
9714 		*cs->statusp = resp->status = NFS4ERR_INVAL;
9715 		goto out;
9716 	} else if (posix_length == (length4)(~0)) {
9717 		posix_length = 0;	/* Posix to end of file  */
9718 	}
9719 
9720 	/* Find or create a lockowner */
9721 	lo = rfs4_findlockowner(&args->owner, &create);
9722 
9723 	if (lo) {
9724 		pid = lo->rl_pid;
9725 		if ((resp->status =
9726 		    rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
9727 			goto err;
9728 	} else {
9729 		pid = 0;
9730 		sysid = lockt_sysid;
9731 	}
9732 retry:
9733 	flk.l_type = ltype;
9734 	flk.l_whence = 0;		/* SEEK_SET */
9735 	flk.l_start = args->offset;
9736 	flk.l_len = posix_length;
9737 	flk.l_sysid = sysid;
9738 	flk.l_pid = pid;
9739 	flag |= F_REMOTELOCK;
9740 
9741 	LOCK_PRINT(rfs4_debug, "rfs4_op_lockt", F_GETLK, &flk);
9742 
9743 	/* Note that length4 is uint64_t but l_len and l_start are off64_t */
9744 	if (flk.l_len < 0 || flk.l_start < 0) {
9745 		resp->status = NFS4ERR_INVAL;
9746 		goto err;
9747 	}
9748 	error = VOP_FRLOCK(cs->vp, F_GETLK, &flk, flag, (u_offset_t)0,
9749 	    NULL, cs->cr, NULL);
9750 
9751 	/*
9752 	 * N.B. We map error values to nfsv4 errors. This is differrent
9753 	 * than puterrno4 routine.
9754 	 */
9755 	switch (error) {
9756 	case 0:
9757 		if (flk.l_type == F_UNLCK)
9758 			resp->status = NFS4_OK;
9759 		else {
9760 			if (lock_denied(&resp->denied, &flk) == NFS4ERR_EXPIRED)
9761 				goto retry;
9762 			resp->status = NFS4ERR_DENIED;
9763 		}
9764 		break;
9765 	case EOVERFLOW:
9766 		resp->status = NFS4ERR_INVAL;
9767 		break;
9768 	case EINVAL:
9769 		resp->status = NFS4ERR_NOTSUPP;
9770 		break;
9771 	default:
9772 		cmn_err(CE_WARN, "rfs4_op_lockt: unexpected errno (%d)",
9773 		    error);
9774 		resp->status = NFS4ERR_SERVERFAULT;
9775 		break;
9776 	}
9777 
9778 err:
9779 	if (lo)
9780 		rfs4_lockowner_rele(lo);
9781 	*cs->statusp = resp->status;
9782 out:
9783 	DTRACE_NFSV4_2(op__lockt__done, struct compound_state *, cs,
9784 	    LOCKT4res *, resp);
9785 }
9786 
9787 int
9788 rfs4_share(rfs4_state_t *sp, uint32_t access, uint32_t deny)
9789 {
9790 	int err;
9791 	int cmd;
9792 	vnode_t *vp;
9793 	struct shrlock shr;
9794 	struct shr_locowner shr_loco;
9795 	int fflags = 0;
9796 
9797 	ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9798 	ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9799 
9800 	if (sp->rs_closed)
9801 		return (NFS4ERR_OLD_STATEID);
9802 
9803 	vp = sp->rs_finfo->rf_vp;
9804 	ASSERT(vp);
9805 
9806 	shr.s_access = shr.s_deny = 0;
9807 
9808 	if (access & OPEN4_SHARE_ACCESS_READ) {
9809 		fflags |= FREAD;
9810 		shr.s_access |= F_RDACC;
9811 	}
9812 	if (access & OPEN4_SHARE_ACCESS_WRITE) {
9813 		fflags |= FWRITE;
9814 		shr.s_access |= F_WRACC;
9815 	}
9816 	ASSERT(shr.s_access);
9817 
9818 	if (deny & OPEN4_SHARE_DENY_READ)
9819 		shr.s_deny |= F_RDDNY;
9820 	if (deny & OPEN4_SHARE_DENY_WRITE)
9821 		shr.s_deny |= F_WRDNY;
9822 
9823 	shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9824 	shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9825 	shr_loco.sl_pid = shr.s_pid;
9826 	shr_loco.sl_id = shr.s_sysid;
9827 	shr.s_owner = (caddr_t)&shr_loco;
9828 	shr.s_own_len = sizeof (shr_loco);
9829 
9830 	cmd = nbl_need_check(vp) ? F_SHARE_NBMAND : F_SHARE;
9831 
9832 	err = VOP_SHRLOCK(vp, cmd, &shr, fflags, CRED(), NULL);
9833 	if (err != 0) {
9834 		if (err == EAGAIN)
9835 			err = NFS4ERR_SHARE_DENIED;
9836 		else
9837 			err = puterrno4(err);
9838 		return (err);
9839 	}
9840 
9841 	sp->rs_share_access |= access;
9842 	sp->rs_share_deny |= deny;
9843 
9844 	return (0);
9845 }
9846 
9847 int
9848 rfs4_unshare(rfs4_state_t *sp)
9849 {
9850 	int err;
9851 	struct shrlock shr;
9852 	struct shr_locowner shr_loco;
9853 
9854 	ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9855 
9856 	if (sp->rs_closed || sp->rs_share_access == 0)
9857 		return (0);
9858 
9859 	ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9860 	ASSERT(sp->rs_finfo->rf_vp);
9861 
9862 	shr.s_access = shr.s_deny = 0;
9863 	shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9864 	shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9865 	shr_loco.sl_pid = shr.s_pid;
9866 	shr_loco.sl_id = shr.s_sysid;
9867 	shr.s_owner = (caddr_t)&shr_loco;
9868 	shr.s_own_len = sizeof (shr_loco);
9869 
9870 	err = VOP_SHRLOCK(sp->rs_finfo->rf_vp, F_UNSHARE, &shr, 0, CRED(),
9871 	    NULL);
9872 	if (err != 0) {
9873 		err = puterrno4(err);
9874 		return (err);
9875 	}
9876 
9877 	sp->rs_share_access = 0;
9878 	sp->rs_share_deny = 0;
9879 
9880 	return (0);
9881 
9882 }
9883 
9884 static int
9885 rdma_setup_read_data4(READ4args *args, READ4res *rok)
9886 {
9887 	struct clist	*wcl;
9888 	count4		count = rok->data_len;
9889 	int		wlist_len;
9890 
9891 	wcl = args->wlist;
9892 	if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
9893 		return (FALSE);
9894 	}
9895 	wcl = args->wlist;
9896 	rok->wlist_len = wlist_len;
9897 	rok->wlist = wcl;
9898 	return (TRUE);
9899 }
9900 
/*
 * Tunable to disable server referrals.  When set to a nonzero value,
 * vn_is_nfs_reparse() returns B_FALSE without examining the vnode.
 */
int rfs4_no_referrals = 0;
9903 
9904 /*
9905  * Find an NFS record in reparse point data.
9906  * Returns 0 for success and <0 or an errno value on failure.
9907  */
9908 int
9909 vn_find_nfs_record(vnode_t *vp, nvlist_t **nvlp, char **svcp, char **datap)
9910 {
9911 	int err;
9912 	char *stype, *val;
9913 	nvlist_t *nvl;
9914 	nvpair_t *curr;
9915 
9916 	if ((nvl = reparse_init()) == NULL)
9917 		return (-1);
9918 
9919 	if ((err = reparse_vnode_parse(vp, nvl)) != 0) {
9920 		reparse_free(nvl);
9921 		return (err);
9922 	}
9923 
9924 	curr = NULL;
9925 	while ((curr = nvlist_next_nvpair(nvl, curr)) != NULL) {
9926 		if ((stype = nvpair_name(curr)) == NULL) {
9927 			reparse_free(nvl);
9928 			return (-2);
9929 		}
9930 		if (strncasecmp(stype, "NFS", 3) == 0)
9931 			break;
9932 	}
9933 
9934 	if ((curr == NULL) ||
9935 	    (nvpair_value_string(curr, &val))) {
9936 		reparse_free(nvl);
9937 		return (-3);
9938 	}
9939 	*nvlp = nvl;
9940 	*svcp = stype;
9941 	*datap = val;
9942 	return (0);
9943 }
9944 
9945 int
9946 vn_is_nfs_reparse(vnode_t *vp, cred_t *cr)
9947 {
9948 	nvlist_t *nvl;
9949 	char *s, *d;
9950 
9951 	if (rfs4_no_referrals != 0)
9952 		return (B_FALSE);
9953 
9954 	if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9955 		return (B_FALSE);
9956 
9957 	if (vn_find_nfs_record(vp, &nvl, &s, &d) != 0)
9958 		return (B_FALSE);
9959 
9960 	reparse_free(nvl);
9961 
9962 	return (B_TRUE);
9963 }
9964 
9965 /*
9966  * There is a user-level copy of this routine in ref_subr.c.
9967  * Changes should be kept in sync.
9968  */
9969 static int
9970 nfs4_create_components(char *path, component4 *comp4)
9971 {
9972 	int slen, plen, ncomp;
9973 	char *ori_path, *nxtc, buf[MAXNAMELEN];
9974 
9975 	if (path == NULL)
9976 		return (0);
9977 
9978 	plen = strlen(path) + 1;	/* include the terminator */
9979 	ori_path = path;
9980 	ncomp = 0;
9981 
9982 	/* count number of components in the path */
9983 	for (nxtc = path; nxtc < ori_path + plen; nxtc++) {
9984 		if (*nxtc == '/' || *nxtc == '\0' || *nxtc == '\n') {
9985 			if ((slen = nxtc - path) == 0) {
9986 				path = nxtc + 1;
9987 				continue;
9988 			}
9989 
9990 			if (comp4 != NULL) {
9991 				bcopy(path, buf, slen);
9992 				buf[slen] = '\0';
9993 				(void) str_to_utf8(buf, &comp4[ncomp]);
9994 			}
9995 
9996 			ncomp++;	/* 1 valid component */
9997 			path = nxtc + 1;
9998 		}
9999 		if (*nxtc == '\0' || *nxtc == '\n')
10000 			break;
10001 	}
10002 
10003 	return (ncomp);
10004 }
10005 
10006 /*
10007  * There is a user-level copy of this routine in ref_subr.c.
10008  * Changes should be kept in sync.
10009  */
10010 static int
10011 make_pathname4(char *path, pathname4 *pathname)
10012 {
10013 	int ncomp;
10014 	component4 *comp4;
10015 
10016 	if (pathname == NULL)
10017 		return (0);
10018 
10019 	if (path == NULL) {
10020 		pathname->pathname4_val = NULL;
10021 		pathname->pathname4_len = 0;
10022 		return (0);
10023 	}
10024 
10025 	/* count number of components to alloc buffer */
10026 	if ((ncomp = nfs4_create_components(path, NULL)) == 0) {
10027 		pathname->pathname4_val = NULL;
10028 		pathname->pathname4_len = 0;
10029 		return (0);
10030 	}
10031 	comp4 = kmem_zalloc(ncomp * sizeof (component4), KM_SLEEP);
10032 
10033 	/* copy components into allocated buffer */
10034 	ncomp = nfs4_create_components(path, comp4);
10035 
10036 	pathname->pathname4_val = comp4;
10037 	pathname->pathname4_len = ncomp;
10038 
10039 	return (ncomp);
10040 }
10041 
10042 #define	xdr_fs_locations4 xdr_fattr4_fs_locations
10043 
10044 fs_locations4 *
10045 fetch_referral(vnode_t *vp, cred_t *cr)
10046 {
10047 	nvlist_t *nvl;
10048 	char *stype, *sdata;
10049 	fs_locations4 *result;
10050 	char buf[1024];
10051 	size_t bufsize;
10052 	XDR xdr;
10053 	int err;
10054 
10055 	/*
10056 	 * Check attrs to ensure it's a reparse point
10057 	 */
10058 	if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
10059 		return (NULL);
10060 
10061 	/*
10062 	 * Look for an NFS record and get the type and data
10063 	 */
10064 	if (vn_find_nfs_record(vp, &nvl, &stype, &sdata) != 0)
10065 		return (NULL);
10066 
10067 	/*
10068 	 * With the type and data, upcall to get the referral
10069 	 */
10070 	bufsize = sizeof (buf);
10071 	bzero(buf, sizeof (buf));
10072 	err = reparse_kderef((const char *)stype, (const char *)sdata,
10073 	    buf, &bufsize);
10074 	reparse_free(nvl);
10075 
10076 	DTRACE_PROBE4(nfs4serv__func__referral__upcall,
10077 	    char *, stype, char *, sdata, char *, buf, int, err);
10078 	if (err) {
10079 		cmn_err(CE_NOTE,
10080 		    "reparsed daemon not running: unable to get referral (%d)",
10081 		    err);
10082 		return (NULL);
10083 	}
10084 
10085 	/*
10086 	 * We get an XDR'ed record back from the kderef call
10087 	 */
10088 	xdrmem_create(&xdr, buf, bufsize, XDR_DECODE);
10089 	result = kmem_alloc(sizeof (fs_locations4), KM_SLEEP);
10090 	err = xdr_fs_locations4(&xdr, result);
10091 	XDR_DESTROY(&xdr);
10092 	if (err != TRUE) {
10093 		DTRACE_PROBE1(nfs4serv__func__referral__upcall__xdrfail,
10094 		    int, err);
10095 		return (NULL);
10096 	}
10097 
10098 	/*
10099 	 * Look at path to recover fs_root, ignoring the leading '/'
10100 	 */
10101 	(void) make_pathname4(vp->v_path, &result->fs_root);
10102 
10103 	return (result);
10104 }
10105 
10106 char *
10107 build_symlink(vnode_t *vp, cred_t *cr, size_t *strsz)
10108 {
10109 	fs_locations4 *fsl;
10110 	fs_location4 *fs;
10111 	char *server, *path, *symbuf;
10112 	static char *prefix = "/net/";
10113 	int i, size, npaths;
10114 	uint_t len;
10115 
10116 	/* Get the referral */
10117 	if ((fsl = fetch_referral(vp, cr)) == NULL)
10118 		return (NULL);
10119 
10120 	/* Deal with only the first location and first server */
10121 	fs = &fsl->locations_val[0];
10122 	server = utf8_to_str(&fs->server_val[0], &len, NULL);
10123 	if (server == NULL) {
10124 		rfs4_free_fs_locations4(fsl);
10125 		kmem_free(fsl, sizeof (fs_locations4));
10126 		return (NULL);
10127 	}
10128 
10129 	/* Figure out size for "/net/" + host + /path/path/path + NULL */
10130 	size = strlen(prefix) + len;
10131 	for (i = 0; i < fs->rootpath.pathname4_len; i++)
10132 		size += fs->rootpath.pathname4_val[i].utf8string_len + 1;
10133 
10134 	/* Allocate the symlink buffer and fill it */
10135 	symbuf = kmem_zalloc(size, KM_SLEEP);
10136 	(void) strcat(symbuf, prefix);
10137 	(void) strcat(symbuf, server);
10138 	kmem_free(server, len);
10139 
10140 	npaths = 0;
10141 	for (i = 0; i < fs->rootpath.pathname4_len; i++) {
10142 		path = utf8_to_str(&fs->rootpath.pathname4_val[i], &len, NULL);
10143 		if (path == NULL)
10144 			continue;
10145 		(void) strcat(symbuf, "/");
10146 		(void) strcat(symbuf, path);
10147 		npaths++;
10148 		kmem_free(path, len);
10149 	}
10150 
10151 	rfs4_free_fs_locations4(fsl);
10152 	kmem_free(fsl, sizeof (fs_locations4));
10153 
10154 	if (strsz != NULL)
10155 		*strsz = size;
10156 	return (symbuf);
10157 }
10158 
10159 /*
10160  * Check to see if we have a downrev Solaris client, so that we
10161  * can send it a symlink instead of a referral.
10162  */
10163 int
10164 client_is_downrev(struct svc_req *req)
10165 {
10166 	struct sockaddr *ca;
10167 	rfs4_clntip_t *ci;
10168 	bool_t create = FALSE;
10169 	int is_downrev;
10170 
10171 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
10172 	ASSERT(ca);
10173 	ci = rfs4_find_clntip(ca, &create);
10174 	if (ci == NULL)
10175 		return (0);
10176 	is_downrev = ci->ri_no_referrals;
10177 	rfs4_dbe_rele(ci->ri_dbe);
10178 	return (is_downrev);
10179 }
10180 
10181 /*
10182  * Do the main work of handling HA-NFSv4 Resource Group failover on
10183  * Sun Cluster.
10184  * We need to detect whether any RG admin paths have been added or removed,
10185  * and adjust resources accordingly.
10186  * Currently we're using a very inefficient algorithm, ~ 2 * O(n**2). In
10187  * order to scale, the list and array of paths need to be held in more
10188  * suitable data structures.
10189  */
10190 static void
10191 hanfsv4_failover(nfs4_srv_t *nsrv4)
10192 {
10193 	int i, start_grace, numadded_paths = 0;
10194 	char **added_paths = NULL;
10195 	rfs4_dss_path_t *dss_path;
10196 
10197 	/*
10198 	 * Note: currently, dss_pathlist cannot be NULL, since
10199 	 * it will always include an entry for NFS4_DSS_VAR_DIR. If we
10200 	 * make the latter dynamically specified too, the following will
10201 	 * need to be adjusted.
10202 	 */
10203 
10204 	/*
10205 	 * First, look for removed paths: RGs that have been failed-over
10206 	 * away from this node.
10207 	 * Walk the "currently-serving" dss_pathlist and, for each
10208 	 * path, check if it is on the "passed-in" rfs4_dss_newpaths array
10209 	 * from nfsd. If not, that RG path has been removed.
10210 	 *
10211 	 * Note that nfsd has sorted rfs4_dss_newpaths for us, and removed
10212 	 * any duplicates.
10213 	 */
10214 	dss_path = nsrv4->dss_pathlist;
10215 	do {
10216 		int found = 0;
10217 		char *path = dss_path->path;
10218 
10219 		/* used only for non-HA so may not be removed */
10220 		if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
10221 			dss_path = dss_path->next;
10222 			continue;
10223 		}
10224 
10225 		for (i = 0; i < rfs4_dss_numnewpaths; i++) {
10226 			int cmpret;
10227 			char *newpath = rfs4_dss_newpaths[i];
10228 
10229 			/*
10230 			 * Since nfsd has sorted rfs4_dss_newpaths for us,
10231 			 * once the return from strcmp is negative we know
10232 			 * we've passed the point where "path" should be,
10233 			 * and can stop searching: "path" has been removed.
10234 			 */
10235 			cmpret = strcmp(path, newpath);
10236 			if (cmpret < 0)
10237 				break;
10238 			if (cmpret == 0) {
10239 				found = 1;
10240 				break;
10241 			}
10242 		}
10243 
10244 		if (found == 0) {
10245 			unsigned index = dss_path->index;
10246 			rfs4_servinst_t *sip = dss_path->sip;
10247 			rfs4_dss_path_t *path_next = dss_path->next;
10248 
10249 			/*
10250 			 * This path has been removed.
10251 			 * We must clear out the servinst reference to
10252 			 * it, since it's now owned by another
10253 			 * node: we should not attempt to touch it.
10254 			 */
10255 			ASSERT(dss_path == sip->dss_paths[index]);
10256 			sip->dss_paths[index] = NULL;
10257 
10258 			/* remove from "currently-serving" list, and destroy */
10259 			remque(dss_path);
10260 			/* allow for NUL */
10261 			kmem_free(dss_path->path, strlen(dss_path->path) + 1);
10262 			kmem_free(dss_path, sizeof (rfs4_dss_path_t));
10263 
10264 			dss_path = path_next;
10265 		} else {
10266 			/* path was found; not removed */
10267 			dss_path = dss_path->next;
10268 		}
10269 	} while (dss_path != nsrv4->dss_pathlist);
10270 
10271 	/*
10272 	 * Now, look for added paths: RGs that have been failed-over
10273 	 * to this node.
10274 	 * Walk the "passed-in" rfs4_dss_newpaths array from nfsd and,
10275 	 * for each path, check if it is on the "currently-serving"
10276 	 * dss_pathlist. If not, that RG path has been added.
10277 	 *
10278 	 * Note: we don't do duplicate detection here; nfsd does that for us.
10279 	 *
10280 	 * Note: numadded_paths <= rfs4_dss_numnewpaths, which gives us
10281 	 * an upper bound for the size needed for added_paths[numadded_paths].
10282 	 */
10283 
10284 	/* probably more space than we need, but guaranteed to be enough */
10285 	if (rfs4_dss_numnewpaths > 0) {
10286 		size_t sz = rfs4_dss_numnewpaths * sizeof (char *);
10287 		added_paths = kmem_zalloc(sz, KM_SLEEP);
10288 	}
10289 
10290 	/* walk the "passed-in" rfs4_dss_newpaths array from nfsd */
10291 	for (i = 0; i < rfs4_dss_numnewpaths; i++) {
10292 		int found = 0;
10293 		char *newpath = rfs4_dss_newpaths[i];
10294 
10295 		dss_path = nsrv4->dss_pathlist;
10296 		do {
10297 			char *path = dss_path->path;
10298 
10299 			/* used only for non-HA */
10300 			if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
10301 				dss_path = dss_path->next;
10302 				continue;
10303 			}
10304 
10305 			if (strncmp(path, newpath, strlen(path)) == 0) {
10306 				found = 1;
10307 				break;
10308 			}
10309 
10310 			dss_path = dss_path->next;
10311 		} while (dss_path != nsrv4->dss_pathlist);
10312 
10313 		if (found == 0) {
10314 			added_paths[numadded_paths] = newpath;
10315 			numadded_paths++;
10316 		}
10317 	}
10318 
10319 	/* did we find any added paths? */
10320 	if (numadded_paths > 0) {
10321 
10322 		/* create a new server instance, and start its grace period */
10323 		start_grace = 1;
10324 		/* CSTYLED */
10325 		rfs4_servinst_create(nsrv4, start_grace, numadded_paths, added_paths);
10326 
10327 		/* read in the stable storage state from these paths */
10328 		rfs4_dss_readstate(nsrv4, numadded_paths, added_paths);
10329 
10330 		/*
10331 		 * Multiple failovers during a grace period will cause
10332 		 * clients of the same resource group to be partitioned
10333 		 * into different server instances, with different
10334 		 * grace periods.  Since clients of the same resource
10335 		 * group must be subject to the same grace period,
10336 		 * we need to reset all currently active grace periods.
10337 		 */
10338 		rfs4_grace_reset_all(nsrv4);
10339 	}
10340 
10341 	if (rfs4_dss_numnewpaths > 0)
10342 		kmem_free(added_paths, rfs4_dss_numnewpaths * sizeof (char *));
10343 }
10344