1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. 24 */ 25 26 /* 27 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T. 28 * All Rights Reserved 29 */ 30 31 /* 32 * Copyright (c) 2012, 2016 by Delphix. All rights reserved. 33 * Copyright 2019 Nexenta Systems, Inc. 34 * Copyright 2019 Nexenta by DDN, Inc. 35 * Copyright 2021 Racktop Systems, Inc. 
36 */ 37 38 #include <sys/param.h> 39 #include <sys/types.h> 40 #include <sys/systm.h> 41 #include <sys/cred.h> 42 #include <sys/buf.h> 43 #include <sys/vfs.h> 44 #include <sys/vfs_opreg.h> 45 #include <sys/vnode.h> 46 #include <sys/uio.h> 47 #include <sys/errno.h> 48 #include <sys/sysmacros.h> 49 #include <sys/statvfs.h> 50 #include <sys/kmem.h> 51 #include <sys/dirent.h> 52 #include <sys/cmn_err.h> 53 #include <sys/debug.h> 54 #include <sys/systeminfo.h> 55 #include <sys/flock.h> 56 #include <sys/pathname.h> 57 #include <sys/nbmlock.h> 58 #include <sys/share.h> 59 #include <sys/atomic.h> 60 #include <sys/policy.h> 61 #include <sys/fem.h> 62 #include <sys/sdt.h> 63 #include <sys/ddi.h> 64 #include <sys/zone.h> 65 66 #include <fs/fs_reparse.h> 67 68 #include <rpc/types.h> 69 #include <rpc/auth.h> 70 #include <rpc/rpcsec_gss.h> 71 #include <rpc/svc.h> 72 73 #include <nfs/nfs.h> 74 #include <nfs/nfssys.h> 75 #include <nfs/export.h> 76 #include <nfs/nfs_cmd.h> 77 #include <nfs/lm.h> 78 #include <nfs/nfs4.h> 79 #include <nfs/nfs4_drc.h> 80 81 #include <sys/strsubr.h> 82 #include <sys/strsun.h> 83 84 #include <inet/common.h> 85 #include <inet/ip.h> 86 #include <inet/ip6.h> 87 88 #include <sys/tsol/label.h> 89 #include <sys/tsol/tndb.h> 90 91 #define RFS4_MAXLOCK_TRIES 4 /* Try to get the lock this many times */ 92 static int rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES; 93 #define RFS4_LOCK_DELAY 10 /* Milliseconds */ 94 static clock_t rfs4_lock_delay = RFS4_LOCK_DELAY; 95 extern struct svc_ops rdma_svc_ops; 96 extern int nfs_loaned_buffers; 97 /* End of Tunables */ 98 99 static int rdma_setup_read_data4(READ4args *, READ4res *); 100 101 /* 102 * Used to bump the stateid4.seqid value and show changes in the stateid 103 */ 104 #define next_stateid(sp) (++(sp)->bits.chgseq) 105 106 /* 107 * RFS4_MINLEN_ENTRY4: XDR-encoded size of smallest possible dirent. 
108 * This is used to return NFS4ERR_TOOSMALL when clients specify 109 * maxcount that isn't large enough to hold the smallest possible 110 * XDR encoded dirent. 111 * 112 * sizeof cookie (8 bytes) + 113 * sizeof name_len (4 bytes) + 114 * sizeof smallest (padded) name (4 bytes) + 115 * sizeof bitmap4_len (12 bytes) + NOTE: we always encode len=2 bm4 116 * sizeof attrlist4_len (4 bytes) + 117 * sizeof next boolean (4 bytes) 118 * 119 * RFS4_MINLEN_RDDIR4: XDR-encoded size of READDIR op reply containing 120 * the smallest possible entry4 (assumes no attrs requested). 121 * sizeof nfsstat4 (4 bytes) + 122 * sizeof verifier4 (8 bytes) + 123 * sizeof entry4list bool (4 bytes) + 124 * sizeof entry4 (36 bytes) + 125 * sizeof eof bool (4 bytes) 126 * 127 * RFS4_MINLEN_RDDIR_BUF: minimum length of buffer server will provide to 128 * VOP_READDIR. Its value is the size of the maximum possible dirent 129 * for solaris. The DIRENT64_RECLEN macro returns the size of dirent 130 * required for a given name length. MAXNAMELEN is the maximum 131 * filename length allowed in Solaris. The first two DIRENT64_RECLEN() 132 * macros are to allow for . and .. entries -- just a minor tweak to try 133 * and guarantee that buffer we give to VOP_READDIR will be large enough 134 * to hold ., .., and the largest possible solaris dirent64. 135 */ 136 #define RFS4_MINLEN_ENTRY4 36 137 #define RFS4_MINLEN_RDDIR4 (4 + NFS4_VERIFIER_SIZE + 4 + RFS4_MINLEN_ENTRY4 + 4) 138 #define RFS4_MINLEN_RDDIR_BUF \ 139 (DIRENT64_RECLEN(1) + DIRENT64_RECLEN(2) + DIRENT64_RECLEN(MAXNAMELEN)) 140 141 /* 142 * It would be better to pad to 4 bytes since that's what XDR would do, 143 * but the dirents UFS gives us are already padded to 8, so just take 144 * what we're given. Dircount is only a hint anyway. Currently the 145 * solaris kernel is ASCII only, so there's no point in calling the 146 * UTF8 functions. 
147 * 148 * dirent64: named padded to provide 8 byte struct alignment 149 * d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad) 150 * 151 * cookie: uint64_t + utf8namelen: uint_t + utf8name padded to 8 bytes 152 * 153 */ 154 #define DIRENT64_TO_DIRCOUNT(dp) \ 155 (3 * BYTES_PER_XDR_UNIT + DIRENT64_NAMELEN((dp)->d_reclen)) 156 157 158 static sysid_t lockt_sysid; /* dummy sysid for all LOCKT calls */ 159 160 u_longlong_t nfs4_srv_caller_id; 161 uint_t nfs4_srv_vkey = 0; 162 163 void rfs4_init_compound_state(struct compound_state *); 164 165 static void nullfree(caddr_t); 166 static void rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 167 struct compound_state *); 168 static void rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 169 struct compound_state *); 170 static void rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 171 struct compound_state *); 172 static void rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 173 struct compound_state *); 174 static void rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 175 struct compound_state *); 176 static void rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *, 177 struct svc_req *, struct compound_state *); 178 static void rfs4_op_delegpurge(nfs_argop4 *, nfs_resop4 *, 179 struct svc_req *, struct compound_state *); 180 static void rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 181 struct compound_state *); 182 static void rfs4_op_getattr_free(nfs_resop4 *); 183 static void rfs4_op_getfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 184 struct compound_state *); 185 static void rfs4_op_getfh_free(nfs_resop4 *); 186 static void rfs4_op_illegal(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 187 struct compound_state *); 188 static void rfs4_op_notsup(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 189 struct compound_state *); 190 static void rfs4_op_link(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 191 struct compound_state *); 192 static void 
rfs4_op_lock(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 193 struct compound_state *); 194 static void lock_denied_free(nfs_resop4 *); 195 static void rfs4_op_locku(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 196 struct compound_state *); 197 static void rfs4_op_lockt(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 198 struct compound_state *); 199 static void rfs4_op_lookup(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 200 struct compound_state *); 201 static void rfs4_op_lookupp(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 202 struct compound_state *); 203 static void rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, 204 struct svc_req *req, struct compound_state *cs); 205 static void rfs4_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 206 struct compound_state *); 207 static void rfs4_op_open(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 208 struct compound_state *); 209 static void rfs4_op_open_confirm(nfs_argop4 *, nfs_resop4 *, 210 struct svc_req *, struct compound_state *); 211 static void rfs4_op_open_downgrade(nfs_argop4 *, nfs_resop4 *, 212 struct svc_req *, struct compound_state *); 213 static void rfs4_op_putfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 214 struct compound_state *); 215 static void rfs4_op_putpubfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 216 struct compound_state *); 217 static void rfs4_op_putrootfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 218 struct compound_state *); 219 static void rfs4_op_read(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 220 struct compound_state *); 221 static void rfs4_op_read_free(nfs_resop4 *); 222 static void rfs4_op_readdir_free(nfs_resop4 *resop); 223 static void rfs4_op_readlink(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 224 struct compound_state *); 225 static void rfs4_op_readlink_free(nfs_resop4 *); 226 static void rfs4_op_release_lockowner(nfs_argop4 *, nfs_resop4 *, 227 struct svc_req *, struct compound_state *); 228 static void rfs4_op_remove(nfs_argop4 *, 
nfs_resop4 *, struct svc_req *, 229 struct compound_state *); 230 static void rfs4_op_rename(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 231 struct compound_state *); 232 static void rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 233 struct compound_state *); 234 static void rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 235 struct compound_state *); 236 static void rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 237 struct compound_state *); 238 static void rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 239 struct compound_state *); 240 static void rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 241 struct compound_state *); 242 static void rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 243 struct compound_state *); 244 static void rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *, 245 struct svc_req *, struct compound_state *); 246 static void rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *, 247 struct svc_req *req, struct compound_state *); 248 static void rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 249 struct compound_state *); 250 static void rfs4_op_secinfo_free(nfs_resop4 *); 251 252 void rfs4x_op_exchange_id(nfs_argop4 *argop, nfs_resop4 *resop, 253 struct svc_req *req, struct compound_state *cs); 254 void rfs4x_exchange_id_free(nfs_resop4 *); 255 256 void rfs4x_op_create_session(nfs_argop4 *argop, nfs_resop4 *resop, 257 struct svc_req *req, struct compound_state *cs); 258 259 void rfs4x_op_destroy_session(nfs_argop4 *argop, nfs_resop4 *resop, 260 struct svc_req *req, compound_state_t *cs); 261 262 void rfs4x_op_sequence(nfs_argop4 *argop, nfs_resop4 *resop, 263 struct svc_req *req, struct compound_state *cs); 264 265 void rfs4x_op_reclaim_complete(nfs_argop4 *argop, nfs_resop4 *resop, 266 struct svc_req *req, compound_state_t *cs); 267 268 void rfs4x_op_destroy_clientid(nfs_argop4 *argop, nfs_resop4 *resop, 269 struct svc_req *req, compound_state_t 
*cs); 270 271 void rfs4x_op_bind_conn_to_session(nfs_argop4 *argop, nfs_resop4 *resop, 272 struct svc_req *req, compound_state_t *cs); 273 274 void rfs4x_op_secinfo_noname(nfs_argop4 *argop, nfs_resop4 *resop, 275 struct svc_req *req, compound_state_t *cs); 276 277 static nfsstat4 check_open_access(uint32_t, struct compound_state *, 278 struct svc_req *); 279 nfsstat4 rfs4_client_sysid(rfs4_client_t *, sysid_t *); 280 void rfs4_ss_clid(nfs4_srv_t *, rfs4_client_t *); 281 282 /* 283 * translation table for attrs 284 */ 285 struct nfs4_ntov_table { 286 union nfs4_attr_u *na; 287 uint8_t amap[NFS4_MAXNUM_ATTRS]; 288 int attrcnt; 289 bool_t vfsstat; 290 }; 291 292 static void nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp); 293 static void nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp, 294 struct nfs4_svgetit_arg *sargp); 295 296 static nfsstat4 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp, 297 struct compound_state *cs, struct nfs4_svgetit_arg *sargp, 298 struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd); 299 300 static void hanfsv4_failover(nfs4_srv_t *); 301 302 fem_t *deleg_rdops; 303 fem_t *deleg_wrops; 304 305 /* 306 * NFS4 op dispatch table 307 */ 308 309 struct rfsv4disp { 310 void (*dis_proc)(); /* proc to call */ 311 void (*dis_resfree)(); /* frees space allocated by proc */ 312 int dis_flags; /* RPC_IDEMPOTENT, etc... 
*/ 313 }; 314 315 static struct rfsv4disp rfsv4disptab[] = { 316 /* 317 * NFS VERSION 4 318 */ 319 320 /* RFS_NULL = 0 */ 321 {rfs4_op_illegal, nullfree, 0}, 322 323 /* UNUSED = 1 */ 324 {rfs4_op_illegal, nullfree, 0}, 325 326 /* UNUSED = 2 */ 327 {rfs4_op_illegal, nullfree, 0}, 328 329 /* OP_ACCESS = 3 */ 330 {rfs4_op_access, nullfree, RPC_IDEMPOTENT}, 331 332 /* OP_CLOSE = 4 */ 333 {rfs4_op_close, nullfree, 0}, 334 335 /* OP_COMMIT = 5 */ 336 {rfs4_op_commit, nullfree, RPC_IDEMPOTENT}, 337 338 /* OP_CREATE = 6 */ 339 {rfs4_op_create, nullfree, 0}, 340 341 /* OP_DELEGPURGE = 7 */ 342 {rfs4_op_delegpurge, nullfree, 0}, 343 344 /* OP_DELEGRETURN = 8 */ 345 {rfs4_op_delegreturn, nullfree, 0}, 346 347 /* OP_GETATTR = 9 */ 348 {rfs4_op_getattr, rfs4_op_getattr_free, RPC_IDEMPOTENT}, 349 350 /* OP_GETFH = 10 */ 351 {rfs4_op_getfh, rfs4_op_getfh_free, RPC_ALL}, 352 353 /* OP_LINK = 11 */ 354 {rfs4_op_link, nullfree, 0}, 355 356 /* OP_LOCK = 12 */ 357 {rfs4_op_lock, lock_denied_free, 0}, 358 359 /* OP_LOCKT = 13 */ 360 {rfs4_op_lockt, lock_denied_free, 0}, 361 362 /* OP_LOCKU = 14 */ 363 {rfs4_op_locku, nullfree, 0}, 364 365 /* OP_LOOKUP = 15 */ 366 {rfs4_op_lookup, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)}, 367 368 /* OP_LOOKUPP = 16 */ 369 {rfs4_op_lookupp, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)}, 370 371 /* OP_NVERIFY = 17 */ 372 {rfs4_op_nverify, nullfree, RPC_IDEMPOTENT}, 373 374 /* OP_OPEN = 18 */ 375 {rfs4_op_open, rfs4_free_reply, 0}, 376 377 /* OP_OPENATTR = 19 */ 378 {rfs4_op_openattr, nullfree, 0}, 379 380 /* OP_OPEN_CONFIRM = 20 */ 381 {rfs4_op_open_confirm, nullfree, 0}, 382 383 /* OP_OPEN_DOWNGRADE = 21 */ 384 {rfs4_op_open_downgrade, nullfree, 0}, 385 386 /* OP_OPEN_PUTFH = 22 */ 387 {rfs4_op_putfh, nullfree, RPC_ALL}, 388 389 /* OP_PUTPUBFH = 23 */ 390 {rfs4_op_putpubfh, nullfree, RPC_ALL}, 391 392 /* OP_PUTROOTFH = 24 */ 393 {rfs4_op_putrootfh, nullfree, RPC_ALL}, 394 395 /* OP_READ = 25 */ 396 {rfs4_op_read, rfs4_op_read_free, RPC_IDEMPOTENT}, 
397 398 /* OP_READDIR = 26 */ 399 {rfs4_op_readdir, rfs4_op_readdir_free, RPC_IDEMPOTENT}, 400 401 /* OP_READLINK = 27 */ 402 {rfs4_op_readlink, rfs4_op_readlink_free, RPC_IDEMPOTENT}, 403 404 /* OP_REMOVE = 28 */ 405 {rfs4_op_remove, nullfree, 0}, 406 407 /* OP_RENAME = 29 */ 408 {rfs4_op_rename, nullfree, 0}, 409 410 /* OP_RENEW = 30 */ 411 {rfs4_op_renew, nullfree, 0}, 412 413 /* OP_RESTOREFH = 31 */ 414 {rfs4_op_restorefh, nullfree, RPC_ALL}, 415 416 /* OP_SAVEFH = 32 */ 417 {rfs4_op_savefh, nullfree, RPC_ALL}, 418 419 /* OP_SECINFO = 33 */ 420 {rfs4_op_secinfo, rfs4_op_secinfo_free, 0}, 421 422 /* OP_SETATTR = 34 */ 423 {rfs4_op_setattr, nullfree, 0}, 424 425 /* OP_SETCLIENTID = 35 */ 426 {rfs4_op_setclientid, nullfree, 0}, 427 428 /* OP_SETCLIENTID_CONFIRM = 36 */ 429 {rfs4_op_setclientid_confirm, nullfree, 0}, 430 431 /* OP_VERIFY = 37 */ 432 {rfs4_op_verify, nullfree, RPC_IDEMPOTENT}, 433 434 /* OP_WRITE = 38 */ 435 {rfs4_op_write, nullfree, 0}, 436 437 /* OP_RELEASE_LOCKOWNER = 39 */ 438 {rfs4_op_release_lockowner, nullfree, 0}, 439 440 /* 441 * NFSv4.1 operations 442 */ 443 444 /* OP_BACKCHANNEL_CTL = 40 */ 445 {rfs4_op_notsup, nullfree, 0}, 446 447 /* OP_BIND_CONN_TO_SESSION = 41 */ 448 {rfs4x_op_bind_conn_to_session, nullfree, 0}, 449 450 /* OP_EXCHANGE_ID = 42 */ 451 {rfs4x_op_exchange_id, rfs4x_exchange_id_free, 0}, 452 453 /* OP_CREATE_SESSION = 43 */ 454 {rfs4x_op_create_session, nullfree, 0}, 455 456 /* OP_DESTROY_SESSION = 44 */ 457 {rfs4x_op_destroy_session, nullfree, 0}, 458 459 /* OP_FREE_STATEID = 45 */ 460 {rfs4_op_notsup, nullfree, 0}, 461 462 /* OP_GET_DIR_DELEGATION = 46 */ 463 {rfs4_op_notsup, nullfree, 0}, 464 465 /* OP_GETDEVICEINFO = 47 */ 466 {rfs4_op_notsup, nullfree, 0}, 467 468 /* OP_GETDEVICELIST = 48 */ 469 {rfs4_op_notsup, nullfree, 0}, 470 471 /* OP_LAYOUTCOMMIT = 49 */ 472 {rfs4_op_notsup, nullfree, 0}, 473 474 /* OP_LAYOUTGET = 50 */ 475 {rfs4_op_notsup, nullfree, 0}, 476 477 /* OP_LAYOUTRETURN = 51 */ 478 {rfs4_op_notsup, 
nullfree, 0}, 479 480 /* OP_SECINFO_NO_NAME = 52 */ 481 {rfs4x_op_secinfo_noname, rfs4_op_secinfo_free, 0}, 482 483 /* OP_SEQUENCE = 53 */ 484 {rfs4x_op_sequence, nullfree, 0}, 485 486 /* OP_SET_SSV = 54 */ 487 {rfs4_op_notsup, nullfree, 0}, 488 489 /* OP_TEST_STATEID = 55 */ 490 {rfs4_op_notsup, nullfree, 0}, 491 492 /* OP_WANT_DELEGATION = 56 */ 493 {rfs4_op_notsup, nullfree, 0}, 494 495 /* OP_DESTROY_CLIENTID = 57 */ 496 {rfs4x_op_destroy_clientid, nullfree, 0}, 497 498 /* OP_RECLAIM_COMPLETE = 58 */ 499 {rfs4x_op_reclaim_complete, nullfree, 0}, 500 }; 501 502 static uint_t rfsv4disp_cnt = sizeof (rfsv4disptab) / sizeof (rfsv4disptab[0]); 503 504 #define OP_ILLEGAL_IDX (rfsv4disp_cnt) 505 506 #ifdef DEBUG 507 508 int rfs4_fillone_debug = 0; 509 int rfs4_no_stub_access = 1; 510 int rfs4_rddir_debug = 0; 511 512 static char *rfs4_op_string[] = { 513 "rfs4_op_null", 514 "rfs4_op_1 unused", 515 "rfs4_op_2 unused", 516 "rfs4_op_access", 517 "rfs4_op_close", 518 "rfs4_op_commit", 519 "rfs4_op_create", 520 "rfs4_op_delegpurge", 521 "rfs4_op_delegreturn", 522 "rfs4_op_getattr", 523 "rfs4_op_getfh", 524 "rfs4_op_link", 525 "rfs4_op_lock", 526 "rfs4_op_lockt", 527 "rfs4_op_locku", 528 "rfs4_op_lookup", 529 "rfs4_op_lookupp", 530 "rfs4_op_nverify", 531 "rfs4_op_open", 532 "rfs4_op_openattr", 533 "rfs4_op_open_confirm", 534 "rfs4_op_open_downgrade", 535 "rfs4_op_putfh", 536 "rfs4_op_putpubfh", 537 "rfs4_op_putrootfh", 538 "rfs4_op_read", 539 "rfs4_op_readdir", 540 "rfs4_op_readlink", 541 "rfs4_op_remove", 542 "rfs4_op_rename", 543 "rfs4_op_renew", 544 "rfs4_op_restorefh", 545 "rfs4_op_savefh", 546 "rfs4_op_secinfo", 547 "rfs4_op_setattr", 548 "rfs4_op_setclientid", 549 "rfs4_op_setclient_confirm", 550 "rfs4_op_verify", 551 "rfs4_op_write", 552 "rfs4_op_release_lockowner", 553 /* NFSv4.1 */ 554 "backchannel_ctl", 555 "bind_conn_to_session", 556 "exchange_id", 557 "create_session", 558 "destroy_session", 559 "free_stateid", 560 "get_dir_delegation", 561 "getdeviceinfo", 562 
"getdevicelist", 563 "layoutcommit", 564 "layoutget", 565 "layoutreturn", 566 "secinfo_no_name", 567 "sequence", 568 "set_ssv", 569 "test_stateid", 570 "want_delegation", 571 "destroy_clientid", 572 "reclaim_complete", 573 "rfs4_op_illegal" 574 }; 575 576 #endif 577 578 void rfs4_ss_chkclid(nfs4_srv_t *, rfs4_client_t *); 579 580 extern size_t strlcpy(char *dst, const char *src, size_t dstsize); 581 582 extern void rfs4_free_fs_locations4(fs_locations4 *); 583 584 #ifdef nextdp 585 #undef nextdp 586 #endif 587 #define nextdp(dp) ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen)) 588 589 static const fs_operation_def_t nfs4_rd_deleg_tmpl[] = { 590 VOPNAME_OPEN, { .femop_open = deleg_rd_open }, 591 VOPNAME_WRITE, { .femop_write = deleg_rd_write }, 592 VOPNAME_SETATTR, { .femop_setattr = deleg_rd_setattr }, 593 VOPNAME_RWLOCK, { .femop_rwlock = deleg_rd_rwlock }, 594 VOPNAME_SPACE, { .femop_space = deleg_rd_space }, 595 VOPNAME_SETSECATTR, { .femop_setsecattr = deleg_rd_setsecattr }, 596 VOPNAME_VNEVENT, { .femop_vnevent = deleg_rd_vnevent }, 597 NULL, NULL 598 }; 599 static const fs_operation_def_t nfs4_wr_deleg_tmpl[] = { 600 VOPNAME_OPEN, { .femop_open = deleg_wr_open }, 601 VOPNAME_READ, { .femop_read = deleg_wr_read }, 602 VOPNAME_WRITE, { .femop_write = deleg_wr_write }, 603 VOPNAME_SETATTR, { .femop_setattr = deleg_wr_setattr }, 604 VOPNAME_RWLOCK, { .femop_rwlock = deleg_wr_rwlock }, 605 VOPNAME_SPACE, { .femop_space = deleg_wr_space }, 606 VOPNAME_SETSECATTR, { .femop_setsecattr = deleg_wr_setsecattr }, 607 VOPNAME_VNEVENT, { .femop_vnevent = deleg_wr_vnevent }, 608 NULL, NULL 609 }; 610 611 nfs4_srv_t * 612 nfs4_get_srv(void) 613 { 614 nfs_globals_t *ng = nfs_srv_getzg(); 615 nfs4_srv_t *srv = ng->nfs4_srv; 616 ASSERT(srv != NULL); 617 return (srv); 618 } 619 620 void 621 rfs4_srv_zone_init(nfs_globals_t *ng) 622 { 623 nfs4_srv_t *nsrv4; 624 timespec32_t verf; 625 626 nsrv4 = kmem_zalloc(sizeof (*nsrv4), KM_SLEEP); 627 628 /* 629 * The following algorithm 
attempts to find a unique verifier 630 * to be used as the write verifier returned from the server 631 * to the client. It is important that this verifier change 632 * whenever the server reboots. Of secondary importance, it 633 * is important for the verifier to be unique between two 634 * different servers. 635 * 636 * Thus, an attempt is made to use the system hostid and the 637 * current time in seconds when the nfssrv kernel module is 638 * loaded. It is assumed that an NFS server will not be able 639 * to boot and then to reboot in less than a second. If the 640 * hostid has not been set, then the current high resolution 641 * time is used. This will ensure different verifiers each 642 * time the server reboots and minimize the chances that two 643 * different servers will have the same verifier. 644 * XXX - this is broken on LP64 kernels. 645 */ 646 verf.tv_sec = (time_t)zone_get_hostid(NULL); 647 if (verf.tv_sec != 0) { 648 verf.tv_nsec = gethrestime_sec(); 649 } else { 650 timespec_t tverf; 651 652 gethrestime(&tverf); 653 verf.tv_sec = (time_t)tverf.tv_sec; 654 verf.tv_nsec = tverf.tv_nsec; 655 } 656 nsrv4->write4verf = *(uint64_t *)&verf; 657 658 /* Used to manage create/destroy of server state */ 659 nsrv4->nfs4_server_state = NULL; 660 nsrv4->nfs4_cur_servinst = NULL; 661 nsrv4->nfs4_deleg_policy = SRV_NEVER_DELEGATE; 662 mutex_init(&nsrv4->deleg_lock, NULL, MUTEX_DEFAULT, NULL); 663 mutex_init(&nsrv4->state_lock, NULL, MUTEX_DEFAULT, NULL); 664 mutex_init(&nsrv4->servinst_lock, NULL, MUTEX_DEFAULT, NULL); 665 rw_init(&nsrv4->deleg_policy_lock, NULL, RW_DEFAULT, NULL); 666 667 ng->nfs4_srv = nsrv4; 668 } 669 670 void 671 rfs4_srv_zone_fini(nfs_globals_t *ng) 672 { 673 nfs4_srv_t *nsrv4 = ng->nfs4_srv; 674 675 ng->nfs4_srv = NULL; 676 677 mutex_destroy(&nsrv4->deleg_lock); 678 mutex_destroy(&nsrv4->state_lock); 679 mutex_destroy(&nsrv4->servinst_lock); 680 rw_destroy(&nsrv4->deleg_policy_lock); 681 682 kmem_free(nsrv4, sizeof (*nsrv4)); 683 } 684 685 
void 686 rfs4_srvrinit(void) 687 { 688 extern void rfs4_attr_init(); 689 690 rfs4_attr_init(); 691 692 if (fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops) != 0) { 693 rfs4_disable_delegation(); 694 } else if (fem_create("deleg_wrops", nfs4_wr_deleg_tmpl, 695 &deleg_wrops) != 0) { 696 rfs4_disable_delegation(); 697 fem_free(deleg_rdops); 698 } 699 700 nfs4_srv_caller_id = fs_new_caller_id(); 701 lockt_sysid = lm_alloc_sysidt(); 702 vsd_create(&nfs4_srv_vkey, NULL); 703 rfs4_state_g_init(); 704 } 705 706 void 707 rfs4_srvrfini(void) 708 { 709 if (lockt_sysid != LM_NOSYSID) { 710 lm_free_sysidt(lockt_sysid); 711 lockt_sysid = LM_NOSYSID; 712 } 713 714 rfs4_state_g_fini(); 715 716 fem_free(deleg_rdops); 717 fem_free(deleg_wrops); 718 } 719 720 void 721 rfs4_do_server_start(int server_upordown, 722 int srv_delegation, int cluster_booted) 723 { 724 nfs4_srv_t *nsrv4 = nfs4_get_srv(); 725 726 /* Is this a warm start? */ 727 if (server_upordown == NFS_SERVER_QUIESCED) { 728 cmn_err(CE_NOTE, "nfs4_srv: " 729 "server was previously quiesced; " 730 "existing NFSv4 state will be re-used"); 731 732 /* 733 * HA-NFSv4: this is also the signal 734 * that a Resource Group failover has 735 * occurred. 736 */ 737 if (cluster_booted) 738 hanfsv4_failover(nsrv4); 739 } else { 740 /* Cold start */ 741 nsrv4->rfs4_start_time = 0; 742 rfs4_state_zone_init(nsrv4); 743 nsrv4->nfs4_drc = rfs4_init_drc(nfs4_drc_max, 744 nfs4_drc_hash); 745 746 /* 747 * The nfsd service was started with the -s option 748 * we need to pull in any state from the paths indicated. 
749 */ 750 if (curzone == global_zone && rfs4_dss_numnewpaths > 0) { 751 /* read in the stable storage state from these paths */ 752 rfs4_dss_readstate(nsrv4, rfs4_dss_numnewpaths, 753 rfs4_dss_newpaths); 754 } 755 } 756 757 /* Check if delegation is to be enabled */ 758 if (srv_delegation != FALSE) 759 rfs4_set_deleg_policy(nsrv4, SRV_NORMAL_DELEGATE); 760 } 761 762 void 763 rfs4_init_compound_state(struct compound_state *cs) 764 { 765 bzero(cs, sizeof (*cs)); 766 cs->cont = TRUE; 767 cs->access = CS_ACCESS_DENIED; 768 cs->deleg = FALSE; 769 cs->mandlock = FALSE; 770 cs->fh.nfs_fh4_val = cs->fhbuf; 771 } 772 773 /* Do cleanup of the compound_state */ 774 void 775 rfs4_fini_compound_state(struct compound_state *cs) 776 { 777 if (cs->vp) { 778 VN_RELE(cs->vp); 779 } 780 if (cs->saved_vp) { 781 VN_RELE(cs->saved_vp); 782 } 783 if (cs->cr) { 784 crfree(cs->cr); 785 } 786 if (cs->saved_fh.nfs_fh4_val) { 787 kmem_free(cs->saved_fh.nfs_fh4_val, NFS4_FHSIZE); 788 } 789 if (cs->sp) { 790 rfs4x_session_rele(cs->sp); 791 } 792 } 793 794 void 795 rfs4_grace_start(rfs4_servinst_t *sip) 796 { 797 rw_enter(&sip->rwlock, RW_WRITER); 798 sip->start_time = nfs_sys_uptime(); 799 sip->grace_period = rfs4_grace_period; 800 rw_exit(&sip->rwlock); 801 } 802 803 /* 804 * returns true if the instance's grace period has never been started 805 */ 806 int 807 rfs4_servinst_grace_new(rfs4_servinst_t *sip) 808 { 809 time_t start_time; 810 811 rw_enter(&sip->rwlock, RW_READER); 812 start_time = sip->start_time; 813 rw_exit(&sip->rwlock); 814 815 return (start_time == 0); 816 } 817 818 /* 819 * Indicates if server instance is within the 820 * grace period. 
821 */ 822 int 823 rfs4_servinst_in_grace(rfs4_servinst_t *sip) 824 { 825 time_t grace_expiry; 826 827 /* All clients called reclaim-complete */ 828 if (sip->nreclaim == 0 || sip->grace_period == 0) 829 return (0); 830 831 rw_enter(&sip->rwlock, RW_READER); 832 grace_expiry = sip->start_time + sip->grace_period; 833 rw_exit(&sip->rwlock); 834 835 if (nfs_sys_uptime() < grace_expiry) 836 return (1); 837 838 /* Once grace period ends, optimize next calls */ 839 sip->grace_period = 0; 840 return (0); 841 } 842 843 int 844 rfs4_clnt_in_grace(rfs4_client_t *cp) 845 { 846 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0); 847 848 return (rfs4_servinst_in_grace(cp->rc_server_instance)); 849 } 850 851 /* 852 * reset all currently active grace periods 853 */ 854 void 855 rfs4_grace_reset_all(nfs4_srv_t *nsrv4) 856 { 857 rfs4_servinst_t *sip; 858 859 mutex_enter(&nsrv4->servinst_lock); 860 for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev) 861 if (rfs4_servinst_in_grace(sip)) 862 rfs4_grace_start(sip); 863 mutex_exit(&nsrv4->servinst_lock); 864 } 865 866 /* 867 * start any new instances' grace periods 868 */ 869 void 870 rfs4_grace_start_new(nfs4_srv_t *nsrv4) 871 { 872 rfs4_servinst_t *sip; 873 874 mutex_enter(&nsrv4->servinst_lock); 875 for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev) 876 if (rfs4_servinst_grace_new(sip)) 877 rfs4_grace_start(sip); 878 mutex_exit(&nsrv4->servinst_lock); 879 } 880 881 static rfs4_dss_path_t * 882 rfs4_dss_newpath(nfs4_srv_t *nsrv4, rfs4_servinst_t *sip, 883 char *path, unsigned index) 884 { 885 size_t len; 886 rfs4_dss_path_t *dss_path; 887 888 dss_path = kmem_alloc(sizeof (rfs4_dss_path_t), KM_SLEEP); 889 890 /* 891 * Take a copy of the string, since the original may be overwritten. 892 * Sadly, no strdup() in the kernel. 
893 */ 894 /* allow for NUL */ 895 len = strlen(path) + 1; 896 dss_path->path = kmem_alloc(len, KM_SLEEP); 897 (void) strlcpy(dss_path->path, path, len); 898 899 /* associate with servinst */ 900 dss_path->sip = sip; 901 dss_path->index = index; 902 903 /* 904 * Add to list of served paths. 905 * No locking required, as we're only ever called at startup. 906 */ 907 if (nsrv4->dss_pathlist == NULL) { 908 /* this is the first dss_path_t */ 909 910 /* needed for insque/remque */ 911 dss_path->next = dss_path->prev = dss_path; 912 913 nsrv4->dss_pathlist = dss_path; 914 } else { 915 insque(dss_path, nsrv4->dss_pathlist); 916 } 917 918 return (dss_path); 919 } 920 921 /* 922 * Create a new server instance, and make it the currently active instance. 923 * Note that starting the grace period too early will reduce the clients' 924 * recovery window. 925 */ 926 void 927 rfs4_servinst_create(nfs4_srv_t *nsrv4, int start_grace, 928 int dss_npaths, char **dss_paths) 929 { 930 unsigned i; 931 rfs4_servinst_t *sip; 932 rfs4_oldstate_t *oldstate; 933 934 sip = kmem_alloc(sizeof (rfs4_servinst_t), KM_SLEEP); 935 rw_init(&sip->rwlock, NULL, RW_DEFAULT, NULL); 936 937 sip->nreclaim = 0; 938 sip->start_time = (time_t)0; 939 sip->grace_period = (time_t)0; 940 sip->next = NULL; 941 sip->prev = NULL; 942 943 rw_init(&sip->oldstate_lock, NULL, RW_DEFAULT, NULL); 944 /* 945 * This initial dummy entry is required to setup for insque/remque. 946 * It must be skipped over whenever the list is traversed. 
947 */ 948 oldstate = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP); 949 /* insque/remque require initial list entry to be self-terminated */ 950 oldstate->next = oldstate; 951 oldstate->prev = oldstate; 952 sip->oldstate = oldstate; 953 954 955 sip->dss_npaths = dss_npaths; 956 sip->dss_paths = kmem_alloc(dss_npaths * 957 sizeof (rfs4_dss_path_t *), KM_SLEEP); 958 959 for (i = 0; i < dss_npaths; i++) { 960 sip->dss_paths[i] = 961 rfs4_dss_newpath(nsrv4, sip, dss_paths[i], i); 962 } 963 964 mutex_enter(&nsrv4->servinst_lock); 965 if (nsrv4->nfs4_cur_servinst != NULL) { 966 /* add to linked list */ 967 sip->prev = nsrv4->nfs4_cur_servinst; 968 nsrv4->nfs4_cur_servinst->next = sip; 969 } 970 if (start_grace) 971 rfs4_grace_start(sip); 972 /* make the new instance "current" */ 973 nsrv4->nfs4_cur_servinst = sip; 974 975 mutex_exit(&nsrv4->servinst_lock); 976 } 977 978 /* 979 * In future, we might add a rfs4_servinst_destroy(sip) but, for now, destroy 980 * all instances directly. 981 */ 982 void 983 rfs4_servinst_destroy_all(nfs4_srv_t *nsrv4) 984 { 985 rfs4_servinst_t *sip, *prev, *current; 986 #ifdef DEBUG 987 int n = 0; 988 #endif 989 990 mutex_enter(&nsrv4->servinst_lock); 991 ASSERT(nsrv4->nfs4_cur_servinst != NULL); 992 current = nsrv4->nfs4_cur_servinst; 993 nsrv4->nfs4_cur_servinst = NULL; 994 for (sip = current; sip != NULL; sip = prev) { 995 prev = sip->prev; 996 rw_destroy(&sip->rwlock); 997 if (sip->oldstate) 998 kmem_free(sip->oldstate, sizeof (rfs4_oldstate_t)); 999 if (sip->dss_paths) { 1000 int i = sip->dss_npaths; 1001 1002 while (i > 0) { 1003 i--; 1004 if (sip->dss_paths[i] != NULL) { 1005 char *path = sip->dss_paths[i]->path; 1006 1007 if (path != NULL) { 1008 kmem_free(path, 1009 strlen(path) + 1); 1010 } 1011 kmem_free(sip->dss_paths[i], 1012 sizeof (rfs4_dss_path_t)); 1013 } 1014 } 1015 kmem_free(sip->dss_paths, 1016 sip->dss_npaths * sizeof (rfs4_dss_path_t *)); 1017 } 1018 kmem_free(sip, sizeof (rfs4_servinst_t)); 1019 #ifdef DEBUG 1020 n++; 
1021 #endif 1022 } 1023 mutex_exit(&nsrv4->servinst_lock); 1024 } 1025 1026 /* 1027 * Assign the current server instance to a client_t. 1028 * Should be called with cp->rc_dbe held. 1029 */ 1030 void 1031 rfs4_servinst_assign(nfs4_srv_t *nsrv4, rfs4_client_t *cp, 1032 rfs4_servinst_t *sip) 1033 { 1034 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0); 1035 1036 /* 1037 * The lock ensures that if the current instance is in the process 1038 * of changing, we will see the new one. 1039 */ 1040 mutex_enter(&nsrv4->servinst_lock); 1041 cp->rc_server_instance = sip; 1042 mutex_exit(&nsrv4->servinst_lock); 1043 } 1044 1045 rfs4_servinst_t * 1046 rfs4_servinst(rfs4_client_t *cp) 1047 { 1048 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0); 1049 1050 return (cp->rc_server_instance); 1051 } 1052 1053 /* ARGSUSED */ 1054 static void 1055 nullfree(caddr_t resop) 1056 { 1057 } 1058 1059 /* 1060 * This is a fall-through for invalid or not implemented (yet) ops 1061 */ 1062 /* ARGSUSED */ 1063 static void 1064 rfs4_op_inval(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 1065 struct compound_state *cs) 1066 { 1067 *cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_INVAL; 1068 } 1069 1070 /* 1071 * Check if the security flavor, nfsnum, is in the flavor_list. 1072 */ 1073 bool_t 1074 in_flavor_list(int nfsnum, int *flavor_list, int count) 1075 { 1076 int i; 1077 1078 for (i = 0; i < count; i++) { 1079 if (nfsnum == flavor_list[i]) 1080 return (TRUE); 1081 } 1082 return (FALSE); 1083 } 1084 1085 /* 1086 * Used by rfs4_op_secinfo to get the security information from the 1087 * export structure associated with the component. 
1088 */ 1089 /* ARGSUSED */ 1090 nfsstat4 1091 do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp) 1092 { 1093 int error, different_export = 0; 1094 vnode_t *dvp, *vp; 1095 struct exportinfo *exi; 1096 fid_t fid; 1097 uint_t count, i; 1098 secinfo4 *resok_val; 1099 struct secinfo *secp; 1100 seconfig_t *si; 1101 bool_t did_traverse = FALSE; 1102 int dotdot, walk; 1103 nfs_export_t *ne = nfs_get_export(); 1104 1105 dvp = cs->vp; 1106 exi = cs->exi; 1107 ASSERT(exi != NULL); 1108 dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0'); 1109 1110 /* 1111 * If dotdotting, then need to check whether it's above the 1112 * root of a filesystem, or above an export point. 1113 */ 1114 if (dotdot) { 1115 vnode_t *zone_rootvp = ne->exi_root->exi_vp; 1116 1117 ASSERT3U(exi->exi_zoneid, ==, ne->exi_root->exi_zoneid); 1118 /* 1119 * If dotdotting at the root of a filesystem, then 1120 * need to traverse back to the mounted-on filesystem 1121 * and do the dotdot lookup there. 1122 */ 1123 if ((dvp->v_flag & VROOT) || VN_CMP(dvp, zone_rootvp)) { 1124 1125 /* 1126 * If at the system root, then can 1127 * go up no further. 1128 */ 1129 if (VN_CMP(dvp, zone_rootvp)) 1130 return (puterrno4(ENOENT)); 1131 1132 /* 1133 * Traverse back to the mounted-on filesystem 1134 */ 1135 dvp = untraverse(dvp, zone_rootvp); 1136 1137 /* 1138 * Set the different_export flag so we remember 1139 * to pick up a new exportinfo entry for 1140 * this new filesystem. 1141 */ 1142 different_export = 1; 1143 } else { 1144 1145 /* 1146 * If dotdotting above an export point then set 1147 * the different_export to get new export info. 1148 */ 1149 different_export = nfs_exported(exi, dvp); 1150 } 1151 } 1152 1153 /* 1154 * Get the vnode for the component "nm". 
1155 */ 1156 error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cs->cr, 1157 NULL, NULL, NULL); 1158 if (error) 1159 return (puterrno4(error)); 1160 1161 /* 1162 * If the vnode is in a pseudo filesystem, or if the security flavor 1163 * used in the request is valid but not an explicitly shared flavor, 1164 * or the access bit indicates that this is a limited access, 1165 * check whether this vnode is visible. 1166 */ 1167 if (!different_export && 1168 (PSEUDO(exi) || !is_exported_sec(cs->nfsflavor, exi) || 1169 cs->access & CS_ACCESS_LIMITED)) { 1170 if (! nfs_visible(exi, vp, &different_export)) { 1171 VN_RELE(vp); 1172 return (puterrno4(ENOENT)); 1173 } 1174 } 1175 1176 /* 1177 * If it's a mountpoint, then traverse it. 1178 */ 1179 if (vn_ismntpt(vp)) { 1180 if ((error = traverse(&vp)) != 0) { 1181 VN_RELE(vp); 1182 return (puterrno4(error)); 1183 } 1184 /* remember that we had to traverse mountpoint */ 1185 did_traverse = TRUE; 1186 different_export = 1; 1187 } else if (vp->v_vfsp != dvp->v_vfsp) { 1188 /* 1189 * If vp isn't a mountpoint and the vfs ptrs aren't the same, 1190 * then vp is probably an LOFS object. We don't need the 1191 * realvp, we just need to know that we might have crossed 1192 * a server fs boundary and need to call checkexport4. 1193 * (LOFS lookup hides server fs mountpoints, and actually calls 1194 * traverse) 1195 */ 1196 different_export = 1; 1197 } 1198 1199 /* 1200 * Get the export information for it. 1201 */ 1202 if (different_export) { 1203 1204 bzero(&fid, sizeof (fid)); 1205 fid.fid_len = MAXFIDSZ; 1206 error = vop_fid_pseudo(vp, &fid); 1207 if (error) { 1208 VN_RELE(vp); 1209 return (puterrno4(error)); 1210 } 1211 1212 /* We'll need to reassign "exi". 
*/ 1213 if (dotdot) 1214 exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE); 1215 else 1216 exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp); 1217 1218 if (exi == NULL) { 1219 if (did_traverse == TRUE) { 1220 /* 1221 * If this vnode is a mounted-on vnode, 1222 * but the mounted-on file system is not 1223 * exported, send back the secinfo for 1224 * the exported node that the mounted-on 1225 * vnode lives in. 1226 */ 1227 exi = cs->exi; 1228 } else { 1229 VN_RELE(vp); 1230 return (puterrno4(EACCES)); 1231 } 1232 } 1233 } 1234 ASSERT(exi != NULL); 1235 1236 1237 /* 1238 * Create the secinfo result based on the security information 1239 * from the exportinfo structure (exi). 1240 * 1241 * Return all flavors for a pseudo node. 1242 * For a real export node, return the flavor that the client 1243 * has access with. 1244 */ 1245 ASSERT(RW_LOCK_HELD(&ne->exported_lock)); 1246 if (PSEUDO(exi)) { 1247 count = exi->exi_export.ex_seccnt; /* total sec count */ 1248 resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP); 1249 secp = exi->exi_export.ex_secinfo; 1250 1251 for (i = 0; i < count; i++) { 1252 si = &secp[i].s_secinfo; 1253 resok_val[i].flavor = si->sc_rpcnum; 1254 if (resok_val[i].flavor == RPCSEC_GSS) { 1255 rpcsec_gss_info *info; 1256 1257 info = &resok_val[i].flavor_info; 1258 info->qop = si->sc_qop; 1259 info->service = (rpc_gss_svc_t)si->sc_service; 1260 1261 /* get oid opaque data */ 1262 info->oid.sec_oid4_len = 1263 si->sc_gss_mech_type->length; 1264 info->oid.sec_oid4_val = kmem_alloc( 1265 si->sc_gss_mech_type->length, KM_SLEEP); 1266 bcopy( 1267 si->sc_gss_mech_type->elements, 1268 info->oid.sec_oid4_val, 1269 info->oid.sec_oid4_len); 1270 } 1271 } 1272 resp->SECINFO4resok_len = count; 1273 resp->SECINFO4resok_val = resok_val; 1274 } else { 1275 int ret_cnt = 0, k = 0; 1276 int *flavor_list; 1277 1278 count = exi->exi_export.ex_seccnt; /* total sec count */ 1279 secp = exi->exi_export.ex_secinfo; 1280 1281 flavor_list = kmem_alloc(count * 
sizeof (int), KM_SLEEP); 1282 /* find out which flavors to return */ 1283 for (i = 0; i < count; i ++) { 1284 int access, flavor, perm; 1285 1286 flavor = secp[i].s_secinfo.sc_nfsnum; 1287 perm = secp[i].s_flags; 1288 1289 access = nfsauth4_secinfo_access(exi, cs->req, 1290 flavor, perm, cs->basecr); 1291 1292 if (! (access & NFSAUTH_DENIED) && 1293 ! (access & NFSAUTH_WRONGSEC)) { 1294 flavor_list[ret_cnt] = flavor; 1295 ret_cnt++; 1296 } 1297 } 1298 1299 /* Create the returning SECINFO value */ 1300 resok_val = kmem_alloc(ret_cnt * sizeof (secinfo4), KM_SLEEP); 1301 1302 for (i = 0; i < count; i++) { 1303 /* 1304 * If the flavor is in the flavor list, 1305 * fill in resok_val. 1306 */ 1307 si = &secp[i].s_secinfo; 1308 if (in_flavor_list(si->sc_nfsnum, 1309 flavor_list, ret_cnt)) { 1310 resok_val[k].flavor = si->sc_rpcnum; 1311 if (resok_val[k].flavor == RPCSEC_GSS) { 1312 rpcsec_gss_info *info; 1313 1314 info = &resok_val[k].flavor_info; 1315 info->qop = si->sc_qop; 1316 info->service = (rpc_gss_svc_t) 1317 si->sc_service; 1318 1319 /* get oid opaque data */ 1320 info->oid.sec_oid4_len = 1321 si->sc_gss_mech_type->length; 1322 info->oid.sec_oid4_val = kmem_alloc( 1323 si->sc_gss_mech_type->length, 1324 KM_SLEEP); 1325 bcopy(si->sc_gss_mech_type->elements, 1326 info->oid.sec_oid4_val, 1327 info->oid.sec_oid4_len); 1328 } 1329 k++; 1330 } 1331 if (k >= ret_cnt) 1332 break; 1333 } 1334 resp->SECINFO4resok_len = ret_cnt; 1335 resp->SECINFO4resok_val = resok_val; 1336 kmem_free(flavor_list, count * sizeof (int)); 1337 } 1338 1339 VN_RELE(vp); 1340 return (NFS4_OK); 1341 } 1342 1343 /* 1344 * SECINFO (Operation 33): Obtain required security information on 1345 * the component name in the format of (security-mechanism-oid, qop, service) 1346 * triplets. 
1347 */ 1348 /* ARGSUSED */ 1349 static void 1350 rfs4_op_secinfo(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 1351 struct compound_state *cs) 1352 { 1353 SECINFO4args *args = &argop->nfs_argop4_u.opsecinfo; 1354 SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo; 1355 utf8string *utfnm = &args->name; 1356 uint_t len; 1357 char *nm; 1358 struct sockaddr *ca; 1359 char *name = NULL; 1360 nfsstat4 status = NFS4_OK; 1361 1362 DTRACE_NFSV4_2(op__secinfo__start, struct compound_state *, cs, 1363 SECINFO4args *, args); 1364 1365 /* 1366 * Current file handle (cfh) should have been set before getting 1367 * into this function. If not, return error. 1368 */ 1369 if (cs->vp == NULL) { 1370 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 1371 goto out; 1372 } 1373 1374 if (cs->vp->v_type != VDIR) { 1375 *cs->statusp = resp->status = NFS4ERR_NOTDIR; 1376 goto out; 1377 } 1378 1379 /* 1380 * Verify the component name. If failed, error out, but 1381 * do not error out if the component name is a "..". 1382 * SECINFO will return its parents secinfo data for SECINFO "..". 1383 */ 1384 status = utf8_dir_verify(utfnm); 1385 if (status != NFS4_OK) { 1386 if (utfnm->utf8string_len != 2 || 1387 utfnm->utf8string_val[0] != '.' 
|| 1388 utfnm->utf8string_val[1] != '.') { 1389 *cs->statusp = resp->status = status; 1390 goto out; 1391 } 1392 } 1393 1394 nm = utf8_to_str(utfnm, &len, NULL); 1395 if (nm == NULL) { 1396 *cs->statusp = resp->status = NFS4ERR_INVAL; 1397 goto out; 1398 } 1399 1400 if (len > MAXNAMELEN) { 1401 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG; 1402 kmem_free(nm, len); 1403 goto out; 1404 } 1405 1406 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 1407 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND, 1408 MAXPATHLEN + 1); 1409 1410 if (name == NULL) { 1411 *cs->statusp = resp->status = NFS4ERR_INVAL; 1412 kmem_free(nm, len); 1413 goto out; 1414 } 1415 1416 *cs->statusp = resp->status = do_rfs4_op_secinfo(cs, name, resp); 1417 1418 if (resp->status == NFS4_OK && rfs4_has_session(cs)) { 1419 /* 1420 * See rfc 5661 section 2.6.3.1.1.8 and 18.29.3 1421 * 1422 * 2.6.3.1.1.8 1423 * SECINFO and SECINFO_NO_NAME consume the current 1424 * filehandle (note that this is a change from NFSv4.0). 1425 * 1426 * 18.29.3 1427 * On success, the current filehandle is consumed (see 1428 * Section 2.6.3.1.1.8), and if the next operation after 1429 * SECINFO tries to use the current filehandle, that 1430 * operation will fail with the status 1431 * NFS4ERR_NOFILEHANDLE. 1432 */ 1433 VN_RELE(cs->vp); 1434 cs->vp = NULL; 1435 } 1436 1437 if (name != nm) 1438 kmem_free(name, MAXPATHLEN + 1); 1439 kmem_free(nm, len); 1440 1441 out: 1442 DTRACE_NFSV4_2(op__secinfo__done, struct compound_state *, cs, 1443 SECINFO4res *, resp); 1444 } 1445 1446 /* 1447 * Free SECINFO result. 1448 */ 1449 /* ARGSUSED */ 1450 static void 1451 rfs4_op_secinfo_free(nfs_resop4 *resop) 1452 { 1453 SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo; 1454 int count, i; 1455 secinfo4 *resok_val; 1456 1457 /* If this is not an Ok result, nothing to free. 
*/ 1458 if (resp->status != NFS4_OK) { 1459 return; 1460 } 1461 1462 count = resp->SECINFO4resok_len; 1463 resok_val = resp->SECINFO4resok_val; 1464 1465 for (i = 0; i < count; i++) { 1466 if (resok_val[i].flavor == RPCSEC_GSS) { 1467 rpcsec_gss_info *info; 1468 1469 info = &resok_val[i].flavor_info; 1470 kmem_free(info->oid.sec_oid4_val, 1471 info->oid.sec_oid4_len); 1472 } 1473 } 1474 kmem_free(resok_val, count * sizeof (secinfo4)); 1475 resp->SECINFO4resok_len = 0; 1476 resp->SECINFO4resok_val = NULL; 1477 } 1478 1479 /* ARGSUSED */ 1480 static void 1481 rfs4_op_access(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 1482 struct compound_state *cs) 1483 { 1484 ACCESS4args *args = &argop->nfs_argop4_u.opaccess; 1485 ACCESS4res *resp = &resop->nfs_resop4_u.opaccess; 1486 int error; 1487 vnode_t *vp; 1488 struct vattr va; 1489 int checkwriteperm; 1490 cred_t *cr = cs->cr; 1491 bslabel_t *clabel, *slabel; 1492 ts_label_t *tslabel; 1493 boolean_t admin_low_client; 1494 1495 DTRACE_NFSV4_2(op__access__start, struct compound_state *, cs, 1496 ACCESS4args *, args); 1497 1498 #if 0 /* XXX allow access even if !cs->access. Eventually only pseudo fs */ 1499 if (cs->access == CS_ACCESS_DENIED) { 1500 *cs->statusp = resp->status = NFS4ERR_ACCESS; 1501 goto out; 1502 } 1503 #endif 1504 if (cs->vp == NULL) { 1505 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 1506 goto out; 1507 } 1508 1509 ASSERT(cr != NULL); 1510 1511 vp = cs->vp; 1512 1513 /* 1514 * If the file system is exported read only, it is not appropriate 1515 * to check write permissions for regular files and directories. 1516 * Special files are interpreted by the client, so the underlying 1517 * permissions are sent back to the client for interpretation. 
1518 */ 1519 if (rdonly4(req, cs) && 1520 (vp->v_type == VREG || vp->v_type == VDIR)) 1521 checkwriteperm = 0; 1522 else 1523 checkwriteperm = 1; 1524 1525 /* 1526 * XXX 1527 * We need the mode so that we can correctly determine access 1528 * permissions relative to a mandatory lock file. Access to 1529 * mandatory lock files is denied on the server, so it might 1530 * as well be reflected to the server during the open. 1531 */ 1532 va.va_mask = AT_MODE; 1533 error = VOP_GETATTR(vp, &va, 0, cr, NULL); 1534 if (error) { 1535 *cs->statusp = resp->status = puterrno4(error); 1536 goto out; 1537 } 1538 resp->access = 0; 1539 resp->supported = 0; 1540 1541 if (is_system_labeled()) { 1542 ASSERT(req->rq_label != NULL); 1543 clabel = req->rq_label; 1544 DTRACE_PROBE2(tx__rfs4__log__info__opaccess__clabel, char *, 1545 "got client label from request(1)", 1546 struct svc_req *, req); 1547 if (!blequal(&l_admin_low->tsl_label, clabel)) { 1548 if ((tslabel = nfs_getflabel(vp, cs->exi)) == NULL) { 1549 *cs->statusp = resp->status = puterrno4(EACCES); 1550 goto out; 1551 } 1552 slabel = label2bslabel(tslabel); 1553 DTRACE_PROBE3(tx__rfs4__log__info__opaccess__slabel, 1554 char *, "got server label(1) for vp(2)", 1555 bslabel_t *, slabel, vnode_t *, vp); 1556 1557 admin_low_client = B_FALSE; 1558 } else 1559 admin_low_client = B_TRUE; 1560 } 1561 1562 if (args->access & ACCESS4_READ) { 1563 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL); 1564 if (!error && !MANDLOCK(vp, va.va_mode) && 1565 (!is_system_labeled() || admin_low_client || 1566 bldominates(clabel, slabel))) 1567 resp->access |= ACCESS4_READ; 1568 resp->supported |= ACCESS4_READ; 1569 } 1570 if ((args->access & ACCESS4_LOOKUP) && vp->v_type == VDIR) { 1571 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL); 1572 if (!error && (!is_system_labeled() || admin_low_client || 1573 bldominates(clabel, slabel))) 1574 resp->access |= ACCESS4_LOOKUP; 1575 resp->supported |= ACCESS4_LOOKUP; 1576 } 1577 if (checkwriteperm && 1578 (args->access 
& (ACCESS4_MODIFY|ACCESS4_EXTEND))) { 1579 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL); 1580 if (!error && !MANDLOCK(vp, va.va_mode) && 1581 (!is_system_labeled() || admin_low_client || 1582 blequal(clabel, slabel))) 1583 resp->access |= 1584 (args->access & (ACCESS4_MODIFY | ACCESS4_EXTEND)); 1585 resp->supported |= 1586 resp->access & (ACCESS4_MODIFY | ACCESS4_EXTEND); 1587 } 1588 1589 if (checkwriteperm && 1590 (args->access & ACCESS4_DELETE) && vp->v_type == VDIR) { 1591 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL); 1592 if (!error && (!is_system_labeled() || admin_low_client || 1593 blequal(clabel, slabel))) 1594 resp->access |= ACCESS4_DELETE; 1595 resp->supported |= ACCESS4_DELETE; 1596 } 1597 if (args->access & ACCESS4_EXECUTE && vp->v_type != VDIR) { 1598 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL); 1599 if (!error && !MANDLOCK(vp, va.va_mode) && 1600 (!is_system_labeled() || admin_low_client || 1601 bldominates(clabel, slabel))) 1602 resp->access |= ACCESS4_EXECUTE; 1603 resp->supported |= ACCESS4_EXECUTE; 1604 } 1605 1606 if (is_system_labeled() && !admin_low_client) 1607 label_rele(tslabel); 1608 1609 *cs->statusp = resp->status = NFS4_OK; 1610 out: 1611 DTRACE_NFSV4_2(op__access__done, struct compound_state *, cs, 1612 ACCESS4res *, resp); 1613 } 1614 1615 /* ARGSUSED */ 1616 static void 1617 rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 1618 struct compound_state *cs) 1619 { 1620 COMMIT4args *args = &argop->nfs_argop4_u.opcommit; 1621 COMMIT4res *resp = &resop->nfs_resop4_u.opcommit; 1622 int error; 1623 vnode_t *vp = cs->vp; 1624 cred_t *cr = cs->cr; 1625 vattr_t va; 1626 nfs4_srv_t *nsrv4; 1627 1628 DTRACE_NFSV4_2(op__commit__start, struct compound_state *, cs, 1629 COMMIT4args *, args); 1630 1631 if (vp == NULL) { 1632 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 1633 goto out; 1634 } 1635 if (cs->access == CS_ACCESS_DENIED) { 1636 *cs->statusp = resp->status = NFS4ERR_ACCESS; 1637 goto out; 1638 } 1639 1640 if 
(args->offset + args->count < args->offset) { 1641 *cs->statusp = resp->status = NFS4ERR_INVAL; 1642 goto out; 1643 } 1644 1645 va.va_mask = AT_UID; 1646 error = VOP_GETATTR(vp, &va, 0, cr, NULL); 1647 1648 /* 1649 * If we can't get the attributes, then we can't do the 1650 * right access checking. So, we'll fail the request. 1651 */ 1652 if (error) { 1653 *cs->statusp = resp->status = puterrno4(error); 1654 goto out; 1655 } 1656 if (rdonly4(req, cs)) { 1657 *cs->statusp = resp->status = NFS4ERR_ROFS; 1658 goto out; 1659 } 1660 1661 if (vp->v_type != VREG) { 1662 if (vp->v_type == VDIR) 1663 resp->status = NFS4ERR_ISDIR; 1664 else 1665 resp->status = NFS4ERR_INVAL; 1666 *cs->statusp = resp->status; 1667 goto out; 1668 } 1669 1670 if (crgetuid(cr) != va.va_uid && 1671 (error = VOP_ACCESS(vp, VWRITE, 0, cs->cr, NULL))) { 1672 *cs->statusp = resp->status = puterrno4(error); 1673 goto out; 1674 } 1675 1676 error = VOP_FSYNC(vp, FSYNC, cr, NULL); 1677 1678 if (error) { 1679 *cs->statusp = resp->status = puterrno4(error); 1680 goto out; 1681 } 1682 1683 nsrv4 = nfs4_get_srv(); 1684 *cs->statusp = resp->status = NFS4_OK; 1685 resp->writeverf = nsrv4->write4verf; 1686 out: 1687 DTRACE_NFSV4_2(op__commit__done, struct compound_state *, cs, 1688 COMMIT4res *, resp); 1689 } 1690 1691 /* 1692 * rfs4_op_mknod is called from rfs4_op_create after all initial verification 1693 * was completed. It does the nfsv4 create for special files. 
1694 */ 1695 /* ARGSUSED */ 1696 static vnode_t * 1697 do_rfs4_op_mknod(CREATE4args *args, CREATE4res *resp, struct svc_req *req, 1698 struct compound_state *cs, vattr_t *vap, char *nm) 1699 { 1700 int error; 1701 cred_t *cr = cs->cr; 1702 vnode_t *dvp = cs->vp; 1703 vnode_t *vp = NULL; 1704 int mode; 1705 enum vcexcl excl; 1706 1707 switch (args->type) { 1708 case NF4CHR: 1709 case NF4BLK: 1710 if (secpolicy_sys_devices(cr) != 0) { 1711 *cs->statusp = resp->status = NFS4ERR_PERM; 1712 return (NULL); 1713 } 1714 if (args->type == NF4CHR) 1715 vap->va_type = VCHR; 1716 else 1717 vap->va_type = VBLK; 1718 vap->va_rdev = makedevice(args->ftype4_u.devdata.specdata1, 1719 args->ftype4_u.devdata.specdata2); 1720 vap->va_mask |= AT_RDEV; 1721 break; 1722 case NF4SOCK: 1723 vap->va_type = VSOCK; 1724 break; 1725 case NF4FIFO: 1726 vap->va_type = VFIFO; 1727 break; 1728 default: 1729 *cs->statusp = resp->status = NFS4ERR_BADTYPE; 1730 return (NULL); 1731 } 1732 1733 /* 1734 * Must specify the mode. 1735 */ 1736 if (!(vap->va_mask & AT_MODE)) { 1737 *cs->statusp = resp->status = NFS4ERR_INVAL; 1738 return (NULL); 1739 } 1740 1741 excl = EXCL; 1742 1743 mode = 0; 1744 1745 error = VOP_CREATE(dvp, nm, vap, excl, mode, &vp, cr, 0, NULL, NULL); 1746 if (error) { 1747 *cs->statusp = resp->status = puterrno4(error); 1748 return (NULL); 1749 } 1750 return (vp); 1751 } 1752 1753 /* 1754 * nfsv4 create is used to create non-regular files. For regular files, 1755 * use nfsv4 open. 
1756 */ 1757 /* ARGSUSED */ 1758 static void 1759 rfs4_op_create(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 1760 struct compound_state *cs) 1761 { 1762 CREATE4args *args = &argop->nfs_argop4_u.opcreate; 1763 CREATE4res *resp = &resop->nfs_resop4_u.opcreate; 1764 int error; 1765 struct vattr bva, iva, iva2, ava, *vap; 1766 cred_t *cr = cs->cr; 1767 vnode_t *dvp = cs->vp; 1768 vnode_t *vp = NULL; 1769 vnode_t *realvp; 1770 char *nm, *lnm; 1771 uint_t len, llen; 1772 int syncval = 0; 1773 struct nfs4_svgetit_arg sarg; 1774 struct nfs4_ntov_table ntov; 1775 struct statvfs64 sb; 1776 nfsstat4 status; 1777 struct sockaddr *ca; 1778 char *name = NULL; 1779 char *lname = NULL; 1780 1781 DTRACE_NFSV4_2(op__create__start, struct compound_state *, cs, 1782 CREATE4args *, args); 1783 1784 resp->attrset = 0; 1785 1786 if (dvp == NULL) { 1787 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 1788 goto out; 1789 } 1790 1791 /* 1792 * If there is an unshared filesystem mounted on this vnode, 1793 * do not allow to create an object in this directory. 
1794 */ 1795 if (vn_ismntpt(dvp)) { 1796 *cs->statusp = resp->status = NFS4ERR_ACCESS; 1797 goto out; 1798 } 1799 1800 /* Verify that type is correct */ 1801 switch (args->type) { 1802 case NF4LNK: 1803 case NF4BLK: 1804 case NF4CHR: 1805 case NF4SOCK: 1806 case NF4FIFO: 1807 case NF4DIR: 1808 break; 1809 default: 1810 *cs->statusp = resp->status = NFS4ERR_BADTYPE; 1811 goto out; 1812 }; 1813 1814 if (cs->access == CS_ACCESS_DENIED) { 1815 *cs->statusp = resp->status = NFS4ERR_ACCESS; 1816 goto out; 1817 } 1818 if (dvp->v_type != VDIR) { 1819 *cs->statusp = resp->status = NFS4ERR_NOTDIR; 1820 goto out; 1821 } 1822 status = utf8_dir_verify(&args->objname); 1823 if (status != NFS4_OK) { 1824 *cs->statusp = resp->status = status; 1825 goto out; 1826 } 1827 1828 if (rdonly4(req, cs)) { 1829 *cs->statusp = resp->status = NFS4ERR_ROFS; 1830 goto out; 1831 } 1832 1833 /* 1834 * Name of newly created object 1835 */ 1836 nm = utf8_to_fn(&args->objname, &len, NULL); 1837 if (nm == NULL) { 1838 *cs->statusp = resp->status = NFS4ERR_INVAL; 1839 goto out; 1840 } 1841 1842 if (len > MAXNAMELEN) { 1843 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG; 1844 kmem_free(nm, len); 1845 goto out; 1846 } 1847 1848 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 1849 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND, 1850 MAXPATHLEN + 1); 1851 1852 if (name == NULL) { 1853 *cs->statusp = resp->status = NFS4ERR_INVAL; 1854 kmem_free(nm, len); 1855 goto out; 1856 } 1857 1858 resp->attrset = 0; 1859 1860 sarg.sbp = &sb; 1861 sarg.is_referral = B_FALSE; 1862 nfs4_ntov_table_init(&ntov); 1863 1864 status = do_rfs4_set_attrs(&resp->attrset, 1865 &args->createattrs, cs, &sarg, &ntov, NFS4ATTR_SETIT); 1866 1867 if (sarg.vap->va_mask == 0 && status == NFS4_OK) 1868 status = NFS4ERR_INVAL; 1869 1870 if (status != NFS4_OK) { 1871 *cs->statusp = resp->status = status; 1872 if (name != nm) 1873 kmem_free(name, MAXPATHLEN + 1); 1874 kmem_free(nm, len); 1875 
nfs4_ntov_table_free(&ntov, &sarg); 1876 resp->attrset = 0; 1877 goto out; 1878 } 1879 1880 /* Get "before" change value */ 1881 bva.va_mask = AT_CTIME|AT_SEQ|AT_MODE; 1882 error = VOP_GETATTR(dvp, &bva, 0, cr, NULL); 1883 if (error) { 1884 *cs->statusp = resp->status = puterrno4(error); 1885 if (name != nm) 1886 kmem_free(name, MAXPATHLEN + 1); 1887 kmem_free(nm, len); 1888 nfs4_ntov_table_free(&ntov, &sarg); 1889 resp->attrset = 0; 1890 goto out; 1891 } 1892 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bva.va_ctime) 1893 1894 vap = sarg.vap; 1895 1896 /* 1897 * Set the default initial values for attributes when the parent 1898 * directory does not have the VSUID/VSGID bit set and they have 1899 * not been specified in createattrs. 1900 */ 1901 if (!(bva.va_mode & VSUID) && (vap->va_mask & AT_UID) == 0) { 1902 vap->va_uid = crgetuid(cr); 1903 vap->va_mask |= AT_UID; 1904 } 1905 if (!(bva.va_mode & VSGID) && (vap->va_mask & AT_GID) == 0) { 1906 vap->va_gid = crgetgid(cr); 1907 vap->va_mask |= AT_GID; 1908 } 1909 1910 vap->va_mask |= AT_TYPE; 1911 switch (args->type) { 1912 case NF4DIR: 1913 vap->va_type = VDIR; 1914 if ((vap->va_mask & AT_MODE) == 0) { 1915 vap->va_mode = 0700; /* default: owner rwx only */ 1916 vap->va_mask |= AT_MODE; 1917 } 1918 error = VOP_MKDIR(dvp, name, vap, &vp, cr, NULL, 0, NULL); 1919 if (error) 1920 break; 1921 1922 /* 1923 * Get the initial "after" sequence number, if it fails, 1924 * set to zero 1925 */ 1926 iva.va_mask = AT_SEQ; 1927 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL)) 1928 iva.va_seq = 0; 1929 break; 1930 case NF4LNK: 1931 vap->va_type = VLNK; 1932 if ((vap->va_mask & AT_MODE) == 0) { 1933 vap->va_mode = 0700; /* default: owner rwx only */ 1934 vap->va_mask |= AT_MODE; 1935 } 1936 1937 /* 1938 * symlink names must be treated as data 1939 */ 1940 lnm = utf8_to_str((utf8string *)&args->ftype4_u.linkdata, 1941 &llen, NULL); 1942 1943 if (lnm == NULL) { 1944 *cs->statusp = resp->status = NFS4ERR_INVAL; 1945 if (name != nm) 1946 
kmem_free(name, MAXPATHLEN + 1); 1947 kmem_free(nm, len); 1948 nfs4_ntov_table_free(&ntov, &sarg); 1949 resp->attrset = 0; 1950 goto out; 1951 } 1952 1953 if (llen > MAXPATHLEN) { 1954 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG; 1955 if (name != nm) 1956 kmem_free(name, MAXPATHLEN + 1); 1957 kmem_free(nm, len); 1958 kmem_free(lnm, llen); 1959 nfs4_ntov_table_free(&ntov, &sarg); 1960 resp->attrset = 0; 1961 goto out; 1962 } 1963 1964 lname = nfscmd_convname(ca, cs->exi, lnm, 1965 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1); 1966 1967 if (lname == NULL) { 1968 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT; 1969 if (name != nm) 1970 kmem_free(name, MAXPATHLEN + 1); 1971 kmem_free(nm, len); 1972 kmem_free(lnm, llen); 1973 nfs4_ntov_table_free(&ntov, &sarg); 1974 resp->attrset = 0; 1975 goto out; 1976 } 1977 1978 error = VOP_SYMLINK(dvp, name, vap, lname, cr, NULL, 0); 1979 if (lname != lnm) 1980 kmem_free(lname, MAXPATHLEN + 1); 1981 kmem_free(lnm, llen); 1982 if (error) 1983 break; 1984 1985 /* 1986 * Get the initial "after" sequence number, if it fails, 1987 * set to zero 1988 */ 1989 iva.va_mask = AT_SEQ; 1990 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL)) 1991 iva.va_seq = 0; 1992 1993 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr, 1994 NULL, NULL, NULL); 1995 if (error) 1996 break; 1997 1998 /* 1999 * va_seq is not safe over VOP calls, check it again 2000 * if it has changed zero out iva to force atomic = FALSE. 2001 */ 2002 iva2.va_mask = AT_SEQ; 2003 if (VOP_GETATTR(dvp, &iva2, 0, cs->cr, NULL) || 2004 iva2.va_seq != iva.va_seq) 2005 iva.va_seq = 0; 2006 break; 2007 default: 2008 /* 2009 * probably a special file. 
2010 */ 2011 if ((vap->va_mask & AT_MODE) == 0) { 2012 vap->va_mode = 0600; /* default: owner rw only */ 2013 vap->va_mask |= AT_MODE; 2014 } 2015 syncval = FNODSYNC; 2016 /* 2017 * We know this will only generate one VOP call 2018 */ 2019 vp = do_rfs4_op_mknod(args, resp, req, cs, vap, name); 2020 2021 if (vp == NULL) { 2022 if (name != nm) 2023 kmem_free(name, MAXPATHLEN + 1); 2024 kmem_free(nm, len); 2025 nfs4_ntov_table_free(&ntov, &sarg); 2026 resp->attrset = 0; 2027 goto out; 2028 } 2029 2030 /* 2031 * Get the initial "after" sequence number, if it fails, 2032 * set to zero 2033 */ 2034 iva.va_mask = AT_SEQ; 2035 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL)) 2036 iva.va_seq = 0; 2037 2038 break; 2039 } 2040 if (name != nm) 2041 kmem_free(name, MAXPATHLEN + 1); 2042 kmem_free(nm, len); 2043 2044 if (error) { 2045 *cs->statusp = resp->status = puterrno4(error); 2046 } 2047 2048 /* 2049 * Force modified data and metadata out to stable storage. 2050 */ 2051 (void) VOP_FSYNC(dvp, 0, cr, NULL); 2052 2053 if (resp->status != NFS4_OK) { 2054 if (vp != NULL) 2055 VN_RELE(vp); 2056 nfs4_ntov_table_free(&ntov, &sarg); 2057 resp->attrset = 0; 2058 goto out; 2059 } 2060 2061 /* 2062 * Finish setup of cinfo response, "before" value already set. 2063 * Get "after" change value, if it fails, simply return the 2064 * before value. 2065 */ 2066 ava.va_mask = AT_CTIME|AT_SEQ; 2067 if (VOP_GETATTR(dvp, &ava, 0, cr, NULL)) { 2068 ava.va_ctime = bva.va_ctime; 2069 ava.va_seq = 0; 2070 } 2071 NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, ava.va_ctime); 2072 2073 /* 2074 * True verification that object was created with correct 2075 * attrs is impossible. The attrs could have been changed 2076 * immediately after object creation. If attributes did 2077 * not verify, the only recourse for the server is to 2078 * destroy the object. Maybe if some attrs (like gid) 2079 * are set incorrectly, the object should be destroyed; 2080 * however, seems bad as a default policy. 
Do we really 2081 * want to destroy an object over one of the times not 2082 * verifying correctly? For these reasons, the server 2083 * currently sets bits in attrset for createattrs 2084 * that were set; however, no verification is done. 2085 * 2086 * vmask_to_nmask accounts for vattr bits set on create 2087 * [do_rfs4_set_attrs() only sets resp bits for 2088 * non-vattr/vfs bits.] 2089 * Mask off any bits set by default so as not to return 2090 * more attrset bits than were requested in createattrs 2091 */ 2092 nfs4_vmask_to_nmask(sarg.vap->va_mask, &resp->attrset); 2093 resp->attrset &= args->createattrs.attrmask; 2094 nfs4_ntov_table_free(&ntov, &sarg); 2095 2096 error = makefh4(&cs->fh, vp, cs->exi); 2097 if (error) { 2098 *cs->statusp = resp->status = puterrno4(error); 2099 } 2100 2101 /* 2102 * The cinfo.atomic = TRUE only if we got no errors, we have 2103 * non-zero va_seq's, and it has incremented by exactly one 2104 * during the creation and it didn't change during the VOP_LOOKUP 2105 * or VOP_FSYNC. 2106 */ 2107 if (!error && bva.va_seq && iva.va_seq && ava.va_seq && 2108 iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq) 2109 resp->cinfo.atomic = TRUE; 2110 else 2111 resp->cinfo.atomic = FALSE; 2112 2113 /* 2114 * Force modified metadata out to stable storage. 
2115 * 2116 * if a underlying vp exists, pass it to VOP_FSYNC 2117 */ 2118 if (VOP_REALVP(vp, &realvp, NULL) == 0) 2119 (void) VOP_FSYNC(realvp, syncval, cr, NULL); 2120 else 2121 (void) VOP_FSYNC(vp, syncval, cr, NULL); 2122 2123 if (resp->status != NFS4_OK) { 2124 VN_RELE(vp); 2125 goto out; 2126 } 2127 if (cs->vp) 2128 VN_RELE(cs->vp); 2129 2130 cs->vp = vp; 2131 *cs->statusp = resp->status = NFS4_OK; 2132 out: 2133 DTRACE_NFSV4_2(op__create__done, struct compound_state *, cs, 2134 CREATE4res *, resp); 2135 } 2136 2137 /*ARGSUSED*/ 2138 static void 2139 rfs4_op_delegpurge(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 2140 struct compound_state *cs) 2141 { 2142 DTRACE_NFSV4_2(op__delegpurge__start, struct compound_state *, cs, 2143 DELEGPURGE4args *, &argop->nfs_argop4_u.opdelegpurge); 2144 2145 rfs4_op_inval(argop, resop, req, cs); 2146 2147 DTRACE_NFSV4_2(op__delegpurge__done, struct compound_state *, cs, 2148 DELEGPURGE4res *, &resop->nfs_resop4_u.opdelegpurge); 2149 } 2150 2151 /*ARGSUSED*/ 2152 static void 2153 rfs4_op_delegreturn(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 2154 struct compound_state *cs) 2155 { 2156 DELEGRETURN4args *args = &argop->nfs_argop4_u.opdelegreturn; 2157 DELEGRETURN4res *resp = &resop->nfs_resop4_u.opdelegreturn; 2158 rfs4_deleg_state_t *dsp; 2159 nfsstat4 status; 2160 2161 DTRACE_NFSV4_2(op__delegreturn__start, struct compound_state *, cs, 2162 DELEGRETURN4args *, args); 2163 2164 status = rfs4_get_deleg_state(&args->deleg_stateid, &dsp); 2165 resp->status = *cs->statusp = status; 2166 if (status != NFS4_OK) 2167 goto out; 2168 2169 /* Ensure specified filehandle matches */ 2170 if (cs->vp != dsp->rds_finfo->rf_vp) { 2171 resp->status = *cs->statusp = NFS4ERR_BAD_STATEID; 2172 } else 2173 rfs4_return_deleg(dsp, FALSE); 2174 2175 rfs4_update_lease(dsp->rds_client); 2176 2177 rfs4_deleg_state_rele(dsp); 2178 out: 2179 DTRACE_NFSV4_2(op__delegreturn__done, struct compound_state *, cs, 2180 DELEGRETURN4res 
*, resp); 2181 } 2182 2183 /* 2184 * Check to see if a given "flavor" is an explicitly shared flavor. 2185 * The assumption of this routine is the "flavor" is already a valid 2186 * flavor in the secinfo list of "exi". 2187 * 2188 * e.g. 2189 * # share -o sec=flavor1 /export 2190 * # share -o sec=flavor2 /export/home 2191 * 2192 * flavor2 is not an explicitly shared flavor for /export, 2193 * however it is in the secinfo list for /export thru the 2194 * server namespace setup. 2195 */ 2196 int 2197 is_exported_sec(int flavor, struct exportinfo *exi) 2198 { 2199 int i; 2200 struct secinfo *sp; 2201 2202 sp = exi->exi_export.ex_secinfo; 2203 for (i = 0; i < exi->exi_export.ex_seccnt; i++) { 2204 if (flavor == sp[i].s_secinfo.sc_nfsnum || 2205 sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) { 2206 return (SEC_REF_EXPORTED(&sp[i])); 2207 } 2208 } 2209 2210 /* Should not reach this point based on the assumption */ 2211 return (0); 2212 } 2213 2214 /* 2215 * Check if the security flavor used in the request matches what is 2216 * required at the export point or at the root pseudo node (exi_root). 2217 * 2218 * returns 1 if there's a match or if exported with AUTH_NONE; 0 otherwise. 2219 * 2220 */ 2221 static int 2222 secinfo_match_or_authnone(struct compound_state *cs) 2223 { 2224 int i; 2225 struct secinfo *sp; 2226 2227 /* 2228 * Check cs->nfsflavor (from the request) against 2229 * the current export data in cs->exi. 2230 */ 2231 sp = cs->exi->exi_export.ex_secinfo; 2232 for (i = 0; i < cs->exi->exi_export.ex_seccnt; i++) { 2233 if (cs->nfsflavor == sp[i].s_secinfo.sc_nfsnum || 2234 sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) 2235 return (1); 2236 } 2237 2238 return (0); 2239 } 2240 2241 /* 2242 * Check the access authority for the client and return the correct error. 
2243 */ 2244 nfsstat4 2245 call_checkauth4(struct compound_state *cs, struct svc_req *req) 2246 { 2247 int authres; 2248 2249 /* 2250 * First, check if the security flavor used in the request 2251 * are among the flavors set in the server namespace. 2252 */ 2253 if (!secinfo_match_or_authnone(cs)) { 2254 *cs->statusp = NFS4ERR_WRONGSEC; 2255 return (*cs->statusp); 2256 } 2257 2258 authres = checkauth4(cs, req); 2259 2260 if (authres > 0) { 2261 *cs->statusp = NFS4_OK; 2262 if (! (cs->access & CS_ACCESS_LIMITED)) 2263 cs->access = CS_ACCESS_OK; 2264 } else if (authres == 0) { 2265 *cs->statusp = NFS4ERR_ACCESS; 2266 } else if (authres == -2) { 2267 *cs->statusp = NFS4ERR_WRONGSEC; 2268 } else { 2269 *cs->statusp = NFS4ERR_DELAY; 2270 } 2271 return (*cs->statusp); 2272 } 2273 2274 /* 2275 * bitmap4_to_attrmask is called by getattr and readdir. 2276 * It sets up the vattr mask and determines whether vfsstat call is needed 2277 * based on the input bitmap. 2278 * Returns nfsv4 status. 2279 */ 2280 static nfsstat4 2281 bitmap4_to_attrmask(bitmap4 breq, struct nfs4_svgetit_arg *sargp) 2282 { 2283 int i; 2284 uint_t va_mask; 2285 struct statvfs64 *sbp = sargp->sbp; 2286 2287 sargp->sbp = NULL; 2288 sargp->flag = 0; 2289 sargp->rdattr_error = NFS4_OK; 2290 sargp->mntdfid_set = FALSE; 2291 if (sargp->cs->vp) 2292 sargp->xattr = get_fh4_flag(&sargp->cs->fh, 2293 FH4_ATTRDIR | FH4_NAMEDATTR); 2294 else 2295 sargp->xattr = 0; 2296 2297 /* 2298 * Set rdattr_error_req to true if return error per 2299 * failed entry rather than fail the readdir. 
2300 */ 2301 if (breq & FATTR4_RDATTR_ERROR_MASK) 2302 sargp->rdattr_error_req = 1; 2303 else 2304 sargp->rdattr_error_req = 0; 2305 2306 /* 2307 * generate the va_mask 2308 * Handle the easy cases first 2309 */ 2310 switch (breq) { 2311 case NFS4_NTOV_ATTR_MASK: 2312 sargp->vap->va_mask = NFS4_NTOV_ATTR_AT_MASK; 2313 return (NFS4_OK); 2314 2315 case NFS4_FS_ATTR_MASK: 2316 sargp->vap->va_mask = NFS4_FS_ATTR_AT_MASK; 2317 sargp->sbp = sbp; 2318 return (NFS4_OK); 2319 2320 case NFS4_NTOV_ATTR_CACHE_MASK: 2321 sargp->vap->va_mask = NFS4_NTOV_ATTR_CACHE_AT_MASK; 2322 return (NFS4_OK); 2323 2324 case FATTR4_LEASE_TIME_MASK: 2325 sargp->vap->va_mask = 0; 2326 return (NFS4_OK); 2327 2328 default: 2329 va_mask = 0; 2330 for (i = 0; i < nfs4_ntov_map_size; i++) { 2331 if ((breq & nfs4_ntov_map[i].fbit) && 2332 nfs4_ntov_map[i].vbit) 2333 va_mask |= nfs4_ntov_map[i].vbit; 2334 } 2335 2336 /* 2337 * Check is vfsstat is needed 2338 */ 2339 if (breq & NFS4_FS_ATTR_MASK) 2340 sargp->sbp = sbp; 2341 2342 sargp->vap->va_mask = va_mask; 2343 return (NFS4_OK); 2344 } 2345 /* NOTREACHED */ 2346 } 2347 2348 /* 2349 * bitmap4_get_sysattrs is called by getattr and readdir. 2350 * It calls both VOP_GETATTR and VFS_STATVFS calls to get the attrs. 2351 * Returns nfsv4 status. 
2352 */ 2353 static nfsstat4 2354 bitmap4_get_sysattrs(struct nfs4_svgetit_arg *sargp) 2355 { 2356 int error; 2357 struct compound_state *cs = sargp->cs; 2358 vnode_t *vp = cs->vp; 2359 2360 if (sargp->sbp != NULL) { 2361 if (error = VFS_STATVFS(vp->v_vfsp, sargp->sbp)) { 2362 sargp->sbp = NULL; /* to identify error */ 2363 return (puterrno4(error)); 2364 } 2365 } 2366 2367 return (rfs4_vop_getattr(vp, sargp->vap, 0, cs->cr)); 2368 } 2369 2370 static void 2371 nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp) 2372 { 2373 ntovp->na = kmem_zalloc(sizeof (union nfs4_attr_u) * nfs4_ntov_map_size, 2374 KM_SLEEP); 2375 ntovp->attrcnt = 0; 2376 ntovp->vfsstat = FALSE; 2377 } 2378 2379 static void 2380 nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp, 2381 struct nfs4_svgetit_arg *sargp) 2382 { 2383 int i; 2384 union nfs4_attr_u *na; 2385 uint8_t *amap; 2386 2387 /* 2388 * XXX Should do the same checks for whether the bit is set 2389 */ 2390 for (i = 0, na = ntovp->na, amap = ntovp->amap; 2391 i < ntovp->attrcnt; i++, na++, amap++) { 2392 (void) (*nfs4_ntov_map[*amap].sv_getit)( 2393 NFS4ATTR_FREEIT, sargp, na); 2394 } 2395 if ((sargp->op == NFS4ATTR_SETIT) || (sargp->op == NFS4ATTR_VERIT)) { 2396 /* 2397 * xdr_free for getattr will be done later 2398 */ 2399 for (i = 0, na = ntovp->na, amap = ntovp->amap; 2400 i < ntovp->attrcnt; i++, na++, amap++) { 2401 xdr_free(nfs4_ntov_map[*amap].xfunc, (caddr_t)na); 2402 } 2403 } 2404 kmem_free(ntovp->na, sizeof (union nfs4_attr_u) * nfs4_ntov_map_size); 2405 } 2406 2407 /* 2408 * do_rfs4_op_getattr gets the system attrs and converts into fattr4. 
2409 */ 2410 static nfsstat4 2411 do_rfs4_op_getattr(bitmap4 breq, fattr4 *fattrp, 2412 struct nfs4_svgetit_arg *sargp) 2413 { 2414 int error = 0; 2415 int i, k; 2416 struct nfs4_ntov_table ntov; 2417 XDR xdr; 2418 ulong_t xdr_size; 2419 char *xdr_attrs; 2420 nfsstat4 status = NFS4_OK; 2421 nfsstat4 prev_rdattr_error = sargp->rdattr_error; 2422 union nfs4_attr_u *na; 2423 uint8_t *amap; 2424 2425 sargp->op = NFS4ATTR_GETIT; 2426 sargp->flag = 0; 2427 2428 fattrp->attrmask = 0; 2429 /* if no bits requested, then return empty fattr4 */ 2430 if (breq == 0) { 2431 fattrp->attrlist4_len = 0; 2432 fattrp->attrlist4 = NULL; 2433 return (NFS4_OK); 2434 } 2435 2436 /* 2437 * return NFS4ERR_INVAL when client requests write-only attrs 2438 */ 2439 if (breq & (FATTR4_TIME_ACCESS_SET_MASK | FATTR4_TIME_MODIFY_SET_MASK)) 2440 return (NFS4ERR_INVAL); 2441 2442 nfs4_ntov_table_init(&ntov); 2443 na = ntov.na; 2444 amap = ntov.amap; 2445 2446 /* 2447 * Now loop to get or verify the attrs 2448 */ 2449 for (i = 0; i < nfs4_ntov_map_size; i++) { 2450 if (breq & nfs4_ntov_map[i].fbit) { 2451 if ((*nfs4_ntov_map[i].sv_getit)( 2452 NFS4ATTR_SUPPORTED, sargp, NULL) == 0) { 2453 2454 error = (*nfs4_ntov_map[i].sv_getit)( 2455 NFS4ATTR_GETIT, sargp, na); 2456 2457 /* 2458 * Possible error values: 2459 * >0 if sv_getit failed to 2460 * get the attr; 0 if succeeded; 2461 * <0 if rdattr_error and the 2462 * attribute cannot be returned. 2463 */ 2464 if (error && !(sargp->rdattr_error_req)) 2465 goto done; 2466 /* 2467 * If error then just for entry 2468 */ 2469 if (error == 0) { 2470 fattrp->attrmask |= 2471 nfs4_ntov_map[i].fbit; 2472 *amap++ = 2473 (uint8_t)nfs4_ntov_map[i].nval; 2474 na++; 2475 (ntov.attrcnt)++; 2476 } else if ((error > 0) && 2477 (sargp->rdattr_error == NFS4_OK)) { 2478 sargp->rdattr_error = puterrno4(error); 2479 } 2480 error = 0; 2481 } 2482 } 2483 } 2484 2485 /* 2486 * If rdattr_error was set after the return value for it was assigned, 2487 * update it. 
2488 */ 2489 if (prev_rdattr_error != sargp->rdattr_error) { 2490 na = ntov.na; 2491 amap = ntov.amap; 2492 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) { 2493 k = *amap; 2494 if (k < FATTR4_RDATTR_ERROR) { 2495 continue; 2496 } 2497 if ((k == FATTR4_RDATTR_ERROR) && 2498 ((*nfs4_ntov_map[k].sv_getit)( 2499 NFS4ATTR_SUPPORTED, sargp, NULL) == 0)) { 2500 2501 (void) (*nfs4_ntov_map[k].sv_getit)( 2502 NFS4ATTR_GETIT, sargp, na); 2503 } 2504 break; 2505 } 2506 } 2507 2508 xdr_size = 0; 2509 na = ntov.na; 2510 amap = ntov.amap; 2511 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) { 2512 xdr_size += xdr_sizeof(nfs4_ntov_map[*amap].xfunc, na); 2513 } 2514 2515 fattrp->attrlist4_len = xdr_size; 2516 if (xdr_size) { 2517 /* freed by rfs4_op_getattr_free() */ 2518 fattrp->attrlist4 = xdr_attrs = kmem_zalloc(xdr_size, KM_SLEEP); 2519 2520 xdrmem_create(&xdr, xdr_attrs, xdr_size, XDR_ENCODE); 2521 2522 na = ntov.na; 2523 amap = ntov.amap; 2524 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) { 2525 if (!(*nfs4_ntov_map[*amap].xfunc)(&xdr, na)) { 2526 DTRACE_PROBE1(nfss__e__getattr4_encfail, 2527 int, *amap); 2528 status = NFS4ERR_SERVERFAULT; 2529 break; 2530 } 2531 } 2532 /* xdrmem_destroy(&xdrs); */ /* NO-OP */ 2533 } else { 2534 fattrp->attrlist4 = NULL; 2535 } 2536 done: 2537 2538 nfs4_ntov_table_free(&ntov, sargp); 2539 2540 if (error != 0) 2541 status = puterrno4(error); 2542 2543 return (status); 2544 } 2545 2546 /* ARGSUSED */ 2547 static void 2548 rfs4_op_getattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 2549 struct compound_state *cs) 2550 { 2551 GETATTR4args *args = &argop->nfs_argop4_u.opgetattr; 2552 GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr; 2553 struct nfs4_svgetit_arg sarg; 2554 struct statvfs64 sb; 2555 nfsstat4 status; 2556 2557 DTRACE_NFSV4_2(op__getattr__start, struct compound_state *, cs, 2558 GETATTR4args *, args); 2559 2560 if (cs->vp == NULL) { 2561 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 2562 goto out; 2563 } 
2564 2565 if (cs->access == CS_ACCESS_DENIED) { 2566 *cs->statusp = resp->status = NFS4ERR_ACCESS; 2567 goto out; 2568 } 2569 2570 sarg.sbp = &sb; 2571 sarg.cs = cs; 2572 sarg.is_referral = B_FALSE; 2573 2574 status = bitmap4_to_attrmask(args->attr_request, &sarg); 2575 if (status == NFS4_OK) { 2576 2577 status = bitmap4_get_sysattrs(&sarg); 2578 if (status == NFS4_OK) { 2579 2580 /* Is this a referral? */ 2581 if (vn_is_nfs_reparse(cs->vp, cs->cr)) { 2582 /* Older V4 Solaris client sees a link */ 2583 if (client_is_downrev(req)) 2584 sarg.vap->va_type = VLNK; 2585 else 2586 sarg.is_referral = B_TRUE; 2587 } 2588 2589 status = do_rfs4_op_getattr(args->attr_request, 2590 &resp->obj_attributes, &sarg); 2591 } 2592 } 2593 *cs->statusp = resp->status = status; 2594 out: 2595 DTRACE_NFSV4_2(op__getattr__done, struct compound_state *, cs, 2596 GETATTR4res *, resp); 2597 } 2598 2599 static void 2600 rfs4_op_getattr_free(nfs_resop4 *resop) 2601 { 2602 GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr; 2603 2604 nfs4_fattr4_free(&resp->obj_attributes); 2605 } 2606 2607 /* ARGSUSED */ 2608 static void 2609 rfs4_op_getfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 2610 struct compound_state *cs) 2611 { 2612 GETFH4res *resp = &resop->nfs_resop4_u.opgetfh; 2613 2614 DTRACE_NFSV4_1(op__getfh__start, struct compound_state *, cs); 2615 2616 if (cs->vp == NULL) { 2617 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 2618 goto out; 2619 } 2620 if (cs->access == CS_ACCESS_DENIED) { 2621 *cs->statusp = resp->status = NFS4ERR_ACCESS; 2622 goto out; 2623 } 2624 2625 /* check for reparse point at the share point */ 2626 if (cs->exi->exi_moved || vn_is_nfs_reparse(cs->exi->exi_vp, cs->cr)) { 2627 /* it's all bad */ 2628 cs->exi->exi_moved = 1; 2629 *cs->statusp = resp->status = NFS4ERR_MOVED; 2630 DTRACE_PROBE2(nfs4serv__func__referral__shared__moved, 2631 vnode_t *, cs->vp, char *, "rfs4_op_getfh"); 2632 return; 2633 } 2634 2635 /* check for reparse point at vp */ 2636 
if (vn_is_nfs_reparse(cs->vp, cs->cr) && !client_is_downrev(req)) { 2637 /* it's not all bad */ 2638 *cs->statusp = resp->status = NFS4ERR_MOVED; 2639 DTRACE_PROBE2(nfs4serv__func__referral__moved, 2640 vnode_t *, cs->vp, char *, "rfs4_op_getfh"); 2641 return; 2642 } 2643 2644 resp->object.nfs_fh4_val = 2645 kmem_alloc(cs->fh.nfs_fh4_len, KM_SLEEP); 2646 nfs_fh4_copy(&cs->fh, &resp->object); 2647 *cs->statusp = resp->status = NFS4_OK; 2648 out: 2649 DTRACE_NFSV4_2(op__getfh__done, struct compound_state *, cs, 2650 GETFH4res *, resp); 2651 } 2652 2653 static void 2654 rfs4_op_getfh_free(nfs_resop4 *resop) 2655 { 2656 GETFH4res *resp = &resop->nfs_resop4_u.opgetfh; 2657 2658 if (resp->status == NFS4_OK && 2659 resp->object.nfs_fh4_val != NULL) { 2660 kmem_free(resp->object.nfs_fh4_val, resp->object.nfs_fh4_len); 2661 resp->object.nfs_fh4_val = NULL; 2662 resp->object.nfs_fh4_len = 0; 2663 } 2664 } 2665 2666 /* 2667 * illegal: args: void 2668 * res : status (NFS4ERR_OP_ILLEGAL) 2669 */ 2670 /* ARGSUSED */ 2671 static void 2672 rfs4_op_illegal(nfs_argop4 *argop, nfs_resop4 *resop, 2673 struct svc_req *req, struct compound_state *cs) 2674 { 2675 ILLEGAL4res *resp = &resop->nfs_resop4_u.opillegal; 2676 2677 resop->resop = OP_ILLEGAL; 2678 *cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL; 2679 } 2680 2681 /* ARGSUSED */ 2682 static void 2683 rfs4_op_notsup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 2684 struct compound_state *cs) 2685 { 2686 *cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_NOTSUPP; 2687 } 2688 2689 /* 2690 * link: args: SAVED_FH: file, CURRENT_FH: target directory 2691 * res: status. 
If success - CURRENT_FH unchanged, return change_info 2692 */ 2693 /* ARGSUSED */ 2694 static void 2695 rfs4_op_link(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 2696 struct compound_state *cs) 2697 { 2698 LINK4args *args = &argop->nfs_argop4_u.oplink; 2699 LINK4res *resp = &resop->nfs_resop4_u.oplink; 2700 int error; 2701 vnode_t *vp; 2702 vnode_t *dvp; 2703 struct vattr bdva, idva, adva; 2704 char *nm; 2705 uint_t len; 2706 struct sockaddr *ca; 2707 char *name = NULL; 2708 nfsstat4 status; 2709 2710 DTRACE_NFSV4_2(op__link__start, struct compound_state *, cs, 2711 LINK4args *, args); 2712 2713 /* SAVED_FH: source object */ 2714 vp = cs->saved_vp; 2715 if (vp == NULL) { 2716 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 2717 goto out; 2718 } 2719 2720 /* CURRENT_FH: target directory */ 2721 dvp = cs->vp; 2722 if (dvp == NULL) { 2723 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 2724 goto out; 2725 } 2726 2727 /* 2728 * If there is a non-shared filesystem mounted on this vnode, 2729 * do not allow to link any file in this directory. 
2730 */ 2731 if (vn_ismntpt(dvp)) { 2732 *cs->statusp = resp->status = NFS4ERR_ACCESS; 2733 goto out; 2734 } 2735 2736 if (cs->access == CS_ACCESS_DENIED) { 2737 *cs->statusp = resp->status = NFS4ERR_ACCESS; 2738 goto out; 2739 } 2740 2741 /* Check source object's type validity */ 2742 if (vp->v_type == VDIR) { 2743 *cs->statusp = resp->status = NFS4ERR_ISDIR; 2744 goto out; 2745 } 2746 2747 /* Check target directory's type */ 2748 if (dvp->v_type != VDIR) { 2749 *cs->statusp = resp->status = NFS4ERR_NOTDIR; 2750 goto out; 2751 } 2752 2753 if (cs->saved_exi != cs->exi) { 2754 *cs->statusp = resp->status = NFS4ERR_XDEV; 2755 goto out; 2756 } 2757 2758 status = utf8_dir_verify(&args->newname); 2759 if (status != NFS4_OK) { 2760 *cs->statusp = resp->status = status; 2761 goto out; 2762 } 2763 2764 nm = utf8_to_fn(&args->newname, &len, NULL); 2765 if (nm == NULL) { 2766 *cs->statusp = resp->status = NFS4ERR_INVAL; 2767 goto out; 2768 } 2769 2770 if (len > MAXNAMELEN) { 2771 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG; 2772 kmem_free(nm, len); 2773 goto out; 2774 } 2775 2776 if (rdonly4(req, cs)) { 2777 *cs->statusp = resp->status = NFS4ERR_ROFS; 2778 kmem_free(nm, len); 2779 goto out; 2780 } 2781 2782 /* Get "before" change value */ 2783 bdva.va_mask = AT_CTIME|AT_SEQ; 2784 error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL); 2785 if (error) { 2786 *cs->statusp = resp->status = puterrno4(error); 2787 kmem_free(nm, len); 2788 goto out; 2789 } 2790 2791 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 2792 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND, 2793 MAXPATHLEN + 1); 2794 2795 if (name == NULL) { 2796 *cs->statusp = resp->status = NFS4ERR_INVAL; 2797 kmem_free(nm, len); 2798 goto out; 2799 } 2800 2801 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime) 2802 2803 error = VOP_LINK(dvp, vp, name, cs->cr, NULL, 0); 2804 2805 if (nm != name) 2806 kmem_free(name, MAXPATHLEN + 1); 2807 kmem_free(nm, len); 2808 2809 /* 2810 * Get the 
initial "after" sequence number, if it fails, set to zero 2811 */ 2812 idva.va_mask = AT_SEQ; 2813 if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL)) 2814 idva.va_seq = 0; 2815 2816 /* 2817 * Force modified data and metadata out to stable storage. 2818 */ 2819 (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL); 2820 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL); 2821 2822 if (error) { 2823 *cs->statusp = resp->status = puterrno4(error); 2824 goto out; 2825 } 2826 2827 /* 2828 * Get "after" change value, if it fails, simply return the 2829 * before value. 2830 */ 2831 adva.va_mask = AT_CTIME|AT_SEQ; 2832 if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) { 2833 adva.va_ctime = bdva.va_ctime; 2834 adva.va_seq = 0; 2835 } 2836 2837 NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime) 2838 2839 /* 2840 * The cinfo.atomic = TRUE only if we have 2841 * non-zero va_seq's, and it has incremented by exactly one 2842 * during the VOP_LINK and it didn't change during the VOP_FSYNC. 2843 */ 2844 if (bdva.va_seq && idva.va_seq && adva.va_seq && 2845 idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq) 2846 resp->cinfo.atomic = TRUE; 2847 else 2848 resp->cinfo.atomic = FALSE; 2849 2850 *cs->statusp = resp->status = NFS4_OK; 2851 out: 2852 DTRACE_NFSV4_2(op__link__done, struct compound_state *, cs, 2853 LINK4res *, resp); 2854 } 2855 2856 /* 2857 * Used by rfs4_op_lookup and rfs4_op_lookupp to do the actual work. 2858 */ 2859 2860 /* ARGSUSED */ 2861 static nfsstat4 2862 do_rfs4_op_lookup(char *nm, struct svc_req *req, struct compound_state *cs) 2863 { 2864 int error; 2865 int different_export = 0; 2866 vnode_t *vp, *pre_tvp = NULL, *oldvp = NULL; 2867 struct exportinfo *exi = NULL, *pre_exi = NULL; 2868 nfsstat4 stat; 2869 fid_t fid; 2870 int attrdir, dotdot, walk; 2871 bool_t is_newvp = FALSE; 2872 2873 if (cs->vp->v_flag & V_XATTRDIR) { 2874 attrdir = 1; 2875 ASSERT(get_fh4_flag(&cs->fh, FH4_ATTRDIR)); 2876 } else { 2877 attrdir = 0; 2878 ASSERT(! 
get_fh4_flag(&cs->fh, FH4_ATTRDIR)); 2879 } 2880 2881 dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0'); 2882 2883 /* 2884 * If dotdotting, then need to check whether it's 2885 * above the root of a filesystem, or above an 2886 * export point. 2887 */ 2888 if (dotdot) { 2889 vnode_t *zone_rootvp; 2890 2891 ASSERT(cs->exi != NULL); 2892 zone_rootvp = cs->exi->exi_ne->exi_root->exi_vp; 2893 /* 2894 * If dotdotting at the root of a filesystem, then 2895 * need to traverse back to the mounted-on filesystem 2896 * and do the dotdot lookup there. 2897 */ 2898 if ((cs->vp->v_flag & VROOT) || VN_CMP(cs->vp, zone_rootvp)) { 2899 2900 /* 2901 * If at the system root, then can 2902 * go up no further. 2903 */ 2904 if (VN_CMP(cs->vp, zone_rootvp)) 2905 return (puterrno4(ENOENT)); 2906 2907 /* 2908 * Traverse back to the mounted-on filesystem 2909 */ 2910 cs->vp = untraverse(cs->vp, zone_rootvp); 2911 2912 /* 2913 * Set the different_export flag so we remember 2914 * to pick up a new exportinfo entry for 2915 * this new filesystem. 2916 */ 2917 different_export = 1; 2918 } else { 2919 2920 /* 2921 * If dotdotting above an export point then set 2922 * the different_export to get new export info. 2923 */ 2924 different_export = nfs_exported(cs->exi, cs->vp); 2925 } 2926 } 2927 2928 error = VOP_LOOKUP(cs->vp, nm, &vp, NULL, 0, NULL, cs->cr, 2929 NULL, NULL, NULL); 2930 if (error) 2931 return (puterrno4(error)); 2932 2933 /* 2934 * If the vnode is in a pseudo filesystem, check whether it is visible. 2935 * 2936 * XXX if the vnode is a symlink and it is not visible in 2937 * a pseudo filesystem, return ENOENT (not following symlink). 2938 * V4 client can not mount such symlink. This is a regression 2939 * from V2/V3. 2940 * 2941 * In the same exported filesystem, if the security flavor used 2942 * is not an explicitly shared flavor, limit the view to the visible 2943 * list entries only. 
This is not a WRONGSEC case because it's already 2944 * checked via PUTROOTFH/PUTPUBFH or PUTFH. 2945 */ 2946 if (!different_export && 2947 (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) || 2948 cs->access & CS_ACCESS_LIMITED)) { 2949 if (! nfs_visible(cs->exi, vp, &different_export)) { 2950 VN_RELE(vp); 2951 return (puterrno4(ENOENT)); 2952 } 2953 } 2954 2955 /* 2956 * If it's a mountpoint, then traverse it. 2957 */ 2958 if (vn_ismntpt(vp)) { 2959 pre_exi = cs->exi; /* save pre-traversed exportinfo */ 2960 pre_tvp = vp; /* save pre-traversed vnode */ 2961 2962 /* 2963 * hold pre_tvp to counteract rele by traverse. We will 2964 * need pre_tvp below if checkexport4 fails 2965 */ 2966 VN_HOLD(pre_tvp); 2967 if ((error = traverse(&vp)) != 0) { 2968 VN_RELE(vp); 2969 VN_RELE(pre_tvp); 2970 return (puterrno4(error)); 2971 } 2972 different_export = 1; 2973 } else if (vp->v_vfsp != cs->vp->v_vfsp) { 2974 /* 2975 * The vfsp comparison is to handle the case where 2976 * a LOFS mount is shared. lo_lookup traverses mount points, 2977 * and NFS is unaware of local fs transistions because 2978 * v_vfsmountedhere isn't set. For this special LOFS case, 2979 * the dir and the obj returned by lookup will have different 2980 * vfs ptrs. 2981 */ 2982 different_export = 1; 2983 } 2984 2985 if (different_export) { 2986 2987 bzero(&fid, sizeof (fid)); 2988 fid.fid_len = MAXFIDSZ; 2989 error = vop_fid_pseudo(vp, &fid); 2990 if (error) { 2991 VN_RELE(vp); 2992 if (pre_tvp) 2993 VN_RELE(pre_tvp); 2994 return (puterrno4(error)); 2995 } 2996 2997 if (dotdot) 2998 exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE); 2999 else 3000 exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp); 3001 3002 if (exi == NULL) { 3003 if (pre_tvp) { 3004 /* 3005 * If this vnode is a mounted-on vnode, 3006 * but the mounted-on file system is not 3007 * exported, send back the filehandle for 3008 * the mounted-on vnode, not the root of 3009 * the mounted-on file system. 
3010 */ 3011 VN_RELE(vp); 3012 vp = pre_tvp; 3013 exi = pre_exi; 3014 } else { 3015 VN_RELE(vp); 3016 return (puterrno4(EACCES)); 3017 } 3018 } else if (pre_tvp) { 3019 /* we're done with pre_tvp now. release extra hold */ 3020 VN_RELE(pre_tvp); 3021 } 3022 3023 cs->exi = exi; 3024 3025 /* 3026 * Now we do a checkauth4. The reason is that 3027 * this client/user may not have access to the new 3028 * exported file system, and if they do, 3029 * the client/user may be mapped to a different uid. 3030 * 3031 * We start with a new cr, because the checkauth4 done 3032 * in the PUT*FH operation over wrote the cred's uid, 3033 * gid, etc, and we want the real thing before calling 3034 * checkauth4() 3035 */ 3036 crfree(cs->cr); 3037 cs->cr = crdup(cs->basecr); 3038 3039 oldvp = cs->vp; 3040 cs->vp = vp; 3041 is_newvp = TRUE; 3042 3043 stat = call_checkauth4(cs, req); 3044 if (stat != NFS4_OK) { 3045 VN_RELE(cs->vp); 3046 cs->vp = oldvp; 3047 return (stat); 3048 } 3049 } 3050 3051 /* 3052 * After various NFS checks, do a label check on the path 3053 * component. The label on this path should either be the 3054 * global zone's label or a zone's label. We are only 3055 * interested in the zone's label because exported files 3056 * in global zone is accessible (though read-only) to 3057 * clients. The exportability/visibility check is already 3058 * done before reaching this code. 3059 */ 3060 if (is_system_labeled()) { 3061 bslabel_t *clabel; 3062 3063 ASSERT(req->rq_label != NULL); 3064 clabel = req->rq_label; 3065 DTRACE_PROBE2(tx__rfs4__log__info__oplookup__clabel, char *, 3066 "got client label from request(1)", struct svc_req *, req); 3067 3068 if (!blequal(&l_admin_low->tsl_label, clabel)) { 3069 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK, 3070 cs->exi)) { 3071 error = EACCES; 3072 goto err_out; 3073 } 3074 } else { 3075 /* 3076 * We grant access to admin_low label clients 3077 * only if the client is trusted, i.e. also 3078 * running Solaris Trusted Extension. 
3079 */ 3080 struct sockaddr *ca; 3081 int addr_type; 3082 void *ipaddr; 3083 tsol_tpc_t *tp; 3084 3085 ca = (struct sockaddr *)svc_getrpccaller( 3086 req->rq_xprt)->buf; 3087 if (ca->sa_family == AF_INET) { 3088 addr_type = IPV4_VERSION; 3089 ipaddr = &((struct sockaddr_in *)ca)->sin_addr; 3090 } else if (ca->sa_family == AF_INET6) { 3091 addr_type = IPV6_VERSION; 3092 ipaddr = &((struct sockaddr_in6 *) 3093 ca)->sin6_addr; 3094 } 3095 tp = find_tpc(ipaddr, addr_type, B_FALSE); 3096 if (tp == NULL || tp->tpc_tp.tp_doi != 3097 l_admin_low->tsl_doi || tp->tpc_tp.host_type != 3098 SUN_CIPSO) { 3099 if (tp != NULL) 3100 TPC_RELE(tp); 3101 error = EACCES; 3102 goto err_out; 3103 } 3104 TPC_RELE(tp); 3105 } 3106 } 3107 3108 error = makefh4(&cs->fh, vp, cs->exi); 3109 3110 err_out: 3111 if (error) { 3112 if (is_newvp) { 3113 VN_RELE(cs->vp); 3114 cs->vp = oldvp; 3115 } else 3116 VN_RELE(vp); 3117 return (puterrno4(error)); 3118 } 3119 3120 if (!is_newvp) { 3121 if (cs->vp) 3122 VN_RELE(cs->vp); 3123 cs->vp = vp; 3124 } else if (oldvp) 3125 VN_RELE(oldvp); 3126 3127 /* 3128 * if did lookup on attrdir and didn't lookup .., set named 3129 * attr fh flag 3130 */ 3131 if (attrdir && ! 
dotdot) 3132 set_fh4_flag(&cs->fh, FH4_NAMEDATTR); 3133 3134 /* Assume false for now, open proc will set this */ 3135 cs->mandlock = FALSE; 3136 3137 return (NFS4_OK); 3138 } 3139 3140 /* ARGSUSED */ 3141 static void 3142 rfs4_op_lookup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 3143 struct compound_state *cs) 3144 { 3145 LOOKUP4args *args = &argop->nfs_argop4_u.oplookup; 3146 LOOKUP4res *resp = &resop->nfs_resop4_u.oplookup; 3147 char *nm; 3148 uint_t len; 3149 struct sockaddr *ca; 3150 char *name = NULL; 3151 nfsstat4 status; 3152 3153 DTRACE_NFSV4_2(op__lookup__start, struct compound_state *, cs, 3154 LOOKUP4args *, args); 3155 3156 if (cs->vp == NULL) { 3157 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 3158 goto out; 3159 } 3160 3161 if (cs->vp->v_type == VLNK) { 3162 *cs->statusp = resp->status = NFS4ERR_SYMLINK; 3163 goto out; 3164 } 3165 3166 if (cs->vp->v_type != VDIR) { 3167 *cs->statusp = resp->status = NFS4ERR_NOTDIR; 3168 goto out; 3169 } 3170 3171 status = utf8_dir_verify(&args->objname); 3172 if (status != NFS4_OK) { 3173 *cs->statusp = resp->status = status; 3174 goto out; 3175 } 3176 3177 nm = utf8_to_str(&args->objname, &len, NULL); 3178 if (nm == NULL) { 3179 *cs->statusp = resp->status = NFS4ERR_INVAL; 3180 goto out; 3181 } 3182 3183 if (len > MAXNAMELEN) { 3184 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG; 3185 kmem_free(nm, len); 3186 goto out; 3187 } 3188 3189 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 3190 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND, 3191 MAXPATHLEN + 1); 3192 3193 if (name == NULL) { 3194 *cs->statusp = resp->status = NFS4ERR_INVAL; 3195 kmem_free(nm, len); 3196 goto out; 3197 } 3198 3199 *cs->statusp = resp->status = do_rfs4_op_lookup(name, req, cs); 3200 3201 if (name != nm) 3202 kmem_free(name, MAXPATHLEN + 1); 3203 kmem_free(nm, len); 3204 3205 out: 3206 DTRACE_NFSV4_2(op__lookup__done, struct compound_state *, cs, 3207 LOOKUP4res *, resp); 3208 } 3209 3210 /* 
ARGSUSED */ 3211 static void 3212 rfs4_op_lookupp(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req, 3213 struct compound_state *cs) 3214 { 3215 LOOKUPP4res *resp = &resop->nfs_resop4_u.oplookupp; 3216 3217 DTRACE_NFSV4_1(op__lookupp__start, struct compound_state *, cs); 3218 3219 if (cs->vp == NULL) { 3220 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 3221 goto out; 3222 } 3223 3224 if (cs->vp->v_type != VDIR) { 3225 *cs->statusp = resp->status = NFS4ERR_NOTDIR; 3226 goto out; 3227 } 3228 3229 *cs->statusp = resp->status = do_rfs4_op_lookup("..", req, cs); 3230 3231 /* 3232 * From NFSV4 Specification, LOOKUPP should not check for 3233 * NFS4ERR_WRONGSEC. Retrun NFS4_OK instead. 3234 */ 3235 if (resp->status == NFS4ERR_WRONGSEC) { 3236 *cs->statusp = resp->status = NFS4_OK; 3237 } 3238 3239 out: 3240 DTRACE_NFSV4_2(op__lookupp__done, struct compound_state *, cs, 3241 LOOKUPP4res *, resp); 3242 } 3243 3244 3245 /*ARGSUSED2*/ 3246 static void 3247 rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 3248 struct compound_state *cs) 3249 { 3250 OPENATTR4args *args = &argop->nfs_argop4_u.opopenattr; 3251 OPENATTR4res *resp = &resop->nfs_resop4_u.opopenattr; 3252 vnode_t *avp = NULL; 3253 int lookup_flags = LOOKUP_XATTR, error; 3254 int exp_ro = 0; 3255 3256 DTRACE_NFSV4_2(op__openattr__start, struct compound_state *, cs, 3257 OPENATTR4args *, args); 3258 3259 if (cs->vp == NULL) { 3260 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 3261 goto out; 3262 } 3263 3264 if ((cs->vp->v_vfsp->vfs_flag & VFS_XATTR) == 0 && 3265 !vfs_has_feature(cs->vp->v_vfsp, VFSFT_SYSATTR_VIEWS)) { 3266 *cs->statusp = resp->status = puterrno4(ENOTSUP); 3267 goto out; 3268 } 3269 3270 /* 3271 * If file system supports passing ACE mask to VOP_ACCESS then 3272 * check for ACE_READ_NAMED_ATTRS, otherwise do legacy checks 3273 */ 3274 3275 if (vfs_has_feature(cs->vp->v_vfsp, VFSFT_ACEMASKONACCESS)) 3276 error = VOP_ACCESS(cs->vp, ACE_READ_NAMED_ATTRS, 3277 
V_ACE_MASK, cs->cr, NULL); 3278 else 3279 error = ((VOP_ACCESS(cs->vp, VREAD, 0, cs->cr, NULL) != 0) && 3280 (VOP_ACCESS(cs->vp, VWRITE, 0, cs->cr, NULL) != 0) && 3281 (VOP_ACCESS(cs->vp, VEXEC, 0, cs->cr, NULL) != 0)); 3282 3283 if (error) { 3284 *cs->statusp = resp->status = puterrno4(EACCES); 3285 goto out; 3286 } 3287 3288 /* 3289 * The CREATE_XATTR_DIR VOP flag cannot be specified if 3290 * the file system is exported read-only -- regardless of 3291 * createdir flag. Otherwise the attrdir would be created 3292 * (assuming server fs isn't mounted readonly locally). If 3293 * VOP_LOOKUP returns ENOENT in this case, the error will 3294 * be translated into EROFS. ENOSYS is mapped to ENOTSUP 3295 * because specfs has no VOP_LOOKUP op, so the macro would 3296 * return ENOSYS. EINVAL is returned by all (current) 3297 * Solaris file system implementations when any of their 3298 * restrictions are violated (xattr(dir) can't have xattrdir). 3299 * Returning NOTSUPP is more appropriate in this case 3300 * because the object will never be able to have an attrdir. 3301 */ 3302 if (args->createdir && ! 
(exp_ro = rdonly4(req, cs))) 3303 lookup_flags |= CREATE_XATTR_DIR; 3304 3305 error = VOP_LOOKUP(cs->vp, "", &avp, NULL, lookup_flags, NULL, cs->cr, 3306 NULL, NULL, NULL); 3307 3308 if (error) { 3309 if (error == ENOENT && args->createdir && exp_ro) 3310 *cs->statusp = resp->status = puterrno4(EROFS); 3311 else if (error == EINVAL || error == ENOSYS) 3312 *cs->statusp = resp->status = puterrno4(ENOTSUP); 3313 else 3314 *cs->statusp = resp->status = puterrno4(error); 3315 goto out; 3316 } 3317 3318 ASSERT(avp->v_flag & V_XATTRDIR); 3319 3320 error = makefh4(&cs->fh, avp, cs->exi); 3321 3322 if (error) { 3323 VN_RELE(avp); 3324 *cs->statusp = resp->status = puterrno4(error); 3325 goto out; 3326 } 3327 3328 VN_RELE(cs->vp); 3329 cs->vp = avp; 3330 3331 /* 3332 * There is no requirement for an attrdir fh flag 3333 * because the attrdir has a vnode flag to distinguish 3334 * it from regular (non-xattr) directories. The 3335 * FH4_ATTRDIR flag is set for future sanity checks. 3336 */ 3337 set_fh4_flag(&cs->fh, FH4_ATTRDIR); 3338 *cs->statusp = resp->status = NFS4_OK; 3339 3340 out: 3341 DTRACE_NFSV4_2(op__openattr__done, struct compound_state *, cs, 3342 OPENATTR4res *, resp); 3343 } 3344 3345 static int 3346 do_io(int direction, vnode_t *vp, struct uio *uio, int ioflag, cred_t *cred, 3347 caller_context_t *ct) 3348 { 3349 int error; 3350 int i; 3351 clock_t delaytime; 3352 3353 delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay); 3354 3355 /* 3356 * Don't block on mandatory locks. If this routine returns 3357 * EAGAIN, the caller should return NFS4ERR_LOCKED. 
3358 */ 3359 uio->uio_fmode = FNONBLOCK; 3360 3361 for (i = 0; i < rfs4_maxlock_tries; i++) { 3362 3363 3364 if (direction == FREAD) { 3365 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, ct); 3366 error = VOP_READ(vp, uio, ioflag, cred, ct); 3367 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, ct); 3368 } else { 3369 (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, ct); 3370 error = VOP_WRITE(vp, uio, ioflag, cred, ct); 3371 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, ct); 3372 } 3373 3374 if (error != EAGAIN) 3375 break; 3376 3377 if (i < rfs4_maxlock_tries - 1) { 3378 delay(delaytime); 3379 delaytime *= 2; 3380 } 3381 } 3382 3383 return (error); 3384 } 3385 3386 /* ARGSUSED */ 3387 static void 3388 rfs4_op_read(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 3389 struct compound_state *cs) 3390 { 3391 READ4args *args = &argop->nfs_argop4_u.opread; 3392 READ4res *resp = &resop->nfs_resop4_u.opread; 3393 int error; 3394 int verror; 3395 vnode_t *vp; 3396 struct vattr va; 3397 struct iovec iov, *iovp = NULL; 3398 int iovcnt; 3399 struct uio uio; 3400 u_offset_t offset; 3401 bool_t *deleg = &cs->deleg; 3402 nfsstat4 stat; 3403 int in_crit = 0; 3404 mblk_t *mp = NULL; 3405 int alloc_err = 0; 3406 int rdma_used = 0; 3407 int loaned_buffers; 3408 caller_context_t ct; 3409 struct uio *uiop; 3410 3411 DTRACE_NFSV4_2(op__read__start, struct compound_state *, cs, 3412 READ4args, args); 3413 3414 vp = cs->vp; 3415 if (vp == NULL) { 3416 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 3417 goto out; 3418 } 3419 if (cs->access == CS_ACCESS_DENIED) { 3420 *cs->statusp = resp->status = NFS4ERR_ACCESS; 3421 goto out; 3422 } 3423 3424 if ((stat = rfs4_check_stateid(FREAD, vp, &args->stateid, FALSE, 3425 deleg, TRUE, &ct, cs)) != NFS4_OK) { 3426 *cs->statusp = resp->status = stat; 3427 goto out; 3428 } 3429 3430 /* 3431 * Enter the critical region before calling VOP_RWLOCK 3432 * to avoid a deadlock with write requests. 
3433 */ 3434 if (nbl_need_check(vp)) { 3435 nbl_start_crit(vp, RW_READER); 3436 in_crit = 1; 3437 if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0, 3438 &ct)) { 3439 *cs->statusp = resp->status = NFS4ERR_LOCKED; 3440 goto out; 3441 } 3442 } 3443 3444 if (args->wlist) { 3445 if (args->count > clist_len(args->wlist)) { 3446 *cs->statusp = resp->status = NFS4ERR_INVAL; 3447 goto out; 3448 } 3449 rdma_used = 1; 3450 } 3451 3452 /* use loaned buffers for TCP */ 3453 loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0; 3454 3455 va.va_mask = AT_MODE|AT_SIZE|AT_UID; 3456 verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct); 3457 3458 /* 3459 * If we can't get the attributes, then we can't do the 3460 * right access checking. So, we'll fail the request. 3461 */ 3462 if (verror) { 3463 *cs->statusp = resp->status = puterrno4(verror); 3464 goto out; 3465 } 3466 3467 if (vp->v_type != VREG) { 3468 *cs->statusp = resp->status = 3469 ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL); 3470 goto out; 3471 } 3472 3473 if (crgetuid(cs->cr) != va.va_uid && 3474 (error = VOP_ACCESS(vp, VREAD, 0, cs->cr, &ct)) && 3475 (error = VOP_ACCESS(vp, VEXEC, 0, cs->cr, &ct))) { 3476 *cs->statusp = resp->status = puterrno4(error); 3477 goto out; 3478 } 3479 3480 if (MANDLOCK(vp, va.va_mode)) { /* XXX - V4 supports mand locking */ 3481 *cs->statusp = resp->status = NFS4ERR_ACCESS; 3482 goto out; 3483 } 3484 3485 offset = args->offset; 3486 if (offset >= va.va_size) { 3487 *cs->statusp = resp->status = NFS4_OK; 3488 resp->eof = TRUE; 3489 resp->data_len = 0; 3490 resp->data_val = NULL; 3491 resp->mblk = NULL; 3492 /* RDMA */ 3493 resp->wlist = args->wlist; 3494 resp->wlist_len = resp->data_len; 3495 *cs->statusp = resp->status = NFS4_OK; 3496 if (resp->wlist) 3497 clist_zero_len(resp->wlist); 3498 goto out; 3499 } 3500 3501 if (args->count == 0) { 3502 *cs->statusp = resp->status = NFS4_OK; 3503 resp->eof = FALSE; 3504 resp->data_len = 0; 3505 resp->data_val = NULL; 3506 
resp->mblk = NULL; 3507 /* RDMA */ 3508 resp->wlist = args->wlist; 3509 resp->wlist_len = resp->data_len; 3510 if (resp->wlist) 3511 clist_zero_len(resp->wlist); 3512 goto out; 3513 } 3514 3515 /* 3516 * Do not allocate memory more than maximum allowed 3517 * transfer size 3518 */ 3519 if (args->count > rfs4_tsize(req)) 3520 args->count = rfs4_tsize(req); 3521 3522 if (loaned_buffers) { 3523 uiop = (uio_t *)rfs_setup_xuio(vp); 3524 ASSERT(uiop != NULL); 3525 uiop->uio_segflg = UIO_SYSSPACE; 3526 uiop->uio_loffset = args->offset; 3527 uiop->uio_resid = args->count; 3528 3529 /* Jump to do the read if successful */ 3530 if (!VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cs->cr, &ct)) { 3531 /* 3532 * Need to hold the vnode until after VOP_RETZCBUF() 3533 * is called. 3534 */ 3535 VN_HOLD(vp); 3536 goto doio_read; 3537 } 3538 3539 DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int, 3540 uiop->uio_loffset, int, uiop->uio_resid); 3541 3542 uiop->uio_extflg = 0; 3543 3544 /* failure to setup for zero copy */ 3545 rfs_free_xuio((void *)uiop); 3546 loaned_buffers = 0; 3547 } 3548 3549 /* 3550 * If returning data via RDMA Write, then grab the chunk list. If we 3551 * aren't returning READ data w/RDMA_WRITE, then grab a mblk. 3552 */ 3553 if (rdma_used) { 3554 mp = NULL; 3555 (void) rdma_get_wchunk(req, &iov, args->wlist); 3556 uio.uio_iov = &iov; 3557 uio.uio_iovcnt = 1; 3558 } else { 3559 /* 3560 * mp will contain the data to be sent out in the read reply. 3561 * It will be freed after the reply has been sent. 
3562 */ 3563 mp = rfs_read_alloc(args->count, &iovp, &iovcnt); 3564 ASSERT(mp != NULL); 3565 ASSERT(alloc_err == 0); 3566 uio.uio_iov = iovp; 3567 uio.uio_iovcnt = iovcnt; 3568 } 3569 3570 uio.uio_segflg = UIO_SYSSPACE; 3571 uio.uio_extflg = UIO_COPY_CACHED; 3572 uio.uio_loffset = args->offset; 3573 uio.uio_resid = args->count; 3574 uiop = &uio; 3575 3576 doio_read: 3577 error = do_io(FREAD, vp, uiop, 0, cs->cr, &ct); 3578 3579 va.va_mask = AT_SIZE; 3580 verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct); 3581 3582 if (error) { 3583 if (mp) 3584 freemsg(mp); 3585 *cs->statusp = resp->status = puterrno4(error); 3586 goto out; 3587 } 3588 3589 /* make mblk using zc buffers */ 3590 if (loaned_buffers) { 3591 mp = uio_to_mblk(uiop); 3592 ASSERT(mp != NULL); 3593 } 3594 3595 *cs->statusp = resp->status = NFS4_OK; 3596 3597 ASSERT(uiop->uio_resid >= 0); 3598 resp->data_len = args->count - uiop->uio_resid; 3599 if (mp) { 3600 resp->data_val = (char *)mp->b_datap->db_base; 3601 rfs_rndup_mblks(mp, resp->data_len, loaned_buffers); 3602 } else { 3603 resp->data_val = (caddr_t)iov.iov_base; 3604 } 3605 3606 resp->mblk = mp; 3607 3608 if (!verror && offset + resp->data_len == va.va_size) 3609 resp->eof = TRUE; 3610 else 3611 resp->eof = FALSE; 3612 3613 if (rdma_used) { 3614 if (!rdma_setup_read_data4(args, resp)) { 3615 *cs->statusp = resp->status = NFS4ERR_INVAL; 3616 } 3617 } else { 3618 resp->wlist = NULL; 3619 } 3620 3621 out: 3622 if (in_crit) 3623 nbl_end_crit(vp); 3624 3625 if (iovp != NULL) 3626 kmem_free(iovp, iovcnt * sizeof (struct iovec)); 3627 3628 DTRACE_NFSV4_2(op__read__done, struct compound_state *, cs, 3629 READ4res *, resp); 3630 } 3631 3632 static void 3633 rfs4_op_read_free(nfs_resop4 *resop) 3634 { 3635 READ4res *resp = &resop->nfs_resop4_u.opread; 3636 3637 if (resp->status == NFS4_OK && resp->mblk != NULL) { 3638 freemsg(resp->mblk); 3639 resp->mblk = NULL; 3640 resp->data_val = NULL; 3641 resp->data_len = 0; 3642 } 3643 } 3644 3645 static void 3646 
rfs4_op_readdir_free(nfs_resop4 * resop) 3647 { 3648 READDIR4res *resp = &resop->nfs_resop4_u.opreaddir; 3649 3650 if (resp->status == NFS4_OK && resp->mblk != NULL) { 3651 freeb(resp->mblk); 3652 resp->mblk = NULL; 3653 resp->data_len = 0; 3654 } 3655 } 3656 3657 3658 /* ARGSUSED */ 3659 static void 3660 rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req, 3661 struct compound_state *cs) 3662 { 3663 PUTPUBFH4res *resp = &resop->nfs_resop4_u.opputpubfh; 3664 int error; 3665 vnode_t *vp; 3666 struct exportinfo *exi, *sav_exi; 3667 nfs_fh4_fmt_t *fh_fmtp; 3668 nfs_export_t *ne = nfs_get_export(); 3669 3670 DTRACE_NFSV4_1(op__putpubfh__start, struct compound_state *, cs); 3671 3672 if (cs->vp) { 3673 VN_RELE(cs->vp); 3674 cs->vp = NULL; 3675 } 3676 3677 if (cs->cr) 3678 crfree(cs->cr); 3679 3680 cs->cr = crdup(cs->basecr); 3681 3682 vp = ne->exi_public->exi_vp; 3683 if (vp == NULL) { 3684 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT; 3685 goto out; 3686 } 3687 3688 error = makefh4(&cs->fh, vp, ne->exi_public); 3689 if (error != 0) { 3690 *cs->statusp = resp->status = puterrno4(error); 3691 goto out; 3692 } 3693 sav_exi = cs->exi; 3694 if (ne->exi_public == ne->exi_root) { 3695 /* 3696 * No filesystem is actually shared public, so we default 3697 * to exi_root. In this case, we must check whether root 3698 * is exported. 3699 */ 3700 fh_fmtp = (nfs_fh4_fmt_t *)cs->fh.nfs_fh4_val; 3701 3702 /* 3703 * if root filesystem is exported, the exportinfo struct that we 3704 * should use is what checkexport4 returns, because root_exi is 3705 * actually a mostly empty struct. 3706 */ 3707 exi = checkexport4(&fh_fmtp->fh4_fsid, 3708 (fid_t *)&fh_fmtp->fh4_xlen, NULL); 3709 cs->exi = ((exi != NULL) ? 
exi : ne->exi_public); 3710 } else { 3711 /* 3712 * it's a properly shared filesystem 3713 */ 3714 cs->exi = ne->exi_public; 3715 } 3716 3717 if (is_system_labeled()) { 3718 bslabel_t *clabel; 3719 3720 ASSERT(req->rq_label != NULL); 3721 clabel = req->rq_label; 3722 DTRACE_PROBE2(tx__rfs4__log__info__opputpubfh__clabel, char *, 3723 "got client label from request(1)", 3724 struct svc_req *, req); 3725 if (!blequal(&l_admin_low->tsl_label, clabel)) { 3726 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK, 3727 cs->exi)) { 3728 *cs->statusp = resp->status = 3729 NFS4ERR_SERVERFAULT; 3730 goto out; 3731 } 3732 } 3733 } 3734 3735 VN_HOLD(vp); 3736 cs->vp = vp; 3737 3738 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) { 3739 VN_RELE(cs->vp); 3740 cs->vp = NULL; 3741 cs->exi = sav_exi; 3742 goto out; 3743 } 3744 3745 *cs->statusp = resp->status = NFS4_OK; 3746 out: 3747 DTRACE_NFSV4_2(op__putpubfh__done, struct compound_state *, cs, 3748 PUTPUBFH4res *, resp); 3749 } 3750 3751 /* 3752 * XXX - issue with put*fh operations. Suppose /export/home is exported. 3753 * Suppose an NFS client goes to mount /export/home/joe. If /export, home, 3754 * or joe have restrictive search permissions, then we shouldn't let 3755 * the client get a file handle. This is easy to enforce. However, we 3756 * don't know what security flavor should be used until we resolve the 3757 * path name. Another complication is uid mapping. If root is 3758 * the user, then it will be mapped to the anonymous user by default, 3759 * but we won't know that till we've resolved the path name. And we won't 3760 * know what the anonymous user is. 3761 * Luckily, SECINFO is specified to take a full filename. 3762 * So what we will have to in rfs4_op_lookup is check that flavor of 3763 * the target object matches that of the request, and if root was the 3764 * caller, check for the root= and anon= options, and if necessary, 3765 * repeat the lookup using the right cred_t. But that's not done yet. 
3766 */ 3767 /* ARGSUSED */ 3768 static void 3769 rfs4_op_putfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 3770 struct compound_state *cs) 3771 { 3772 PUTFH4args *args = &argop->nfs_argop4_u.opputfh; 3773 PUTFH4res *resp = &resop->nfs_resop4_u.opputfh; 3774 nfs_fh4_fmt_t *fh_fmtp; 3775 3776 DTRACE_NFSV4_2(op__putfh__start, struct compound_state *, cs, 3777 PUTFH4args *, args); 3778 3779 if (cs->vp) { 3780 VN_RELE(cs->vp); 3781 cs->vp = NULL; 3782 } 3783 3784 if (cs->cr) { 3785 crfree(cs->cr); 3786 cs->cr = NULL; 3787 } 3788 3789 3790 if (args->object.nfs_fh4_len < NFS_FH4_LEN) { 3791 *cs->statusp = resp->status = NFS4ERR_BADHANDLE; 3792 goto out; 3793 } 3794 3795 fh_fmtp = (nfs_fh4_fmt_t *)args->object.nfs_fh4_val; 3796 cs->exi = checkexport4(&fh_fmtp->fh4_fsid, (fid_t *)&fh_fmtp->fh4_xlen, 3797 NULL); 3798 3799 if (cs->exi == NULL) { 3800 *cs->statusp = resp->status = NFS4ERR_STALE; 3801 goto out; 3802 } 3803 3804 cs->cr = crdup(cs->basecr); 3805 3806 ASSERT(cs->cr != NULL); 3807 3808 if (! 
(cs->vp = nfs4_fhtovp(&args->object, cs->exi, &resp->status))) { 3809 *cs->statusp = resp->status; 3810 goto out; 3811 } 3812 3813 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) { 3814 VN_RELE(cs->vp); 3815 cs->vp = NULL; 3816 goto out; 3817 } 3818 3819 nfs_fh4_copy(&args->object, &cs->fh); 3820 *cs->statusp = resp->status = NFS4_OK; 3821 cs->deleg = FALSE; 3822 3823 out: 3824 DTRACE_NFSV4_2(op__putfh__done, struct compound_state *, cs, 3825 PUTFH4res *, resp); 3826 } 3827 3828 /* ARGSUSED */ 3829 static void 3830 rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 3831 struct compound_state *cs) 3832 { 3833 PUTROOTFH4res *resp = &resop->nfs_resop4_u.opputrootfh; 3834 int error; 3835 fid_t fid; 3836 struct exportinfo *exi, *sav_exi; 3837 3838 DTRACE_NFSV4_1(op__putrootfh__start, struct compound_state *, cs); 3839 3840 if (cs->vp) { 3841 VN_RELE(cs->vp); 3842 cs->vp = NULL; 3843 } 3844 3845 if (cs->cr) 3846 crfree(cs->cr); 3847 3848 cs->cr = crdup(cs->basecr); 3849 3850 /* 3851 * Using rootdir, the system root vnode, 3852 * get its fid. 3853 */ 3854 bzero(&fid, sizeof (fid)); 3855 fid.fid_len = MAXFIDSZ; 3856 error = vop_fid_pseudo(ZONE_ROOTVP(), &fid); 3857 if (error != 0) { 3858 *cs->statusp = resp->status = puterrno4(error); 3859 goto out; 3860 } 3861 3862 /* 3863 * Then use the root fsid & fid it to find out if it's exported 3864 * 3865 * If the server root isn't exported directly, then 3866 * it should at least be a pseudo export based on 3867 * one or more exports further down in the server's 3868 * file tree. 3869 */ 3870 exi = checkexport4(&ZONE_ROOTVP()->v_vfsp->vfs_fsid, &fid, NULL); 3871 if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) { 3872 NFS4_DEBUG(rfs4_debug, 3873 (CE_WARN, "rfs4_op_putrootfh: export check failure")); 3874 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT; 3875 goto out; 3876 } 3877 3878 /* 3879 * Now make a filehandle based on the root 3880 * export and root vnode. 
3881 */ 3882 error = makefh4(&cs->fh, ZONE_ROOTVP(), exi); 3883 if (error != 0) { 3884 *cs->statusp = resp->status = puterrno4(error); 3885 goto out; 3886 } 3887 3888 sav_exi = cs->exi; 3889 cs->exi = exi; 3890 3891 VN_HOLD(ZONE_ROOTVP()); 3892 cs->vp = ZONE_ROOTVP(); 3893 3894 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) { 3895 VN_RELE(cs->vp); 3896 cs->vp = NULL; 3897 cs->exi = sav_exi; 3898 goto out; 3899 } 3900 3901 *cs->statusp = resp->status = NFS4_OK; 3902 cs->deleg = FALSE; 3903 out: 3904 DTRACE_NFSV4_2(op__putrootfh__done, struct compound_state *, cs, 3905 PUTROOTFH4res *, resp); 3906 } 3907 3908 /* 3909 * readlink: args: CURRENT_FH. 3910 * res: status. If success - CURRENT_FH unchanged, return linktext. 3911 */ 3912 3913 /* ARGSUSED */ 3914 static void 3915 rfs4_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 3916 struct compound_state *cs) 3917 { 3918 READLINK4res *resp = &resop->nfs_resop4_u.opreadlink; 3919 int error; 3920 vnode_t *vp; 3921 struct iovec iov; 3922 struct vattr va; 3923 struct uio uio; 3924 char *data; 3925 struct sockaddr *ca; 3926 char *name = NULL; 3927 int is_referral; 3928 3929 DTRACE_NFSV4_1(op__readlink__start, struct compound_state *, cs); 3930 3931 /* CURRENT_FH: directory */ 3932 vp = cs->vp; 3933 if (vp == NULL) { 3934 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 3935 goto out; 3936 } 3937 3938 if (cs->access == CS_ACCESS_DENIED) { 3939 *cs->statusp = resp->status = NFS4ERR_ACCESS; 3940 goto out; 3941 } 3942 3943 /* Is it a referral? 
*/ 3944 if (vn_is_nfs_reparse(vp, cs->cr) && client_is_downrev(req)) { 3945 3946 is_referral = 1; 3947 3948 } else { 3949 3950 is_referral = 0; 3951 3952 if (vp->v_type == VDIR) { 3953 *cs->statusp = resp->status = NFS4ERR_ISDIR; 3954 goto out; 3955 } 3956 3957 if (vp->v_type != VLNK) { 3958 *cs->statusp = resp->status = NFS4ERR_INVAL; 3959 goto out; 3960 } 3961 3962 } 3963 3964 va.va_mask = AT_MODE; 3965 error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL); 3966 if (error) { 3967 *cs->statusp = resp->status = puterrno4(error); 3968 goto out; 3969 } 3970 3971 if (MANDLOCK(vp, va.va_mode)) { 3972 *cs->statusp = resp->status = NFS4ERR_ACCESS; 3973 goto out; 3974 } 3975 3976 data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP); 3977 3978 if (is_referral) { 3979 char *s; 3980 size_t strsz; 3981 kstat_named_t *stat = 3982 cs->exi->exi_ne->ne_globals->svstat[NFS_V4]; 3983 3984 /* Get an artificial symlink based on a referral */ 3985 s = build_symlink(vp, cs->cr, &strsz); 3986 stat[NFS_REFERLINKS].value.ui64++; 3987 DTRACE_PROBE2(nfs4serv__func__referral__reflink, 3988 vnode_t *, vp, char *, s); 3989 if (s == NULL) 3990 error = EINVAL; 3991 else { 3992 error = 0; 3993 (void) strlcpy(data, s, MAXPATHLEN + 1); 3994 kmem_free(s, strsz); 3995 } 3996 3997 } else { 3998 3999 iov.iov_base = data; 4000 iov.iov_len = MAXPATHLEN; 4001 uio.uio_iov = &iov; 4002 uio.uio_iovcnt = 1; 4003 uio.uio_segflg = UIO_SYSSPACE; 4004 uio.uio_extflg = UIO_COPY_CACHED; 4005 uio.uio_loffset = 0; 4006 uio.uio_resid = MAXPATHLEN; 4007 4008 error = VOP_READLINK(vp, &uio, cs->cr, NULL); 4009 4010 if (!error) 4011 *(data + MAXPATHLEN - uio.uio_resid) = '\0'; 4012 } 4013 4014 if (error) { 4015 kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1); 4016 *cs->statusp = resp->status = puterrno4(error); 4017 goto out; 4018 } 4019 4020 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 4021 name = nfscmd_convname(ca, cs->exi, data, NFSCMD_CONV_OUTBOUND, 4022 MAXPATHLEN + 1); 4023 4024 if (name == NULL) { 4025 /* 4026 * 
Even though the conversion failed, we return 4027 * something. We just don't translate it. 4028 */ 4029 name = data; 4030 } 4031 4032 /* 4033 * treat link name as data 4034 */ 4035 (void) str_to_utf8(name, (utf8string *)&resp->link); 4036 4037 if (name != data) 4038 kmem_free(name, MAXPATHLEN + 1); 4039 kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1); 4040 *cs->statusp = resp->status = NFS4_OK; 4041 4042 out: 4043 DTRACE_NFSV4_2(op__readlink__done, struct compound_state *, cs, 4044 READLINK4res *, resp); 4045 } 4046 4047 static void 4048 rfs4_op_readlink_free(nfs_resop4 *resop) 4049 { 4050 READLINK4res *resp = &resop->nfs_resop4_u.opreadlink; 4051 utf8string *symlink = (utf8string *)&resp->link; 4052 4053 if (symlink->utf8string_val) { 4054 UTF8STRING_FREE(*symlink) 4055 } 4056 } 4057 4058 /* 4059 * release_lockowner: 4060 * Release any state associated with the supplied 4061 * lockowner. Note if any lo_state is holding locks we will not 4062 * rele that lo_state and thus the lockowner will not be destroyed. 4063 * A client using lock after the lock owner stateid has been released 4064 * will suffer the consequence of NFS4ERR_BAD_STATEID and would have 4065 * to reissue the lock with new_lock_owner set to TRUE. 
4066 * args: lock_owner 4067 * res: status 4068 */ 4069 /* ARGSUSED */ 4070 static void 4071 rfs4_op_release_lockowner(nfs_argop4 *argop, nfs_resop4 *resop, 4072 struct svc_req *req, struct compound_state *cs) 4073 { 4074 RELEASE_LOCKOWNER4args *ap = &argop->nfs_argop4_u.oprelease_lockowner; 4075 RELEASE_LOCKOWNER4res *resp = &resop->nfs_resop4_u.oprelease_lockowner; 4076 rfs4_lockowner_t *lo; 4077 rfs4_openowner_t *oo; 4078 rfs4_state_t *sp; 4079 rfs4_lo_state_t *lsp; 4080 rfs4_client_t *cp; 4081 bool_t create = FALSE; 4082 locklist_t *llist; 4083 sysid_t sysid; 4084 4085 DTRACE_NFSV4_2(op__release__lockowner__start, struct compound_state *, 4086 cs, RELEASE_LOCKOWNER4args *, ap); 4087 4088 /* Make sure there is a clientid around for this request */ 4089 cp = rfs4_findclient_by_id(ap->lock_owner.clientid, FALSE); 4090 4091 if (cp == NULL) { 4092 *cs->statusp = resp->status = 4093 rfs4_check_clientid(&ap->lock_owner.clientid, 0); 4094 goto out; 4095 } 4096 rfs4_client_rele(cp); 4097 4098 lo = rfs4_findlockowner(&ap->lock_owner, &create); 4099 if (lo == NULL) { 4100 *cs->statusp = resp->status = NFS4_OK; 4101 goto out; 4102 } 4103 ASSERT(lo->rl_client != NULL); 4104 4105 /* 4106 * Check for EXPIRED client. If so will reap state with in a lease 4107 * period or on next set_clientid_confirm step 4108 */ 4109 if (rfs4_lease_expired(lo->rl_client)) { 4110 rfs4_lockowner_rele(lo); 4111 *cs->statusp = resp->status = NFS4ERR_EXPIRED; 4112 goto out; 4113 } 4114 4115 /* 4116 * If no sysid has been assigned, then no locks exist; just return. 4117 */ 4118 rfs4_dbe_lock(lo->rl_client->rc_dbe); 4119 if (lo->rl_client->rc_sysidt == LM_NOSYSID) { 4120 rfs4_lockowner_rele(lo); 4121 rfs4_dbe_unlock(lo->rl_client->rc_dbe); 4122 goto out; 4123 } 4124 4125 sysid = lo->rl_client->rc_sysidt; 4126 rfs4_dbe_unlock(lo->rl_client->rc_dbe); 4127 4128 /* 4129 * Mark the lockowner invalid. 
4130 */ 4131 rfs4_dbe_hide(lo->rl_dbe); 4132 4133 /* 4134 * sysid-pid pair should now not be used since the lockowner is 4135 * invalid. If the client were to instantiate the lockowner again 4136 * it would be assigned a new pid. Thus we can get the list of 4137 * current locks. 4138 */ 4139 4140 llist = flk_get_active_locks(sysid, lo->rl_pid); 4141 /* If we are still holding locks fail */ 4142 if (llist != NULL) { 4143 4144 *cs->statusp = resp->status = NFS4ERR_LOCKS_HELD; 4145 4146 flk_free_locklist(llist); 4147 /* 4148 * We need to unhide the lockowner so the client can 4149 * try it again. The bad thing here is if the client 4150 * has a logic error that took it here in the first place 4151 * they probably have lost accounting of the locks that it 4152 * is holding. So we may have dangling state until the 4153 * open owner state is reaped via close. One scenario 4154 * that could possibly occur is that the client has 4155 * sent the unlock request(s) in separate threads 4156 * and has not waited for the replies before sending the 4157 * RELEASE_LOCKOWNER request. Presumably, it would expect 4158 * and deal appropriately with NFS4ERR_LOCKS_HELD, by 4159 * reissuing the request. 4160 */ 4161 rfs4_dbe_unhide(lo->rl_dbe); 4162 rfs4_lockowner_rele(lo); 4163 goto out; 4164 } 4165 4166 /* 4167 * For the corresponding client we need to check each open 4168 * owner for any opens that have lockowner state associated 4169 * with this lockowner. 
4170 */ 4171 4172 rfs4_dbe_lock(lo->rl_client->rc_dbe); 4173 for (oo = list_head(&lo->rl_client->rc_openownerlist); oo != NULL; 4174 oo = list_next(&lo->rl_client->rc_openownerlist, oo)) { 4175 4176 rfs4_dbe_lock(oo->ro_dbe); 4177 for (sp = list_head(&oo->ro_statelist); sp != NULL; 4178 sp = list_next(&oo->ro_statelist, sp)) { 4179 4180 rfs4_dbe_lock(sp->rs_dbe); 4181 for (lsp = list_head(&sp->rs_lostatelist); 4182 lsp != NULL; 4183 lsp = list_next(&sp->rs_lostatelist, lsp)) { 4184 if (lsp->rls_locker == lo) { 4185 rfs4_dbe_lock(lsp->rls_dbe); 4186 rfs4_dbe_invalidate(lsp->rls_dbe); 4187 rfs4_dbe_unlock(lsp->rls_dbe); 4188 } 4189 } 4190 rfs4_dbe_unlock(sp->rs_dbe); 4191 } 4192 rfs4_dbe_unlock(oo->ro_dbe); 4193 } 4194 rfs4_dbe_unlock(lo->rl_client->rc_dbe); 4195 4196 rfs4_lockowner_rele(lo); 4197 4198 *cs->statusp = resp->status = NFS4_OK; 4199 4200 out: 4201 DTRACE_NFSV4_2(op__release__lockowner__done, struct compound_state *, 4202 cs, RELEASE_LOCKOWNER4res *, resp); 4203 } 4204 4205 /* 4206 * short utility function to lookup a file and recall the delegation 4207 */ 4208 static rfs4_file_t * 4209 rfs4_lookup_and_findfile(vnode_t *dvp, char *nm, vnode_t **vpp, 4210 int *lkup_error, cred_t *cr) 4211 { 4212 vnode_t *vp; 4213 rfs4_file_t *fp = NULL; 4214 bool_t fcreate = FALSE; 4215 int error; 4216 4217 if (vpp) 4218 *vpp = NULL; 4219 4220 if ((error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr, NULL, NULL, 4221 NULL)) == 0) { 4222 if (vp->v_type == VREG) 4223 fp = rfs4_findfile(vp, NULL, &fcreate); 4224 if (vpp) 4225 *vpp = vp; 4226 else 4227 VN_RELE(vp); 4228 } 4229 4230 if (lkup_error) 4231 *lkup_error = error; 4232 4233 return (fp); 4234 } 4235 4236 /* 4237 * remove: args: CURRENT_FH: directory; name. 4238 * res: status. If success - CURRENT_FH unchanged, return change_info 4239 * for directory. 
4240 */ 4241 /* ARGSUSED */ 4242 static void 4243 rfs4_op_remove(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 4244 struct compound_state *cs) 4245 { 4246 REMOVE4args *args = &argop->nfs_argop4_u.opremove; 4247 REMOVE4res *resp = &resop->nfs_resop4_u.opremove; 4248 int error; 4249 vnode_t *dvp, *vp; 4250 struct vattr bdva, idva, adva; 4251 char *nm; 4252 uint_t len; 4253 rfs4_file_t *fp; 4254 int in_crit = 0; 4255 bslabel_t *clabel; 4256 struct sockaddr *ca; 4257 char *name = NULL; 4258 nfsstat4 status; 4259 4260 DTRACE_NFSV4_2(op__remove__start, struct compound_state *, cs, 4261 REMOVE4args *, args); 4262 4263 /* CURRENT_FH: directory */ 4264 dvp = cs->vp; 4265 if (dvp == NULL) { 4266 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 4267 goto out; 4268 } 4269 4270 if (cs->access == CS_ACCESS_DENIED) { 4271 *cs->statusp = resp->status = NFS4ERR_ACCESS; 4272 goto out; 4273 } 4274 4275 /* 4276 * If there is an unshared filesystem mounted on this vnode, 4277 * Do not allow to remove anything in this directory. 
4278 */ 4279 if (vn_ismntpt(dvp)) { 4280 *cs->statusp = resp->status = NFS4ERR_ACCESS; 4281 goto out; 4282 } 4283 4284 if (dvp->v_type != VDIR) { 4285 *cs->statusp = resp->status = NFS4ERR_NOTDIR; 4286 goto out; 4287 } 4288 4289 status = utf8_dir_verify(&args->target); 4290 if (status != NFS4_OK) { 4291 *cs->statusp = resp->status = status; 4292 goto out; 4293 } 4294 4295 /* 4296 * Lookup the file so that we can check if it's a directory 4297 */ 4298 nm = utf8_to_fn(&args->target, &len, NULL); 4299 if (nm == NULL) { 4300 *cs->statusp = resp->status = NFS4ERR_INVAL; 4301 goto out; 4302 } 4303 4304 if (len > MAXNAMELEN) { 4305 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG; 4306 kmem_free(nm, len); 4307 goto out; 4308 } 4309 4310 if (rdonly4(req, cs)) { 4311 *cs->statusp = resp->status = NFS4ERR_ROFS; 4312 kmem_free(nm, len); 4313 goto out; 4314 } 4315 4316 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 4317 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND, 4318 MAXPATHLEN + 1); 4319 4320 if (name == NULL) { 4321 *cs->statusp = resp->status = NFS4ERR_INVAL; 4322 kmem_free(nm, len); 4323 goto out; 4324 } 4325 4326 /* 4327 * Lookup the file to determine type and while we are see if 4328 * there is a file struct around and check for delegation. 4329 * We don't need to acquire va_seq before this lookup, if 4330 * it causes an update, cinfo.before will not match, which will 4331 * trigger a cache flush even if atomic is TRUE. 
4332 */ 4333 if (fp = rfs4_lookup_and_findfile(dvp, name, &vp, &error, cs->cr)) { 4334 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE, 4335 NULL)) { 4336 VN_RELE(vp); 4337 rfs4_file_rele(fp); 4338 *cs->statusp = resp->status = NFS4ERR_DELAY; 4339 if (nm != name) 4340 kmem_free(name, MAXPATHLEN + 1); 4341 kmem_free(nm, len); 4342 goto out; 4343 } 4344 } 4345 4346 /* Didn't find anything to remove */ 4347 if (vp == NULL) { 4348 *cs->statusp = resp->status = error; 4349 if (nm != name) 4350 kmem_free(name, MAXPATHLEN + 1); 4351 kmem_free(nm, len); 4352 goto out; 4353 } 4354 4355 if (nbl_need_check(vp)) { 4356 nbl_start_crit(vp, RW_READER); 4357 in_crit = 1; 4358 if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0, NULL)) { 4359 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN; 4360 if (nm != name) 4361 kmem_free(name, MAXPATHLEN + 1); 4362 kmem_free(nm, len); 4363 nbl_end_crit(vp); 4364 VN_RELE(vp); 4365 if (fp) { 4366 rfs4_clear_dont_grant(fp); 4367 rfs4_file_rele(fp); 4368 } 4369 goto out; 4370 } 4371 } 4372 4373 /* check label before allowing removal */ 4374 if (is_system_labeled()) { 4375 ASSERT(req->rq_label != NULL); 4376 clabel = req->rq_label; 4377 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *, 4378 "got client label from request(1)", 4379 struct svc_req *, req); 4380 if (!blequal(&l_admin_low->tsl_label, clabel)) { 4381 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK, 4382 cs->exi)) { 4383 *cs->statusp = resp->status = NFS4ERR_ACCESS; 4384 if (name != nm) 4385 kmem_free(name, MAXPATHLEN + 1); 4386 kmem_free(nm, len); 4387 if (in_crit) 4388 nbl_end_crit(vp); 4389 VN_RELE(vp); 4390 if (fp) { 4391 rfs4_clear_dont_grant(fp); 4392 rfs4_file_rele(fp); 4393 } 4394 goto out; 4395 } 4396 } 4397 } 4398 4399 /* Get dir "before" change value */ 4400 bdva.va_mask = AT_CTIME|AT_SEQ; 4401 error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL); 4402 if (error) { 4403 *cs->statusp = resp->status = puterrno4(error); 4404 if (nm != name) 4405 kmem_free(name, 
MAXPATHLEN + 1); 4406 kmem_free(nm, len); 4407 if (in_crit) 4408 nbl_end_crit(vp); 4409 VN_RELE(vp); 4410 if (fp) { 4411 rfs4_clear_dont_grant(fp); 4412 rfs4_file_rele(fp); 4413 } 4414 goto out; 4415 } 4416 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime) 4417 4418 /* Actually do the REMOVE operation */ 4419 if (vp->v_type == VDIR) { 4420 /* 4421 * Can't remove a directory that has a mounted-on filesystem. 4422 */ 4423 if (vn_ismntpt(vp)) { 4424 error = EACCES; 4425 } else { 4426 /* 4427 * System V defines rmdir to return EEXIST, 4428 * not ENOTEMPTY, if the directory is not 4429 * empty. A System V NFS server needs to map 4430 * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to 4431 * transmit over the wire. 4432 */ 4433 if ((error = VOP_RMDIR(dvp, name, ZONE_ROOTVP(), cs->cr, 4434 NULL, 0)) == EEXIST) 4435 error = ENOTEMPTY; 4436 } 4437 } else { 4438 if ((error = VOP_REMOVE(dvp, name, cs->cr, NULL, 0)) == 0 && 4439 fp != NULL) { 4440 struct vattr va; 4441 vnode_t *tvp; 4442 4443 rfs4_dbe_lock(fp->rf_dbe); 4444 tvp = fp->rf_vp; 4445 if (tvp) 4446 VN_HOLD(tvp); 4447 rfs4_dbe_unlock(fp->rf_dbe); 4448 4449 if (tvp) { 4450 /* 4451 * This is va_seq safe because we are not 4452 * manipulating dvp. 
4453 */ 4454 va.va_mask = AT_NLINK; 4455 if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) && 4456 va.va_nlink == 0) { 4457 /* Remove state on file remove */ 4458 if (in_crit) { 4459 nbl_end_crit(vp); 4460 in_crit = 0; 4461 } 4462 rfs4_close_all_state(fp); 4463 } 4464 VN_RELE(tvp); 4465 } 4466 } 4467 } 4468 4469 if (in_crit) 4470 nbl_end_crit(vp); 4471 VN_RELE(vp); 4472 4473 if (fp) { 4474 rfs4_clear_dont_grant(fp); 4475 rfs4_file_rele(fp); 4476 } 4477 if (nm != name) 4478 kmem_free(name, MAXPATHLEN + 1); 4479 kmem_free(nm, len); 4480 4481 if (error) { 4482 *cs->statusp = resp->status = puterrno4(error); 4483 goto out; 4484 } 4485 4486 /* 4487 * Get the initial "after" sequence number, if it fails, set to zero 4488 */ 4489 idva.va_mask = AT_SEQ; 4490 if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL)) 4491 idva.va_seq = 0; 4492 4493 /* 4494 * Force modified data and metadata out to stable storage. 4495 */ 4496 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL); 4497 4498 /* 4499 * Get "after" change value, if it fails, simply return the 4500 * before value. 4501 */ 4502 adva.va_mask = AT_CTIME|AT_SEQ; 4503 if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) { 4504 adva.va_ctime = bdva.va_ctime; 4505 adva.va_seq = 0; 4506 } 4507 4508 NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime) 4509 4510 /* 4511 * The cinfo.atomic = TRUE only if we have 4512 * non-zero va_seq's, and it has incremented by exactly one 4513 * during the VOP_REMOVE/RMDIR and it didn't change during 4514 * the VOP_FSYNC. 4515 */ 4516 if (bdva.va_seq && idva.va_seq && adva.va_seq && 4517 idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq) 4518 resp->cinfo.atomic = TRUE; 4519 else 4520 resp->cinfo.atomic = FALSE; 4521 4522 *cs->statusp = resp->status = NFS4_OK; 4523 4524 out: 4525 DTRACE_NFSV4_2(op__remove__done, struct compound_state *, cs, 4526 REMOVE4res *, resp); 4527 } 4528 4529 /* 4530 * rename: args: SAVED_FH: from directory, CURRENT_FH: target directory, 4531 * oldname and newname. 4532 * res: status. 
If success - CURRENT_FH unchanged, return change_info 4533 * for both from and target directories. 4534 */ 4535 /* ARGSUSED */ 4536 static void 4537 rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 4538 struct compound_state *cs) 4539 { 4540 RENAME4args *args = &argop->nfs_argop4_u.oprename; 4541 RENAME4res *resp = &resop->nfs_resop4_u.oprename; 4542 int error; 4543 vnode_t *odvp; 4544 vnode_t *ndvp; 4545 vnode_t *srcvp, *targvp, *tvp; 4546 struct vattr obdva, oidva, oadva; 4547 struct vattr nbdva, nidva, nadva; 4548 char *onm, *nnm; 4549 uint_t olen, nlen; 4550 rfs4_file_t *fp, *sfp; 4551 int in_crit_src, in_crit_targ; 4552 int fp_rele_grant_hold, sfp_rele_grant_hold; 4553 int unlinked; 4554 bslabel_t *clabel; 4555 struct sockaddr *ca; 4556 char *converted_onm = NULL; 4557 char *converted_nnm = NULL; 4558 nfsstat4 status; 4559 4560 DTRACE_NFSV4_2(op__rename__start, struct compound_state *, cs, 4561 RENAME4args *, args); 4562 4563 fp = sfp = NULL; 4564 srcvp = targvp = tvp = NULL; 4565 in_crit_src = in_crit_targ = 0; 4566 fp_rele_grant_hold = sfp_rele_grant_hold = 0; 4567 unlinked = 0; 4568 4569 /* CURRENT_FH: target directory */ 4570 ndvp = cs->vp; 4571 if (ndvp == NULL) { 4572 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 4573 goto out; 4574 } 4575 4576 /* SAVED_FH: from directory */ 4577 odvp = cs->saved_vp; 4578 if (odvp == NULL) { 4579 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 4580 goto out; 4581 } 4582 4583 if (cs->access == CS_ACCESS_DENIED) { 4584 *cs->statusp = resp->status = NFS4ERR_ACCESS; 4585 goto out; 4586 } 4587 4588 /* 4589 * If there is an unshared filesystem mounted on this vnode, 4590 * do not allow to rename objects in this directory. 4591 */ 4592 if (vn_ismntpt(odvp)) { 4593 *cs->statusp = resp->status = NFS4ERR_ACCESS; 4594 goto out; 4595 } 4596 4597 /* 4598 * If there is an unshared filesystem mounted on this vnode, 4599 * do not allow to rename to this directory. 
4600 */ 4601 if (vn_ismntpt(ndvp)) { 4602 *cs->statusp = resp->status = NFS4ERR_ACCESS; 4603 goto out; 4604 } 4605 4606 if (odvp->v_type != VDIR || ndvp->v_type != VDIR) { 4607 *cs->statusp = resp->status = NFS4ERR_NOTDIR; 4608 goto out; 4609 } 4610 4611 if (cs->saved_exi != cs->exi) { 4612 *cs->statusp = resp->status = NFS4ERR_XDEV; 4613 goto out; 4614 } 4615 4616 status = utf8_dir_verify(&args->oldname); 4617 if (status != NFS4_OK) { 4618 *cs->statusp = resp->status = status; 4619 goto out; 4620 } 4621 4622 status = utf8_dir_verify(&args->newname); 4623 if (status != NFS4_OK) { 4624 *cs->statusp = resp->status = status; 4625 goto out; 4626 } 4627 4628 onm = utf8_to_fn(&args->oldname, &olen, NULL); 4629 if (onm == NULL) { 4630 *cs->statusp = resp->status = NFS4ERR_INVAL; 4631 goto out; 4632 } 4633 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 4634 nlen = MAXPATHLEN + 1; 4635 converted_onm = nfscmd_convname(ca, cs->exi, onm, NFSCMD_CONV_INBOUND, 4636 nlen); 4637 4638 if (converted_onm == NULL) { 4639 *cs->statusp = resp->status = NFS4ERR_INVAL; 4640 kmem_free(onm, olen); 4641 goto out; 4642 } 4643 4644 nnm = utf8_to_fn(&args->newname, &nlen, NULL); 4645 if (nnm == NULL) { 4646 *cs->statusp = resp->status = NFS4ERR_INVAL; 4647 if (onm != converted_onm) 4648 kmem_free(converted_onm, MAXPATHLEN + 1); 4649 kmem_free(onm, olen); 4650 goto out; 4651 } 4652 converted_nnm = nfscmd_convname(ca, cs->exi, nnm, NFSCMD_CONV_INBOUND, 4653 MAXPATHLEN + 1); 4654 4655 if (converted_nnm == NULL) { 4656 *cs->statusp = resp->status = NFS4ERR_INVAL; 4657 kmem_free(nnm, nlen); 4658 nnm = NULL; 4659 if (onm != converted_onm) 4660 kmem_free(converted_onm, MAXPATHLEN + 1); 4661 kmem_free(onm, olen); 4662 goto out; 4663 } 4664 4665 4666 if (olen > MAXNAMELEN || nlen > MAXNAMELEN) { 4667 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG; 4668 kmem_free(onm, olen); 4669 kmem_free(nnm, nlen); 4670 goto out; 4671 } 4672 4673 4674 if (rdonly4(req, cs)) { 4675 *cs->statusp = 
resp->status = NFS4ERR_ROFS; 4676 if (onm != converted_onm) 4677 kmem_free(converted_onm, MAXPATHLEN + 1); 4678 kmem_free(onm, olen); 4679 if (nnm != converted_nnm) 4680 kmem_free(converted_nnm, MAXPATHLEN + 1); 4681 kmem_free(nnm, nlen); 4682 goto out; 4683 } 4684 4685 /* check label of the target dir */ 4686 if (is_system_labeled()) { 4687 ASSERT(req->rq_label != NULL); 4688 clabel = req->rq_label; 4689 DTRACE_PROBE2(tx__rfs4__log__info__oprename__clabel, char *, 4690 "got client label from request(1)", 4691 struct svc_req *, req); 4692 if (!blequal(&l_admin_low->tsl_label, clabel)) { 4693 if (!do_rfs_label_check(clabel, ndvp, 4694 EQUALITY_CHECK, cs->exi)) { 4695 *cs->statusp = resp->status = NFS4ERR_ACCESS; 4696 goto err_out; 4697 } 4698 } 4699 } 4700 4701 /* 4702 * Is the source a file and have a delegation? 4703 * We don't need to acquire va_seq before these lookups, if 4704 * it causes an update, cinfo.before will not match, which will 4705 * trigger a cache flush even if atomic is TRUE. 4706 */ 4707 if (sfp = rfs4_lookup_and_findfile(odvp, converted_onm, &srcvp, 4708 &error, cs->cr)) { 4709 if (rfs4_check_delegated_byfp(FWRITE, sfp, TRUE, TRUE, TRUE, 4710 NULL)) { 4711 *cs->statusp = resp->status = NFS4ERR_DELAY; 4712 goto err_out; 4713 } 4714 } 4715 4716 if (srcvp == NULL) { 4717 *cs->statusp = resp->status = puterrno4(error); 4718 if (onm != converted_onm) 4719 kmem_free(converted_onm, MAXPATHLEN + 1); 4720 kmem_free(onm, olen); 4721 if (nnm != converted_nnm) 4722 kmem_free(converted_nnm, MAXPATHLEN + 1); 4723 kmem_free(nnm, nlen); 4724 goto out; 4725 } 4726 4727 sfp_rele_grant_hold = 1; 4728 4729 /* Does the destination exist and a file and have a delegation? 
*/ 4730 if (fp = rfs4_lookup_and_findfile(ndvp, converted_nnm, &targvp, 4731 NULL, cs->cr)) { 4732 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE, 4733 NULL)) { 4734 *cs->statusp = resp->status = NFS4ERR_DELAY; 4735 goto err_out; 4736 } 4737 } 4738 fp_rele_grant_hold = 1; 4739 4740 /* Check for NBMAND lock on both source and target */ 4741 if (nbl_need_check(srcvp)) { 4742 nbl_start_crit(srcvp, RW_READER); 4743 in_crit_src = 1; 4744 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) { 4745 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN; 4746 goto err_out; 4747 } 4748 } 4749 4750 if (targvp && nbl_need_check(targvp)) { 4751 nbl_start_crit(targvp, RW_READER); 4752 in_crit_targ = 1; 4753 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) { 4754 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN; 4755 goto err_out; 4756 } 4757 } 4758 4759 /* Get source "before" change value */ 4760 obdva.va_mask = AT_CTIME|AT_SEQ; 4761 error = VOP_GETATTR(odvp, &obdva, 0, cs->cr, NULL); 4762 if (!error) { 4763 nbdva.va_mask = AT_CTIME|AT_SEQ; 4764 error = VOP_GETATTR(ndvp, &nbdva, 0, cs->cr, NULL); 4765 } 4766 if (error) { 4767 *cs->statusp = resp->status = puterrno4(error); 4768 goto err_out; 4769 } 4770 4771 NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.before, obdva.va_ctime) 4772 NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.before, nbdva.va_ctime) 4773 4774 error = VOP_RENAME(odvp, converted_onm, ndvp, converted_nnm, cs->cr, 4775 NULL, 0); 4776 4777 /* 4778 * If target existed and was unlinked by VOP_RENAME, state will need 4779 * closed. To avoid deadlock, rfs4_close_all_state will be done after 4780 * any necessary nbl_end_crit on srcvp and tgtvp. 
4781 */ 4782 if (error == 0 && fp != NULL) { 4783 rfs4_dbe_lock(fp->rf_dbe); 4784 tvp = fp->rf_vp; 4785 if (tvp) 4786 VN_HOLD(tvp); 4787 rfs4_dbe_unlock(fp->rf_dbe); 4788 4789 if (tvp) { 4790 struct vattr va; 4791 va.va_mask = AT_NLINK; 4792 4793 if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) && 4794 va.va_nlink == 0) { 4795 unlinked = 1; 4796 4797 /* DEBUG data */ 4798 if ((srcvp == targvp) || (tvp != targvp)) { 4799 cmn_err(CE_WARN, "rfs4_op_rename: " 4800 "srcvp %p, targvp: %p, tvp: %p", 4801 (void *)srcvp, (void *)targvp, 4802 (void *)tvp); 4803 } 4804 } else { 4805 VN_RELE(tvp); 4806 } 4807 } 4808 } 4809 if (error == 0) 4810 vn_renamepath(ndvp, srcvp, nnm, nlen - 1); 4811 4812 if (in_crit_src) 4813 nbl_end_crit(srcvp); 4814 if (srcvp) 4815 VN_RELE(srcvp); 4816 if (in_crit_targ) 4817 nbl_end_crit(targvp); 4818 if (targvp) 4819 VN_RELE(targvp); 4820 4821 if (unlinked) { 4822 ASSERT(fp != NULL); 4823 ASSERT(tvp != NULL); 4824 4825 /* DEBUG data */ 4826 if (RW_READ_HELD(&tvp->v_nbllock)) { 4827 cmn_err(CE_WARN, "rfs4_op_rename: " 4828 "RW_READ_HELD(%p)", (void *)tvp); 4829 } 4830 4831 /* The file is gone and so should the state */ 4832 rfs4_close_all_state(fp); 4833 VN_RELE(tvp); 4834 } 4835 4836 if (sfp) { 4837 rfs4_clear_dont_grant(sfp); 4838 rfs4_file_rele(sfp); 4839 } 4840 if (fp) { 4841 rfs4_clear_dont_grant(fp); 4842 rfs4_file_rele(fp); 4843 } 4844 4845 if (converted_onm != onm) 4846 kmem_free(converted_onm, MAXPATHLEN + 1); 4847 kmem_free(onm, olen); 4848 if (converted_nnm != nnm) 4849 kmem_free(converted_nnm, MAXPATHLEN + 1); 4850 kmem_free(nnm, nlen); 4851 4852 /* 4853 * Get the initial "after" sequence number, if it fails, set to zero 4854 */ 4855 oidva.va_mask = AT_SEQ; 4856 if (VOP_GETATTR(odvp, &oidva, 0, cs->cr, NULL)) 4857 oidva.va_seq = 0; 4858 4859 nidva.va_mask = AT_SEQ; 4860 if (VOP_GETATTR(ndvp, &nidva, 0, cs->cr, NULL)) 4861 nidva.va_seq = 0; 4862 4863 /* 4864 * Force modified data and metadata out to stable storage. 
4865 */ 4866 (void) VOP_FSYNC(odvp, 0, cs->cr, NULL); 4867 (void) VOP_FSYNC(ndvp, 0, cs->cr, NULL); 4868 4869 if (error) { 4870 *cs->statusp = resp->status = puterrno4(error); 4871 goto out; 4872 } 4873 4874 /* 4875 * Get "after" change values, if it fails, simply return the 4876 * before value. 4877 */ 4878 oadva.va_mask = AT_CTIME|AT_SEQ; 4879 if (VOP_GETATTR(odvp, &oadva, 0, cs->cr, NULL)) { 4880 oadva.va_ctime = obdva.va_ctime; 4881 oadva.va_seq = 0; 4882 } 4883 4884 nadva.va_mask = AT_CTIME|AT_SEQ; 4885 if (VOP_GETATTR(odvp, &nadva, 0, cs->cr, NULL)) { 4886 nadva.va_ctime = nbdva.va_ctime; 4887 nadva.va_seq = 0; 4888 } 4889 4890 NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.after, oadva.va_ctime) 4891 NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.after, nadva.va_ctime) 4892 4893 /* 4894 * The cinfo.atomic = TRUE only if we have 4895 * non-zero va_seq's, and it has incremented by exactly one 4896 * during the VOP_RENAME and it didn't change during the VOP_FSYNC. 4897 */ 4898 if (obdva.va_seq && oidva.va_seq && oadva.va_seq && 4899 oidva.va_seq == (obdva.va_seq + 1) && oidva.va_seq == oadva.va_seq) 4900 resp->source_cinfo.atomic = TRUE; 4901 else 4902 resp->source_cinfo.atomic = FALSE; 4903 4904 if (nbdva.va_seq && nidva.va_seq && nadva.va_seq && 4905 nidva.va_seq == (nbdva.va_seq + 1) && nidva.va_seq == nadva.va_seq) 4906 resp->target_cinfo.atomic = TRUE; 4907 else 4908 resp->target_cinfo.atomic = FALSE; 4909 4910 #ifdef VOLATILE_FH_TEST 4911 { 4912 extern void add_volrnm_fh(struct exportinfo *, vnode_t *); 4913 4914 /* 4915 * Add the renamed file handle to the volatile rename list 4916 */ 4917 if (cs->exi->exi_export.ex_flags & EX_VOLRNM) { 4918 /* file handles may expire on rename */ 4919 vnode_t *vp; 4920 4921 nnm = utf8_to_fn(&args->newname, &nlen, NULL); 4922 /* 4923 * Already know that nnm will be a valid string 4924 */ 4925 error = VOP_LOOKUP(ndvp, nnm, &vp, NULL, 0, NULL, cs->cr, 4926 NULL, NULL, NULL); 4927 kmem_free(nnm, nlen); 4928 if (!error) { 4929 
add_volrnm_fh(cs->exi, vp); 4930 VN_RELE(vp); 4931 } 4932 } 4933 } 4934 #endif /* VOLATILE_FH_TEST */ 4935 4936 *cs->statusp = resp->status = NFS4_OK; 4937 out: 4938 DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs, 4939 RENAME4res *, resp); 4940 return; 4941 4942 err_out: 4943 if (onm != converted_onm) 4944 kmem_free(converted_onm, MAXPATHLEN + 1); 4945 if (onm != NULL) 4946 kmem_free(onm, olen); 4947 if (nnm != converted_nnm) 4948 kmem_free(converted_nnm, MAXPATHLEN + 1); 4949 if (nnm != NULL) 4950 kmem_free(nnm, nlen); 4951 4952 if (in_crit_src) nbl_end_crit(srcvp); 4953 if (in_crit_targ) nbl_end_crit(targvp); 4954 if (targvp) VN_RELE(targvp); 4955 if (srcvp) VN_RELE(srcvp); 4956 if (sfp) { 4957 if (sfp_rele_grant_hold) rfs4_clear_dont_grant(sfp); 4958 rfs4_file_rele(sfp); 4959 } 4960 if (fp) { 4961 if (fp_rele_grant_hold) rfs4_clear_dont_grant(fp); 4962 rfs4_file_rele(fp); 4963 } 4964 4965 DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs, 4966 RENAME4res *, resp); 4967 } 4968 4969 /* ARGSUSED */ 4970 static void 4971 rfs4_op_renew(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 4972 struct compound_state *cs) 4973 { 4974 RENEW4args *args = &argop->nfs_argop4_u.oprenew; 4975 RENEW4res *resp = &resop->nfs_resop4_u.oprenew; 4976 rfs4_client_t *cp; 4977 4978 DTRACE_NFSV4_2(op__renew__start, struct compound_state *, cs, 4979 RENEW4args *, args); 4980 4981 if ((cp = rfs4_findclient_by_id(args->clientid, FALSE)) == NULL) { 4982 *cs->statusp = resp->status = 4983 rfs4_check_clientid(&args->clientid, 0); 4984 goto out; 4985 } 4986 4987 if (rfs4_lease_expired(cp)) { 4988 rfs4_client_rele(cp); 4989 *cs->statusp = resp->status = NFS4ERR_EXPIRED; 4990 goto out; 4991 } 4992 4993 rfs4_update_lease(cp); 4994 4995 mutex_enter(cp->rc_cbinfo.cb_lock); 4996 if (cp->rc_cbinfo.cb_notified_of_cb_path_down == FALSE) { 4997 cp->rc_cbinfo.cb_notified_of_cb_path_down = TRUE; 4998 *cs->statusp = resp->status = NFS4ERR_CB_PATH_DOWN; 4999 } else { 5000 
*cs->statusp = resp->status = NFS4_OK; 5001 } 5002 mutex_exit(cp->rc_cbinfo.cb_lock); 5003 5004 rfs4_client_rele(cp); 5005 5006 out: 5007 DTRACE_NFSV4_2(op__renew__done, struct compound_state *, cs, 5008 RENEW4res *, resp); 5009 } 5010 5011 /* ARGSUSED */ 5012 static void 5013 rfs4_op_restorefh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req, 5014 struct compound_state *cs) 5015 { 5016 RESTOREFH4res *resp = &resop->nfs_resop4_u.oprestorefh; 5017 5018 DTRACE_NFSV4_1(op__restorefh__start, struct compound_state *, cs); 5019 5020 /* No need to check cs->access - we are not accessing any object */ 5021 if ((cs->saved_vp == NULL) || (cs->saved_fh.nfs_fh4_val == NULL)) { 5022 *cs->statusp = resp->status = NFS4ERR_RESTOREFH; 5023 goto out; 5024 } 5025 if (cs->vp != NULL) { 5026 VN_RELE(cs->vp); 5027 } 5028 cs->vp = cs->saved_vp; 5029 cs->saved_vp = NULL; 5030 cs->exi = cs->saved_exi; 5031 nfs_fh4_copy(&cs->saved_fh, &cs->fh); 5032 *cs->statusp = resp->status = NFS4_OK; 5033 cs->deleg = FALSE; 5034 5035 out: 5036 DTRACE_NFSV4_2(op__restorefh__done, struct compound_state *, cs, 5037 RESTOREFH4res *, resp); 5038 } 5039 5040 /* ARGSUSED */ 5041 static void 5042 rfs4_op_savefh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 5043 struct compound_state *cs) 5044 { 5045 SAVEFH4res *resp = &resop->nfs_resop4_u.opsavefh; 5046 5047 DTRACE_NFSV4_1(op__savefh__start, struct compound_state *, cs); 5048 5049 /* No need to check cs->access - we are not accessing any object */ 5050 if (cs->vp == NULL) { 5051 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 5052 goto out; 5053 } 5054 if (cs->saved_vp != NULL) { 5055 VN_RELE(cs->saved_vp); 5056 } 5057 cs->saved_vp = cs->vp; 5058 VN_HOLD(cs->saved_vp); 5059 cs->saved_exi = cs->exi; 5060 /* 5061 * since SAVEFH is fairly rare, don't alloc space for its fh 5062 * unless necessary. 
5063 */ 5064 if (cs->saved_fh.nfs_fh4_val == NULL) { 5065 cs->saved_fh.nfs_fh4_val = kmem_alloc(NFS4_FHSIZE, KM_SLEEP); 5066 } 5067 nfs_fh4_copy(&cs->fh, &cs->saved_fh); 5068 *cs->statusp = resp->status = NFS4_OK; 5069 5070 out: 5071 DTRACE_NFSV4_2(op__savefh__done, struct compound_state *, cs, 5072 SAVEFH4res *, resp); 5073 } 5074 5075 /* 5076 * rfs4_verify_attr is called when nfsv4 Setattr failed, but we wish to 5077 * return the bitmap of attrs that were set successfully. It is also 5078 * called by Verify/Nverify to test the vattr/vfsstat attrs. It should 5079 * always be called only after rfs4_do_set_attrs(). 5080 * 5081 * Verify that the attributes are same as the expected ones. sargp->vap 5082 * and sargp->sbp contain the input attributes as translated from fattr4. 5083 * 5084 * This function verifies only the attrs that correspond to a vattr or 5085 * vfsstat struct. That is because of the extra step needed to get the 5086 * corresponding system structs. Other attributes have already been set or 5087 * verified by do_rfs4_set_attrs. 5088 * 5089 * Return 0 if all attrs match, -1 if some don't, error if error processing. 5090 */ 5091 static int 5092 rfs4_verify_attr(struct nfs4_svgetit_arg *sargp, 5093 bitmap4 *resp, struct nfs4_ntov_table *ntovp) 5094 { 5095 int error, ret_error = 0; 5096 int i, k; 5097 uint_t sva_mask = sargp->vap->va_mask; 5098 uint_t vbit; 5099 union nfs4_attr_u *na; 5100 uint8_t *amap; 5101 bool_t getsb = ntovp->vfsstat; 5102 5103 if (sva_mask != 0) { 5104 /* 5105 * Okay to overwrite sargp->vap because we verify based 5106 * on the incoming values. 5107 */ 5108 ret_error = VOP_GETATTR(sargp->cs->vp, sargp->vap, 0, 5109 sargp->cs->cr, NULL); 5110 if (ret_error) { 5111 if (resp == NULL) 5112 return (ret_error); 5113 /* 5114 * Must return bitmap of successful attrs 5115 */ 5116 sva_mask = 0; /* to prevent checking vap later */ 5117 } else { 5118 /* 5119 * Some file systems clobber va_mask. 
it is probably 5120 * wrong of them to do so, nonethless we practice 5121 * defensive coding. 5122 * See bug id 4276830. 5123 */ 5124 sargp->vap->va_mask = sva_mask; 5125 } 5126 } 5127 5128 if (getsb) { 5129 /* 5130 * Now get the superblock and loop on the bitmap, as there is 5131 * no simple way of translating from superblock to bitmap4. 5132 */ 5133 ret_error = VFS_STATVFS(sargp->cs->vp->v_vfsp, sargp->sbp); 5134 if (ret_error) { 5135 if (resp == NULL) 5136 goto errout; 5137 getsb = FALSE; 5138 } 5139 } 5140 5141 /* 5142 * Now loop and verify each attribute which getattr returned 5143 * whether it's the same as the input. 5144 */ 5145 if (resp == NULL && !getsb && (sva_mask == 0)) 5146 goto errout; 5147 5148 na = ntovp->na; 5149 amap = ntovp->amap; 5150 k = 0; 5151 for (i = 0; i < ntovp->attrcnt; i++, na++, amap++) { 5152 k = *amap; 5153 ASSERT(nfs4_ntov_map[k].nval == k); 5154 vbit = nfs4_ntov_map[k].vbit; 5155 5156 /* 5157 * If vattr attribute but VOP_GETATTR failed, or it's 5158 * superblock attribute but VFS_STATVFS failed, skip 5159 */ 5160 if (vbit) { 5161 if ((vbit & sva_mask) == 0) 5162 continue; 5163 } else if (!(getsb && nfs4_ntov_map[k].vfsstat)) { 5164 continue; 5165 } 5166 error = (*nfs4_ntov_map[k].sv_getit)(NFS4ATTR_VERIT, sargp, na); 5167 if (resp != NULL) { 5168 if (error) 5169 ret_error = -1; /* not all match */ 5170 else /* update response bitmap */ 5171 *resp |= nfs4_ntov_map[k].fbit; 5172 continue; 5173 } 5174 if (error) { 5175 ret_error = -1; /* not all match */ 5176 break; 5177 } 5178 } 5179 errout: 5180 return (ret_error); 5181 } 5182 5183 /* 5184 * Decode the attribute to be set/verified. If the attr requires a sys op 5185 * (VOP_GETATTR, VFS_VFSSTAT), and the request is to verify, then don't 5186 * call the sv_getit function for it, because the sys op hasn't yet been done. 5187 * Return 0 for success, error code if failed. 5188 * 5189 * Note: the decoded arg is not freed here but in nfs4_ntov_table_free. 
5190 */ 5191 static int 5192 decode_fattr4_attr(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sargp, 5193 int k, XDR *xdrp, bitmap4 *resp_bval, union nfs4_attr_u *nap) 5194 { 5195 int error = 0; 5196 bool_t set_later; 5197 5198 sargp->vap->va_mask |= nfs4_ntov_map[k].vbit; 5199 5200 if ((*nfs4_ntov_map[k].xfunc)(xdrp, nap)) { 5201 set_later = nfs4_ntov_map[k].vbit || nfs4_ntov_map[k].vfsstat; 5202 /* 5203 * don't verify yet if a vattr or sb dependent attr, 5204 * because we don't have their sys values yet. 5205 * Will be done later. 5206 */ 5207 if (! (set_later && (cmd == NFS4ATTR_VERIT))) { 5208 /* 5209 * ACLs are a special case, since setting the MODE 5210 * conflicts with setting the ACL. We delay setting 5211 * the ACL until all other attributes have been set. 5212 * The ACL gets set in do_rfs4_op_setattr(). 5213 */ 5214 if (nfs4_ntov_map[k].fbit != FATTR4_ACL_MASK) { 5215 error = (*nfs4_ntov_map[k].sv_getit)(cmd, 5216 sargp, nap); 5217 if (error) { 5218 xdr_free(nfs4_ntov_map[k].xfunc, 5219 (caddr_t)nap); 5220 } 5221 } 5222 } 5223 } else { 5224 #ifdef DEBUG 5225 cmn_err(CE_NOTE, "decode_fattr4_attr: error " 5226 "decoding attribute %d\n", k); 5227 #endif 5228 error = EINVAL; 5229 } 5230 if (!error && resp_bval && !set_later) { 5231 *resp_bval |= nfs4_ntov_map[k].fbit; 5232 } 5233 5234 return (error); 5235 } 5236 5237 /* 5238 * Set vattr based on incoming fattr4 attrs - used by setattr. 5239 * Set response mask. Ignore any values that are not writable vattr attrs. 
5240 */ 5241 static nfsstat4 5242 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs, 5243 struct nfs4_svgetit_arg *sargp, struct nfs4_ntov_table *ntovp, 5244 nfs4_attr_cmd_t cmd) 5245 { 5246 int error = 0; 5247 int i; 5248 char *attrs = fattrp->attrlist4; 5249 uint32_t attrslen = fattrp->attrlist4_len; 5250 XDR xdr; 5251 nfsstat4 status = NFS4_OK; 5252 vnode_t *vp = cs->vp; 5253 union nfs4_attr_u *na; 5254 uint8_t *amap; 5255 5256 #ifndef lint 5257 /* 5258 * Make sure that maximum attribute number can be expressed as an 5259 * 8 bit quantity. 5260 */ 5261 ASSERT(NFS4_MAXNUM_ATTRS <= (UINT8_MAX + 1)); 5262 #endif 5263 5264 if (vp == NULL) { 5265 if (resp) 5266 *resp = 0; 5267 return (NFS4ERR_NOFILEHANDLE); 5268 } 5269 if (cs->access == CS_ACCESS_DENIED) { 5270 if (resp) 5271 *resp = 0; 5272 return (NFS4ERR_ACCESS); 5273 } 5274 5275 sargp->op = cmd; 5276 sargp->cs = cs; 5277 sargp->flag = 0; /* may be set later */ 5278 sargp->vap->va_mask = 0; 5279 sargp->rdattr_error = NFS4_OK; 5280 sargp->rdattr_error_req = FALSE; 5281 /* sargp->sbp is set by the caller */ 5282 5283 xdrmem_create(&xdr, attrs, attrslen, XDR_DECODE); 5284 5285 na = ntovp->na; 5286 amap = ntovp->amap; 5287 5288 /* 5289 * The following loop iterates on the nfs4_ntov_map checking 5290 * if the fbit is set in the requested bitmap. 5291 * If set then we process the arguments using the 5292 * rfs4_fattr4 conversion functions to populate the setattr 5293 * vattr and va_mask. Any settable attrs that are not using vattr 5294 * will be set in this loop. 5295 */ 5296 for (i = 0; i < nfs4_ntov_map_size; i++) { 5297 if (!(fattrp->attrmask & nfs4_ntov_map[i].fbit)) { 5298 continue; 5299 } 5300 /* 5301 * If setattr, must be a writable attr. 5302 * If verify/nverify, must be a readable attr. 
5303 */ 5304 if ((error = (*nfs4_ntov_map[i].sv_getit)( 5305 NFS4ATTR_SUPPORTED, sargp, NULL)) != 0) { 5306 /* 5307 * Client tries to set/verify an 5308 * unsupported attribute, tries to set 5309 * a read only attr or verify a write 5310 * only one - error! 5311 */ 5312 break; 5313 } 5314 /* 5315 * Decode the attribute to set/verify 5316 */ 5317 error = decode_fattr4_attr(cmd, sargp, nfs4_ntov_map[i].nval, 5318 &xdr, resp ? resp : NULL, na); 5319 if (error) 5320 break; 5321 *amap++ = (uint8_t)nfs4_ntov_map[i].nval; 5322 na++; 5323 (ntovp->attrcnt)++; 5324 if (nfs4_ntov_map[i].vfsstat) 5325 ntovp->vfsstat = TRUE; 5326 } 5327 5328 if (error != 0) 5329 status = (error == ENOTSUP ? NFS4ERR_ATTRNOTSUPP : 5330 puterrno4(error)); 5331 /* xdrmem_destroy(&xdrs); */ /* NO-OP */ 5332 return (status); 5333 } 5334 5335 static nfsstat4 5336 do_rfs4_op_setattr(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs, 5337 stateid4 *stateid) 5338 { 5339 int error = 0; 5340 struct nfs4_svgetit_arg sarg; 5341 bool_t trunc; 5342 5343 nfsstat4 status = NFS4_OK; 5344 cred_t *cr = cs->cr; 5345 vnode_t *vp = cs->vp; 5346 struct nfs4_ntov_table ntov; 5347 struct statvfs64 sb; 5348 struct vattr bva; 5349 struct flock64 bf; 5350 int in_crit = 0; 5351 uint_t saved_mask = 0; 5352 caller_context_t ct; 5353 5354 *resp = 0; 5355 sarg.sbp = &sb; 5356 sarg.is_referral = B_FALSE; 5357 nfs4_ntov_table_init(&ntov); 5358 status = do_rfs4_set_attrs(resp, fattrp, cs, &sarg, &ntov, 5359 NFS4ATTR_SETIT); 5360 if (status != NFS4_OK) { 5361 /* 5362 * failed set attrs 5363 */ 5364 goto done; 5365 } 5366 5367 if ((sarg.vap->va_mask == 0) && 5368 (! (fattrp->attrmask & FATTR4_ACL_MASK))) { 5369 /* 5370 * no further work to be done 5371 */ 5372 goto done; 5373 } 5374 5375 /* 5376 * If we got a request to set the ACL and the MODE, only 5377 * allow changing VSUID, VSGID, and VSVTX. Attempting 5378 * to change any other bits, along with setting an ACL, 5379 * gives NFS4ERR_INVAL. 
5380 */ 5381 if ((fattrp->attrmask & FATTR4_ACL_MASK) && 5382 (fattrp->attrmask & FATTR4_MODE_MASK)) { 5383 vattr_t va; 5384 5385 va.va_mask = AT_MODE; 5386 error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL); 5387 if (error) { 5388 status = puterrno4(error); 5389 goto done; 5390 } 5391 if ((sarg.vap->va_mode ^ va.va_mode) & 5392 ~(VSUID | VSGID | VSVTX)) { 5393 status = NFS4ERR_INVAL; 5394 goto done; 5395 } 5396 } 5397 5398 /* Check stateid only if size has been set */ 5399 if (sarg.vap->va_mask & AT_SIZE) { 5400 trunc = (sarg.vap->va_size == 0); 5401 status = rfs4_check_stateid(FWRITE, cs->vp, stateid, 5402 trunc, &cs->deleg, sarg.vap->va_mask & AT_SIZE, &ct, cs); 5403 if (status != NFS4_OK) 5404 goto done; 5405 } else { 5406 ct.cc_sysid = 0; 5407 ct.cc_pid = 0; 5408 ct.cc_caller_id = nfs4_srv_caller_id; 5409 ct.cc_flags = CC_DONTBLOCK; 5410 } 5411 5412 /* XXX start of possible race with delegations */ 5413 5414 /* 5415 * We need to specially handle size changes because it is 5416 * possible for the client to create a file with read-only 5417 * modes, but with the file opened for writing. If the client 5418 * then tries to set the file size, e.g. ftruncate(3C), 5419 * fcntl(F_FREESP), the normal access checking done in 5420 * VOP_SETATTR would prevent the client from doing it even though 5421 * it should be allowed to do so. To get around this, we do the 5422 * access checking for ourselves and use VOP_SPACE which doesn't 5423 * do the access checking. 5424 * Also the client should not be allowed to change the file 5425 * size if there is a conflicting non-blocking mandatory lock in 5426 * the region of the change. 5427 */ 5428 if (vp->v_type == VREG && (sarg.vap->va_mask & AT_SIZE)) { 5429 u_offset_t offset; 5430 ssize_t length; 5431 5432 /* 5433 * ufs_setattr clears AT_SIZE from vap->va_mask, but 5434 * before returning, sarg.vap->va_mask is used to 5435 * generate the setattr reply bitmap. We also clear 5436 * AT_SIZE below before calling VOP_SPACE. 
For both 5437 * of these cases, the va_mask needs to be saved here 5438 * and restored after calling VOP_SETATTR. 5439 */ 5440 saved_mask = sarg.vap->va_mask; 5441 5442 /* 5443 * Check any possible conflict due to NBMAND locks. 5444 * Get into critical region before VOP_GETATTR, so the 5445 * size attribute is valid when checking conflicts. 5446 */ 5447 if (nbl_need_check(vp)) { 5448 nbl_start_crit(vp, RW_READER); 5449 in_crit = 1; 5450 } 5451 5452 bva.va_mask = AT_UID|AT_SIZE; 5453 if (error = VOP_GETATTR(vp, &bva, 0, cr, &ct)) { 5454 status = puterrno4(error); 5455 goto done; 5456 } 5457 5458 if (in_crit) { 5459 if (sarg.vap->va_size < bva.va_size) { 5460 offset = sarg.vap->va_size; 5461 length = bva.va_size - sarg.vap->va_size; 5462 } else { 5463 offset = bva.va_size; 5464 length = sarg.vap->va_size - bva.va_size; 5465 } 5466 if (nbl_conflict(vp, NBL_WRITE, offset, length, 0, 5467 &ct)) { 5468 status = NFS4ERR_LOCKED; 5469 goto done; 5470 } 5471 } 5472 5473 if (crgetuid(cr) == bva.va_uid) { 5474 sarg.vap->va_mask &= ~AT_SIZE; 5475 bf.l_type = F_WRLCK; 5476 bf.l_whence = 0; 5477 bf.l_start = (off64_t)sarg.vap->va_size; 5478 bf.l_len = 0; 5479 bf.l_sysid = 0; 5480 bf.l_pid = 0; 5481 error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE, 5482 (offset_t)sarg.vap->va_size, cr, &ct); 5483 } 5484 } 5485 5486 if (!error && sarg.vap->va_mask != 0) 5487 error = VOP_SETATTR(vp, sarg.vap, sarg.flag, cr, &ct); 5488 5489 /* restore va_mask -- ufs_setattr clears AT_SIZE */ 5490 if (saved_mask & AT_SIZE) 5491 sarg.vap->va_mask |= AT_SIZE; 5492 5493 /* 5494 * If an ACL was being set, it has been delayed until now, 5495 * in order to set the mode (via the VOP_SETATTR() above) first. 5496 */ 5497 if ((! 
error) && (fattrp->attrmask & FATTR4_ACL_MASK)) { 5498 int i; 5499 5500 for (i = 0; i < NFS4_MAXNUM_ATTRS; i++) 5501 if (ntov.amap[i] == FATTR4_ACL) 5502 break; 5503 if (i < NFS4_MAXNUM_ATTRS) { 5504 error = (*nfs4_ntov_map[FATTR4_ACL].sv_getit)( 5505 NFS4ATTR_SETIT, &sarg, &ntov.na[i]); 5506 if (error == 0) { 5507 *resp |= FATTR4_ACL_MASK; 5508 } else if (error == ENOTSUP) { 5509 (void) rfs4_verify_attr(&sarg, resp, &ntov); 5510 status = NFS4ERR_ATTRNOTSUPP; 5511 goto done; 5512 } 5513 } else { 5514 NFS4_DEBUG(rfs4_debug, 5515 (CE_NOTE, "do_rfs4_op_setattr: " 5516 "unable to find ACL in fattr4")); 5517 error = EINVAL; 5518 } 5519 } 5520 5521 if (error) { 5522 /* check if a monitor detected a delegation conflict */ 5523 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) 5524 status = NFS4ERR_DELAY; 5525 else 5526 status = puterrno4(error); 5527 5528 /* 5529 * Set the response bitmap when setattr failed. 5530 * If VOP_SETATTR partially succeeded, test by doing a 5531 * VOP_GETATTR on the object and comparing the data 5532 * to the setattr arguments. 5533 */ 5534 (void) rfs4_verify_attr(&sarg, resp, &ntov); 5535 } else { 5536 /* 5537 * Force modified metadata out to stable storage. 5538 */ 5539 (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct); 5540 /* 5541 * Set response bitmap 5542 */ 5543 nfs4_vmask_to_nmask_set(sarg.vap->va_mask, resp); 5544 } 5545 5546 /* Return early and already have a NFSv4 error */ 5547 done: 5548 /* 5549 * Except for nfs4_vmask_to_nmask_set(), vattr --> fattr 5550 * conversion sets both readable and writeable NFS4 attrs 5551 * for AT_MTIME and AT_ATIME. The line below masks out 5552 * unrequested attrs from the setattr result bitmap. This 5553 * is placed after the done: label to catch the ATTRNOTSUP 5554 * case. 
5555 */ 5556 *resp &= fattrp->attrmask; 5557 5558 if (in_crit) 5559 nbl_end_crit(vp); 5560 5561 nfs4_ntov_table_free(&ntov, &sarg); 5562 5563 return (status); 5564 } 5565 5566 /* ARGSUSED */ 5567 static void 5568 rfs4_op_setattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 5569 struct compound_state *cs) 5570 { 5571 SETATTR4args *args = &argop->nfs_argop4_u.opsetattr; 5572 SETATTR4res *resp = &resop->nfs_resop4_u.opsetattr; 5573 bslabel_t *clabel; 5574 5575 DTRACE_NFSV4_2(op__setattr__start, struct compound_state *, cs, 5576 SETATTR4args *, args); 5577 5578 if (cs->vp == NULL) { 5579 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 5580 goto out; 5581 } 5582 5583 /* 5584 * If there is an unshared filesystem mounted on this vnode, 5585 * do not allow to setattr on this vnode. 5586 */ 5587 if (vn_ismntpt(cs->vp)) { 5588 *cs->statusp = resp->status = NFS4ERR_ACCESS; 5589 goto out; 5590 } 5591 5592 resp->attrsset = 0; 5593 5594 if (rdonly4(req, cs)) { 5595 *cs->statusp = resp->status = NFS4ERR_ROFS; 5596 goto out; 5597 } 5598 5599 /* check label before setting attributes */ 5600 if (is_system_labeled()) { 5601 ASSERT(req->rq_label != NULL); 5602 clabel = req->rq_label; 5603 DTRACE_PROBE2(tx__rfs4__log__info__opsetattr__clabel, char *, 5604 "got client label from request(1)", 5605 struct svc_req *, req); 5606 if (!blequal(&l_admin_low->tsl_label, clabel)) { 5607 if (!do_rfs_label_check(clabel, cs->vp, 5608 EQUALITY_CHECK, cs->exi)) { 5609 *cs->statusp = resp->status = NFS4ERR_ACCESS; 5610 goto out; 5611 } 5612 } 5613 } 5614 5615 *cs->statusp = resp->status = 5616 do_rfs4_op_setattr(&resp->attrsset, &args->obj_attributes, cs, 5617 &args->stateid); 5618 5619 out: 5620 DTRACE_NFSV4_2(op__setattr__done, struct compound_state *, cs, 5621 SETATTR4res *, resp); 5622 } 5623 5624 /* ARGSUSED */ 5625 static void 5626 rfs4_op_verify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 5627 struct compound_state *cs) 5628 { 5629 /* 5630 * verify and nverify 
are exactly the same, except that nverify 5631 * succeeds when some argument changed, and verify succeeds when 5632 * when none changed. 5633 */ 5634 5635 VERIFY4args *args = &argop->nfs_argop4_u.opverify; 5636 VERIFY4res *resp = &resop->nfs_resop4_u.opverify; 5637 5638 int error; 5639 struct nfs4_svgetit_arg sarg; 5640 struct statvfs64 sb; 5641 struct nfs4_ntov_table ntov; 5642 5643 DTRACE_NFSV4_2(op__verify__start, struct compound_state *, cs, 5644 VERIFY4args *, args); 5645 5646 if (cs->vp == NULL) { 5647 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 5648 goto out; 5649 } 5650 5651 sarg.sbp = &sb; 5652 sarg.is_referral = B_FALSE; 5653 nfs4_ntov_table_init(&ntov); 5654 resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs, 5655 &sarg, &ntov, NFS4ATTR_VERIT); 5656 if (resp->status != NFS4_OK) { 5657 /* 5658 * do_rfs4_set_attrs will try to verify systemwide attrs, 5659 * so could return -1 for "no match". 5660 */ 5661 if (resp->status == -1) 5662 resp->status = NFS4ERR_NOT_SAME; 5663 goto done; 5664 } 5665 error = rfs4_verify_attr(&sarg, NULL, &ntov); 5666 switch (error) { 5667 case 0: 5668 resp->status = NFS4_OK; 5669 break; 5670 case -1: 5671 resp->status = NFS4ERR_NOT_SAME; 5672 break; 5673 default: 5674 resp->status = puterrno4(error); 5675 break; 5676 } 5677 done: 5678 *cs->statusp = resp->status; 5679 nfs4_ntov_table_free(&ntov, &sarg); 5680 out: 5681 DTRACE_NFSV4_2(op__verify__done, struct compound_state *, cs, 5682 VERIFY4res *, resp); 5683 } 5684 5685 /* ARGSUSED */ 5686 static void 5687 rfs4_op_nverify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 5688 struct compound_state *cs) 5689 { 5690 /* 5691 * verify and nverify are exactly the same, except that nverify 5692 * succeeds when some argument changed, and verify succeeds when 5693 * when none changed. 
5694 */ 5695 5696 NVERIFY4args *args = &argop->nfs_argop4_u.opnverify; 5697 NVERIFY4res *resp = &resop->nfs_resop4_u.opnverify; 5698 5699 int error; 5700 struct nfs4_svgetit_arg sarg; 5701 struct statvfs64 sb; 5702 struct nfs4_ntov_table ntov; 5703 5704 DTRACE_NFSV4_2(op__nverify__start, struct compound_state *, cs, 5705 NVERIFY4args *, args); 5706 5707 if (cs->vp == NULL) { 5708 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 5709 DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs, 5710 NVERIFY4res *, resp); 5711 return; 5712 } 5713 sarg.sbp = &sb; 5714 sarg.is_referral = B_FALSE; 5715 nfs4_ntov_table_init(&ntov); 5716 resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs, 5717 &sarg, &ntov, NFS4ATTR_VERIT); 5718 if (resp->status != NFS4_OK) { 5719 /* 5720 * do_rfs4_set_attrs will try to verify systemwide attrs, 5721 * so could return -1 for "no match". 5722 */ 5723 if (resp->status == -1) 5724 resp->status = NFS4_OK; 5725 goto done; 5726 } 5727 error = rfs4_verify_attr(&sarg, NULL, &ntov); 5728 switch (error) { 5729 case 0: 5730 resp->status = NFS4ERR_SAME; 5731 break; 5732 case -1: 5733 resp->status = NFS4_OK; 5734 break; 5735 default: 5736 resp->status = puterrno4(error); 5737 break; 5738 } 5739 done: 5740 *cs->statusp = resp->status; 5741 nfs4_ntov_table_free(&ntov, &sarg); 5742 5743 DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs, 5744 NVERIFY4res *, resp); 5745 } 5746 5747 /* 5748 * XXX - This should live in an NFS header file. 
5749 */ 5750 #define MAX_IOVECS 12 5751 5752 /* ARGSUSED */ 5753 static void 5754 rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 5755 struct compound_state *cs) 5756 { 5757 WRITE4args *args = &argop->nfs_argop4_u.opwrite; 5758 WRITE4res *resp = &resop->nfs_resop4_u.opwrite; 5759 int error; 5760 vnode_t *vp; 5761 struct vattr bva; 5762 u_offset_t rlimit; 5763 struct uio uio; 5764 struct iovec iov[MAX_IOVECS]; 5765 struct iovec *iovp; 5766 int iovcnt; 5767 int ioflag; 5768 cred_t *savecred, *cr; 5769 bool_t *deleg = &cs->deleg; 5770 nfsstat4 stat; 5771 int in_crit = 0; 5772 caller_context_t ct; 5773 nfs4_srv_t *nsrv4; 5774 5775 DTRACE_NFSV4_2(op__write__start, struct compound_state *, cs, 5776 WRITE4args *, args); 5777 5778 vp = cs->vp; 5779 if (vp == NULL) { 5780 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 5781 goto out; 5782 } 5783 if (cs->access == CS_ACCESS_DENIED) { 5784 *cs->statusp = resp->status = NFS4ERR_ACCESS; 5785 goto out; 5786 } 5787 5788 cr = cs->cr; 5789 5790 if ((stat = rfs4_check_stateid(FWRITE, vp, &args->stateid, FALSE, 5791 deleg, TRUE, &ct, cs)) != NFS4_OK) { 5792 *cs->statusp = resp->status = stat; 5793 goto out; 5794 } 5795 5796 /* 5797 * We have to enter the critical region before calling VOP_RWLOCK 5798 * to avoid a deadlock with ufs. 5799 */ 5800 if (nbl_need_check(vp)) { 5801 nbl_start_crit(vp, RW_READER); 5802 in_crit = 1; 5803 if (nbl_conflict(vp, NBL_WRITE, 5804 args->offset, args->data_len, 0, &ct)) { 5805 *cs->statusp = resp->status = NFS4ERR_LOCKED; 5806 goto out; 5807 } 5808 } 5809 5810 bva.va_mask = AT_MODE | AT_UID; 5811 error = VOP_GETATTR(vp, &bva, 0, cr, &ct); 5812 5813 /* 5814 * If we can't get the attributes, then we can't do the 5815 * right access checking. So, we'll fail the request. 
5816 */ 5817 if (error) { 5818 *cs->statusp = resp->status = puterrno4(error); 5819 goto out; 5820 } 5821 5822 if (rdonly4(req, cs)) { 5823 *cs->statusp = resp->status = NFS4ERR_ROFS; 5824 goto out; 5825 } 5826 5827 if (vp->v_type != VREG) { 5828 *cs->statusp = resp->status = 5829 ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL); 5830 goto out; 5831 } 5832 5833 if (crgetuid(cr) != bva.va_uid && 5834 (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct))) { 5835 *cs->statusp = resp->status = puterrno4(error); 5836 goto out; 5837 } 5838 5839 if (MANDLOCK(vp, bva.va_mode)) { 5840 *cs->statusp = resp->status = NFS4ERR_ACCESS; 5841 goto out; 5842 } 5843 5844 nsrv4 = nfs4_get_srv(); 5845 if (args->data_len == 0) { 5846 *cs->statusp = resp->status = NFS4_OK; 5847 resp->count = 0; 5848 resp->committed = args->stable; 5849 resp->writeverf = nsrv4->write4verf; 5850 goto out; 5851 } 5852 5853 if (args->mblk != NULL) { 5854 mblk_t *m; 5855 uint_t bytes, round_len; 5856 5857 iovcnt = 0; 5858 bytes = 0; 5859 round_len = roundup(args->data_len, BYTES_PER_XDR_UNIT); 5860 for (m = args->mblk; 5861 m != NULL && bytes < round_len; 5862 m = m->b_cont) { 5863 iovcnt++; 5864 bytes += MBLKL(m); 5865 } 5866 #ifdef DEBUG 5867 /* should have ended on an mblk boundary */ 5868 if (bytes != round_len) { 5869 printf("bytes=0x%x, round_len=0x%x, req len=0x%x\n", 5870 bytes, round_len, args->data_len); 5871 printf("args=%p, args->mblk=%p, m=%p", (void *)args, 5872 (void *)args->mblk, (void *)m); 5873 ASSERT(bytes == round_len); 5874 } 5875 #endif 5876 if (iovcnt <= MAX_IOVECS) { 5877 iovp = iov; 5878 } else { 5879 iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP); 5880 } 5881 mblk_to_iov(args->mblk, iovcnt, iovp); 5882 } else if (args->rlist != NULL) { 5883 iovcnt = 1; 5884 iovp = iov; 5885 iovp->iov_base = (char *)((args->rlist)->u.c_daddr3); 5886 iovp->iov_len = args->data_len; 5887 } else { 5888 iovcnt = 1; 5889 iovp = iov; 5890 iovp->iov_base = args->data_val; 5891 iovp->iov_len = 
args->data_len; 5892 } 5893 5894 uio.uio_iov = iovp; 5895 uio.uio_iovcnt = iovcnt; 5896 5897 uio.uio_segflg = UIO_SYSSPACE; 5898 uio.uio_extflg = UIO_COPY_DEFAULT; 5899 uio.uio_loffset = args->offset; 5900 uio.uio_resid = args->data_len; 5901 uio.uio_llimit = curproc->p_fsz_ctl; 5902 rlimit = uio.uio_llimit - args->offset; 5903 if (rlimit < (u_offset_t)uio.uio_resid) 5904 uio.uio_resid = (int)rlimit; 5905 5906 if (args->stable == UNSTABLE4) 5907 ioflag = 0; 5908 else if (args->stable == FILE_SYNC4) 5909 ioflag = FSYNC; 5910 else if (args->stable == DATA_SYNC4) 5911 ioflag = FDSYNC; 5912 else { 5913 if (iovp != iov) 5914 kmem_free(iovp, sizeof (*iovp) * iovcnt); 5915 *cs->statusp = resp->status = NFS4ERR_INVAL; 5916 goto out; 5917 } 5918 5919 /* 5920 * We're changing creds because VM may fault and we need 5921 * the cred of the current thread to be used if quota 5922 * checking is enabled. 5923 */ 5924 savecred = curthread->t_cred; 5925 curthread->t_cred = cr; 5926 error = do_io(FWRITE, vp, &uio, ioflag, cr, &ct); 5927 curthread->t_cred = savecred; 5928 5929 if (iovp != iov) 5930 kmem_free(iovp, sizeof (*iovp) * iovcnt); 5931 5932 if (error) { 5933 *cs->statusp = resp->status = puterrno4(error); 5934 goto out; 5935 } 5936 5937 *cs->statusp = resp->status = NFS4_OK; 5938 resp->count = args->data_len - uio.uio_resid; 5939 5940 if (ioflag == 0) 5941 resp->committed = UNSTABLE4; 5942 else 5943 resp->committed = FILE_SYNC4; 5944 5945 resp->writeverf = nsrv4->write4verf; 5946 5947 out: 5948 if (in_crit) 5949 nbl_end_crit(vp); 5950 5951 DTRACE_NFSV4_2(op__write__done, struct compound_state *, cs, 5952 WRITE4res *, resp); 5953 } 5954 5955 static inline int 5956 rfs4_opnum_in_range(const compound_state_t *cs, int opnum) 5957 { 5958 if (opnum < FIRST_NFS4_OP || opnum > LAST_NFS4_OP) 5959 return (0); 5960 else if (cs->minorversion == 0 && opnum > LAST_NFS40_OP) 5961 return (0); 5962 else if (cs->minorversion == 1 && opnum > LAST_NFS41_OP) 5963 return (0); 5964 else if 
(cs->minorversion == 2 && opnum > LAST_NFS42_OP) 5965 return (0); 5966 return (1); 5967 } 5968 5969 void 5970 rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, compound_state_t *cs, 5971 struct svc_req *req, int *rv) 5972 { 5973 uint_t i; 5974 cred_t *cr; 5975 nfs4_srv_t *nsrv4; 5976 nfs_export_t *ne = nfs_get_export(); 5977 5978 if (rv != NULL) 5979 *rv = 0; 5980 /* 5981 * Form a reply tag by copying over the request tag. 5982 */ 5983 resp->tag.utf8string_len = args->tag.utf8string_len; 5984 if (args->tag.utf8string_len != 0) { 5985 resp->tag.utf8string_val = 5986 kmem_alloc(args->tag.utf8string_len, KM_SLEEP); 5987 bcopy(args->tag.utf8string_val, resp->tag.utf8string_val, 5988 resp->tag.utf8string_len); 5989 } else { 5990 resp->tag.utf8string_val = NULL; 5991 } 5992 5993 cs->statusp = &resp->status; 5994 cs->req = req; 5995 cs->minorversion = args->minorversion; 5996 resp->array = NULL; 5997 resp->array_len = 0; 5998 5999 if (args->array_len == 0) { 6000 resp->status = NFS4_OK; 6001 return; 6002 } 6003 6004 cr = svc_xprt_cred(req->rq_xprt); 6005 ASSERT(cr != NULL); 6006 6007 if (sec_svc_getcred(req, cr, &cs->principal, &cs->nfsflavor) == 0) { 6008 DTRACE_NFSV4_2(compound__start, struct compound_state *, 6009 cs, COMPOUND4args *, args); 6010 DTRACE_NFSV4_2(compound__done, struct compound_state *, 6011 cs, COMPOUND4res *, resp); 6012 svcerr_badcred(req->rq_xprt); 6013 if (rv != NULL) 6014 *rv = 1; 6015 return; 6016 } 6017 6018 resp->array_len = args->array_len; 6019 resp->array = kmem_zalloc(args->array_len * sizeof (nfs_resop4), 6020 KM_SLEEP); 6021 6022 cs->op_len = args->array_len; 6023 cs->basecr = cr; 6024 nsrv4 = nfs4_get_srv(); 6025 6026 DTRACE_NFSV4_2(compound__start, struct compound_state *, cs, 6027 COMPOUND4args *, args); 6028 6029 /* 6030 * For now, NFS4 compound processing must be protected by 6031 * exported_lock because it can access more than one exportinfo 6032 * per compound and share/unshare can now change multiple 6033 * exinfo structs. 
The NFS2/3 code only refs 1 exportinfo 6034 * per proc (excluding public exinfo), and exi_count design 6035 * is sufficient to protect concurrent execution of NFS2/3 6036 * ops along with unexport. This lock will be removed as 6037 * part of the NFSv4 phase 2 namespace redesign work. 6038 */ 6039 rw_enter(&ne->exported_lock, RW_READER); 6040 6041 /* 6042 * If this is the first compound we've seen, we need to start all 6043 * new instances' grace periods. 6044 */ 6045 if (nsrv4->seen_first_compound == 0) { 6046 rfs4_grace_start_new(nsrv4); 6047 /* 6048 * This must be set after rfs4_grace_start_new(), otherwise 6049 * another thread could proceed past here before the former 6050 * is finished. 6051 */ 6052 nsrv4->seen_first_compound = 1; 6053 } 6054 6055 for (i = 0; i < args->array_len && cs->cont; i++) { 6056 nfs_argop4 *argop; 6057 nfs_resop4 *resop; 6058 uint_t op; 6059 kstat_named_t *stat = ne->ne_globals->rfsproccnt[NFS_V4]; 6060 6061 argop = &args->array[i]; 6062 resop = &resp->array[i]; 6063 resop->resop = argop->argop; 6064 op = (uint_t)resop->resop; 6065 6066 cs->op_pos = i; 6067 if (op < rfsv4disp_cnt && rfs4_opnum_in_range(cs, op)) { 6068 /* 6069 * Count the individual ops here; NULL and COMPOUND 6070 * are counted in common_dispatch() 6071 */ 6072 stat[op].value.ui64++; 6073 6074 NFS4_DEBUG(rfs4_debug > 1, 6075 (CE_NOTE, "Executing %s", rfs4_op_string[op])); 6076 (*rfsv4disptab[op].dis_proc)(argop, resop, req, cs); 6077 NFS4_DEBUG(rfs4_debug > 1, (CE_NOTE, "%s returned %d", 6078 rfs4_op_string[op], *cs->statusp)); 6079 if (*cs->statusp != NFS4_OK) 6080 cs->cont = FALSE; 6081 } else { 6082 /* 6083 * This is effectively dead code since XDR code 6084 * will have already returned BADXDR if op doesn't 6085 * decode to legal value. This only done for a 6086 * day when XDR code doesn't verify v4 opcodes. 
6087 */ 6088 op = OP_ILLEGAL; 6089 stat[OP_ILLEGAL_IDX].value.ui64++; 6090 6091 rfs4_op_illegal(argop, resop, req, cs); 6092 cs->cont = FALSE; 6093 } 6094 6095 /* 6096 * If not at last op, and if we are to stop, then 6097 * compact the results array. 6098 */ 6099 if ((i + 1) < args->array_len && !cs->cont) { 6100 nfs_resop4 *new_res = kmem_alloc( 6101 (i+1) * sizeof (nfs_resop4), KM_SLEEP); 6102 bcopy(resp->array, 6103 new_res, (i+1) * sizeof (nfs_resop4)); 6104 kmem_free(resp->array, 6105 args->array_len * sizeof (nfs_resop4)); 6106 6107 resp->array_len = i + 1; 6108 resp->array = new_res; 6109 } 6110 } 6111 6112 rw_exit(&ne->exported_lock); 6113 6114 DTRACE_NFSV4_2(compound__done, struct compound_state *, cs, 6115 COMPOUND4res *, resp); 6116 6117 /* 6118 * done with this compound request, free the label 6119 */ 6120 6121 if (req->rq_label != NULL) { 6122 kmem_free(req->rq_label, sizeof (bslabel_t)); 6123 req->rq_label = NULL; 6124 } 6125 } 6126 6127 /* 6128 * XXX because of what appears to be duplicate calls to rfs4_compound_free 6129 * XXX zero out the tag and array values. Need to investigate why the 6130 * XXX calls occur, but at least prevent the panic for now. 
6131 */ 6132 void 6133 rfs4_compound_free(COMPOUND4res *resp) 6134 { 6135 uint_t i; 6136 6137 if (resp->tag.utf8string_val) { 6138 UTF8STRING_FREE(resp->tag) 6139 } 6140 6141 for (i = 0; i < resp->array_len; i++) { 6142 nfs_resop4 *resop; 6143 uint_t op; 6144 6145 resop = &resp->array[i]; 6146 op = (uint_t)resop->resop; 6147 if (op < rfsv4disp_cnt) { 6148 (*rfsv4disptab[op].dis_resfree)(resop); 6149 } 6150 } 6151 if (resp->array != NULL) { 6152 kmem_free(resp->array, resp->array_len * sizeof (nfs_resop4)); 6153 } 6154 } 6155 6156 /* 6157 * Process the value of the compound request rpc flags, as a bit-AND 6158 * of the individual per-op flags (idempotent, allowork, publicfh_ok) 6159 */ 6160 void 6161 rfs4_compound_flagproc(COMPOUND4args *args, int *flagp) 6162 { 6163 int i; 6164 int flag = RPC_ALL; 6165 6166 for (i = 0; flag && i < args->array_len; i++) { 6167 uint_t op; 6168 6169 op = (uint_t)args->array[i].argop; 6170 6171 if (op < rfsv4disp_cnt) 6172 flag &= rfsv4disptab[op].dis_flags; 6173 else 6174 flag = 0; 6175 } 6176 *flagp = flag; 6177 } 6178 6179 nfsstat4 6180 rfs4_client_sysid(rfs4_client_t *cp, sysid_t *sp) 6181 { 6182 nfsstat4 e; 6183 6184 rfs4_dbe_lock(cp->rc_dbe); 6185 6186 if (cp->rc_sysidt != LM_NOSYSID) { 6187 *sp = cp->rc_sysidt; 6188 e = NFS4_OK; 6189 6190 } else if ((cp->rc_sysidt = lm_alloc_sysidt()) != LM_NOSYSID) { 6191 *sp = cp->rc_sysidt; 6192 e = NFS4_OK; 6193 6194 NFS4_DEBUG(rfs4_debug, (CE_NOTE, 6195 "rfs4_client_sysid: allocated 0x%x\n", *sp)); 6196 } else 6197 e = NFS4ERR_DELAY; 6198 6199 rfs4_dbe_unlock(cp->rc_dbe); 6200 return (e); 6201 } 6202 6203 #if defined(DEBUG) && ! 
defined(lint) 6204 static void lock_print(char *str, int operation, struct flock64 *flk) 6205 { 6206 char *op, *type; 6207 6208 switch (operation) { 6209 case F_GETLK: op = "F_GETLK"; 6210 break; 6211 case F_SETLK: op = "F_SETLK"; 6212 break; 6213 case F_SETLK_NBMAND: op = "F_SETLK_NBMAND"; 6214 break; 6215 default: op = "F_UNKNOWN"; 6216 break; 6217 } 6218 switch (flk->l_type) { 6219 case F_UNLCK: type = "F_UNLCK"; 6220 break; 6221 case F_RDLCK: type = "F_RDLCK"; 6222 break; 6223 case F_WRLCK: type = "F_WRLCK"; 6224 break; 6225 default: type = "F_UNKNOWN"; 6226 break; 6227 } 6228 6229 ASSERT(flk->l_whence == 0); 6230 cmn_err(CE_NOTE, "%s: %s, type = %s, off = %llx len = %llx pid = %d", 6231 str, op, type, (longlong_t)flk->l_start, 6232 flk->l_len ? (longlong_t)flk->l_len : ~0LL, flk->l_pid); 6233 } 6234 6235 #define LOCK_PRINT(d, s, t, f) if (d) lock_print(s, t, f) 6236 #else 6237 #define LOCK_PRINT(d, s, t, f) 6238 #endif 6239 6240 /*ARGSUSED*/ 6241 static bool_t 6242 creds_ok(cred_set_t *cr_set, struct svc_req *req, struct compound_state *cs) 6243 { 6244 return (TRUE); 6245 } 6246 6247 /* 6248 * Look up the pathname using the vp in cs as the directory vnode. 
6249 * cs->vp will be the vnode for the file on success 6250 */ 6251 6252 static nfsstat4 6253 rfs4_lookup(component4 *component, struct svc_req *req, 6254 struct compound_state *cs) 6255 { 6256 char *nm; 6257 uint32_t len; 6258 nfsstat4 status; 6259 struct sockaddr *ca; 6260 char *name; 6261 6262 if (cs->vp == NULL) { 6263 return (NFS4ERR_NOFILEHANDLE); 6264 } 6265 if (cs->vp->v_type != VDIR) { 6266 return (NFS4ERR_NOTDIR); 6267 } 6268 6269 status = utf8_dir_verify(component); 6270 if (status != NFS4_OK) 6271 return (status); 6272 6273 nm = utf8_to_fn(component, &len, NULL); 6274 if (nm == NULL) { 6275 return (NFS4ERR_INVAL); 6276 } 6277 6278 if (len > MAXNAMELEN) { 6279 kmem_free(nm, len); 6280 return (NFS4ERR_NAMETOOLONG); 6281 } 6282 6283 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 6284 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND, 6285 MAXPATHLEN + 1); 6286 6287 if (name == NULL) { 6288 kmem_free(nm, len); 6289 return (NFS4ERR_INVAL); 6290 } 6291 6292 status = do_rfs4_op_lookup(name, req, cs); 6293 6294 if (name != nm) 6295 kmem_free(name, MAXPATHLEN + 1); 6296 6297 kmem_free(nm, len); 6298 6299 return (status); 6300 } 6301 6302 static nfsstat4 6303 rfs4_lookupfile(component4 *component, struct svc_req *req, 6304 struct compound_state *cs, uint32_t access, change_info4 *cinfo) 6305 { 6306 nfsstat4 status; 6307 vnode_t *dvp = cs->vp; 6308 vattr_t bva, ava, fva; 6309 int error; 6310 6311 /* Get "before" change value */ 6312 bva.va_mask = AT_CTIME|AT_SEQ; 6313 error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL); 6314 if (error) 6315 return (puterrno4(error)); 6316 6317 /* rfs4_lookup may VN_RELE directory */ 6318 VN_HOLD(dvp); 6319 6320 status = rfs4_lookup(component, req, cs); 6321 if (status != NFS4_OK) { 6322 VN_RELE(dvp); 6323 return (status); 6324 } 6325 6326 /* 6327 * Get "after" change value, if it fails, simply return the 6328 * before value. 
6329 */ 6330 ava.va_mask = AT_CTIME|AT_SEQ; 6331 if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) { 6332 ava.va_ctime = bva.va_ctime; 6333 ava.va_seq = 0; 6334 } 6335 VN_RELE(dvp); 6336 6337 /* 6338 * Validate the file is a file 6339 */ 6340 fva.va_mask = AT_TYPE|AT_MODE; 6341 error = VOP_GETATTR(cs->vp, &fva, 0, cs->cr, NULL); 6342 if (error) 6343 return (puterrno4(error)); 6344 6345 if (fva.va_type != VREG) { 6346 if (fva.va_type == VDIR) 6347 return (NFS4ERR_ISDIR); 6348 if (fva.va_type == VLNK) 6349 return (NFS4ERR_SYMLINK); 6350 return (NFS4ERR_INVAL); 6351 } 6352 6353 NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime); 6354 NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime); 6355 6356 /* 6357 * It is undefined if VOP_LOOKUP will change va_seq, so 6358 * cinfo.atomic = TRUE only if we have 6359 * non-zero va_seq's, and they have not changed. 6360 */ 6361 if (bva.va_seq && ava.va_seq && ava.va_seq == bva.va_seq) 6362 cinfo->atomic = TRUE; 6363 else 6364 cinfo->atomic = FALSE; 6365 6366 /* Check for mandatory locking */ 6367 cs->mandlock = MANDLOCK(cs->vp, fva.va_mode); 6368 return (check_open_access(access, cs, req)); 6369 } 6370 6371 static nfsstat4 6372 create_vnode(vnode_t *dvp, char *nm, vattr_t *vap, createmode4 mode, 6373 cred_t *cr, vnode_t **vpp, bool_t *created) 6374 { 6375 int error; 6376 nfsstat4 status = NFS4_OK; 6377 vattr_t va; 6378 6379 tryagain: 6380 6381 /* 6382 * The file open mode used is VWRITE. If the client needs 6383 * some other semantic, then it should do the access checking 6384 * itself. It would have been nice to have the file open mode 6385 * passed as part of the arguments. 6386 */ 6387 6388 *created = TRUE; 6389 error = VOP_CREATE(dvp, nm, vap, EXCL, VWRITE, vpp, cr, 0, NULL, NULL); 6390 6391 if (error) { 6392 *created = FALSE; 6393 6394 /* 6395 * If we got something other than file already exists 6396 * then just return this error. Otherwise, we got 6397 * EEXIST. 
If we were doing a GUARDED create, then 6398 * just return this error. Otherwise, we need to 6399 * make sure that this wasn't a duplicate of an 6400 * exclusive create request. 6401 * 6402 * The assumption is made that a non-exclusive create 6403 * request will never return EEXIST. 6404 */ 6405 6406 if (error != EEXIST || mode == GUARDED4) { 6407 status = puterrno4(error); 6408 return (status); 6409 } 6410 error = VOP_LOOKUP(dvp, nm, vpp, NULL, 0, NULL, cr, 6411 NULL, NULL, NULL); 6412 6413 if (error) { 6414 /* 6415 * We couldn't find the file that we thought that 6416 * we just created. So, we'll just try creating 6417 * it again. 6418 */ 6419 if (error == ENOENT) 6420 goto tryagain; 6421 6422 status = puterrno4(error); 6423 return (status); 6424 } 6425 6426 if (mode == UNCHECKED4) { 6427 /* existing object must be regular file */ 6428 if ((*vpp)->v_type != VREG) { 6429 if ((*vpp)->v_type == VDIR) 6430 status = NFS4ERR_ISDIR; 6431 else if ((*vpp)->v_type == VLNK) 6432 status = NFS4ERR_SYMLINK; 6433 else 6434 status = NFS4ERR_INVAL; 6435 VN_RELE(*vpp); 6436 return (status); 6437 } 6438 6439 return (NFS4_OK); 6440 } 6441 6442 /* Check for duplicate request */ 6443 va.va_mask = AT_MTIME; 6444 error = VOP_GETATTR(*vpp, &va, 0, cr, NULL); 6445 if (!error) { 6446 /* We found the file */ 6447 const timestruc_t *mtime = &vap->va_mtime; 6448 6449 if (va.va_mtime.tv_sec != mtime->tv_sec || 6450 va.va_mtime.tv_nsec != mtime->tv_nsec) { 6451 /* but its not our creation */ 6452 VN_RELE(*vpp); 6453 return (NFS4ERR_EXIST); 6454 } 6455 *created = TRUE; /* retrans of create == created */ 6456 return (NFS4_OK); 6457 } 6458 VN_RELE(*vpp); 6459 return (NFS4ERR_EXIST); 6460 } 6461 6462 return (NFS4_OK); 6463 } 6464 6465 static nfsstat4 6466 check_open_access(uint32_t access, struct compound_state *cs, 6467 struct svc_req *req) 6468 { 6469 int error; 6470 vnode_t *vp; 6471 bool_t readonly; 6472 cred_t *cr = cs->cr; 6473 6474 /* For now we don't allow mandatory locking as per V2/V3 */ 
6475 if (cs->access == CS_ACCESS_DENIED || cs->mandlock) { 6476 return (NFS4ERR_ACCESS); 6477 } 6478 6479 vp = cs->vp; 6480 ASSERT(cr != NULL && vp->v_type == VREG); 6481 6482 /* 6483 * If the file system is exported read only and we are trying 6484 * to open for write, then return NFS4ERR_ROFS 6485 */ 6486 6487 readonly = rdonly4(req, cs); 6488 6489 if ((access & OPEN4_SHARE_ACCESS_WRITE) && readonly) 6490 return (NFS4ERR_ROFS); 6491 6492 if (access & OPEN4_SHARE_ACCESS_READ) { 6493 if ((VOP_ACCESS(vp, VREAD, 0, cr, NULL) != 0) && 6494 (VOP_ACCESS(vp, VEXEC, 0, cr, NULL) != 0)) { 6495 return (NFS4ERR_ACCESS); 6496 } 6497 } 6498 6499 if (access & OPEN4_SHARE_ACCESS_WRITE) { 6500 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL); 6501 if (error) 6502 return (NFS4ERR_ACCESS); 6503 } 6504 6505 return (NFS4_OK); 6506 } 6507 6508 static void 6509 rfs4_verifier_to_mtime(verifier4 v, timestruc_t *mtime) 6510 { 6511 timespec32_t *time = (timespec32_t *)&v; 6512 6513 /* 6514 * Ensure no time overflows. Assumes underlying 6515 * filesystem supports at least 32 bits. 6516 * Truncate nsec to usec resolution to allow valid 6517 * compares even if the underlying filesystem truncates. 
6518 */ 6519 mtime->tv_sec = time->tv_sec % TIME32_MAX; 6520 mtime->tv_nsec = (time->tv_nsec / 1000) * 1000; 6521 } 6522 6523 static nfsstat4 6524 rfs4_createfile(OPEN4args *args, struct svc_req *req, struct compound_state *cs, 6525 change_info4 *cinfo, bitmap4 *attrset, clientid4 clientid) 6526 { 6527 struct nfs4_svgetit_arg sarg; 6528 struct nfs4_ntov_table ntov; 6529 6530 bool_t ntov_table_init = FALSE; 6531 struct statvfs64 sb; 6532 nfsstat4 status; 6533 vnode_t *vp; 6534 vattr_t bva, ava, iva, cva, *vap; 6535 vnode_t *dvp; 6536 char *nm = NULL; 6537 uint_t buflen; 6538 bool_t created; 6539 bool_t setsize = FALSE; 6540 len_t reqsize; 6541 int error; 6542 bool_t trunc; 6543 caller_context_t ct; 6544 component4 *component; 6545 bslabel_t *clabel; 6546 struct sockaddr *ca; 6547 char *name = NULL; 6548 fattr4 *fattr = NULL; 6549 6550 ASSERT(*attrset == 0); 6551 6552 sarg.sbp = &sb; 6553 sarg.is_referral = B_FALSE; 6554 6555 dvp = cs->vp; 6556 6557 /* Check if the file system is read only */ 6558 if (rdonly4(req, cs)) 6559 return (NFS4ERR_ROFS); 6560 6561 /* check the label of including directory */ 6562 if (is_system_labeled()) { 6563 ASSERT(req->rq_label != NULL); 6564 clabel = req->rq_label; 6565 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *, 6566 "got client label from request(1)", 6567 struct svc_req *, req); 6568 if (!blequal(&l_admin_low->tsl_label, clabel)) { 6569 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK, 6570 cs->exi)) { 6571 return (NFS4ERR_ACCESS); 6572 } 6573 } 6574 } 6575 6576 if ((args->mode == EXCLUSIVE4 || args->mode == EXCLUSIVE4_1) && 6577 dvp->v_flag & V_XATTRDIR) { 6578 /* prohibit EXCL create of named attributes */ 6579 return (NFS4ERR_INVAL); 6580 } 6581 6582 /* 6583 * Get the last component of path name in nm. cs will reference 6584 * the including directory on success. 
6585 */ 6586 component = &args->claim.open_claim4_u.file; 6587 status = utf8_dir_verify(component); 6588 if (status != NFS4_OK) 6589 return (status); 6590 6591 nm = utf8_to_fn(component, &buflen, NULL); 6592 6593 if (nm == NULL) 6594 return (NFS4ERR_RESOURCE); 6595 6596 if (buflen > MAXNAMELEN) { 6597 kmem_free(nm, buflen); 6598 return (NFS4ERR_NAMETOOLONG); 6599 } 6600 6601 bva.va_mask = AT_TYPE|AT_CTIME|AT_SEQ; 6602 error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL); 6603 if (error) { 6604 kmem_free(nm, buflen); 6605 return (puterrno4(error)); 6606 } 6607 6608 if (bva.va_type != VDIR) { 6609 kmem_free(nm, buflen); 6610 return (NFS4ERR_NOTDIR); 6611 } 6612 6613 NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime) 6614 6615 switch (args->mode) { 6616 case GUARDED4: 6617 /*FALLTHROUGH*/ 6618 case UNCHECKED4: 6619 case EXCLUSIVE4_1: 6620 nfs4_ntov_table_init(&ntov); 6621 ntov_table_init = TRUE; 6622 6623 if (args->mode == EXCLUSIVE4_1) 6624 fattr = &args->createhow4_u.ch_createboth.cva_attrs; 6625 else 6626 fattr = &args->createhow4_u.createattrs; 6627 6628 status = do_rfs4_set_attrs(attrset, 6629 fattr, 6630 cs, &sarg, &ntov, NFS4ATTR_SETIT); 6631 6632 if (status == NFS4_OK && (sarg.vap->va_mask & AT_TYPE) && 6633 sarg.vap->va_type != VREG) { 6634 if (sarg.vap->va_type == VDIR) 6635 status = NFS4ERR_ISDIR; 6636 else if (sarg.vap->va_type == VLNK) 6637 status = NFS4ERR_SYMLINK; 6638 else 6639 status = NFS4ERR_INVAL; 6640 } 6641 6642 if (status != NFS4_OK) { 6643 kmem_free(nm, buflen); 6644 nfs4_ntov_table_free(&ntov, &sarg); 6645 *attrset = 0; 6646 return (status); 6647 } 6648 6649 vap = sarg.vap; 6650 vap->va_type = VREG; 6651 vap->va_mask |= AT_TYPE; 6652 6653 if ((vap->va_mask & AT_MODE) == 0) { 6654 vap->va_mask |= AT_MODE; 6655 vap->va_mode = (mode_t)0600; 6656 } 6657 6658 if (vap->va_mask & AT_SIZE) { 6659 6660 /* Disallow create with a non-zero size */ 6661 6662 if ((reqsize = sarg.vap->va_size) != 0) { 6663 kmem_free(nm, buflen); 6664 nfs4_ntov_table_free(&ntov, 
&sarg); 6665 *attrset = 0; 6666 return (NFS4ERR_INVAL); 6667 } 6668 setsize = TRUE; 6669 } 6670 if (args->mode == EXCLUSIVE4_1) { 6671 rfs4_verifier_to_mtime( 6672 args->createhow4_u.ch_createboth.cva_verf, 6673 &vap->va_mtime); 6674 /* attrset will be set later */ 6675 fattr->attrmask |= FATTR4_TIME_MODIFY_MASK; 6676 vap->va_mask |= AT_MTIME; 6677 } 6678 break; 6679 6680 case EXCLUSIVE4: 6681 cva.va_mask = AT_TYPE | AT_MTIME | AT_MODE; 6682 cva.va_type = VREG; 6683 cva.va_mode = (mode_t)0; 6684 6685 rfs4_verifier_to_mtime(args->createhow4_u.createverf, 6686 &cva.va_mtime); 6687 6688 vap = &cva; 6689 6690 /* 6691 * For EXCL create, attrset is set to the server attr 6692 * used to cache the client's verifier. 6693 */ 6694 *attrset = FATTR4_TIME_MODIFY_MASK; 6695 break; 6696 } 6697 6698 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 6699 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND, 6700 MAXPATHLEN + 1); 6701 6702 if (name == NULL) { 6703 kmem_free(nm, buflen); 6704 return (NFS4ERR_SERVERFAULT); 6705 } 6706 6707 status = create_vnode(dvp, name, vap, args->mode, 6708 cs->cr, &vp, &created); 6709 if (nm != name) 6710 kmem_free(name, MAXPATHLEN + 1); 6711 kmem_free(nm, buflen); 6712 6713 if (status != NFS4_OK) { 6714 if (ntov_table_init) 6715 nfs4_ntov_table_free(&ntov, &sarg); 6716 *attrset = 0; 6717 return (status); 6718 } 6719 6720 trunc = (setsize && !created); 6721 6722 if (args->mode != EXCLUSIVE4) { 6723 bitmap4 createmask = fattr->attrmask; 6724 6725 /* 6726 * True verification that object was created with correct 6727 * attrs is impossible. The attrs could have been changed 6728 * immediately after object creation. If attributes did 6729 * not verify, the only recourse for the server is to 6730 * destroy the object. Maybe if some attrs (like gid) 6731 * are set incorrectly, the object should be destroyed; 6732 * however, seems bad as a default policy. 
Do we really 6733 * want to destroy an object over one of the times not 6734 * verifying correctly? For these reasons, the server 6735 * currently sets bits in attrset for createattrs 6736 * that were set; however, no verification is done. 6737 * 6738 * vmask_to_nmask accounts for vattr bits set on create 6739 * [do_rfs4_set_attrs() only sets resp bits for 6740 * non-vattr/vfs bits.] 6741 * Mask off any bits we set by default so as not to return 6742 * more attrset bits than were requested in createattrs 6743 */ 6744 if (created) { 6745 nfs4_vmask_to_nmask(sarg.vap->va_mask, attrset); 6746 *attrset &= createmask; 6747 } else { 6748 /* 6749 * We did not create the vnode (we tried but it 6750 * already existed). In this case, the only createattr 6751 * that the spec allows the server to set is size, 6752 * and even then, it can only be set if it is 0. 6753 */ 6754 *attrset = 0; 6755 if (trunc) 6756 *attrset = FATTR4_SIZE_MASK; 6757 } 6758 } 6759 if (ntov_table_init) 6760 nfs4_ntov_table_free(&ntov, &sarg); 6761 6762 /* 6763 * Get the initial "after" sequence number, if it fails, 6764 * set to zero, time to before. 6765 */ 6766 iva.va_mask = AT_CTIME|AT_SEQ; 6767 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL)) { 6768 iva.va_seq = 0; 6769 iva.va_ctime = bva.va_ctime; 6770 } 6771 6772 /* 6773 * create_vnode attempts to create the file exclusive, 6774 * if it already exists the VOP_CREATE will fail and 6775 * may not increase va_seq. It is atomic if 6776 * we haven't changed the directory, but if it has changed 6777 * we don't know what changed it. 6778 */ 6779 if (!created) { 6780 if (bva.va_seq && iva.va_seq && 6781 bva.va_seq == iva.va_seq) 6782 cinfo->atomic = TRUE; 6783 else 6784 cinfo->atomic = FALSE; 6785 NFS4_SET_FATTR4_CHANGE(cinfo->after, iva.va_ctime); 6786 } else { 6787 /* 6788 * The entry was created, we need to sync the 6789 * directory metadata. 
6790 */ 6791 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL); 6792 6793 /* 6794 * Get "after" change value, if it fails, simply return the 6795 * before value. 6796 */ 6797 ava.va_mask = AT_CTIME|AT_SEQ; 6798 if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) { 6799 ava.va_ctime = bva.va_ctime; 6800 ava.va_seq = 0; 6801 } 6802 6803 NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime); 6804 6805 /* 6806 * The cinfo->atomic = TRUE only if we have 6807 * non-zero va_seq's, and it has incremented by exactly one 6808 * during the create_vnode and it didn't 6809 * change during the VOP_FSYNC. 6810 */ 6811 if (bva.va_seq && iva.va_seq && ava.va_seq && 6812 iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq) 6813 cinfo->atomic = TRUE; 6814 else 6815 cinfo->atomic = FALSE; 6816 } 6817 6818 /* Check for mandatory locking and that the size gets set. */ 6819 cva.va_mask = AT_MODE; 6820 if (setsize) 6821 cva.va_mask |= AT_SIZE; 6822 6823 /* Assume the worst */ 6824 cs->mandlock = TRUE; 6825 6826 if (VOP_GETATTR(vp, &cva, 0, cs->cr, NULL) == 0) { 6827 cs->mandlock = MANDLOCK(cs->vp, cva.va_mode); 6828 6829 /* 6830 * Truncate the file if necessary; this would be 6831 * the case for create over an existing file. 6832 */ 6833 6834 if (trunc) { 6835 int in_crit = 0; 6836 rfs4_file_t *fp; 6837 nfs4_srv_t *nsrv4; 6838 bool_t create = FALSE; 6839 6840 /* 6841 * We are writing over an existing file. 6842 * Check to see if we need to recall a delegation. 
6843 */ 6844 nsrv4 = nfs4_get_srv(); 6845 rfs4_hold_deleg_policy(nsrv4); 6846 if ((fp = rfs4_findfile(vp, NULL, &create)) != NULL) { 6847 if (rfs4_check_delegated_byfp(FWRITE, fp, 6848 (reqsize == 0), FALSE, FALSE, &clientid)) { 6849 rfs4_file_rele(fp); 6850 rfs4_rele_deleg_policy(nsrv4); 6851 VN_RELE(vp); 6852 *attrset = 0; 6853 return (NFS4ERR_DELAY); 6854 } 6855 rfs4_file_rele(fp); 6856 } 6857 rfs4_rele_deleg_policy(nsrv4); 6858 6859 if (nbl_need_check(vp)) { 6860 in_crit = 1; 6861 6862 ASSERT(reqsize == 0); 6863 6864 nbl_start_crit(vp, RW_READER); 6865 if (nbl_conflict(vp, NBL_WRITE, 0, 6866 cva.va_size, 0, NULL)) { 6867 in_crit = 0; 6868 nbl_end_crit(vp); 6869 VN_RELE(vp); 6870 *attrset = 0; 6871 return (NFS4ERR_ACCESS); 6872 } 6873 } 6874 ct.cc_sysid = 0; 6875 ct.cc_pid = 0; 6876 ct.cc_caller_id = nfs4_srv_caller_id; 6877 ct.cc_flags = CC_DONTBLOCK; 6878 6879 cva.va_mask = AT_SIZE; 6880 cva.va_size = reqsize; 6881 (void) VOP_SETATTR(vp, &cva, 0, cs->cr, &ct); 6882 if (in_crit) 6883 nbl_end_crit(vp); 6884 } 6885 } 6886 6887 error = makefh4(&cs->fh, vp, cs->exi); 6888 6889 /* 6890 * Force modified data and metadata out to stable storage. 
6891 */ 6892 (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL); 6893 6894 if (error) { 6895 VN_RELE(vp); 6896 *attrset = 0; 6897 return (puterrno4(error)); 6898 } 6899 6900 /* if parent dir is attrdir, set namedattr fh flag */ 6901 if (dvp->v_flag & V_XATTRDIR) 6902 set_fh4_flag(&cs->fh, FH4_NAMEDATTR); 6903 6904 if (cs->vp) 6905 VN_RELE(cs->vp); 6906 6907 cs->vp = vp; 6908 6909 /* 6910 * if we did not create the file, we will need to check 6911 * the access bits on the file 6912 */ 6913 6914 if (!created) { 6915 if (setsize) 6916 args->share_access |= OPEN4_SHARE_ACCESS_WRITE; 6917 status = check_open_access(args->share_access, cs, req); 6918 if (status != NFS4_OK) 6919 *attrset = 0; 6920 } 6921 return (status); 6922 } 6923 6924 /*ARGSUSED*/ 6925 static void 6926 rfs4_do_open(struct compound_state *cs, struct svc_req *req, 6927 rfs4_openowner_t *oo, delegreq_t deleg, 6928 uint32_t access, uint32_t deny, 6929 OPEN4res *resp, int deleg_cur) 6930 { 6931 /* XXX Currently not using req */ 6932 rfs4_state_t *sp; 6933 rfs4_file_t *fp; 6934 bool_t screate = TRUE; 6935 bool_t fcreate = TRUE; 6936 uint32_t open_a, share_a; 6937 uint32_t open_d, share_d; 6938 rfs4_deleg_state_t *dsp; 6939 sysid_t sysid; 6940 nfsstat4 status; 6941 caller_context_t ct; 6942 int fflags = 0; 6943 int recall = 0; 6944 int err; 6945 int first_open; 6946 6947 /* get the file struct and hold a lock on it during initial open */ 6948 fp = rfs4_findfile_withlock(cs->vp, &cs->fh, &fcreate); 6949 if (fp == NULL) { 6950 resp->status = NFS4ERR_RESOURCE; 6951 DTRACE_PROBE1(nfss__e__do__open1, nfsstat4, resp->status); 6952 return; 6953 } 6954 6955 sp = rfs4_findstate_by_owner_file(oo, fp, &screate); 6956 if (sp == NULL) { 6957 resp->status = NFS4ERR_RESOURCE; 6958 DTRACE_PROBE1(nfss__e__do__open2, nfsstat4, resp->status); 6959 /* No need to keep any reference */ 6960 rw_exit(&fp->rf_file_rwlock); 6961 rfs4_file_rele(fp); 6962 return; 6963 } 6964 6965 /* try to get the sysid before continuing */ 6966 if ((status = 
rfs4_client_sysid(oo->ro_client, &sysid)) != NFS4_OK) { 6967 resp->status = status; 6968 rfs4_file_rele(fp); 6969 /* Not a fully formed open; "close" it */ 6970 if (screate == TRUE) 6971 rfs4_state_close(sp, FALSE, FALSE, cs->cr); 6972 rfs4_state_rele(sp); 6973 return; 6974 } 6975 6976 /* Calculate the fflags for this OPEN. */ 6977 if (access & OPEN4_SHARE_ACCESS_READ) 6978 fflags |= FREAD; 6979 if (access & OPEN4_SHARE_ACCESS_WRITE) 6980 fflags |= FWRITE; 6981 6982 rfs4_dbe_lock(sp->rs_dbe); 6983 6984 /* 6985 * Calculate the new deny and access mode that this open is adding to 6986 * the file for this open owner; 6987 */ 6988 open_d = (deny & ~sp->rs_open_deny); 6989 open_a = (access & ~sp->rs_open_access); 6990 6991 /* 6992 * Calculate the new share access and share deny modes that this open 6993 * is adding to the file for this open owner; 6994 */ 6995 share_a = (access & ~sp->rs_share_access); 6996 share_d = (deny & ~sp->rs_share_deny); 6997 6998 first_open = (sp->rs_open_access & OPEN4_SHARE_ACCESS_BOTH) == 0; 6999 7000 /* 7001 * Check to see the client has already sent an open for this 7002 * open owner on this file with the same share/deny modes. 7003 * If so, we don't need to check for a conflict and we don't 7004 * need to add another shrlock. If not, then we need to 7005 * check for conflicts in deny and access before checking for 7006 * conflicts in delegation. We don't want to recall a 7007 * delegation based on an open that will eventually fail based 7008 * on shares modes. 
7009 */ 7010 7011 if (share_a || share_d) { 7012 if ((err = rfs4_share(sp, access, deny)) != 0) { 7013 rfs4_dbe_unlock(sp->rs_dbe); 7014 resp->status = err; 7015 7016 rfs4_file_rele(fp); 7017 /* Not a fully formed open; "close" it */ 7018 if (screate == TRUE) 7019 rfs4_state_close(sp, FALSE, FALSE, cs->cr); 7020 rfs4_state_rele(sp); 7021 return; 7022 } 7023 } 7024 7025 rfs4_dbe_lock(fp->rf_dbe); 7026 7027 /* 7028 * Check to see if this file is delegated and if so, if a 7029 * recall needs to be done. 7030 */ 7031 if (rfs4_check_recall(sp, access)) { 7032 rfs4_dbe_unlock(fp->rf_dbe); 7033 rfs4_dbe_unlock(sp->rs_dbe); 7034 rfs4_recall_deleg(fp, FALSE, sp->rs_owner->ro_client); 7035 delay(NFS4_DELEGATION_CONFLICT_DELAY); 7036 rfs4_dbe_lock(sp->rs_dbe); 7037 7038 /* if state closed while lock was dropped */ 7039 if (sp->rs_closed) { 7040 if (share_a || share_d) 7041 (void) rfs4_unshare(sp); 7042 rfs4_dbe_unlock(sp->rs_dbe); 7043 rfs4_file_rele(fp); 7044 /* Not a fully formed open; "close" it */ 7045 if (screate == TRUE) 7046 rfs4_state_close(sp, FALSE, FALSE, cs->cr); 7047 rfs4_state_rele(sp); 7048 resp->status = NFS4ERR_OLD_STATEID; 7049 return; 7050 } 7051 7052 rfs4_dbe_lock(fp->rf_dbe); 7053 /* Let's see if the delegation was returned */ 7054 if (rfs4_check_recall(sp, access)) { 7055 rfs4_dbe_unlock(fp->rf_dbe); 7056 if (share_a || share_d) 7057 (void) rfs4_unshare(sp); 7058 rfs4_dbe_unlock(sp->rs_dbe); 7059 rfs4_file_rele(fp); 7060 rfs4_update_lease(sp->rs_owner->ro_client); 7061 7062 /* Not a fully formed open; "close" it */ 7063 if (screate == TRUE) 7064 rfs4_state_close(sp, FALSE, FALSE, cs->cr); 7065 rfs4_state_rele(sp); 7066 resp->status = NFS4ERR_DELAY; 7067 return; 7068 } 7069 } 7070 /* 7071 * the share check passed and any delegation conflict has been 7072 * taken care of, now call vop_open. 7073 * if this is the first open then call vop_open with fflags. 7074 * if not, call vn_open_upgrade with just the upgrade flags. 
7075 * 7076 * if the file has been opened already, it will have the current 7077 * access mode in the state struct. if it has no share access, then 7078 * this is a new open. 7079 * 7080 * However, if this is open with CLAIM_DLEGATE_CUR, then don't 7081 * call VOP_OPEN(), just do the open upgrade. 7082 */ 7083 if (first_open && !deleg_cur) { 7084 ct.cc_sysid = sysid; 7085 ct.cc_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe); 7086 ct.cc_caller_id = nfs4_srv_caller_id; 7087 ct.cc_flags = CC_DONTBLOCK; 7088 err = VOP_OPEN(&cs->vp, fflags, cs->cr, &ct); 7089 if (err) { 7090 rfs4_dbe_unlock(fp->rf_dbe); 7091 if (share_a || share_d) 7092 (void) rfs4_unshare(sp); 7093 rfs4_dbe_unlock(sp->rs_dbe); 7094 rfs4_file_rele(fp); 7095 7096 /* Not a fully formed open; "close" it */ 7097 if (screate == TRUE) 7098 rfs4_state_close(sp, FALSE, FALSE, cs->cr); 7099 rfs4_state_rele(sp); 7100 /* check if a monitor detected a delegation conflict */ 7101 if (err == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) 7102 resp->status = NFS4ERR_DELAY; 7103 else 7104 resp->status = NFS4ERR_SERVERFAULT; 7105 return; 7106 } 7107 } else { /* open upgrade */ 7108 /* 7109 * calculate the fflags for the new mode that is being added 7110 * by this upgrade. 7111 */ 7112 fflags = 0; 7113 if (open_a & OPEN4_SHARE_ACCESS_READ) 7114 fflags |= FREAD; 7115 if (open_a & OPEN4_SHARE_ACCESS_WRITE) 7116 fflags |= FWRITE; 7117 vn_open_upgrade(cs->vp, fflags); 7118 } 7119 sp->rs_open_access |= access; 7120 sp->rs_open_deny |= deny; 7121 7122 if (open_d & OPEN4_SHARE_DENY_READ) 7123 fp->rf_deny_read++; 7124 if (open_d & OPEN4_SHARE_DENY_WRITE) 7125 fp->rf_deny_write++; 7126 fp->rf_share_deny |= deny; 7127 7128 if (open_a & OPEN4_SHARE_ACCESS_READ) 7129 fp->rf_access_read++; 7130 if (open_a & OPEN4_SHARE_ACCESS_WRITE) 7131 fp->rf_access_write++; 7132 fp->rf_share_access |= access; 7133 7134 /* 7135 * Check for delegation here. 
if the deleg argument is not 7136 * DELEG_ANY, then this is a reclaim from a client and 7137 * we must honor the delegation requested. If necessary we can 7138 * set the recall flag. 7139 */ 7140 7141 dsp = rfs4_grant_delegation(deleg, sp, &recall); 7142 7143 cs->deleg = (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE); 7144 7145 next_stateid(&sp->rs_stateid); 7146 7147 resp->stateid = sp->rs_stateid.stateid; 7148 7149 rfs4_dbe_unlock(fp->rf_dbe); 7150 rfs4_dbe_unlock(sp->rs_dbe); 7151 7152 if (dsp) { 7153 rfs4_set_deleg_response(dsp, &resp->delegation, NULL, recall); 7154 rfs4_deleg_state_rele(dsp); 7155 } 7156 7157 rfs4_file_rele(fp); 7158 rfs4_state_rele(sp); 7159 7160 resp->status = NFS4_OK; 7161 } 7162 7163 /*ARGSUSED*/ 7164 static void 7165 rfs4_do_openfh(struct compound_state *cs, struct svc_req *req, OPEN4args *args, 7166 rfs4_openowner_t *oo, OPEN4res *resp) 7167 { 7168 /* cs->vp and cs->fh have been updated by putfh. */ 7169 rfs4_do_open(cs, req, oo, DELEG_ANY, 7170 (args->share_access & 0xff), args->share_deny, resp, 0); 7171 } 7172 7173 /*ARGSUSED*/ 7174 static void 7175 rfs4_do_opennull(struct compound_state *cs, struct svc_req *req, 7176 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp) 7177 { 7178 change_info4 *cinfo = &resp->cinfo; 7179 bitmap4 *attrset = &resp->attrset; 7180 7181 if (args->opentype == OPEN4_NOCREATE) 7182 resp->status = rfs4_lookupfile(&args->claim.open_claim4_u.file, 7183 req, cs, args->share_access, cinfo); 7184 else { 7185 /* inhibit delegation grants during exclusive create */ 7186 7187 if (args->mode == EXCLUSIVE4) 7188 rfs4_disable_delegation(); 7189 7190 resp->status = rfs4_createfile(args, req, cs, cinfo, attrset, 7191 oo->ro_client->rc_clientid); 7192 } 7193 7194 if (resp->status == NFS4_OK) { 7195 7196 /* cs->vp cs->fh now reference the desired file */ 7197 7198 rfs4_do_open(cs, req, oo, 7199 oo->ro_need_confirm ? 
DELEG_NONE : DELEG_ANY, 7200 args->share_access, args->share_deny, resp, 0); 7201 7202 /* 7203 * If rfs4_createfile set attrset, we must 7204 * clear this attrset before the response is copied. 7205 */ 7206 if (resp->status != NFS4_OK && resp->attrset) { 7207 resp->attrset = 0; 7208 } 7209 } 7210 else 7211 *cs->statusp = resp->status; 7212 7213 if (args->mode == EXCLUSIVE4) 7214 rfs4_enable_delegation(); 7215 } 7216 7217 /*ARGSUSED*/ 7218 static void 7219 rfs4_do_openprev(struct compound_state *cs, struct svc_req *req, 7220 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp) 7221 { 7222 change_info4 *cinfo = &resp->cinfo; 7223 vattr_t va; 7224 vtype_t v_type = cs->vp->v_type; 7225 int error = 0; 7226 7227 /* Verify that we have a regular file */ 7228 if (v_type != VREG) { 7229 if (v_type == VDIR) 7230 resp->status = NFS4ERR_ISDIR; 7231 else if (v_type == VLNK) 7232 resp->status = NFS4ERR_SYMLINK; 7233 else 7234 resp->status = NFS4ERR_INVAL; 7235 return; 7236 } 7237 7238 va.va_mask = AT_MODE|AT_UID; 7239 error = VOP_GETATTR(cs->vp, &va, 0, cs->cr, NULL); 7240 if (error) { 7241 resp->status = puterrno4(error); 7242 return; 7243 } 7244 7245 cs->mandlock = MANDLOCK(cs->vp, va.va_mode); 7246 7247 /* 7248 * Check if we have access to the file, Note the the file 7249 * could have originally been open UNCHECKED or GUARDED 7250 * with mode bits that will now fail, but there is nothing 7251 * we can really do about that except in the case that the 7252 * owner of the file is the one requesting the open. 
7253 */ 7254 if (crgetuid(cs->cr) != va.va_uid) { 7255 resp->status = check_open_access(args->share_access, cs, req); 7256 if (resp->status != NFS4_OK) { 7257 return; 7258 } 7259 } 7260 7261 /* 7262 * cinfo on a CLAIM_PREVIOUS is undefined, initialize to zero 7263 */ 7264 cinfo->before = 0; 7265 cinfo->after = 0; 7266 cinfo->atomic = FALSE; 7267 7268 rfs4_do_open(cs, req, oo, 7269 NFS4_DELEG4TYPE2REQTYPE(args->claim.open_claim4_u.delegate_type), 7270 args->share_access, args->share_deny, resp, 0); 7271 } 7272 7273 static void 7274 rfs4_do_opendelcur(struct compound_state *cs, struct svc_req *req, 7275 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp) 7276 { 7277 int error; 7278 nfsstat4 status; 7279 stateid4 stateid = 7280 args->claim.open_claim4_u.delegate_cur_info.delegate_stateid; 7281 rfs4_deleg_state_t *dsp; 7282 7283 /* 7284 * Find the state info from the stateid and confirm that the 7285 * file is delegated. If the state openowner is the same as 7286 * the supplied openowner we're done. If not, get the file 7287 * info from the found state info. Use that file info to 7288 * create the state for this lock owner. Note solaris doen't 7289 * really need the pathname to find the file. We may want to 7290 * lookup the pathname and make sure that the vp exist and 7291 * matches the vp in the file structure. However it is 7292 * possible that the pathname nolonger exists (local process 7293 * unlinks the file), so this may not be that useful. 7294 */ 7295 7296 status = rfs4_get_deleg_state(&stateid, &dsp); 7297 if (status != NFS4_OK) { 7298 resp->status = status; 7299 return; 7300 } 7301 7302 ASSERT(dsp->rds_finfo->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE); 7303 7304 /* 7305 * New lock owner, create state. 
Since this was probably called 7306 * in response to a CB_RECALL we set deleg to DELEG_NONE 7307 */ 7308 7309 ASSERT(cs->vp != NULL); 7310 VN_RELE(cs->vp); 7311 VN_HOLD(dsp->rds_finfo->rf_vp); 7312 cs->vp = dsp->rds_finfo->rf_vp; 7313 7314 if (error = makefh4(&cs->fh, cs->vp, cs->exi)) { 7315 rfs4_deleg_state_rele(dsp); 7316 *cs->statusp = resp->status = puterrno4(error); 7317 return; 7318 } 7319 7320 /* Mark progress for delegation returns */ 7321 dsp->rds_finfo->rf_dinfo.rd_time_lastwrite = gethrestime_sec(); 7322 rfs4_deleg_state_rele(dsp); 7323 rfs4_do_open(cs, req, oo, DELEG_NONE, 7324 args->share_access, args->share_deny, resp, 1); 7325 } 7326 7327 /*ARGSUSED*/ 7328 static void 7329 rfs4_do_opendelprev(struct compound_state *cs, struct svc_req *req, 7330 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp) 7331 { 7332 /* 7333 * Lookup the pathname, it must already exist since this file 7334 * was delegated. 7335 * 7336 * Find the file and state info for this vp and open owner pair. 7337 * check that they are in fact delegated. 7338 * check that the state access and deny modes are the same. 7339 * 7340 * Return the delgation possibly seting the recall flag. 
7341 */ 7342 rfs4_file_t *fp; 7343 rfs4_state_t *sp; 7344 bool_t create = FALSE; 7345 bool_t dcreate = FALSE; 7346 rfs4_deleg_state_t *dsp; 7347 nfsace4 *ace; 7348 7349 /* Note we ignore oflags */ 7350 resp->status = rfs4_lookupfile( 7351 &args->claim.open_claim4_u.file_delegate_prev, 7352 req, cs, args->share_access, &resp->cinfo); 7353 7354 if (resp->status != NFS4_OK) { 7355 return; 7356 } 7357 7358 /* get the file struct and hold a lock on it during initial open */ 7359 fp = rfs4_findfile_withlock(cs->vp, NULL, &create); 7360 if (fp == NULL) { 7361 resp->status = NFS4ERR_RESOURCE; 7362 DTRACE_PROBE1(nfss__e__do_opendelprev1, nfsstat4, resp->status); 7363 return; 7364 } 7365 7366 sp = rfs4_findstate_by_owner_file(oo, fp, &create); 7367 if (sp == NULL) { 7368 resp->status = NFS4ERR_SERVERFAULT; 7369 DTRACE_PROBE1(nfss__e__do_opendelprev2, nfsstat4, resp->status); 7370 rw_exit(&fp->rf_file_rwlock); 7371 rfs4_file_rele(fp); 7372 return; 7373 } 7374 7375 rfs4_dbe_lock(sp->rs_dbe); 7376 rfs4_dbe_lock(fp->rf_dbe); 7377 if (args->share_access != sp->rs_share_access || 7378 args->share_deny != sp->rs_share_deny || 7379 sp->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) { 7380 NFS4_DEBUG(rfs4_debug, 7381 (CE_NOTE, "rfs4_do_opendelprev: state mixup")); 7382 rfs4_dbe_unlock(fp->rf_dbe); 7383 rfs4_dbe_unlock(sp->rs_dbe); 7384 rfs4_file_rele(fp); 7385 rfs4_state_rele(sp); 7386 resp->status = NFS4ERR_SERVERFAULT; 7387 return; 7388 } 7389 rfs4_dbe_unlock(fp->rf_dbe); 7390 rfs4_dbe_unlock(sp->rs_dbe); 7391 7392 dsp = rfs4_finddeleg(sp, &dcreate); 7393 if (dsp == NULL) { 7394 rfs4_state_rele(sp); 7395 rfs4_file_rele(fp); 7396 resp->status = NFS4ERR_SERVERFAULT; 7397 return; 7398 } 7399 7400 next_stateid(&sp->rs_stateid); 7401 7402 resp->stateid = sp->rs_stateid.stateid; 7403 7404 resp->delegation.delegation_type = dsp->rds_dtype; 7405 7406 if (dsp->rds_dtype == OPEN_DELEGATE_READ) { 7407 open_read_delegation4 *rv = 7408 &resp->delegation.open_delegation4_u.read; 7409 7410 
rv->stateid = dsp->rds_delegid.stateid; 7411 rv->recall = FALSE; /* no policy in place to set to TRUE */ 7412 ace = &rv->permissions; 7413 } else { 7414 open_write_delegation4 *rv = 7415 &resp->delegation.open_delegation4_u.write; 7416 7417 rv->stateid = dsp->rds_delegid.stateid; 7418 rv->recall = FALSE; /* no policy in place to set to TRUE */ 7419 ace = &rv->permissions; 7420 rv->space_limit.limitby = NFS_LIMIT_SIZE; 7421 rv->space_limit.nfs_space_limit4_u.filesize = UINT64_MAX; 7422 } 7423 7424 /* XXX For now */ 7425 ace->type = ACE4_ACCESS_ALLOWED_ACE_TYPE; 7426 ace->flag = 0; 7427 ace->access_mask = 0; 7428 ace->who.utf8string_len = 0; 7429 ace->who.utf8string_val = 0; 7430 7431 rfs4_deleg_state_rele(dsp); 7432 rfs4_state_rele(sp); 7433 rfs4_file_rele(fp); 7434 } 7435 7436 typedef enum { 7437 NFS4_CHKSEQ_OKAY = 0, 7438 NFS4_CHKSEQ_REPLAY = 1, 7439 NFS4_CHKSEQ_BAD = 2 7440 } rfs4_chkseq_t; 7441 7442 /* 7443 * Generic function for sequence number checks. 7444 */ 7445 static rfs4_chkseq_t 7446 rfs4_check_seqid(seqid4 seqid, nfs_resop4 *lastop, 7447 seqid4 rqst_seq, nfs_resop4 *resop, bool_t copyres) 7448 { 7449 /* Same sequence ids and matching operations? 
*/ 7450 if (seqid == rqst_seq && resop->resop == lastop->resop) { 7451 if (copyres == TRUE) { 7452 rfs4_free_reply(resop); 7453 rfs4_copy_reply(resop, lastop); 7454 } 7455 NFS4_DEBUG(rfs4_debug, (CE_NOTE, 7456 "Replayed SEQID %d\n", seqid)); 7457 return (NFS4_CHKSEQ_REPLAY); 7458 } 7459 7460 /* If the incoming sequence is not the next expected then it is bad */ 7461 if (rqst_seq != seqid + 1) { 7462 if (rqst_seq == seqid) { 7463 NFS4_DEBUG(rfs4_debug, 7464 (CE_NOTE, "BAD SEQID: Replayed sequence id " 7465 "but last op was %d current op is %d\n", 7466 lastop->resop, resop->resop)); 7467 return (NFS4_CHKSEQ_BAD); 7468 } 7469 NFS4_DEBUG(rfs4_debug, 7470 (CE_NOTE, "BAD SEQID: got %u expecting %u\n", 7471 rqst_seq, seqid)); 7472 return (NFS4_CHKSEQ_BAD); 7473 } 7474 7475 /* Everything okay -- next expected */ 7476 return (NFS4_CHKSEQ_OKAY); 7477 } 7478 7479 7480 static rfs4_chkseq_t 7481 rfs4_check_open_seqid(seqid4 seqid, rfs4_openowner_t *op, nfs_resop4 *resop, 7482 const compound_state_t *cs) 7483 { 7484 rfs4_chkseq_t rc; 7485 7486 if (rfs4_has_session(cs)) 7487 return (NFS4_CHKSEQ_OKAY); 7488 7489 rfs4_dbe_lock(op->ro_dbe); 7490 rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply, seqid, resop, 7491 TRUE); 7492 rfs4_dbe_unlock(op->ro_dbe); 7493 7494 if (rc == NFS4_CHKSEQ_OKAY) 7495 rfs4_update_lease(op->ro_client); 7496 7497 return (rc); 7498 } 7499 7500 static rfs4_chkseq_t 7501 rfs4_check_olo_seqid(seqid4 olo_seqid, rfs4_openowner_t *op, nfs_resop4 *resop) 7502 { 7503 rfs4_chkseq_t rc; 7504 7505 rfs4_dbe_lock(op->ro_dbe); 7506 rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply, 7507 olo_seqid, resop, FALSE); 7508 rfs4_dbe_unlock(op->ro_dbe); 7509 7510 return (rc); 7511 } 7512 7513 static rfs4_chkseq_t 7514 rfs4_check_lock_seqid(seqid4 seqid, rfs4_lo_state_t *lsp, nfs_resop4 *resop) 7515 { 7516 rfs4_chkseq_t rc = NFS4_CHKSEQ_OKAY; 7517 7518 rfs4_dbe_lock(lsp->rls_dbe); 7519 if (!lsp->rls_skip_seqid_check) 7520 rc = rfs4_check_seqid(lsp->rls_seqid, 
&lsp->rls_reply, seqid, 7521 resop, TRUE); 7522 rfs4_dbe_unlock(lsp->rls_dbe); 7523 7524 return (rc); 7525 } 7526 7527 static void 7528 rfs4_op_open(nfs_argop4 *argop, nfs_resop4 *resop, 7529 struct svc_req *req, struct compound_state *cs) 7530 { 7531 OPEN4args *args = &argop->nfs_argop4_u.opopen; 7532 OPEN4res *resp = &resop->nfs_resop4_u.opopen; 7533 open_owner4 *owner = &args->owner; 7534 open_claim_type4 claim = args->claim.claim; 7535 rfs4_client_t *cp; 7536 rfs4_openowner_t *oo; 7537 bool_t create; 7538 bool_t replay = FALSE; 7539 int can_reclaim; 7540 7541 DTRACE_NFSV4_2(op__open__start, struct compound_state *, cs, 7542 OPEN4args *, args); 7543 7544 if (cs->vp == NULL) { 7545 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 7546 goto end; 7547 } 7548 7549 /* rfc5661 section 18.16.3 */ 7550 if (rfs4_has_session(cs)) 7551 owner->clientid = cs->client->rc_clientid; 7552 7553 /* 7554 * Need to check clientid and lease expiration first based on 7555 * error ordering and incrementing sequence id. 7556 */ 7557 cp = rfs4_findclient_by_id(owner->clientid, FALSE); 7558 if (cp == NULL) { 7559 *cs->statusp = resp->status = 7560 rfs4_check_clientid(&owner->clientid, 0); 7561 goto end; 7562 } 7563 7564 if (rfs4_lease_expired(cp)) { 7565 rfs4_client_close(cp); 7566 *cs->statusp = resp->status = NFS4ERR_EXPIRED; 7567 goto end; 7568 } 7569 can_reclaim = cp->rc_can_reclaim; 7570 7571 /* 7572 * Find the open_owner for use from this point forward. Take 7573 * care in updating the sequence id based on the type of error 7574 * being returned. 
7575 */ 7576 retry: 7577 create = TRUE; 7578 oo = rfs4_findopenowner(owner, &create, args->seqid); 7579 if (oo == NULL) { 7580 *cs->statusp = resp->status = NFS4ERR_RESOURCE; 7581 rfs4_client_rele(cp); 7582 goto end; 7583 } 7584 7585 /* 7586 * OPEN_CONFIRM must not be implemented in v4.1 7587 */ 7588 if (rfs4_has_session(cs)) { 7589 oo->ro_need_confirm = FALSE; 7590 } 7591 7592 /* Hold off access to the sequence space while the open is done */ 7593 /* Workaround to avoid deadlock */ 7594 if (!rfs4_has_session(cs)) 7595 rfs4_sw_enter(&oo->ro_sw); 7596 7597 /* 7598 * If the open_owner existed before at the server, then check 7599 * the sequence id. 7600 */ 7601 if (!create && !oo->ro_postpone_confirm) { 7602 switch (rfs4_check_open_seqid(args->seqid, oo, resop, cs)) { 7603 case NFS4_CHKSEQ_BAD: 7604 ASSERT(!rfs4_has_session(cs)); 7605 if ((args->seqid > oo->ro_open_seqid) && 7606 oo->ro_need_confirm) { 7607 rfs4_free_opens(oo, TRUE, FALSE); 7608 rfs4_sw_exit(&oo->ro_sw); 7609 rfs4_openowner_rele(oo); 7610 goto retry; 7611 } 7612 resp->status = NFS4ERR_BAD_SEQID; 7613 goto out; 7614 case NFS4_CHKSEQ_REPLAY: /* replay of previous request */ 7615 replay = TRUE; 7616 goto out; 7617 default: 7618 break; 7619 } 7620 7621 /* 7622 * Sequence was ok and open owner exists 7623 * check to see if we have yet to see an 7624 * open_confirm. 7625 */ 7626 if (oo->ro_need_confirm) { 7627 rfs4_free_opens(oo, TRUE, FALSE); 7628 ASSERT(!rfs4_has_session(cs)); 7629 rfs4_sw_exit(&oo->ro_sw); 7630 rfs4_openowner_rele(oo); 7631 goto retry; 7632 } 7633 } 7634 /* Grace only applies to regular-type OPENs */ 7635 if (rfs4_clnt_in_grace(cp) && 7636 (claim == CLAIM_NULL || claim == CLAIM_DELEGATE_CUR || 7637 claim == CLAIM_FH)) { 7638 *cs->statusp = resp->status = NFS4ERR_GRACE; 7639 goto out; 7640 } 7641 7642 /* 7643 * If previous state at the server existed then can_reclaim 7644 * will be set. If not reply NFS4ERR_NO_GRACE to the 7645 * client. 
7646 */ 7647 if (rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS && !can_reclaim) { 7648 *cs->statusp = resp->status = NFS4ERR_NO_GRACE; 7649 goto out; 7650 } 7651 7652 7653 /* 7654 * Reject the open if the client has missed the grace period 7655 */ 7656 if (!rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS) { 7657 *cs->statusp = resp->status = NFS4ERR_NO_GRACE; 7658 goto out; 7659 } 7660 7661 /* Couple of up-front bookkeeping items */ 7662 if (oo->ro_need_confirm) { 7663 /* 7664 * If this is a reclaim OPEN then we should not ask 7665 * for a confirmation of the open_owner per the 7666 * protocol specification. 7667 */ 7668 if (claim == CLAIM_PREVIOUS) 7669 oo->ro_need_confirm = FALSE; 7670 else 7671 resp->rflags |= OPEN4_RESULT_CONFIRM; 7672 } 7673 resp->rflags |= OPEN4_RESULT_LOCKTYPE_POSIX; 7674 7675 /* 7676 * If there is an unshared filesystem mounted on this vnode, 7677 * do not allow to open/create in this directory. 7678 */ 7679 if (vn_ismntpt(cs->vp)) { 7680 *cs->statusp = resp->status = NFS4ERR_ACCESS; 7681 goto out; 7682 } 7683 7684 /* 7685 * access must READ, WRITE, or BOTH. No access is invalid. 7686 * deny can be READ, WRITE, BOTH, or NONE. 7687 * bits not defined for access/deny are invalid. 7688 */ 7689 if (! (args->share_access & OPEN4_SHARE_ACCESS_BOTH) || 7690 (args->share_access & ~OPEN4_SHARE_ACCESS_BOTH) || 7691 (args->share_deny & ~OPEN4_SHARE_DENY_BOTH)) { 7692 *cs->statusp = resp->status = NFS4ERR_INVAL; 7693 goto out; 7694 } 7695 7696 7697 /* 7698 * make sure attrset is zero before response is built. 
7699 */ 7700 resp->attrset = 0; 7701 7702 switch (claim) { 7703 case CLAIM_NULL: 7704 rfs4_do_opennull(cs, req, args, oo, resp); 7705 break; 7706 case CLAIM_PREVIOUS: 7707 rfs4_do_openprev(cs, req, args, oo, resp); 7708 break; 7709 case CLAIM_DELEGATE_CUR: 7710 rfs4_do_opendelcur(cs, req, args, oo, resp); 7711 break; 7712 case CLAIM_DELEGATE_PREV: 7713 rfs4_do_opendelprev(cs, req, args, oo, resp); 7714 break; 7715 case CLAIM_FH: 7716 rfs4_do_openfh(cs, req, args, oo, resp); 7717 break; 7718 default: 7719 resp->status = NFS4ERR_INVAL; 7720 break; 7721 } 7722 7723 out: 7724 rfs4_client_rele(cp); 7725 7726 /* Catch sequence id handling here to make it a little easier */ 7727 switch (resp->status) { 7728 case NFS4ERR_BADXDR: 7729 case NFS4ERR_BAD_SEQID: 7730 case NFS4ERR_BAD_STATEID: 7731 case NFS4ERR_NOFILEHANDLE: 7732 case NFS4ERR_RESOURCE: 7733 case NFS4ERR_STALE_CLIENTID: 7734 case NFS4ERR_STALE_STATEID: 7735 /* 7736 * The protocol states that if any of these errors are 7737 * being returned, the sequence id should not be 7738 * incremented. Any other return requires an 7739 * increment. 7740 */ 7741 break; 7742 default: 7743 /* Always update the lease in this case */ 7744 rfs4_update_lease(oo->ro_client); 7745 7746 /* Regular response - copy the result */ 7747 if (!replay) 7748 rfs4_update_open_resp(oo, resop, &cs->fh); 7749 7750 /* 7751 * REPLAY case: Only if the previous response was OK 7752 * do we copy the filehandle. If not OK, no 7753 * filehandle to copy. 7754 */ 7755 if (replay == TRUE && 7756 resp->status == NFS4_OK && 7757 oo->ro_reply_fh.nfs_fh4_val) { 7758 /* 7759 * If this is a replay, we must restore the 7760 * current filehandle/vp to that of what was 7761 * returned originally. Try our best to do 7762 * it. 
7763 */ 7764 nfs_fh4_fmt_t *fh_fmtp = 7765 (nfs_fh4_fmt_t *)oo->ro_reply_fh.nfs_fh4_val; 7766 7767 cs->exi = checkexport4(&fh_fmtp->fh4_fsid, 7768 (fid_t *)&fh_fmtp->fh4_xlen, NULL); 7769 7770 if (cs->exi == NULL) { 7771 resp->status = NFS4ERR_STALE; 7772 goto finish; 7773 } 7774 7775 VN_RELE(cs->vp); 7776 7777 cs->vp = nfs4_fhtovp(&oo->ro_reply_fh, cs->exi, 7778 &resp->status); 7779 7780 if (cs->vp == NULL) 7781 goto finish; 7782 7783 nfs_fh4_copy(&oo->ro_reply_fh, &cs->fh); 7784 } 7785 7786 /* 7787 * If this was a replay, no need to update the 7788 * sequence id. If the open_owner was not created on 7789 * this pass, then update. The first use of an 7790 * open_owner will not bump the sequence id. 7791 */ 7792 if (replay == FALSE && !create) 7793 rfs4_update_open_sequence(oo); 7794 /* 7795 * If the client is receiving an error and the 7796 * open_owner needs to be confirmed, there is no way 7797 * to notify the client of this fact ignoring the fact 7798 * that the server has no method of returning a 7799 * stateid to confirm. Therefore, the server needs to 7800 * mark this open_owner in a way as to avoid the 7801 * sequence id checking the next time the client uses 7802 * this open_owner. 7803 */ 7804 if (resp->status != NFS4_OK && oo->ro_need_confirm) 7805 oo->ro_postpone_confirm = TRUE; 7806 /* 7807 * If OK response then clear the postpone flag and 7808 * reset the sequence id to keep in sync with the 7809 * client. 
7810 */ 7811 if (resp->status == NFS4_OK && oo->ro_postpone_confirm) { 7812 oo->ro_postpone_confirm = FALSE; 7813 oo->ro_open_seqid = args->seqid; 7814 } 7815 break; 7816 } 7817 7818 finish: 7819 *cs->statusp = resp->status; 7820 7821 if (!rfs4_has_session(cs)) 7822 rfs4_sw_exit(&oo->ro_sw); 7823 rfs4_openowner_rele(oo); 7824 7825 end: 7826 DTRACE_NFSV4_2(op__open__done, struct compound_state *, cs, 7827 OPEN4res *, resp); 7828 } 7829 7830 /*ARGSUSED*/ 7831 void 7832 rfs4_op_open_confirm(nfs_argop4 *argop, nfs_resop4 *resop, 7833 struct svc_req *req, struct compound_state *cs) 7834 { 7835 OPEN_CONFIRM4args *args = &argop->nfs_argop4_u.opopen_confirm; 7836 OPEN_CONFIRM4res *resp = &resop->nfs_resop4_u.opopen_confirm; 7837 rfs4_state_t *sp; 7838 nfsstat4 status; 7839 7840 DTRACE_NFSV4_2(op__open__confirm__start, struct compound_state *, cs, 7841 OPEN_CONFIRM4args *, args); 7842 7843 ASSERT(!rfs4_has_session(cs)); 7844 7845 if (cs->vp == NULL) { 7846 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 7847 goto out; 7848 } 7849 7850 if (cs->vp->v_type != VREG) { 7851 *cs->statusp = resp->status = 7852 cs->vp->v_type == VDIR ? 
NFS4ERR_ISDIR : NFS4ERR_INVAL; 7853 return; 7854 } 7855 7856 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID); 7857 if (status != NFS4_OK) { 7858 *cs->statusp = resp->status = status; 7859 goto out; 7860 } 7861 7862 /* Ensure specified filehandle matches */ 7863 if (cs->vp != sp->rs_finfo->rf_vp) { 7864 rfs4_state_rele(sp); 7865 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID; 7866 goto out; 7867 } 7868 7869 /* hold off other access to open_owner while we tinker */ 7870 rfs4_sw_enter(&sp->rs_owner->ro_sw); 7871 7872 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid, cs)) { 7873 case NFS4_CHECK_STATEID_OKAY: 7874 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner, 7875 resop, cs) != 0) { 7876 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID; 7877 break; 7878 } 7879 /* 7880 * If it is the appropriate stateid and determined to 7881 * be "OKAY" then this means that the stateid does not 7882 * need to be confirmed and the client is in error for 7883 * sending an OPEN_CONFIRM. 7884 */ 7885 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID; 7886 break; 7887 case NFS4_CHECK_STATEID_OLD: 7888 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID; 7889 break; 7890 case NFS4_CHECK_STATEID_BAD: 7891 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID; 7892 break; 7893 case NFS4_CHECK_STATEID_EXPIRED: 7894 *cs->statusp = resp->status = NFS4ERR_EXPIRED; 7895 break; 7896 case NFS4_CHECK_STATEID_CLOSED: 7897 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID; 7898 break; 7899 case NFS4_CHECK_STATEID_REPLAY: 7900 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner, 7901 resop, cs)) { 7902 case NFS4_CHKSEQ_OKAY: 7903 /* 7904 * This is replayed stateid; if seqid matches 7905 * next expected, then client is using wrong seqid. 
7906 */ 7907 /* fall through */ 7908 case NFS4_CHKSEQ_BAD: 7909 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID; 7910 break; 7911 case NFS4_CHKSEQ_REPLAY: 7912 /* 7913 * Note this case is the duplicate case so 7914 * resp->status is already set. 7915 */ 7916 *cs->statusp = resp->status; 7917 rfs4_update_lease(sp->rs_owner->ro_client); 7918 break; 7919 } 7920 break; 7921 case NFS4_CHECK_STATEID_UNCONFIRMED: 7922 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner, 7923 resop, cs) != NFS4_CHKSEQ_OKAY) { 7924 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID; 7925 break; 7926 } 7927 *cs->statusp = resp->status = NFS4_OK; 7928 7929 next_stateid(&sp->rs_stateid); 7930 resp->open_stateid = sp->rs_stateid.stateid; 7931 sp->rs_owner->ro_need_confirm = FALSE; 7932 rfs4_update_lease(sp->rs_owner->ro_client); 7933 rfs4_update_open_sequence(sp->rs_owner); 7934 rfs4_update_open_resp(sp->rs_owner, resop, NULL); 7935 break; 7936 default: 7937 ASSERT(FALSE); 7938 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT; 7939 break; 7940 } 7941 rfs4_sw_exit(&sp->rs_owner->ro_sw); 7942 rfs4_state_rele(sp); 7943 7944 out: 7945 DTRACE_NFSV4_2(op__open__confirm__done, struct compound_state *, cs, 7946 OPEN_CONFIRM4res *, resp); 7947 } 7948 7949 /*ARGSUSED*/ 7950 void 7951 rfs4_op_open_downgrade(nfs_argop4 *argop, nfs_resop4 *resop, 7952 struct svc_req *req, struct compound_state *cs) 7953 { 7954 OPEN_DOWNGRADE4args *args = &argop->nfs_argop4_u.opopen_downgrade; 7955 OPEN_DOWNGRADE4res *resp = &resop->nfs_resop4_u.opopen_downgrade; 7956 uint32_t access = args->share_access; 7957 uint32_t deny = args->share_deny; 7958 nfsstat4 status; 7959 rfs4_state_t *sp; 7960 rfs4_file_t *fp; 7961 int fflags = 0; 7962 7963 DTRACE_NFSV4_2(op__open__downgrade__start, struct compound_state *, cs, 7964 OPEN_DOWNGRADE4args *, args); 7965 7966 if (cs->vp == NULL) { 7967 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 7968 goto out; 7969 } 7970 7971 if (cs->vp->v_type != VREG) { 7972 *cs->statusp = resp->status = 
NFS4ERR_INVAL; 7973 return; 7974 } 7975 7976 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID); 7977 if (status != NFS4_OK) { 7978 *cs->statusp = resp->status = status; 7979 goto out; 7980 } 7981 7982 /* Ensure specified filehandle matches */ 7983 if (cs->vp != sp->rs_finfo->rf_vp) { 7984 rfs4_state_rele(sp); 7985 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID; 7986 goto out; 7987 } 7988 7989 /* hold off other access to open_owner while we tinker */ 7990 rfs4_sw_enter(&sp->rs_owner->ro_sw); 7991 7992 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid, cs)) { 7993 case NFS4_CHECK_STATEID_OKAY: 7994 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner, 7995 resop, cs) != NFS4_CHKSEQ_OKAY) { 7996 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID; 7997 goto end; 7998 } 7999 break; 8000 case NFS4_CHECK_STATEID_OLD: 8001 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID; 8002 goto end; 8003 case NFS4_CHECK_STATEID_BAD: 8004 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID; 8005 goto end; 8006 case NFS4_CHECK_STATEID_EXPIRED: 8007 *cs->statusp = resp->status = NFS4ERR_EXPIRED; 8008 goto end; 8009 case NFS4_CHECK_STATEID_CLOSED: 8010 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID; 8011 goto end; 8012 case NFS4_CHECK_STATEID_UNCONFIRMED: 8013 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID; 8014 goto end; 8015 case NFS4_CHECK_STATEID_REPLAY: 8016 ASSERT(!rfs4_has_session(cs)); 8017 8018 /* Check the sequence id for the open owner */ 8019 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner, 8020 resop, cs)) { 8021 case NFS4_CHKSEQ_OKAY: 8022 /* 8023 * This is replayed stateid; if seqid matches 8024 * next expected, then client is using wrong seqid. 8025 */ 8026 /* fall through */ 8027 case NFS4_CHKSEQ_BAD: 8028 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID; 8029 goto end; 8030 case NFS4_CHKSEQ_REPLAY: 8031 /* 8032 * Note this case is the duplicate case so 8033 * resp->status is already set. 
8034 */ 8035 *cs->statusp = resp->status; 8036 rfs4_update_lease(sp->rs_owner->ro_client); 8037 goto end; 8038 } 8039 break; 8040 default: 8041 ASSERT(FALSE); 8042 break; 8043 } 8044 8045 rfs4_dbe_lock(sp->rs_dbe); 8046 /* 8047 * Check that the new access modes and deny modes are valid. 8048 * Check that no invalid bits are set. 8049 */ 8050 if ((access & ~(OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) || 8051 (deny & ~(OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE))) { 8052 *cs->statusp = resp->status = NFS4ERR_INVAL; 8053 rfs4_update_open_sequence(sp->rs_owner); 8054 rfs4_dbe_unlock(sp->rs_dbe); 8055 goto end; 8056 } 8057 8058 /* 8059 * The new modes must be a subset of the current modes and 8060 * the access must specify at least one mode. To test that 8061 * the new mode is a subset of the current modes we bitwise 8062 * AND them together and check that the result equals the new 8063 * mode. For example: 8064 * New mode, access == R and current mode, sp->rs_open_access == RW 8065 * access & sp->rs_open_access == R == access, so the new access mode 8066 * is valid. Consider access == RW, sp->rs_open_access = R 8067 * access & sp->rs_open_access == R != access, so the new access mode 8068 * is invalid. 8069 */ 8070 if ((access & sp->rs_open_access) != access || 8071 (deny & sp->rs_open_deny) != deny || 8072 (access & 8073 (OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) == 0) { 8074 *cs->statusp = resp->status = NFS4ERR_INVAL; 8075 rfs4_update_open_sequence(sp->rs_owner); 8076 rfs4_dbe_unlock(sp->rs_dbe); 8077 goto end; 8078 } 8079 8080 /* 8081 * Release any share locks associated with this stateID. 8082 * Strictly speaking, this violates the spec because the 8083 * spec effectively requires that open downgrade be atomic. 8084 * At present, fs_shrlock does not have this capability. 
8085 */ 8086 (void) rfs4_unshare(sp); 8087 8088 status = rfs4_share(sp, access, deny); 8089 if (status != NFS4_OK) { 8090 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT; 8091 rfs4_update_open_sequence(sp->rs_owner); 8092 rfs4_dbe_unlock(sp->rs_dbe); 8093 goto end; 8094 } 8095 8096 fp = sp->rs_finfo; 8097 rfs4_dbe_lock(fp->rf_dbe); 8098 8099 /* 8100 * If the current mode has deny read and the new mode 8101 * does not, decrement the number of deny read mode bits 8102 * and if it goes to zero turn off the deny read bit 8103 * on the file. 8104 */ 8105 if ((sp->rs_open_deny & OPEN4_SHARE_DENY_READ) && 8106 (deny & OPEN4_SHARE_DENY_READ) == 0) { 8107 fp->rf_deny_read--; 8108 if (fp->rf_deny_read == 0) 8109 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ; 8110 } 8111 8112 /* 8113 * If the current mode has deny write and the new mode 8114 * does not, decrement the number of deny write mode bits 8115 * and if it goes to zero turn off the deny write bit 8116 * on the file. 8117 */ 8118 if ((sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) && 8119 (deny & OPEN4_SHARE_DENY_WRITE) == 0) { 8120 fp->rf_deny_write--; 8121 if (fp->rf_deny_write == 0) 8122 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE; 8123 } 8124 8125 /* 8126 * If the current mode has access read and the new mode 8127 * does not, decrement the number of access read mode bits 8128 * and if it goes to zero turn off the access read bit 8129 * on the file. set fflags to FREAD for the call to 8130 * vn_open_downgrade(). 8131 */ 8132 if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) && 8133 (access & OPEN4_SHARE_ACCESS_READ) == 0) { 8134 fp->rf_access_read--; 8135 if (fp->rf_access_read == 0) 8136 fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ; 8137 fflags |= FREAD; 8138 } 8139 8140 /* 8141 * If the current mode has access write and the new mode 8142 * does not, decrement the number of access write mode bits 8143 * and if it goes to zero turn off the access write bit 8144 * on the file. 
set fflags to FWRITE for the call to 8145 * vn_open_downgrade(). 8146 */ 8147 if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) && 8148 (access & OPEN4_SHARE_ACCESS_WRITE) == 0) { 8149 fp->rf_access_write--; 8150 if (fp->rf_access_write == 0) 8151 fp->rf_share_deny &= ~OPEN4_SHARE_ACCESS_WRITE; 8152 fflags |= FWRITE; 8153 } 8154 8155 /* Check that the file is still accessible */ 8156 ASSERT(fp->rf_share_access); 8157 8158 rfs4_dbe_unlock(fp->rf_dbe); 8159 8160 /* now set the new open access and deny modes */ 8161 sp->rs_open_access = access; 8162 sp->rs_open_deny = deny; 8163 8164 /* 8165 * we successfully downgraded the share lock, now we need to downgrade 8166 * the open. it is possible that the downgrade was only for a deny 8167 * mode and we have nothing else to do. 8168 */ 8169 if ((fflags & (FREAD|FWRITE)) != 0) 8170 vn_open_downgrade(cs->vp, fflags); 8171 8172 /* Update the stateid */ 8173 next_stateid(&sp->rs_stateid); 8174 resp->open_stateid = sp->rs_stateid.stateid; 8175 8176 rfs4_dbe_unlock(sp->rs_dbe); 8177 8178 *cs->statusp = resp->status = NFS4_OK; 8179 /* Update the lease */ 8180 rfs4_update_lease(sp->rs_owner->ro_client); 8181 /* And the sequence */ 8182 rfs4_update_open_sequence(sp->rs_owner); 8183 rfs4_update_open_resp(sp->rs_owner, resop, NULL); 8184 8185 end: 8186 rfs4_sw_exit(&sp->rs_owner->ro_sw); 8187 rfs4_state_rele(sp); 8188 out: 8189 DTRACE_NFSV4_2(op__open__downgrade__done, struct compound_state *, cs, 8190 OPEN_DOWNGRADE4res *, resp); 8191 } 8192 8193 static void * 8194 memstr(const void *s1, const char *s2, size_t n) 8195 { 8196 size_t l = strlen(s2); 8197 char *p = (char *)s1; 8198 8199 while (n >= l) { 8200 if (bcmp(p, s2, l) == 0) 8201 return (p); 8202 p++; 8203 n--; 8204 } 8205 8206 return (NULL); 8207 } 8208 8209 /* 8210 * The logic behind this function is detailed in the NFSv4 RFC in the 8211 * SETCLIENTID operation description under IMPLEMENTATION. 
Refer to 8212 * that section for explicit guidance to server behavior for 8213 * SETCLIENTID. 8214 */ 8215 void 8216 rfs4_op_setclientid(nfs_argop4 *argop, nfs_resop4 *resop, 8217 struct svc_req *req, struct compound_state *cs) 8218 { 8219 SETCLIENTID4args *args = &argop->nfs_argop4_u.opsetclientid; 8220 SETCLIENTID4res *res = &resop->nfs_resop4_u.opsetclientid; 8221 rfs4_client_t *cp, *newcp, *cp_confirmed, *cp_unconfirmed; 8222 rfs4_clntip_t *ci; 8223 bool_t create; 8224 char *addr, *netid; 8225 int len; 8226 8227 DTRACE_NFSV4_2(op__setclientid__start, struct compound_state *, cs, 8228 SETCLIENTID4args *, args); 8229 retry: 8230 newcp = cp_confirmed = cp_unconfirmed = NULL; 8231 8232 /* 8233 * Save the caller's IP address 8234 */ 8235 args->client.cl_addr = 8236 (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 8237 8238 /* 8239 * Record if it is a Solaris client that cannot handle referrals. 8240 */ 8241 if (memstr(args->client.id_val, "Solaris", args->client.id_len) && 8242 !memstr(args->client.id_val, "+referrals", args->client.id_len)) { 8243 /* Add a "yes, it's downrev" record */ 8244 create = TRUE; 8245 ci = rfs4_find_clntip(args->client.cl_addr, &create); 8246 ASSERT(ci != NULL); 8247 rfs4_dbe_rele(ci->ri_dbe); 8248 } else { 8249 /* Remove any previous record */ 8250 rfs4_invalidate_clntip(args->client.cl_addr); 8251 } 8252 8253 /* 8254 * In search of an EXISTING client matching the incoming 8255 * request to establish a new client identifier at the server 8256 */ 8257 create = TRUE; 8258 cp = rfs4_findclient(&args->client, &create, NULL); 8259 8260 /* Should never happen */ 8261 ASSERT(cp != NULL); 8262 8263 if (cp == NULL) { 8264 *cs->statusp = res->status = NFS4ERR_SERVERFAULT; 8265 goto out; 8266 } 8267 8268 /* 8269 * Easiest case. Client identifier is newly created and is 8270 * unconfirmed. Also note that for this case, no other 8271 * entries exist for the client identifier. Nothing else to 8272 * check. Just setup the response and respond. 
8273 */ 8274 if (create) { 8275 *cs->statusp = res->status = NFS4_OK; 8276 res->SETCLIENTID4res_u.resok4.clientid = cp->rc_clientid; 8277 res->SETCLIENTID4res_u.resok4.setclientid_confirm = 8278 cp->rc_confirm_verf; 8279 /* Setup callback information; CB_NULL confirmation later */ 8280 rfs4_client_setcb(cp, &args->callback, args->callback_ident); 8281 8282 rfs4_client_rele(cp); 8283 goto out; 8284 } 8285 8286 /* 8287 * An existing, confirmed client may exist but it may not have 8288 * been active for at least one lease period. If so, then 8289 * "close" the client and create a new client identifier 8290 */ 8291 if (rfs4_lease_expired(cp)) { 8292 rfs4_client_close(cp); 8293 goto retry; 8294 } 8295 8296 if (cp->rc_need_confirm == TRUE) 8297 cp_unconfirmed = cp; 8298 else 8299 cp_confirmed = cp; 8300 8301 cp = NULL; 8302 8303 /* 8304 * We have a confirmed client, now check for an 8305 * unconfimred entry 8306 */ 8307 if (cp_confirmed) { 8308 /* If creds don't match then client identifier is inuse */ 8309 if (!creds_ok(&cp_confirmed->rc_cr_set, req, cs)) { 8310 rfs4_cbinfo_t *cbp; 8311 /* 8312 * Some one else has established this client 8313 * id. Try and say * who they are. We will use 8314 * the call back address supplied by * the 8315 * first client. 
8316 */ 8317 *cs->statusp = res->status = NFS4ERR_CLID_INUSE; 8318 8319 addr = netid = NULL; 8320 8321 cbp = &cp_confirmed->rc_cbinfo; 8322 if (cbp->cb_callback.cb_location.r_addr && 8323 cbp->cb_callback.cb_location.r_netid) { 8324 cb_client4 *cbcp = &cbp->cb_callback; 8325 8326 len = strlen(cbcp->cb_location.r_addr)+1; 8327 addr = kmem_alloc(len, KM_SLEEP); 8328 bcopy(cbcp->cb_location.r_addr, addr, len); 8329 len = strlen(cbcp->cb_location.r_netid)+1; 8330 netid = kmem_alloc(len, KM_SLEEP); 8331 bcopy(cbcp->cb_location.r_netid, netid, len); 8332 } 8333 8334 res->SETCLIENTID4res_u.client_using.r_addr = addr; 8335 res->SETCLIENTID4res_u.client_using.r_netid = netid; 8336 8337 rfs4_client_rele(cp_confirmed); 8338 } 8339 8340 /* 8341 * Confirmed, creds match, and verifier matches; must 8342 * be an update of the callback info 8343 */ 8344 if (cp_confirmed->rc_nfs_client.verifier == 8345 args->client.verifier) { 8346 /* Setup callback information */ 8347 rfs4_client_setcb(cp_confirmed, &args->callback, 8348 args->callback_ident); 8349 8350 /* everything okay -- move ahead */ 8351 *cs->statusp = res->status = NFS4_OK; 8352 res->SETCLIENTID4res_u.resok4.clientid = 8353 cp_confirmed->rc_clientid; 8354 8355 /* update the confirm_verifier and return it */ 8356 rfs4_client_scv_next(cp_confirmed); 8357 res->SETCLIENTID4res_u.resok4.setclientid_confirm = 8358 cp_confirmed->rc_confirm_verf; 8359 8360 rfs4_client_rele(cp_confirmed); 8361 goto out; 8362 } 8363 8364 /* 8365 * Creds match but the verifier doesn't. Must search 8366 * for an unconfirmed client that would be replaced by 8367 * this request. 8368 */ 8369 create = FALSE; 8370 cp_unconfirmed = rfs4_findclient(&args->client, &create, 8371 cp_confirmed); 8372 } 8373 8374 /* 8375 * At this point, we have taken care of the brand new client 8376 * struct, INUSE case, update of an existing, and confirmed 8377 * client struct. 
8378 */ 8379 8380 /* 8381 * check to see if things have changed while we originally 8382 * picked up the client struct. If they have, then return and 8383 * retry the processing of this SETCLIENTID request. 8384 */ 8385 if (cp_unconfirmed) { 8386 rfs4_dbe_lock(cp_unconfirmed->rc_dbe); 8387 if (!cp_unconfirmed->rc_need_confirm) { 8388 rfs4_dbe_unlock(cp_unconfirmed->rc_dbe); 8389 rfs4_client_rele(cp_unconfirmed); 8390 if (cp_confirmed) 8391 rfs4_client_rele(cp_confirmed); 8392 goto retry; 8393 } 8394 /* do away with the old unconfirmed one */ 8395 rfs4_dbe_invalidate(cp_unconfirmed->rc_dbe); 8396 rfs4_dbe_unlock(cp_unconfirmed->rc_dbe); 8397 rfs4_client_rele(cp_unconfirmed); 8398 cp_unconfirmed = NULL; 8399 } 8400 8401 /* 8402 * This search will temporarily hide the confirmed client 8403 * struct while a new client struct is created as the 8404 * unconfirmed one. 8405 */ 8406 create = TRUE; 8407 newcp = rfs4_findclient(&args->client, &create, cp_confirmed); 8408 8409 ASSERT(newcp != NULL); 8410 8411 if (newcp == NULL) { 8412 *cs->statusp = res->status = NFS4ERR_SERVERFAULT; 8413 rfs4_client_rele(cp_confirmed); 8414 goto out; 8415 } 8416 8417 /* 8418 * If one was not created, then a similar request must be in 8419 * process so release and start over with this one 8420 */ 8421 if (create != TRUE) { 8422 rfs4_client_rele(newcp); 8423 if (cp_confirmed) 8424 rfs4_client_rele(cp_confirmed); 8425 goto retry; 8426 } 8427 8428 *cs->statusp = res->status = NFS4_OK; 8429 res->SETCLIENTID4res_u.resok4.clientid = newcp->rc_clientid; 8430 res->SETCLIENTID4res_u.resok4.setclientid_confirm = 8431 newcp->rc_confirm_verf; 8432 /* Setup callback information; CB_NULL confirmation later */ 8433 rfs4_client_setcb(newcp, &args->callback, args->callback_ident); 8434 8435 newcp->rc_cp_confirmed = cp_confirmed; 8436 8437 rfs4_client_rele(newcp); 8438 8439 out: 8440 DTRACE_NFSV4_2(op__setclientid__done, struct compound_state *, cs, 8441 SETCLIENTID4res *, res); 8442 } 8443 8444 /*ARGSUSED*/ 
8445 void 8446 rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop, 8447 struct svc_req *req, struct compound_state *cs) 8448 { 8449 SETCLIENTID_CONFIRM4args *args = 8450 &argop->nfs_argop4_u.opsetclientid_confirm; 8451 SETCLIENTID_CONFIRM4res *res = 8452 &resop->nfs_resop4_u.opsetclientid_confirm; 8453 rfs4_client_t *cp, *cptoclose = NULL; 8454 nfs4_srv_t *nsrv4; 8455 8456 DTRACE_NFSV4_2(op__setclientid__confirm__start, 8457 struct compound_state *, cs, 8458 SETCLIENTID_CONFIRM4args *, args); 8459 8460 nsrv4 = nfs4_get_srv(); 8461 *cs->statusp = res->status = NFS4_OK; 8462 8463 cp = rfs4_findclient_by_id(args->clientid, TRUE); 8464 8465 if (cp == NULL) { 8466 *cs->statusp = res->status = 8467 rfs4_check_clientid(&args->clientid, 1); 8468 goto out; 8469 } 8470 8471 if (!creds_ok(&cp->rc_cr_set, req, cs)) { 8472 *cs->statusp = res->status = NFS4ERR_CLID_INUSE; 8473 rfs4_client_rele(cp); 8474 goto out; 8475 } 8476 8477 /* If the verifier doesn't match, the record doesn't match */ 8478 if (cp->rc_confirm_verf != args->setclientid_confirm) { 8479 *cs->statusp = res->status = NFS4ERR_STALE_CLIENTID; 8480 rfs4_client_rele(cp); 8481 goto out; 8482 } 8483 8484 rfs4_dbe_lock(cp->rc_dbe); 8485 cp->rc_need_confirm = FALSE; 8486 if (cp->rc_cp_confirmed) { 8487 cptoclose = cp->rc_cp_confirmed; 8488 cptoclose->rc_ss_remove = 1; 8489 cp->rc_cp_confirmed = NULL; 8490 } 8491 8492 /* 8493 * Update the client's associated server instance, if it's changed 8494 * since the client was created. 8495 */ 8496 if (rfs4_servinst(cp) != nsrv4->nfs4_cur_servinst) 8497 rfs4_servinst_assign(nsrv4, cp, nsrv4->nfs4_cur_servinst); 8498 8499 /* 8500 * Record clientid in stable storage. 8501 * Must be done after server instance has been assigned. 
8502 */ 8503 rfs4_ss_clid(nsrv4, cp); 8504 8505 rfs4_dbe_unlock(cp->rc_dbe); 8506 8507 if (cptoclose) 8508 /* don't need to rele, client_close does it */ 8509 rfs4_client_close(cptoclose); 8510 8511 /* If needed, initiate CB_NULL call for callback path */ 8512 rfs4_deleg_cb_check(cp); 8513 rfs4_update_lease(cp); 8514 8515 /* 8516 * Check to see if client can perform reclaims 8517 */ 8518 rfs4_ss_chkclid(nsrv4, cp); 8519 8520 rfs4_client_rele(cp); 8521 8522 out: 8523 DTRACE_NFSV4_2(op__setclientid__confirm__done, 8524 struct compound_state *, cs, 8525 SETCLIENTID_CONFIRM4 *, res); 8526 } 8527 8528 8529 /*ARGSUSED*/ 8530 void 8531 rfs4_op_close(nfs_argop4 *argop, nfs_resop4 *resop, 8532 struct svc_req *req, struct compound_state *cs) 8533 { 8534 CLOSE4args *args = &argop->nfs_argop4_u.opclose; 8535 CLOSE4res *resp = &resop->nfs_resop4_u.opclose; 8536 rfs4_state_t *sp; 8537 nfsstat4 status; 8538 8539 DTRACE_NFSV4_2(op__close__start, struct compound_state *, cs, 8540 CLOSE4args *, args); 8541 8542 if (cs->vp == NULL) { 8543 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 8544 goto out; 8545 } 8546 8547 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_INVALID); 8548 if (status != NFS4_OK) { 8549 *cs->statusp = resp->status = status; 8550 goto out; 8551 } 8552 8553 /* Ensure specified filehandle matches */ 8554 if (cs->vp != sp->rs_finfo->rf_vp) { 8555 rfs4_state_rele(sp); 8556 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID; 8557 goto out; 8558 } 8559 8560 /* hold off other access to open_owner while we tinker */ 8561 rfs4_sw_enter(&sp->rs_owner->ro_sw); 8562 8563 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid, cs)) { 8564 case NFS4_CHECK_STATEID_OKAY: 8565 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner, 8566 resop, cs) != NFS4_CHKSEQ_OKAY) { 8567 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID; 8568 goto end; 8569 } 8570 break; 8571 case NFS4_CHECK_STATEID_OLD: 8572 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID; 8573 goto end; 8574 
case NFS4_CHECK_STATEID_BAD: 8575 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID; 8576 goto end; 8577 case NFS4_CHECK_STATEID_EXPIRED: 8578 *cs->statusp = resp->status = NFS4ERR_EXPIRED; 8579 goto end; 8580 case NFS4_CHECK_STATEID_CLOSED: 8581 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID; 8582 goto end; 8583 case NFS4_CHECK_STATEID_UNCONFIRMED: 8584 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID; 8585 goto end; 8586 case NFS4_CHECK_STATEID_REPLAY: 8587 ASSERT(!rfs4_has_session(cs)); 8588 8589 /* Check the sequence id for the open owner */ 8590 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner, 8591 resop, cs)) { 8592 case NFS4_CHKSEQ_OKAY: 8593 /* 8594 * This is replayed stateid; if seqid matches 8595 * next expected, then client is using wrong seqid. 8596 */ 8597 /* FALL THROUGH */ 8598 case NFS4_CHKSEQ_BAD: 8599 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID; 8600 goto end; 8601 case NFS4_CHKSEQ_REPLAY: 8602 /* 8603 * Note this case is the duplicate case so 8604 * resp->status is already set. 8605 */ 8606 *cs->statusp = resp->status; 8607 rfs4_update_lease(sp->rs_owner->ro_client); 8608 goto end; 8609 } 8610 break; 8611 default: 8612 ASSERT(FALSE); 8613 break; 8614 } 8615 8616 rfs4_dbe_lock(sp->rs_dbe); 8617 8618 /* Update the stateid. 
*/ 8619 next_stateid(&sp->rs_stateid); 8620 resp->open_stateid = sp->rs_stateid.stateid; 8621 8622 rfs4_dbe_unlock(sp->rs_dbe); 8623 8624 rfs4_update_lease(sp->rs_owner->ro_client); 8625 rfs4_update_open_sequence(sp->rs_owner); 8626 rfs4_update_open_resp(sp->rs_owner, resop, NULL); 8627 8628 rfs4_state_close(sp, FALSE, FALSE, cs->cr); 8629 8630 *cs->statusp = resp->status = status; 8631 8632 end: 8633 rfs4_sw_exit(&sp->rs_owner->ro_sw); 8634 rfs4_state_rele(sp); 8635 out: 8636 DTRACE_NFSV4_2(op__close__done, struct compound_state *, cs, 8637 CLOSE4res *, resp); 8638 } 8639 8640 /* 8641 * Manage the counts on the file struct and close all file locks 8642 */ 8643 /*ARGSUSED*/ 8644 void 8645 rfs4_release_share_lock_state(rfs4_state_t *sp, cred_t *cr, 8646 bool_t close_of_client) 8647 { 8648 rfs4_file_t *fp = sp->rs_finfo; 8649 rfs4_lo_state_t *lsp; 8650 int fflags = 0; 8651 8652 /* 8653 * If this call is part of the larger closing down of client 8654 * state then it is just easier to release all locks 8655 * associated with this client instead of going through each 8656 * individual file and cleaning locks there. 8657 */ 8658 if (close_of_client) { 8659 if (sp->rs_owner->ro_client->rc_unlksys_completed == FALSE && 8660 !list_is_empty(&sp->rs_lostatelist) && 8661 sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID) { 8662 /* Is the PxFS kernel module loaded? */ 8663 if (lm_remove_file_locks != NULL) { 8664 int new_sysid; 8665 8666 /* Encode the cluster nodeid in new sysid */ 8667 new_sysid = sp->rs_owner->ro_client->rc_sysidt; 8668 lm_set_nlmid_flk(&new_sysid); 8669 8670 /* 8671 * This PxFS routine removes file locks for a 8672 * client over all nodes of a cluster. 
8673 */ 8674 NFS4_DEBUG(rfs4_debug, (CE_NOTE, 8675 "lm_remove_file_locks(sysid=0x%x)\n", 8676 new_sysid)); 8677 (*lm_remove_file_locks)(new_sysid); 8678 } else { 8679 struct flock64 flk; 8680 8681 /* Release all locks for this client */ 8682 flk.l_type = F_UNLKSYS; 8683 flk.l_whence = 0; 8684 flk.l_start = 0; 8685 flk.l_len = 0; 8686 flk.l_sysid = 8687 sp->rs_owner->ro_client->rc_sysidt; 8688 flk.l_pid = 0; 8689 (void) VOP_FRLOCK(sp->rs_finfo->rf_vp, F_SETLK, 8690 &flk, F_REMOTELOCK | FREAD | FWRITE, 8691 (u_offset_t)0, NULL, CRED(), NULL); 8692 } 8693 8694 sp->rs_owner->ro_client->rc_unlksys_completed = TRUE; 8695 } 8696 } 8697 8698 /* 8699 * Release all locks on this file by this lock owner or at 8700 * least mark the locks as having been released 8701 */ 8702 for (lsp = list_head(&sp->rs_lostatelist); lsp != NULL; 8703 lsp = list_next(&sp->rs_lostatelist, lsp)) { 8704 lsp->rls_locks_cleaned = TRUE; 8705 8706 /* Was this already taken care of above? */ 8707 if (!close_of_client && 8708 sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID) 8709 (void) cleanlocks(sp->rs_finfo->rf_vp, 8710 lsp->rls_locker->rl_pid, 8711 lsp->rls_locker->rl_client->rc_sysidt); 8712 } 8713 8714 /* 8715 * Release any shrlocks associated with this open state ID. 8716 * This must be done before the rfs4_state gets marked closed. 8717 */ 8718 if (sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID) 8719 (void) rfs4_unshare(sp); 8720 8721 if (sp->rs_open_access) { 8722 rfs4_dbe_lock(fp->rf_dbe); 8723 8724 /* 8725 * Decrement the count for each access and deny bit that this 8726 * state has contributed to the file. 8727 * If the file counts go to zero 8728 * clear the appropriate bit in the appropriate mask. 
8729 */ 8730 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) { 8731 fp->rf_access_read--; 8732 fflags |= FREAD; 8733 if (fp->rf_access_read == 0) 8734 fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ; 8735 } 8736 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) { 8737 fp->rf_access_write--; 8738 fflags |= FWRITE; 8739 if (fp->rf_access_write == 0) 8740 fp->rf_share_access &= 8741 ~OPEN4_SHARE_ACCESS_WRITE; 8742 } 8743 if (sp->rs_open_deny & OPEN4_SHARE_DENY_READ) { 8744 fp->rf_deny_read--; 8745 if (fp->rf_deny_read == 0) 8746 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ; 8747 } 8748 if (sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) { 8749 fp->rf_deny_write--; 8750 if (fp->rf_deny_write == 0) 8751 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE; 8752 } 8753 8754 (void) VOP_CLOSE(fp->rf_vp, fflags, 1, (offset_t)0, cr, NULL); 8755 8756 rfs4_dbe_unlock(fp->rf_dbe); 8757 8758 sp->rs_open_access = 0; 8759 sp->rs_open_deny = 0; 8760 } 8761 } 8762 8763 /* 8764 * lock_denied: Fill in a LOCK4deneid structure given an flock64 structure. 8765 */ 8766 static nfsstat4 8767 lock_denied(LOCK4denied *dp, struct flock64 *flk) 8768 { 8769 rfs4_lockowner_t *lo; 8770 rfs4_client_t *cp; 8771 uint32_t len; 8772 8773 lo = rfs4_findlockowner_by_pid(flk->l_pid); 8774 if (lo != NULL) { 8775 cp = lo->rl_client; 8776 if (rfs4_lease_expired(cp)) { 8777 rfs4_lockowner_rele(lo); 8778 rfs4_dbe_hold(cp->rc_dbe); 8779 rfs4_client_close(cp); 8780 return (NFS4ERR_EXPIRED); 8781 } 8782 dp->owner.clientid = lo->rl_owner.clientid; 8783 len = lo->rl_owner.owner_len; 8784 dp->owner.owner_val = kmem_alloc(len, KM_SLEEP); 8785 bcopy(lo->rl_owner.owner_val, dp->owner.owner_val, len); 8786 dp->owner.owner_len = len; 8787 rfs4_lockowner_rele(lo); 8788 goto finish; 8789 } 8790 8791 /* 8792 * Its not a NFS4 lock. We take advantage that the upper 32 bits 8793 * of the client id contain the boot time for a NFS4 lock. 
So we 8794 * fabricate and identity by setting clientid to the sysid, and 8795 * the lock owner to the pid. 8796 */ 8797 dp->owner.clientid = flk->l_sysid; 8798 len = sizeof (pid_t); 8799 dp->owner.owner_len = len; 8800 dp->owner.owner_val = kmem_alloc(len, KM_SLEEP); 8801 bcopy(&flk->l_pid, dp->owner.owner_val, len); 8802 finish: 8803 dp->offset = flk->l_start; 8804 dp->length = flk->l_len; 8805 8806 if (flk->l_type == F_RDLCK) 8807 dp->locktype = READ_LT; 8808 else if (flk->l_type == F_WRLCK) 8809 dp->locktype = WRITE_LT; 8810 else 8811 return (NFS4ERR_INVAL); /* no mapping from POSIX ltype to v4 */ 8812 8813 return (NFS4_OK); 8814 } 8815 8816 /* 8817 * The NFSv4.0 LOCK operation does not support the blocking lock (at the 8818 * NFSv4.0 protocol level) so the client needs to resend the LOCK request in a 8819 * case the lock is denied by the NFSv4.0 server. NFSv4.0 clients are prepared 8820 * for that (obviously); they are sending the LOCK requests with some delays 8821 * between the attempts. See nfs4frlock() and nfs4_block_and_wait() for the 8822 * locking and delay implementation at the client side. 8823 * 8824 * To make the life of the clients easier, the NFSv4.0 server tries to do some 8825 * fast retries on its own (the for loop below) in a hope the lock will be 8826 * available soon. And if not, the client won't need to resend the LOCK 8827 * requests so fast to check the lock availability. This basically saves some 8828 * network traffic and tries to make sure the client gets the lock ASAP. 8829 */ 8830 static int 8831 setlock(vnode_t *vp, struct flock64 *flock, int flag, cred_t *cred) 8832 { 8833 int error; 8834 struct flock64 flk; 8835 int i; 8836 clock_t delaytime; 8837 int cmd; 8838 int spin_cnt = 0; 8839 8840 cmd = nbl_need_check(vp) ? 
F_SETLK_NBMAND : F_SETLK; 8841 retry: 8842 delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay); 8843 8844 for (i = 0; i < rfs4_maxlock_tries; i++) { 8845 LOCK_PRINT(rfs4_debug, "setlock", cmd, flock); 8846 error = VOP_FRLOCK(vp, cmd, 8847 flock, flag, (u_offset_t)0, NULL, cred, NULL); 8848 8849 if (error != EAGAIN && error != EACCES) 8850 break; 8851 8852 if (i < rfs4_maxlock_tries - 1) { 8853 delay(delaytime); 8854 delaytime *= 2; 8855 } 8856 } 8857 8858 if (error == EAGAIN || error == EACCES) { 8859 /* Get the owner of the lock */ 8860 flk = *flock; 8861 LOCK_PRINT(rfs4_debug, "setlock", F_GETLK, &flk); 8862 if (VOP_FRLOCK(vp, F_GETLK, &flk, flag, 0, NULL, cred, 8863 NULL) == 0) { 8864 /* 8865 * There's a race inherent in the current VOP_FRLOCK 8866 * design where: 8867 * a: "other guy" takes a lock that conflicts with a 8868 * lock we want 8869 * b: we attempt to take our lock (non-blocking) and 8870 * the attempt fails. 8871 * c: "other guy" releases the conflicting lock 8872 * d: we ask what lock conflicts with the lock we want, 8873 * getting F_UNLCK (no lock blocks us) 8874 * 8875 * If we retry the non-blocking lock attempt in this 8876 * case (restart at step 'b') there's some possibility 8877 * that many such attempts might fail. However a test 8878 * designed to actually provoke this race shows that 8879 * the vast majority of cases require no retry, and 8880 * only a few took as many as three retries. Here's 8881 * the test outcome: 8882 * 8883 * number of retries how many times we needed 8884 * that many retries 8885 * 0 79461 8886 * 1 862 8887 * 2 49 8888 * 3 5 8889 * 8890 * Given those empirical results, we arbitrarily limit 8891 * the retry count to ten. 8892 * 8893 * If we actually make to ten retries and give up, 8894 * nothing catastrophic happens, but we're unable to 8895 * return the information about the conflicting lock to 8896 * the NFS client. That's an acceptable trade off vs. 8897 * letting this retry loop run forever. 
8898 */ 8899 if (flk.l_type == F_UNLCK) { 8900 if (spin_cnt++ < 10) { 8901 /* No longer locked, retry */ 8902 goto retry; 8903 } 8904 } else { 8905 *flock = flk; 8906 LOCK_PRINT(rfs4_debug, "setlock(blocking lock)", 8907 F_GETLK, &flk); 8908 } 8909 } 8910 } 8911 8912 return (error); 8913 } 8914 8915 /*ARGSUSED*/ 8916 static nfsstat4 8917 rfs4_do_lock(rfs4_lo_state_t *lsp, nfs_lock_type4 locktype, 8918 offset4 offset, length4 length, cred_t *cred, nfs_resop4 *resop) 8919 { 8920 nfsstat4 status; 8921 rfs4_lockowner_t *lo = lsp->rls_locker; 8922 rfs4_state_t *sp = lsp->rls_state; 8923 struct flock64 flock; 8924 int16_t ltype; 8925 int flag; 8926 int error; 8927 sysid_t sysid; 8928 LOCK4res *lres; 8929 vnode_t *vp; 8930 8931 if (rfs4_lease_expired(lo->rl_client)) { 8932 return (NFS4ERR_EXPIRED); 8933 } 8934 8935 if ((status = rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK) 8936 return (status); 8937 8938 /* Check for zero length. To lock to end of file use all ones for V4 */ 8939 if (length == 0) 8940 return (NFS4ERR_INVAL); 8941 else if (length == (length4)(~0)) 8942 length = 0; /* Posix to end of file */ 8943 8944 retry: 8945 rfs4_dbe_lock(sp->rs_dbe); 8946 if (sp->rs_closed == TRUE) { 8947 rfs4_dbe_unlock(sp->rs_dbe); 8948 return (NFS4ERR_OLD_STATEID); 8949 } 8950 8951 if (resop->resop != OP_LOCKU) { 8952 switch (locktype) { 8953 case READ_LT: 8954 case READW_LT: 8955 if ((sp->rs_share_access 8956 & OPEN4_SHARE_ACCESS_READ) == 0) { 8957 rfs4_dbe_unlock(sp->rs_dbe); 8958 8959 return (NFS4ERR_OPENMODE); 8960 } 8961 ltype = F_RDLCK; 8962 break; 8963 case WRITE_LT: 8964 case WRITEW_LT: 8965 if ((sp->rs_share_access 8966 & OPEN4_SHARE_ACCESS_WRITE) == 0) { 8967 rfs4_dbe_unlock(sp->rs_dbe); 8968 8969 return (NFS4ERR_OPENMODE); 8970 } 8971 ltype = F_WRLCK; 8972 break; 8973 } 8974 } else 8975 ltype = F_UNLCK; 8976 8977 flock.l_type = ltype; 8978 flock.l_whence = 0; /* SEEK_SET */ 8979 flock.l_start = offset; 8980 flock.l_len = length; 8981 flock.l_sysid = sysid; 8982 
flock.l_pid = lsp->rls_locker->rl_pid; 8983 8984 /* Note that length4 is uint64_t but l_len and l_start are off64_t */ 8985 if (flock.l_len < 0 || flock.l_start < 0) { 8986 rfs4_dbe_unlock(sp->rs_dbe); 8987 return (NFS4ERR_INVAL); 8988 } 8989 8990 /* 8991 * N.B. FREAD has the same value as OPEN4_SHARE_ACCESS_READ and 8992 * FWRITE has the same value as OPEN4_SHARE_ACCESS_WRITE. 8993 */ 8994 flag = (int)sp->rs_share_access | F_REMOTELOCK; 8995 8996 vp = sp->rs_finfo->rf_vp; 8997 VN_HOLD(vp); 8998 8999 /* 9000 * We need to unlock sp before we call the underlying filesystem to 9001 * acquire the file lock. 9002 */ 9003 rfs4_dbe_unlock(sp->rs_dbe); 9004 9005 error = setlock(vp, &flock, flag, cred); 9006 9007 /* 9008 * Make sure the file is still open. In a case the file was closed in 9009 * the meantime, clean the lock we acquired using the setlock() call 9010 * above, and return the appropriate error. 9011 */ 9012 rfs4_dbe_lock(sp->rs_dbe); 9013 if (sp->rs_closed == TRUE) { 9014 cleanlocks(vp, lsp->rls_locker->rl_pid, sysid); 9015 rfs4_dbe_unlock(sp->rs_dbe); 9016 9017 VN_RELE(vp); 9018 9019 return (NFS4ERR_OLD_STATEID); 9020 } 9021 rfs4_dbe_unlock(sp->rs_dbe); 9022 9023 VN_RELE(vp); 9024 9025 if (error == 0) { 9026 rfs4_dbe_lock(lsp->rls_dbe); 9027 next_stateid(&lsp->rls_lockid); 9028 rfs4_dbe_unlock(lsp->rls_dbe); 9029 } 9030 9031 /* 9032 * N.B. We map error values to nfsv4 errors. This is differrent 9033 * than puterrno4 routine. 
9034 */ 9035 switch (error) { 9036 case 0: 9037 status = NFS4_OK; 9038 break; 9039 case EAGAIN: 9040 case EACCES: /* Old value */ 9041 /* Can only get here if op is OP_LOCK */ 9042 ASSERT(resop->resop == OP_LOCK); 9043 lres = &resop->nfs_resop4_u.oplock; 9044 status = NFS4ERR_DENIED; 9045 if (lock_denied(&lres->LOCK4res_u.denied, &flock) 9046 == NFS4ERR_EXPIRED) 9047 goto retry; 9048 break; 9049 case ENOLCK: 9050 status = NFS4ERR_DELAY; 9051 break; 9052 case EOVERFLOW: 9053 status = NFS4ERR_INVAL; 9054 break; 9055 case EINVAL: 9056 status = NFS4ERR_NOTSUPP; 9057 break; 9058 default: 9059 status = NFS4ERR_SERVERFAULT; 9060 break; 9061 } 9062 9063 return (status); 9064 } 9065 9066 /*ARGSUSED*/ 9067 void 9068 rfs4_op_lock(nfs_argop4 *argop, nfs_resop4 *resop, 9069 struct svc_req *req, struct compound_state *cs) 9070 { 9071 LOCK4args *args = &argop->nfs_argop4_u.oplock; 9072 LOCK4res *resp = &resop->nfs_resop4_u.oplock; 9073 nfsstat4 status; 9074 stateid4 *stateid; 9075 rfs4_lockowner_t *lo; 9076 rfs4_client_t *cp; 9077 rfs4_state_t *sp = NULL; 9078 rfs4_lo_state_t *lsp = NULL; 9079 bool_t ls_sw_held = FALSE; 9080 bool_t create = TRUE; 9081 bool_t lcreate = TRUE; 9082 bool_t dup_lock = FALSE; 9083 int rc; 9084 9085 DTRACE_NFSV4_2(op__lock__start, struct compound_state *, cs, 9086 LOCK4args *, args); 9087 9088 if (cs->vp == NULL) { 9089 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 9090 DTRACE_NFSV4_2(op__lock__done, struct compound_state *, 9091 cs, LOCK4res *, resp); 9092 return; 9093 } 9094 9095 if (args->locker.new_lock_owner) { 9096 /* Create a new lockowner for this instance */ 9097 open_to_lock_owner4 *olo = &args->locker.locker4_u.open_owner; 9098 9099 NFS4_DEBUG(rfs4_debug, (CE_NOTE, "Creating new lock owner")); 9100 9101 stateid = &olo->open_stateid; 9102 status = rfs4_get_state(stateid, &sp, RFS4_DBS_VALID); 9103 if (status != NFS4_OK) { 9104 NFS4_DEBUG(rfs4_debug, 9105 (CE_NOTE, "Get state failed in lock %d", status)); 9106 *cs->statusp = resp->status 
= status; 9107 DTRACE_NFSV4_2(op__lock__done, struct compound_state *, 9108 cs, LOCK4res *, resp); 9109 return; 9110 } 9111 9112 /* Ensure specified filehandle matches */ 9113 if (cs->vp != sp->rs_finfo->rf_vp) { 9114 rfs4_state_rele(sp); 9115 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID; 9116 DTRACE_NFSV4_2(op__lock__done, struct compound_state *, 9117 cs, LOCK4res *, resp); 9118 return; 9119 } 9120 9121 /* hold off other access to open_owner while we tinker */ 9122 rfs4_sw_enter(&sp->rs_owner->ro_sw); 9123 9124 switch (rc = rfs4_check_stateid_seqid(sp, stateid, cs)) { 9125 case NFS4_CHECK_STATEID_OLD: 9126 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID; 9127 goto end; 9128 case NFS4_CHECK_STATEID_BAD: 9129 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID; 9130 goto end; 9131 case NFS4_CHECK_STATEID_EXPIRED: 9132 *cs->statusp = resp->status = NFS4ERR_EXPIRED; 9133 goto end; 9134 case NFS4_CHECK_STATEID_UNCONFIRMED: 9135 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID; 9136 goto end; 9137 case NFS4_CHECK_STATEID_CLOSED: 9138 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID; 9139 goto end; 9140 case NFS4_CHECK_STATEID_OKAY: 9141 if (rfs4_has_session(cs)) 9142 break; 9143 /* FALLTHROUGH */ 9144 case NFS4_CHECK_STATEID_REPLAY: 9145 ASSERT(!rfs4_has_session(cs)); 9146 9147 switch (rfs4_check_olo_seqid(olo->open_seqid, 9148 sp->rs_owner, resop)) { 9149 case NFS4_CHKSEQ_OKAY: 9150 if (rc == NFS4_CHECK_STATEID_OKAY) 9151 break; 9152 /* 9153 * This is replayed stateid; if seqid 9154 * matches next expected, then client 9155 * is using wrong seqid. 9156 */ 9157 /* FALLTHROUGH */ 9158 case NFS4_CHKSEQ_BAD: 9159 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID; 9160 goto end; 9161 case NFS4_CHKSEQ_REPLAY: 9162 /* This is a duplicate LOCK request */ 9163 dup_lock = TRUE; 9164 9165 /* 9166 * For a duplicate we do not want to 9167 * create a new lockowner as it should 9168 * already exist. 9169 * Turn off the lockowner create flag. 
9170 */ 9171 lcreate = FALSE; 9172 } 9173 break; 9174 } 9175 9176 lo = rfs4_findlockowner(&olo->lock_owner, &lcreate); 9177 if (lo == NULL) { 9178 NFS4_DEBUG(rfs4_debug, 9179 (CE_NOTE, "rfs4_op_lock: no lock owner")); 9180 *cs->statusp = resp->status = NFS4ERR_RESOURCE; 9181 goto end; 9182 } 9183 9184 lsp = rfs4_findlo_state_by_owner(lo, sp, &create); 9185 if (lsp == NULL) { 9186 rfs4_update_lease(sp->rs_owner->ro_client); 9187 /* 9188 * Only update theh open_seqid if this is not 9189 * a duplicate request 9190 */ 9191 if (dup_lock == FALSE) { 9192 rfs4_update_open_sequence(sp->rs_owner); 9193 } 9194 9195 NFS4_DEBUG(rfs4_debug, 9196 (CE_NOTE, "rfs4_op_lock: no state")); 9197 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT; 9198 rfs4_update_open_resp(sp->rs_owner, resop, NULL); 9199 rfs4_lockowner_rele(lo); 9200 goto end; 9201 } 9202 9203 /* 9204 * This is the new_lock_owner branch and the client is 9205 * supposed to be associating a new lock_owner with 9206 * the open file at this point. If we find that a 9207 * lock_owner/state association already exists and a 9208 * successful LOCK request was returned to the client, 9209 * an error is returned to the client since this is 9210 * not appropriate. The client should be using the 9211 * existing lock_owner branch. 9212 */ 9213 if (!rfs4_has_session(cs) && !dup_lock && !create) { 9214 if (lsp->rls_lock_completed == TRUE) { 9215 *cs->statusp = 9216 resp->status = NFS4ERR_BAD_SEQID; 9217 rfs4_lockowner_rele(lo); 9218 goto end; 9219 } 9220 } 9221 9222 rfs4_update_lease(sp->rs_owner->ro_client); 9223 9224 /* 9225 * Only update theh open_seqid if this is not 9226 * a duplicate request 9227 */ 9228 if (dup_lock == FALSE) { 9229 rfs4_update_open_sequence(sp->rs_owner); 9230 } 9231 9232 /* 9233 * If this is a duplicate lock request, just copy the 9234 * previously saved reply and return. 
9235 */ 9236 if (dup_lock == TRUE) { 9237 /* verify that lock_seqid's match */ 9238 if (lsp->rls_seqid != olo->lock_seqid) { 9239 NFS4_DEBUG(rfs4_debug, 9240 (CE_NOTE, "rfs4_op_lock: Dup-Lock seqid bad" 9241 "lsp->seqid=%d old->seqid=%d", 9242 lsp->rls_seqid, olo->lock_seqid)); 9243 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID; 9244 } else { 9245 rfs4_copy_reply(resop, &lsp->rls_reply); 9246 /* 9247 * Make sure to copy the just 9248 * retrieved reply status into the 9249 * overall compound status 9250 */ 9251 *cs->statusp = resp->status; 9252 } 9253 rfs4_lockowner_rele(lo); 9254 goto end; 9255 } 9256 9257 rfs4_dbe_lock(lsp->rls_dbe); 9258 9259 /* Make sure to update the lock sequence id */ 9260 lsp->rls_seqid = olo->lock_seqid; 9261 9262 NFS4_DEBUG(rfs4_debug, 9263 (CE_NOTE, "Lock seqid established as %d", lsp->rls_seqid)); 9264 9265 /* 9266 * This is used to signify the newly created lockowner 9267 * stateid and its sequence number. The checks for 9268 * sequence number and increment don't occur on the 9269 * very first lock request for a lockowner. 
9270 */ 9271 lsp->rls_skip_seqid_check = TRUE; 9272 9273 /* hold off other access to lsp while we tinker */ 9274 rfs4_sw_enter(&lsp->rls_sw); 9275 ls_sw_held = TRUE; 9276 9277 rfs4_dbe_unlock(lsp->rls_dbe); 9278 9279 rfs4_lockowner_rele(lo); 9280 } else { 9281 stateid = &args->locker.locker4_u.lock_owner.lock_stateid; 9282 /* get lsp and hold the lock on the underlying file struct */ 9283 if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE)) 9284 != NFS4_OK) { 9285 *cs->statusp = resp->status = status; 9286 DTRACE_NFSV4_2(op__lock__done, struct compound_state *, 9287 cs, LOCK4res *, resp); 9288 return; 9289 } 9290 create = FALSE; /* We didn't create lsp */ 9291 9292 /* Ensure specified filehandle matches */ 9293 if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) { 9294 rfs4_lo_state_rele(lsp, TRUE); 9295 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID; 9296 DTRACE_NFSV4_2(op__lock__done, struct compound_state *, 9297 cs, LOCK4res *, resp); 9298 return; 9299 } 9300 9301 /* hold off other access to lsp while we tinker */ 9302 rfs4_sw_enter(&lsp->rls_sw); 9303 ls_sw_held = TRUE; 9304 9305 switch (rfs4_check_lo_stateid_seqid(lsp, stateid, cs)) { 9306 /* 9307 * The stateid looks like it was okay (expected to be 9308 * the next one) 9309 */ 9310 case NFS4_CHECK_STATEID_OKAY: 9311 if (rfs4_has_session(cs)) 9312 break; 9313 9314 /* 9315 * The sequence id is now checked. Determine 9316 * if this is a replay or if it is in the 9317 * expected (next) sequence. In the case of a 9318 * replay, there are two replay conditions 9319 * that may occur. The first is the normal 9320 * condition where a LOCK is done with a 9321 * NFS4_OK response and the stateid is 9322 * updated. That case is handled below when 9323 * the stateid is identified as a REPLAY. The 9324 * second is the case where an error is 9325 * returned, like NFS4ERR_DENIED, and the 9326 * sequence number is updated but the stateid 9327 * is not updated. This second case is dealt 9328 * with here. 
So it may seem odd that the 9329 * stateid is okay but the sequence id is a 9330 * replay but it is okay. 9331 */ 9332 switch (rfs4_check_lock_seqid( 9333 args->locker.locker4_u.lock_owner.lock_seqid, 9334 lsp, resop)) { 9335 case NFS4_CHKSEQ_REPLAY: 9336 if (resp->status != NFS4_OK) { 9337 /* 9338 * Here is our replay and need 9339 * to verify that the last 9340 * response was an error. 9341 */ 9342 *cs->statusp = resp->status; 9343 goto end; 9344 } 9345 /* 9346 * This is done since the sequence id 9347 * looked like a replay but it didn't 9348 * pass our check so a BAD_SEQID is 9349 * returned as a result. 9350 */ 9351 /*FALLTHROUGH*/ 9352 case NFS4_CHKSEQ_BAD: 9353 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID; 9354 goto end; 9355 case NFS4_CHKSEQ_OKAY: 9356 /* Everything looks okay move ahead */ 9357 break; 9358 } 9359 break; 9360 case NFS4_CHECK_STATEID_OLD: 9361 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID; 9362 goto end; 9363 case NFS4_CHECK_STATEID_BAD: 9364 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID; 9365 goto end; 9366 case NFS4_CHECK_STATEID_EXPIRED: 9367 *cs->statusp = resp->status = NFS4ERR_EXPIRED; 9368 goto end; 9369 case NFS4_CHECK_STATEID_CLOSED: 9370 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID; 9371 goto end; 9372 case NFS4_CHECK_STATEID_REPLAY: 9373 ASSERT(!rfs4_has_session(cs)); 9374 9375 switch (rfs4_check_lock_seqid( 9376 args->locker.locker4_u.lock_owner.lock_seqid, 9377 lsp, resop)) { 9378 case NFS4_CHKSEQ_OKAY: 9379 /* 9380 * This is a replayed stateid; if 9381 * seqid matches the next expected, 9382 * then client is using wrong seqid. 
9383 */ 9384 case NFS4_CHKSEQ_BAD: 9385 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID; 9386 goto end; 9387 case NFS4_CHKSEQ_REPLAY: 9388 rfs4_update_lease(lsp->rls_locker->rl_client); 9389 *cs->statusp = status = resp->status; 9390 goto end; 9391 } 9392 break; 9393 default: 9394 ASSERT(FALSE); 9395 break; 9396 } 9397 9398 rfs4_update_lock_sequence(lsp); 9399 rfs4_update_lease(lsp->rls_locker->rl_client); 9400 } 9401 9402 /* 9403 * NFS4 only allows locking on regular files, so 9404 * verify type of object. 9405 */ 9406 if (cs->vp->v_type != VREG) { 9407 if (cs->vp->v_type == VDIR) 9408 status = NFS4ERR_ISDIR; 9409 else 9410 status = NFS4ERR_INVAL; 9411 goto out; 9412 } 9413 9414 cp = lsp->rls_state->rs_owner->ro_client; 9415 9416 if (rfs4_clnt_in_grace(cp) && !args->reclaim) { 9417 status = NFS4ERR_GRACE; 9418 goto out; 9419 } 9420 9421 if (rfs4_clnt_in_grace(cp) && args->reclaim && !cp->rc_can_reclaim) { 9422 status = NFS4ERR_NO_GRACE; 9423 goto out; 9424 } 9425 9426 if (!rfs4_clnt_in_grace(cp) && args->reclaim) { 9427 status = NFS4ERR_NO_GRACE; 9428 goto out; 9429 } 9430 9431 if (lsp->rls_state->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE) 9432 cs->deleg = TRUE; 9433 9434 status = rfs4_do_lock(lsp, args->locktype, 9435 args->offset, args->length, cs->cr, resop); 9436 9437 out: 9438 lsp->rls_skip_seqid_check = FALSE; 9439 9440 *cs->statusp = resp->status = status; 9441 9442 if (status == NFS4_OK) { 9443 resp->LOCK4res_u.lock_stateid = lsp->rls_lockid.stateid; 9444 lsp->rls_lock_completed = TRUE; 9445 } 9446 /* 9447 * Only update the "OPEN" response here if this was a new 9448 * lock_owner 9449 */ 9450 if (sp) 9451 rfs4_update_open_resp(sp->rs_owner, resop, NULL); 9452 9453 rfs4_update_lock_resp(lsp, resop); 9454 9455 end: 9456 if (lsp) { 9457 if (ls_sw_held) 9458 rfs4_sw_exit(&lsp->rls_sw); 9459 /* 9460 * If an sp obtained, then the lsp does not represent 9461 * a lock on the file struct. 
9462 */ 9463 if (sp != NULL) 9464 rfs4_lo_state_rele(lsp, FALSE); 9465 else 9466 rfs4_lo_state_rele(lsp, TRUE); 9467 } 9468 if (sp) { 9469 rfs4_sw_exit(&sp->rs_owner->ro_sw); 9470 rfs4_state_rele(sp); 9471 } 9472 9473 DTRACE_NFSV4_2(op__lock__done, struct compound_state *, cs, 9474 LOCK4res *, resp); 9475 } 9476 9477 /* free function for LOCK/LOCKT */ 9478 static void 9479 lock_denied_free(nfs_resop4 *resop) 9480 { 9481 LOCK4denied *dp = NULL; 9482 9483 switch (resop->resop) { 9484 case OP_LOCK: 9485 if (resop->nfs_resop4_u.oplock.status == NFS4ERR_DENIED) 9486 dp = &resop->nfs_resop4_u.oplock.LOCK4res_u.denied; 9487 break; 9488 case OP_LOCKT: 9489 if (resop->nfs_resop4_u.oplockt.status == NFS4ERR_DENIED) 9490 dp = &resop->nfs_resop4_u.oplockt.denied; 9491 break; 9492 default: 9493 break; 9494 } 9495 9496 if (dp) 9497 kmem_free(dp->owner.owner_val, dp->owner.owner_len); 9498 } 9499 9500 /*ARGSUSED*/ 9501 void 9502 rfs4_op_locku(nfs_argop4 *argop, nfs_resop4 *resop, 9503 struct svc_req *req, struct compound_state *cs) 9504 { 9505 LOCKU4args *args = &argop->nfs_argop4_u.oplocku; 9506 LOCKU4res *resp = &resop->nfs_resop4_u.oplocku; 9507 nfsstat4 status; 9508 stateid4 *stateid = &args->lock_stateid; 9509 rfs4_lo_state_t *lsp; 9510 9511 DTRACE_NFSV4_2(op__locku__start, struct compound_state *, cs, 9512 LOCKU4args *, args); 9513 9514 if (cs->vp == NULL) { 9515 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 9516 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs, 9517 LOCKU4res *, resp); 9518 return; 9519 } 9520 9521 if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE)) != NFS4_OK) { 9522 *cs->statusp = resp->status = status; 9523 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs, 9524 LOCKU4res *, resp); 9525 return; 9526 } 9527 9528 /* Ensure specified filehandle matches */ 9529 if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) { 9530 rfs4_lo_state_rele(lsp, TRUE); 9531 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID; 9532 
DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs, 9533 LOCKU4res *, resp); 9534 return; 9535 } 9536 9537 /* hold off other access to lsp while we tinker */ 9538 rfs4_sw_enter(&lsp->rls_sw); 9539 9540 switch (rfs4_check_lo_stateid_seqid(lsp, stateid, cs)) { 9541 case NFS4_CHECK_STATEID_OKAY: 9542 if (rfs4_has_session(cs)) 9543 break; 9544 9545 if (rfs4_check_lock_seqid(args->seqid, lsp, resop) 9546 != NFS4_CHKSEQ_OKAY) { 9547 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID; 9548 goto end; 9549 } 9550 break; 9551 case NFS4_CHECK_STATEID_OLD: 9552 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID; 9553 goto end; 9554 case NFS4_CHECK_STATEID_BAD: 9555 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID; 9556 goto end; 9557 case NFS4_CHECK_STATEID_EXPIRED: 9558 *cs->statusp = resp->status = NFS4ERR_EXPIRED; 9559 goto end; 9560 case NFS4_CHECK_STATEID_CLOSED: 9561 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID; 9562 goto end; 9563 case NFS4_CHECK_STATEID_REPLAY: 9564 ASSERT(!rfs4_has_session(cs)); 9565 9566 switch (rfs4_check_lock_seqid(args->seqid, lsp, resop)) { 9567 case NFS4_CHKSEQ_OKAY: 9568 /* 9569 * This is a replayed stateid; if 9570 * seqid matches the next expected, 9571 * then client is using wrong seqid. 9572 */ 9573 case NFS4_CHKSEQ_BAD: 9574 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID; 9575 goto end; 9576 case NFS4_CHKSEQ_REPLAY: 9577 rfs4_update_lease(lsp->rls_locker->rl_client); 9578 *cs->statusp = status = resp->status; 9579 goto end; 9580 } 9581 break; 9582 default: 9583 ASSERT(FALSE); 9584 break; 9585 } 9586 9587 rfs4_update_lock_sequence(lsp); 9588 rfs4_update_lease(lsp->rls_locker->rl_client); 9589 9590 /* 9591 * NFS4 only allows locking on regular files, so 9592 * verify type of object. 
9593 */ 9594 if (cs->vp->v_type != VREG) { 9595 if (cs->vp->v_type == VDIR) 9596 status = NFS4ERR_ISDIR; 9597 else 9598 status = NFS4ERR_INVAL; 9599 goto out; 9600 } 9601 9602 if (rfs4_clnt_in_grace(lsp->rls_state->rs_owner->ro_client)) { 9603 status = NFS4ERR_GRACE; 9604 goto out; 9605 } 9606 9607 status = rfs4_do_lock(lsp, args->locktype, 9608 args->offset, args->length, cs->cr, resop); 9609 9610 out: 9611 *cs->statusp = resp->status = status; 9612 9613 if (status == NFS4_OK) 9614 resp->lock_stateid = lsp->rls_lockid.stateid; 9615 9616 rfs4_update_lock_resp(lsp, resop); 9617 9618 end: 9619 rfs4_sw_exit(&lsp->rls_sw); 9620 rfs4_lo_state_rele(lsp, TRUE); 9621 9622 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs, 9623 LOCKU4res *, resp); 9624 } 9625 9626 /* 9627 * LOCKT is a best effort routine, the client can not be guaranteed that 9628 * the status return is still in effect by the time the reply is received. 9629 * They are numerous race conditions in this routine, but we are not required 9630 * and can not be accurate. 9631 */ 9632 /*ARGSUSED*/ 9633 void 9634 rfs4_op_lockt(nfs_argop4 *argop, nfs_resop4 *resop, 9635 struct svc_req *req, struct compound_state *cs) 9636 { 9637 LOCKT4args *args = &argop->nfs_argop4_u.oplockt; 9638 LOCKT4res *resp = &resop->nfs_resop4_u.oplockt; 9639 rfs4_lockowner_t *lo; 9640 rfs4_client_t *cp; 9641 bool_t create = FALSE; 9642 struct flock64 flk; 9643 int error; 9644 int flag = FREAD | FWRITE; 9645 int ltype; 9646 length4 posix_length; 9647 sysid_t sysid; 9648 pid_t pid; 9649 9650 DTRACE_NFSV4_2(op__lockt__start, struct compound_state *, cs, 9651 LOCKT4args *, args); 9652 9653 if (cs->vp == NULL) { 9654 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 9655 goto out; 9656 } 9657 9658 /* 9659 * NFS4 only allows locking on regular files, so 9660 * verify type of object. 
9661 */ 9662 if (cs->vp->v_type != VREG) { 9663 if (cs->vp->v_type == VDIR) 9664 *cs->statusp = resp->status = NFS4ERR_ISDIR; 9665 else 9666 *cs->statusp = resp->status = NFS4ERR_INVAL; 9667 goto out; 9668 } 9669 9670 /* 9671 * Check out the clientid to ensure the server knows about it 9672 * so that we correctly inform the client of a server reboot. 9673 */ 9674 if ((cp = rfs4_findclient_by_id(args->owner.clientid, FALSE)) 9675 == NULL) { 9676 *cs->statusp = resp->status = 9677 rfs4_check_clientid(&args->owner.clientid, 0); 9678 goto out; 9679 } 9680 if (rfs4_lease_expired(cp)) { 9681 rfs4_client_close(cp); 9682 /* 9683 * Protocol doesn't allow returning NFS4ERR_STALE as 9684 * other operations do on this check so STALE_CLIENTID 9685 * is returned instead 9686 */ 9687 *cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID; 9688 goto out; 9689 } 9690 9691 if (rfs4_clnt_in_grace(cp) && !(cp->rc_can_reclaim)) { 9692 *cs->statusp = resp->status = NFS4ERR_GRACE; 9693 rfs4_client_rele(cp); 9694 goto out; 9695 } 9696 rfs4_client_rele(cp); 9697 9698 resp->status = NFS4_OK; 9699 9700 switch (args->locktype) { 9701 case READ_LT: 9702 case READW_LT: 9703 ltype = F_RDLCK; 9704 break; 9705 case WRITE_LT: 9706 case WRITEW_LT: 9707 ltype = F_WRLCK; 9708 break; 9709 } 9710 9711 posix_length = args->length; 9712 /* Check for zero length. 
To lock to end of file use all ones for V4 */ 9713 if (posix_length == 0) { 9714 *cs->statusp = resp->status = NFS4ERR_INVAL; 9715 goto out; 9716 } else if (posix_length == (length4)(~0)) { 9717 posix_length = 0; /* Posix to end of file */ 9718 } 9719 9720 /* Find or create a lockowner */ 9721 lo = rfs4_findlockowner(&args->owner, &create); 9722 9723 if (lo) { 9724 pid = lo->rl_pid; 9725 if ((resp->status = 9726 rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK) 9727 goto err; 9728 } else { 9729 pid = 0; 9730 sysid = lockt_sysid; 9731 } 9732 retry: 9733 flk.l_type = ltype; 9734 flk.l_whence = 0; /* SEEK_SET */ 9735 flk.l_start = args->offset; 9736 flk.l_len = posix_length; 9737 flk.l_sysid = sysid; 9738 flk.l_pid = pid; 9739 flag |= F_REMOTELOCK; 9740 9741 LOCK_PRINT(rfs4_debug, "rfs4_op_lockt", F_GETLK, &flk); 9742 9743 /* Note that length4 is uint64_t but l_len and l_start are off64_t */ 9744 if (flk.l_len < 0 || flk.l_start < 0) { 9745 resp->status = NFS4ERR_INVAL; 9746 goto err; 9747 } 9748 error = VOP_FRLOCK(cs->vp, F_GETLK, &flk, flag, (u_offset_t)0, 9749 NULL, cs->cr, NULL); 9750 9751 /* 9752 * N.B. We map error values to nfsv4 errors. This is differrent 9753 * than puterrno4 routine. 
9754 */ 9755 switch (error) { 9756 case 0: 9757 if (flk.l_type == F_UNLCK) 9758 resp->status = NFS4_OK; 9759 else { 9760 if (lock_denied(&resp->denied, &flk) == NFS4ERR_EXPIRED) 9761 goto retry; 9762 resp->status = NFS4ERR_DENIED; 9763 } 9764 break; 9765 case EOVERFLOW: 9766 resp->status = NFS4ERR_INVAL; 9767 break; 9768 case EINVAL: 9769 resp->status = NFS4ERR_NOTSUPP; 9770 break; 9771 default: 9772 cmn_err(CE_WARN, "rfs4_op_lockt: unexpected errno (%d)", 9773 error); 9774 resp->status = NFS4ERR_SERVERFAULT; 9775 break; 9776 } 9777 9778 err: 9779 if (lo) 9780 rfs4_lockowner_rele(lo); 9781 *cs->statusp = resp->status; 9782 out: 9783 DTRACE_NFSV4_2(op__lockt__done, struct compound_state *, cs, 9784 LOCKT4res *, resp); 9785 } 9786 9787 int 9788 rfs4_share(rfs4_state_t *sp, uint32_t access, uint32_t deny) 9789 { 9790 int err; 9791 int cmd; 9792 vnode_t *vp; 9793 struct shrlock shr; 9794 struct shr_locowner shr_loco; 9795 int fflags = 0; 9796 9797 ASSERT(rfs4_dbe_islocked(sp->rs_dbe)); 9798 ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID); 9799 9800 if (sp->rs_closed) 9801 return (NFS4ERR_OLD_STATEID); 9802 9803 vp = sp->rs_finfo->rf_vp; 9804 ASSERT(vp); 9805 9806 shr.s_access = shr.s_deny = 0; 9807 9808 if (access & OPEN4_SHARE_ACCESS_READ) { 9809 fflags |= FREAD; 9810 shr.s_access |= F_RDACC; 9811 } 9812 if (access & OPEN4_SHARE_ACCESS_WRITE) { 9813 fflags |= FWRITE; 9814 shr.s_access |= F_WRACC; 9815 } 9816 ASSERT(shr.s_access); 9817 9818 if (deny & OPEN4_SHARE_DENY_READ) 9819 shr.s_deny |= F_RDDNY; 9820 if (deny & OPEN4_SHARE_DENY_WRITE) 9821 shr.s_deny |= F_WRDNY; 9822 9823 shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe); 9824 shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt; 9825 shr_loco.sl_pid = shr.s_pid; 9826 shr_loco.sl_id = shr.s_sysid; 9827 shr.s_owner = (caddr_t)&shr_loco; 9828 shr.s_own_len = sizeof (shr_loco); 9829 9830 cmd = nbl_need_check(vp) ? 
F_SHARE_NBMAND : F_SHARE; 9831 9832 err = VOP_SHRLOCK(vp, cmd, &shr, fflags, CRED(), NULL); 9833 if (err != 0) { 9834 if (err == EAGAIN) 9835 err = NFS4ERR_SHARE_DENIED; 9836 else 9837 err = puterrno4(err); 9838 return (err); 9839 } 9840 9841 sp->rs_share_access |= access; 9842 sp->rs_share_deny |= deny; 9843 9844 return (0); 9845 } 9846 9847 int 9848 rfs4_unshare(rfs4_state_t *sp) 9849 { 9850 int err; 9851 struct shrlock shr; 9852 struct shr_locowner shr_loco; 9853 9854 ASSERT(rfs4_dbe_islocked(sp->rs_dbe)); 9855 9856 if (sp->rs_closed || sp->rs_share_access == 0) 9857 return (0); 9858 9859 ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID); 9860 ASSERT(sp->rs_finfo->rf_vp); 9861 9862 shr.s_access = shr.s_deny = 0; 9863 shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe); 9864 shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt; 9865 shr_loco.sl_pid = shr.s_pid; 9866 shr_loco.sl_id = shr.s_sysid; 9867 shr.s_owner = (caddr_t)&shr_loco; 9868 shr.s_own_len = sizeof (shr_loco); 9869 9870 err = VOP_SHRLOCK(sp->rs_finfo->rf_vp, F_UNSHARE, &shr, 0, CRED(), 9871 NULL); 9872 if (err != 0) { 9873 err = puterrno4(err); 9874 return (err); 9875 } 9876 9877 sp->rs_share_access = 0; 9878 sp->rs_share_deny = 0; 9879 9880 return (0); 9881 9882 } 9883 9884 static int 9885 rdma_setup_read_data4(READ4args *args, READ4res *rok) 9886 { 9887 struct clist *wcl; 9888 count4 count = rok->data_len; 9889 int wlist_len; 9890 9891 wcl = args->wlist; 9892 if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) { 9893 return (FALSE); 9894 } 9895 wcl = args->wlist; 9896 rok->wlist_len = wlist_len; 9897 rok->wlist = wcl; 9898 return (TRUE); 9899 } 9900 9901 /* tunable to disable server referrals */ 9902 int rfs4_no_referrals = 0; 9903 9904 /* 9905 * Find an NFS record in reparse point data. 9906 * Returns 0 for success and <0 or an errno value on failure. 
9907 */ 9908 int 9909 vn_find_nfs_record(vnode_t *vp, nvlist_t **nvlp, char **svcp, char **datap) 9910 { 9911 int err; 9912 char *stype, *val; 9913 nvlist_t *nvl; 9914 nvpair_t *curr; 9915 9916 if ((nvl = reparse_init()) == NULL) 9917 return (-1); 9918 9919 if ((err = reparse_vnode_parse(vp, nvl)) != 0) { 9920 reparse_free(nvl); 9921 return (err); 9922 } 9923 9924 curr = NULL; 9925 while ((curr = nvlist_next_nvpair(nvl, curr)) != NULL) { 9926 if ((stype = nvpair_name(curr)) == NULL) { 9927 reparse_free(nvl); 9928 return (-2); 9929 } 9930 if (strncasecmp(stype, "NFS", 3) == 0) 9931 break; 9932 } 9933 9934 if ((curr == NULL) || 9935 (nvpair_value_string(curr, &val))) { 9936 reparse_free(nvl); 9937 return (-3); 9938 } 9939 *nvlp = nvl; 9940 *svcp = stype; 9941 *datap = val; 9942 return (0); 9943 } 9944 9945 int 9946 vn_is_nfs_reparse(vnode_t *vp, cred_t *cr) 9947 { 9948 nvlist_t *nvl; 9949 char *s, *d; 9950 9951 if (rfs4_no_referrals != 0) 9952 return (B_FALSE); 9953 9954 if (vn_is_reparse(vp, cr, NULL) == B_FALSE) 9955 return (B_FALSE); 9956 9957 if (vn_find_nfs_record(vp, &nvl, &s, &d) != 0) 9958 return (B_FALSE); 9959 9960 reparse_free(nvl); 9961 9962 return (B_TRUE); 9963 } 9964 9965 /* 9966 * There is a user-level copy of this routine in ref_subr.c. 9967 * Changes should be kept in sync. 
9968 */ 9969 static int 9970 nfs4_create_components(char *path, component4 *comp4) 9971 { 9972 int slen, plen, ncomp; 9973 char *ori_path, *nxtc, buf[MAXNAMELEN]; 9974 9975 if (path == NULL) 9976 return (0); 9977 9978 plen = strlen(path) + 1; /* include the terminator */ 9979 ori_path = path; 9980 ncomp = 0; 9981 9982 /* count number of components in the path */ 9983 for (nxtc = path; nxtc < ori_path + plen; nxtc++) { 9984 if (*nxtc == '/' || *nxtc == '\0' || *nxtc == '\n') { 9985 if ((slen = nxtc - path) == 0) { 9986 path = nxtc + 1; 9987 continue; 9988 } 9989 9990 if (comp4 != NULL) { 9991 bcopy(path, buf, slen); 9992 buf[slen] = '\0'; 9993 (void) str_to_utf8(buf, &comp4[ncomp]); 9994 } 9995 9996 ncomp++; /* 1 valid component */ 9997 path = nxtc + 1; 9998 } 9999 if (*nxtc == '\0' || *nxtc == '\n') 10000 break; 10001 } 10002 10003 return (ncomp); 10004 } 10005 10006 /* 10007 * There is a user-level copy of this routine in ref_subr.c. 10008 * Changes should be kept in sync. 10009 */ 10010 static int 10011 make_pathname4(char *path, pathname4 *pathname) 10012 { 10013 int ncomp; 10014 component4 *comp4; 10015 10016 if (pathname == NULL) 10017 return (0); 10018 10019 if (path == NULL) { 10020 pathname->pathname4_val = NULL; 10021 pathname->pathname4_len = 0; 10022 return (0); 10023 } 10024 10025 /* count number of components to alloc buffer */ 10026 if ((ncomp = nfs4_create_components(path, NULL)) == 0) { 10027 pathname->pathname4_val = NULL; 10028 pathname->pathname4_len = 0; 10029 return (0); 10030 } 10031 comp4 = kmem_zalloc(ncomp * sizeof (component4), KM_SLEEP); 10032 10033 /* copy components into allocated buffer */ 10034 ncomp = nfs4_create_components(path, comp4); 10035 10036 pathname->pathname4_val = comp4; 10037 pathname->pathname4_len = ncomp; 10038 10039 return (ncomp); 10040 } 10041 10042 #define xdr_fs_locations4 xdr_fattr4_fs_locations 10043 10044 fs_locations4 * 10045 fetch_referral(vnode_t *vp, cred_t *cr) 10046 { 10047 nvlist_t *nvl; 10048 char 
*stype, *sdata; 10049 fs_locations4 *result; 10050 char buf[1024]; 10051 size_t bufsize; 10052 XDR xdr; 10053 int err; 10054 10055 /* 10056 * Check attrs to ensure it's a reparse point 10057 */ 10058 if (vn_is_reparse(vp, cr, NULL) == B_FALSE) 10059 return (NULL); 10060 10061 /* 10062 * Look for an NFS record and get the type and data 10063 */ 10064 if (vn_find_nfs_record(vp, &nvl, &stype, &sdata) != 0) 10065 return (NULL); 10066 10067 /* 10068 * With the type and data, upcall to get the referral 10069 */ 10070 bufsize = sizeof (buf); 10071 bzero(buf, sizeof (buf)); 10072 err = reparse_kderef((const char *)stype, (const char *)sdata, 10073 buf, &bufsize); 10074 reparse_free(nvl); 10075 10076 DTRACE_PROBE4(nfs4serv__func__referral__upcall, 10077 char *, stype, char *, sdata, char *, buf, int, err); 10078 if (err) { 10079 cmn_err(CE_NOTE, 10080 "reparsed daemon not running: unable to get referral (%d)", 10081 err); 10082 return (NULL); 10083 } 10084 10085 /* 10086 * We get an XDR'ed record back from the kderef call 10087 */ 10088 xdrmem_create(&xdr, buf, bufsize, XDR_DECODE); 10089 result = kmem_alloc(sizeof (fs_locations4), KM_SLEEP); 10090 err = xdr_fs_locations4(&xdr, result); 10091 XDR_DESTROY(&xdr); 10092 if (err != TRUE) { 10093 DTRACE_PROBE1(nfs4serv__func__referral__upcall__xdrfail, 10094 int, err); 10095 return (NULL); 10096 } 10097 10098 /* 10099 * Look at path to recover fs_root, ignoring the leading '/' 10100 */ 10101 (void) make_pathname4(vp->v_path, &result->fs_root); 10102 10103 return (result); 10104 } 10105 10106 char * 10107 build_symlink(vnode_t *vp, cred_t *cr, size_t *strsz) 10108 { 10109 fs_locations4 *fsl; 10110 fs_location4 *fs; 10111 char *server, *path, *symbuf; 10112 static char *prefix = "/net/"; 10113 int i, size, npaths; 10114 uint_t len; 10115 10116 /* Get the referral */ 10117 if ((fsl = fetch_referral(vp, cr)) == NULL) 10118 return (NULL); 10119 10120 /* Deal with only the first location and first server */ 10121 fs = 
&fsl->locations_val[0]; 10122 server = utf8_to_str(&fs->server_val[0], &len, NULL); 10123 if (server == NULL) { 10124 rfs4_free_fs_locations4(fsl); 10125 kmem_free(fsl, sizeof (fs_locations4)); 10126 return (NULL); 10127 } 10128 10129 /* Figure out size for "/net/" + host + /path/path/path + NULL */ 10130 size = strlen(prefix) + len; 10131 for (i = 0; i < fs->rootpath.pathname4_len; i++) 10132 size += fs->rootpath.pathname4_val[i].utf8string_len + 1; 10133 10134 /* Allocate the symlink buffer and fill it */ 10135 symbuf = kmem_zalloc(size, KM_SLEEP); 10136 (void) strcat(symbuf, prefix); 10137 (void) strcat(symbuf, server); 10138 kmem_free(server, len); 10139 10140 npaths = 0; 10141 for (i = 0; i < fs->rootpath.pathname4_len; i++) { 10142 path = utf8_to_str(&fs->rootpath.pathname4_val[i], &len, NULL); 10143 if (path == NULL) 10144 continue; 10145 (void) strcat(symbuf, "/"); 10146 (void) strcat(symbuf, path); 10147 npaths++; 10148 kmem_free(path, len); 10149 } 10150 10151 rfs4_free_fs_locations4(fsl); 10152 kmem_free(fsl, sizeof (fs_locations4)); 10153 10154 if (strsz != NULL) 10155 *strsz = size; 10156 return (symbuf); 10157 } 10158 10159 /* 10160 * Check to see if we have a downrev Solaris client, so that we 10161 * can send it a symlink instead of a referral. 10162 */ 10163 int 10164 client_is_downrev(struct svc_req *req) 10165 { 10166 struct sockaddr *ca; 10167 rfs4_clntip_t *ci; 10168 bool_t create = FALSE; 10169 int is_downrev; 10170 10171 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 10172 ASSERT(ca); 10173 ci = rfs4_find_clntip(ca, &create); 10174 if (ci == NULL) 10175 return (0); 10176 is_downrev = ci->ri_no_referrals; 10177 rfs4_dbe_rele(ci->ri_dbe); 10178 return (is_downrev); 10179 } 10180 10181 /* 10182 * Do the main work of handling HA-NFSv4 Resource Group failover on 10183 * Sun Cluster. 10184 * We need to detect whether any RG admin paths have been added or removed, 10185 * and adjust resources accordingly. 
 * Currently we're using a very inefficient algorithm, ~ 2 * O(n**2).  In
 * order to scale, the list and array of paths need to be held in more
 * suitable data structures.
 *
 * The routine reconciles nsrv4->dss_pathlist (the paths currently being
 * served, a circular list) with rfs4_dss_newpaths[0 ..
 * rfs4_dss_numnewpaths - 1] (the paths passed in by nfsd) in two passes:
 * first it drops served paths that are no longer passed in, then it
 * collects passed-in paths that are not yet served and, if there are
 * any, creates a server instance for them, reads their stable storage
 * state and resets all grace periods.
 */
static void
hanfsv4_failover(nfs4_srv_t *nsrv4)
{
	int i, start_grace, numadded_paths = 0;
	char **added_paths = NULL;
	rfs4_dss_path_t *dss_path;

	/*
	 * Note: currently, dss_pathlist cannot be NULL, since
	 * it will always include an entry for NFS4_DSS_VAR_DIR. If we
	 * make the latter dynamically specified too, the following will
	 * need to be adjusted.
	 */

	/*
	 * First, look for removed paths: RGs that have been failed-over
	 * away from this node.
	 * Walk the "currently-serving" dss_pathlist and, for each
	 * path, check if it is on the "passed-in" rfs4_dss_newpaths array
	 * from nfsd. If not, that RG path has been removed.
	 *
	 * Note that nfsd has sorted rfs4_dss_newpaths for us, and removed
	 * any duplicates.
	 *
	 * NOTE(review): the loop ends when the walk returns to
	 * nsrv4->dss_pathlist, and nothing here updates that pointer
	 * when an entry is freed.  This looks safe only as long as the
	 * head entry is never a removed one (e.g. it is the
	 * NFS4_DSS_VAR_DIR entry) -- confirm.
	 */
	dss_path = nsrv4->dss_pathlist;
	do {
		int found = 0;
		char *path = dss_path->path;

		/* used only for non-HA so may not be removed */
		if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
			dss_path = dss_path->next;
			continue;
		}

		for (i = 0; i < rfs4_dss_numnewpaths; i++) {
			int cmpret;
			char *newpath = rfs4_dss_newpaths[i];

			/*
			 * Since nfsd has sorted rfs4_dss_newpaths for us,
			 * once the return from strcmp is negative we know
			 * we've passed the point where "path" should be,
			 * and can stop searching: "path" has been removed.
			 */
			cmpret = strcmp(path, newpath);
			if (cmpret < 0)
				break;
			if (cmpret == 0) {
				found = 1;
				break;
			}
		}

		if (found == 0) {
			/* capture everything needed before the entry is freed */
			unsigned index = dss_path->index;
			rfs4_servinst_t *sip = dss_path->sip;
			rfs4_dss_path_t *path_next = dss_path->next;

			/*
			 * This path has been removed.
			 * We must clear out the servinst reference to
			 * it, since it's now owned by another
			 * node: we should not attempt to touch it.
			 */
			ASSERT(dss_path == sip->dss_paths[index]);
			sip->dss_paths[index] = NULL;

			/* remove from "currently-serving" list, and destroy */
			remque(dss_path);
			/* allow for NUL */
			kmem_free(dss_path->path, strlen(dss_path->path) + 1);
			kmem_free(dss_path, sizeof (rfs4_dss_path_t));

			dss_path = path_next;
		} else {
			/* path was found; not removed */
			dss_path = dss_path->next;
		}
	} while (dss_path != nsrv4->dss_pathlist);

	/*
	 * Now, look for added paths: RGs that have been failed-over
	 * to this node.
	 * Walk the "passed-in" rfs4_dss_newpaths array from nfsd and,
	 * for each path, check if it is on the "currently-serving"
	 * dss_pathlist. If not, that RG path has been added.
	 *
	 * Note: we don't do duplicate detection here; nfsd does that for us.
	 *
	 * Note: numadded_paths <= rfs4_dss_numnewpaths, which gives us
	 * an upper bound for the size needed for added_paths[numadded_paths].
	 */

	/* probably more space than we need, but guaranteed to be enough */
	if (rfs4_dss_numnewpaths > 0) {
		size_t sz = rfs4_dss_numnewpaths * sizeof (char *);
		added_paths = kmem_zalloc(sz, KM_SLEEP);
	}

	/* walk the "passed-in" rfs4_dss_newpaths array from nfsd */
	for (i = 0; i < rfs4_dss_numnewpaths; i++) {
		int found = 0;
		char *newpath = rfs4_dss_newpaths[i];

		dss_path = nsrv4->dss_pathlist;
		do {
			char *path = dss_path->path;

			/* used only for non-HA */
			if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
				dss_path = dss_path->next;
				continue;
			}

			/*
			 * NOTE(review): this is a prefix comparison over
			 * strlen(path) bytes, whereas the removal pass
			 * above uses an exact strcmp().  A served path
			 * that is a proper prefix of newpath therefore
			 * counts as "found" here -- confirm that this is
			 * intended.
			 */
			if (strncmp(path, newpath, strlen(path)) == 0) {
				found = 1;
				break;
			}

			dss_path = dss_path->next;
		} while (dss_path != nsrv4->dss_pathlist);

		if (found == 0) {
			/* the array borrows the pointer; nothing is copied */
			added_paths[numadded_paths] = newpath;
			numadded_paths++;
		}
	}

	/* did we find any added paths? */
	if (numadded_paths > 0) {

		/* create a new server instance, and start its grace period */
		start_grace = 1;
		/* CSTYLED */
		rfs4_servinst_create(nsrv4, start_grace, numadded_paths, added_paths);

		/* read in the stable storage state from these paths */
		rfs4_dss_readstate(nsrv4, numadded_paths, added_paths);

		/*
		 * Multiple failovers during a grace period will cause
		 * clients of the same resource group to be partitioned
		 * into different server instances, with different
		 * grace periods.  Since clients of the same resource
		 * group must be subject to the same grace period,
		 * we need to reset all currently active grace periods.
		 */
		rfs4_grace_reset_all(nsrv4);
	}

	/* release the array itself, with the size it was allocated with */
	if (rfs4_dss_numnewpaths > 0)
		kmem_free(added_paths, rfs4_dss_numnewpaths * sizeof (char *));
}