1 /* 2 * This file and its contents are supplied under the terms of the 3 * Common Development and Distribution License ("CDDL"), version 1.0. 4 * You may only use this file in accordance with the terms of version 5 * 1.0 of the CDDL. 6 * 7 * A full copy of the text of the CDDL should have accompanied this 8 * source. A copy of the CDDL is also available via the Internet at 9 * http://www.illumos.org/license/CDDL. 10 */ 11 12 /* 13 * Copyright 2020 Joyent, Inc. 14 * Copyright 2022 Tintri by DDN, Inc. All rights reserved. 15 * Copyright 2023 Oxide Computer Company 16 */ 17 18 /* 19 * This file drives topo node enumeration of NVMe controllers. A single "nvme" 20 * node is enumerated for each NVMe controller. Child "disk" nodes are then 21 * enumerated for each active or attached NVMe namespace. 22 * 23 * nvme nodes are expected to be enumerated under either a "bay" node (for U.2 24 * devices) or a "slot" node (for M.2 devices) or a "pciexfn" node (for AIC 25 * devices). 26 * 27 * Enumeration of NVMe controllers on PCIe add-in cards is automatically driven 28 * by the pcibus topo module. 29 * 30 * In order to allow for associating a given NVMe controller with a physical 31 * location, enumeration of U.2 and M.2 devices should be driven by a 32 * platform-specific topo map which statically sets the following two 33 * properties on the parent "bay" or "slot" node: 34 * 35 * propgroup property description 36 * --------- -------- ------------ 37 * binding driver "nvme" 38 * binding parent-device devpath of parent PCIe device 39 * 40 * for example: 41 * 42 * <propgroup name="binding" version="1" name-stability="Private" 43 * data-stability="Private"> 44 * <propval name="driver" type="string" value="nvme"/> 45 * <propval name="parent-device" type="string" 46 * value="/pci@0,0/pci8086,6f09@3,1"/> 47 * </propgroup> 48 * <dependents grouping="children"> 49 * <range name="nvme" min="0" max="0"> 50 * <enum-method name="disk" version="1"/> 51 * </range> 52 * </dependents> 53 */ 54 #include <stdlib.h> 55 #include <sys/types.h> 56 #include <sys/stat.h> 57 #include <fcntl.h> 58 #include <unistd.h> 59 #include <string.h> 60 #include <strings.h> 61 #include <stdbool.h> 62 63 #include <sys/fm/protocol.h> 64 #include <fm/topo_hc.h> 65 #include <fm/topo_mod.h> 66 #include <topo_ufm.h> 67 68 #include <sys/dkio.h> 69 #include <sys/scsi/generic/inquiry.h> 70 71 #include <sys/nvme.h> 72 #include "disk.h" 73 #include "disk_drivers.h" 74 75 typedef struct nvme_enum_info { 76 topo_mod_t *nei_mod; 77 di_node_t nei_dinode; 78 nvme_identify_ctrl_t *nei_idctl; 79 nvme_version_t nei_vers; 80 tnode_t *nei_parent; 81 tnode_t *nei_nvme; 82 nvlist_t *nei_nvme_fmri; 83 const char *nei_nvme_path; 84 int nei_fd; 85 } nvme_enum_info_t; 86 87 typedef struct devlink_arg { 88 topo_mod_t *dla_mod; 89 char *dla_logical_disk; 90 uint_t dla_strsz; 91 } devlink_arg_t; 92 93 static int 94 devlink_cb(di_devlink_t dl, void *arg) 95 { 96 devlink_arg_t *dlarg = (devlink_arg_t *)arg; 97 topo_mod_t *mod = dlarg->dla_mod; 98 const char *devpath; 99 char *slice, *ctds; 100 101 if ((devpath = di_devlink_path(dl)) == NULL || 102 (dlarg->dla_logical_disk = topo_mod_strdup(mod, devpath)) == 103 NULL) { 104 return (DI_WALK_TERMINATE); 105 } 106 107 /* 108 * We need to keep track of the original string size before we 109 * truncate it with a NUL, so that we can free the right number of 110 * bytes when we're done, otherwise libumem will complain. 111 */ 112 dlarg->dla_strsz = strlen(dlarg->dla_logical_disk) + 1; 113 114 /* trim the slice off the public name */ 115 if (((ctds = strrchr(dlarg->dla_logical_disk, '/')) != NULL) && 116 ((slice = strchr(ctds, 's')) != NULL)) 117 *slice = '\0'; 118 119 return (DI_WALK_TERMINATE); 120 } 121 122 static char * 123 get_logical_disk(topo_mod_t *mod, const char *devpath, uint_t *bufsz) 124 { 125 di_devlink_handle_t devhdl; 126 devlink_arg_t dlarg = { 0 }; 127 char *minorpath = NULL; 128 129 if (asprintf(&minorpath, "%s:a", devpath) < 0) { 130 return (NULL); 131 } 132 133 if ((devhdl = di_devlink_init(NULL, 0)) == DI_NODE_NIL) { 134 topo_mod_dprintf(mod, "%s: di_devlink_init failed", __func__); 135 free(minorpath); 136 return (NULL); 137 } 138 139 dlarg.dla_mod = mod; 140 141 (void) di_devlink_walk(devhdl, "^dsk/", minorpath, DI_PRIMARY_LINK, 142 &dlarg, devlink_cb); 143 144 (void) di_devlink_fini(&devhdl); 145 free(minorpath); 146 147 *bufsz = dlarg.dla_strsz; 148 return (dlarg.dla_logical_disk); 149 } 150 151 static bool 152 disk_nvme_make_ns_serial(topo_mod_t *mod, const nvme_identify_nsid_t *id, 153 uint32_t nsid, char *buf, size_t buflen) 154 { 155 uint8_t zero_guid[16] = { 0 }; 156 int ret; 157 158 if (bcmp(zero_guid, id->id_nguid, sizeof (id->id_nguid)) != 0) { 159 ret = snprintf(buf, buflen, "%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X" 160 "%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X", 161 id->id_nguid[0], id->id_nguid[1], id->id_nguid[2], 162 id->id_nguid[3], id->id_nguid[4], id->id_nguid[5], 163 id->id_nguid[6], id->id_nguid[7], id->id_nguid[8], 164 id->id_nguid[9], id->id_nguid[10], id->id_nguid[11], 165 id->id_nguid[12], id->id_nguid[13], id->id_nguid[14], 166 id->id_nguid[15]); 167 } else if (bcmp(zero_guid, id->id_eui64, sizeof (id->id_eui64)) != 0) { 168 ret = snprintf(buf, buflen, 169 "%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X", 170 id->id_eui64[0], id->id_eui64[1], id->id_eui64[2], 171 id->id_eui64[3], id->id_eui64[4], id->id_eui64[5], 172 id->id_eui64[6], id->id_eui64[7]); 173 } else { 174 ret = snprintf(buf, buflen, "%u", nsid); 175 } 176 177 if ((size_t)ret >= buflen) { 178 topo_mod_dprintf(mod, "overflowed serial number for nsid %u: " 179 "needed %zu bytes, got %d", nsid, buflen, ret); 180 return (false); 181 } 182 183 return (true); 184 } 185 186 /* 187 * Create the common I/O property group properties that are shared between 188 * controllers and namespaces. We assume the property group was already created. 189 */ 190 static bool 191 disk_nvme_common_io(topo_mod_t *mod, tnode_t *tn, di_node_t di) 192 { 193 int err; 194 int inst = di_instance(di); 195 const char *drv = di_driver_name(di); 196 char *path; 197 const char *ppaths[1]; 198 199 if (inst != -1 && topo_prop_set_uint32(tn, TOPO_PGROUP_IO, 200 TOPO_IO_INSTANCE, TOPO_PROP_IMMUTABLE, (uint32_t)inst, &err) != 0) { 201 topo_mod_dprintf(mod, "failed to set %s:%s on %s[%" PRIu64 "]: " 202 "%s", TOPO_PGROUP_IO, TOPO_IO_INSTANCE, topo_node_name(tn), 203 topo_node_instance(tn), topo_strerror(err)); 204 return (false); 205 } 206 207 if (drv != NULL && topo_prop_set_string(tn, TOPO_PGROUP_IO, 208 TOPO_IO_DRIVER, TOPO_PROP_IMMUTABLE, drv, &err) != 0) { 209 topo_mod_dprintf(mod, "failed to set %s:%s on %s[%" PRIu64 "]: " 210 "%s", TOPO_PGROUP_IO, TOPO_IO_DRIVER, topo_node_name(tn), 211 topo_node_instance(tn), topo_strerror(err)); 212 return (false); 213 } 214 215 if (drv != NULL) { 216 nvlist_t *fmri = topo_mod_modfmri(mod, FM_MOD_SCHEME_VERSION, 217 drv); 218 if (mod != NULL && topo_prop_set_fmri(tn, TOPO_PGROUP_IO, 219 TOPO_IO_MODULE, TOPO_PROP_IMMUTABLE, fmri, &err) != 0) { 220 topo_mod_dprintf(mod, "failed to set %s:%s on %s[%" 221 PRIu64 "]: %s", TOPO_PGROUP_IO, TOPO_IO_MODULE, 222 topo_node_name(tn), topo_node_instance(tn), 223 topo_strerror(err)); 224 nvlist_free(fmri); 225 return (false); 226 } 227 nvlist_free(fmri); 228 } 229 230 path = di_devfs_path(di); 231 ppaths[0] = path; 232 if (path != NULL && topo_prop_set_string(tn, TOPO_PGROUP_IO, 233 TOPO_IO_DEV_PATH, TOPO_PROP_IMMUTABLE, path, &err) != 0) { 234 topo_mod_dprintf(mod, "failed to set %s:%s on %s[%" PRIu64 "]: " 235 "%s", TOPO_PGROUP_IO, TOPO_IO_DRIVER, topo_node_name(tn), 236 topo_node_instance(tn), topo_strerror(err)); 237 di_devfs_path_free(path); 238 return (false); 239 } 240 241 if (path != NULL && topo_prop_set_string_array(tn, TOPO_PGROUP_IO, 242 TOPO_IO_PHYS_PATH, TOPO_PROP_IMMUTABLE, ppaths, 1, &err) != 0) { 243 topo_mod_dprintf(mod, "failed to set %s:%s on %s[%" PRIu64 "]: " 244 "%s", TOPO_PGROUP_IO, TOPO_IO_PHYS_PATH, topo_node_name(tn), 245 topo_node_instance(tn), topo_strerror(err)); 246 di_devfs_path_free(path); 247 return (false); 248 } 249 di_devfs_path_free(path); 250 251 return (true); 252 } 253 254 /* 255 * Add the various storage and I/O property group items that are appropriate 256 * given that we have a devinfo node. The storage property group has already 257 * been created, but the I/O property group has not. 258 */ 259 static void 260 disk_nvme_make_ns_di_props(topo_mod_t *mod, tnode_t *tn, di_node_t di) 261 { 262 int err; 263 char *devid, *mfg, *model, *rev, *serial, *log, *path; 264 uint_t buflen; 265 266 if (di_prop_lookup_strings(DDI_DEV_T_ANY, di, DEVID_PROP_NAME, 267 &devid) != 1 || 268 di_prop_lookup_strings(DDI_DEV_T_ANY, di, INQUIRY_VENDOR_ID, 269 &mfg) != 1 || 270 di_prop_lookup_strings(DDI_DEV_T_ANY, di, INQUIRY_PRODUCT_ID, 271 &model) != 1 || 272 di_prop_lookup_strings(DDI_DEV_T_ANY, di, INQUIRY_REVISION_ID, 273 &rev) != 1 || 274 di_prop_lookup_strings(DDI_DEV_T_ANY, di, INQUIRY_SERIAL_NO, 275 &serial) != 1) { 276 topo_mod_dprintf(mod, "failed to get devinfo props for %s[%" 277 PRIu64 "]", topo_node_name(tn), topo_node_instance(tn)); 278 return; 279 } 280 281 /* 282 * Set the basic storage manufacturer information. Yes, this is 283 * information really about the NVMe controller and not the namespace. 284 * That's how the storage property group basically works here. 285 */ 286 if (topo_prop_set_string(tn, TOPO_PGROUP_STORAGE, 287 TOPO_STORAGE_MANUFACTURER, TOPO_PROP_IMMUTABLE, mfg, &err) != 0 || 288 topo_prop_set_string(tn, TOPO_PGROUP_STORAGE, 289 TOPO_STORAGE_SERIAL_NUM, TOPO_PROP_IMMUTABLE, serial, &err) != 0 || 290 topo_prop_set_string(tn, TOPO_PGROUP_STORAGE, 291 TOPO_STORAGE_FIRMWARE_REV, TOPO_PROP_IMMUTABLE, rev, &err) != 0 || 292 topo_prop_set_string(tn, TOPO_PGROUP_STORAGE, 293 TOPO_STORAGE_MODEL, TOPO_PROP_IMMUTABLE, model, &err) != 0) { 294 topo_mod_dprintf(mod, "failed to set storage properties on " 295 "%s[%" PRIu64 "]: %s", topo_node_name(tn), 296 topo_node_instance(tn), topo_strerror(err)); 297 return; 298 } 299 300 if (topo_pgroup_create(tn, &io_pgroup, &err) != 0) { 301 topo_mod_dprintf(mod, "failed to create I/O property " 302 "group on %s[%" PRIu64 "]: %s", topo_node_name(tn), 303 topo_node_instance(tn), topo_strerror(err)); 304 } 305 306 if (!disk_nvme_common_io(mod, tn, di)) { 307 return; 308 } 309 310 /* 311 * The last property that we'd like to attempt to create for a namespace 312 * is a mapping back to its corresponding logical disk entry in /dev. 313 * The logical disk will be everything past the trailing /, i.e. a 314 * cXtXdX value. 315 */ 316 path = di_devfs_path(di); 317 if (path == NULL) { 318 return; 319 } 320 log = get_logical_disk(mod, path, &buflen); 321 di_devfs_path_free(path); 322 if (log == NULL) { 323 return; 324 } 325 path = strrchr(log, '/'); 326 if (path != NULL && path[1] != '\0' && 327 topo_prop_set_string(tn, TOPO_PGROUP_STORAGE, 328 TOPO_STORAGE_LOGICAL_DISK_NAME, TOPO_PROP_IMMUTABLE, path + 1, 329 &err) != 0) { 330 topo_mod_dprintf(mod, "failed to set %s:%s on %s[%" 331 PRIu64 "]: %s", TOPO_PGROUP_STORAGE, 332 TOPO_STORAGE_LOGICAL_DISK_NAME, topo_node_name(tn), 333 topo_node_instance(tn), topo_strerror(err)); 334 } 335 topo_mod_free(mod, log, buflen); 336 } 337 338 static void 339 disk_nvme_make_ns(nvme_enum_info_t *nei, uint32_t nsid) 340 { 341 topo_mod_t *mod = nei->nei_mod; 342 nvlist_t *auth = NULL, *fmri = NULL; 343 const topo_instance_t inst = nsid - 1; 344 nvme_ns_info_t info; 345 nvme_ioctl_t ioc; 346 char serial[64], capstr[64]; 347 uint64_t cap, blksz; 348 tnode_t *tn; 349 uint8_t lba; 350 int err; 351 352 bzero(&ioc, sizeof (ioc)); 353 bzero(&info, sizeof (info)); 354 ioc.n_len = sizeof (nvme_ns_info_t); 355 ioc.n_buf = (uintptr_t)&info; 356 ioc.n_arg = nsid; 357 358 if (ioctl(nei->nei_fd, NVME_IOC_NS_INFO, &ioc) != 0) { 359 topo_mod_dprintf(mod, "failed to get namespace info for ns %u: " 360 "%s", nsid, strerror(errno)); 361 return; 362 } 363 364 if ((info.nni_state & NVME_NS_STATE_IGNORED) != 0) { 365 return; 366 } 367 368 if ((info.nni_state & 369 (NVME_NS_STATE_ACTIVE | NVME_NS_STATE_ATTACHED)) == 0) { 370 topo_mod_dprintf(mod, "skipping nsid %u because it is not " 371 "active or attached (state: 0x%x)", nsid, info.nni_state); 372 return; 373 } 374 375 auth = topo_mod_auth(mod, nei->nei_nvme); 376 if (auth == NULL) { 377 topo_mod_dprintf(mod, "failed to get auth for nsid %u from " 378 "parent %s[%" PRIu64 "]: %s", nsid, 379 topo_node_name(nei->nei_nvme), 380 topo_node_instance(nei->nei_nvme), topo_mod_errmsg(mod)); 381 goto done; 382 } 383 384 /* 385 * We want to construct the FMRI for the namespace. The namespace is a 386 * little awkward in terms of things like the model, revision, and 387 * serial. While blkdev sets up standard inquiry properties to map these 388 * to the parent device which makes sense in the context of trying to 389 * use this as a normal block device, it's not really appropriate here. 390 * The namespace is not the NVMe controller. We construct the namespace 391 * serial number from the preferential ordering of information that 392 * we're given of the NGUID, EUI64, and then fall back to the namespace 393 * number. 394 */ 395 if (!disk_nvme_make_ns_serial(mod, &info.nni_id, nsid, serial, 396 sizeof (serial))) { 397 goto done; 398 } 399 fmri = topo_mod_hcfmri(mod, nei->nei_nvme, FM_HC_SCHEME_VERSION, 400 DISK, inst, NULL, auth, NULL, NULL, serial); 401 if (fmri == NULL) { 402 topo_mod_dprintf(mod, "failed to make fmri for %s[%" PRIu64 403 "] on nsid %u: %s", DISK, inst, nsid, topo_mod_errmsg(mod)); 404 goto done; 405 } 406 407 tn = topo_node_bind(mod, nei->nei_nvme, DISK, inst, fmri); 408 if (tn == NULL) { 409 topo_mod_dprintf(mod, "failed to bind fmri for %s[%" PRIu64 410 "] on nsid %u: %s", DISK, inst, nsid, topo_mod_errmsg(mod)); 411 goto done; 412 } 413 414 /* 415 * Always inherit our parent's FRU. The namespace is just a part of the 416 * device in reality. 417 */ 418 if (topo_node_fru_set(tn, NULL, 0, &err) != 0) { 419 topo_mod_dprintf(mod, "failed to set FRU for %s[%" PRIu64 420 "] on nsid %u: %s", DISK, inst, nsid, topo_strerror(err)); 421 goto done; 422 423 } 424 425 /* 426 * Our namespace may or may not be attached. From the namespace we will 427 * always get the capacity and block information. The rest of it will 428 * end up being filled in if we find a devinfo node. 429 */ 430 if (topo_pgroup_create(tn, &storage_pgroup, &err) != 0) { 431 topo_mod_dprintf(mod, "failed to create storage property " 432 "group on %s[%" PRIu64 "]: %s", DISK, inst, 433 topo_strerror(err)); 434 } 435 436 lba = info.nni_id.id_flbas.lba_format; 437 blksz = 1ULL << info.nni_id.id_lbaf[lba].lbaf_lbads; 438 if (blksz != 0 && topo_prop_set_uint64(tn, TOPO_PGROUP_STORAGE, 439 TOPO_STORAGE_LOG_BLOCK_SIZE, TOPO_PROP_IMMUTABLE, blksz, &err) != 440 0) { 441 topo_mod_dprintf(mod, "failed to create property %s:%s on %s[%" 442 PRIu64 "]: %s", TOPO_PGROUP_STORAGE, 443 TOPO_STORAGE_LOG_BLOCK_SIZE, DISK, inst, 444 topo_strerror(err)); 445 goto done; 446 } 447 448 cap = blksz * info.nni_id.id_nsize; 449 if (snprintf(capstr, sizeof (capstr), "%" PRIu64, cap) >= 450 sizeof (capstr)) { 451 topo_mod_dprintf(mod, "overflowed capacity calculation on " 452 "nsid %u", nsid); 453 goto done; 454 } 455 456 /* 457 * Finally attempt to find a child node that has a matching name and go 458 * from there. Sorry, this does result in node creation being O(n^2), 459 * but at least n is usually small today. 460 */ 461 for (di_node_t di = di_child_node(nei->nei_dinode); di != DI_NODE_NIL; 462 di = di_sibling_node(di)) { 463 const char *addr = di_bus_addr(di); 464 if (addr != NULL && strcmp(addr, info.nni_addr) == 0) { 465 disk_nvme_make_ns_di_props(mod, tn, di); 466 } 467 } 468 469 done: 470 nvlist_free(auth); 471 nvlist_free(fmri); 472 } 473 474 /* 475 * Attempt to make a ufm node, but swallow the error so we can try to get as 476 * much of the disk information as possible. 477 */ 478 static void 479 disk_nvme_make_ufm(topo_mod_t *mod, nvme_enum_info_t *nei) 480 { 481 topo_ufm_devinfo_t tud; 482 char *path = di_devfs_path(nei->nei_dinode); 483 if (path == NULL) { 484 return; 485 } 486 487 tud.tud_method = TOPO_UFM_M_DEVINFO; 488 tud.tud_path = path; 489 if (topo_mod_load(mod, TOPO_MOD_UFM, TOPO_VERSION) == NULL) { 490 topo_mod_dprintf(mod, "disk enum could not load ufm module"); 491 di_devfs_path_free(path); 492 return; 493 } 494 495 (void) topo_mod_enumerate(mod, nei->nei_nvme, TOPO_MOD_UFM, UFM, 0, 0, 496 &tud); 497 di_devfs_path_free(path); 498 } 499 500 static const topo_pgroup_info_t nvme_pgroup = { 501 TOPO_PGROUP_NVME, 502 TOPO_STABILITY_PRIVATE, 503 TOPO_STABILITY_PRIVATE, 504 1 505 }; 506 507 static int 508 make_nvme_node(nvme_enum_info_t *nvme_info) 509 { 510 topo_mod_t *mod = nvme_info->nei_mod; 511 nvlist_t *auth = NULL, *fmri = NULL, *fru; 512 tnode_t *nvme; 513 char *rev = NULL, *model = NULL, *serial = NULL, *vers = NULL; 514 char *pname = topo_node_name(nvme_info->nei_parent); 515 char *label = NULL; 516 topo_instance_t pinst = topo_node_instance(nvme_info->nei_parent); 517 int err = 0, ret = -1; 518 519 /* 520 * Next we pass the strings through a function that sanitizes them of 521 * any characters that can't be used in an FMRI string. This also takes 522 * care of making them properly terminated. 523 */ 524 rev = topo_mod_clean_strn(mod, nvme_info->nei_idctl->id_fwrev, 525 NVME_FWVER_SZ); 526 model = topo_mod_clean_strn(mod, nvme_info->nei_idctl->id_model, 527 NVME_MODEL_SZ); 528 serial = topo_mod_clean_strn(mod, nvme_info->nei_idctl->id_serial, 529 NVME_SERIAL_SZ); 530 531 auth = topo_mod_auth(mod, nvme_info->nei_parent); 532 fmri = topo_mod_hcfmri(mod, nvme_info->nei_parent, FM_HC_SCHEME_VERSION, 533 NVME, 0, NULL, auth, model, rev, serial); 534 535 if (fmri == NULL) { 536 /* errno set */ 537 topo_mod_dprintf(mod, "%s: hcfmri failed for %s=%" PRIu64 538 "/%s=0", __func__, pname, pinst, NVME); 539 goto error; 540 } 541 542 /* 543 * If our parent is a pciexfn node, then we need to create a nvme range 544 * underneath it to hold the nvme hierarchy. For other cases, where 545 * enumeration is being driven by a topo map file, this range will have 546 * already been statically defined in the XML. 547 */ 548 if (strcmp(pname, PCIEX_FUNCTION) == 0) { 549 if (topo_node_range_create(mod, nvme_info->nei_parent, NVME, 0, 550 0) < 0) { 551 /* errno set */ 552 topo_mod_dprintf(mod, "%s: error creating %s range", 553 __func__, NVME); 554 goto error; 555 } 556 } 557 558 /* 559 * Create a new topo node to represent the NVMe controller and bind it 560 * to the parent node. 561 */ 562 if ((nvme = topo_node_bind(mod, nvme_info->nei_parent, NVME, 0, 563 fmri)) == NULL) { 564 /* errno set */ 565 topo_mod_dprintf(mod, "%s: bind failed for %s=%" PRIu64 566 "/%s=0", __func__, pname, pinst, NVME); 567 goto error; 568 } 569 nvme_info->nei_nvme = nvme; 570 nvme_info->nei_nvme_fmri = fmri; 571 572 /* 573 * If our parent node is a "pciexfn" node then this is a NVMe device on 574 * a PCIe AIC, so we inherit our parent's FRU. Otherwise, we set the 575 * FRU to ourself. 576 */ 577 if (strcmp(topo_node_name(nvme_info->nei_parent), PCIEX_FUNCTION) == 0) 578 fru = NULL; 579 else 580 fru = fmri; 581 582 if (topo_node_fru_set(nvme, fru, 0, &err) != 0) { 583 topo_mod_dprintf(mod, "%s: failed to set FRU: %s", __func__, 584 topo_strerror(err)); 585 (void) topo_mod_seterrno(mod, err); 586 goto error; 587 } 588 589 /* 590 * Clone the label from our parent node. We can't inherit the property 591 * because the label prop is mutable on bay nodes and only immutable 592 * properties can be inherited. 593 */ 594 if ((topo_node_label(nvme_info->nei_parent, &label, &err) != 0 && 595 err != ETOPO_PROP_NOENT) || 596 topo_node_label_set(nvme, label, &err) != 0) { 597 topo_mod_dprintf(mod, "%s: failed to set label: %s", 598 __func__, topo_strerror(err)); 599 (void) topo_mod_seterrno(mod, err); 600 goto error; 601 } 602 603 /* 604 * Ensure that we have a UFM property set based on our devinfo path. 605 * This is a little repetitive if our parent actually did so as well, 606 * but given that the majority of such nodes are under bays and slots 607 * right now, it's a worthwhile tradeoff. 608 */ 609 disk_nvme_make_ufm(mod, nvme_info); 610 611 if (topo_pgroup_create(nvme, &nvme_pgroup, &err) != 0) { 612 topo_mod_dprintf(mod, "%s: failed to create %s pgroup: %s", 613 __func__, TOPO_PGROUP_NVME, topo_strerror(err)); 614 (void) topo_mod_seterrno(mod, err); 615 goto error; 616 } 617 618 if (asprintf(&vers, "%u.%u", nvme_info->nei_vers.v_major, 619 nvme_info->nei_vers.v_minor) < 0) { 620 topo_mod_dprintf(mod, "%s: failed to alloc string", __func__); 621 (void) topo_mod_seterrno(mod, EMOD_NOMEM); 622 goto error; 623 } 624 if (topo_prop_set_string(nvme, TOPO_PGROUP_NVME, TOPO_PROP_NVME_VER, 625 TOPO_PROP_IMMUTABLE, vers, &err) != 0) { 626 topo_mod_dprintf(mod, "%s: failed to set %s/%s property", 627 __func__, TOPO_PGROUP_NVME, TOPO_PROP_NVME_VER); 628 (void) topo_mod_seterrno(mod, err); 629 goto error; 630 } 631 632 if (topo_pgroup_create(nvme, &io_pgroup, &err) != 0) { 633 topo_mod_dprintf(mod, "%s: failed to create %s pgroup: %s", 634 __func__, TOPO_PGROUP_IO, topo_strerror(err)); 635 (void) topo_mod_seterrno(mod, err); 636 goto error; 637 } 638 639 if (!disk_nvme_common_io(mod, nvme, nvme_info->nei_dinode)) { 640 goto error; 641 } 642 643 /* 644 * Create a child disk node for each namespace. 645 */ 646 if (topo_node_range_create(mod, nvme, DISK, 0, 647 (nvme_info->nei_idctl->id_nn - 1)) < 0) { 648 /* errno set */ 649 topo_mod_dprintf(mod, "%s: error creating %s range", __func__, 650 DISK); 651 goto error; 652 } 653 654 /* 655 * Iterate over each namespace to see if it's a candidate for inclusion. 656 * Namespaces start at index 1 and not every namespace will be included. 657 * We map things such that a disk instance is always namespace - 1 to 658 * fit into the above mapping. 659 */ 660 for (uint32_t i = 1; i <= nvme_info->nei_idctl->id_nn; i++) { 661 disk_nvme_make_ns(nvme_info, i); 662 } 663 ret = 0; 664 665 error: 666 free(vers); 667 nvlist_free(auth); 668 nvlist_free(fmri); 669 topo_mod_strfree(mod, rev); 670 topo_mod_strfree(mod, model); 671 topo_mod_strfree(mod, serial); 672 topo_mod_strfree(mod, label); 673 return (ret); 674 } 675 676 struct diwalk_arg { 677 topo_mod_t *diwk_mod; 678 tnode_t *diwk_parent; 679 }; 680 681 /* 682 * This function gathers identity information from the NVMe controller and 683 * stores it in a struct. This struct is passed to make_nvme_node(), which 684 * does the actual topo node creation. 685 */ 686 static int 687 discover_nvme_ctl(di_node_t node, di_minor_t minor, void *arg) 688 { 689 struct diwalk_arg *wkarg = arg; 690 topo_mod_t *mod = wkarg->diwk_mod; 691 char *path = NULL, *devctl = NULL; 692 nvme_ioctl_t nioc = { 0 }; 693 nvme_identify_ctrl_t *idctl = NULL; 694 nvme_enum_info_t nvme_info = { 0 }; 695 int fd = -1, ret = DI_WALK_TERMINATE; 696 697 if ((path = di_devfs_minor_path(minor)) == NULL) { 698 topo_mod_dprintf(mod, "failed to get minor path"); 699 (void) topo_mod_seterrno(mod, EMOD_UNKNOWN); 700 return (ret); 701 } 702 703 topo_mod_dprintf(mod, "%s=%" PRIu64 ": found nvme controller: %s", 704 topo_node_name(wkarg->diwk_parent), 705 topo_node_instance(wkarg->diwk_parent), path); 706 707 if (asprintf(&devctl, "/devices%s", path) < 0) { 708 topo_mod_dprintf(mod, "failed to alloc string"); 709 (void) topo_mod_seterrno(mod, EMOD_NOMEM); 710 goto error; 711 } 712 713 if ((fd = open(devctl, O_RDWR)) < 0) { 714 topo_mod_dprintf(mod, "failed to open %s", devctl); 715 (void) topo_mod_seterrno(mod, EMOD_UNKNOWN); 716 goto error; 717 } 718 if ((idctl = topo_mod_zalloc(mod, NVME_IDENTIFY_BUFSIZE)) == NULL) { 719 topo_mod_dprintf(mod, "zalloc failed"); 720 (void) topo_mod_seterrno(mod, EMOD_NOMEM); 721 goto error; 722 } 723 nioc.n_len = NVME_IDENTIFY_BUFSIZE; 724 nioc.n_buf = (uintptr_t)idctl; 725 nioc.n_arg = NVME_IDENTIFY_CTRL; 726 727 if (ioctl(fd, NVME_IOC_IDENTIFY, &nioc) != 0) { 728 topo_mod_dprintf(mod, "NVME_IOC_IDENTIFY ioctl " 729 "failed: %s", strerror(errno)); 730 (void) topo_mod_seterrno(mod, EMOD_UNKNOWN); 731 goto error; 732 } 733 734 nioc.n_len = sizeof (nvme_version_t); 735 nioc.n_buf = (uintptr_t)&nvme_info.nei_vers; 736 nioc.n_arg = 0; 737 738 if (ioctl(fd, NVME_IOC_VERSION, &nioc) != 0) { 739 topo_mod_dprintf(mod, "NVME_IOC_VERSION ioctl failed: %s", 740 strerror(errno)); 741 (void) topo_mod_seterrno(mod, EMOD_UNKNOWN); 742 goto error; 743 } 744 745 nvme_info.nei_mod = mod; 746 nvme_info.nei_nvme_path = path; 747 nvme_info.nei_dinode = node; 748 nvme_info.nei_idctl = idctl; 749 nvme_info.nei_parent = wkarg->diwk_parent; 750 nvme_info.nei_fd = fd; 751 752 if (make_nvme_node(&nvme_info) != 0) { 753 /* errno set */ 754 goto error; 755 } 756 757 ret = DI_WALK_CONTINUE; 758 759 error: 760 if (fd > 0) 761 (void) close(fd); 762 di_devfs_path_free(path); 763 free(devctl); 764 if (idctl != NULL) 765 topo_mod_free(mod, idctl, NVME_IDENTIFY_BUFSIZE); 766 return (ret); 767 } 768 769 int 770 disk_nvme_enum_disk(topo_mod_t *mod, tnode_t *pnode) 771 { 772 char *parent = NULL; 773 int err; 774 di_node_t devtree; 775 di_node_t dnode; 776 struct diwalk_arg wkarg = { 0 }; 777 int ret = -1; 778 779 /* 780 * Lookup a property containing the devfs path of the parent PCIe 781 * device of the NVMe device we're attempting to enumerate. This 782 * property is hard-coded in per-platform topo XML maps that are 783 * delivered with the OS. This hard-coded path allows topo to map a 784 * given NVMe controller to a physical location (bay or slot) on the 785 * platform, when generating the topo snapshot. 786 */ 787 if (topo_prop_get_string(pnode, TOPO_PGROUP_BINDING, 788 TOPO_BINDING_PARENT_DEV, &parent, &err) != 0) { 789 topo_mod_dprintf(mod, "parent node was missing nvme binding " 790 "properties\n"); 791 (void) topo_mod_seterrno(mod, err); 792 goto out; 793 } 794 if ((devtree = topo_mod_devinfo(mod)) == DI_NODE_NIL) { 795 topo_mod_dprintf(mod, "failed to get devinfo snapshot"); 796 (void) topo_mod_seterrno(mod, EMOD_UNKNOWN); 797 goto out; 798 } 799 800 /* 801 * Walk the devinfo tree looking NVMe devices. For each NVMe device, 802 * check if the devfs path of the parent matches the one specified in 803 * TOPO_BINDING_PARENT_DEV. 804 */ 805 wkarg.diwk_mod = mod; 806 wkarg.diwk_parent = pnode; 807 dnode = di_drv_first_node(NVME_DRV, devtree); 808 while (dnode != DI_NODE_NIL) { 809 char *path; 810 811 if ((path = di_devfs_path(di_parent_node(dnode))) == NULL) { 812 topo_mod_dprintf(mod, "failed to get dev path"); 813 (void) topo_mod_seterrno(mod, EMOD_UNKNOWN); 814 goto out; 815 } 816 if (strcmp(parent, path) == 0) { 817 if (di_walk_minor(dnode, DDI_NT_NVME_NEXUS, 0, 818 &wkarg, discover_nvme_ctl) < 0) { 819 di_devfs_path_free(path); 820 goto out; 821 } 822 } 823 di_devfs_path_free(path); 824 dnode = di_drv_next_node(dnode); 825 } 826 ret = 0; 827 828 out: 829 topo_mod_strfree(mod, parent); 830 return (ret); 831 } 832