xref: /illumos-gate/usr/src/cmd/pcieadm/pcieadm.c (revision 8222814ef8560ee0ba222eca8ca5acffc6cd0e44)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2021 Oxide Computer Company
14  */
15 
16 /*
17  * PCIe shenanigans
18  *
19  * Currently this implements several different views at seeing into PCIe devices
20  * and is designed to (hopefully) replace pcitool and be a vector for new system
21  * functionality such as dealing with multicast filtering, ACS, etc.
22  *
23  * While most subcommands have their own implementations, there are a couple of
24  * things that are worth bearing in mind:
25  *
26  *  1) Where possible, prefer the use of libofmt. In particular, having good,
27  *  parsable output is important. New subcommands should strive to meet that.
28  *
29  *  2) Because we're often processing binary data (and it's good hygiene),
30  *  subcommands should make sure to drop privileges as early as they can by
31  *  calling pcieadm_init_privs(). More on privileges below.
32  *
33  * Privilege Management
34  * --------------------
35  *
36  * In an attempt to minimize privilege exposure, but to allow subcommands
37  * flexibility when required (e.g. show-cfgspace needs full privs to read from
38  * the kernel), we have two privilege sets that we maintain. One which is the
39  * minimal privs, which basically is a set that has stripped everything. This
40  * is 'pia_priv_min'. The second is one that allows a subcommand to add in
41  * privileges that it requires which will be left in the permitted set. These
42  * are in 'pia_priv_eff'. It's important to know that this set is always
43  * intersected with what the user actually has, so this is not meant to be a way
44  * for a caller to get more privileges than they already have.
45  *
46  * A subcommand is expected to call pcieadm_init_privs() once they have
47  * processed enough arguments that they can set an upper bound on privileges.
48  * It's worth noting that a subcommand will be executed in an already minimal
49  * environment; however, we will have already set up a libdevinfo handle for
50  * them, which should make the need to do much more not so bad.
51  */
52 
53 #include <stdio.h>
54 #include <stdlib.h>
55 #include <stdarg.h>
56 #include <unistd.h>
57 #include <err.h>
58 #include <libdevinfo.h>
59 #include <strings.h>
60 #include <sys/stat.h>
61 #include <sys/pci_tools.h>
62 #include <sys/pci.h>
63 #include <sys/types.h>
64 #include <fcntl.h>
65 #include <sys/debug.h>
66 #include <upanic.h>
67 #include <libgen.h>
68 
69 #include "pcieadm.h"
70 
71 pcieadm_t pcieadm;
72 const char *pcieadm_progname;
73 
74 void
75 pcieadm_init_privs(pcieadm_t *pcip)
76 {
77 	static const char *msg = "attempted to re-initialize privileges";
78 	if (pcip->pia_priv_init == NULL) {
79 		upanic(msg, strlen(msg));
80 	}
81 
82 	priv_intersect(pcip->pia_priv_init, pcip->pia_priv_eff);
83 
84 	if (setppriv(PRIV_SET, PRIV_PERMITTED, pcieadm.pia_priv_eff) != 0) {
85 		err(EXIT_FAILURE, "failed to reduce privileges");
86 	}
87 
88 	if (setppriv(PRIV_SET, PRIV_LIMIT, pcieadm.pia_priv_eff) != 0) {
89 		err(EXIT_FAILURE, "failed to reduce privileges");
90 	}
91 
92 	priv_freeset(pcip->pia_priv_init);
93 	pcip->pia_priv_init = NULL;
94 }
95 
96 void
97 pcieadm_indent(void)
98 {
99 	pcieadm.pia_indent += 2;
100 }
101 
102 void
103 pcieadm_deindent(void)
104 {
105 	VERIFY3U(pcieadm.pia_indent, >, 0);
106 	pcieadm.pia_indent -= 2;
107 }
108 
109 void
110 pcieadm_print(const char *fmt, ...)
111 {
112 	va_list ap;
113 
114 	if (pcieadm.pia_indent > 0) {
115 		(void) printf("%*s", pcieadm.pia_indent, "");
116 	}
117 
118 	va_start(ap, fmt);
119 	(void) vprintf(fmt, ap);
120 	va_end(ap);
121 }
122 
/*
 * Variadic fatal-error helper: formats the message with verrx() and exits
 * with EXIT_FAILURE. verrx() does not return, so there is no va_end().
 */
void
pcieadm_ofmt_errx(const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	verrx(EXIT_FAILURE, fmt, args);
}
131 
132 boolean_t
133 pcieadm_di_node_is_pci(di_node_t node)
134 {
135 	const char *name;
136 
137 	name = di_node_name(node);
138 	return (strncmp("pci", name, 3) == 0);
139 }
140 
141 static int
142 pcieadm_di_walk_cb(di_node_t node, void *arg)
143 {
144 	pcieadm_di_walk_t *walk = arg;
145 
146 	if (!pcieadm_di_node_is_pci(node)) {
147 		return (DI_WALK_CONTINUE);
148 	}
149 
150 	/*
151 	 * We create synthetic nodes for the root of PCIe tree basically
152 	 * functions as all the resources available for one or more bridges.
153 	 * When we encounter that top-level node skip it.
154 	 */
155 	if (strcmp("pci", di_node_name(node)) == 0) {
156 		return (DI_WALK_CONTINUE);
157 	}
158 
159 	return (walk->pdw_func(node, walk->pdw_arg));
160 }
161 
162 void
163 pcieadm_di_walk(pcieadm_t *pcip, pcieadm_di_walk_t *arg)
164 {
165 	(void) di_walk_node(pcip->pia_root, DI_WALK_CLDFIRST, arg,
166 	    pcieadm_di_walk_cb);
167 }
168 
169 /*
170  * Attempt to find the nexus that corresponds to this device. To do this, we
171  * walk up and walk the minors until we find a "reg" minor.
172  */
173 void
174 pcieadm_find_nexus(pcieadm_t *pia)
175 {
176 	di_node_t cur;
177 
178 	for (cur = di_parent_node(pia->pia_devi); cur != DI_NODE_NIL;
179 	    cur = di_parent_node(cur)) {
180 		di_minor_t minor = DI_MINOR_NIL;
181 
182 		while ((minor = di_minor_next(cur, minor)) != DI_MINOR_NIL) {
183 			if (di_minor_spectype(minor) == S_IFCHR &&
184 			    strcmp(di_minor_name(minor), "reg") == 0) {
185 				pia->pia_nexus = cur;
186 				return;
187 			}
188 		}
189 	}
190 }
191 
192 static int
193 pcieadm_find_dip_cb(di_node_t node, void *arg)
194 {
195 	char *path = NULL, *driver;
196 	char dinst[128], bdf[128], altbdf[128];
197 	int inst, nprop, *regs;
198 	pcieadm_t *pia = arg;
199 
200 	path = di_devfs_path(node);
201 	if (path == NULL) {
202 		err(EXIT_FAILURE, "failed to construct devfs path for node: "
203 		    "%s (%s)", di_node_name(node));
204 	}
205 
206 	driver = di_driver_name(node);
207 	inst = di_instance(node);
208 	if (driver != NULL && inst != -1) {
209 		(void) snprintf(dinst, sizeof (dinst), "%s%d", driver, inst);
210 	}
211 
212 	nprop = di_prop_lookup_ints(DDI_DEV_T_ANY, node, "reg", &regs);
213 	if (nprop <= 0) {
214 		errx(EXIT_FAILURE, "failed to lookup regs array for %s",
215 		    path);
216 	}
217 	(void) snprintf(bdf, sizeof (bdf), "%x/%x/%x", PCI_REG_BUS_G(regs[0]),
218 	    PCI_REG_DEV_G(regs[0]), PCI_REG_FUNC_G(regs[0]));
219 	(void) snprintf(bdf, sizeof (bdf), "%02x/%02x/%02x",
220 	    PCI_REG_BUS_G(regs[0]), PCI_REG_DEV_G(regs[0]),
221 	    PCI_REG_FUNC_G(regs[0]));
222 
223 	if (strcmp(pia->pia_devstr, path) == 0 ||
224 	    strcmp(pia->pia_devstr, bdf) == 0 ||
225 	    strcmp(pia->pia_devstr, altbdf) == 0 ||
226 	    (driver != NULL && inst != -1 &&
227 	    strcmp(pia->pia_devstr, dinst) == 0)) {
228 		if (pia->pia_devi != DI_NODE_NIL) {
229 			errx(EXIT_FAILURE, "device name matched two device "
230 			    "nodes: %s and %s", di_node_name(pia->pia_devi),
231 			    di_node_name(node));
232 		}
233 
234 		pia->pia_devi = node;
235 	}
236 
237 	if (path != NULL) {
238 		di_devfs_path_free(path);
239 	}
240 
241 	return (DI_WALK_CONTINUE);
242 }
243 
244 void
245 pcieadm_find_dip(pcieadm_t *pcip, const char *device)
246 {
247 	pcieadm_di_walk_t walk;
248 
249 	/*
250 	 * If someone specifies /devices, just skip over it.
251 	 */
252 	pcip->pia_devstr = device;
253 	if (strncmp("/devices", device, strlen("/devices")) == 0) {
254 		pcip->pia_devstr += strlen("/devices");
255 	}
256 
257 	pcip->pia_devi = DI_NODE_NIL;
258 	walk.pdw_arg = pcip;
259 	walk.pdw_func = pcieadm_find_dip_cb;
260 	pcieadm_di_walk(pcip, &walk);
261 
262 	if (pcip->pia_devi == DI_NODE_NIL) {
263 		errx(EXIT_FAILURE, "failed to find device node %s", device);
264 	}
265 
266 	pcip->pia_nexus = DI_NODE_NIL;
267 	pcieadm_find_nexus(pcip);
268 	if (pcip->pia_nexus == DI_NODE_NIL) {
269 		errx(EXIT_FAILURE, "failed to find nexus for %s", device);
270 	}
271 }
272 
/*
 * State for reading configuration space data out of a file. Allocated by
 * pcieadm_init_cfgspace_file() and released by pcieadm_fini_cfgspace_file().
 */
typedef struct pcieadm_cfgspace_file {
	int pcfi_fd;	/* read-only descriptor for the input file */
} pcieadm_cfgspace_file_t;
276 
277 static boolean_t
278 pcieadm_read_cfgspace_file(uint32_t off, uint8_t len, void *buf, void *arg)
279 {
280 	uint32_t bufoff = 0;
281 	pcieadm_cfgspace_file_t *pcfi = arg;
282 
283 	while (len > 0) {
284 		ssize_t ret = pread(pcfi->pcfi_fd, buf + bufoff, len, off);
285 		if (ret < 0) {
286 			err(EXIT_FAILURE, "failed to read %u bytes at %"
287 			    PRIu32, len, off);
288 		} else if (ret == 0) {
289 			warnx("hit unexpected EOF reading cfgspace from file "
290 			    "at offest %" PRIu32 ", still wanted to read %u "
291 			    "bytes", off, len);
292 			return (B_FALSE);
293 		} else {
294 			len -= ret;
295 			off += ret;
296 			bufoff += ret;
297 		}
298 
299 	}
300 
301 	return (B_TRUE);
302 }
303 
304 void
305 pcieadm_init_cfgspace_file(pcieadm_t *pcip, const char *path,
306     pcieadm_cfgspace_f *funcp, void **arg)
307 {
308 	int fd;
309 	struct stat st;
310 	pcieadm_cfgspace_file_t *pcfi;
311 
312 	if (setppriv(PRIV_SET, PRIV_EFFECTIVE, pcip->pia_priv_eff) != 0) {
313 		err(EXIT_FAILURE, "failed to raise privileges");
314 	}
315 
316 	if ((fd = open(path, O_RDONLY)) < 0) {
317 		err(EXIT_FAILURE, "failed to open input file %s", path);
318 	}
319 
320 	if (fstat(fd, &st) != 0) {
321 		err(EXIT_FAILURE, "failed to get stat information for %s",
322 		    path);
323 	}
324 
325 	if (setppriv(PRIV_SET, PRIV_EFFECTIVE, pcip->pia_priv_min) != 0) {
326 		err(EXIT_FAILURE, "failed to reduce privileges");
327 	}
328 
329 	if (S_ISDIR(st.st_mode)) {
330 		errx(EXIT_FAILURE, "input file %s is a directory, unable "
331 		    "to read data", path);
332 	}
333 
334 	if (S_ISLNK(st.st_mode)) {
335 		errx(EXIT_FAILURE, "input file %s is a symbolic link, unable "
336 		    "to read data", path);
337 	}
338 
339 	if (S_ISDOOR(st.st_mode)) {
340 		errx(EXIT_FAILURE, "input file %s is a door, unable "
341 		    "to read data", path);
342 	}
343 
344 	if (S_ISPORT(st.st_mode)) {
345 		errx(EXIT_FAILURE, "input file %s is an event port, unable "
346 		    "to read data", path);
347 	}
348 
349 	/*
350 	 * Assume if we were given a FIFO, character/block device, socket, or
351 	 * something else that it's probably fine.
352 	 */
353 	pcfi = calloc(1, sizeof (*pcfi));
354 	if (pcfi == NULL) {
355 		err(EXIT_FAILURE, "failed to allocate memory for reading "
356 		    "cfgspace data from a file");
357 	}
358 
359 	pcfi->pcfi_fd = fd;
360 	*arg = pcfi;
361 	*funcp = pcieadm_read_cfgspace_file;
362 }
363 
364 void
365 pcieadm_fini_cfgspace_file(void *arg)
366 {
367 	pcieadm_cfgspace_file_t *pcfi = arg;
368 	VERIFY0(close(pcfi->pcfi_fd));
369 	free(pcfi);
370 }
371 
372 typedef struct pcieadm_cfgspace_kernel {
373 	pcieadm_t *pck_pci;
374 	int pck_fd;
375 	uint8_t pck_bus;
376 	uint8_t pck_dev;
377 	uint8_t pck_func;
378 } pcieadm_cfgspace_kernel_t;
379 
380 static boolean_t
381 pcieadm_read_cfgspace_kernel(uint32_t off, uint8_t len, void *buf, void *arg)
382 {
383 	pcieadm_cfgspace_kernel_t *pck = arg;
384 	pcieadm_t *pcip = pck->pck_pci;
385 	pcitool_reg_t pci_reg;
386 
387 	bzero(&pci_reg, sizeof (pci_reg));
388 	pci_reg.user_version = PCITOOL_VERSION;
389 	pci_reg.bus_no = pck->pck_bus;
390 	pci_reg.dev_no = pck->pck_dev;
391 	pci_reg.func_no = pck->pck_func;
392 	pci_reg.barnum = 0;
393 	pci_reg.offset = off;
394 	pci_reg.acc_attr = PCITOOL_ACC_ATTR_ENDN_LTL;
395 
396 	switch (len) {
397 	case 1:
398 		pci_reg.acc_attr += PCITOOL_ACC_ATTR_SIZE_1;
399 		break;
400 	case 2:
401 		pci_reg.acc_attr += PCITOOL_ACC_ATTR_SIZE_2;
402 		break;
403 	case 4:
404 		pci_reg.acc_attr += PCITOOL_ACC_ATTR_SIZE_4;
405 		break;
406 	case 8:
407 		pci_reg.acc_attr += PCITOOL_ACC_ATTR_SIZE_8;
408 		break;
409 	default:
410 		errx(EXIT_FAILURE, "asked to read invalid size from kernel: %u",
411 		    len);
412 	}
413 
414 	if (setppriv(PRIV_SET, PRIV_EFFECTIVE, pcip->pia_priv_eff) != 0) {
415 		err(EXIT_FAILURE, "failed to raise privileges");
416 	}
417 
418 	if (ioctl(pck->pck_fd, PCITOOL_DEVICE_GET_REG, &pci_reg) != 0) {
419 		err(EXIT_FAILURE, "failed to read device offset 0x%x", off);
420 	}
421 
422 	if (setppriv(PRIV_SET, PRIV_EFFECTIVE, pcip->pia_priv_min) != 0) {
423 		err(EXIT_FAILURE, "failed to reduce privileges");
424 	}
425 
426 	switch (len) {
427 	case 1:
428 		*(uint8_t *)buf = (uint8_t)pci_reg.data;
429 		break;
430 	case 2:
431 		*(uint16_t *)buf = (uint16_t)pci_reg.data;
432 		break;
433 	case 4:
434 		*(uint32_t *)buf = (uint32_t)pci_reg.data;
435 		break;
436 	case 8:
437 		*(uint64_t *)buf = (uint64_t)pci_reg.data;
438 		break;
439 	}
440 
441 	return (B_TRUE);
442 }
443 
444 void
445 pcieadm_init_cfgspace_kernel(pcieadm_t *pcip, pcieadm_cfgspace_f *funcp,
446     void **arg)
447 {
448 	char *nexus_base;
449 	char nexus_reg[PATH_MAX];
450 	int fd, nregs, *regs;
451 	pcieadm_cfgspace_kernel_t *pck;
452 
453 	if ((nexus_base = di_devfs_path(pcip->pia_nexus)) == NULL) {
454 		err(EXIT_FAILURE, "failed to get path to nexus node");
455 	}
456 
457 	if (snprintf(nexus_reg, sizeof (nexus_reg), "/devices%s:reg",
458 	    nexus_base) >= sizeof (nexus_reg)) {
459 		errx(EXIT_FAILURE, "failed to construct nexus path, path "
460 		    "overflow");
461 	}
462 	free(nexus_base);
463 
464 	if (setppriv(PRIV_SET, PRIV_EFFECTIVE, pcip->pia_priv_eff) != 0) {
465 		err(EXIT_FAILURE, "failed to raise privileges");
466 	}
467 
468 	if ((fd = open(nexus_reg, O_RDONLY)) < 0) {
469 		err(EXIT_FAILURE, "failed to open %s", nexus_reg);
470 	}
471 
472 	if (setppriv(PRIV_SET, PRIV_EFFECTIVE, pcip->pia_priv_min) != 0) {
473 		err(EXIT_FAILURE, "failed to reduce privileges");
474 	}
475 
476 	nregs = di_prop_lookup_ints(DDI_DEV_T_ANY, pcip->pia_devi, "reg",
477 	    &regs);
478 	if (nregs <= 0) {
479 		errx(EXIT_FAILURE, "failed to lookup regs array for %s",
480 		    pcip->pia_devstr);
481 	}
482 
483 	pck = calloc(1, sizeof (pcieadm_cfgspace_kernel_t));
484 	if (pck == NULL) {
485 		err(EXIT_FAILURE, "failed to allocate memory for reading "
486 		    "kernel cfgspace data");
487 	}
488 
489 	pck->pck_pci = pcip;
490 	pck->pck_fd = fd;
491 	pck->pck_bus = PCI_REG_BUS_G(regs[0]);
492 	pck->pck_dev = PCI_REG_DEV_G(regs[0]);
493 	pck->pck_func = PCI_REG_FUNC_G(regs[0]);
494 
495 	*funcp = pcieadm_read_cfgspace_kernel;
496 	*arg = pck;
497 }
498 
499 void
500 pcieadm_fini_cfgspace_kernel(void *arg)
501 {
502 	pcieadm_cfgspace_kernel_t *pck = arg;
503 
504 	VERIFY0(close(pck->pck_fd));
505 	free(pck);
506 }
507 
508 static const pcieadm_cmdtab_t pcieadm_cmds[] = {
509 	{ "save-cfgspace", pcieadm_save_cfgspace, pcieadm_save_cfgspace_usage },
510 	{ "show-cfgspace", pcieadm_show_cfgspace, pcieadm_show_cfgspace_usage },
511 	{ "show-devs", pcieadm_show_devs, pcieadm_show_devs_usage },
512 	{ NULL }
513 };
514 
515 static void
516 pcieadm_usage(const char *format, ...)
517 {
518 	uint_t cmd;
519 
520 	if (format != NULL) {
521 		va_list ap;
522 
523 		va_start(ap, format);
524 		vwarnx(format, ap);
525 		va_end(ap);
526 	}
527 
528 	(void) fprintf(stderr, "usage:  %s <subcommand> <args> ...\n\n",
529 	    pcieadm_progname);
530 
531 	for (cmd = 0; pcieadm_cmds[cmd].pct_name != NULL; cmd++) {
532 		if (pcieadm_cmds[cmd].pct_use != NULL) {
533 			pcieadm_cmds[cmd].pct_use(stderr);
534 		}
535 	}
536 }
537 
538 int
539 main(int argc, char *argv[])
540 {
541 	uint_t cmd;
542 
543 	pcieadm_progname = basename(argv[0]);
544 
545 	if (argc < 2) {
546 		pcieadm_usage("missing required sub-command");
547 		exit(EXIT_USAGE);
548 	}
549 
550 	for (cmd = 0; pcieadm_cmds[cmd].pct_name != NULL; cmd++) {
551 		if (strcmp(pcieadm_cmds[cmd].pct_name, argv[1]) == 0) {
552 			break;
553 		}
554 	}
555 
556 	if (pcieadm_cmds[cmd].pct_name == NULL) {
557 		pcieadm_usage("unknown sub-command: %s", argv[1]);
558 		exit(EXIT_USAGE);
559 	}
560 	argc -= 2;
561 	argv += 2;
562 	optind = 0;
563 	pcieadm.pia_cmdtab = &pcieadm_cmds[cmd];
564 
565 	/*
566 	 * Set up common things that all of pcieadm needs before dispatching to
567 	 * a specific sub-command.
568 	 */
569 	pcieadm.pia_pcidb = pcidb_open(PCIDB_VERSION);
570 	if (pcieadm.pia_pcidb == NULL) {
571 		err(EXIT_FAILURE, "failed to open PCI ID database");
572 	}
573 
574 	pcieadm.pia_root = di_init("/", DINFOCPYALL);
575 	if (pcieadm.pia_root == DI_NODE_NIL) {
576 		err(EXIT_FAILURE, "failed to initialize devinfo tree");
577 	}
578 
579 	/*
580 	 * Set up privileges now that we have already opened our core libraries.
581 	 * We first set up the minimum actual privilege set that we use while
582 	 * running. We next set up a second privilege set that has additional
583 	 * privileges that are intersected with the users actual privileges and
584 	 * are appended to by the underlying command backends.
585 	 */
586 	if ((pcieadm.pia_priv_init = priv_allocset()) == NULL) {
587 		err(EXIT_FAILURE, "failed to allocate privilege set");
588 	}
589 
590 	if (getppriv(PRIV_EFFECTIVE, pcieadm.pia_priv_init) != 0) {
591 		err(EXIT_FAILURE, "failed to get current privileges");
592 	}
593 
594 	if ((pcieadm.pia_priv_min = priv_allocset()) == NULL) {
595 		err(EXIT_FAILURE, "failed to allocate privilege set");
596 	}
597 
598 	if ((pcieadm.pia_priv_eff = priv_allocset()) == NULL) {
599 		err(EXIT_FAILURE, "failed to allocate privilege set");
600 	}
601 
602 	/*
603 	 * Note, PRIV_FILE_READ is not removed from the basic set so that way we
604 	 * can still open libraries that are required due to lazy loading.
605 	 */
606 	priv_basicset(pcieadm.pia_priv_min);
607 	VERIFY0(priv_delset(pcieadm.pia_priv_min, PRIV_FILE_LINK_ANY));
608 	VERIFY0(priv_delset(pcieadm.pia_priv_min, PRIV_PROC_INFO));
609 	VERIFY0(priv_delset(pcieadm.pia_priv_min, PRIV_PROC_SESSION));
610 	VERIFY0(priv_delset(pcieadm.pia_priv_min, PRIV_PROC_FORK));
611 	VERIFY0(priv_delset(pcieadm.pia_priv_min, PRIV_NET_ACCESS));
612 	VERIFY0(priv_delset(pcieadm.pia_priv_min, PRIV_FILE_WRITE));
613 	VERIFY0(priv_delset(pcieadm.pia_priv_min, PRIV_PROC_EXEC));
614 	VERIFY0(priv_delset(pcieadm.pia_priv_min, PRIV_PROC_EXEC));
615 
616 	priv_copyset(pcieadm.pia_priv_min, pcieadm.pia_priv_eff);
617 	priv_intersect(pcieadm.pia_priv_init, pcieadm.pia_priv_eff);
618 
619 	if (setppriv(PRIV_SET, PRIV_EFFECTIVE, pcieadm.pia_priv_min) != 0) {
620 		err(EXIT_FAILURE, "failed to reduce privileges");
621 	}
622 
623 	return (pcieadm.pia_cmdtab->pct_func(&pcieadm, argc, argv));
624 }
625