xref: /linux/drivers/gpu/drm/i915/i915_gpu_error.h (revision e5a52fd2b8cdb700b3c07b030e050a49ef3156b9)
/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2008-2018 Intel Corporation
 */
6 
7 #ifndef _I915_GPU_ERROR_H_
8 #define _I915_GPU_ERROR_H_
9 
10 #include <linux/atomic.h>
11 #include <linux/kref.h>
12 #include <linux/ktime.h>
13 #include <linux/sched.h>
14 
15 #include <drm/drm_mm.h>
16 
17 #include "gt/intel_engine.h"
18 #include "gt/uc/intel_uc_fw.h"
19 
20 #include "intel_device_info.h"
21 
22 #include "i915_gem.h"
23 #include "i915_gem_gtt.h"
24 #include "i915_params.h"
25 #include "i915_scheduler.h"
26 
/* Forward declarations: this header only uses pointers to these types. */
struct drm_i915_private;
struct i915_vma_compress;
struct intel_display_error_state;
struct intel_engine_capture_vma;
struct intel_overlay_error_state;
32 
33 struct i915_vma_coredump {
34 	struct i915_vma_coredump *next;
35 
36 	char name[20];
37 
38 	u64 gtt_offset;
39 	u64 gtt_size;
40 	u32 gtt_page_sizes;
41 
42 	int num_pages;
43 	int page_count;
44 	int unused;
45 	u32 *pages[];
46 };
47 
48 struct i915_request_coredump {
49 	unsigned long flags;
50 	pid_t pid;
51 	u32 context;
52 	u32 seqno;
53 	u32 head;
54 	u32 tail;
55 	struct i915_sched_attr sched_attr;
56 };
57 
58 struct intel_engine_coredump {
59 	const struct intel_engine_cs *engine;
60 
61 	bool simulated;
62 	u32 reset_count;
63 
64 	/* position of active request inside the ring */
65 	u32 rq_head, rq_post, rq_tail;
66 
67 	/* Register state */
68 	u32 ccid;
69 	u32 start;
70 	u32 tail;
71 	u32 head;
72 	u32 ctl;
73 	u32 mode;
74 	u32 hws;
75 	u32 ipeir;
76 	u32 ipehr;
77 	u32 esr;
78 	u32 bbstate;
79 	u32 instpm;
80 	u32 instps;
81 	u64 bbaddr;
82 	u64 acthd;
83 	u32 fault_reg;
84 	u64 faddr;
85 	u32 rc_psmi; /* sleep state */
86 	struct intel_instdone instdone;
87 
88 	struct i915_gem_context_coredump {
89 		char comm[TASK_COMM_LEN];
90 
91 		u64 total_runtime;
92 		u32 avg_runtime;
93 
94 		pid_t pid;
95 		int active;
96 		int guilty;
97 		struct i915_sched_attr sched_attr;
98 	} context;
99 
100 	struct i915_vma_coredump *vma;
101 
102 	struct i915_request_coredump execlist[EXECLIST_MAX_PORTS];
103 	unsigned int num_ports;
104 
105 	struct {
106 		u32 gfx_mode;
107 		union {
108 			u64 pdp[4];
109 			u32 pp_dir_base;
110 		};
111 	} vm_info;
112 
113 	struct intel_engine_coredump *next;
114 };
115 
116 struct intel_gt_coredump {
117 	const struct intel_gt *_gt;
118 	bool awake;
119 	bool simulated;
120 
121 	/* Generic register state */
122 	u32 eir;
123 	u32 pgtbl_er;
124 	u32 ier;
125 	u32 gtier[6], ngtier;
126 	u32 derrmr;
127 	u32 forcewake;
128 	u32 error; /* gen6+ */
129 	u32 err_int; /* gen7 */
130 	u32 fault_data0; /* gen8, gen9 */
131 	u32 fault_data1; /* gen8, gen9 */
132 	u32 done_reg;
133 	u32 gac_eco;
134 	u32 gam_ecochk;
135 	u32 gab_ctl;
136 	u32 gfx_mode;
137 	u32 gtt_cache;
138 	u32 aux_err; /* gen12 */
139 	u32 sfc_done[GEN12_SFC_DONE_MAX]; /* gen12 */
140 	u32 gam_done; /* gen12 */
141 
142 	u32 nfence;
143 	u64 fence[I915_MAX_NUM_FENCES];
144 
145 	struct intel_engine_coredump *engine;
146 
147 	struct intel_uc_coredump {
148 		struct intel_uc_fw guc_fw;
149 		struct intel_uc_fw huc_fw;
150 		struct i915_vma_coredump *guc_log;
151 	} *uc;
152 
153 	struct intel_gt_coredump *next;
154 };
155 
156 struct i915_gpu_coredump {
157 	struct kref ref;
158 	ktime_t time;
159 	ktime_t boottime;
160 	ktime_t uptime;
161 	unsigned long capture;
162 
163 	struct drm_i915_private *i915;
164 
165 	struct intel_gt_coredump *gt;
166 
167 	char error_msg[128];
168 	bool simulated;
169 	bool wakelock;
170 	bool suspended;
171 	int iommu;
172 	u32 reset_count;
173 	u32 suspend_count;
174 
175 	struct intel_device_info device_info;
176 	struct intel_runtime_info runtime_info;
177 	struct intel_driver_caps driver_caps;
178 	struct i915_params params;
179 
180 	struct intel_overlay_error_state *overlay;
181 	struct intel_display_error_state *display;
182 
183 	struct scatterlist *sgl, *fit;
184 };
185 
/*
 * Error bookkeeping: a lock, a pointer to the first recorded coredump,
 * and atomic counters.
 */
struct i915_gpu_error {
	/* For reset and error_state handling. */
	spinlock_t lock;
	/* Protected by the lock above (dev->gpu_error.lock). */
	struct i915_gpu_coredump *first_error;

	atomic_t pending_fb_pin;

	/** Number of times the device has been reset (global) */
	atomic_t reset_count;

	/** Number of times an engine has been reset, one slot per engine */
	atomic_t reset_engine_count[I915_NUM_ENGINES];
};
200 
201 struct drm_i915_error_state_buf {
202 	struct drm_i915_private *i915;
203 	struct scatterlist *sgl, *cur, *end;
204 
205 	char *buf;
206 	size_t bytes;
207 	size_t size;
208 	loff_t iter;
209 
210 	int err;
211 };
212 
213 #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
214 
215 __printf(2, 3)
216 void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...);
217 
218 struct i915_gpu_coredump *i915_gpu_coredump(struct drm_i915_private *i915);
219 void i915_capture_error_state(struct drm_i915_private *i915);
220 
221 struct i915_gpu_coredump *
222 i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp);
223 
224 struct intel_gt_coredump *
225 intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp);
226 
227 struct intel_engine_coredump *
228 intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp);
229 
230 struct intel_engine_capture_vma *
231 intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
232 				  struct i915_request *rq,
233 				  gfp_t gfp);
234 
235 void intel_engine_coredump_add_vma(struct intel_engine_coredump *ee,
236 				   struct intel_engine_capture_vma *capture,
237 				   struct i915_vma_compress *compress);
238 
239 struct i915_vma_compress *
240 i915_vma_capture_prepare(struct intel_gt_coredump *gt);
241 
242 void i915_vma_capture_finish(struct intel_gt_coredump *gt,
243 			     struct i915_vma_compress *compress);
244 
245 void i915_error_state_store(struct i915_gpu_coredump *error);
246 
247 static inline struct i915_gpu_coredump *
248 i915_gpu_coredump_get(struct i915_gpu_coredump *gpu)
249 {
250 	kref_get(&gpu->ref);
251 	return gpu;
252 }
253 
254 ssize_t
255 i915_gpu_coredump_copy_to_buffer(struct i915_gpu_coredump *error,
256 				 char *buf, loff_t offset, size_t count);
257 
258 void __i915_gpu_coredump_free(struct kref *kref);
259 static inline void i915_gpu_coredump_put(struct i915_gpu_coredump *gpu)
260 {
261 	if (gpu)
262 		kref_put(&gpu->ref, __i915_gpu_coredump_free);
263 }
264 
/* Accessors for the device's stored error state. */
struct i915_gpu_coredump *
i915_first_error_state(struct drm_i915_private *i915);

void
i915_reset_error_state(struct drm_i915_private *i915);

void
i915_disable_error_state(struct drm_i915_private *i915, int err);
268 
269 #else
270 
/* No-op stub for builds without CONFIG_DRM_I915_CAPTURE_ERROR. */
static inline void
i915_capture_error_state(struct drm_i915_private *i915)
{
}
274 
275 static inline struct i915_gpu_coredump *
276 i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp)
277 {
278 	return NULL;
279 }
280 
281 static inline struct intel_gt_coredump *
282 intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp)
283 {
284 	return NULL;
285 }
286 
287 static inline struct intel_engine_coredump *
288 intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp)
289 {
290 	return NULL;
291 }
292 
293 static inline struct intel_engine_capture_vma *
294 intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
295 				  struct i915_request *rq,
296 				  gfp_t gfp)
297 {
298 	return NULL;
299 }
300 
/* No-op stub for builds without CONFIG_DRM_I915_CAPTURE_ERROR. */
static inline void intel_engine_coredump_add_vma(
		struct intel_engine_coredump *ee,
		struct intel_engine_capture_vma *capture,
		struct i915_vma_compress *compress)
{
}
307 
308 static inline struct i915_vma_compress *
309 i915_vma_capture_prepare(struct intel_gt_coredump *gt)
310 {
311 	return NULL;
312 }
313 
/* No-op stub for builds without CONFIG_DRM_I915_CAPTURE_ERROR. */
static inline void i915_vma_capture_finish(
		struct intel_gt_coredump *gt,
		struct i915_vma_compress *compress)
{
}
319 
/* No-op stub for builds without CONFIG_DRM_I915_CAPTURE_ERROR. */
static inline void i915_error_state_store(struct i915_gpu_coredump *error)
{
}
324 
/* No-op stub for builds without CONFIG_DRM_I915_CAPTURE_ERROR. */
static inline void
i915_gpu_coredump_put(struct i915_gpu_coredump *gpu)
{
}
328 
329 static inline struct i915_gpu_coredump *
330 i915_first_error_state(struct drm_i915_private *i915)
331 {
332 	return ERR_PTR(-ENODEV);
333 }
334 
/* No-op stub for builds without CONFIG_DRM_I915_CAPTURE_ERROR. */
static inline void
i915_reset_error_state(struct drm_i915_private *i915)
{
}
338 
/* No-op stub for builds without CONFIG_DRM_I915_CAPTURE_ERROR. */
static inline void
i915_disable_error_state(struct drm_i915_private *i915, int err)
{
}
343 
344 #endif /* IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) */
345 
346 #endif /* _I915_GPU_ERROR_H_ */
347