pluto:
I tried everything I could find, and then some more. Nothing worked so far. The problem seems to be forced preemption, which cannot be safely disabled (DRM_I915_PREEMPT_TIMEOUT=0 does not work, and if you unset the various timeouts, kconfig asks for them; they cannot be unset).
Here is the outcome of "git show 3a7a92aba8fb77162e1e9963360fd81fc15c39a5" in linux-mainline, which seems to be the relevant commit:
commit 3a7a92aba8fb77162e1e9963360fd81fc15c39a5
Author: Chris Wilson <[email protected]>
Date: Wed Oct 23 14:31:05 2019 +0100
drm/i915/execlists: Force preemption
If the preempted context takes too long to relinquish control, e.g. it
is stuck inside a shader with arbitration disabled, evict that context
with an engine reset. This ensures that preemptions are reasonably
responsive, providing a tighter QoS for the more important context at
the cost of flagging unresponsive contexts more frequently (i.e. instead
of using an ~10s hangcheck, we now evict at ~100ms). The challenge of
lies in picking a timeout that can be reasonably serviced by HW for
typical workloads, balancing the existing clients against the needs for
responsiveness.
Note that coupled with timeslicing, this will lead to rapid GPU "hang"
detection with multiple active contexts vying for GPU time.
The forced preemption mechanism can be compiled out with
./scripts/config --set-val DRM_I915_PREEMPT_TIMEOUT 0
Signed-off-by: Chris Wilson <[email protected]>
Cc: Mika Kuoppala <[email protected]>
Cc: Tvrtko Ursulin <[email protected]>
Reviewed-by: Mika Kuoppala <[email protected]>
Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
diff --git a/drivers/gpu/drm/i915/Kconfig.profile b/drivers/gpu/drm/i915/Kconfig.profile
index 3a3881d5e44b..b071b6024152 100644
--- a/drivers/gpu/drm/i915/Kconfig.profile
+++ b/drivers/gpu/drm/i915/Kconfig.profile
@@ -12,6 +12,18 @@ config DRM_I915_USERFAULT_AUTOSUSPEND
May be 0 to disable the extra delay and solely use the device level
runtime pm autosuspend delay tunable.
+config DRM_I915_PREEMPT_TIMEOUT
+ int "Preempt timeout (ms, jiffy granularity)"
+ default 100 # milliseconds
+ help
+ How long to wait (in milliseconds) for a preemption event to occur
+ when submitting a new context via execlists. If the current context
+ does not hit an arbitration point and yield to HW before the timer
+ expires, the HW will be reset to allow the more important context
+ to execute.
+
+ May be 0 to disable the timeout.
+
config DRM_I915_SPIN_REQUEST
int "Busywait for request completion (us)"
default 5 # microseconds
diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index c2d9d67c63d9..9409b7856299 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -527,4 +527,13 @@ void intel_engine_init_active(struct intel_engine_cs *engine,
#define ENGINE_MOCK 1
#define ENGINE_VIRTUAL 2
+static inline bool
+intel_engine_has_preempt_reset(const struct intel_engine_cs *engine)
+{
+ if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT)
+ return 0;
+
+ return intel_engine_has_preemption(engine);
+}
+
#endif /* _INTEL_RINGBUFFER_H_ */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index e4203eb44139..b91ea07f4819 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -308,6 +308,8 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
engine->instance = info->instance;
__sprint_engine_name(engine);
+ engine->props.preempt_timeout_ms =
+ CONFIG_DRM_I915_PREEMPT_TIMEOUT;
engine->props.stop_timeout_ms =
CONFIG_DRM_I915_STOP_TIMEOUT;
@@ -1338,10 +1340,11 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
unsigned int idx;
u8 read, write;
- drm_printf(m, "\tExeclist tasklet queued? %s (%s), timeslice? %s\n",
+ drm_printf(m, "\tExeclist tasklet queued? %s (%s), preempt? %s, timeslice? %s\n",
yesno(test_bit(TASKLET_STATE_SCHED,
&engine->execlists.tasklet.state)),
enableddisabled(!atomic_read(&engine->execlists.tasklet.count)),
+ repr_timer(&engine->execlists.preempt),
repr_timer(&engine->execlists.timer));
read = execlists->csb_head;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 87d5c4ef3ae7..1251dac91f31 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -174,6 +174,11 @@ struct intel_engine_execlists {
*/
struct timer_list timer;
+ /**
+ * @preempt: reset the current context if it fails to give way
+ */
+ struct timer_list preempt;
+
/**
* @default_priolist: priority list for I915_PRIORITY_NORMAL
*/
@@ -544,6 +549,7 @@ struct intel_engine_cs {
} stats;
struct {
+ unsigned long preempt_timeout_ms;
unsigned long stop_timeout_ms;
} props;
};
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index f9f3e985bb79..ff0dd297e782 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1372,6 +1372,26 @@ static void record_preemption(struct intel_engine_execlists *execlists)
(void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++);
}
+static unsigned long active_preempt_timeout(struct intel_engine_cs *engine)
+{
+ struct i915_request *rq;
+
+ rq = last_active(&engine->execlists);
+ if (!rq)
+ return 0;
+
+ return READ_ONCE(engine->props.preempt_timeout_ms);
+}
+
+static void set_preempt_timeout(struct intel_engine_cs *engine)
+{
+ if (!intel_engine_has_preempt_reset(engine))
+ return;
+
+ set_timer_ms(&engine->execlists.preempt,
+ active_preempt_timeout(engine));
+}
+
static void execlists_dequeue(struct intel_engine_cs *engine)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -1747,6 +1767,8 @@ done:
memset(port + 1, 0, (last_port - port) * sizeof(*port));
execlists_submit_ports(engine);
+
+ set_preempt_timeout(engine);
} else {
skip_submit:
ring_set_paused(engine, 0);
@@ -1987,6 +2009,43 @@ static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
}
}
+static noinline void preempt_reset(struct intel_engine_cs *engine)
+{
+ const unsigned int bit = I915_RESET_ENGINE + engine->id;
+ unsigned long *lock = &engine->gt->reset.flags;
+
+ if (i915_modparams.reset < 3)
+ return;
+
+ if (test_and_set_bit(bit, lock))
+ return;
+
+ /* Mark this tasklet as disabled to avoid waiting for it to complete */
+ tasklet_disable_nosync(&engine->execlists.tasklet);
+
+ GEM_TRACE("%s: preempt timeout %lu+%ums\n",
+ engine->name,
+ READ_ONCE(engine->props.preempt_timeout_ms),
+ jiffies_to_msecs(jiffies - engine->execlists.preempt.expires));
+ intel_engine_reset(engine, "preemption time out");
+
+ tasklet_enable(&engine->execlists.tasklet);
+ clear_and_wake_up_bit(bit, lock);
+}
+
+static bool preempt_timeout(const struct intel_engine_cs *const engine)
+{
+ const struct timer_list *t = &engine->execlists.preempt;
+
+ if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT)
+ return false;
+
+ if (!timer_expired(t))
+ return false;
+
+ return READ_ONCE(engine->execlists.pending[0]);
+}
+
/*
* Check the unread Context Status Buffers and manage the submission of new
* contexts to the ELSP accordingly.
@@ -1994,23 +2053,39 @@ static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
static void execlists_submission_tasklet(unsigned long data)
{
struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
- unsigned long flags;
+ bool timeout = preempt_timeout(engine);
process_csb(engine);
- if (!READ_ONCE(engine->execlists.pending[0])) {
+ if (!READ_ONCE(engine->execlists.pending[0]) || timeout) {
+ unsigned long flags;
+
spin_lock_irqsave(&engine->active.lock, flags);
__execlists_submission_tasklet(engine);
spin_unlock_irqrestore(&engine->active.lock, flags);
+
+ /* Recheck after serialising with direct-submission */
+ if (timeout && preempt_timeout(engine))
+ preempt_reset(engine);
}
}
-static void execlists_submission_timer(struct timer_list *timer)
+static void __execlists_kick(struct intel_engine_execlists *execlists)
{
- struct intel_engine_cs *engine =
- from_timer(engine, timer, execlists.timer);
-
/* Kick the tasklet for some interrupt coalescing and reset handling */
- tasklet_hi_schedule(&engine->execlists.tasklet);
+ tasklet_hi_schedule(&execlists->tasklet);
+}
+
+#define execlists_kick(t, member) \
+ __execlists_kick(container_of(t, struct intel_engine_execlists, member))
+
+static void execlists_timeslice(struct timer_list *timer)
+{
+ execlists_kick(timer, timer);
+}
+
+static void execlists_preempt(struct timer_list *timer)
+{
+ execlists_kick(timer, preempt);
}
static void queue_request(struct intel_engine_cs *engine,
@@ -3455,6 +3530,7 @@ gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
static void execlists_park(struct intel_engine_cs *engine)
{
cancel_timer(&engine->execlists.timer);
+ cancel_timer(&engine->execlists.preempt);
}
void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
@@ -3572,7 +3648,8 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine)
{
tasklet_init(&engine->execlists.tasklet,
execlists_submission_tasklet, (unsigned long)engine);
- timer_setup(&engine->execlists.timer, execlists_submission_timer, 0);
+ timer_setup(&engine->execlists.timer, execlists_timeslice, 0);
+ timer_setup(&engine->execlists.preempt, execlists_preempt, 0);
logical_ring_default_vfuncs(engine);
logical_ring_default_irqs(engine);
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 7516d1c90925..b6352671c5a0 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -1697,6 +1697,105 @@ err_spin_hi:
return err;
}
+static int live_preempt_timeout(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct i915_gem_context *ctx_hi, *ctx_lo;
+ struct igt_spinner spin_lo;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ int err = -ENOMEM;
+
+ /*
+ * Check that we force preemption to occur by cancelling the previous
+ * context if it refuses to yield the GPU.
+ */
+ if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT)
+ return 0;
+
+ if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
+ return 0;
+
+ if (!intel_has_reset_engine(gt))
+ return 0;
+
+ if (igt_spinner_init(&spin_lo, gt))
+ return -ENOMEM;
+
+ ctx_hi = kernel_context(gt->i915);
+ if (!ctx_hi)
+ goto err_spin_lo;
+ ctx_hi->sched.priority =
+ I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
+
+ ctx_lo = kernel_context(gt->i915);
+ if (!ctx_lo)
+ goto err_ctx_hi;
+ ctx_lo->sched.priority =
+ I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
+
+ for_each_engine(engine, gt, id) {
+ unsigned long saved_timeout;
+ struct i915_request *rq;
+
+ if (!intel_engine_has_preemption(engine))
+ continue;
+
+ rq = spinner_create_request(&spin_lo, ctx_lo, engine,
+ MI_NOOP); /* preemption disabled */
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto err_ctx_lo;
+ }
+
+ i915_request_add(rq);
+ if (!igt_wait_for_spinner(&spin_lo, rq)) {
+ intel_gt_set_wedged(gt);
+ err = -EIO;
+ goto err_ctx_lo;
+ }
+
+ rq = igt_request_alloc(ctx_hi, engine);
+ if (IS_ERR(rq)) {
+ igt_spinner_end(&spin_lo);
+ err = PTR_ERR(rq);
+ goto err_ctx_lo;
+ }
+
+ /* Flush the previous CS ack before changing timeouts */
+ while (READ_ONCE(engine->execlists.pending[0]))
+ cpu_relax();
+
+ saved_timeout = engine->props.preempt_timeout_ms;
+ engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffie */
+
+ i915_request_get(rq);
+ i915_request_add(rq);
+
+ intel_engine_flush_submission(engine);
+ engine->props.preempt_timeout_ms = saved_timeout;
+
+ if (i915_request_wait(rq, 0, HZ / 10) < 0) {
+ intel_gt_set_wedged(gt);
+ i915_request_put(rq);
+ err = -ETIME;
+ goto err_ctx_lo;
+ }
+
+ igt_spinner_end(&spin_lo);
+ i915_request_put(rq);
+ }
+
+ err = 0;
+err_ctx_lo:
+ kernel_context_close(ctx_lo);
+err_ctx_hi:
+ kernel_context_close(ctx_hi);
+err_spin_lo:
+ igt_spinner_fini(&spin_lo);
+ return err;
+}
+
static int random_range(struct rnd_state *rnd, int min, int max)
{
return i915_prandom_u32_max_state(max - min, rnd) + min;
@@ -2598,6 +2697,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
SUBTEST(live_suppress_wait_preempt),
SUBTEST(live_chain_preempt),
SUBTEST(live_preempt_hang),
+ SUBTEST(live_preempt_timeout),
SUBTEST(live_preempt_smoke),
SUBTEST(live_virtual_engine),
SUBTEST(live_virtual_mask),
diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h
index 2011f8e9a9f1..f6f9675848b8 100644
--- a/drivers/gpu/drm/i915/i915_gem.h
+++ b/drivers/gpu/drm/i915/i915_gem.h
@@ -112,18 +112,4 @@ static inline bool __tasklet_is_scheduled(struct tasklet_struct *t)
return test_bit(TASKLET_STATE_SCHED, &t->state);
}
-static inline void cancel_timer(struct timer_list *t)
-{
- if (!READ_ONCE(t->expires))
- return;
-
- del_timer(t);
- WRITE_ONCE(t->expires, 0);
-}
-
-static inline bool timer_expired(const struct timer_list *t)
-{
- return READ_ONCE(t->expires) && !timer_pending(t);
-}
-
#endif /* __I915_GEM_H__ */
diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h
index d29ade3b7de6..56058978bb27 100644
--- a/drivers/gpu/drm/i915/i915_params.h
+++ b/drivers/gpu/drm/i915/i915_params.h
@@ -61,7 +61,7 @@ struct drm_printer;
param(char *, dmc_firmware_path, NULL) \
param(int, mmio_debug, -IS_ENABLED(CONFIG_DRM_I915_DEBUG_MMIO)) \
param(int, edp_vswing, 0) \
- param(int, reset, 2) \
+ param(int, reset, 3) \
param(unsigned int, inject_load_failure, 0) \
param(int, fastboot, -1) \
param(int, enable_dpcd_backlight, 0) \
diff --git a/drivers/gpu/drm/i915/i915_utils.c b/drivers/gpu/drm/i915/i915_utils.c
index 16acdf7bdbe6..02e969b64505 100644
--- a/drivers/gpu/drm/i915/i915_utils.c
+++ b/drivers/gpu/drm/i915/i915_utils.c
@@ -76,3 +76,32 @@ bool i915_error_injected(void)
}
#endif
+
+void cancel_timer(struct timer_list *t)
+{
+ if (!READ_ONCE(t->expires))
+ return;
+
+ del_timer(t);
+ WRITE_ONCE(t->expires, 0);
+}
+
+void set_timer_ms(struct timer_list *t, unsigned long timeout)
+{
+ if (!timeout) {
+ cancel_timer(t);
+ return;
+ }
+
+ timeout = msecs_to_jiffies_timeout(timeout);
+
+ /*
+ * Paranoia to make sure the compiler computes the timeout before
+ * loading 'jiffies' as jiffies is volatile and may be updated in
+ * the background by a timer tick. All to reduce the complexity
+ * of the addition and reduce the risk of losing a jiffie.
+ */
+ barrier();
+
+ mod_timer(t, jiffies + timeout);
+}
diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
index 562f756da421..94f136d8a5fd 100644
--- a/drivers/gpu/drm/i915/i915_utils.h
+++ b/drivers/gpu/drm/i915/i915_utils.h
@@ -32,6 +32,7 @@
#include <linux/workqueue.h>
struct drm_i915_private;
+struct timer_list;
#undef WARN_ON
/* Many gcc seem to no see through this and fall over :( */
@@ -421,4 +422,12 @@ static inline void add_taint_for_CI(unsigned int taint)
add_taint(taint, LOCKDEP_STILL_OK);
}
+void cancel_timer(struct timer_list *t);
+void set_timer_ms(struct timer_list *t, unsigned long timeout);
+
+static inline bool timer_expired(const struct timer_list *t)
+{
+ return READ_ONCE(t->expires) && !timer_pending(t);
+}
+
#endif /* !__I915_UTILS_H */
October 2019 is about right; that's when the trouble started.
I tried adding
./scripts/config --set-val DRM_I915_PREEMPT_TIMEOUT 0
to my linux-mainline PKGBUILD (before make oldconfig), but all it does is set CONFIG_DRM_I915_PREEMPT_TIMEOUT to 0, which I already tried.
I am currently testing "i915.enable_dc=0 processor.max_cstate=1 intel_idle.max_cstate=0" added to my kernel commandline, and disabling timeslicing (CONFIG_DRM_I915_TIMESLICE_DURATION=0). If that doesn't work, I'm out of options, unless the kernel guys come to their senses and provide a way to disable forced preemption safely on hardware that doesn't support it.
Timeslicing can also be disabled at runtime (as root; the actual names of the engines may differ):
# echo 0 > /sys/class/drm/card0/engine/bcs0/timeslice_duration_ms
# echo 0 > /sys/class/drm/card0/engine/rcs0/timeslice_duration_ms
# echo 0 > /sys/class/drm/card0/engine/vcs0/timeslice_duration_ms
# echo 0 > /sys/class/drm/card0/engine/vecs0/timeslice_duration_ms
Similarly for preempt_timeout_ms.