Re: Scheduler bug related to rq->skip_clock_update?

Previous message: [thread] [date] [author]
Next message: [thread] [date] [author]
From: Bjoern B. Brandenburg
Date: Monday, November 22, 2010 - 11:14 am

On Mon, 22 Nov 2010, Mike Galbraith wrote:


Unfortunately that doesn't seem to do it yet.

After running five 'find /' instances to completion on the ARM platform,
I'm still seeing delays close to 10ms.

    bbb@district10:~$ egrep 'cpu#|skip' /proc/sched_debug
    cpu#0
      .skip_clock_count              : 89606
      .skip_clock_recent_max         : 9817250
      .skip_clock_max                : 21992375
    cpu#1
      .skip_clock_count              : 81978
      .skip_clock_recent_max         : 9582500
      .skip_clock_max                : 17201750
    cpu#2
      .skip_clock_count              : 74565
      .skip_clock_recent_max         : 9678000
      .skip_clock_max                : 9879250
    cpu#3
      .skip_clock_count              : 81685
      .skip_clock_recent_max         : 9300125
      .skip_clock_max                : 14115750

On the x86_64 host, I've changed to HZ=100 and am now also seeing delays
close to 10ms after 'make clean && make -j8 bzImage'.

    bbb@koruna:~$ egrep 'cpu#|skip' /proc/sched_debug
    cpu#0, 2493.476 MHz
      .skip_clock_count              : 29703
      .skip_clock_recent_max         : 9999858
      .skip_clock_max                : 40645942
    cpu#1, 2493.476 MHz
      .skip_clock_count              : 32696
      .skip_clock_recent_max         : 9959118
      .skip_clock_max                : 35074771
    cpu#2, 2493.476 MHz
      .skip_clock_count              : 31742
      .skip_clock_recent_max         : 9788654
      .skip_clock_max                : 24821765
    cpu#3, 2493.476 MHz
      .skip_clock_count              : 31123
      .skip_clock_recent_max         : 9858546
      .skip_clock_max                : 44276033
    cpu#4, 2493.476 MHz
      .skip_clock_count              : 28346
      .skip_clock_recent_max         : 10000775
      .skip_clock_max                : 18681753
    cpu#5, 2493.476 MHz
      .skip_clock_count              : 29421
      .skip_clock_recent_max         : 9997656
      .skip_clock_max                : 138473407
    cpu#6, 2493.476 MHz
      .skip_clock_count              : 27721
      .skip_clock_recent_max         : 9992074
      .skip_clock_max                : 53436918
    cpu#7, 2493.476 MHz
      .skip_clock_count              : 29637
      .skip_clock_recent_max         : 9994516
      .skip_clock_max                : 566793528

These numbers were recorded with the below patch.

Please let me know if I can help by testing or tracing something else.

Thanks,
Bjoern

---
sched: show length of runqueue clock deactivation in /proc/sched_debug

The runqueue clock update should obviously not be skipped for
prolonged times (otherwise the consumed time is not correctly kept
track of). This patch measures the time between setting and clearing
the rq->skip_clock_update flag. The maximum observed value is exported
in /proc/sched_debug.
---
 kernel/sched.c       |   30 +++++++++++++++++++++++++++---
 kernel/sched_debug.c |    4 ++++
 2 files changed, 31 insertions(+), 3 deletions(-)

diff --git a/kernel/sched.c b/kernel/sched.c
index 29509e1..08839ce 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -541,6 +541,11 @@ struct rq {
 	unsigned long long rq_cpu_time;
 	/* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? */

+	u64 skip_clock_max;
+	u64 skip_clock_recent_max;
+	u64 skip_clock_set;
+	unsigned int skip_clock_count;
+
 	/* sys_sched_yield() stats */
 	unsigned int yld_count;

@@ -639,6 +644,22 @@ static inline struct task_group *task_group(struct task_struct *p)
 static u64 irq_time_cpu(int cpu);
 static void sched_irq_time_avg_update(struct rq *rq, u64 irq_time);

+static void clear_skip_clock_update(struct rq *rq)
+{
+#ifdef CONFIG_SCHEDSTATS
+	if (unlikely(rq->skip_clock_update)) {
+		u64 skipped = sched_clock_cpu(cpu_of(rq)) - rq->skip_clock_set;
+		rq->skip_clock_max = max(rq->skip_clock_max, skipped);
+		/* reset infrequently to expose changes */
+		if (!(++rq->skip_clock_count % 10000))
+			rq->skip_clock_recent_max = 0;
+		rq->skip_clock_recent_max = max(rq->skip_clock_recent_max,
+						skipped);
+	}
+#endif
+	rq->skip_clock_update = 0;
+}
+
 inline void update_rq_clock(struct rq *rq)
 {
 	if (!rq->skip_clock_update) {
@@ -652,7 +673,7 @@ inline void update_rq_clock(struct rq *rq)

 		sched_irq_time_avg_update(rq, irq_time);
 	}
-	rq->skip_clock_update = 0;
+	clear_skip_clock_update(rq);
 }

 /*
@@ -2130,8 +2151,11 @@ static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
 	 * A queue event has occurred, and we're going to schedule.  In
 	 * this case, we can save a useless back to back clock update.
 	 */
-	if (test_tsk_need_resched(rq->curr))
+	if (test_tsk_need_resched(rq->curr)) {
+		schedstat_set(rq->skip_clock_set,
+			      sched_clock_cpu(cpu_of(rq)));
 		rq->skip_clock_update = 1;
+	}
 }

 #ifdef CONFIG_SMP
@@ -3938,7 +3962,7 @@ need_resched_nonpreemptible:
 		 * Clock updates should not be skipped while we're away.
 		 */
 		clear_tsk_need_resched(prev);
-		rq->skip_clock_update = 0;
+		clear_skip_clock_update(rq);
 	}

 	put_prev_task(rq, prev);
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 2e1b0d1..8924be2 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -298,6 +298,10 @@ static void print_cpu(struct seq_file *m, int cpu)

 	P(bkl_count);

+	P(skip_clock_count);
+	P64(skip_clock_recent_max);
+	P64(skip_clock_max);
+
 #undef P
 #endif
 	print_cfs_stats(m, cpu);
-- 
1.7.3.1
Previous message: [thread] [date] [author]
Next message: [thread] [date] [author]

Messages in current thread:
Scheduler bug related to rq->skip_clock_update?, Bjoern B. Brandenburg, (Sat Nov 20, 9:22 pm)
Re: Scheduler bug related to rq->skip_clock_update?, Mike Galbraith, (Sun Nov 21, 10:14 am)
Re: Scheduler bug related to rq->skip_clock_update?, Bjoern B. Brandenburg, (Sun Nov 21, 9:29 pm)
Re: Scheduler bug related to rq->skip_clock_update?, Mike Galbraith, (Mon Nov 22, 9:19 am)
Re: Scheduler bug related to rq->skip_clock_update?, Bjoern B. Brandenburg, (Mon Nov 22, 11:14 am)
Re: Scheduler bug related to rq->skip_clock_update?, Peter Zijlstra, (Fri Dec 3, 5:41 am)
Re: Scheduler bug related to rq->skip_clock_update?, Mike Galbraith, (Sun Dec 5, 10:33 pm)
Re: Scheduler bug related to rq->skip_clock_update?, Bjoern B. Brandenburg, (Mon Dec 6, 8:40 am)
[tip:sched/urgent] Sched: fix skip_clock_update optimization, tip-bot for Mike Gal ..., (Wed Dec 8, 1:40 pm)