Re: floppy.c soft lockup

Previous message: [thread] [date] [author]
Next message: [thread] [date] [author]
From: Oleg Nesterov
Date: Saturday, June 2, 2007 - 5:30 am

On 06/01, Mark Hounschell wrote:

Please try this patch; it should dump some debug info when flush_workqueue()
hangs (after 30 seconds). You can use it with or without the previous patch
I sent. Please wait for a couple of minutes to collect more info.

Oleg.

--- OLD/kernel/sched.c~TST	2007-04-05 12:20:35.000000000 +0400
+++ OLD/kernel/sched.c	2007-06-02 15:41:53.000000000 +0400
@@ -4177,6 +4177,20 @@ struct task_struct *idle_task(int cpu)
 	return cpu_rq(cpu)->idle;
 }
 
+struct task_struct *get_cpu_curr(int cpu)	/* debug helper: returns cpu_rq(cpu)->curr with a reference held */
+{
+	unsigned long flags;
+	struct task_struct *curr;
+	struct rq *rq = cpu_rq(cpu);
+
+	spin_lock_irqsave(&rq->lock, flags);	/* rq->curr is read under the runqueue lock */
+	curr = rq->curr;
+	get_task_struct(curr);	/* reference is taken before unlocking; the caller drops it with put_task_struct() */
+	spin_unlock_irqrestore(&rq->lock, flags);
+
+	return curr;
+}
+
 /**
  * find_process_by_pid - find a process with a matching PID value.
  * @pid: the pid in question.
--- OLD/kernel/workqueue.c~TST	2007-06-02 13:34:57.000000000 +0400
+++ OLD/kernel/workqueue.c	2007-06-02 16:18:02.000000000 +0400
@@ -49,6 +49,7 @@ struct cpu_workqueue_struct {
 	struct task_struct *thread;
 
 	int run_depth;		/* Detect run_workqueue() recursion depth */
+	int jobs;
 } ____cacheline_aligned;
 
 /*
@@ -253,6 +254,7 @@ static void run_workqueue(struct cpu_wor
 
 		cwq->current_work = work;
 		list_del_init(cwq->worklist.next);
+		cwq->jobs++;
 		spin_unlock_irq(&cwq->lock);
 
 		BUG_ON(get_wq_data(work) != cwq);
@@ -328,7 +330,48 @@ static void insert_wq_barrier(struct cpu
 	insert_work(cwq, &barr->work, tail);
 }
 
-static int flush_cpu_workqueue(struct cpu_workqueue_struct *cwq)
+extern struct task_struct *get_cpu_curr(int cpu);
+
+static void flush_wait(struct cpu_workqueue_struct *cwq, int cpu, struct completion *done)
+{	/* wait for @done; each time 30 seconds pass without completion, dump debug state and wait again */
+	struct task_struct *curr;
+	struct work_struct *work;
+	int old_pid, jobs;
+
+	if (is_single_threaded(cwq->wq))
+		cpu = raw_smp_processor_id();	/* single-threaded wq: inspect the local CPU instead of @cpu */
+
+again:
+	work = cwq->current_work;	/* snapshot (taken without cwq->lock) to compare against after the timeout */
+	jobs = cwq->jobs;
+
+	curr = get_cpu_curr(cpu);
+	old_pid = curr->pid;	/* remember which task was rq->curr before waiting */
+	put_task_struct(curr);	/* drop the reference taken by get_cpu_curr() */
+
+	if (wait_for_completion_timeout(done, HZ * 30))
+		return;	/* completed within the timeout: nothing to report */
+
+	printk(KERN_ERR "ERR!! %s flush hang: %p %p %d %d\n", cwq->thread->comm,
+			work, cwq->current_work, jobs, cwq->jobs);	/* before/after: current_work pointer and jobs counter */
+
+	curr = get_cpu_curr(cpu);
+	printk(KERN_ERR "CURR: %d %d %s %ld %ld\n", old_pid, curr->pid,
+			curr->comm, curr->nivcsw, curr->nvcsw);	/* pid before/after, then the task's comm and context-switch counts */
+	put_task_struct(curr);
+
+	spin_lock_irq(&cwq->lock);	/* hold cwq->lock while walking the worklist */
+	list_for_each_entry(work, &cwq->worklist, entry)
+		print_symbol("    %s\n", (unsigned long) work->func);	/* handler of each still-queued work */
+	printk("    ----\n");
+	if (cwq->current_work)
+		print_symbol("    %s\n", (unsigned long) cwq->current_work->func);	/* handler of the work recorded in current_work */
+	spin_unlock_irq(&cwq->lock);
+
+	goto again;	/* keep waiting (and reporting) until @done completes */
+}
+
+static int flush_cpu_workqueue(struct cpu_workqueue_struct *cwq, int cpu)
 {
 	int active;
 
@@ -351,7 +394,7 @@ static int flush_cpu_workqueue(struct cp
 		spin_unlock_irq(&cwq->lock);
 
 		if (active)
-			wait_for_completion(&barr.done);
+			flush_wait(cwq, cpu, &barr.done);
 	}
 
 	return active;
@@ -377,7 +420,7 @@ void fastcall flush_workqueue(struct wor
 
 	might_sleep();
 	for_each_cpu_mask(cpu, *cpu_map)
-		flush_cpu_workqueue(per_cpu_ptr(wq->cpu_wq, cpu));
+		flush_cpu_workqueue(per_cpu_ptr(wq->cpu_wq, cpu), cpu);
 }
 EXPORT_SYMBOL_GPL(flush_workqueue);
 
@@ -748,7 +791,7 @@ static void cleanup_workqueue_thread(str
 	 * checks list_empty(), and a "normal" queue_work() can't use
 	 * a dead CPU.
 	 */
-	while (flush_cpu_workqueue(cwq))
+	while (flush_cpu_workqueue(cwq, cpu))
 		;
 
 	kthread_stop(cwq->thread);

-
Previous message: [thread] [date] [author]
Next message: [thread] [date] [author]

Messages in current thread:
floppy.c soft lockup, Mark Hounschell, (Tue May 29, 10:31 am)
Re: floppy.c soft lockup, Andrew Morton, (Wed May 30, 10:46 pm)
Re: floppy.c soft lockup, Mark Hounschell, (Thu May 31, 7:28 am)
Re: floppy.c soft lockup, Oleg Nesterov, (Thu May 31, 10:06 am)
Re: floppy.c soft lockup, Mark Hounschell, (Thu May 31, 11:01 am)
Re: floppy.c soft lockup, Mark Hounschell, (Thu May 31, 11:44 am)
Re: floppy.c soft lockup, Oleg Nesterov, (Thu May 31, 12:22 pm)
Re: floppy.c soft lockup, Mark Hounschell, (Thu May 31, 1:18 pm)
Re: floppy.c soft lockup, Mark Hounschell, (Fri Jun 1, 2:51 am)
Re: floppy.c soft lockup, Oleg Nesterov, (Fri Jun 1, 4:00 am)
Re: floppy.c soft lockup, Mark Hounschell, (Fri Jun 1, 7:10 am)
Re: floppy.c soft lockup, Oleg Nesterov, (Fri Jun 1, 8:16 am)
Re: floppy.c soft lockup, Mark Hounschell, (Fri Jun 1, 10:11 am)
Re: floppy.c soft lockup, Oleg Nesterov, (Fri Jun 1, 11:36 am)
Re: floppy.c soft lockup, Mark Hounschell, (Fri Jun 1, 12:52 pm)
Re: floppy.c soft lockup, Oleg Nesterov, (Sat Jun 2, 5:30 am)
Re: floppy.c soft lockup, Mark Hounschell, (Sat Jun 2, 1:44 pm)
Re: floppy.c soft lockup, Oleg Nesterov, (Sun Jun 3, 1:14 am)
Re: floppy.c soft lockup, Mark Hounschell, (Mon Jun 4, 7:00 am)
Re: floppy.c soft lockup, Mark Hounschell, (Wed Jun 6, 6:12 am)
Re: floppy.c soft lockup, Andrew Morton, (Wed Jun 6, 10:28 am)
Re: floppy.c soft lockup, Matt Mackall, (Wed Jun 6, 6:31 pm)
Re: floppy.c soft lockup, Mark Hounschell, (Thu Jun 7, 3:18 am)
Re: floppy.c soft lockup, Matt Mackall, (Thu Jun 7, 7:25 am)
Re: floppy.c soft lockup, Mark Hounschell, (Fri Jun 8, 2:54 am)
Re: floppy.c soft lockup, Oleg Nesterov, (Wed Jun 13, 9:17 am)