Hmm, odd. So, here's the debug patch I mentioned. It periodically
checks all work items and reports if any of them is being delayed for
too long. If the max wait goes over 30 seconds, it dumps all task
states and disables itself. Can you please apply the patch on top of
rc2 + wq#for-linus and report the output? It should tell us who's
stuck where.
Thanks.
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index f11100f..282322c 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -83,6 +83,8 @@ struct work_struct {
#ifdef CONFIG_LOCKDEP
struct lockdep_map lockdep_map;
#endif
+ unsigned long queued_on;
+ unsigned long activated_on;
};
#define WORK_DATA_INIT() ATOMIC_LONG_INIT(WORK_STRUCT_NO_CPU)
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index a2dccfc..9f95169 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -913,6 +913,8 @@ static void insert_work(struct cpu_workqueue_struct *cwq,
{
struct global_cwq *gcwq = cwq->gcwq;
+ work->queued_on = work->activated_on = jiffies;
+
/* we own @work, set data and link */
set_work_cwq(work, cwq, extra_flags);
@@ -996,13 +998,14 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
if (likely(cwq->nr_active < cwq->max_active)) {
cwq->nr_active++;
worklist = gcwq_determine_ins_pos(gcwq, cwq);
+ insert_work(cwq, work, worklist, work_flags);
} else {
work_flags |= WORK_STRUCT_DELAYED;
worklist = &cwq->delayed_works;
+ insert_work(cwq, work, worklist, work_flags);
+ work->activated_on--;
}
- insert_work(cwq, work, worklist, work_flags);
-
spin_unlock_irqrestore(&gcwq->lock, flags);
}
@@ -1669,6 +1672,7 @@ static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq)
struct work_struct, entry);
struct list_head *pos = gcwq_determine_ins_pos(cwq->gcwq, cwq);
+ work->activated_on = jiffies;
move_linked_works(work, pos, NULL);
__clear_bit(WORK_STRUCT_DELAYED_BIT, ...