On Sat, 22 Mar 2008, Gabriel C wrote: > Now some time later CPU1 gets woken by an interrupt/IPI and runs the [...] Doh, stupid me. We do not reevaluate the timer wheel, when we just wake up via the smp_reschedule IPI when the resched flag on the other CPU is not set. That's a separate vector which is not going through irq_enter() / irq_exit(). Does the patch below solve the problem? Thanks, tglx --- include/linux/tick.h | 4 +++ kernel/time/tick-sched.c | 50 +++++++++++++++++++++++++++++++++++++++++++++++ kernel/timer.c | 14 ++++++++++++- 3 files changed, 67 insertions(+), 1 deletion(-) Index: linux-2.6/include/linux/tick.h =================================================================== --- linux-2.6.orig/include/linux/tick.h +++ linux-2.6/include/linux/tick.h @@ -111,6 +111,8 @@ extern void tick_nohz_update_jiffies(voi extern ktime_t tick_nohz_get_sleep_length(void); extern void tick_nohz_stop_idle(int cpu); extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time); +extern int tick_nohz_cpu_needs_wakeup(int cpu); +extern void tick_nohz_rescan_timers_on(int cpu); # else static inline void tick_nohz_stop_sched_tick(void) { } static inline void tick_nohz_restart_sched_tick(void) { } @@ -123,6 +125,8 @@ static inline ktime_t tick_nohz_get_slee } static inline void tick_nohz_stop_idle(int cpu) { } static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return 0; } +static inline int tick_nohz_cpu_needs_wakeup(int cpu) { return 0; } +static inline void tick_nohz_rescan_timers_on(int cpu) { } # endif /* !NO_HZ */ #endif Index: linux-2.6/kernel/time/tick-sched.c =================================================================== --- linux-2.6.orig/kernel/time/tick-sched.c +++ linux-2.6/kernel/time/tick-sched.c @@ -183,6 +183,56 @@ u64 get_cpu_idle_time_us(int cpu, u64 *l } /** + * tick_nohz_cpu_needs_wakeup - check possible wakeup of cpu in add_timer_on() + * + * when add_timer_on() happens on a CPU which is in a long idle sleep, + * then we need to wake it up 
so the timer wheel gets reevaluated. + * + * Note: we use idle_cpu() which checks the idle state lockless, but + * we are ordered against the other cpu which might be on the way to + * idle by the timer base lock, which we hold. + */ +int tick_nohz_cpu_needs_wakeup(int cpu) +{ + return tick_nohz_enabled && idle_cpu(cpu) && + (cpu != smp_processor_id()); +} + +/* + * Rescan the timer wheel, when + * + * - the CPU is idle + * - the CPU is not processing an interrupt + * - the need_resched flag is off + */ +static void tick_nohz_rescan_timers(void *unused) +{ + int cpu = smp_processor_id(); + + if (!idle_cpu(cpu) || in_interrupt() || need_resched()) + return; + + tick_nohz_stop_idle(cpu); + tick_nohz_update_jiffies(); + tick_nohz_stop_sched_tick(); +} + +/** + * tick_nohz_rescan_timers_on - reevaluate the idle sleep time of a CPU + * + * When a CPU is idle and a timer got added to this CPU timer wheel + * via add_timer_on() then we need to make sure that the CPU + * reevaluates the timer wheel. Otherwise the timer might be delayed + * for a real long time. 
+ */ +void tick_nohz_rescan_timers_on(int cpu) +{ + if (tick_nohz_enabled && idle_cpu(cpu)) + smp_call_function_single(cpu, tick_nohz_rescan_timers, NULL, + 0, 0); +} + +/** * tick_nohz_stop_sched_tick - stop the idle tick from the idle task * * When the next event is more than a tick into the future, stop the idle tick Index: linux-2.6/kernel/timer.c =================================================================== --- linux-2.6.orig/kernel/timer.c +++ linux-2.6/kernel/timer.c @@ -445,15 +445,27 @@ void add_timer_on(struct timer_list *tim { struct tvec_base *base = per_cpu(tvec_bases, cpu); unsigned long flags; + int wakeidle; timer_stats_timer_set_start_info(timer); BUG_ON(timer_pending(timer) || !timer->function); spin_lock_irqsave(&base->lock, flags); timer_set_base(timer, base); internal_add_timer(base, timer); + /* + * Check whether the other CPU is idle and needs to be + * triggered to reevaluate the timer wheel when nohz is + * active. We are protected against the other CPU fiddling + * with the timer by holding the timer base lock. This also + * makes sure that a CPU on the way to idle can not evaluate + * the timer wheel. + */ + wakeidle = tick_nohz_cpu_needs_wakeup(cpu); spin_unlock_irqrestore(&base->lock, flags); -} + if (wakeidle) + tick_nohz_rescan_timers_on(cpu); +} /** * mod_timer - modify a timer's timeout --
| Eric Paris | [RFC 0/5] [TALPA] Intro to a linux interface for on access scanning |
| Bart Van Assche | Integration of SCST in the mainstream Linux kernel |
| Greg KH | [GIT PATCH] driver core patches against 2.6.24 |
| Chris Mason | Btrfs v0.16 released |
git: | |
| Michael Hendricks | removing content from git history |
| Jakub Narebski | Re: VCS comparison table |
| Ken Pratt | pack operation is thrashing my server |
| Aubrey Li | git proxy issue |
| Kevin Neff | Patching a SSH 'Weakness' |
| GVG GVG | ssh_exchange_identification: Connection closed by remote host |
| Theo de Raadt | Re: dmesg IBM x3650 OpenBSD 4.3 |
| F. Caulier | [Perl/locales] Warning about locales |
| KOSAKI Motohiro | [bug?] tg3: Failed to load firmware "tigon/tg3_tso.bin" |
| Jens Axboe | Re: [BUG] New Kernel Bugs |
| Rémi | [PATCH 0/6] [RFC] Phonet pipes protocol (v2) |
| Oliver Hartkopp | Re: [RFC] Patch to option HSO driver to the kernel |
| Treason Uncloaked | 2 hours ago | Linux kernel |
| Shared swap partition | 13 hours ago | Linux general |
| high memory | 2 days ago | Linux kernel |
| semaphore access speed | 2 days ago | Applications and Utilities |
| the kernel how to power off the machine | 2 days ago | Linux kernel |
| Easter Eggs in windows XP | 2 days ago | Windows |
| Root password | 2 days ago | Linux general |
| Where/when DNOTIFY is used? | 2 days ago | Linux kernel |
| How to convert Linux Kernel built-in module into a loadable module | 2 days ago | Linux kernel |
| Linux 2.6.24 and I/O schedulers | 2 days ago | Linux kernel |
