On Tue, Apr 17, 2007 at 02:57:49AM -0700, William Lee Irwin III wrote:
Con helped me dredge up the vdls bits, so here is the last version I
before I got tired of toying with the idea. It's not all that clean,
with a fair amount of debug code floating around and a number of
idiocies (it seems there was a plot to use a heap somewhere I forgot
about entirely, never mind other cruft), but I thought I should at least
say something more provable than "there was a patch I never posted."
Enjoy!
-- wli
diff -prauN linux-2.6.0-test11/fs/proc/array.c sched-2.6.0-test11-5/fs/proc/array.c
--- linux-2.6.0-test11/fs/proc/array.c 2003-11-26 12:44:26.000000000 -0800
+++ sched-2.6.0-test11-5/fs/proc/array.c 2003-12-17 07:37:11.000000000 -0800
@@ -162,7 +162,7 @@ static inline char * task_state(struct t
"Uid:\t%d\t%d\t%d\t%d\n"
"Gid:\t%d\t%d\t%d\t%d\n",
get_task_state(p),
- (p->sleep_avg/1024)*100/(1000000000/1024),
+ 0UL, /* was ->sleep_avg */
p->tgid,
p->pid, p->pid ? p->real_parent->pid : 0,
p->pid && p->ptrace ? p->parent->pid : 0,
@@ -345,7 +345,7 @@ int proc_pid_stat(struct task_struct *ta
read_unlock(&tasklist_lock);
res = sprintf(buffer,"%d (%s) %c %d %d %d %d %d %lu %lu \
%lu %lu %lu %lu %lu %ld %ld %ld %ld %d %ld %llu %lu %ld %lu %lu %lu %lu %lu \
-%lu %lu %lu %lu %lu %lu %lu %lu %d %d %lu %lu\n",
+%lu %lu %lu %lu %lu %lu %lu %lu %d %d %d %d\n",
task->pid,
task->comm,
state,
@@ -390,8 +390,8 @@ int proc_pid_stat(struct task_struct *ta
task->cnswap,
task->exit_signal,
task_cpu(task),
- task->rt_priority,
- task->policy);
+ task_prio(task),
+ task_sched_policy(task));
if(mm)
mmput(mm);
return res;
diff -prauN linux-2.6.0-test11/include/asm-i386/thread_info.h sched-2.6.0-test11-5/include/asm-i386/thread_info.h
--- linux-2.6.0-test11/include/asm-i386/thread_info.h 2003-11-26 12:43:06.000000000 -0800
+++ sched-2.6.0-test11-5/include/asm-i386/thread_info.h 2003-12-17 04:55:22.000000000 -0800
@@ -114,6 +114,8 @@ static inline struct thread_info *curren
#define TIF_SINGLESTEP 4 /* restore singlestep on return to user mode */
#define TIF_IRET 5 /* return with iret */
#define TIF_POLLING_NRFLAG 16 /* true if poll_idle() is polling TIF_NEED_RESCHED */
+#define TIF_QUEUED 17
+#define TIF_PREEMPT 18
#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
#define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME)
diff -prauN linux-2.6.0-test11/include/linux/binomial.h sched-2.6.0-test11-5/include/linux/binomial.h
--- linux-2.6.0-test11/include/linux/binomial.h 1969-12-31 16:00:00.000000000 -0800
+++ sched-2.6.0-test11-5/include/linux/binomial.h 2003-12-20 15:53:33.000000000 -0800
@@ -0,0 +1,16 @@
+/*
+ * Simple binomial heaps.
+ */
+
+struct binomial {
+ unsigned priority, degree;
+ struct binomial *parent, *child, *sibling;
+};
+
+
+struct binomial *binomial_minimum(struct binomial **);
+void binomial_union(struct binomial **, struct binomial **, struct binomial **);
+void binomial_insert(struct binomial **, struct binomial *);
+struct binomial *binomial_extract_min(struct binomial **);
+void binomial_decrease(struct binomial **, struct binomial *, unsigned);
+void binomial_delete(struct binomial **, struct binomial *);
diff -prauN linux-2.6.0-test11/include/linux/init_task.h sched-2.6.0-test11-5/include/linux/init_task.h
--- linux-2.6.0-test11/include/linux/init_task.h 2003-11-26 12:42:58.000000000 -0800
+++ sched-2.6.0-test11-5/include/linux/init_task.h 2003-12-18 05:51:16.000000000 -0800
@@ -56,6 +56,12 @@
.siglock = SPIN_LOCK_UNLOCKED, \
}
+#define INIT_SCHED_INFO(info) \
+{ \
+ .run_list = LIST_HEAD_INIT((info).run_list), \
+ .policy = 1 /* SCHED_POLICY_TS */, \
+}
+
/*
* INIT_TASK is used to set up the first task table, touch at
* your own risk!. Base=0, limit=0x1fffff (=2MB)
@@ -67,14 +73,10 @@
.usage = ATOMIC_INIT(2), \
.flags = 0, \
.lock_depth = -1, \
- .prio = MAX_PRIO-20, \
- .static_prio = MAX_PRIO-20, \
- .policy = SCHED_NORMAL, \
+ .sched_info = INIT_SCHED_INFO(tsk.sched_info), \
.cpus_allowed = CPU_MASK_ALL, \
.mm = NULL, \
.active_mm = &init_mm, \
- .run_list = LIST_HEAD_INIT(tsk.run_list), \
- .time_slice = HZ, \
.tasks = LIST_HEAD_INIT(tsk.tasks), \
.ptrace_children= LIST_HEAD_INIT(tsk.ptrace_children), \
.ptrace_list = LIST_HEAD_INIT(tsk.ptrace_list), \
diff -prauN linux-2.6.0-test11/include/linux/sched.h sched-2.6.0-test11-5/include/linux/sched.h
--- linux-2.6.0-test11/include/linux/sched.h 2003-11-26 12:42:58.000000000 -0800
+++ sched-2.6.0-test11-5/include/linux/sched.h 2003-12-23 03:47:45.000000000 -0800
@@ -126,6 +126,8 @@ extern unsigned long nr_iowait(void);
#define SCHED_NORMAL 0
#define SCHED_FIFO 1
#define SCHED_RR 2
+#define SCHED_BATCH 3
+#define SCHED_IDLE 4
struct sched_param {
int sched_priority;
@@ -281,10 +283,14 @@ struct signal_struct {
#define MAX_USER_RT_PRIO 100
#define MAX_RT_PRIO MAX_USER_RT_PRIO
-
-#define MAX_PRIO (MAX_RT_PRIO + 40)
-
-#define rt_task(p) ((p)->prio < MAX_RT_PRIO)
+#define NICE_QLEN 128
+#define MIN_TS_PRIO MAX_RT_PRIO
+#define MAX_TS_PRIO (40*NICE_QLEN)
+#define MIN_BATCH_PRIO (MAX_RT_PRIO + MAX_TS_PRIO)
+#define MAX_BATCH_PRIO 100
+#define MAX_PRIO (MIN_BATCH_PRIO + MAX_BATCH_PRIO)
+#define USER_PRIO(prio) ((prio) - MAX_RT_PRIO)
+#define MAX_USER_PRIO USER_PRIO(MAX_PRIO)
/*
* Some day this will be a full-fledged user tracking system..
@@ -330,6 +336,36 @@ struct k_itimer {
struct io_context; /* See blkdev.h */
void exit_io_context(void);
+struct rt_data {
+ int prio, rt_policy;
+ unsigned long quantum, ticks;
+};
+
+/* XXX: do %cpu estimation for ts wakeup levels */
+struct ts_data {
+ int nice;
+ unsigned long ticks, frac_cpu;
+ unsigned long sample_start, sample_ticks;
+};
+
+struct bt_data {
+ int prio;
+ unsigned long ticks;
+};
+
+union class_data {
+ struct rt_data rt;
+ struct ts_data ts;
+ struct bt_data bt;
+};
+
+struct sched_info {
+ int idx; /* queue index, used by all classes */
+ unsigned long policy; /* scheduling policy */
+ struct list_head run_list; /* list links for priority queues */
+ union class_data cl_data; /* class-specific data */
+};
+
struct task_struct {
volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */
struct thread_info *thread_info;
@@ -339,18 +375,9 @@ struct task_struct {
int lock_depth; /* Lock depth */
- int prio, static_prio;
- struct list_head run_list;
- prio_array_t *array;
-
- unsigned long sleep_avg;
- long interactive_credit;
- unsigned long long timestamp;
- int activated;
+ struct sched_info sched_info;
- unsigned long policy;
cpumask_t cpus_allowed;
- unsigned int time_slice, first_time_slice;
struct list_head tasks;
struct list_head ptrace_children;
@@ -391,7 +418,6 @@ struct task_struct {
int __user *set_child_tid; /* CLONE_CHILD_SETTID */
int __user *clear_child_tid; /* CLONE_CHILD_CLEARTID */
- unsigned long rt_priority;
unsigned long it_real_value, it_prof_value, it_virt_value;
unsigned long it_real_incr, it_prof_incr, it_virt_incr;
struct timer_list real_timer;
@@ -520,12 +546,14 @@ extern void node_nr_running_init(void);
#define node_nr_running_init() {}
#endif
-extern void set_user_nice(task_t *p, long nice);
-extern int task_prio(task_t *p);
-extern int task_nice(task_t *p);
-extern int task_curr(task_t *p);
-extern int idle_cpu(int cpu);
-
+void set_user_nice(task_t *task, long nice);
+int task_prio(task_t *task);
+int task_nice(task_t *task);
+int task_sched_policy(task_t *task);
+void set_task_sched_policy(task_t *task, int policy);
+int rt_task(task_t *task);
+int task_curr(task_t *task);
+int idle_cpu(int cpu);
void yield(void);
/*
@@ -844,6 +872,21 @@ static inline int need_resched(void)
return unlikely(test_thread_flag(TIF_NEED_RESCHED));
}
+static inline void set_task_queued(task_t *task)
+{
+ set_tsk_thread_flag(task, TIF_QUEUED);
+}
+
+static inline void clear_task_queued(task_t *task)
+{
+ clear_tsk_thread_flag(task, TIF_QUEUED);
+}
+
+static inline int task_queued(task_t *task)
+{
+ return test_tsk_thread_flag(task, TIF_QUEUED);
+}
+
extern void __cond_resched(void);
static inline void cond_resched(void)
{
diff -prauN linux-2.6.0-test11/kernel/Makefile sched-2.6.0-test11-5/kernel/Makefile
--- linux-2.6.0-test11/kernel/Makefile 2003-11-26 12:43:24.000000000 -0800
+++ sched-2.6.0-test11-5/kernel/Makefile 2003-12-17 03:30:08.000000000 -0800
@@ -6,7 +6,7 @@ obj-y = sched.o fork.o exec_domain.o
exit.o itimer.o time.o softirq.o resource.o \
sysctl.o capability.o ptrace.o timer.o user.o \
signal.o sys.o kmod.o workqueue.o pid.o \
- rcupdate.o intermodule.o extable.o params.o posix-timers.o
+ rcupdate.o intermodule.o extable.o params.o posix-timers.o sched/
obj-$(CONFIG_FUTEX) += futex.o
obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
diff -prauN linux-2.6.0-test11/kernel/exit.c sched-2.6.0-test11-5/kernel/exit.c
--- linux-2.6.0-test11/kernel/exit.c 2003-11-26 12:45:29.000000000 -0800
+++ sched-2.6.0-test11-5/kernel/exit.c 2003-12-17 07:04:02.000000000 -0800
@@ -225,7 +225,7 @@ void reparent_to_init(void)
/* Set the exit signal to SIGCHLD so we signal init on exit */
current->exit_signal = SIGCHLD;
- if ((current->policy == SCHED_NORMAL) && (task_nice(current) < 0))
+ if (task_nice(current) < 0)
set_user_nice(current, 0);
/* cpus_allowed? */
/* rt_priority? */
diff -prauN linux-2.6.0-test11/kernel/fork.c sched-2.6.0-test11-5/kernel/fork.c
--- linux-2.6.0-test11/kernel/fork.c 2003-11-26 12:42:58.000000000 -0800
+++ sched-2.6.0-test11-5/kernel/fork.c 2003-12-23 06:22:59.000000000 -0800
@@ -836,6 +836,9 @@ struct task_struct *copy_process(unsigne
atomic_inc(&p->user->__count);
atomic_inc(&p->user->processes);
+ clear_tsk_thread_flag(p, TIF_SIGPENDING);
+ clear_tsk_thread_flag(p, TIF_QUEUED);
+
/*
* If multiple threads are within copy_process(), then this check
* triggers too late. This doesn't hurt, the check is only there
@@ -861,13 +864,21 @@ struct task_struct *copy_process(unsigne
p->state = TASK_UNINTERRUPTIBLE;
copy_flags(clone_flags, p);
- if (clone_flags & CLONE_IDLETASK)
+ if (clone_flags & CLONE_IDLETASK) {
p->pid = 0;
- else {
+ set_task_sched_policy(p, SCHED_IDLE);
+ } else {
+ if (task_sched_policy(p) == SCHED_IDLE) {
+ memset(&p->sched_info, 0, sizeof(struct sched_info));
+ set_task_sched_policy(p, SCHED_NORMAL);
+ set_user_nice(p, 0);
+ }
p->pid = alloc_pidmap();
if (p->pid == -1)
goto bad_fork_cleanup;
}
+ if (p->pid == 1)
+ BUG_ON(task_nice(p));
retval = -EFAULT;
if (clone_flags & CLONE_PARENT_SETTID)
if (put_user(p->pid, parent_tidptr))
@@ -875,8 +886,7 @@ struct task_struct *copy_process(unsigne
p->proc_dentry = NULL;
- INIT_LIST_HEAD(&p->run_list);
-
+ INIT_LIST_HEAD(&p->sched_info.run_list);
INIT_LIST_HEAD(&p->children);
INIT_LIST_HEAD(&p->sibling);
INIT_LIST_HEAD(&p->posix_timers);
@@ -885,8 +895,6 @@ struct task_struct *copy_process(unsigne
spin_lock_init(&p->alloc_lock);
spin_lock_init(&p->switch_lock);
spin_lock_init(&p->proc_lock);
-
- clear_tsk_thread_flag(p, TIF_SIGPENDING);
init_sigpending(&p->pending);
p->it_real_value = p->it_virt_value = p->it_prof_value = 0;
@@ -898,7 +906,6 @@ struct task_struct *copy_process(unsigne
p->tty_old_pgrp = 0;
p->utime = p->stime = 0;
p->cutime = p->cstime = 0;
- p->array = NULL;
p->lock_depth = -1; /* -1 = no lock */
p->start_time = get_jiffies_64();
p->security = NULL;
@@ -948,33 +955,6 @@ struct task_struct *copy_process(unsigne
p->pdeath_signal = 0;
/*
- * Share the timeslice between parent and child, thus the
- * total amount of pending timeslices in the system doesn't change,
- * resulting in more scheduling fairness.
- */
- local_irq_disable();
- p->time_slice = (current->time_slice + 1) >> 1;
- /*
- * The remainder of the first timeslice might be recovered by
- * the parent if the child exits early enough.
- */
- p->first_time_slice = 1;
- current->time_slice >>= 1;
- p->timestamp = sched_clock();
- if (!current->time_slice) {
- /*
- * This case is rare, it happens when the parent has only
- * a single jiffy left from its timeslice. Taking the
- * runqueue lock is not a problem.
- */
- current->time_slice = 1;
- preempt_disable();
- scheduler_tick(0, 0);
- local_irq_enable();
- preempt_enable();
- } else
- local_irq_enable();
- /*
* Ok, add it to the run-queues and make it
* visible to the rest of the system.
*
diff -prauN linux-2.6.0-test11/kernel/sched/Makefile sched-2.6.0-test11-5/kernel/sched/Makefile
--- linux-2.6.0-test11/kernel/sched/Makefile 1969-12-31 16:00:00.000000000 -0800
+++ sched-2.6.0-test11-5/kernel/sched/Makefile 2003-12-17 03:32:21.000000000 -0800
@@ -0,0 +1 @@
+obj-y = util.o ts.o idle.o rt.o batch.o
diff -prauN linux-2.6.0-test11/kernel/sched/batch.c sched-2.6.0-test11-5/kernel/sched/batch.c
--- linux-2.6.0-test11/kernel/sched/batch.c 1969-12-31 16:00:00.000000000 -0800
+++ sched-2.6.0-test11-5/kernel/sched/batch.c 2003-12-19 21:32:49.000000000 -0800
@@ -0,0 +1,190 @@
+#include <linux/sched.h>
+#include <linux/list.h>
+#include <linux/percpu.h>
+#include <linux/kernel_stat.h>
+#include <asm/page.h>
+#include "queue.h"
+
+struct batch_queue {
+ int base, tasks;
+ task_t *curr;
+ unsigned long bitmap[BITS_TO_LONGS(MAX_BATCH_PRIO)];
+ struct list_head queue[MAX_BATCH_PRIO];
+};
+
+static int batch_quantum = 1024;
+static DEFINE_PER_CPU(struct batch_queue, batch_queues);
+
+static int batch_init(struct policy *policy, int cpu)
+{
+ int k;
+ struct batch_queue *queue = &per_cpu(batch_queues, cpu);
+
+ policy->queue = (struct queue *)queue;
+ for (k = 0; k < MAX_BATCH_PRIO; ++k)
+ INIT_LIST_HEAD(&queue->queue[k]);
+ return 0;
+}
+
+static int batch_tick(struct queue *__queue, task_t *task, int user_ticks, int sys_ticks)
+{
+ struct batch_queue *queue = (struct batch_queue *)__queue;
+ struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
+
+ cpustat->nice += user_ticks;
+ cpustat->system += sys_ticks;
+
+ task->sched_info.cl_data.bt.ticks--;
+ if (!task->sched_info.cl_data.bt.ticks) {
+ int new_idx;
+
+ task->sched_info.cl_data.bt.ticks = batch_quantum;
+ new_idx = (task->sched_info.idx + task->sched_info.cl_data.bt.prio)
+ % MAX_BATCH_PRIO;
+ if (!test_bit(new_idx, queue->bitmap))
+ __set_bit(new_idx, queue->bitmap);
+ list_move_tail(&task->sched_info.run_list,
+ &queue->queue[new_idx]);
+ if (list_empty(&queue->queue[task->sched_info.idx]))
+ __clear_bit(task->sched_info.idx, queue->bitmap);
+ task->sched_info.idx = new_idx;
+ queue->base = find_first_circular_bit(queue->bitmap,
+ queue->base,
+ MAX_BATCH_PRIO);
+ set_need_resched();
+ }
+ return 0;
+}
+
+static void batch_yield(struct queue *__queue, task_t *task)
+{
+ int new_idx;
+ struct batch_queue *queue = (struct batch_queue *)__queue;
+
+ new_idx = (queue->base + MAX_BATCH_PRIO - 1) % MAX_BATCH_PRIO;
+ if (!test_bit(new_idx, queue->bitmap))
+ __set_bit(new_idx, queue->bitmap);
+ list_move_tail(&task->sched_info.run_list, &queue->queue[new_idx]);
+ if (list_empty(&queue->queue[task->sched_info.idx]))
+ __clear_bit(task->sched_info.idx, queue->bitmap);
+ task->sched_info.idx = new_idx;
+ queue->base = find_first_circular_bit(queue->bitmap,
+ queue->base,
+ MAX_BATCH_PRIO);
+ set_need_resched();
+}
+
+static task_t *batch_curr(struct queue *__queue)
+{
+ struct batch_queue *queue = (struct batch_queue *)__queue;
+ return queue->curr;
+}
+
+static void batch_set_curr(struct queue *__queue, task_t *task)
+{
+ struct batch_queue *queue = (struct batch_queue *)__queue;
+ queue->curr = task;
+}
+
+static task_t *batch_best(struct queue *__queue)
+{
+ int idx;
+ struct batch_queue *queue = (struct batch_queue *)__queue;
+
+ idx = find_first_circular_bit(queue->bitmap,
+ queue->base,
+ MAX_BATCH_PRIO);
+ BUG_ON(idx >= MAX_BATCH_PRIO);
+ BUG_ON(list_empty(&queue->queue[idx]));
+ return list_entry(queue->queue[idx].next, task_t, sched_info.run_list);
+}
+
+static void batch_enqueue(struct queue *__queue, task_t *task)
+{
+ int idx;
+ struct batch_queue *queue = (struct batch_queue *)__queue;
+
+ idx = (queue->base + task->sched_info.cl_data.bt.prio) % MAX_BATCH_PRIO;
+ if (!test_bit(idx, queue->bitmap))
+ __set_bit(idx, queue->bitmap);
+ list_add_tail(&task->sched_info.run_list, &queue->queue[idx]);
+ task->sched_info.idx = idx;
+ task->sched_info.cl_data.bt.ticks = batch_quantum;
+ queue->tasks++;
+ if (!queue->curr)
+ queue->curr = task;
+}
+
+static void batch_dequeue(struct queue *__queue, task_t *task)
+{
+ struct batch_queue *queue = (struct batch_queue *)__queue;
+ list_del(&task->sched_info.run_list);
+ if (list_empty(&queue->queue[task->sched_info.idx]))
+ __clear_bit(task->sched_info.idx, queue->bitmap);
+ queue->tasks--;
+ if (!queue->tasks)
+ queue->curr = NULL;
+ else if (task == queue->curr)
+ queue->curr = batch_best(__queue);
+}
+
+static int batch_preempt(struct queue *__queue, task_t *task)
+{
+ struct batch_queue *queue = (struct batch_queue *)__queue;
+ if (!queue->curr)
+ return 1;
+ else
+ return task->sched_info.cl_data.bt.prio
+ < queue->curr->sched_info.cl_data.bt.prio;
+}
+
+static int batch_tasks(struct queue *__queue)
+{
+ struct batch_queue *queue = (struct batch_queue *)__queue;
+ return queue->tasks;
+}
+
+static int batch_nice(struct queue *queue, task_t *task)
+{
+ return 20;
+}
+
+static int batch_prio(task_t *task)
+{
+ return USER_PRIO(task->sched_info.cl_data.bt.prio + MIN_BATCH_PRIO);
+}
+
+static void batch_setprio(task_t *task, int prio)
+{
+ BUG_ON(prio < 0);
+ BUG_ON(prio >= MAX_BATCH_PRIO);
+ task->sched_info.cl_data.bt.prio = prio;
+}
+
+struct queue_ops batch_ops = {
+ .init = batch_init,
+ .fini = nop_fini,
+ .tick = batch_tick,
+ .yield = batch_yield,
+ .curr = batch_curr,
+ .set_curr = batch_set_curr,
+ .tasks = batch_tasks,
+ .best = batch_best,
+ .enqueue = batch_enqueue,
+ .dequeue = batch_dequeue,
+ .start_wait = queue_nop,
+ .stop_wait = queue_nop,
+ .sleep = queue_nop,
+ .wake = queue_nop,
+ .preempt = batch_preempt,
+ .nice = batch_nice,
+ .renice = nop_renice,
+ .prio = batch_prio,
+ .setprio = batch_setprio,
+ .timeslice = nop_timeslice,
+ .set_timeslice = nop_set_timeslice,
+};
+
+struct policy batch_policy = {
+ .ops = &batch_ops,
+};
diff -prauN linux-2.6.0-test11/kernel/sched/idle.c sched-2.6.0-test11-5/kernel/sched/idle.c
--- linux-2.6.0-test11/kernel/sched/idle.c 1969-12-31 16:00:00.000000000 -0800
+++ sched-2.6.0-test11-5/kernel/sched/idle.c 2003-12-19 17:31:39.000000000 -0800
@@ -0,0 +1,99 @@
+#include <linux/sched.h>
+#include <linux/list.h>
+#include <linux/percpu.h>
+#include <linux/kernel_stat.h>
+#include <asm/page.h>
+#include "queue.h"
+
+static DEFINE_PER_CPU(task_t *, idle_tasks) = NULL;
+
+static int idle_nice(struct queue *queue, task_t *task)
+{
+ return 20;
+}
+
+static int idle_tasks(struct queue *queue)
+{
+ task_t **idle = (task_t **)queue;
+ return !!(*idle);
+}
+
+static task_t *idle_task(struct queue *queue)
+{
+ return *((task_t **)queue);
+}
+
+static void idle_yield(struct queue *queue, task_t *task)
+{
+ set_need_resched();
+}
+
+static void idle_enqueue(struct queue *queue, task_t *task)
+{
+ task_t **idle = (task_t **)queue;
+ *idle = task;
+}
+
+static void idle_dequeue(struct queue *queue, task_t *task)
+{
+}
+
+static int idle_preempt(struct queue *queue, task_t *task)
+{
+ return 0;
+}
+
+static int idle_tick(struct queue *queue, task_t *task, int user_ticks, int sys_ticks)
+{
+ struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
+ runqueue_t *rq = &per_cpu(runqueues, smp_processor_id());
+
+ if (atomic_read(&rq->nr_iowait) > 0)
+ cpustat->iowait += sys_ticks;
+ else
+ cpustat->idle += sys_ticks;
+ return 1;
+}
+
+static int idle_init(struct policy *policy, int cpu)
+{
+ policy->queue = (struct queue *)&per_cpu(idle_tasks, cpu);
+ return 0;
+}
+
+static int idle_prio(task_t *task)
+{
+ return MAX_USER_PRIO;
+}
+
+static void idle_setprio(task_t *task, int prio)
+{
+}
+
+static struct queue_ops idle_ops = {
+ .init = idle_init,
+ .fini = nop_fini,
+ .tick = idle_tick,
+ .yield = idle_yield,
+ .curr = idle_task,
+ .set_curr = queue_nop,
+ .tasks = idle_tasks,
+ .best = idle_task,
+ .enqueue = idle_enqueue,
+ .dequeue = idle_dequeue,
+ .start_wait = queue_nop,
+ .stop_wait = queue_nop,
+ .sleep = queue_nop,
+ .wake = queue_nop,
+ .preempt = idle_preempt,
+ .nice = idle_nice,
+ .renice = nop_renice,
+ .prio = idle_prio,
+ .setprio = idle_setprio,
+ .timeslice = nop_timeslice,
+ .set_timeslice = nop_set_timeslice,
+};
+
+struct policy idle_policy = {
+ .ops = &idle_ops,
+};
diff -prauN linux-2.6.0-test11/kernel/sched/queue.h sched-2.6.0-test11-5/kernel/sched/queue.h
--- linux-2.6.0-test11/kernel/sched/queue.h 1969-12-31 16:00:00.000000000 -0800
+++ sched-2.6.0-test11-5/kernel/sched/queue.h 2003-12-23 03:58:02.000000000 -0800
@@ -0,0 +1,104 @@
+#define SCHED_POLICY_RT 0
+#define SCHED_POLICY_TS 1
+#define SCHED_POLICY_BATCH 2
+#define SCHED_POLICY_IDLE 3
+
+#define RT_POLICY_FIFO 0
+#define RT_POLICY_RR 1
+
+#define NODE_THRESHOLD 125
+
+struct queue;
+struct queue_ops;
+
+struct policy {
+ struct queue *queue;
+ struct queue_ops *ops;
+};
+
+extern struct policy rt_policy, ts_policy, batch_policy, idle_policy;
+
+struct runqueue {
+ spinlock_t lock;
+ int curr;
+ task_t *__curr;
+ unsigned long policy_bitmap;
+ struct policy *policies[BITS_PER_LONG];
+ unsigned long nr_running, nr_switches, nr_uninterruptible;
+ struct mm_struct *prev_mm;
+ int prev_cpu_load[NR_CPUS];
+#ifdef CONFIG_NUMA
+ atomic_t *node_nr_running;
+ int prev_node_load[MAX_NUMNODES];
+#endif
+ task_t *migration_thread;
+ struct list_head migration_queue;
+
+ atomic_t nr_iowait;
+};
+
+typedef struct runqueue runqueue_t;
+
+struct queue_ops {
+ int (*init)(struct policy *, int);
+ void (*fini)(struct policy *, int);
+ task_t *(*curr)(struct queue *);
+ void (*set_curr)(struct queue *, task_t *);
+ task_t *(*best)(struct queue *);
+ int (*tick)(struct queue *, task_t *, int, int);
+ int (*tasks)(struct queue *);
+ void (*enqueue)(struct queue *, task_t *);
+ void (*dequeue)(struct queue *, task_t *);
+ void (*start_wait)(struct queue *, task_t *);
+ void (*stop_wait)(struct queue *, task_t *);
+ void (*sleep)(struct queue *, task_t *);
+ void (*wake)(struct queue *, task_t *);
+ int (*preempt)(struct queue *, task_t *);
+ void (*yield)(struct queue *, task_t *);
+ int (*prio)(task_t *);
+ void (*setprio)(task_t *, int);
+ int (*nice)(struct queue *, task_t *);
+ void (*renice)(struct queue *, task_t *, int);
+ unsigned long (*timeslice)(struct queue *, task_t *);
+ void (*set_timeslice)(struct queue *, task_t *, unsigned long);
+};
+
+DECLARE_PER_CPU(runqueue_t, runqueues);
+
+int find_first_circular_bit(unsigned long *, int, int);
+void queue_nop(struct queue *, task_t *);
+void nop_renice(struct queue *, task_t *, int);
+void nop_fini(struct policy *, int);
+unsigned long nop_timeslice(struct queue *, task_t *);
+void nop_set_timeslice(struct queue *, task_t *, unsigned long);
+
+/* #define DEBUG_SCHED */
+
+#ifdef DEBUG_SCHED
+#define __check_task_policy(idx) \
+do { \
+ unsigned long __idx__ = (idx); \
+ if (__idx__ > SCHED_POLICY_IDLE) { \
+ printk("invalid policy 0x%lx\n", __idx__); \
+ BUG(); \
+ } \
+} while (0)
+
+#define check_task_policy(task) \
+do { \
+ __check_task_policy((task)->sched_info.policy); \
+} while (0)
+
+#define check_policy(policy) \
+do { \
+ BUG_ON((policy) != &rt_policy && \
+ (policy) != &ts_policy && \
+ (policy) != &batch_policy && \
+ (policy) != &idle_policy); \
+} while (0)
+
+#else /* !DEBUG_SCHED */
+#define __check_task_policy(idx) do { } while (0)
+#define check_task_policy(task) do { } while (0)
+#define check_policy(policy) do { } while (0)
+#endif /* !DEBUG_SCHED */
diff -prauN linux-2.6.0-test11/kernel/sched/rt.c sched-2.6.0-test11-5/kernel/sched/rt.c
--- linux-2.6.0-test11/kernel/sched/rt.c 1969-12-31 16:00:00.000000000 -0800
+++ sched-2.6.0-test11-5/kernel/sched/rt.c 2003-12-19 18:16:07.000000000 -0800
@@ -0,0 +1,208 @@
+#include <linux/sched.h>
+#include <linux/list.h>
+#include <linux/percpu.h>
+#include <linux/kernel_stat.h>
+#include <asm/page.h>
+#include "queue.h"
+
+#ifdef DEBUG_SCHED
+#define check_rt_policy(task) \
+do { \
+ BUG_ON((task)->sched_info.policy != SCHED_POLICY_RT); \
+ BUG_ON((task)->sched_info.cl_data.rt.rt_policy != RT_POLICY_RR \
+ && \
+ (task)->sched_info.cl_data.rt.rt_policy!=RT_POLICY_FIFO); \
+ BUG_ON((task)->sched_info.cl_data.rt.prio < 0); \
+ BUG_ON((task)->sched_info.cl_data.rt.prio >= MAX_RT_PRIO); \
+} while (0)
+#else
+#define check_rt_policy(task) do { } while (0)
+#endif
+
+struct rt_queue {
+ unsigned long bitmap[BITS_TO_LONGS(MAX_RT_PRIO)];
+ struct list_head queue[MAX_RT_PRIO];
+ task_t *curr;
+ int tasks;
+};
+
+static DEFINE_PER_CPU(struct rt_queue, rt_queues);
+
+static int rt_init(struct policy *policy, int cpu)
+{
+ int k;
+ struct rt_queue *queue = &per_cpu(rt_queues, cpu);
+
+ policy->queue = (struct queue *)queue;
+ for (k = 0; k < MAX_RT_PRIO; ++k)
+ INIT_LIST_HEAD(&queue->queue[k]);
+ return 0;
+}
+
+static void rt_yield(struct queue *__queue, task_t *task)
+{
+ struct rt_queue *queue = (struct rt_queue *)__queue;
+ check_rt_policy(task);
+ list_del(&task->sched_info.run_list);
+ if (list_empty(&queue->queue[task->sched_info.cl_data.rt.prio]))
+ set_need_resched();
+ list_add_tail(&task->sched_info.run_list,
+ &queue->queue[task->sched_info.cl_data.rt.prio]);
+ check_rt_policy(task);
+}
+
+static int rt_tick(struct queue *queue, task_t *task, int user_ticks, int sys_ticks)
+{
+ struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
+ check_rt_policy(task);
+ cpustat->user += user_ticks;
+ cpustat->system += sys_ticks;
+ if (task->sched_info.cl_data.rt.rt_policy == RT_POLICY_RR) {
+ task->sched_info.cl_data.rt.ticks--;
+ if (!task->sched_info.cl_data.rt.ticks) {
+ task->sched_info.cl_data.rt.ticks =
+ task->sched_info.cl_data.rt.quantum;
+ rt_yield(queue, task);
+ }
+ }
+ check_rt_policy(task);
+ return 0;
+}
+
+static task_t *rt_curr(struct queue *__queue)
+{
+ struct rt_queue *queue = (struct rt_queue *)__queue;
+ task_t *task = queue->curr;
+ check_rt_policy(task);
+ return task;
+}
+
+static void rt_set_curr(struct queue *__queue, task_t *task)
+{
+ struct rt_queue *queue = (struct rt_queue *)__queue;
+ queue->curr = task;
+ check_rt_policy(task);
+}
+
+static task_t *rt_best(struct queue *__queue)
+{
+ struct rt_queue *queue = (struct rt_queue *)__queue;
+ task_t *task;
+ int idx;
+ idx = find_first_bit(queue->bitmap, MAX_RT_PRIO);
+ BUG_ON(idx >= MAX_RT_PRIO);
+ task = list_entry(queue->queue[idx].next, task_t, sched_info.run_list);
+ check_rt_policy(task);
+ return task;
+}
+
+static void rt_enqueue(struct queue *__queue, task_t *task)
+{
+ struct rt_queue *queue = (struct rt_queue *)__queue;
+ check_rt_policy(task);
+ if (!test_bit(task->sched_info.cl_data.rt.prio, queue->bitmap))
+ __set_bit(task->sched_info.cl_data.rt.prio, queue->bitmap);
+ list_add_tail(&task->sched_info.run_list,
+ &queue->queue[task->sched_info.cl_data.rt.prio]);
+ check_rt_policy(task);
+ queue->tasks++;
+ if (!queue->curr)
+ queue->curr = task;
+}
+
+static void rt_dequeue(struct queue *__queue, task_t *task)
+{
+ struct rt_queue *queue = (struct rt_queue *)__queue;
+ check_rt_policy(task);
+ list_del(&task->sched_info.run_list);
+ if (list_empty(&queue->queue[task->sched_info.cl_data.rt.prio]))
+ __clear_bit(task->sched_info.cl_data.rt.prio, queue->bitmap);
+ queue->tasks--;
+ check_rt_policy(task);
+ if (!queue->tasks)
+ queue->curr = NULL;
+ else if (task == queue->curr)
+ queue->curr = rt_best(__queue);
+}
+
+static int rt_preempt(struct queue *__queue, task_t *task)
+{
+ struct rt_queue *queue = (struct rt_queue *)__queue;
+ check_rt_policy(task);
+ if (!queue->curr)
+ return 1;
+ check_rt_policy(queue->curr);
+ return task->sched_info.cl_data.rt.prio
+ < queue->curr->sched_info.cl_data.rt.prio;
+}
+
+static int rt_tasks(struct queue *__queue)
+{
+ struct rt_queue *queue = (struct rt_queue *)__queue;
+ return queue->tasks;
+}
+
+static int rt_nice(struct queue *queue, task_t *task)
+{
+ check_rt_policy(task);
+ return -20;
+}
+
+static unsigned long rt_timeslice(struct queue *queue, task_t *task)
+{
+ check_rt_policy(task);
+ if (task->sched_info.cl_data.rt.rt_policy != RT_POLICY_RR)
+ return 0;
+ else
+ return task->sched_info.cl_data.rt.quantum;
+}
+
+static void rt_set_timeslice(struct queue *queue, task_t *task, unsigned long n)
+{
+ check_rt_policy(task);
+ if (task->sched_info.cl_data.rt.rt_policy == RT_POLICY_RR)
+ task->sched_info.cl_data.rt.quantum = n;
+ check_rt_policy(task);
+}
+
+static void rt_setprio(task_t *task, int prio)
+{
+ check_rt_policy(task);
+ BUG_ON(prio < 0);
+ BUG_ON(prio >= MAX_RT_PRIO);
+ task->sched_info.cl_data.rt.prio = prio;
+}
+
+static int rt_prio(task_t *task)
+{
+ check_rt_policy(task);
+ return USER_PRIO(task->sched_info.cl_data.rt.prio);
+}
+
+static struct queue_ops rt_ops = {
+ .init = rt_init,
+ .fini = nop_fini,
+ .tick = rt_tick,
+ .yield = rt_yield,
+ .curr = rt_curr,
+ .set_curr = rt_set_curr,
+ .tasks = rt_tasks,
+ .best = rt_best,
+ .enqueue = rt_enqueue,
+ .dequeue = rt_dequeue,
+ .start_wait = queue_nop,
+ .stop_wait = queue_nop,
+ .sleep = queue_nop,
+ .wake = queue_nop,
+ .preempt = rt_preempt,
+ .nice = rt_nice,
+ .renice = nop_renice,
+ .prio = rt_prio,
+ .setprio = rt_setprio,
+ .timeslice = rt_timeslice,
+ .set_timeslice = rt_set_timeslice,
+};
+
+struct policy rt_policy = {
+ .ops = &rt_ops,
+};
diff -prauN linux-2.6.0-test11/kernel/sched/ts.c sched-2.6.0-test11-5/kernel/sched/ts.c
--- linux-2.6.0-test11/kernel/sched/ts.c 1969-12-31 16:00:00.000000000 -0800
+++ sched-2.6.0-test11-5/kernel/sched/ts.c 2003-12-23 08:24:55.000000000 -0800
@@ -0,0 +1,841 @@
+#include <linux/sched.h>
+#include <linux/list.h>
+#include <linux/percpu.h>
+#include <linux/kernel_stat.h>
+#include <asm/page.h>
+#include "queue.h"
+
+#ifdef DEBUG_SCHED
+#define check_ts_policy(task) \
+do { \
+ BUG_ON((task)->sched_info.policy != SCHED_POLICY_TS); \
+} while (0)
+
+#define check_nice(__queue__) \
+({ \
+ int __k__, __count__ = 0; \
+ if ((__queue__)->tasks < 0) { \
+ printk("negative nice task count %d\n", \
+ (__queue__)->tasks); \
+ BUG(); \
+ } \
+ for (__k__ = 0; __k__ < NICE_QLEN; ++__k__) { \
+ task_t *__task__; \
+ if (list_empty(&(__queue__)->queue[__k__])) { \
+ if (test_bit(__k__, (__queue__)->bitmap)) { \
+ printk("wrong nice bit set\n"); \
+ BUG(); \
+ } \
+ } else { \
+ if (!test_bit(__k__, (__queue__)->bitmap)) { \
+ printk("wrong nice bit clear\n"); \
+ BUG(); \
+ } \
+ } \
+ list_for_each_entry(__task__, \
+ &(__queue__)->queue[__k__], \
+ sched_info.run_list) { \
+ check_ts_policy(__task__); \
+ if (__task__->sched_info.idx != __k__) { \
+ printk("nice index mismatch\n"); \
+ BUG(); \
+ } \
+ ++__count__; \
+ } \
+ } \
+ if ((__queue__)->tasks != __count__) { \
+ printk("wrong nice task count\n"); \
+ printk("expected %d, got %d\n", \
+ (__queue__)->tasks, \
+ __count__); \
+ BUG(); \
+ } \
+ __count__; \
+})
+
+#define check_queue(__queue) \
+do { \
+ int __k, __count = 0; \
+ if ((__queue)->tasks < 0) { \
+ printk("negative queue task count %d\n", \
+ (__queue)->tasks); \
+ BUG(); \
+ } \
+ for (__k = 0; __k < 40; ++__k) { \
+ struct nice_queue *__nice; \
+ if (list_empty(&(__queue)->nices[__k])) { \
+ if (test_bit(__k, (__queue)->bitmap)) { \
+ printk("wrong queue bit set\n"); \
+ BUG(); \
+ } \
+ } else { \
+ if (!test_bit(__k, (__queue)->bitmap)) { \
+ printk("wrong queue bit clear\n"); \
+ BUG(); \
+ } \
+ } \
+ list_for_each_entry(__nice, \
+ &(__queue)->nices[__k], \
+ list) { \
+ __count += check_nice(__nice); \
+ if (__nice->idx != __k) { \
+ printk("queue index mismatch\n"); \
+ BUG(); \
+ } \
+ } \
+ } \
+ if ((__queue)->tasks != __count) { \
+ printk("wrong queue task count\n"); \
+ printk("expected %d, got %d\n", \
+ (__queue)->tasks, \
+ __count); \
+ BUG(); \
+ } \
+} while (0)
+
+#else /* !DEBUG_SCHED */
+#define check_ts_policy(task) do { } while (0)
+#define check_nice(nice) do { } while (0)
+#define check_queue(queue) do { } while (0)
+#endif
+
+/*
+ * Hybrid deadline/multilevel scheduling. Cpu utilization
+ * -dependent deadlines at wake. Queue rotation every 50ms or when
+ * demotions empty the highest level, setting demoted deadlines
+ * relative to the new highest level. Intra-level RR quantum at 10ms.
+ */
+struct nice_queue {
+ int idx, nice, base, tasks, level_quantum, expired;
+ unsigned long bitmap[BITS_TO_LONGS(NICE_QLEN)];
+ struct list_head list, queue[NICE_QLEN];
+ task_t *curr;
+};
+
+/*
+ * Deadline schedule nice levels with priority-dependent deadlines,
+ * default quantum of 100ms. Queue rotates at demotions emptying the
+ * highest level, setting the demoted deadline relative to the new
+ * highest level.
+ */
+struct ts_queue {
+ struct nice_queue nice_levels[40];
+ struct list_head nices[40];
+ int base, quantum, tasks;
+ unsigned long bitmap[BITS_TO_LONGS(40)];
+ struct nice_queue *curr;
+};
+
+/*
+ * Make these sysctl-tunable.
+ */
+static int nice_quantum = 100;
+static int rr_quantum = 10;
+static int level_quantum = 50;
+static int sample_interval = HZ;
+
+static DEFINE_PER_CPU(struct ts_queue, ts_queues);
+
+static task_t *nice_best(struct nice_queue *);
+static struct nice_queue *ts_best_nice(struct ts_queue *);
+
+static void nice_init(struct nice_queue *queue)
+{
+ int k;
+
+ INIT_LIST_HEAD(&queue->list);
+ for (k = 0; k < NICE_QLEN; ++k) {
+ INIT_LIST_HEAD(&queue->queue[k]);
+ }
+}
+
+static int ts_init(struct policy *policy, int cpu)
+{
+ int k;
+ struct ts_queue *queue = &per_cpu(ts_queues, cpu);
+
+ policy->queue = (struct queue *)queue;
+ queue->quantum = nice_quantum;
+
+ for (k = 0; k < 40; ++k) {
+ nice_init(&queue->nice_levels[k]);
+ queue->nice_levels[k].nice = k;
+ INIT_LIST_HEAD(&queue->nices[k]);
+ }
+ return 0;
+}
+
+static int task_deadline(task_t *task)
+{
+ u64 frac_cpu = task->sched_info.cl_data.ts.frac_cpu;
+ frac_cpu *= (u64)NICE_QLEN;
+ frac_cpu >>= 32;
+ return (int)min((u32)(NICE_QLEN - 1), (u32)frac_cpu);
+}
+
+static void nice_rotate_queue(struct nice_queue *queue)
+{
+ int idx, new_idx, deadline, idxdiff;
+ task_t *task = queue->curr;
+
+ check_nice(queue);
+
+ /* shit what if idxdiff == NICE_QLEN - 1?? */
+ idx = queue->curr->sched_info.idx;
+ idxdiff = (idx - queue->base + NICE_QLEN) % NICE_QLEN;
+ deadline = min(1 + task_deadline(task), NICE_QLEN - idxdiff - 1);
+ new_idx = (idx + deadline) % NICE_QLEN;
+#if 0
+ if (idx == new_idx) {
+ /*
+ * buggy; it sets queue->base = idx because in this case
+ * we have task_deadline(task) == 0
+ */
+ new_idx = (idx - task_deadline(task) + NICE_QLEN) % NICE_QLEN;
+ if (queue->base != new_idx)
+ queue->base = new_idx;
+ return;
+ }
+ BUG_ON(!deadline);
+ BUG_ON(queue->base <= new_idx && new_idx <= idx);
+ BUG_ON(idx < queue->base && queue->base <= new_idx);
+ BUG_ON(new_idx <= idx && idx < queue->base);
+ if (0 && idx == new_idx) {
+ printk("FUCKUP: pid = %d, tdl = %d, dl = %d, idx = %d, "
+ "base = %d, diff = %d, fcpu = 0x%lx\n",
+ queue->curr->pid,
+ task_deadline(queue->curr),
+ deadline,
+ idx,
+ queue->base,
+ idxdiff,
+ task->sched_info.cl_data.ts.frac_cpu);
+ BUG();
+ }
+#else
+ /*
+ * RR in the last deadline
+ * special-cased so as not to trip BUG_ON()'s below
+ */
+ if (idx == new_idx) {
+ /* if we got here these two things must hold */
+ BUG_ON(idxdiff != NICE_QLEN - 1);
+ BUG_ON(deadline);
+ list_move_tail(&task->sched_info.run_list, &queue->queue[idx]);
+ if (queue->expired) {
+ queue->level_quantum = level_quantum;
+ queue->expired = 0;
+ }
+ return;
+ }
+#endif
+ task->sched_info.idx = new_idx;
+ if (!test_bit(new_idx, queue->bitmap)) {
+ BUG_ON(!list_empty(&queue->queue[new_idx]));
+ __set_bit(new_idx, queue->bitmap);
+ }
+ list_move_tail(&task->sched_info.run_list,
+ &queue->queue[new_idx]);
+
+ /* expired until list drains */
+ if (!list_empty(&queue->queue[idx]))
+ queue->expired = 1;
+ else {
+ int k, w, m = NICE_QLEN % BITS_PER_LONG;
+ BUG_ON(!test_bit(idx, queue->bitmap));
+ __clear_bit(idx, queue->bitmap);
+
+ for (w = 0, k = 0; k < NICE_QLEN/BITS_PER_LONG; ++k)
+ w += hweight_long(queue->bitmap[k]);
+ if (NICE_QLEN % BITS_PER_LONG)
+ w += hweight_long(queue->bitmap[k] & ((1UL << m) - 1));
+ if (w > 1)
+ queue->base = (queue->base + 1) % NICE_QLEN;
+ queue->level_quantum = level_quantum;
+ queue->expired = 0;
+ }
+ check_nice(queue);
+}
+
+static void nice_tick(struct nice_queue *queue, task_t *task)
+{
+ int idx = task->sched_info.idx;
+ BUG_ON(!task_queued(task));
+ BUG_ON(task != queue->curr);
+ BUG_ON(!test_bit(idx, queue->bitmap));
+ BUG_ON(list_empty(&queue->queue[idx]));
+ check_ts_policy(task);
+ check_nice(queue);
+
+ if (task->sched_info.cl_data.ts.ticks)
+ task->sched_info.cl_data.ts.ticks--;
+
+ if (queue->level_quantum > level_quantum) {
+ WARN_ON(1);
+ queue->level_quantum = 1;
+ }
+
+ if (!queue->expired) {
+ if (queue->level_quantum)
+ queue->level_quantum--;
+ } else if (0 && queue->queue[idx].prev != &task->sched_info.run_list) {
+ int queued = 0, new_idx = (queue->base + 1) % NICE_QLEN;
+ task_t *curr, *sav;
+ task_t *victim = list_entry(queue->queue[idx].prev,
+ task_t,
+ sched_info.run_list);
+ victim->sched_info.idx = new_idx;
+ if (!test_bit(new_idx, queue->bitmap))
+ __set_bit(new_idx, queue->bitmap);
+#if 1
+ list_for_each_entry_safe(curr, sav, &queue->queue[new_idx], sched_info.run_list) {
+ if (victim->sched_info.cl_data.ts.frac_cpu
+ < curr->sched_info.cl_data.ts.frac_cpu) {
+ queued = 1;
+ list_move(&victim->sched_info.run_list,
+ curr->sched_info.run_list.prev);
+ break;
+ }
+ }
+ if (!queued)
+ list_move_tail(&victim->sched_info.run_list,
+ &queue->queue[new_idx]);
+#else
+ list_move(&victim->sched_info.run_list, &queue->queue[new_idx]);
+#endif
+ BUG_ON(list_empty(&queue->queue[idx]));
+ }
+
+ if (!queue->level_quantum && !queue->expired) {
+ check_nice(queue);
+ nice_rotate_queue(queue);
+ check_nice(queue);
+ set_need_resched();
+ } else if (!task->sched_info.cl_data.ts.ticks) {
+ int idxdiff = (idx - queue->base + NICE_QLEN) % NICE_QLEN;
+ check_nice(queue);
+ task->sched_info.cl_data.ts.ticks = rr_quantum;
+ BUG_ON(!test_bit(idx, queue->bitmap));
+ BUG_ON(list_empty(&queue->queue[idx]));
+ if (queue->expired)
+ nice_rotate_queue(queue);
+ else if (idxdiff == NICE_QLEN - 1)
+ list_move_tail(&task->sched_info.run_list,
+ &queue->queue[idx]);
+ else {
+ int new_idx = (idx + 1) % NICE_QLEN;
+ list_del(&task->sched_info.run_list);
+ if (list_empty(&queue->queue[idx])) {
+ BUG_ON(!test_bit(idx, queue->bitmap));
+ __clear_bit(idx, queue->bitmap);
+ }
+ if (!test_bit(new_idx, queue->bitmap)) {
+ BUG_ON(!list_empty(&queue->queue[new_idx]));
+ __set_bit(new_idx, queue->bitmap);
+ }
+ task->sched_info.idx = new_idx;
+ list_add(&task->sched_info.run_list,
+ &queue->queue[new_idx]);
+ }
+ check_nice(queue);
+ set_need_resched();
+ }
+ check_nice(queue);
+ check_ts_policy(task);
+}
+
+static void ts_rotate_queue(struct ts_queue *queue)
+{
+ int idx, new_idx, idxdiff, off, deadline;
+
+ queue->base = find_first_circular_bit(queue->bitmap, queue->base, 40);
+
+ /* shit what if idxdiff == 39?? */
+ check_queue(queue);
+ idx = queue->curr->idx;
+ idxdiff = (idx - queue->base + 40) % 40;
+ off = (int)(queue->curr - queue->nice_levels);
+ deadline = min(1 + off, 40 - idxdiff - 1);
+ new_idx = (idx + deadline) % 40;
+ if (idx == new_idx) {
+ new_idx = (idx - off + 40) % 40;
+ if (queue->base != new_idx)
+ queue->base = new_idx;
+ return;
+ }
+ BUG_ON(!deadline);
+ BUG_ON(queue->base <= new_idx && new_idx <= idx);
+ BUG_ON(idx < queue->base && queue->base <= new_idx);
+ BUG_ON(new_idx <= idx && idx < queue->base);
+ if (!test_bit(new_idx, queue->bitmap)) {
+ BUG_ON(!list_empty(&queue->nices[new_idx]));
+ __set_bit(new_idx, queue->bitmap);
+ }
+ list_move_tail(&queue->curr->list, &queue->nices[new_idx]);
+ queue->curr->idx = new_idx;
+
+ if (list_empty(&queue->nices[idx])) {
+ BUG_ON(!test_bit(idx, queue->bitmap));
+ __clear_bit(idx, queue->bitmap);
+ queue->base = (queue->base + 1) % 40;
+ }
+ check_queue(queue);
+}
+
+static int ts_tick(struct queue *__queue, task_t *task, int user_ticks, int sys_ticks)
+{
+ struct ts_queue *queue = (struct ts_queue *)__queue;
+ struct nice_queue *nice = queue->curr;
+ struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
+ int nice_idx = (int)(queue->curr - queue->nice_levels);
+ unsigned long sample_end, delta;
+
+ check_queue(queue);
+ check_ts_policy(task);
+ BUG_ON(!nice);
+ BUG_ON(nice_idx != task->sched_info.cl_data.ts.nice);
+ BUG_ON(!test_bit(nice->idx, queue->bitmap));
+ BUG_ON(list_empty(&queue->nices[nice->idx]));
+
+ sample_end = jiffies;
+ delta = sample_end - task->sched_info.cl_data.ts.sample_start;
+ if (delta)
+ task->sched_info.cl_data.ts.sample_ticks++;
+ else {
+ task->sched_info.cl_data.ts.sample_start = jiffies;
+ task->sched_info.cl_data.ts.sample_ticks = 1;
+ }
+
+ if (delta >= sample_interval) {
+ u64 frac_cpu;
+ frac_cpu = (u64)task->sched_info.cl_data.ts.sample_ticks << 32;
+ do_div(frac_cpu, delta);
+ frac_cpu = 2*frac_cpu + task->sched_info.cl_data.ts.frac_cpu;
+ do_div(frac_cpu, 3);
+ frac_cpu = min(frac_cpu, (1ULL << 32) - 1);
+ task->sched_info.cl_data.ts.frac_cpu = (unsigned long)frac_cpu;
+ task->sched_info.cl_data.ts.sample_start = sample_end;
+ task->sched_info.cl_data.ts.sample_ticks = 0;
+ }
+
+ cpustat->user += user_ticks;
+ cpustat->system += sys_ticks;
+ nice_tick(nice, task);
+ if (queue->quantum > nice_quantum) {
+ queue->quantum = 0;
+ WARN_ON(1);
+ } else if (queue->quantum)
+ queue->quantum--;
+ if (!queue->quantum) {
+ queue->quantum = nice_quantum;
+ ts_rotate_queue(queue);
+ set_need_resched();
+ }
+ check_queue(queue);
+ check_ts_policy(task);
+ return 0;
+}
+
+static void nice_yield(struct nice_queue *queue, task_t *task)
+{
+ int idx, new_idx = (queue->base + NICE_QLEN - 1) % NICE_QLEN;
+
+ check_nice(queue);
+ check_ts_policy(task);
+ if (!test_bit(new_idx, queue->bitmap)) {
+ BUG_ON(!list_empty(&queue->queue[new_idx]));
+ __set_bit(new_idx, queue->bitmap);
+ }
+ list_move_tail(&task->sched_info.run_list, &queue->queue[new_idx]);
+ idx = task->sched_info.idx;
+ task->sched_info.idx = new_idx;
+ set_need_resched();
+
+ if (list_empty(&queue->queue[idx])) {
+ BUG_ON(!test_bit(idx, queue->bitmap));
+ __clear_bit(idx, queue->bitmap);
+ }
+ queue->curr = nice_best(queue);
+#if 0
+ if (queue->curr->sched_info.idx != queue->base)
+ queue->base = queue->curr->sched_info.idx;
+#endif
+ check_nice(queue);
+ check_ts_policy(task);
+}
+
+/*
+ * This is somewhat problematic; nice_yield() only parks tasks on
+ * the end of their current nice levels.
+ */
+static void ts_yield(struct queue *__queue, task_t *task)
+{
+ struct ts_queue *queue = (struct ts_queue *)__queue;
+ struct nice_queue *nice = queue->curr;
+
+ check_queue(queue);
+ check_ts_policy(task);
+ nice_yield(nice, task);
+
+ /*
+ * If there's no one to yield to, move the whole nice level.
+ * If this is problematic, setting nice-dependent deadlines
+ * on a single unified queue may be in order.
+ */
+ if (nice->tasks == 1) {
+ int idx, new_idx = (queue->base + 40 - 1) % 40;
+ idx = nice->idx;
+ if (!test_bit(new_idx, queue->bitmap)) {
+ BUG_ON(!list_empty(&queue->nices[new_idx]));
+ __set_bit(new_idx, queue->bitmap);
+ }
+ list_move_tail(&nice->list, &queue->nices[new_idx]);
+ if (list_empty(&queue->nices[idx])) {
+ BUG_ON(!test_bit(idx, queue->bitmap));
+ __clear_bit(idx, queue->bitmap);
+ }
+ nice->idx = new_idx;
+ queue->base = find_first_circular_bit(queue->bitmap,
+ queue->base,
+ 40);
+ BUG_ON(queue->base >= 40);
+ BUG_ON(!test_bit(queue->base, queue->bitmap));
+ queue->curr = ts_best_nice(queue);
+ }
+ check_queue(queue);
+ check_ts_policy(task);
+}
+
+static task_t *ts_curr(struct queue *__queue)
+{
+ struct ts_queue *queue = (struct ts_queue *)__queue;
+ task_t *task = queue->curr->curr;
+ check_queue(queue);
+ if (task)
+ check_ts_policy(task);
+ return task;
+}
+
+static void ts_set_curr(struct queue *__queue, task_t *task)
+{
+ struct ts_queue *queue = (struct ts_queue *)__queue;
+ struct nice_queue *nice;
+ check_queue(queue);
+ check_ts_policy(task);
+ nice = &queue->nice_levels[task->sched_info.cl_data.ts.nice];
+ queue->curr = nice;
+ nice->curr = task;
+ check_queue(queue);
+ check_ts_policy(task);
+}
+
+static task_t *nice_best(struct nice_queue *queue)
+{
+ task_t *task;
+ int idx = find_first_circular_bit(queue->bitmap,
+ queue->base,
+ NICE_QLEN);
+ check_nice(queue);
+ if (idx >= NICE_QLEN)
+ return NULL;
+ BUG_ON(list_empty(&queue->queue[idx]));
+ BUG_ON(!test_bit(idx, queue->bitmap));
+ task = list_entry(queue->queue[idx].next, task_t, sched_info.run_list);
+ check_nice(queue);
+ check_ts_policy(task);
+ return task;
+}
+
+static struct nice_queue *ts_best_nice(struct ts_queue *queue)
+{
+ int idx = find_first_circular_bit(queue->bitmap, queue->base, 40);
+ check_queue(queue);
+ if (idx >= 40)
+ return NULL;
+ BUG_ON(list_empty(&queue->nices[idx]));
+ BUG_ON(!test_bit(idx, queue->bitmap));
+ return list_entry(queue->nices[idx].next, struct nice_queue, list);
+}
+
+static task_t *ts_best(struct queue *__queue)
+{
+ struct ts_queue *queue = (struct ts_queue *)__queue;
+ struct nice_queue *nice = ts_best_nice(queue);
+ return nice ? nice_best(nice) : NULL;
+}
+
+static void nice_enqueue(struct nice_queue *queue, task_t *task)
+{
+ task_t *curr, *sav;
+ int queued = 0, idx, deadline, base, idxdiff;
+ check_nice(queue);
+ check_ts_policy(task);
+
+ /* don't livelock when queue->expired */
+ deadline = min(!!queue->expired + task_deadline(task), NICE_QLEN - 1);
+ idx = (queue->base + deadline) % NICE_QLEN;
+
+ if (!test_bit(idx, queue->bitmap)) {
+ BUG_ON(!list_empty(&queue->queue[idx]));
+ __set_bit(idx, queue->bitmap);
+ }
+
+#if 1
+ /* keep nice level's queue sorted -- use binomial heaps here soon */
+ list_for_each_entry_safe(curr, sav, &queue->queue[idx], sched_info.run_list) {
+ if (task->sched_info.cl_data.ts.frac_cpu
+ >= curr->sched_info.cl_data.ts.frac_cpu) {
+ list_add(&task->sched_info.run_list,
+ curr->sched_info.run_list.prev);
+ queued = 1;
+ break;
+ }
+ }
+ if (!queued)
+ list_add_tail(&task->sched_info.run_list, &queue->queue[idx]);
+#else
+ list_add_tail(&task->sched_info.run_list, &queue->queue[idx]);
+#endif
+ task->sched_info.idx = idx;
+ /* if (!task->sched_info.cl_data.ts.ticks) */
+ task->sched_info.cl_data.ts.ticks = rr_quantum;
+
+ if (queue->tasks)
+ BUG_ON(!queue->curr);
+ else {
+ BUG_ON(queue->curr);
+ queue->curr = task;
+ }
+ queue->tasks++;
+ check_nice(queue);
+ check_ts_policy(task);
+}
+
+static void ts_enqueue(struct queue *__queue, task_t *task)
+{
+ struct ts_queue *queue = (struct ts_queue *)__queue;
+ struct nice_queue *nice;
+
+ check_queue(queue);
+ check_ts_policy(task);
+ nice = &queue->nice_levels[task->sched_info.cl_data.ts.nice];
+ if (!nice->tasks) {
+ int idx = (queue->base + task->sched_info.cl_data.ts.nice) % 40;
+ if (!test_bit(idx, queue->bitmap)) {
+ BUG_ON(!list_empty(&queue->nices[idx]));
+ __set_bit(idx, queue->bitmap);
+ }
+ list_add_tail(&nice->list, &queue->nices[idx]);
+ nice->idx = idx;
+ if (!queue->curr)
+ queue->curr = nice;
+ }
+ nice_enqueue(nice, task);
+ queue->tasks++;
+ queue->base = find_first_circular_bit(queue->bitmap, queue->base, 40);
+ check_queue(queue);
+ check_ts_policy(task);
+}
+
+static void nice_dequeue(struct nice_queue *queue, task_t *task)
+{
+ check_nice(queue);
+ check_ts_policy(task);
+ list_del(&task->sched_info.run_list);
+ if (list_empty(&queue->queue[task->sched_info.idx])) {
+ BUG_ON(!test_bit(task->sched_info.idx, queue->bitmap));
+ __clear_bit(task->sched_info.idx, queue->bitmap);
+ }
+ queue->tasks--;
+ if (task == queue->curr) {
+ queue->curr = nice_best(queue);
+#if 0
+ if (queue->curr)
+ queue->base = queue->curr->sched_info.idx;
+#endif
+ }
+ check_nice(queue);
+ check_ts_policy(task);
+}
+
+static void ts_dequeue(struct queue *__queue, task_t *task)
+{
+ struct ts_queue *queue = (struct ts_queue *)__queue;
+ struct nice_queue *nice;
+
+ BUG_ON(!queue->tasks);
+ check_queue(queue);
+ check_ts_policy(task);
+ nice = &queue->nice_levels[task->sched_info.cl_data.ts.nice];
+
+ nice_dequeue(nice, task);
+ queue->tasks--;
+ if (!nice->tasks) {
+ list_del_init(&nice->list);
+ if (list_empty(&queue->nices[nice->idx])) {
+ BUG_ON(!test_bit(nice->idx, queue->bitmap));
+ __clear_bit(nice->idx, queue->bitmap);
+ }
+ if (nice == queue->curr)
+ queue->curr = ts_best_nice(queue);
+ }
+ queue->base = find_first_circular_bit(queue->bitmap, queue->base, 40);
+ if (queue->base >= 40)
+ queue->base = 0;
+ check_queue(queue);
+ check_ts_policy(task);
+}
+
+static int ts_tasks(struct queue *__queue)
+{
+ struct ts_queue *queue = (struct ts_queue *)__queue;
+ check_queue(queue);
+ return queue->tasks;
+}
+
+static int ts_nice(struct queue *__queue, task_t *task)
+{
+ int nice = task->sched_info.cl_data.ts.nice - 20;
+ check_ts_policy(task);
+ BUG_ON(nice < -20);
+ BUG_ON(nice >= 20);
+ return nice;
+}
+
+static void ts_renice(struct queue *queue, task_t *task, int nice)
+{
+ check_queue((struct ts_queue *)queue);
+ check_ts_policy(task);
+ BUG_ON(nice < -20);
+ BUG_ON(nice >= 20);
+ task->sched_info.cl_data.ts.nice = nice + 20;
+ check_queue((struct ts_queue *)queue);
+}
+
+static int nice_task_prio(struct nice_queue *nice, task_t *task)
+{
+ if (!task_queued(task))
+ return task_deadline(task);
+ else {
+ int prio = task->sched_info.idx - nice->base;
+ return prio < 0 ? prio + NICE_QLEN : prio;
+ }
+}
+
+static int ts_nice_prio(struct ts_queue *ts, struct nice_queue *nice)
+{
+ if (list_empty(&nice->list))
+ return (int)(nice - ts->nice_levels);
+ else {
+ int prio = nice->idx - ts->base;
+ return prio < 0 ? prio + 40 : prio;
+ }
+}
+
+/* 100% fake priority to report heuristics and the like */
+static int ts_prio(task_t *task)
+{
+ int policy_idx;
+ struct policy *policy;
+ struct ts_queue *ts;
+ struct nice_queue *nice;
+
+ policy_idx = task->sched_info.policy;
+ policy = per_cpu(runqueues, task_cpu(task)).policies[policy_idx];
+ ts = (struct ts_queue *)policy->queue;
+ nice = &ts->nice_levels[task->sched_info.cl_data.ts.nice];
+ return 40*ts_nice_prio(ts, nice) + nice_task_prio(nice, task);
+}
+
+static void ts_setprio(task_t *task, int prio)
+{
+}
+
+static void ts_start_wait(struct queue *__queue, task_t *task)
+{
+}
+
+static void ts_stop_wait(struct queue *__queue, task_t *task)
+{
+}
+
+static void ts_sleep(struct queue *__queue, task_t *task)
+{
+}
+
+static void ts_wake(struct queue *__queue, task_t *task)
+{
+}
+
+static int nice_preempt(struct nice_queue *queue, task_t *task)
+{
+ check_nice(queue);
+ check_ts_policy(task);
+ /* assume FB style preemption at wakeup */
+ if (!task_queued(task) || !queue->curr)
+ return 1;
+ else {
+ int delta_t, delta_q;
+ delta_t = (task->sched_info.idx - queue->base + NICE_QLEN)
+ % NICE_QLEN;
+ delta_q = (queue->curr->sched_info.idx - queue->base
+ + NICE_QLEN)
+ % NICE_QLEN;
+ if (delta_t < delta_q)
+ return 1;
+ else if (task->sched_info.cl_data.ts.frac_cpu
+ < queue->curr->sched_info.cl_data.ts.frac_cpu)
+ return 1;
+ else
+ return 0;
+ }
+ check_nice(queue);
+}
+
+static int ts_preempt(struct queue *__queue, task_t *task)
+{
+ int curr_nice;
+ struct ts_queue *queue = (struct ts_queue *)__queue;
+ struct nice_queue *nice = queue->curr;
+
+ check_queue(queue);
+ check_ts_policy(task);
+ if (!queue->curr)
+ return 1;
+
+ curr_nice = (int)(nice - queue->nice_levels);
+
+ /* preempt when nice number is lower, or the above for matches */
+ if (task->sched_info.cl_data.ts.nice != curr_nice)
+ return task->sched_info.cl_data.ts.nice < curr_nice;
+ else
+ return nice_preempt(nice, task);
+}
+
+static struct queue_ops ts_ops = {
+ .init = ts_init,
+ .fini = nop_fini,
+ .tick = ts_tick,
+ .yield = ts_yield,
+ .curr = ts_curr,
+ .set_curr = ts_set_curr,
+ .tasks = ts_tasks,
+ .best = ts_best,
+ .enqueue = ts_enqueue,
+ .dequeue = ts_dequeue,
+ .start_wait = ts_start_wait,
+ .stop_wait = ts_stop_wait,
+ .sleep = ts_sleep,
+ .wake = ts_wake,
+ .preempt = ts_preempt,
+ .nice = ts_nice,
+ .renice = ts_renice,
+ .prio = ts_prio,
+ .setprio = ts_setprio,
+ .timeslice = nop_timeslice,
+ .set_timeslice = nop_set_timeslice,
+};
+
+struct policy ts_policy = {
+ .ops = &ts_ops,
+};
diff -prauN linux-2.6.0-test11/kernel/sched/util.c sched-2.6.0-test11-5/kernel/sched/util.c
--- linux-2.6.0-test11/kernel/sched/util.c 1969-12-31 16:00:00.000000000 -0800
+++ sched-2.6.0-test11-5/kernel/sched/util.c 2003-12-19 08:43:20.000000000 -0800
@@ -0,0 +1,37 @@
+#include <linux/sched.h>
+#include <linux/list.h>
+#include <linux/percpu.h>
+#include <asm/page.h>
+#include "queue.h"
+
+int find_first_circular_bit(unsigned long *addr, int start, int end)
+{
+ int bit = find_next_bit(addr, end, start);
+ if (bit < end)
+ return bit;
+ bit = find_first_bit(addr, start);
+ if (bit < start)
+ return bit;
+ return end;
+}
+
+void queue_nop(struct queue *queue, task_t *task)
+{
+}
+
+void nop_renice(struct queue *queue, task_t *task, int nice)
+{
+}
+
+void nop_fini(struct policy *policy, int cpu)
+{
+}
+
+unsigned long nop_timeslice(struct queue *queue, task_t *task)
+{
+ return 0;
+}
+
+void nop_set_timeslice(struct queue *queue, task_t *task, unsigned long n)
+{
+}
diff -prauN linux-2.6.0-test11/kernel/sched.c sched-2.6.0-test11-5/kernel/sched.c
--- linux-2.6.0-test11/kernel/sched.c 2003-11-26 12:45:17.000000000 -0800
+++ sched-2.6.0-test11-5/kernel/sched.c 2003-12-21 06:06:32.000000000 -0800
@@ -15,6 +15,8 @@
* and per-CPU runqueues. Cleanups and useful suggestions
* by Davide Libenzi, preemptible kernel bits by Robert Love.
* 2003-09-03 Interactivity tuning by Con Kolivas.
+ * 2003-12-17 Total rewrite and generalized scheduler policies
+ * by William Irwin.
*/
#include <linux/mm.h>
@@ -38,6 +40,8 @@
#include <linux/cpu.h>
#include <linux/percpu.h>
+#include "sched/queue.h"
+
#ifdef CONFIG_NUMA
#define cpu_to_node_mask(cpu) node_to_cpumask(cpu_to_node(cpu))
#else
@@ -45,181 +49,79 @@
#endif
/*
- * Convert user-nice values [ -20 ... 0 ... 19 ]
- * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ],
- * and back.
- */
-#define NICE_TO_PRIO(nice) (MAX_RT_PRIO + (nice) + 20)
-#define PRIO_TO_NICE(prio) ((prio) - MAX_RT_PRIO - 20)
-#define TASK_NICE(p) PRIO_TO_NICE((p)->static_prio)
-
-/*
- * 'User priority' is the nice value converted to something we
- * can work with better when scaling various scheduler parameters,
- * it's a [ 0 ... 39 ] range.
- */
-#define USER_PRIO(p) ((p)-MAX_RT_PRIO)
-#define TASK_USER_PRIO(p) USER_PRIO((p)->static_prio)
-#define MAX_USER_PRIO (USER_PRIO(MAX_PRIO))
-#define AVG_TIMESLICE (MIN_TIMESLICE + ((MAX_TIMESLICE - MIN_TIMESLICE) *\
- (MAX_PRIO-1-NICE_TO_PRIO(0))/(MAX_USER_PRIO - 1)))
-
-/*
- * Some helpers for converting nanosecond timing to jiffy resolution
- */
-#define NS_TO_JIFFIES(TIME) ((TIME) / (1000000000 / HZ))
-#define JIFFIES_TO_NS(TIME) ((TIME) * (1000000000 / HZ))
-
-/*
- * These are the 'tuning knobs' of the scheduler:
- *
- * Minimum timeslice is 10 msecs, default timeslice is 100 msecs,
- * maximum timeslice is 200 msecs. Timeslices get refilled after
- * they expire.
- */
-#define MIN_TIMESLICE ( 10 * HZ / 1000)
-#define MAX_TIMESLICE (200 * HZ / 1000)
-#define ON_RUNQUEUE_WEIGHT 30
-#define CHILD_PENALTY 95
-#define PARENT_PENALTY 100
-#define EXIT_WEIGHT 3
-#define PRIO_BONUS_RATIO 25
-#define MAX_BONUS (MAX_USER_PRIO * PRIO_BONUS_RATIO / 100)
-#define INTERACTIVE_DELTA 2
-#define MAX_SLEEP_AVG (AVG_TIMESLICE * MAX_BONUS)
-#define STARVATION_LIMIT (MAX_SLEEP_AVG)
-#define NS_MAX_SLEEP_AVG (JIFFIES_TO_NS(MAX_SLEEP_AVG))
-#define NODE_THRESHOLD 125
-#define CREDIT_LIMIT 100
-
-/*
- * If a task is 'interactive' then we reinsert it in the active
- * array after it has expired its current timeslice. (it will not
- * continue to run immediately, it will still roundrobin with
- * other interactive tasks.)
- *
- * This part scales the interactivity limit depending on niceness.
- *
- * We scale it linearly, offset by the INTERACTIVE_DELTA delta.
- * Here are a few examples of different nice levels:
- *
- * TASK_INTERACTIVE(-20): [1,1,1,1,1,1,1,1,1,0,0]
- * TASK_INTERACTIVE(-10): [1,1,1,1,1,1,1,0,0,0,0]
- * TASK_INTERACTIVE( 0): [1,1,1,1,0,0,0,0,0,0,0]
- * TASK_INTERACTIVE( 10): [1,1,0,0,0,0,0,0,0,0,0]
- * TASK_INTERACTIVE( 19): [0,0,0,0,0,0,0,0,0,0,0]
- *
- * (the X axis represents the possible -5 ... 0 ... +5 dynamic
- * priority range a task can explore, a value of '1' means the
- * task is rated interactive.)
- *
- * Ie. nice +19 tasks can never get 'interactive' enough to be
- * reinserted into the active array. And only heavily CPU-hog nice -20
- * tasks will be expired. Default nice 0 tasks are somewhere between,
- * it takes some effort for them to get interactive, but it's not
- * too hard.
- */
-
-#define CURRENT_BONUS(p) \
- (NS_TO_JIFFIES((p)->sleep_avg) * MAX_BONUS / \
- MAX_SLEEP_AVG)
-
-#ifdef CONFIG_SMP
-#define TIMESLICE_GRANULARITY(p) (MIN_TIMESLICE * \
- (1 << (((MAX_BONUS - CURRENT_BONUS(p)) ? : 1) - 1)) * \
- num_online_cpus())
-#else
-#define TIMESLICE_GRANULARITY(p) (MIN_TIMESLICE * \
- (1 << (((MAX_BONUS - CURRENT_BONUS(p)) ? : 1) - 1)))
-#endif
-
-#define SCALE(v1,v1_max,v2_max) \
- (v1) * (v2_max) / (v1_max)
-
-#define DELTA(p) \
- (SCALE(TASK_NICE(p), 40, MAX_USER_PRIO*PRIO_BONUS_RATIO/100) + \
- INTERACTIVE_DELTA)
-
-#define TASK_INTERACTIVE(p) \
- ((p)->prio <= (p)->static_prio - DELTA(p))
-
-#define JUST_INTERACTIVE_SLEEP(p) \
- (JIFFIES_TO_NS(MAX_SLEEP_AVG * \
- (MAX_BONUS / 2 + DELTA((p)) + 1) / MAX_BONUS - 1))
-
-#define HIGH_CREDIT(p) \
- ((p)->interactive_credit > CREDIT_LIMIT)
-
-#define LOW_CREDIT(p) \
- ((p)->interactive_credit < -CREDIT_LIMIT)
-
-#define TASK_PREEMPTS_CURR(p, rq) \
- ((p)->prio < (rq)->curr->prio)
-
-/*
- * BASE_TIMESLICE scales user-nice values [ -20 ... 19 ]
- * to time slice values.
- *
- * The higher a thread's priority, the bigger timeslices
- * it gets during one round of execution. But even the lowest
- * priority thread gets MIN_TIMESLICE worth of execution time.
- *
- * task_timeslice() is the interface that is used by the scheduler.
- */
-
-#define BASE_TIMESLICE(p) (MIN_TIMESLICE + \
- ((MAX_TIMESLICE - MIN_TIMESLICE) * (MAX_PRIO-1-(p)->static_prio)/(MAX_USER_PRIO - 1)))
-
-static inline unsigned int task_timeslice(task_t *p)
-{
- return BASE_TIMESLICE(p);
-}
-
-/*
- * These are the runqueue data structures:
- */
-
-#define BITMAP_SIZE ((((MAX_PRIO+1+7)/8)+sizeof(long)-1)/sizeof(long))
-
-typedef struct runqueue runqueue_t;
-
-struct prio_array {
- int nr_active;
- unsigned long bitmap[BITMAP_SIZE];
- struct list_head queue[MAX_PRIO];
-};
-
-/*
* This is the main, per-CPU runqueue data structure.
*
* Locking rule: those places that want to lock multiple runqueues
* (such as the load balancing or the thread migration code), lock
* acquire operations must be ordered by ascending &runqueue.
*/
-struct runqueue {
- spinlock_t lock;
- unsigned long nr_running, nr_switches, expired_timestamp,
- nr_uninterruptible;
- task_t *curr, *idle;
- struct mm_struct *prev_mm;
- prio_array_t *active, *expired, arrays[2];
- int prev_cpu_load[NR_CPUS];
-#ifdef CONFIG_NUMA
- atomic_t *node_nr_running;
- int prev_node_load[MAX_NUMNODES];
-#endif
- task_t *migration_thread;
- struct list_head migration_queue;
+DEFINE_PER_CPU(struct runqueue, runqueues);
- atomic_t nr_iowait;
+struct policy *policies[] = {
+ &rt_policy,
+ &ts_policy,
+ &batch_policy,
+ &idle_policy,
+ NULL,
};
-static DEFINE_PER_CPU(struct runqueue, runqueues);
-
#define cpu_rq(cpu) (&per_cpu(runqueues, (cpu)))
#define this_rq() (&__get_cpu_var(runqueues))
#define task_rq(p) cpu_rq(task_cpu(p))
-#define cpu_curr(cpu) (cpu_rq(cpu)->curr)
+#define rq_curr(rq) (rq)->__curr
+#define cpu_curr(cpu) rq_curr(cpu_rq(cpu))
+
+static inline struct policy *task_policy(task_t *task)
+{
+ unsigned long idx;
+ struct policy *policy;
+ idx = task->sched_info.policy;
+ __check_task_policy(idx);
+ policy = task_rq(task)->policies[idx];
+ check_policy(policy);
+ return policy;
+}
+
+static inline struct policy *rq_policy(runqueue_t *rq)
+{
+ unsigned long idx;
+ task_t *task;
+ struct policy *policy;
+
+ task = rq_curr(rq);
+ BUG_ON(!task);
+ BUG_ON((unsigned long)task < PAGE_OFFSET);
+ idx = task->sched_info.policy;
+ __check_task_policy(idx);
+ policy = rq->policies[idx];
+ check_policy(policy);
+ return policy;
+}
+
+static int __task_nice(task_t *task)
+{
+ struct policy *policy = task_policy(task);
+ return policy->ops->nice(policy->queue, task);
+}
+
+static inline void set_rq_curr(runqueue_t *rq, task_t *task)
+{
+ rq->curr = task->sched_info.policy;
+ __check_task_policy(rq->curr);
+ rq->__curr = task;
+}
+
+static inline int task_preempts_curr(task_t *task, runqueue_t *rq)
+{
+ check_task_policy(rq_curr(rq));
+ check_task_policy(task);
+ if (rq_curr(rq)->sched_info.policy != task->sched_info.policy)
+ return task->sched_info.policy < rq_curr(rq)->sched_info.policy;
+ else {
+ struct policy *policy = rq_policy(rq);
+ return policy->ops->preempt(policy->queue, task);
+ }
+}
/*
* Default context-switch locking:
@@ -227,7 +129,7 @@ static DEFINE_PER_CPU(struct runqueue, r
#ifndef prepare_arch_switch
# define prepare_arch_switch(rq, next) do { } while(0)
# define finish_arch_switch(rq, next) spin_unlock_irq(&(rq)->lock)
-# define task_running(rq, p) ((rq)->curr == (p))
+# define task_running(rq, p) (rq_curr(rq) == (p))
#endif
#ifdef CONFIG_NUMA
@@ -320,53 +222,32 @@ static inline void rq_unlock(runqueue_t
}
/*
- * Adding/removing a task to/from a priority array:
+ * Adding/removing a task to/from a policy's queue.