/*
 *  kernel/sched.c
 *
 *  Kernel scheduler and related syscalls
 *
 *  Copyright (C) 1991-2002  Linus Torvalds
 *
 *  1996-12-23  Modified by Dave Grothe to fix bugs in semaphores and
 *              make semaphores SMP safe
 *  1998-11-19  Implemented schedule_timeout() and related stuff
 *              by Andrea Arcangeli
 *  2002-01-04  New ultra-scalable O(1) scheduler by Ingo Molnar:
 *              hybrid priority-list and round-robin design with
 *              an array-switch method of distributing timeslices
 *              and per-CPU runqueues.  Additional code by Davide
 *              Libenzi, Robert Love, and Rusty Russell.
 */

#include <linux/mm.h>
#include <linux/nmi.h>
#include <linux/init.h>
#include <asm/uaccess.h>
#include <linux/highmem.h>
#include <linux/smp_lock.h>
#include <asm/mmu_context.h>
#include <linux/interrupt.h>
#include <linux/completion.h>
#include <linux/kernel_stat.h>
#include <linux/trace.h>

/*
 * Convert user-nice values [ -20 ... 0 ... 19 ]
 * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ],
 * and back.
 */
#define NICE_TO_PRIO(nice)	(MAX_RT_PRIO + (nice) + 20)
#define PRIO_TO_NICE(prio)	((prio) - MAX_RT_PRIO - 20)
#define TASK_NICE(p)		PRIO_TO_NICE((p)->static_prio)

/*
 * 'User priority' is the nice value converted to something we
 * can work with better when scaling various scheduler parameters,
 * it's a [ 0 ... 39 ] range.
 */
#define USER_PRIO(p)		((p)-MAX_RT_PRIO)
#define TASK_USER_PRIO(p)	USER_PRIO((p)->static_prio)
#define MAX_USER_PRIO		(USER_PRIO(MAX_PRIO))

/*
 * These are the 'tuning knobs' of the scheduler:
 *
 * Minimum timeslice is 10 msecs, default timeslice is 150 msecs,
 * maximum timeslice is 300 msecs. Timeslices get refilled after
 * they expire.
 */
#define MIN_TIMESLICE		( 10 * HZ / 1000)
#define MAX_TIMESLICE		(300 * HZ / 1000)
#define CHILD_PENALTY		95
#define PARENT_PENALTY		100
#define EXIT_WEIGHT		3
#define PRIO_BONUS_RATIO	25
#define INTERACTIVE_DELTA	2
#define MAX_SLEEP_AVG		(2*HZ)
#define STARVATION_LIMIT	(2*HZ)

/*
 * If a task is 'interactive' then we reinsert it in the active
 * array after it has expired its current timeslice. (it will not
 * continue to run immediately, it will still roundrobin with
 * other interactive tasks.)
 *
 * This part scales the interactivity limit depending on niceness.
 *
 * We scale it linearly, offset by the INTERACTIVE_DELTA delta.
 * Here are a few examples of different nice levels:
 *
 *  TASK_INTERACTIVE(-20): [1,1,1,1,1,1,1,1,1,0,0]
 *  TASK_INTERACTIVE(-10): [1,1,1,1,1,1,1,0,0,0,0]
 *  TASK_INTERACTIVE(  0): [1,1,1,1,0,0,0,0,0,0,0]
 *  TASK_INTERACTIVE( 10): [1,1,0,0,0,0,0,0,0,0,0]
 *  TASK_INTERACTIVE( 19): [0,0,0,0,0,0,0,0,0,0,0]
 *
 * (the X axis represents the possible -5 ... 0 ... +5 dynamic
 *  priority range a task can explore, a value of '1' means the
 *  task is rated interactive.)
 *
 * Ie. nice +19 tasks can never get 'interactive' enough to be
 * reinserted into the active array. And only heavily CPU-hog nice -20
 * tasks will be expired. Default nice 0 tasks are somewhere between,
 * it takes some effort for them to get interactive, but it's not
 * too hard.
 */
#define SCALE(v1,v1_max,v2_max) \
	(v1) * (v2_max) / (v1_max)

#define DELTA(p) \
	(SCALE(TASK_NICE(p), 40, MAX_USER_PRIO*PRIO_BONUS_RATIO/100) + \
		INTERACTIVE_DELTA)

#define TASK_INTERACTIVE(p) \
	((p)->prio <= (p)->static_prio - DELTA(p))

/*
 * BASE_TIMESLICE scales user-nice values [ -20 ... 19 ]
 * to time slice values.
 *
 * The higher a thread's priority, the bigger timeslices
 * it gets during one round of execution. But even the lowest
 * priority thread gets MIN_TIMESLICE worth of execution time.
 *
 * task_timeslice() is the interface that is used by the scheduler.
 */
#define BASE_TIMESLICE(p) (MIN_TIMESLICE + \
	((MAX_TIMESLICE - MIN_TIMESLICE) * \
	 (MAX_PRIO-1-(p)->static_prio)/(MAX_USER_PRIO - 1)))

static inline unsigned int task_timeslice(task_t *p)
{
	return BASE_TIMESLICE(p);
}
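/*
 * Editorial worked example (not part of the original source): plugging
 * in the values these constants have on a typical 2.5-era config --
 * MAX_RT_PRIO == 100, MAX_PRIO == 140 (so MAX_USER_PRIO == 40) and
 * HZ == 1000 -- shows how nice levels map to timeslices:
 *
 *   nice -20: static_prio = 100, task_timeslice() = 10 + 290*39/39 = 300 msecs
 *   nice   0: static_prio = 120, task_timeslice() = 10 + 290*19/39 ~= 150 msecs
 *   nice +19: static_prio = 139, task_timeslice() = 10 + 290* 0/39 =  10 msecs
 *
 * Under the same assumptions DELTA(p) for a nice 0 task is
 * SCALE(0, 40, 10) + INTERACTIVE_DELTA == 2, so TASK_INTERACTIVE()
 * holds only once the sleep-average bonus has pulled p->prio at least
 * 2 levels below p->static_prio -- matching the nice 0 row of the
 * table above.
 */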
/*
 * These are the runqueue data structures:
 */

#define BITMAP_SIZE ((((MAX_PRIO+1+7)/8)+sizeof(long)-1)/sizeof(long))

typedef struct runqueue runqueue_t;

struct prio_array {
	int nr_active;
	unsigned long bitmap[BITMAP_SIZE];
	struct list_head queue[MAX_PRIO];
};

/*
 * This is the main, per-CPU runqueue data structure.
 *
 * Locking rule: those places that want to lock multiple runqueues
 * (such as the load balancing or the process migration code), lock
 * acquire operations must be ordered by ascending &runqueue.
 */
struct runqueue {
	spinlock_t lock;
	unsigned long nr_running, nr_switches, expired_timestamp,
			nr_uninterruptible;
	task_t *curr, *idle;
	prio_array_t *active, *expired, arrays[2];
	int prev_nr_running[NR_CPUS];
	task_t *migration_thread;
	struct list_head migration_queue;
} ____cacheline_aligned;

static struct runqueue runqueues[NR_CPUS] __cacheline_aligned;

#define cpu_rq(cpu)		(runqueues + (cpu))
#define this_rq()		cpu_rq(smp_processor_id())
#define task_rq(p)		cpu_rq(task_cpu(p))
#define cpu_curr(cpu)		(cpu_rq(cpu)->curr)
#define rt_task(p)		((p)->prio < MAX_RT_PRIO)
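/*
 * Editorial sketch (not in the original excerpt): the "ascending
 * &runqueue" locking rule above means any path that takes two
 * runqueue locks must compare the pointers and take the lower
 * address first, so two CPUs locking the same pair can never
 * deadlock against each other. The helper name below is
 * hypothetical; the real file implements this same pattern in its
 * double-runqueue-lock helpers.
 */
static inline void example_double_rq_lock(runqueue_t *rq1, runqueue_t *rq2)
{
	if (rq1 == rq2)
		spin_lock(&rq1->lock);	/* same queue: take one lock only */
	else if (rq1 < rq2) {		/* lock the lower address first */
		spin_lock(&rq1->lock);
		spin_lock(&rq2->lock);
	} else {
		spin_lock(&rq2->lock);
		spin_lock(&rq1->lock);
	}
}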
/*
 * Default context-switch locking:
 */
#ifndef prepare_arch_schedule
# define prepare_arch_schedule(prev)	do { } while(0)
# define finish_arch_schedule(prev)	do { } while(0)
# define prepare_arch_switch(rq)	do { } while(0)
# define finish_arch_switch(rq)		spin_unlock_irq(&(rq)->lock)
#endif

/* Stop all tasks running with the given mm, except for the calling task. */
int stop_all_threads(struct mm_struct *mm)
{
	struct task_struct * p;
	int all_stopped = 0;

	mm->stopping_siblings = 1;
	read_lock(&tasklist_lock);
	for_each_task(p) {
		if (p->mm == mm && p != current && p->state != TASK_STOPPED) {
			send_sig (SIGSTOP, p, 1);
		}
	}
	/* Now wait for every task to cease running. */
	/* Beware: this loop might not terminate in the face of a malicious
	   program sending SIGCONT to threads. But it is still killable, and
	   only moderately disruptive (because of the tasklist_lock). */
	for (;;) {
		all_stopped = 1;
		for_each_task(p) {
			if (p->mm == mm && p != current &&
			    p->state != TASK_STOPPED) {
				all_stopped = 0;
				break;
			}
		}
		if (all_stopped)
			break;
		read_unlock(&tasklist_lock);
		schedule_timeout(1);
		read_lock(&tasklist_lock);
	}
#ifdef CONFIG_SMP
	/* Make sure they've all gotten off their CPUs. */
	for_each_task (p) {
		if (p->mm == mm && p != current)
			wait_task_inactive(p);
	}
#endif
	read_unlock(&tasklist_lock);
	mm->stopping_siblings = 0;
	return 0;
}

/* Restart all the tasks with the given mm. Hope none of them were in
   state TASK_STOPPED for some other reason... */
void start_all_threads(struct mm_struct *mm)
{
	struct task_struct * p;

	read_lock(&tasklist_lock);
	for_each_task(p) {
		if (p->mm == mm && p != current) {
			send_sig (SIGCONT, p, 1);
		}
	}
	read_unlock(&tasklist_lock);
}

/*
 * task_rq_lock - lock the runqueue a given task resides on and disable
 * interrupts. Note the ordering: we can safely lookup the task_rq without
 * explicitly disabling preemption.
 */
static inline runqueue_t *task_rq_lock(task_t *p, unsigned long *flags)
{
	struct runqueue *rq;

repeat_lock_task:
	local_irq_save(*flags);
	rq = task_rq(p);
	spin_lock(&rq->lock);
	if (unlikely(rq != task_rq(p))) {
		spin_unlock_irqrestore(&rq->lock, *flags);
		goto repeat_lock_task;
	}
	return rq;
}

static inline void task_rq_unlock(runqueue_t *rq, unsigned long *flags)
{
	spin_unlock_irqrestore(&rq->lock, *flags);
}

/*
 * rq_lock - lock a given runqueue and disable interrupts.
 */
static inline runqueue_t *this_rq_lock(void)
{
	runqueue_t *rq;

	local_irq_disable();
	rq = this_rq();
	spin_lock(&rq->lock);

	return rq;
}

static inline void rq_unlock(runqueue_t *rq)
{
	spin_unlock_irq(&rq->lock);
}

/*
 * Adding/removing a task to/from a priority array:
 */
static inline void dequeue_task(struct task_struct *p, prio_array_t *array)
{
	array->nr_active--;
	list_del(&p->run_list);
	if (list_empty(array->queue + p->prio))
		__clear_bit(p->prio, array->bitmap);
}

static inline void enqueue_task(struct task_struct *p, prio_array_t *array)
{
	list_add_tail(&p->run_list, array->queue + p->prio);
	__set_bit(p->prio, array->bitmap);
	array->nr_active++;
	p->array = array;
}

/*
 * effective_prio - return the priority that is based on the static
 * priority but is modified by bonuses/penalties.
 *
 * We scale the actual sleep average [0 .... MAX_SLEEP_AVG]
 * into the -5 ... 0 ... +5 bonus/penalty range.
 *
 * We use 25% of the full 0...39 priority range so that:
 *
 * 1) nice +19 interactive tasks do not preempt nice 0 CPU hogs.
 * 2) nice -20 CPU hogs do not get preempted by nice 0 tasks.
 *
 * Both properties are important to certain workloads.
 */
static inline int effective_prio(task_t *p)
{
	int bonus, prio;

	bonus = MAX_USER_PRIO*PRIO_BONUS_RATIO*p->sleep_avg/MAX_SLEEP_AVG/100 -
			MAX_USER_PRIO*PRIO_BONUS_RATIO/100/2;

	prio = p->static_prio - bonus;
	if (prio < MAX_RT_PRIO)
		prio = MAX_RT_PRIO;
	if (prio > MAX_PRIO-1)
		prio = MAX_PRIO-1;
	return prio;
}
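/*
 * Editorial worked example (not part of the original source), again
 * assuming MAX_USER_PRIO == 40 and PRIO_BONUS_RATIO == 25: the bonus
 * expression above reduces to
 *
 *   bonus = 10 * sleep_avg / MAX_SLEEP_AVG - 5
 *
 * so a task that never sleeps (sleep_avg == 0) gets bonus -5 (a
 * penalty), a half-idle task gets 0, and a task that sleeps the full
 * MAX_SLEEP_AVG gets +5. A nice 0 task (static_prio 120 on the config
 * assumed earlier) therefore ranges over prio 115..125.
 */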
/*
 * activate_task - move a task to the runqueue.
 *
 * Also update all the scheduling statistics stuff. (sleep average
 * calculation, priority modifiers, etc.)
 */
static inline void activate_task(task_t *p, runqueue_t *rq)
{
	unsigned long sleep_time = jiffies - p->sleep_timestamp;
	prio_array_t *array = rq->active;

	if (!rt_task(p) && sleep_time) {
		/*
		 * This code gives a bonus to interactive tasks. We update
		 * an 'average sleep time' value here, based on
		 * sleep_timestamp. The more time a task spends sleeping,
		 * the higher the average gets - and the higher the priority
		 * boost gets as well.
		 */
		p->sleep_avg += sleep_time;
		if (p->sleep_avg > MAX_SLEEP_AVG)
			p->sleep_avg = MAX_SLEEP_AVG;
		p->prio = effective_prio(p);
	}
	enqueue_task(p, array);
	rq->nr_running++;
}

/*
 * deactivate_task - remove a task from the runqueue.
 */
static inline void deactivate_task(struct task_struct *p, runqueue_t *rq)
{
	rq->nr_running--;
	if (p->state == TASK_UNINTERRUPTIBLE)
		rq->nr_uninterruptible++;
	dequeue_task(p, p->array);
	p->array = NULL;
}

/*
 * resched_task - mark a task 'to be rescheduled now'.
 *
 * On UP this means the setting of the need_resched flag, on SMP it
 * might also involve a cross-CPU call to trigger the scheduler on
 * the target CPU.
 */
static inline void resched_task(task_t *p)
{
#ifdef CONFIG_SMP
	int need_resched;

	preempt_disable();
	need_resched = p->need_resched;
	wmb();
	set_tsk_need_resched(p);
	if (!need_resched && (task_cpu(p) != smp_processor_id()))
		smp_send_reschedule(task_cpu(p));
	preempt_enable();
#else
	set_tsk_need_resched(p);
#endif
}

#ifdef CONFIG_SMP

/*
 * wait_task_inactive - wait for a thread to unschedule.
 *
 * The caller must ensure that the task *will* unschedule sometime soon,
 * else this function might spin for a *long* time.
 */
void wait_task_inactive(task_t * p)
{
	unsigned long flags;
	runqueue_t *rq;

repeat:
	preempt_disable();
	rq = task_rq(p);
	if (unlikely(rq->curr == p)) {
		cpu_relax();
		/*
		 * enable/disable preemption just to make this
		 * a preemption point - we are busy-waiting
		 * anyway.
		 */
		preempt_enable();
		goto repeat;
	}
	rq = task_rq_lock(p, &flags);
	if (unlikely(rq->curr == p)) {
		task_rq_unlock(rq, &flags);
		preempt_enable();
		goto repeat;
	}
	task_rq_unlock(rq, &flags);
	preempt_enable();
}

/*
 * kick_if_running - kick the remote CPU if the task is running currently.
 *
 * This code is used by the signal code to signal tasks
 * which are in user-mode, as quickly as possible.
 *
 * (Note that we do this lockless - if the task does anything
 * while the message is in flight then it will notice the
 * sigpending condition anyway.)
 */
void kick_if_running(task_t * p)
{
	if (p == task_rq(p)->curr)
		resched_task(p);
}
#endif

/***
 * try_to_wake_up - wake up a thread
 * @p: the to-be-woken-up thread
 * @sync: do a synchronous wakeup?
 *
 * Put it on the run-queue if it's not already there. The "current"
 * thread is always on the run-queue (except when the actual
 * re-schedule is in progress), and as such you're allowed to do
 * the simpler "current->state = TASK_RUNNING" to mark yourself
 * runnable without the overhead of this.
 *
 * returns failure only if the task is already active.
 */
static int try_to_wake_up(task_t * p, int sync)
{
	unsigned long flags;
	int success = 0;
	long old_state;
	runqueue_t *rq;

	TRACE_PROCESS(TRACE_EV_PROCESS_WAKEUP, p->pid, p->state);

repeat_lock_task:
	rq = task_rq_lock(p, &flags);
	old_state = p->state;
	if (!p->array) {
		/*
		 * Fast-migrate the task if it's not running or runnable
		 * currently. Do not violate hard affinity.
		 */
		if (unlikely(sync && (rq->curr != p) &&
			(task_cpu(p) != smp_processor_id()) &&
			(p->cpus_allowed & (1UL << smp_processor_id())))) {
			set_task_cpu(p, smp_processor_id());
			task_rq_unlock(rq, &flags);
			goto repeat_lock_task;
		}
		if (old_state == TASK_UNINTERRUPTIBLE)
			rq->nr_uninterruptible--;
		activate_task(p, rq);

		if (p->prio < rq->curr->prio)
			resched_task(rq->curr);
		success = 1;
	}
	p->state = TASK_RUNNING;
	task_rq_unlock(rq, &flags);

	return success;
}

int wake_up_process(task_t * p)
{
	return try_to_wake_up(p, 0);
}

/*
 * wake_up_forked_process - wake up a freshly forked process.
 *
 * This function will do some initial scheduler statistics housekeeping
 * that must be done for every newly created process.
 */
void wake_up_forked_process(task_t * p)
{
	runqueue_t *rq = this_rq_lock();

	p->state = TASK_RUNNING;
	if (!rt_task(p)) {
		/*
		 * We decrease the sleep average of forking parents
		 * and children as well, to keep max-interactive tasks
		 * from forking tasks that are max-interactive.
		 */
		current->sleep_avg = current->sleep_avg * PARENT_PENALTY / 100;
		p->sleep_avg = p->sleep_avg * CHILD_PENALTY / 100;
		p->prio = effective_prio(p);
	}
	set_task_cpu(p, smp_processor_id());
	activate_task(p, rq);
	rq_unlock(rq);
}
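/*
 * Editorial note (not part of the original source): with the penalty
 * constants defined at the top of this file, PARENT_PENALTY == 100
 * leaves the forking parent's sleep_avg unchanged, while
 * CHILD_PENALTY == 95 lets the child keep only 95% of its inherited
 * sleep_avg. E.g. a child inheriting a maximal sleep_avg of 2*HZ
 * (2000 with HZ == 1000) is cut to 1900, for a dynamic bonus of
 * 10*1900/2000 - 5 = 4 rather than the full +5 -- so freshly forked
 * tasks start one step short of max-interactive.
 */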