When scheduling happens:
1. When a process terminates or goes to sleep; these are normally the process's own doing, though runtime faults can also end up here.
2. When the time slice runs out; time-slice accounting is driven by the timer interrupt (a small simulation of this tick path follows the list).
3. From device drivers, which may call schedule() directly.
4. On return from an exception, an interrupt, or a system call, the kernel checks the need_resched flag and, if it is set, calls schedule().
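Item 2 is worth making concrete: on every tick the timer interrupt charges one tick to the running process and, once its counter reaches zero, sets need_resched; the check in item 4 then calls schedule() on the way back out of the interrupt. Below is a minimal user-space simulation of that tick path, loosely modeled on the 2.4 update_process_times() logic; the struct and field names here are stand-ins for task_struct, not actual kernel code.

    #include <stdio.h>

    /* Minimal stand-in for the task_struct fields the tick path touches;
     * purely an illustrative simulation. */
    struct task {
        int pid;
        int counter;        /* remaining time slice, in ticks          */
        int need_resched;   /* set once the slice is exhausted         */
    };

    /* What each timer tick does to the running task, loosely modeled on
     * 2.4's update_process_times(): charge one tick and request a
     * reschedule when nothing is left. */
    static void timer_tick(struct task *p)
    {
        if (p->pid == 0)            /* the idle task is never throttled */
            return;
        if (--p->counter <= 0) {
            p->counter = 0;
            p->need_resched = 1;    /* schedule() runs later, on return
                                       from the interrupt or syscall    */
        }
    }

    int main(void)
    {
        struct task t = { .pid = 42, .counter = 6, .need_resched = 0 };
        int tick = 0;

        while (!t.need_resched) {
            timer_tick(&t);
            printf("tick %d: counter=%d need_resched=%d\n",
                   ++tick, t.counter, t.need_resched);
        }
        return 0;
    }

Note that nothing happens inside the interrupt handler itself once need_resched is set; the actual call to schedule() is deferred to the interrupt/syscall return path, which is exactly item 4.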
Question: on a multi-CPU machine, how is the timer interrupt handled? If the timer interrupt is caught and serviced by only one CPU, what drives time-slice updates on the other CPUs, and how does scheduling get triggered on them?
How schedule() is implemented
asmlinkage void schedule(void)
{
    struct schedule_data * sched_data;
    struct task_struct *prev, *next, *p;
    struct list_head *tmp;
    int this_cpu, c;

    // prefetch the runqueue lock's cache line; the lock itself is taken further down
    spin_lock_prefetch(&runqueue_lock);

    // kernel threads have no mm of their own, but their active_mm borrows the mm of the
    // process that was running, keeping them uniform with user processes
    if (!current->active_mm) BUG();
need_resched_back:
    // the current process is about to be scheduled out. current itself is a neat trick:
    // task_struct and the kernel stack share one 8KB union, task_struct in the low ~1KB
    // and the stack in the upper ~7KB, so current is recovered from the stack pointer
    // via esp & 0xffffe000
    prev = current;
    this_cpu = prev->processor;             // the CPU this process was running on

    // unlikely() is a gcc >= 2.96 optimization hint: the branch is rarely taken, so the
    // compiler lays out the common path first and instruction prefetch gets cheaper;
    // likely() is the opposite, marking a branch that is usually taken
    if (unlikely(in_interrupt())) {
        printk("Scheduling in interrupt\n"); // calling schedule() in interrupt context is a bug
        BUG();
    }

    // if prev holds the big kernel lock, release it; if this CPU holds the global IRQ
    // lock, release it; re-enable interrupts on this CPU
    release_kernel_lock(prev, this_cpu);

    /*
     * 'sched_data' is protected by the fact that we can run
     * only one process per CPU.
     */
    // per-CPU scheduling data; it carries a 64-bit last_schedule timestamp that is only
    // used on SMP (see the CONFIG_SMP block below)
    sched_data = & aligned_data[this_cpu].schedule_data;

    spin_lock_irq(&runqueue_lock);          // lock the run queue, disable interrupts

    /* move an exhausted RR process to be last.. */
    if (unlikely(prev->policy == SCHED_RR)) // a round-robin real-time process
        if (!prev->counter) {               // whose time slice is used up
            // convert nice into a fresh time slice; nice is the inverted priority kept
            // from classic UNIX, ranging from -20 to 19: the larger the value, the more
            // the process yields; the smaller, the higher its priority
            prev->counter = NICE_TO_TICKS(prev->nice);
            move_last_runqueue(prev);       // move it to the tail of the run queue
        }

    switch (prev->state) {                  // act on the state of the outgoing process
        case TASK_INTERRUPTIBLE:            // sleeping, but wakeable by a signal
            if (signal_pending(prev)) {     // a signal is pending, so make it runnable again
                prev->state = TASK_RUNNING;
                break;
            }
        default:
            // TASK_STOPPED, TASK_ZOMBIE, TASK_UNINTERRUPTIBLE, e.g. after exit() or
            // wait4(): take it off the run queue
            del_from_runqueue(prev);
        case TASK_RUNNING:;
    }
    prev->need_resched = 0;                 // clear need_resched

    /*
     * this is the scheduler proper:
     */

repeat_schedule:
    /*
     * Default process to select..
     */
    next = idle_task(this_cpu);             // default candidate: this CPU's idle task
    c = -1000;                              // usual initialization for a maximum search
    list_for_each(tmp, &runqueue_head) {    // walk the run queue
        p = list_entry(tmp, struct task_struct, run_list);
        if (can_schedule(p, this_cpu)) {    // runnable and allowed to run on this CPU
            int weight = goodness(p, this_cpu, prev->active_mm); // compute its scheduling weight
            if (weight > c)
                c = weight, next = p;       // track the best weight and the process that owns it
        }
    }

    /* Do we need to re-calculate counters? */
    if (unlikely(!c)) {                     // c == 0: every runnable process has used up its slice; rare
        struct task_struct *p;

        spin_unlock_irq(&runqueue_lock);    // drop the runqueue lock, re-enable interrupts
        read_lock(&tasklist_lock);          // read-lock the global task list
        for_each_task(p)
            p->counter = (p->counter >> 1) + NICE_TO_TICKS(p->nice); // refill every process's time slice
        read_unlock(&tasklist_lock);        // release the task list lock
        spin_lock_irq(&runqueue_lock);      // re-take the runqueue lock, disabling interrupts
        goto repeat_schedule;               // look again for the most deserving process
    }

    /*
     * from this point on nothing can prevent us from
     * switching to the next task, save this fact in
     * sched_data.
     */
    sched_data->curr = next;                // this CPU's "currently running" slot now points at the new process
    task_set_cpu(next, this_cpu);           // update next's processor and cpus_runnable fields
    spin_unlock_irq(&runqueue_lock);        // unlock the run queue, re-enable interrupts

    if (unlikely(prev == next)) {           // the chosen process is the one already running
        /* We won't go through the normal tail, so do this by hand */
        prev->policy &= ~SCHED_YIELD;       // its yield has been honored, so clear the flag
        goto same_process;
    }

#ifdef CONFIG_SMP
    /*
     * maintain the per-process 'last schedule' value.
     * (this has to be recalculated even if we reschedule to
     * the same process) Currently this is only used on SMP,
     * and it's approximate, so we do not have to maintain
     * it while holding the runqueue spinlock.
     */
    // record the cycle counter at this switch; other CPUs use it as a scheduling reference
    sched_data->last_schedule = get_cycles();

    /*
     * We drop the scheduler lock early (it's a global spinlock),
     * thus we have to lock the previous process from getting
     * rescheduled during switch_to().
     */
#endif /* CONFIG_SMP */

    kstat.context_swtch++;                  // count the context switch
    /*
     * there are 3 processes which are affected by a context switch:
     *
     * prev == .... ==> (last => next)
     *
     * It's the 'much more previous' 'prev' that is on next's stack,
     * but prev is set to (the just run) 'last' process by switch_to().
     * This might sound slightly confusing but makes tons of sense.
     */
    prepare_to_switch();
    {
        struct mm_struct *mm = next->mm;            // address space of the incoming process
        struct mm_struct *oldmm = prev->active_mm;  // address space the outgoing process was using
        if (!mm) {                                  // no mm of its own: next is a kernel thread
            // a kernel thread drops its borrowed mm when it is scheduled out; if one is
            // still attached here something went wrong
            if (next->active_mm) BUG();
            next->active_mm = oldmm;                // borrow the outgoing process's address space
            atomic_inc(&oldmm->mm_count);           // take a reference on the borrowed mm
            enter_lazy_tlb(oldmm, next, this_cpu);  // use lazy TLB flushing for the kernel thread
        } else {                                    // next is a user process
            if (next->active_mm != mm) BUG();       // for a user process mm and active_mm must be identical
            switch_mm(oldmm, mm, next, this_cpu);   // switch the user address space
        }

        if (!prev->mm) {                            // the outgoing process was a kernel thread
            prev->active_mm = NULL;                 // drop its reference to the borrowed address space
            mmdrop(oldmm);                          // and decrement that mm's reference count
        }
    }

    /*
     * This just switches the register state and the
     * stack.
     */
    switch_to(prev, next, prev);            // switch register state and kernel stack
    __schedule_tail(prev);                  // post-switch cleanup of prev: clear SCHED_YIELD and,
                                            // on SMP, let another CPU pick it up again

same_process:
    reacquire_kernel_lock(current);         // re-take the big kernel lock if current held it before the switch
    if (current->need_resched)              // need_resched was set again in the meantime, reschedule
        goto need_resched_back;

    return;
}
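The selection loop above hinges on goodness(), whose body is not shown here. The sketch below only illustrates its idea and is reconstructed from memory rather than copied from the 2.4 source; the exact constants, the SMP affinity penalty (omitted), and the field layout are assumptions. The gist: real-time processes always win with a weight above 1000, ordinary processes are ranked by their remaining counter and nice level plus a small bonus when no address-space switch would be needed, and a weight of 0 means the slice is exhausted.

    #include <stdio.h>

    /* Minimal stand-in for the task_struct fields that matter to the weight
     * calculation; purely illustrative. */
    struct task {
        const char *comm;
        int counter;        /* remaining time slice (ticks)               */
        int nice;           /* -20 (highest) .. 19 (lowest priority)      */
        int rt_priority;    /* meaningful only for real-time policies     */
        int policy_rt;      /* 1 if SCHED_FIFO/SCHED_RR, 0 if SCHED_OTHER */
        int shares_mm;      /* 1 if it shares the outgoing process's mm   */
    };

    /* Illustration of the goodness() idea (a reconstruction, not the verbatim
     * 2.4 code): real-time tasks dominate, ordinary tasks are ranked by
     * remaining slice, nice level, and an mm-sharing bonus. */
    static int goodness_like(const struct task *p)
    {
        if (p->policy_rt)
            return 1000 + p->rt_priority;   /* real-time: beats every ordinary task */
        if (!p->counter)
            return 0;                       /* nothing left of its slice            */
        return p->counter + (20 - p->nice) + (p->shares_mm ? 1 : 0);
    }

    int main(void)
    {
        struct task a = { "editor", 4,  0,  0, 0, 1 };  /* interactive, shares mm  */
        struct task b = { "batch",  2, 10,  0, 0, 0 };  /* niced background job    */
        struct task c = { "audio",  1,  0, 50, 1, 0 };  /* SCHED_RR real-time task */

        printf("editor=%d batch=%d audio=%d\n",
               goodness_like(&a), goodness_like(&b), goodness_like(&c));
        /* Prints: editor=25 batch=12 audio=1050 -> the real-time task wins. */
        return 0;
    }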
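The "re-calculate counters" branch also deserves a worked example. Every process, runnable or sleeping, gets p->counter = (p->counter >> 1) + NICE_TO_TICKS(p->nice): a CPU hog arrives with counter 0 and simply receives one base slice, while a sleeper keeps half of what it had and converges toward twice the base slice, which is what later rewards interactive processes. The standalone sketch below assumes HZ=100, where the 2.4 macro works out to NICE_TO_TICKS(nice) = ((20 - nice) >> 2) + 1; treat that exact constant as an assumption.

    #include <stdio.h>

    /* Assumed 2.4 value for HZ=100: 6 ticks for nice 0; an assumption for illustration. */
    #define NICE_TO_TICKS(nice)  (((20 - (nice)) >> 2) + 1)

    int main(void)
    {
        int exhausted = 0;                  /* a CPU hog: counter is back to 0 at every recalculation  */
        int sleeper = NICE_TO_TICKS(0);     /* a sleeper: it never spent the slice it already had      */
        int epoch;

        for (epoch = 1; epoch <= 5; epoch++) {
            /* the line from schedule(): half of the leftover plus one base slice */
            exhausted = (0 >> 1) + NICE_TO_TICKS(0);
            sleeper   = (sleeper >> 1) + NICE_TO_TICKS(0);
            printf("epoch %d: cpu hog=%2d ticks, sleeper=%2d ticks\n",
                   epoch, exhausted, sleeper);
        }
        /* The sleeper converges to 2*NICE_TO_TICKS(0) - 1 = 11 ticks while the hog
         * gets exactly 6, so processes that sleep wake up with a bigger slice and,
         * through goodness(), a higher weight. */
        return 0;
    }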
Original post: http://www.cnblogs.com/hmxb/p/4918140.html