进程在创建它的时刻开始存活。在Linux系统中,这通常是fork()系统调用的结果,该系统调用通过复制一个现有的进程来创建一个全新的进程。只有在创建init进程时,是通过代码实现数据结构的填充。调用fork()的进程称为父进程,新生的进程称为子进程。在系统调用结束时,在返回点这个相同位置上,父进程恢复执行,子进程开始执行。fork()系统调用从内核两次返回:一次回到父进程,另一次回到创建的新的子进程。
volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ 进程的状态
void *stack; //进程通过alloc_thread_info函数分配它的内核栈,通过free_thread_info函数释放所分配的内核栈。
unsigned int flags; /* per process flags, defined below */ 进程的标签
进程的调度
int on_rq;
int prio, static_prio, normal_prio;//进程优先级 实时优先级范围是0到MAX_RT_PRIO-1(即99),而普通进程的静态优先级范围是从MAX_RT_PRIO到MAX_PRIO-1(即100到139)。值越大静态优先级越低。
unsigned int rt_priority;//rt_priority用于保存实时优先级。normal_prio值取决于静态优先级和调度策略,static_prio用于保存静态优先级,可以通过nice系统调用来进行修改。
const struct sched_class *sched_class;//sched_class结构体表示调度类
struct sched_entity se;
struct sched_rt_entity rt;
struct sched_dl_entity dl;
unsigned int policy; //policy表示进程的调度策略
int nr_cpus_allowed;
cpumask_t cpus_allowed;//cpus_allowed用于控制进程可以在哪些处理器上运行。
struct list_head tasks; //用于构建进程链表 ,内核的双向循环链表的实现方法 - 一个更简略的双向循环链表
struct mm_struct *mm, *active_mm;//mm指向进程所拥有的内存描述符,而active_mm指向进程运行时所使用的内存描述符。对于普通进程而言,这两个指针变量的值相同。但是,内核线程不拥有任何内存描述符,所以它们的mm成员总是为NULL。当内核线程得以运行时,它的active_mm成员被初始化为前一个运行进程的active_mm值。
/* per-thread vma caching */
u32 vmacache_seqnum;
struct vm_area_struct *vmacache[VMACACHE_SIZE];
/* task state */
int exit_state;
int exit_code, exit_signal;//exit_code用于设置进程的终止代号,这个值要么是_exit()或exit_group()系统调用参数(正常终止),要么是由内核提供的一个错误代号(异常终止)。
int pdeath_signal; /* The signal sent when the parent dies *///exit_signal被置为-1时表示是某个线程组中的一员。只有当线程组的最后一个成员终止时,才会产生一个信号,以通知线程组的领头进程的父进程。
unsigned int jobctl; /* JOBCTL_*, siglock protected */
/* Used for emulating ABI behavior of previous Linux versions */
unsigned int personality;
unsigned in_execve:1; /* Tell the LSMs that the process is doing an
* execve */
unsigned in_iowait:1;
/* Revert to default priority/policy when forking */
unsigned sched_reset_on_fork:1;
unsigned sched_contributes_to_load:1;
unsigned long atomic_flags; /* Flags needing atomic access. */
pid_t pid; //进程标识号
pid_t tgid;
/*程序创建的进程具有父子关系,在编程时往往需要引用这样的父子关系。进程描述符中有几个域用来表示这样的关系
* pointers to (original) parent process, youngest child, younger sibling,
* older sibling, respectively. (p->father can be replaced with
* p->real_parent->pid)
*/
struct task_struct __rcu *real_parent; /* real parent process */
struct task_struct __rcu *parent; /* recipient of SIGCHLD, wait4() reports */
/*
* children/sibling forms the list of my natural children
*/
struct list_head children; /* list of my children */
struct list_head sibling; /* linkage in my parent's children list */
struct task_struct *group_leader; /* threadgroup leader */
cputime_t utime, stime, utimescaled, stimescaled;//utime/stime用于记录进程在用户态/内核态下所经过的节拍数(定时器)。utimescaled/stimescaled也是用于记录进程在用户态/内核态的运行时间,但它们以处理器的频率为刻度。
cputime_t gtime;//gtime是以节拍计数的虚拟机运行时间(guest time)。
unsigned long nvcsw, nivcsw; /* context switch counts */
u64 start_time; /* monotonic time in nsec */
u64 real_start_time; /* boot based time in nsec */
/* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
unsigned long min_flt, maj_flt;
struct task_cputime cputime_expires;
struct list_head cpu_timers[3];
/* process credentials */
const struct cred __rcu *real_cred; /* objective and real subjective task
* credentials (COW) */
const struct cred __rcu *cred; /* effective (overridable) subjective task
* credentials (COW) */
char comm[TASK_COMM_LEN]; /* executable name excluding path
- access with [gs]et_task_comm (which lock
it with task_lock())
- initialized normally by setup_new_exec */
/* file system info */
int link_count, total_link_count;
/* CPU-specific state of this task */
struct thread_struct thread;
/* filesystem information */
struct fs_struct *fs;
/* open file information */
struct files_struct *files;
/* namespaces */
struct nsproxy *nsproxy;
/* signal handlers */
struct signal_struct *signal;//signal指向进程的信号描述符。
struct sighand_struct *sighand;//sighand指向进程的信号处理程序描述符。
sigset_t blocked, real_blocked;//blocked表示被阻塞信号的掩码,real_blocked表示临时掩码。
sigset_t saved_sigmask; /* restored if set_restore_sigmask() was used */
struct sigpending pending; //pending存放私有挂起信号的数据结构。
unsigned long sas_ss_sp;// sas_ss_sp是信号处理程序备用堆栈的地址,sas_ss_size表示堆栈的大小。
size_t sas_ss_size;
int (*notifier)(void *priv);
void *notifier_data;//设备驱动程序常用notifier指向的函数来阻塞进程的某些信号(notifier_mask是这些信号的位掩码),notifier_data指的是notifier所指向的函数可能使用的数据。
sigset_t *notifier_mask;
struct callback_head *task_works;
struct audit_context *audit_context;//进程审计
/* Thread group tracking */
u32 parent_exec_id;
u32 self_exec_id;
/* Protection of (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed,
* mempolicy */
spinlock_t alloc_lock;
/* Protection of the PI data structures: */
raw_spinlock_t pi_lock;
/* journalling filesystem info */
void *journal_info;
/* stacked block device info */
struct bio_list *bio_list;
/* VM state */
struct reclaim_state *reclaim_state;
struct backing_dev_info *backing_dev_info;
struct io_context *io_context;
unsigned long ptrace_message;
siginfo_t *last_siginfo; /* For ptrace use. */
struct task_io_accounting ioac;
struct rcu_head rcu;
/*
* cache last used pipe for splice
*/
struct pipe_inode_info *splice_pipe;
struct page_frag task_frag;
/*
* time slack values; these are used to round up poll() and
* select() etc timeout values. These are in nanoseconds.
*/
unsigned long timer_slack_ns;
unsigned long default_timer_slack_ns;
Linux通过slab分配器分配task_struct结构,这样能达到对象复用和缓存着色。
/*
 * struct thread_info - low-level per-thread record.
 *
 * Laid out one member per line; member names, types and order are exactly
 * those of the single-line form.
 */
struct thread_info {
	struct task_struct	*task;		/* main task structure */
	struct exec_domain	*exec_domain;	/* execution domain */
	unsigned long		flags;		/* low level flags */
	__u32			status;		/* thread synchronous flags */
	__u32			cpu;
	int			preempt_count;	/* 0 => preemptable, <0 => BUG */
	mm_segment_t		addr_limit;	/* thread address space */
	struct restart_block	restart_block;
	unsigned long		previous_sp;	/* sp of the previous stack when
						 * IRQ stacks are nested */
	__u8			supervisor_stack[0];
};
//include/linux/sched.h
/*
 * Process state constants.
 *
 * Each directive must sit on its own line: when they share one physical
 * line only the first macro is defined, and its replacement text contains
 * the remaining "#define ..." tokens.
 */
#define TASK_RUNNING		0
#define TASK_INTERRUPTIBLE	1
#define TASK_UNINTERRUPTIBLE	2
#define __TASK_STOPPED		4
#define __TASK_TRACED		8
Linux通过clone()系统调用实现fork()。然后由clone()去调用do_fork()。do_fork()完成了创建中的大部分工作,它定义在kernel/fork.c文件中。调用copy_process()函数,通过copy_process()创建子进程的描述符,并创建子进程执行时所需的其他数据结构,最终则会返回这个创建好的进程描述符(子进程的描述符)。
p = copy_process(clone_flags, stack_start, stack_size,child_tidptr, NULL, trace);//struct task_struct *p;
在copy_process()函数中调用dup_task_struct()。
//在copy_process()函数
p = dup_task_struct(current);//struct task_struct *p; retval = copy_thread(clone_flags, stack_start, stack_size, p);
dup_task_struct()为新进程创建一个内核栈、thread_info结构和task_struct,这些值与当前进程的值相同,此时父子进程的描述符完全相同。
//在dump_task_struct()函数中
tsk = alloc_task_struct_node(node);//struct task_struct *tsk; ti = alloc_thread_info_node(tsk, node);//struct thread_info *ti; err = arch_dup_task_struct(tsk, orig);//int err;
tsk->stack = ti;
setup_thread_stack(tsk, orig);
/*
 * alloc_task_struct_node(node) - allocate the pages backing a task_struct.
 *
 * Requests a compound page block of order KERNEL_STACK_SIZE_ORDER from
 * alloc_pages_node() for @node and evaluates to its address as a
 * struct task_struct pointer, or NULL when the allocation fails.
 *
 * Every continued line ends in a backslash; a backslash followed by more
 * text on the same line is a stray token, not a line continuation.
 */
#define alloc_task_struct_node(node)						\
({										\
	struct page *page = alloc_pages_node(node, GFP_KERNEL | __GFP_COMP,	\
					     KERNEL_STACK_SIZE_ORDER);		\
	struct task_struct *ret = page ? page_address(page) : NULL;		\
										\
	ret;									\
})
alloc_task_struct_node(node)函数中创建页,其中一部分就用于堆栈
arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)函数复制一个PCB——task_struct
在arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)函数
*dst = *src;//在arch_dup_task_struct()函数中,通过赋值完成复制操作
setup_thread_stack()函数只是复制thread_info,而非复制内核堆栈
/*
 * setup_thread_stack - initialise a new task's thread_info from an existing one.
 * @p:   task whose thread_info is being filled in
 * @org: task whose thread_info is used as the template
 *
 * Copies the whole thread_info of @org into that of @p, then points the
 * copy's ->task back-reference at @p. Only the thread_info is assigned here.
 */
static inline void
setup_thread_stack(struct task_struct *p, struct task_struct *org)
{
	/* Structure assignment: duplicate the template's thread_info. */
	*task_thread_info(p) = *task_thread_info(org);

	/* The copy still refers to @org's task; re-target it at @p. */
	task_thread_info(p)->task = p;
}
在copy_thread()函数中,完成初始化
/*
 * Excerpt from:
 * copy_thread(unsigned long clone_flags, unsigned long sp,
 *             unsigned long arg, struct task_struct *p)
 *
 * p is the child task. The declarations and the two thread.sp* assignments
 * each sit on their own line so that none of them is hidden inside a
 * trailing // comment.
 */
struct pt_regs *childregs = task_pt_regs(p);	/* child's pt_regs frame */
struct task_struct *tsk;
int err;

/* NOTE(review): presumably the kernel stack top used when the child is
 * first scheduled - confirm against the context-switch code. */
p->thread.sp = (unsigned long) childregs;
p->thread.sp0 = (unsigned long) (childregs+1);

if (unlikely(p->flags & PF_KTHREAD)) {
	/* kernel thread: start from a zeroed register frame */
	memset(childregs, 0, sizeof(struct pt_regs));
	p->thread.ip = (unsigned long) ret_from_kernel_thread;
	task_user_gs(p) = __KERNEL_STACK_CANARY;
	childregs->ds = __USER_DS;
	childregs->es = __USER_DS;
	childregs->fs = __KERNEL_PERCPU;
	childregs->bx = sp;	/* function */
	childregs->bp = arg;
	childregs->orig_ax = -1;
	childregs->cs = __KERNEL_CS | get_kernel_rpl();
	childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED;
	p->thread.io_bitmap_ptr = NULL;
	return 0;
}

/* Copy the current task's saved register frame into the child's frame. */
*childregs = *current_pt_regs();
/* ax is zeroed in the child's frame; the article gives this as the reason
 * fork() returns 0 in the child. */
childregs->ax = 0;
/* A non-zero sp from the caller replaces the sp saved in the child's frame. */
if (sp)
	childregs->sp = sp;

/* First instruction address used when the child is scheduled. */
p->thread.ip = (unsigned long) ret_from_fork;
参考文献:
http://blog.csdn.net/npy_lp/article/details/7335187
原文:http://www.cnblogs.com/pingandezhufu/p/4420074.html