struct task_struct {
#ifdef CONFIG_THREAD_INFO_IN_TASK
	/*
	 * For reasons of header soup (see current_thread_info()), this
	 * must be the first element of task_struct.
	 */
	struct thread_info		thread_info;
#endif
	/* -1 unrunnable, 0 runnable, >0 stopped: */
	volatile long			state;

	/*
	 * This begins the randomizable portion of task_struct. Only
	 * scheduling-critical items should be added above here.
	 */
	randomized_struct_fields_start

	/* Pointer to the kernel stack */
	void				*stack;
	/* Reference count: how many users this structure has */
	refcount_t			usage;
	/* Per task flags (PF_*), defined further below: */
	unsigned int			flags;
	/* ptrace flags, used by the ptrace() syscall for tracing and debugging */
	unsigned int			ptrace;

	/* Conditionally compiled: only used on multiprocessor (SMP) builds */
#ifdef CONFIG_SMP
	struct llist_node		wake_entry;
	int				on_cpu;
#ifdef CONFIG_THREAD_INFO_IN_TASK
	/* Current CPU: */
	unsigned int			cpu;
#endif
	unsigned int			wakee_flips;
	unsigned long			wakee_flip_decay_ts;
	struct task_struct		*last_wakee;
	/*
	 * recent_used_cpu is initially set as the last CPU used by a task
	 * that wakes affine another task. Waker/wakee relationships can
	 * push tasks around a CPU where each wakeup moves to the next one.
	 * Tracking a recently used CPU allows a quick search for a recently
	 * used CPU that may be idle.
	 */
	int				recent_used_cpu;
	int				wake_cpu;
#endif
	/* Whether the task is on a runqueue */
	int				on_rq;

	/* Scheduling policy and priorities of the task */
	int				prio;		/* Dynamic (scheduling) priority */
	int				static_prio;	/* Static priority */
	int				normal_prio;	/* Normal priority */
	unsigned int			rt_priority;	/* Real-time priority */
#ifdef CONFIG_UCLAMP_TASK
	/* Clamp values requested for a scheduling entity */
	struct uclamp_se		uclamp_req[UCLAMP_CNT];
	/* Effective clamp values used for a scheduling entity */
	struct uclamp_se		uclamp[UCLAMP_CNT];
#endif
#ifdef CONFIG_PREEMPT_NOTIFIERS
	/* List of struct preempt_notifier: */
	struct hlist_head		preempt_notifiers;
#endif
#ifdef SPLIT_RSS_COUNTING
	struct task_rss_stat		rss_stat;
#endif
	/* Exit-state parameters of the process */
	int				exit_state;
	int				exit_code;
	int				exit_signal;
	/* The signal sent when the parent dies: */
	int				pdeath_signal;
	/* JOBCTL_*, siglock protected: */
	unsigned long			jobctl;
	/* Used for emulating ABI behavior of previous Linux versions: */
	unsigned int			personality;
#ifdef CONFIG_STACKPROTECTOR
	/* Canary value for the -fstack-protector GCC feature: */
	unsigned long			stack_canary;
#endif
	/*
	 * Pointers to the (original) parent process, youngest child, younger sibling,
	 * older sibling, respectively. (p->father can be replaced with
	 * p->real_parent->pid)
	 */
	/* Real parent process: */
	struct task_struct __rcu	*real_parent;	/* Points to the real parent process */
	/*
	 * Children/sibling form the list of natural children:
	 */
	struct list_head		children;
	struct list_head		sibling;
	struct task_struct		*group_leader;	/* Points to the thread-group leader */
	/*
	 * 'ptraced' is the list of tasks this task is using ptrace() on.
	 *
	 * This includes both natural children and PTRACE_ATTACH targets.
	 * 'ptrace_entry' is this task's link on the p->parent->ptraced list.
	 */
	struct list_head		ptraced;
	struct list_head		ptrace_entry;
	/* Thread group tracking: */
	u64				parent_exec_id;
	u64				self_exec_id;
	/* Protection against (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed, mempolicy: */
	spinlock_t			alloc_lock;
	/* Protection of the PI data structures: */
	raw_spinlock_t			pi_lock;
	struct wake_q_node		wake_q;
#ifdef CONFIG_RT_MUTEXES
	/* PI waiters blocked on a rt_mutex held by this task: */
	struct rb_root_cached		pi_waiters;
	/* Updated under owner's pi_lock and rq lock */
	struct task_struct		*pi_top_task;
	/* Deadlock detection and priority inheritance handling: */
	struct rt_mutex_waiter		*pi_blocked_on;
#endif
	struct task_io_accounting	ioac;
#ifdef CONFIG_PSI
	/* Pressure stall state */
	unsigned int			psi_flags;
#endif
#ifdef CONFIG_TASK_XACCT
	/* Accumulated RSS usage: */
	u64				acct_rss_mem1;
	/* Accumulated virtual memory usage: */
	u64				acct_vm_mem1;
	/* stime + utime since last update: */
	u64				acct_timexpd;
#endif
#ifdef CONFIG_CPUSETS
	/* Protected by ->alloc_lock: */
	nodemask_t			mems_allowed;
	/* Sequence number to catch updates: */
	seqcount_t			mems_allowed_seq;
	int				cpuset_mem_spread_rotor;
	int				cpuset_slab_spread_rotor;
#endif
#ifdef CONFIG_CGROUPS
	/* Control Group info protected by css_set_lock: */
	struct css_set __rcu		*cgroups;
	/* cg_list protected by css_set_lock and tsk->alloc_lock: */
	struct list_head		cg_list;
#endif
#ifdef CONFIG_X86_CPU_RESCTRL
	u32				closid;
	u32				rmid;
#endif
#ifdef CONFIG_FUTEX
	struct robust_list_head __user	*robust_list;
#ifdef CONFIG_COMPAT
	struct compat_robust_list_head __user *compat_robust_list;
#endif
	struct list_head		pi_state_list;
	struct futex_pi_state		*pi_state_cache;
	struct mutex			futex_exit_mutex;
	unsigned int			futex_state;
#endif
#ifdef CONFIG_PERF_EVENTS
	struct perf_event_context	*perf_event_ctxp[perf_nr_task_contexts];
	struct mutex			perf_event_mutex;
	struct list_head		perf_event_list;
#endif
#ifdef CONFIG_DEBUG_PREEMPT
	unsigned long			preempt_disable_ip;
#endif
#ifdef CONFIG_NUMA
	/* Protected by alloc_lock: */
	struct mempolicy		*mempolicy;
	short				il_prev;
	short				pref_node_fork;
#endif
#ifdef CONFIG_NUMA_BALANCING
	int				numa_scan_seq;
	unsigned int			numa_scan_period;
	unsigned int			numa_scan_period_max;
	int				numa_preferred_nid;
	unsigned long			numa_migrate_retry;
	/* Migration stamp: */
	u64				node_stamp;
	u64				last_task_numa_placement;
	u64				last_sum_exec_runtime;
	struct callback_head		numa_work;
	/*
	 * This pointer is only modified for current in syscall and
	 * pagefault context (and for tasks being destroyed), so it can be read
	 * from any of the following contexts:
	 *  - RCU read-side critical section
	 *  - current->numa_group from everywhere
	 *  - task's runqueue locked, task not running
	 */
	struct numa_group __rcu		*numa_group;
	/*
	 * numa_faults is an array split into four regions:
	 * faults_memory, faults_cpu, faults_memory_buffer, faults_cpu_buffer
	 * in this precise order.
	 *
	 * faults_memory: Exponential decaying average of faults on a per-node
	 * basis. Scheduling placement decisions are made based on these
	 * counts. The values remain static for the duration of a PTE scan.
	 * faults_cpu: Track the nodes the process was running on when a NUMA
	 * hinting fault was incurred.
	 * faults_memory_buffer and faults_cpu_buffer: Record faults per node
	 * during the current scan window. When the scan completes, the counts
	 * in faults_memory and faults_cpu decay and these values are copied.
	 */
	unsigned long			*numa_faults;
	unsigned long			total_numa_faults;
	/*
	 * numa_faults_locality tracks if faults recorded during the last
	 * scan window were remote/local or failed to migrate. The task scan
	 * period is adapted based on the locality of the faults with different
	 * weights depending on whether they were shared or private faults
	 */
	unsigned long			numa_faults_locality[3];
#ifdef CONFIG_RSEQ
	struct rseq __user		*rseq;
	u32				rseq_sig;
	/*
	 * RmW on rseq_event_mask must be performed atomically
	 * with respect to preemption.
	 */
	unsigned long			rseq_event_mask;
#endif
	struct tlbflush_unmap_batch	tlb_ubc;
	union {
		refcount_t		rcu_users;
		struct rcu_head		rcu;
	};
	/* Cache last used pipe for splice(): */
	struct pipe_inode_info		*splice_pipe;
#ifdef CONFIG_FAULT_INJECTION
	int				make_it_fail;
	unsigned int			fail_nth;
#endif
	/*
	 * When (nr_dirtied >= nr_dirtied_pause), it's time to call
	 * balance_dirty_pages() for a dirty throttling pause:
	 */
	int				nr_dirtied;
	int				nr_dirtied_pause;
	/* Start of a write-and-pause period: */
	unsigned long			dirty_paused_when;
#ifdef CONFIG_LATENCYTOP
	int				latency_record_count;
	struct latency_record		latency_record[LT_SAVECOUNT];
#endif
	/*
	 * Time slack values; these are used to round up poll() and
	 * select() etc timeout values. These are in nanoseconds.
	 */
	u64				timer_slack_ns;
	u64				default_timer_slack_ns;
	/*
	 * New fields for task_struct should be added above here, so that
	 * they are included in the randomized portion of task_struct.
	 */
	randomized_struct_fields_end
	/* CPU-specific state of this task: */
	struct thread_struct		thread;
	/*
	 * WARNING: on x86, 'thread_struct' contains a variable-sized
	 * structure. It *MUST* be at the end of 'task_struct'.
	 *
	 * Do not put anything below here!
	 */
};
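As a quick illustration of how these fields are consumed, here is a minimal sketch that walks the task list and dumps a few of the members shown above. for_each_process(), rcu_read_lock() and the fields are real kernel APIs; the function name dump_tasks() is a hypothetical helper, assumed to run in module context.

#include <linux/sched.h>
#include <linux/sched/signal.h>
#include <linux/rcupdate.h>
#include <linux/printk.h>

/* Hypothetical helper: print pid, state and priorities of every task.
 * The task list is traversed under RCU protection. */
static void dump_tasks(void)
{
	struct task_struct *p;

	rcu_read_lock();
	for_each_process(p) {
		pr_info("%-16s pid=%d state=%ld prio=%d static_prio=%d\n",
			p->comm, p->pid, p->state, p->prio, p->static_prio);
	}
	rcu_read_unlock();
}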
#ifdef CONFIG_SMP
	int  (*balance)(struct rq *rq, struct task_struct *prev, struct rq_flags *rf);
	int  (*select_task_rq)(struct task_struct *p, int task_cpu, int sd_flag, int flags);
	void (*migrate_task_rq)(struct task_struct *p, int new_cpu);
	/*
	 * The switched_from() call is allowed to drop rq->lock, therefore we
	 * cannot assume the switched_from/switched_to pair is serialized by
	 * rq->lock. They are however serialized by p->pi_lock.
	 */
	void (*switched_from)(struct rq *this_rq, struct task_struct *task);
	void (*switched_to)  (struct rq *this_rq, struct task_struct *task);
	void (*prio_changed) (struct rq *this_rq, struct task_struct *task, int oldprio);
	/*
	 * nr_running and cpu_load should be in the same cacheline because
	 * remote CPUs use both these fields when doing load calculation.
	 */
	unsigned int		nr_running;
#ifdef CONFIG_NUMA_BALANCING
	unsigned int		nr_numa_running;
	unsigned int		nr_preferred_running;
	unsigned int		numa_migrate_on;
#endif
#ifdef CONFIG_NO_HZ_COMMON
#ifdef CONFIG_SMP
	unsigned long		last_load_update_tick;
	unsigned long		last_blocked_load_update_tick;
	unsigned int		has_blocked_load;
#endif /* CONFIG_SMP */
	unsigned int		nohz_tick_stopped;
	atomic_t		nohz_flags;
#endif /* CONFIG_NO_HZ_COMMON */
	unsigned long		nr_load_updates;
	u64			nr_switches;
#ifdef CONFIG_UCLAMP_TASK
	/* Utilization clamp values based on CPU's RUNNABLE tasks */
	struct uclamp_rq	uclamp[UCLAMP_CNT] ____cacheline_aligned;
	unsigned int		uclamp_flags;
#define UCLAMP_FLAG_IDLE 0x01
#endif
	struct cfs_rq		cfs;
	struct rt_rq		rt;
	struct dl_rq		dl;
#ifdef CONFIG_FAIR_GROUP_SCHED
	/* list of leaf cfs_rq on this CPU: */
	struct list_head	leaf_cfs_rq_list;
	struct list_head	*tmp_alone_branch;
#endif /* CONFIG_FAIR_GROUP_SCHED */
	/*
	 * This is part of a global counter where only the total sum
	 * over all CPUs matters. A task can increase this counter on
	 * one CPU and if it got migrated afterwards it may decrease
	 * it on another CPU. Always updated under the runqueue lock:
	 */
	unsigned long		nr_uninterruptible;
	unsigned int		clock_update_flags;
	u64			clock;
	/* Ensure that all clocks are in the same cache line */
	u64			clock_task ____cacheline_aligned;
	u64			clock_pelt;
	unsigned long		lost_idle_time;
atomic_t nr_iowait;
#ifdef CONFIG_MEMBARRIER
	int membarrier_state;
#endif
	/*
	 * Original scheduling parameters. Copied here from sched_attr
	 * during sched_setattr(), they will remain the same until
	 * the next sched_setattr().
	 */
	u64			dl_runtime;	/* Maximum runtime for each instance	*/
	u64			dl_deadline;	/* Relative deadline of each instance	*/
	u64			dl_period;	/* Separation of two instances (period) */
	u64			dl_bw;		/* dl_runtime / dl_period		*/
	u64			dl_density;	/* dl_runtime / dl_deadline		*/
	/*
	 * Actual scheduling parameters. Initialized with the values above,
	 * they are continuously updated during task execution. Note that
	 * the remaining runtime could be < 0 in case we are in overrun.
	 */
	s64			runtime;	/* Remaining runtime for this instance	*/
	u64			deadline;	/* Absolute deadline for this instance	*/
	unsigned int		flags;		/* Specifying the scheduler behaviour	*/
	/*
	 * Some bool flags:
	 *
	 * @dl_throttled tells if we exhausted the runtime. If so, the
	 * task has to wait for a replenishment to be performed at the
	 * next firing of dl_timer.
	 *
	 * @dl_boosted tells if we are boosted due to PI. If so we are
	 * outside bandwidth enforcement mechanism (but only until we
	 * exit the critical section);
	 *
	 * @dl_yielded tells if task gave up the CPU before consuming
	 * all its available runtime during the last job.
	 *
	 * @dl_non_contending tells if the task is inactive while still
	 * contributing to the active utilization. In other words, it
	 * indicates if the inactive timer has been armed and its handler
	 * has not been executed yet. This flag is useful to avoid race
	 * conditions between the inactive timer handler and the wakeup
	 * code.
	 *
	 * @dl_overrun tells if the task asked to be informed about runtime
	 * overruns.
	 */
	unsigned int		dl_throttled      : 1;
	unsigned int		dl_boosted        : 1;
	unsigned int		dl_yielded        : 1;
	unsigned int		dl_non_contending : 1;
	unsigned int		dl_overrun        : 1;
	/*
	 * Bandwidth enforcement timer. Each -deadline task has its
	 * own bandwidth to be enforced, thus we need one timer per task.
	 */
	struct hrtimer		dl_timer;
	/*
	 * Inactive timer, responsible for decreasing the active utilization
	 * at the "0-lag time". When a -deadline task blocks, it contributes
	 * to GRUB's active utilization until the "0-lag time", hence a
	 * timer is needed to decrease the active utilization at the correct
	 * time.
	 */
	struct hrtimer		inactive_timer;
};
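To see where dl_runtime, dl_deadline and dl_period come from, here is a user-space sketch of sched_setattr() (which copies these values into the sched_dl_entity above). The numbers are arbitrary; struct sched_attr is laid out as documented in sched_setattr(2) and is defined locally to stay self-contained, since glibc provides no wrapper. Running it requires the appropriate privileges (e.g. CAP_SYS_NICE/root).

#define _GNU_SOURCE
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef SCHED_DEADLINE
#define SCHED_DEADLINE	6	/* value from include/uapi/linux/sched.h */
#endif

/* Layout as documented in sched_setattr(2). */
struct sched_attr {
	uint32_t size;
	uint32_t sched_policy;
	uint64_t sched_flags;
	int32_t  sched_nice;
	uint32_t sched_priority;
	uint64_t sched_runtime;
	uint64_t sched_deadline;
	uint64_t sched_period;
};

int main(void)
{
	/* Ask for 10ms of runtime every 100ms with a 100ms relative deadline;
	 * these map onto dl_runtime, dl_deadline and dl_period above. */
	struct sched_attr attr = {
		.size           = sizeof(attr),
		.sched_policy   = SCHED_DEADLINE,
		.sched_runtime  =  10 * 1000 * 1000,
		.sched_deadline = 100 * 1000 * 1000,
		.sched_period   = 100 * 1000 * 1000,
	};

	if (syscall(SYS_sched_setattr, 0, &attr, 0) < 0) {
		perror("sched_setattr");
		return 1;
	}
	printf("now running under SCHED_DEADLINE\n");
	pause();
	return 0;
}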
	/*
	 * 'curr' points to currently running entity on this cfs_rq.
	 * It is set to NULL otherwise (i.e when none are currently running).
	 */
	/*
	 * Special scheduling-entity pointers tracked on this cfs_rq:
	 *
	 * curr: the entity currently running on this cfs_rq; NULL if the running
	 * entity does not belong to this cfs_rq. Note that with group scheduling,
	 * when a task's se runs it becomes curr of its cfs_rq, and each of its
	 * parent entities likewise becomes curr of the corresponding cfs_rq.
	 *
	 * next: an entity explicitly requested to run at the next opportunity.
	 * This is not absolute: it is only honoured when that entity is close to
	 * running anyway (though perhaps not literally next), so that picking it
	 * does not cause too much unfairness; it acts as a temporary priority boost.
	 *
	 * last: the entity that ran last. Running it again must not violate
	 * fairness, so cases such as setting next to last need careful
	 * consideration; it is another mechanism for preserving fairness.
	 */
	struct sched_entity	*curr, *next, *last, *skip;
struct dl_rq {
	/* runqueue is an rbtree, ordered by deadline */
	struct rb_root_cached	root;
	unsigned long		dl_nr_running;
#ifdef CONFIG_SMP
	/*
	 * Deadline values of the currently executing and the
	 * earliest ready task on this rq. Caching these facilitates
	 * the decision whether or not a ready but not running task
	 * should migrate somewhere else.
	 */
	struct {
		u64		curr;
		u64		next;
	} earliest_dl;
	unsigned long		dl_nr_migratory;
	int			overloaded;
	/*
	 * Tasks on this rq that can be pushed away. They are kept in
	 * an rb-tree, ordered by tasks' deadlines, with caching
	 * of the leftmost (earliest deadline) element.
	 */
	struct rb_root_cached	pushable_dl_tasks_root;
#else
	struct dl_bw		dl_bw;
#endif
	/*
	 * "Active utilization" for this runqueue: increased when a
	 * task wakes up (becomes TASK_RUNNING) and decreased when a
	 * task blocks
	 */
	u64			running_bw;
	/*
	 * Utilization of the tasks "assigned" to this runqueue (including
	 * the tasks that are in runqueue and the tasks that executed on this
	 * CPU and blocked). Increased when a task moves to this runqueue, and
	 * decreased when the task moves away (migrates, changes scheduling
	 * policy, or terminates).
	 * This is needed to compute the "inactive utilization" for the
	 * runqueue (inactive utilization = this_bw - running_bw).
	 */
	u64			this_bw;
	u64			extra_bw;
	/*
	 * Inverse of the fraction of CPU utilization that can be reclaimed
	 * by the GRUB algorithm.
	 */
	u64			bw_ratio;
};
Architecture
Symmetrical Multi-Processing (SMP) attaches multiple CPUs to the same bus so that they share memory and the bus. A workload is distributed evenly across the CPUs (run in parallel), and the same task shares the same physical memory no matter which CPU it runs on. Out of this design, three models have evolved: UMA, NUMA and MPP. The first two are discussed in detail below.
The UMA system architecture
SMP (Symmetric Multi-Processing): a symmetric multiprocessing system contains a number of tightly coupled processors. All CPUs share all resources, such as the bus, memory and the I/O system, and there is only a single copy of the operating system (or database). The defining characteristic of such a system is that everything is shared: the CPUs are peers with no distinction between them, accessing the same memory, the same peripherals and a single operating system. The OS maintains a run queue, and the processors take processes from it in turn. If two processors request the same resource at the same time (for example, the same memory address), hardware and software locking mechanisms resolve the contention. Access to RAM is serialized; this and cache coherency issues cause performance to lag slightly behind the number of additional processors in the system.
"Symmetric multiprocessor" means that the CPUs in the server work symmetrically, with no master/slave or subordinate relationship. All CPUs share the same physical memory, and every CPU takes the same amount of time to access any memory address; for this reason SMP is also called Uniform Memory Access (UMA). Ways to scale an SMP server include adding memory, using faster CPUs, adding more CPUs, expanding I/O (slots and buses), and attaching more external devices (typically disk storage).
The main characteristic of an SMP server is sharing: every resource in the system (CPUs, memory, I/O and so on) is shared. That same characteristic is also the source of SMP's main problem: very limited scalability. Every shared component can become a bottleneck as an SMP server is scaled up, and memory is the most constrained. Because every CPU must go through the same memory bus to reach the same memory, memory-access contention grows rapidly as CPUs are added, eventually wasting CPU resources and sharply reducing effective CPU performance. Experience shows that SMP servers get the best CPU utilization with 2 to 4 CPUs.
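As a small kernel-side sketch of what "uniform" access means in practice, one can print which NUMA node each online CPU belongs to: on a pure UMA/SMP machine every CPU reports node 0, while on a NUMA machine the node id varies with the CPU. for_each_online_cpu() and cpu_to_node() are real kernel helpers; the function name dump_cpu_nodes() is hypothetical.

#include <linux/cpumask.h>
#include <linux/topology.h>
#include <linux/printk.h>

/* Hypothetical helper: map each online CPU to its memory node. */
static void dump_cpu_nodes(void)
{
	int cpu;

	for_each_online_cpu(cpu)
		pr_info("cpu %d -> node %d\n", cpu, cpu_to_node(cpu));
}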
Now back to the read-side critical section. Read-side critical sections do not exclude one another: several readers may be inside their critical sections at the same time. However, once a pointer to a block of memory has been obtained inside a read-side critical section, that memory must not be freed until the critical section has ended. The Linux kernel API that waits for read-side critical sections to finish is synchronize_rcu(). The check is global: if any code anywhere in the system is inside a read-side critical section, synchronize_rcu() blocks, and it only returns once every read-side critical section has ended. To make this concrete, consider a code example.
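A minimal sketch of this pattern (the names struct foo, gbl_foo, foo_read() and foo_update() are hypothetical): the reader dereferences a shared pointer inside rcu_read_lock()/rcu_read_unlock(), while the updater publishes a new copy with rcu_assign_pointer() and calls synchronize_rcu() before freeing the old one.

#include <linux/rcupdate.h>
#include <linux/slab.h>

struct foo {
	int a;
};

static struct foo __rcu *gbl_foo;	/* hypothetical shared pointer */

static int foo_read(void)
{
	struct foo *p;
	int val = 0;

	rcu_read_lock();			/* enter read-side critical section */
	p = rcu_dereference(gbl_foo);		/* safely fetch the shared pointer */
	if (p)
		val = p->a;
	rcu_read_unlock();			/* leave read-side critical section */
	return val;
}

static void foo_update(int new_a)
{
	struct foo *new_fp = kmalloc(sizeof(*new_fp), GFP_KERNEL);
	struct foo *old_fp;

	if (!new_fp)
		return;
	new_fp->a = new_a;
	old_fp = rcu_dereference_protected(gbl_foo, 1);
	rcu_assign_pointer(gbl_foo, new_fp);	/* publish the new version */
	synchronize_rcu();			/* wait for all pre-existing readers */
	kfree(old_fp);				/* no reader can still hold old_fp */
}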
/*
 * struct upid is used to get the id of the struct pid, as it is
 * seen in particular namespace. Later the struct pid is found with
 * find_pid_ns() using the int nr and struct pid_namespace *ns.
 */
struct upid {
	/* Try to keep pid_chain in the same cacheline as nr for find_vpid */
	int nr;				/* The numeric PID value */
	struct pid_namespace *ns;	/* Pointer to the namespace this value belongs to */
	struct hlist_node pid_chain;	/* Node in the pid hash table (pid_hash), used to look the
					 * upid up quickly by (nr, ns); it is added to the hash
					 * table in alloc_pid() */
};
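Since a struct pid carries one struct upid per namespace level, a quick sketch shows how the per-level numbers can be inspected. pid->level and pid->numbers[] are the real fields; show_pid_levels() is a hypothetical helper.

#include <linux/pid.h>
#include <linux/printk.h>

/* Hypothetical helper: print the numeric id of a pid at every namespace level. */
static void show_pid_levels(struct pid *pid)
{
	int i;

	for (i = 0; i <= pid->level; i++) {
		struct upid *upid = &pid->numbers[i];

		pr_info("level %d: nr=%d ns=%p\n", i, upid->nr, upid->ns);
	}
}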
find_get_pid()
Given a numeric process id, look up the corresponding pid descriptor (struct pid) and take a reference on it.
struct pid *find_get_pid(pid_t nr)	/* nr: the numeric process id */
{
	struct pid *pid;

	rcu_read_lock();
	pid = get_pid(find_vpid(nr));
	rcu_read_unlock();

	return pid;
}
/*
 * the helpers to get the pid's id seen from different namespaces
 *
 * pid_nr()    : global id, i.e. the id seen from the init namespace;
 * pid_vnr()   : virtual id, i.e. the id seen from the pid namespace of
 *               current.
 * pid_nr_ns() : id seen from the ns specified.
 *
 * see also task_xid_nr() etc in include/linux/sched.h
 */
static inline pid_t pid_nr(struct pid *pid)
{
	pid_t nr = 0;

	if (pid)
		nr = pid->numbers[0].nr;
	return nr;
}
	rcu_read_lock();
	if (!ns)
		ns = task_active_pid_ns(current);
	if (likely(pid_alive(task)))
		nr = pid_nr_ns(rcu_dereference(*task_pid_ptr(task, type)), ns);
	rcu_read_unlock();
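As a usage sketch (not part of the original excerpt), the wrappers built on top of these helpers can be compared directly. task_pid_nr() and task_pid_vnr() are real kernel helpers; report_task_ids() is a hypothetical function.

#include <linux/sched.h>
#include <linux/pid.h>
#include <linux/printk.h>

/* Hypothetical helper: print the global id and the namespace-local id of a task. */
static void report_task_ids(struct task_struct *task)
{
	pid_t global_nr  = task_pid_nr(task);	/* id seen from the init pid namespace */
	pid_t virtual_nr = task_pid_vnr(task);	/* id seen from current's pid namespace */

	pr_info("%s: global pid=%d, virtual pid=%d\n", task->comm, global_nr, virtual_nr);
}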
	wake_up_process(kthreadd_task);
	/*
	 * Wait for completion in killable state, for I might be chosen by
	 * the OOM killer while kthreadd is trying to allocate memory for
	 * new kernel thread.
	 */
	if (unlikely(wait_for_completion_killable(&done))) {
		/*
		 * If I was SIGKILLed before kthreadd (or new kernel thread)
		 * calls complete(), leave the cleanup of this structure to
		 * that thread.
		 */
		if (xchg(&create->done, NULL))
			return ERR_PTR(-EINTR);
		/*
		 * kthreadd (or new kernel thread) will call complete()
		 * shortly.
		 */
		wait_for_completion(&done);
	}
	task = create->result;
	if (!IS_ERR(task)) {
		static const struct sched_param param = { .sched_priority = 0 };
		char name[TASK_COMM_LEN];
		/*
		 * task is already visible to other tasks, so updating
		 * COMM must be protected.
		 */
		vsnprintf(name, sizeof(name), namefmt, args);
		set_task_comm(task, name);
		/*
		 * root may have changed our (kthreadd's) priority or CPU mask.
		 * The kernel thread should not inherit these properties.
		 */
		sched_setscheduler_nocheck(task, SCHED_NORMAL, &param);
		set_cpus_allowed_ptr(task, cpu_all_mask);
	}
	kfree(create);
	return task;
}
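For context, here is a minimal module-style sketch of driving this path from the caller's side. The thread function and module names (worker_fn, demo_init, demo_exit) are hypothetical; kthread_run(), kthread_should_stop() and kthread_stop() are the real helpers, and kthread_run() is simply kthread_create() followed by wake_up_process().

#include <linux/kthread.h>
#include <linux/delay.h>
#include <linux/module.h>

static struct task_struct *worker;	/* hypothetical module-global handle */

static int worker_fn(void *data)
{
	/* Loop until kthread_stop() is called on this thread. */
	while (!kthread_should_stop()) {
		pr_info("worker_fn running, pid=%d\n", current->pid);
		ssleep(1);
	}
	return 0;
}

static int __init demo_init(void)
{
	worker = kthread_run(worker_fn, NULL, "demo-worker-%d", 0);
	if (IS_ERR(worker))
		return PTR_ERR(worker);
	return 0;
}

static void __exit demo_exit(void)
{
	kthread_stop(worker);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");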
static int try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
{
	unsigned long flags;
	int cpu, success = 0;
	preempt_disable();
	if (p == current) {
		/*
		 * We're waking current, this means 'p->on_rq' and 'task_cpu(p)
		 * == smp_processor_id()'. Together this means we can special
		 * case the whole 'p->on_rq && ttwu_remote()' case below
		 * without taking any locks.
		 *
		 * In particular:
		 *  - we rely on Program-Order guarantees for all the ordering,
		 *  - we're serialized against set_special_state() by virtue of
		 *    it disabling IRQs (this allows not taking ->pi_lock).
		 */
		if (!(p->state & state))
			goto out;
	/*
	 * If we are going to wake up a thread waiting for CONDITION we
	 * need to ensure that CONDITION=1 done by the caller can not be
	 * reordered with p->state check below. This pairs with mb() in
	 * set_current_state() the waiting thread does.
	 */
	raw_spin_lock_irqsave(&p->pi_lock, flags);
	smp_mb__after_spinlock();
	if (!(p->state & state))
		goto unlock;
trace_sched_waking(p);
	/* We're going to change ->state: */
	success = 1;
	cpu = task_cpu(p);
	/*
	 * Ensure we load p->on_rq _after_ p->state, otherwise it would
	 * be possible to, falsely, observe p->on_rq == 0 and get stuck
	 * in smp_cond_load_acquire() below.
	 *
	 * sched_ttwu_pending()			try_to_wake_up()
	 *   STORE p->on_rq = 1			  LOAD p->state
	 *   UNLOCK rq->lock
	 *
	 * __schedule() (switch to task 'p')
	 *   LOCK rq->lock			  smp_rmb();
	 *   smp_mb__after_spinlock();
	 *   UNLOCK rq->lock
	 *
	 * [task p]
	 *   STORE p->state = UNINTERRUPTIBLE	  LOAD p->on_rq
	 *
	 * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in
	 * __schedule(). See the comment for smp_mb__after_spinlock().
	 */
	smp_rmb();
	if (p->on_rq && ttwu_remote(p, wake_flags))
		goto unlock;
#ifdef CONFIG_SMP
	/*
	 * Ensure we load p->on_cpu _after_ p->on_rq, otherwise it would be
	 * possible to, falsely, observe p->on_cpu == 0.
	 *
	 * One must be running (->on_cpu == 1) in order to remove oneself
	 * from the runqueue.
	 *
	 * __schedule() (switch to task 'p')	try_to_wake_up()
	 *   STORE p->on_cpu = 1		  LOAD p->on_rq
	 *   UNLOCK rq->lock
	 *
	 * __schedule() (put 'p' to sleep)
	 *   LOCK rq->lock			  smp_rmb();
	 *   smp_mb__after_spinlock();
	 *   STORE p->on_rq = 0			  LOAD p->on_cpu
	 *
	 * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in
	 * __schedule(). See the comment for smp_mb__after_spinlock().
	 */
	smp_rmb();
	/*
	 * If the owning (remote) CPU is still in the middle of schedule() with
	 * this task as prev, wait until it's done referencing the task.
	 *
	 * Pairs with the smp_store_release() in finish_task().
	 *
	 * This ensures that tasks getting woken will be fully ordered against
	 * their previous state and preserve Program Order.
	 */
	smp_cond_load_acquire(&p->on_cpu, !VAL);
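The pairing described in the comments above corresponds to the canonical sleep/wake pattern. Here is a minimal sketch: the condition flag event_ready and both function names are hypothetical, while set_current_state(), schedule() and wake_up_process() are the real kernel APIs.

#include <linux/sched.h>

static int event_ready;			/* hypothetical condition flag */

/* Waiter side: set the task state *before* testing the condition, so that the
 * memory barrier in set_current_state() pairs with the ordering taken in
 * try_to_wake_up() on the waker side. */
static int wait_for_event(void)
{
	for (;;) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (READ_ONCE(event_ready))
			break;
		schedule();		/* actually sleep */
	}
	__set_current_state(TASK_RUNNING);
	return 0;
}

/* Waker side: publish the condition first, then wake the task.
 * wake_up_process() -> try_to_wake_up() provides the ordering that makes the
 * store to event_ready visible before the waiter's state check. */
static void signal_event(struct task_struct *waiter)
{
	WRITE_ONCE(event_ready, 1);
	wake_up_process(waiter);
}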
int test_func(void *argv)
{
	int iData = -1;

	printk("test_func pid ==> %d\n", current->pid);
	iData = wake_up_process(pts_thread);
	printk("the state of pts_thread after wake_up_process ==> %ld\n", pts_thread->state);
	printk("the res of the wake_up_process ==> %d", iData);
	return 0;
}
void set_user_nice(struct task_struct *p, long nice)
{
	bool queued, running;
	int old_prio;
	struct rq_flags rf;
	struct rq *rq;
	if (task_nice(p) == nice || nice < MIN_NICE || nice > MAX_NICE)
		return;
	/*
	 * We have to be careful, if called from sys_setpriority(),
	 * the task might be in the middle of scheduling on another CPU.
	 */
	rq = task_rq_lock(p, &rf);
	update_rq_clock(rq);
	/*
	 * The RT priorities are set via sched_setscheduler(), but we still
	 * allow the 'normal' nice value to be set - but as expected
	 * it won't have any effect on scheduling while the task's policy is
	 * SCHED_DEADLINE, SCHED_FIFO or SCHED_RR:
	 */
	if (task_has_dl_policy(p) || task_has_rt_policy(p)) {
		p->static_prio = NICE_TO_PRIO(nice);
		goto out_unlock;
	}
	queued = task_on_rq_queued(p);
	running = task_current(rq, p);
	if (queued)
		dequeue_task(rq, p, DEQUEUE_SAVE | DEQUEUE_NOCLOCK);
	if (running)
		put_prev_task(rq, p);
	if (queued)
		enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK);
	if (running)
		set_next_task(rq, p);
	/*
	 * If the task increased its priority or is running and
	 * lowered its priority, then reschedule its CPU:
	 */
	p->sched_class->prio_changed(rq, p, old_prio);
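To relate the nice value to the prio fields shown in task_struct earlier, here is a small sketch. NICE_TO_PRIO(), task_nice() and set_user_nice() are real kernel helpers (for a normal task, static_prio = MAX_RT_PRIO + 20 + nice = 120 + nice); demo_renice() is a hypothetical wrapper.

#include <linux/sched.h>
#include <linux/sched/prio.h>
#include <linux/printk.h>

/* Hypothetical helper: renice a (normal-policy) task and report the result. */
static void demo_renice(struct task_struct *p, long nice)
{
	set_user_nice(p, nice);
	pr_info("%s: nice=%d static_prio=%d (expected %d)\n",
		p->comm, task_nice(p), p->static_prio, NICE_TO_PRIO(nice));
}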
/**
 * complete_all: - signals all threads waiting on this completion
 * @x:  holds the state of this particular completion
 *
 * This will wake up all threads waiting on this particular completion event.
 *
 * If this function wakes up a task, it executes a full memory barrier before
 * accessing the task state.
 *
 * Since complete_all() sets the completion of @x permanently to done
 * to allow multiple waiters to finish, a call to reinit_completion()
 * must be used on @x if @x is to be used again. The code must make
 * sure that all waiters have woken and finished before reinitializing
 * @x. Also note that the function completion_done() can not be used
 * to know if there are still waiters after complete_all() has been called.
 */
struct completion {
	unsigned int done;
	wait_queue_head_t wait;
};
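A minimal usage sketch of the completion API described above: init_completion(), wait_for_completion(), complete_all() and reinit_completion() are the real helpers, while the setup_done variable and the waiter_fn/producer names are hypothetical.

#include <linux/completion.h>
#include <linux/kthread.h>
#include <linux/printk.h>

static struct completion setup_done;	/* hypothetical: signalled once setup finishes */

static int waiter_fn(void *data)
{
	/* Block until the producer calls complete_all(). */
	wait_for_completion(&setup_done);
	pr_info("waiter %d released\n", current->pid);
	return 0;
}

static void producer(void)
{
	init_completion(&setup_done);

	/* ... start several waiter threads with kthread_run(waiter_fn, ...) ... */

	/* Release every thread currently sleeping in wait_for_completion(). */
	complete_all(&setup_done);

	/* Before reusing setup_done, all waiters must have woken up; only then
	 * is it safe to call reinit_completion(&setup_done). */
}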
/**
 * __wake_up_sync_key - wake up threads blocked on a waitqueue.
 * @wq_head: the waitqueue
 * @mode: which threads
 * @key: opaque value to be passed to wakeup targets
 *
 * The sync wakeup differs that the waker knows that it will schedule
 * away soon, so while the target thread will be woken up, it will not
 * be migrated to another CPU - ie. the two threads are 'synchronized'
 * with each other. This can prevent needless bouncing between CPUs.
 *
 * On UP it can prevent extra preemption.
 *
 * If this function wakes up a task, it executes a full memory barrier before
 * accessing the task state.
 */
void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode, void *key)
{
	if (unlikely(!wq_head))
		return;
int my_function(void *argc)
{
	printk("in the kernel thread function!\n");
	printk("the current pid is:%d\n", current->pid);	/* Print the PID of the current process */
	/* Print the state of the parent process */
	printk("the state of the init function is :%ld\n", old_thread->state);
	__wake_up_sync(&head, TASK_NEW, 0);	/* Wake up the processes on the wait queue */
	/* Print the state of the parent process after the wakeup call */
	printk("the state of the init function after __wake_up_sync is :%ld\n", old_thread->state);
	printk("out the kernel thread function\n");
	return 0;
}
static int __init __wake_up_sync_init(void)
{
	char namefrm[] = "__wake_up_sync.c%s";	/* Name format for the new thread; not significant here */
	long time_out;				/* Holds the return value of schedule_timeout_uninterruptible() */
	struct task_struct *result;		/* Holds the new thread's task_struct */
	wait_queue_t data;			/* Wait queue entry */

	printk("into __wake_up_sync_init.\n");
	result = kthread_create_on_node(my_function, NULL, -1, namefrm);	/* Create the new thread */
	printk("the pid of the new thread is:%d\n", result->pid);	/* Print the new thread's PID */
	printk("the current pid is:%d\n", current->pid);		/* Print the current process's PID */