第11章 进程资源限制与统计
基于 Linux 6.12.38 源码分析
11.1 资源限制 (rlimit)
11.1.1 rlimit 结构
位置: include/linux/sched/signal.h(struct signal_struct 中的 rlim 数组)、include/uapi/linux/resource.h(struct rlimit)
1 2 3 4 5 6 7 8 9
| struct task_struct { struct rlimit rlim[RLIM_NLIMITS]; };
/*
 * One resource limit: a soft/hard pair.
 *
 * setrlimit rejects a pair whose rlim_cur exceeds rlim_max, and raising
 * rlim_max above its current value requires CAP_SYS_RESOURCE.
 */
struct rlimit {
	__kernel_ulong_t rlim_cur;	/* soft limit: the value the checks compare against */
	__kernel_ulong_t rlim_max;	/* hard limit: ceiling for rlim_cur */
};
|
11.1.2 资源限制类型
位置: include/uapi/asm-generic/resource.h
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
|
enum __rlimit_resource { RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK, RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE, RLIMIT_MEMLOCK, RLIMIT_AS, RLIMIT_LOCKS, RLIMIT_SIGPENDING, RLIMIT_MSGQUEUE, RLIMIT_NICE, RLIMIT_RTPRIO, RLIMIT_RTTIME, RLIMIT_NLIMITS, };
|
11.1.3 软限制与硬限制
1 2 3 4 5 6 7 8 9 10 11 12
|
/*
 * "No limit" sentinel: an unsigned long with every bit set.
 * The limit checks in this chapter compare rlim_cur against this value and
 * skip enforcement when it matches.
 */
#define RLIM_INFINITY (~0UL)
|
11.1.4 资源限制系统调用
位置: kernel/sys.c
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51
| SYSCALL_DEFINE2(getrlimit, unsigned int, resource, struct rlimit __user *, rlim) { struct rlimit value;
if (resource >= RLIM_NLIMITS) return -EINVAL;
task_lock(current->group_leader); value = current->signal->rlim[resource]; task_unlock(current->group_leader);
if (copy_to_user(rlim, &value, sizeof(*rlim))) return -EFAULT;
return 0; }
/*
 * setrlimit - replace one of the caller's resource limits.
 * @resource: index into the limit table; must be below RLIM_NLIMITS.
 * @rlim:     user buffer holding the new {rlim_cur, rlim_max} pair.
 *
 * Returns 0 on success, -EINVAL for an out-of-range @resource or a pair
 * whose soft limit exceeds its hard limit, -EFAULT if the user buffer
 * cannot be read, or -EPERM when the hard limit would be raised without
 * CAP_SYS_RESOURCE.
 */
SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
{
	struct rlimit new_rlim, *old_rlim;
	int retval = 0;

	if (resource >= RLIM_NLIMITS)
		return -EINVAL;

	if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
		return -EFAULT;

	if (new_rlim.rlim_cur > new_rlim.rlim_max)
		return -EINVAL;

	old_rlim = current->signal->rlim + resource;

	/*
	 * Compare against the old hard limit and store the new pair under the
	 * same lock getrlimit takes, so a reader never observes a half-updated
	 * pair and two concurrent writers cannot both pass the check against a
	 * stale rlim_max.
	 */
	task_lock(current->group_leader);
	if (new_rlim.rlim_max > old_rlim->rlim_max &&
	    !capable(CAP_SYS_RESOURCE))
		retval = -EPERM;
	if (!retval)
		*old_rlim = new_rlim;
	task_unlock(current->group_leader);

	return retval;
}
|
11.1.5 prlimit 系统调用
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
| SYSCALL_DEFINE4(prlimit64, pid_t, pid, unsigned int, resource, const struct rlimit64 __user *, new_rlim, struct rlimit64 __user *, old_rlim) { struct task_struct *tsk; struct rlimit64 old, new; int ret;
tsk = find_task_by_vpid(pid); if (!tsk) return -ESRCH;
if (old_rlim) { old.rlim_cur = tsk->signal->rlim[resource].rlim_cur; old.rlim_max = tsk->signal->rlim[resource].rlim_max;
if (copy_to_user(old_rlim, &old, sizeof(old))) return -EFAULT; }
if (new_rlim) { if (copy_from_user(&new, new_rlim, sizeof(new))) return -EFAULT;
ret = do_prlimit(tsk, resource, &new, NULL); if (ret) return ret; }
return 0; }
|
11.2 CPU 时间限制
11.2.1 RLIMIT_CPU
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
| static inline int check_rlimit_cpu(struct task_struct *p, u64 runtime) { unsigned long rlim = p->signal->rlim[RLIMIT_CPU].rlim_cur;
if (rlim == RLIM_INFINITY) return 0;
if (runtime / HZ >= rlim) { send_sig(SIGXCPU, p, 0);
if (runtime / HZ >= p->signal->rlim[RLIMIT_CPU].rlim_max) send_sig(SIGKILL, p, 0);
return 1; }
return 0; }
|
11.2.2 实时 CPU 时间限制
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
| static int check_rlimit_rttime(struct task_struct *p, u64 runtime) { unsigned long rlim = p->signal->rlim[RLIMIT_RTTIME].rlim_cur;
if (rlim == RLIM_INFINITY) return 0;
if (runtime >= rlim) { send_sig(SIGKILL, p, 0); return 1; }
return 0; }
|
11.3 文件描述符限制
11.3.1 RLIMIT_NOFILE
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
|
/*
 * get_fd_limit - return the soft RLIMIT_NOFILE value of a task.
 * @tsk: task whose limit is read.
 *
 * NOTE(review): rlim_cur is an unsigned long-sized field but the function
 * returns int, so the value is implicitly narrowed on return.
 */
int get_fd_limit(struct task_struct *tsk)
{
	return tsk->signal->rlim[RLIMIT_NOFILE].rlim_cur;
}
/*
 * alloc_fd - excerpt of the descriptor-allocation path; only the limit
 * check is shown.
 *
 * max_files is the caller's soft RLIMIT_NOFILE value as returned by
 * get_fd_limit(current).
 *
 * NOTE(review): `fd` is not declared in this excerpt, `files`, `start` and
 * `flags` are unused, and there is no final return; the descriptor search
 * and the success path appear to have been elided. Confirm against the full
 * source.
 */
static int alloc_fd(unsigned start, unsigned flags) { struct files_struct *files = current->files; unsigned int max_files = get_fd_limit(current);
/* A descriptor number at or beyond the limit is refused with -EMFILE. */
if (fd >= max_files) return -EMFILE;
}
|
11.3.2 系统限制
1 2 3 4 5 6 7 8 9 10
/proc/sys/fs/file-max
/proc/sys/fs/file-nr

$ ulimit -n
1024
$ ulimit -n 4096
|
11.4 内存限制
11.4.1 RLIMIT_AS
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
|
/*
 * task_rlimit - read the soft limit of @resource for @tsk.
 * @tsk:      task whose limit table is consulted.
 * @resource: index into the limit table (not range-checked here).
 *
 * The field is loaded through READ_ONCE().
 */
unsigned long task_rlimit(const struct task_struct *tsk, unsigned int resource)
{
	return READ_ONCE(tsk->signal->rlim[resource].rlim_cur);
}
/*
 * check_rlimit_address - may @mm grow by @len bytes under RLIMIT_AS?
 * @mm:  address space that is about to grow.
 * @len: size of the requested growth in bytes.
 *
 * Returns true when the growth fits within the caller's soft RLIMIT_AS (or
 * the limit is RLIM_INFINITY), false otherwise.
 */
bool check_rlimit_address(struct mm_struct *mm, unsigned long len)
{
	unsigned long limit = task_rlimit(current, RLIMIT_AS);
	unsigned long used;

	if (limit == RLIM_INFINITY)
		return true;

	used = mm->total_vm << PAGE_SHIFT;	/* pages -> bytes */

	/*
	 * Compare by subtraction: "used + len > limit" can wrap around for a
	 * huge @len and would then let the request through.
	 */
	if (used > limit)
		return false;

	return len <= limit - used;
}
|
11.4.2 RLIMIT_DATA
1 2 3 4 5 6 7 8 9 10 11 12 13 14
| static int check_data_rlimit(unsigned long addr, unsigned long len) { unsigned long rlim = task_rlimit(current, RLIMIT_DATA); unsigned long size = current->mm->total_vm << PAGE_SHIFT;
if (rlim == RLIM_INFINITY) return 0;
if (size + len > rlim) return -ENOMEM;
return 0; }
|
11.4.3 RLIMIT_STACK
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
|
/*
 * rlimit_stack - effective stack size limit of the current task.
 *
 * Returns the soft RLIMIT_STACK value, or STACK_MAX when the soft limit is
 * RLIM_INFINITY.
 *
 * The soft limit is the one that is enforced. Because rlim_cur never
 * exceeds rlim_max, taking max(rlim_cur, rlim_max) would always yield the
 * hard limit and let the stack grow past the soft limit.
 */
unsigned long rlimit_stack(void)
{
	unsigned long soft = current->signal->rlim[RLIMIT_STACK].rlim_cur;

	if (soft == RLIM_INFINITY)
		return STACK_MAX;

	return soft;
}
/*
 * expand_stack - excerpt of the stack-growth path; only the RLIMIT_STACK
 * check is shown.
 *
 * The request is refused with -ENOMEM when @addr lies more than
 * rlimit_stack() bytes above vma->vm_start.
 *
 * NOTE(review): this comparison can only trigger for addresses above
 * vm_start, i.e. it reads like the form for an upward-growing stack. For a
 * downward-growing stack one would expect the new size (vma->vm_end - addr)
 * to be compared with the limit - confirm which is intended.
 * NOTE(review): the actual expansion and the success return are elided.
 */
int expand_stack(struct vm_area_struct *vma, unsigned long addr) { unsigned long rlim = rlimit_stack();
if (addr > vma->vm_start + rlim) return -ENOMEM;
}
|
11.5 进程数限制
11.5.1 RLIMIT_NPROC
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
|
/*
 * check_nproc_limit - enforce RLIMIT_NPROC when creating a task.
 * @p: task whose limit and per-user process counter are consulted.
 *
 * Returns 0 if the counter is below the soft limit, or if the caller holds
 * CAP_SYS_ADMIN or CAP_SYS_RESOURCE; -EAGAIN otherwise.
 *
 * NOTE(review): the counter is read from p->user->processes; confirm that
 * this field path matches the kernel version the chapter targets.
 */
static int check_nproc_limit(struct task_struct *p)
{
	struct rlimit *nproc = &p->signal->rlim[RLIMIT_NPROC];
	long running = atomic_read(&p->user->processes);

	if (running < nproc->rlim_cur)
		return 0;

	/* Over the limit: either capability exempts the caller. */
	if (capable(CAP_SYS_ADMIN) || capable(CAP_SYS_RESOURCE))
		return 0;

	return -EAGAIN;
}
/*
 * copy_process - excerpt showing where the RLIMIT_NPROC check sits.
 * The parameter list and the rest of the body are elided ("...").
 */
static struct task_struct *copy_process(...) {
/* A non-zero result from check_nproc_limit() aborts via bad_fork_free. */
retval = check_nproc_limit(p); if (retval) goto bad_fork_free;
}
|
11.6 进程统计
11.6.1 CPU 时间统计
位置: include/linux/sched.h
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
| struct task_struct { u64 utime; u64 stime; u64 gtime; unsigned long long utimescaled; unsigned long long stimescaled; u64 prev_cputime; };
/* task_utime - return the utime counter of @p unchanged. */
static inline u64 task_utime(struct task_struct *p)
{
	u64 user_time = p->utime;

	return user_time;
}
/* task_stime - return the stime counter of @p unchanged. */
static inline u64 task_stime(struct task_struct *p)
{
	u64 system_time = p->stime;

	return system_time;
}
|
11.6.2 缺页统计
1 2 3 4 5 6 7 8 9 10 11 12
/*
 * Page-fault counters of task_struct (excerpt; all other members omitted).
 * The stray table-border character that was fused onto the declaration
 * line has been removed so the excerpt is valid C.
 */
struct task_struct {
	unsigned long min_flt;	/* presumably minor page faults - confirm */
	unsigned long maj_flt;	/* presumably major page faults - confirm */
};
|
11.6.3 上下文切换统计
1 2 3 4 5 6 7 8 9 10 11 12 13 14
/*
 * Context-switch counters of task_struct (excerpt; all other members
 * omitted). The stray table-border character that was fused onto the
 * declaration line has been removed so the excerpt is valid C.
 */
struct task_struct {
	unsigned long nvcsw;	/* presumably voluntary context switches - confirm */
	unsigned long nivcsw;	/* presumably involuntary context switches - confirm */
};
|
11.6.4 I/O 统计
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
| struct task_io_accounting { u64 rchar; u64 wchar; u64 syscr; u64 syscw; u64 read_bytes; u64 write_bytes; u64 cancelled_write_bytes; };
/*
 * I/O accounting member of task_struct (excerpt; all other members
 * omitted). The member exists only when CONFIG_TASK_IO_ACCOUNTING is set.
 *
 * Preprocessor directives must start their own line; with #ifdef/#endif
 * embedded in the middle of a line the declaration does not parse.
 */
struct task_struct {
#ifdef CONFIG_TASK_IO_ACCOUNTING
	struct task_io_accounting ioac;
#endif
};
|
11.7 /proc 进程统计接口
11.7.1 /proc/[pid]/stat
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
$ cat /proc/1234/stat
1234 (sleep) S 1000 1000 1000 0 -1 4194304 45 0 0 0 12 8 0 0 20 0 1 0 123456789 123456 18446744073709551615 94763570548736 94763570559329 140736123456789 0 0 0 0 16781312 0 0 0 0 17 5 0 0 0 0 0 94763570564864 94763570566384 94763576612106 140736123459321 140736123459336 140736123459336 140736123462825 0
|
11.7.2 /proc/[pid]/statm
1 2 3 4 5 6 7 8 9 10 11
$ cat /proc/1234/statm
1234 567 890 123 0 234 0
|
11.7.3 /proc/[pid]/status
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45
$ cat /proc/1234/status
Name:   sleep
State:  S (sleeping)
Tgid:   1234
Ngid:   0
Pid:    1234
PPid:   1000
TracerPid:      0
Uid:    1000 1000 1000 1000
Gid:    1000 1000 1000 1000

FDSize: 64
Groups: 1000 1001
NStgid: 1234
NSpid:  1234
NSpgid: 2000
NSsid:  1000

VmPeak:     12345 kB
VmSize:     12345 kB
VmLck:          0 kB
VmPin:          0 kB
VmHWM:       5678 kB
VmRSS:       4567 kB
RssAnon:     3456 kB
RssFile:     1111 kB
RssShmem:       0 kB
VmData:      2345 kB
VmStk:        136 kB
VmExe:        120 kB
VmLib:       2345 kB
VmPTE:         48 kB
VmSwap:         0 kB

Threads:        1
voluntary_ctxt_switches:        123
nonvoluntary_ctxt_switches:     45

CapEff: 0000000000000000
CapBnd: 0000000000000000
CapAmb: 0000000000000000
|
11.7.4 /proc/[pid]/io
1 2 3 4 5 6 7 8
$ cat /proc/1234/io
rchar: 1234567
wchar: 2345678
syscr: 1234
syscw: 2345
read_bytes: 1234567
write_bytes: 2345678
cancelled_write_bytes: 0
|
11.8 cgroup 资源控制
11.8.1 CPU 控制
1 2 3 4 5 6 7 8 9 10 11 12
# Create a group in the cpu controller hierarchy.
mkdir /sys/fs/cgroup/cpu/mygroup

# Quota of 50000 us per 100000 us period, i.e. half of one CPU.
# Each write is its own command; fused onto one line, the second "echo"
# would be passed as an argument to the first instead of being executed.
echo 50000 > /sys/fs/cgroup/cpu/mygroup/cpu.cfs_quota_us
echo 100000 > /sys/fs/cgroup/cpu/mygroup/cpu.cfs_period_us

# Relative CPU weight of the group.
echo 512 > /sys/fs/cgroup/cpu/mygroup/cpu.shares

# Move process 1234 into the group.
echo 1234 > /sys/fs/cgroup/cpu/mygroup/cgroup.procs
|
11.8.2 内存控制
1 2 3 4 5 6 7 8 9 10 11
# Create a group in the memory controller hierarchy, then configure it.
| mkdir /sys/fs/cgroup/memory/mygroup
# Write a 1G limit to the group's memory.limit_in_bytes.
echo 1G > /sys/fs/cgroup/memory/mygroup/memory.limit_in_bytes
# NOTE(review): in cgroup v1, writing 1 to memory.oom_control disables the
# OOM killer for the group - confirm that this is the intended setting.
echo 1 > /sys/fs/cgroup/memory/mygroup/memory.oom_control
# Read back the group's current memory usage.
cat /sys/fs/cgroup/memory/mygroup/memory.usage_in_bytes
|
11.8.3 进程数控制
1 2 3 4 5 6 7 8
# Create a group in the pids controller hierarchy.
| mkdir /sys/fs/cgroup/pids/mygroup
# Write 100 to the group's pids.max.
echo 100 > /sys/fs/cgroup/pids/mygroup/pids.max
# Move process 1234 into the group.
echo 1234 > /sys/fs/cgroup/pids/mygroup/cgroup.procs
|
11.9 本章小结
本章介绍了 Linux 进程资源限制与统计:
- rlimit 机制:软限制和硬限制
- 资源类型:CPU、内存、文件、进程数等
- 系统调用:getrlimit、setrlimit、prlimit
- CPU 统计:utime、stime、上下文切换
- 缺页统计:min_flt、maj_flt
- I/O 统计:读写字节数、系统调用次数
- /proc 接口:stat、statm、status、io
- cgroup 控制:CPU、内存、进程数限制
下一章将介绍进程调试与监控。