第14章:内存控制组
基于 Linux 6.12.38 源码
14.1 内存控制组概述
14.1.1 什么是 memcg
内存控制组 (Memory cgroup,memcg) 是 Linux 内核提供的资源控制机制,用于限制和统计一组进程的内存使用。
主要功能:
- 限制内存使用量
- 统计内存使用
- OOM 控制
- 限制 swap 使用量
14.1.2 memcg 层次结构
Root cgroup
├── group1 (1GB limit)
│   ├── process1
│   └── process2
├── group2 (2GB limit)
│   ├── process3
│   └── process4
└── group3 (512MB limit)
    └── process5
14.2 mem_cgroup 结构
14.2.1 结构定义
位置: include/linux/memcontrol.h(以下为便于说明的简化结构,并非源码原文;实际的 struct mem_cgroup 使用 struct page_counter 记录 memory、swap 等用量与限制)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27
| struct mem_cgroup { struct cgroup_subsys_state css;
unsigned long memory; unsigned long swap; unsigned long memsw;
unsigned long memory_usage; unsigned long swap_usage; unsigned long memsw_usage;
struct mem_cgroup_events { unsigned long low; unsigned long high; unsigned long max; unsigned long oom; } events;
struct list_head lru;
};
|
14.2.2 创建/删除 memcg
1 2 3 4 5 6 7
| struct mem_cgroup *mem_cgroup_alloc(void); void mem_cgroup_free(struct mem_cgroup *memcg);
int mem_cgroup_online(struct mem_cgroup *memcg); void mem_cgroup_offline(struct mem_cgroup *memcg);
|
14.3 内存分配与 memcg
14.3.1 memcg 分配
位置: mm/memcontrol.c(以下为简化的示意代码,并非源码原文;6.12 中实际的 mem_cgroup_charge() 以 struct folio 为单位进行记账)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39
| bool mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask) { struct mem_cgroup *memcg; int ret;
if (mm) { memcg = get_mem_cgroup_from_mm(mm); } else { memcg = root_mem_cgroup; }
ret = try_charge(memcg, gfp_mask, nr_pages); if (ret) return false;
page->memcg_data = (unsigned long)memcg;
return true; }
void mem_cgroup_uncharge(struct page *page) { struct mem_cgroup *memcg;
memcg = page->memcg_data;
if (mem_cgroup_dec_and_test(memcg, nr_pages)) { memcg_event(memcg, MEMCG_LOW); } }
|
14.3.2 OOM 处理
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27
| void mem_cgroup_oom(struct mem_cgroup *memcg, gfp_t gfp_mask) { if (memcg->oom_disable) return;
memcg_event(memcg, MEMCG_OOM);
mem_cgroup_out_of_memory(memcg, gfp_mask); }
bool mem_cgroup_over_limit(struct mem_cgroup *memcg) { if (memcg->memory_usage >= memcg->memory) return true;
if (memcg->swap_usage >= memcg->swap) return true;
return false; }
|
14.4 memcg 统计
14.4.1 统计接口
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
| unsigned long mem_cgroup_usage(struct mem_cgroup *memcg, bool swap) { if (swap) return memcg->swap_usage; else return memcg->memory_usage; }
void mem_cgroup_get_stats(struct mem_cgroup *memcg, struct mem_cgroup_stats *stats) { stats->cache = memcg_page_state(memcg, NR_FILE_PAGES); stats->rss = memcg_page_state(memcg, NR_ANON_MAPPED); stats->shmem = memcg_page_state(memcg, NR_SHMEM); stats->file_mapped = memcg_page_state(memcg, NR_FILE_MAPPED); }
|
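14.4.2 cgroup 文件系统接口
以下文件位于 cgroup v1 memory 控制器的挂载点 /sys/fs/cgroup/memory/ 下,而不是 /proc;在 cgroup v2 中,对应的文件为 memory.current、memory.max、memory.peak、memory.swap.current 等。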
1 2 3 4 5 6 7 8 9 10 11 12 13 14
| ls /sys/fs/cgroup/memory/
cat /sys/fs/cgroup/memory/<cgroup>/memory.usage_in_bytes cat /sys/fs/cgroup/memory/<cgroup>/memory.limit_in_bytes cat /sys/fs/cgroup/memory/<cgroup>/memory.max_usage_in_bytes
cat /sys/fs/cgroup/memory/<cgroup>/memory.oom_control
cat /sys/fs/cgroup/memory/<cgroup>/memory.memsw.usage_in_bytes cat /sys/fs/cgroup/memory/<cgroup>/memory.memsw.max_usage_in_bytes
|
14.5 memcg 与 swap
14.5.1 swap 控制
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
| bool mem_cgroup_swappiness(struct mem_cgroup *memcg) { return memcg->swappiness; }
int mem_cgroup_charge_swap(struct page *page, int nr_pages) { struct mem_cgroup *memcg;
memcg = page->memcg_data;
if (!mem_cgroup_try_charge_swap(memcg, nr_pages)) { return -ENOMEM; }
return 0; }
|
14.5.2 swap 配置
1 2 3 4 5 6 7 8
| echo 0 > /sys/fs/cgroup/memory/<cgroup>/memory.swappiness
echo 60 > /sys/fs/cgroup/memory/<cgroup>/memory.swappiness
echo 1G > /sys/fs/cgroup/memory/<cgroup>/memory.memsw.limit_in_bytes
|
14.6 memcg 与内核内存
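14.6.1 内核内存限制
注意: 下列配置选项来自较早的内核版本。在 Linux 6.12 中,CONFIG_MEMCG_KMEM、CONFIG_MEMCG_SWAP 和 CONFIG_MEMCG_SWAP_ENABLED 均已被移除:内核内存记账随 CONFIG_MEMCG 一并启用,swap 记账在同时启用 CONFIG_MEMCG 与 CONFIG_SWAP 时可用,cgroup v1 接口则由 CONFIG_MEMCG_V1 控制。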
1 2 3
| CONFIG_MEMCG_KMEM # memcg 内核内存 CONFIG_MEMCG_SWAP # memcg swap CONFIG_MEMCG_SWAP_ENABLED # 启用 memcg swap
|
14.6.2 内核内存分配
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
| void *kmalloc_memcg(size_t size, gfp_t gfp, struct mem_cgroup *memcg) { if (!mem_cgroup_charge(memcg, size, gfp)) return NULL;
return kmalloc(size, gfp); }
void kfree_memcg(void *ptr, struct mem_cgroup *memcg) { kfree(ptr);
mem_cgroup_uncharge(memcg, size); }
|
14.7 memcg 事件
14.7.1 事件类型
1 2 3 4 5 6 7 8 9 10 11
| enum memcg_memory_event { MEMCG_LOW, MEMCG_HIGH, MEMCG_MAX, MEMCG_OOM, MEMCG_OOM_KILL, MEMCG_SWAP_MAX, MEMCG_SWAP_FAIL, MEMCG_NR_EVENTS, };
|
14.7.2 事件通知
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
| void mem_cgroup_event(struct mem_cgroup *memcg, enum memcg_memory_event event) { atomic64_inc(&memcg->events[event]);
cgroup_file_notify(memcg->css.cgroup, event); }
void mem_cgroup_oom_control(struct mem_cgroup *memcg, struct mem_cgroup_event *event) { if (event->type == MEMCG_OOM) { mem_cgroup_out_of_memory(memcg, event->gfp_mask); } }
|
14.8 本章小结
本章介绍了 Linux 6.12 的内存控制组:
- memcg 概述: 限制和统计进程组内存使用
- mem_cgroup: 核心数据结构,限制、统计、事件
- 内存分配: 分配时检查限制,charge/uncharge
- OOM 处理: 超过限制时触发 OOM killer
- memcg 统计: 使用统计、cgroup 文件系统 (/sys/fs/cgroup) 接口
- swap 控制: swap 使用限制和策略
- 内核内存: memcg 对内核内存的支持
- memcg 事件: low、high、max 阈值事件,以及 OOM、OOM kill 和 swap 相关事件
下一章将介绍内存统计与调试。