第12章:匿名内存与页缓存
基于 Linux 6.12.38 源码
12.1 匿名内存概述
12.1.1 什么是匿名内存
匿名内存是不与文件关联的内存,主要用于:
- 堆 (Heap): malloc/brk 分配的内存
- 栈 (Stack): 线程栈
- 匿名 mmap: mmap(MAP_ANONYMOUS) 分配的内存
12.1.2 匿名内存特点
| 特点 | 描述 |
| --- | --- |
| 无文件后端 | 不与任何文件关联 |
| 使用 swap | 以 swap 作为后备存储 |
| 初始为零 | 首次访问时分配清零页面 |
| 私有 | 通常为进程私有 (MAP_PRIVATE);MAP_SHARED 的匿名映射可在父子进程间共享 |
12.2 brk 系统调用
12.2.1 sys_brk
位置: mm/mmap.c
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32
|
/*
 * brk() - move the end of the calling process's heap (the program break).
 *
 * Condensed from the upstream implementation; CONFIG_COMPAT_BRK handling
 * is left out.
 *
 * @brk: requested new program break (need not be page aligned).
 *
 * Return: the new break on success, the unchanged current break on any
 * failure, or -EINTR if taking the mmap write lock was interrupted by a
 * fatal signal. The raw syscall never returns -ENOMEM; the C library
 * wrapper derives that from the returned break.
 */
SYSCALL_DEFINE1(brk, unsigned long, brk)
{
	unsigned long newbrk, oldbrk, origbrk;
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *brkvma, *next = NULL;
	unsigned long min_brk;
	bool populate = false;
	LIST_HEAD(uf);
	struct vma_iterator vmi;

	/* mm->brk and the VMA tree may only be changed under the write lock. */
	if (mmap_write_lock_killable(mm))
		return -EINTR;

	origbrk = mm->brk;

	/* The break may never move below the start of the heap. */
	min_brk = mm->start_brk;
	if (brk < min_brk)
		goto out;

	/* Enforce RLIMIT_DATA before touching anything. */
	if (check_data_rlimit(rlimit(RLIMIT_DATA), brk, mm->start_brk,
			      mm->end_data, mm->start_data))
		goto out;

	newbrk = PAGE_ALIGN(brk);
	oldbrk = PAGE_ALIGN(mm->brk);
	if (oldbrk == newbrk) {
		/* Same page: only the recorded break changes. */
		mm->brk = brk;
		goto success;
	}

	/* Shrinking the heap is always allowed. */
	if (brk <= mm->brk) {
		vma_iter_init(&vmi, mm, newbrk);
		brkvma = vma_find(&vmi, oldbrk);
		if (!brkvma || brkvma->vm_start >= oldbrk)
			goto out; /* range belongs to a non-brk mapping */

		/*
		 * The unmap helper drops the mmap lock on success, so the
		 * new break has to be recorded before calling it. A failure
		 * jumps to "out", which restores the original value.
		 */
		mm->brk = brk;
		if (do_vmi_align_munmap(&vmi, brkvma, mm, newbrk, oldbrk, &uf,
					/* unlock = */ true))
			goto out;

		goto success_unlocked;
	}

	/* Growing: check address-space and locked-memory limits first. */
	if (check_brk_limits(oldbrk, newbrk - oldbrk))
		goto out;

	/* Refuse to grow into the guard gap of the next mapping. */
	vma_iter_init(&vmi, mm, oldbrk);
	next = vma_find(&vmi, newbrk + PAGE_SIZE + stack_guard_gap);
	if (next && newbrk + PAGE_SIZE > vm_start_gap(next))
		goto out;

	/* Extend (or create) the heap VMA; this is not a stack expansion. */
	brkvma = vma_prev_limit(&vmi, mm->start_brk);
	if (do_brk_flags(&vmi, brkvma, oldbrk, newbrk - oldbrk, 0) < 0)
		goto out;

	mm->brk = brk;
	if (mm->def_flags & VM_LOCKED)
		populate = true;

success:
	mmap_write_unlock(mm);
success_unlocked:
	userfaultfd_unmap_complete(mm, &uf);
	if (populate)
		mm_populate(oldbrk, newbrk - oldbrk);
	return brk;

out:
	/* Failure: leave the break where it was and report it. */
	mm->brk = origbrk;
	mmap_write_unlock(mm);
	return origbrk;
}
|
12.2.2 brk 流程
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33
| brk(new_brk) │ ▼ ┌──────────────────────────────────────┐ │ 1. 获取当前 brk │ │ - mm->brk │ └──────────────────────────────────────┘ │ ▼ ┌──────────────────────────────────────┐ │ 2. 比较新旧 brk │ │ - new_brk < old_brk: 缩小堆 │ │ - new_brk > old_brk: 扩大堆 │ └──────────────────────────────────────┘ │ 扩大 ▼ ┌──────────────────────────────────────┐ │ 3. 查找堆 VMA │ │ - find_vma │ └──────────────────────────────────────┘ │ ▼ ┌──────────────────────────────────────┐ │ 4. 扩展 VMA │ │ - expand_stack │ └──────────────────────────────────────┘ │ ▼ ┌──────────────────────────────────────┐ │ 5. 设置新的 brk │ │ - mm->brk = new_brk │ └──────────────────────────────────────┘
|
12.3 页缓存概述
12.3.1 什么是页缓存
页缓存是 Linux 内核用于缓存文件内容的机制,减少磁盘 I/O。
12.3.2 address_space 结构
位置: include/linux/fs.h
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
|
/*
 * struct address_space - contents of a cacheable, mappable object.
 *
 * One instance describes the page cache of a single object (normally an
 * inode): the cached folios, the VMAs that map it, and the operations
 * used to read it in and write it back.
 */
struct address_space {
	struct inode		*host;		/* owner: the inode or block device */
	struct xarray		i_pages;	/* cached folios, indexed by page offset */
	struct rw_semaphore	invalidate_lock; /* page cache vs. block mapping coherency during invalidation */
	gfp_t			gfp_mask;	/* allocation flags for new page cache pages */
	atomic_t		i_mmap_writable; /* number of VM_SHARED, VM_MAYWRITE mappings */
#ifdef CONFIG_READ_ONLY_THP_FOR_FS
	atomic_t		nr_thps;	/* number of THPs, non-shmem files only */
#endif
	struct rb_root_cached	i_mmap;		/* tree of private and shared mappings */
	unsigned long		nrpages;	/* number of page entries, protected by the i_pages lock */
	pgoff_t			writeback_index; /* writeback starts here */
	const struct address_space_operations *a_ops; /* methods */
	unsigned long		flags;		/* error bits and AS_* flags */
	errseq_t		wb_err;		/* most recent writeback error */
	spinlock_t		i_private_lock;	/* for use by the owner of the address_space */
	struct list_head	i_private_list;	/* for use by the owner of the address_space */
	struct rw_semaphore	i_mmap_rwsem;	/* protects i_mmap and i_mmap_writable */
	void			*i_private_data; /* for use by the owner of the address_space */
} __attribute__((aligned(sizeof(long)))) __randomize_layout;
|
12.3.3 address_space_operations
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
|
/*
 * struct address_space_operations - methods a filesystem supplies so the
 * page cache can move data between memory and backing store.
 *
 * Excerpt only: the remaining methods are omitted here.
 */
struct address_space_operations {
	/* Read one folio of the file into the page cache. */
	int (*read_folio)(struct file *filp, struct folio *folio);

	/* Write back a single dirty page. */
	int (*writepage)(struct page *page, struct writeback_control *wbc);
	/* Write back the dirty pages of a mapping as directed by @wbc. */
	int (*writepages)(struct address_space *, struct writeback_control *);

	/*
	 * Try to drop filesystem-private data attached to @folio.
	 * Returns true when the folio may now be freed; @gfp tells the
	 * callee which allocation context it is running in.
	 */
	bool (*release_folio)(struct folio *folio, gfp_t gfp);

	/* Perform I/O that bypasses the page cache. */
	ssize_t (*direct_IO)(struct kiocb *, struct iov_iter *iter);
};
|
12.4 页缓存操作
12.4.1 查找页面
位置: include/linux/pagemap.h (内联封装);底层实现 __filemap_get_folio() 位于 mm/filemap.c
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
|
/**
 * filemap_get_folio - Find and get a folio.
 * @mapping: The address_space to search.
 * @index: The page index.
 *
 * Looks up the page cache entry at @mapping & @index. If a folio is
 * present it is returned with an increased refcount. The lookup and the
 * safe reference acquisition are both done by __filemap_get_folio(), so
 * no open-coded xa_load()/refcount manipulation is needed here.
 *
 * Return: A folio or ERR_PTR(-ENOENT) if there is no folio in the cache
 * for this index. Never returns NULL, so callers must test with IS_ERR().
 */
static inline struct folio *filemap_get_folio(struct address_space *mapping,
					      pgoff_t index)
{
	return __filemap_get_folio(mapping, index, 0, 0);
}
/**
 * filemap_grab_folio - Find or create a locked folio.
 * @mapping: The address_space to search.
 * @index: The page index.
 *
 * Looks up the page cache entry at @mapping & @index. If no folio is
 * found, a new one is allocated with the mapping's GFP mask and inserted.
 * Locking, insertion races and reference counting are all handled inside
 * __filemap_get_folio(), so there is no retry loop here that could leak a
 * reference.
 *
 * Return: A found or created folio, locked and with an elevated refcount;
 * ERR_PTR(-ENOMEM) if no folio is found and one could not be created.
 */
static inline struct folio *filemap_grab_folio(struct address_space *mapping,
					       pgoff_t index)
{
	return __filemap_get_folio(mapping, index,
				   FGP_LOCK | FGP_ACCESSED | FGP_CREAT,
				   mapping_gfp_mask(mapping));
}
|
12.4.2 添加页面
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
|
/*
 * add_to_page_cache_lru - sketch of inserting a page into a mapping's
 * page cache and onto the LRU.
 *
 * As written, the snippet marks @page locked, clears its private field,
 * hands it to lru_cache_add(), then takes the i_pages lock and stores
 * the page through an xa_state named "xas".
 *
 * NOTE(review): "xas" is never declared in this snippet, and @index and
 * @gfp are not used, so the slot the page is stored in is not visible
 * here.
 * NOTE(review): the lock is taken with xa_lock() but released with
 * xas_unlock(), and only inside the xa_is_locked() branch - confirm the
 * intended pairing.
 * NOTE(review): upstream 6.12 appears to route page cache insertion
 * through filemap_add_folio() - confirm against mm/filemap.c.
 */
void add_to_page_cache_lru(struct page *page, struct address_space *mapping,
			   pgoff_t index, gfp_t gfp)
{
	__set_page_locked(page);
	set_page_private(page, 0);

	lru_cache_add(page);

	xa_lock(&mapping->i_pages);
	if (xa_is_locked(&mapping->i_pages)) {
		xas_store(&xas, page);
		xas_unlock(&xas);
	}
}
|
12.4.3 删除页面
1 2 3 4 5 6 7 8 9 10 11 12 13
|
/*
 * delete_from_page_cache - remove @page from the page cache it sits in.
 *
 * The owning mapping is read from the page, the removal itself runs with
 * that mapping's i_pages lock held and interrupts disabled, and one
 * reference on the page is dropped afterwards.
 *
 * NOTE(review): upstream 6.12 appears to expose this operation as
 * filemap_remove_folio() - confirm against mm/filemap.c.
 */
void delete_from_page_cache(struct page *page)
{
	struct address_space *cache;

	cache = page->mapping;

	xa_lock_irq(&cache->i_pages);
	__delete_from_page_cache(page, NULL);
	xa_unlock_irq(&cache->i_pages);

	put_page(page);
}
|
12.5 页回写
12.5.1 回写触发
位置: mm/page-writeback.c
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
|
/*
 * balance_dirty_pages_ratelimited - simplified sketch of the dirty-page
 * throttling entry point.
 *
 * Looks up the backing_dev_info of the inode that owns @mapping. When
 * dirty_exceeded is set and the current task does not carry
 * PF_LESS_THROTTLE, background writeback is started for that device.
 *
 * NOTE(review): dirty_exceeded is not declared anywhere in this snippet.
 * NOTE(review): PF_LESS_THROTTLE and passing a backing_dev_info to
 * wb_start_background_writeback() do not appear to match upstream 6.12
 * (PF_LOCAL_THROTTLE, struct bdi_writeback *) - confirm against
 * mm/page-writeback.c and fs/fs-writeback.c.
 */
void balance_dirty_pages_ratelimited(struct address_space *mapping)
{
	struct backing_dev_info *bdi = inode_to_bdi(mapping->host);

	if (dirty_exceeded && !(current->flags & PF_LESS_THROTTLE)) {
		wb_start_background_writeback(bdi);
	}
}
/*
 * wb_kupdate - periodic writeback loop (kernel thread body).
 *
 * Until the thread is asked to stop, sleeps for dirty_writeback_interval
 * and then wakes the flusher threads with WB_REASON_PERIODIC.
 * @dummy is unused. Always returns 0.
 *
 * NOTE(review): a function with this name does not appear to exist in
 * upstream 6.12, where periodic writeback is driven from the per-bdi
 * writeback work function - confirm against fs/fs-writeback.c.
 */
static int wb_kupdate(void *dummy)
{
	for (;;) {
		if (kthread_should_stop())
			break;

		schedule_timeout_interruptible(dirty_writeback_interval);
		wakeup_flusher_threads(WB_REASON_PERIODIC);
	}

	return 0;
}
|
12.5.2 脏页阈值
1 2 3 4 5 6 7
| /proc/sys/vm/dirty_background_ratio /proc/sys/vm/dirty_background_bytes /proc/sys/vm/dirty_ratio /proc/sys/vm/dirty_bytes /proc/sys/vm/dirty_expire_centisecs /proc/sys/vm/dirty_writeback_centisecs
|
12.5.3 回写流程
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
| ┌──────────────────────────────────────────────────────────────┐ │ 页回写流程 │ ├──────────────────────────────────────────────────────────────┤ │ │ │ 1. 用户写入内存 (write/mmap) │ │ │ │ 2. 标记页面为脏 (set_page_dirty) │ │ │ │ 3. 检查脏页阈值 │ │ - 如果超过 dirty_ratio: 阻止写入 │ │ - 如果超过 dirty_background_ratio: 后台回写 │ │ │ │ 4. 后台回写 (wb_kupdate) │ │ - 唤醒 pdflush/kworker 线程 │ │ - 将脏页写入磁盘 │ │ - 清除脏位 │ │ │ │ 5. 周期性回写 │ │ - dirty_expire_centisecs 后过期 │ │ - 强制回写到磁盘 │ │ │ └──────────────────────────────────────────────────────────────┘
|
12.6 文件锁
12.6.1 文件锁类型
1 2 3 4 5 6 7 8 9
|
/*
 * Lock types, as stored in the l_type field of struct flock.
 *
 * NOTE(review): these and the commands below are fcntl() record-lock
 * constants; flock(2) itself takes LOCK_SH / LOCK_EX / LOCK_UN / LOCK_NB -
 * confirm which set this section is meant to show.
 */
#define F_RDLCK 0	/* read (shared) lock */
#define F_WRLCK 1	/* write (exclusive) lock */
#define F_UNLCK 2	/* release the lock */

/* fcntl() commands operating on record locks. */
#define F_GETLK 5	/* query for a conflicting lock */
#define F_SETLK 6	/* set or clear a lock without blocking */
#define F_SETLKW 7	/* set or clear a lock, waiting if necessary */
|
12.6.2 flock 系统调用
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
|
/*
 * flock() - apply or remove an advisory lock on an open file
 * (simplified sketch).
 *
 * @fd:  file descriptor of the file to lock.
 * @cmd: lock operation.
 * @arg: passed through unchanged to locks_lock_file().
 *
 * Return: -EBADF if @fd does not refer to an open file, otherwise the
 * result of locks_lock_file().
 *
 * NOTE(review): upstream declares this syscall with two arguments,
 * SYSCALL_DEFINE2(flock, fd, cmd) - confirm against fs/locks.c.
 */
SYSCALL_DEFINE3(flock, unsigned int, fd, unsigned int, cmd,
		unsigned long, arg)
{
	struct file *file;
	int error;

	file = fget(fd);
	if (!file)
		return -EBADF;

	error = locks_lock_file(file, cmd, arg);

	/* Drop the reference fget() took; without this the file leaks. */
	fput(file);

	return error;
}
|
12.7 readahead
12.7.1 readahead 概述
readahead 是 Linux 的预读机制,提前读取文件内容到缓存,提高性能。
12.7.2 readahead API
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27
|
/*
 * Readahead entry points (declarations only; no bodies are shown here).
 *
 * NOTE(review): in upstream 6.12 force_page_cache_readahead() does not
 * appear to take a file_ra_state argument, and page_cache_async_ra_init()
 * does not appear to exist (the asynchronous counterpart is
 * page_cache_async_readahead()) - confirm against include/linux/pagemap.h
 * and mm/internal.h.
 */

/* Readahead of @req_count pages starting at @index, using state in @ra. */
void page_cache_sync_readahead(struct address_space *mapping,
			       struct file_ra_state *ra, struct file *filp,
			       pgoff_t index, unsigned long req_count);

/* Readahead of @nr_to_read pages starting at @offset. */
void force_page_cache_readahead(struct address_space *mapping,
				struct file_ra_state *ra, struct file *filp,
				pgoff_t offset, unsigned long nr_to_read);

void page_cache_async_ra_init(struct address_space *mapping,
			      struct file_ra_state *ra, unsigned long nr);
struct file_ra_state { pgoff_t start; unsigned int size; unsigned int async_size; unsigned int ra_pages; unsigned long prev_pos; };
|
12.8 本章小结
本章介绍了 Linux 6.12 的匿名内存与页缓存:
- 匿名内存: 堆、栈、匿名 mmap,无文件后端
- brk 系统调用: 调整堆大小,expand_stack
- 页缓存: 缓存文件内容,减少磁盘 I/O
- address_space: 页缓存管理结构
- 页缓存操作: 查找、添加、删除页面
- 页回写: 脏页阈值,后台/周期性回写
- 文件锁: 读锁、写锁
- readahead: 预读机制,提高性能
下一章将介绍内存压缩与迁移。