defmain(argv): # Create an Angr project. # If you want to be able to point to the binary from the command line, you can # use argv[1] as the parameter. Then, you can run the script from the command # line as follows: # python ./scaffold00.py [binary] # (!) path_to_binary = "./00_angr_find"# :string project = angr.Project(path_to_binary)
# Tell Angr where to start executing (should it start from the main() # function or somewhere else?) For now, use the entry_state function # to instruct Angr to start from the main() function. initial_state = project.factory.entry_state()
# Create a simulation manager initialized with the starting state. It provides # a number of useful tools to search and execute the binary. simulation = project.factory.simgr(initial_state)
# Explore the binary to attempt to find the address that prints "Good Job." # You will have to find the address you want to find and insert it here. # This function will keep executing until it either finds a solution or it # has explored every possible path through the executable. # (!) print_good_address = 0x8048678# :integer (probably in hexadecimal) simulation.explore(find=print_good_address)
# Check that we have found a solution. The simulation.explore() method will # set simulation.found to a list of the states that it could find that reach # the instruction we asked it to search for. Remember, in Python, if a list # is empty, it will be evaluated as false, otherwise true. if simulation.found: # The explore method stops after it finds a single state that arrives at the # target address. solution_state = simulation.found[0]
# Print the string that Angr wrote to stdin to follow solution_state. This # is our solution. print(solution_state.posix.dumps(sys.stdin.fileno())) else: # If Angr could not find a path that reaches print_good_address, throw an # error. Perhaps you mistyped the print_good_address? raise Exception('Could not find the solution')
if __name__ == '__main__': main(sys.argv)
1 2 3 4 5 6 7 8 9 10
> python3 scaffold00.py WARNING | 2024-07-15 15:50:18,672 | angr.storage.memory_mixins.default_filler_mixin | The program is accessing register with an unspecified value. This could indicate unwanted behavior. WARNING | 2024-07-15 15:50:18,672 | angr.storage.memory_mixins.default_filler_mixin | angr will cope with this by generating an unconstrained symbolic variable and continuing. You can resolve this by: WARNING | 2024-07-15 15:50:18,672 | angr.storage.memory_mixins.default_filler_mixin | 1) setting a value to the initial state WARNING | 2024-07-15 15:50:18,672 | angr.storage.memory_mixins.default_filler_mixin | 2) adding the state option ZERO_FILL_UNCONSTRAINED_{MEMORY,REGISTERS}, to make unknown regions hold null WARNING | 2024-07-15 15:50:18,673 | angr.storage.memory_mixins.default_filler_mixin | 3) adding the state option SYMBOL_FILL_UNCONSTRAINED_{MEMORY,REGISTERS}, to suppress these messages. WARNING | 2024-07-15 15:50:18,674 | angr.storage.memory_mixins.default_filler_mixin | Filling register edi with 4 unconstrained bytes referenced from 0x80486b1 (__libc_csu_init+0x1 in 00_angr_find (0x80486b1)) WARNING | 2024-07-15 15:50:18,678 | angr.storage.memory_mixins.default_filler_mixin | Filling register ebx with 4 unconstrained bytes referenced from 0x80486b3 (__libc_csu_init+0x3 in 00_angr_find (0x80486b3)) WARNING | 2024-07-15 15:50:23,213 | angr.storage.memory_mixins.default_filler_mixin | Filling memory at 0x7ffeff60 with 4 unconstrained bytes referenced from 0x819fb90 (strcmp+0x0 in libc.so.6 (0x9fb90)) b'JXWVXRKX'
# Tell Angr where to start executing (should it start from the main() # function or somewhere else?) For now, use the entry_state function # to instruct Angr to start from the main() function. initial_state = project.factory.entry_state()
state 代表程序的一个实例镜像,模拟执行某个时刻的状态,就类似于快照。保存运行状态的上下文信息,如内存/寄存器等,我们这里使用project.factory.entry_state()告诉符号执行引擎从程序的入口点开始符号执行,除了使用.entry_state() 创建 state 对象, 我们还可以根据需要使用其他构造函数创建 state。
设置 Simulation Managers
1 2 3
# Create a simulation manager initialized with the starting state. It provides # a number of useful tools to search and execute the binary. simulation = project.factory.simgr(initial_state)
# Explore the binary to attempt to find the address that prints "Good Job." # You will have to find the address you want to find and insert it here. # This function will keep executing until it either finds a solution or it # has explored every possible path through the executable. print_good_address = 0x8048678 simulation.explore(find=print_good_address)
# Check that we have found a solution. The simulation.explore() method will # set simulation.found to a list of the states that it could find that reach # the instruction we asked it to search for. Remember, in Python, if a list # is empty, it will be evaluated as false, otherwise true. if simulation.found: # The explore method stops after it finds a single state that arrives at the # target address. solution_state = simulation.found[0] # Print the string that Angr wrote to stdin to follow solution_state. This # is our solution. print(solution_state.posix.dumps(sys.stdin.fileno())) else: # If Angr could not find a path that reaches print_good_address, throw an # error. Perhaps you mistyped the print_good_address? raise Exception('Could not find the solution')
# Explore the binary, but this time, instead of only looking for a state that # reaches the print_good_address, also find a state that does not reach # will_not_succeed_address. The binary is pretty large, to save you some time, # everything you will need to look at is near the beginning of the address # space. # (!) print_good_address = 0x080485E0 will_not_succeed_address = 0x080485A8 simulation.explore(find=print_good_address, avoid=will_not_succeed_address)
if simulation.found: solution_state = simulation.found[0] print(solution_state.posix.dumps(sys.stdin.fileno())) else: raise Exception('Could not find the solution')
if __name__ == '__main__': main(sys.argv)
02_angr_find_condition
题目本意是教会我们如何根据程序本身的输出来告诉angr应避免或保留的内容。因为有时候打开二进制文件将看到有很多打印“ Good Job”的块,或“Try Again”的块。每次都记录下这些块的所有起始地址是一个麻烦的的问题,这时候我们可以直接根据打印到stdout的内容告诉angr保留或丢弃状态。
# It is very useful to be able to search for a state that reaches a certain # instruction. However, in some cases, you may not know the address of the # specific instruction you want to reach (or perhaps there is no single # instruction goal.) In this challenge, you don't know which instruction # grants you success. Instead, you just know that you want to find a state where # the binary prints "Good Job." # # Angr is powerful in that it allows you to search for a states that meets an # arbitrary condition that you specify in Python, using a predicate you define # as a function that takes a state and returns True if you have found what you # are looking for, and False otherwise.
# Define a function that checks if you have found the state you are looking # for. defis_successful(state): # Dump whatever has been printed out by the binary so far into a string. stdout_output = state.posix.dumps(sys.stdout.fileno())
# Return whether 'Good Job.' has been printed yet. # (!) ifb'Good Job.'in stdout_output: returnTrue else: returnFalse
# Same as above, but this time check if the state should abort. If you return # False, Angr will continue to step the state. In this specific challenge, the # only time at which you will know you should abort is when the program prints # "Try again." defshould_abort(state): stdout_output = state.posix.dumps(sys.stdout.fileno()) ifb'Try again.'in stdout_output: returnTrue else: returnFalse
# Tell Angr to explore the binary and find any state that is_successful identfies # as a successful state by returning True. simulation.explore(find=is_successful, avoid=should_abort)
if simulation.found: solution_state = simulation.found[0] print(solution_state.posix.dumps(sys.stdin.fileno())) else: raise Exception('Could not find the solution')
# Angr doesn't currently support reading multiple things with scanf (Ex: # scanf("%u %u).) You will have to tell the simulation engine to begin the # program after scanf is called and manually inject the symbols into registers.
# Sometimes, you want to specify where the program should start. The variable # start_address will specify where the symbolic execution engine should begin. # Note that we are using blank_state, not entry_state. # (!) start_address = 0x08048980# :integer (probably hexadecimal) initial_state = project.factory.blank_state(addr=start_address)
# Create a symbolic bitvector (the datatype Angr uses to inject symbolic # values into the binary.) The first parameter is just a name Angr uses # to reference it. # You will have to construct multiple bitvectors. Copy the two lines below # and change the variable names. To figure out how many (and of what size) # you need, dissassemble the binary and determine the format parameter passed # to scanf. # (!) password_size_in_bits = 32# :integer password0 = claripy.BVS('password0', password_size_in_bits) password1 = claripy.BVS('password1', password_size_in_bits) password2 = claripy.BVS('password2', password_size_in_bits)
# Set a register to a symbolic value. This is one way to inject symbols into # the program. # initial_state.regs stores a number of convenient attributes that reference # registers by name. For example, to set eax to password0, use: # # initial_state.regs.eax = password0 # # You will have to set multiple registers to distinct bitvectors. Copy and # paste the line below and change the register. To determine which registers # to inject which symbol, dissassemble the binary and look at the instructions # immediately following the call to scanf. # (!) initial_state.regs.eax = password0 initial_state.regs.ebx = password1 initial_state.regs.edx = password2
if simulation.found: solution_state = simulation.found[0]
# Solve for the symbolic values. If there are multiple solutions, we only # care about one, so we can use eval, which returns any (but only one) # solution. Pass eval the bitvector you want to solve for. # (!) solution0 = solution_state.se.eval(password0) solution1 = solution_state.se.eval(password1) solution2 = solution_state.se.eval(password2)
# Aggregate and format the solutions you computed above, and then print # the full string. Pay attention to the order of the integers, and the # expected base (decimal, octal, hexadecimal, etc). solution = hex(solution0) + " " + hex(solution1) + " " + hex(solution2) # :string print(solution) else: raise Exception('Could not find the solution')
if __name__ == '__main__': main(sys.argv)
states
状态预设
除了使用.entry_state() 创建 state 对象, 我们还可以根据需要使用其他构造函数创建 state:
# This challenge will be more challenging than the previous challenges that you # have encountered thus far. Since the goal of this CTF is to teach symbolic # execution and not how to construct stack frames, these comments will work you # through understanding what is on the stack. # ! ! ! # IMPORTANT: Any addresses in this script aren't necessarily right! Dissassemble # the binary yourself to determine the correct addresses! # ! ! !
# For this challenge, we want to begin after the call to scanf. Note that this # is in the middle of a function. # # This challenge requires dealing with the stack, so you have to pay extra # careful attention to where you start, otherwise you will enter a condition # where the stack is set up incorrectly. In order to determine where after # scanf to start, we need to look at the dissassembly of the call and the # instruction immediately following it: # sub $0x4,%esp # lea -0x10(%ebp),%eax # push %eax # lea -0xc(%ebp),%eax # push %eax # push $0x80489c3 # call 8048370 <__isoc99_scanf@plt> # add $0x10,%esp # Now, the question is: do we start on the instruction immediately following # scanf (add $0x10,%esp), or the instruction following that (not shown)? # Consider what the 'add $0x10,%esp' is doing. Hint: it has to do with the # scanf parameters that are pushed to the stack before calling the function. # Given that we are not calling scanf in our Angr simulation, where should we # start? # (!) start_address = 0x08048697 initial_state = project.factory.blank_state(addr=start_address)
# We are jumping into the middle of a function! Therefore, we need to account # for how the function constructs the stack. The second instruction of the # function is: # mov %esp,%ebp # At which point it allocates the part of the stack frame we plan to target: # sub $0x18,%esp # Note the value of esp relative to ebp. The space between them is (usually) # the stack space. Since esp was decreased by 0x18 # # /-------- The stack --------\ # ebp -> | | # |---------------------------| # | | # |---------------------------| # . . . (total of 0x18 bytes) # . . . Somewhere in here is # . . . the data that stores # . . . the result of scanf. # esp -> | | # \---------------------------/ # # Since we are starting after scanf, we are skipping this stack construction # step. To make up for this, we need to construct the stack ourselves. Let us # start by initializing ebp in the exact same way the program does. initial_state.regs.ebp = initial_state.regs.esp
# scanf("%u %u") needs to be replaced by injecting two bitvectors. The # reason for this is that Angr does not (currently) automatically inject # symbols if scanf has more than one input parameter. This means Angr can # handle 'scanf("%u")', but not 'scanf("%u %u")'. # You can either copy and paste the line below or use a Python list. # (!) password_size_in_bits = 32 password0 = claripy.BVS('password0', password_size_in_bits) password1 = claripy.BVS('password1', password_size_in_bits) # Here is the hard part. We need to figure out what the stack looks like, at # least well enough to inject our symbols where we want them. In order to do # that, let's figure out what the parameters of scanf are: # sub $0x4,%esp # lea -0x10(%ebp),%eax # push %eax # lea -0xc(%ebp),%eax # push %eax # push $0x80489c3 # call 8048370 <__isoc99_scanf@plt> # add $0x10,%esp # As you can see, the call to scanf looks like this: # scanf( 0x80489c3, ebp - 0xc, ebp - 0x10 ) # format_string password0 password1 # From this, we can construct our new, more accurate stack diagram: # # /-------- The stack --------\ # ebp -> | padding | # |---------------------------| # ebp - 0x01 | more padding | # |---------------------------| # ebp - 0x02 | even more padding | # |---------------------------| # . . . <- How much padding? Hint: how # |---------------------------| many bytes is password0? # ebp - 0x0b | password0, second byte | # |---------------------------| # ebp - 0x0c | password0, first byte | # |---------------------------| # ebp - 0x0d | password1, last byte | # |---------------------------| # . . . # |---------------------------| # ebp - 0x10 | password1, first byte | # |---------------------------| # . . . # |---------------------------| # esp -> | | # \---------------------------/ # # Figure out how much space there is and allocate the necessary padding to # the stack by decrementing esp before you push the password bitvectors. padding_length_in_bytes = 0x8# :integer initial_state.regs.esp -= padding_length_in_bytes
# Push the variables to the stack. Make sure to push them in the right order! # The syntax for the following function is: # # initial_state.stack_push(bitvector) # # This will push the bitvector on the stack, and increment esp the correct # amount. You will need to push multiple bitvectors on the stack. # (!) initial_state.stack_push(password0) # :bitvector (claripy.BVS, claripy.BVV, claripy.BV) initial_state.stack_push(password1)
# Determine the address of the global variable to which scanf writes the user # input. The function 'initial_state.memory.store(address, value)' will write # 'value' (a bitvector) to 'address' (a memory location, as an integer.) The # 'address' parameter can also be a bitvector (and can be symbolic!). # (!) password0_address = 0xA1BA1C0 password1_address = 0xA1BA1C8 password2_address = 0xA1BA1D0 password3_address = 0xA1BA1D8 initial_state.memory.store(password0_address, password0) initial_state.memory.store(password1_address, password1) initial_state.memory.store(password2_address, password2) initial_state.memory.store(password3_address, password3)
if simulation.found: solution_state = simulation.found[0] # Solve for the symbolic values. We are trying to solve for a string. # Therefore, we will use eval, with named parameter cast_to=str # which returns a string instead of an integer. # (!) solution0 = solution_state.se.eval(password0,cast_to=bytes) solution1 = solution_state.se.eval(password1,cast_to=bytes) solution2 = solution_state.se.eval(password2,cast_to=bytes) solution3 = solution_state.se.eval(password3,cast_to=bytes) solution = solution0 + b" " + solution1 + b" " + solution2 + b" " + solution3 print(solution) else: raise Exception('Could not find the solution')
defload(self, addr, size=None, condition=None, fallback=None, add_constraints=None, action=None, endness=None,inspect=True, disable_actions=False, ret_on_segv=False): """ Loads size bytes from dst. :param addr: The address to load from. #读取的地址 :param size: The size (in bytes) of the load. #大小 :param condition: A claripy expression representing a condition for a conditional load. :param fallback: A fallback value if the condition ends up being False. :param add_constraints: Add constraints resulting from the merge (default: True). :param action: A SimActionData to fill out with the constraints. :param endness: The endness to load with. #端序
1 2 3 4 5 6 7 8
defstore(self, addr, data, size=None, condition=None, add_constraints=None, endness=None, action=None, inspect=True, priv=None, disable_actions=False): """ Stores content into memory. :param addr: A claripy expression representing the address to store at. #内存地址 :param data: The data to store (claripy expression or something convertable to a claripy expression).#写入的数据 :param size: A claripy expression representing the size of the data to store. #大小 ...
# Instead of telling the binary to write to the address of the memory # allocated with malloc, we can simply fake an address to any unused block of # memory and overwrite the pointer to the data. This will point the pointer # with the address of pointer_to_malloc_memory_address0 to fake_heap_address. # Be aware, there is more than one pointer! Analyze the binary to determine # global location of each pointer. # Note: by default, Angr stores integers in memory with big-endianness. To # specify to use the endianness of your architecture, use the parameter # endness=project.arch.memory_endness. On x86, this is little-endian. # (!) # 之前buffer指向的是malloc分配好的内存地址,string存在这里。现在是buffer指向的是我们伪造的地址,符号位向量存在这里。 fake_heap_address0 = 0xffffc93c pointer_to_malloc_memory_address0 = 0xabcc8a4 fake_heap_address1 = 0xffffc95c pointer_to_malloc_memory_address1 = 0xabcc8ac # 参数 endness 用于设置端序,angr默认为大端序,总共可选的值如下: """ LE – 小端序(little endian, least significant byte is stored at lowest address) BE – 大端序(big endian, most significant byte is stored at lowest address) ME – 中间序(Middle-endian. Yep.) """ initial_state.memory.store(pointer_to_malloc_memory_address0, fake_heap_address0, endness=project.arch.memory_endness) initial_state.memory.store(pointer_to_malloc_memory_address1, fake_heap_address1, endness=project.arch.memory_endness)
# Store our symbolic values at our fake_heap_address. Look at the binary to # determine the offsets from the fake_heap_address where scanf writes. # (!) initial_state.memory.store(fake_heap_address0, password0) initial_state.memory.store(fake_heap_address1, password1)
# This challenge could, in theory, be solved in multiple ways. However, for the # sake of learning how to simulate an alternate filesystem, please solve this # challenge according to structure provided below. As a challenge, once you have # an initial solution, try solving this in an alternate way. # # Problem description and general solution strategy: # The binary loads the password from a file using the fread function. If the # password is correct, it prints "Good Job." In order to keep consistency with # the other challenges, the input from the console is written to a file in the # ignore_me function. As the name suggests, ignore it, as it only exists to # maintain consistency with other challenges. # We want to: # 1. Determine the file from which fread reads. # 2. Use Angr to simulate a filesystem where that file is replaced with our own # simulated file. # 3. Initialize the file with a symbolic value, which will be read with fread # and propogated through the program. # 4. Solve for the symbolic input to determine the password.
# Specify some information needed to construct a simulated file. For this # challenge, the filename is hardcoded, but in theory, it could be symbolic. # Note: to read from the file, the binary calls # 'fread(buffer, sizeof(char), 64, file)'. # (!) filename = 'OJKSQYDP.txt'# :string symbolic_file_size_bytes = 64
# Construct a bitvector for the password and then store it in the file's # backing memory. For example, imagine a simple file, 'hello.txt': # # Hello world, my name is John. # ^ ^ # ^ address 0 ^ address 24 (count the number of characters) # In order to represent this in memory, we would want to write the string to # the beginning of the file: # # hello_txt_contents = claripy.BVV('Hello world, my name is John.', 30*8) # # Perhaps, then, we would want to replace John with a # symbolic variable. We would call: # # name_bitvector = claripy.BVS('symbolic_name', 4*8) # # Then, after the program calls fopen('hello.txt', 'r') and then # fread(buffer, sizeof(char), 30, hello_txt_file), the buffer would contain # the string from the file, except four symbolic bytes where the name would be # stored. # (!) password = claripy.BVS('password', symbolic_file_size_bytes * 8)
# Construct the symbolic file. The file_options parameter specifies the Linux # file permissions (read, read/write, execute etc.) The content parameter # specifies from where the stream of data should be supplied. If content is # an instance of SimSymbolicMemory (we constructed one above), the stream will # contain the contents (including any symbolic contents) of the memory, # beginning from address zero. # Set the content parameter to our BVS instance that holds the symbolic data. # (!) password_file = angr.storage.SimFile(filename, content=password) # Add the symbolic file we created to the symbolic filesystem. initial_state.fs.insert(filename, password_file)
# The binary asks for a 16 character password to which is applies a complex # function and then compares with a reference string with the function # check_equals_[reference string]. (Decompile the binary and take a look at it!) # The source code for this function is provided here. However, the reference # string in your version will be different than AABBCCDDEEFFGGHH: # # #define REFERENCE_PASSWORD = "AABBCCDDEEFFGGHH"; # int check_equals_AABBCCDDEEFFGGHH(char* to_check, size_t length) { # uint32_t num_correct = 0; # for (int i=0; i<length; ++i) { # if (to_check[i] == REFERENCE_PASSWORD[i]) { # num_correct += 1; # } # } # return num_correct == length; # } # # ... # # char* input = user_input(); # char* encrypted_input = complex_function(input); # if (check_equals_AABBCCDDEEFFGGHH(encrypted_input, 16)) { # puts("Good Job."); # } else { # puts("Try again."); # } # # The function checks if *to_check == "AABBCCDDEEFFGGHH". Verify this yourself. # While you, as a human, can easily determine that this function is equivalent # to simply comparing the strings, the computer cannot. Instead the computer # would need to branch every time the if statement in the loop was called (16 # times), resulting in 2^16 = 65,536 branches, which will take too long of a # time to evaluate for our needs. # # We do not know how the complex_function works, but we want to find an input # that, when modified by complex_function, will produce the string: # AABBCCDDEEFFGGHH. # # In this puzzle, your goal will be to stop the program before this function is # called and manually constrain the to_check variable to be equal to the # password you identify by decompiling the binary. Since, you, as a human, know # that if the strings are equal, the program will print "Good Job.", you can # be assured that if the program can solve for an input that makes them equal, # the input will be the correct password.
# Angr will not be able to reach the point at which the binary prints out # 'Good Job.'. We cannot use that as the target anymore. # (!) address_to_check_constraint = 0x08048565 simulation.explore(find=address_to_check_constraint)
if simulation.found: solution_state = simulation.found[0]
# Recall that we need to constrain the to_check parameter (see top) of the # check_equals_ function. Determine the address that is being passed as the # parameter and load it into a bitvector so that we can constrain it. # (!) constrained_parameter_address = password_address constrained_parameter_size_bytes = 16 constrained_parameter_bitvector = solution_state.memory.load( constrained_parameter_address, constrained_parameter_size_bytes ) # We want to constrain the system to find an input that will make # constrained_parameter_bitvector equal the desired value. # (!) constrained_parameter_desired_value = 'AUPDNNPROEZRJWKB'# :string (encoded)
# Specify a claripy expression (using Pythonic syntax) that tests whether # constrained_parameter_bitvector == constrained_parameter_desired_value. # Add the constraint to the state to let z3 attempt to find an input that # will make this expression true. solution_state.add_constraints(constrained_parameter_bitvector == constrained_parameter_desired_value)
# Solve for the constrained_parameter_bitvector. # (!) solution = solution_state.solver.eval(password,cast_to=bytes)
print(solution) else: raise Exception('Could not find the solution')
# This level performs the following computations: # # 1. Get 16 bytes of user input and encrypt it. # 2. Save the result of check_equals_AABBCCDDEEFFGGHH (or similar) # 3. Get another 16 bytes from the user and encrypt it. # 4. Check that it's equal to a predefined password. # # The ONLY part of this program that we have to worry about is #2. We will be # replacing the call to check_equals_ with our own version, using a hook, since # check_equals_ will run too slowly otherwise.
# Since Angr can handle the initial call to scanf, we can start from the # beginning. initial_state = project.factory.entry_state()
# Hook the address of where check_equals_ is called. # (!) check_equals_called_address = 0x80486B3
# The length parameter in angr.Hook specifies how many bytes the execution # engine should skip after completing the hook. This will allow hooks to # replace certain instructions (or groups of instructions). Determine the # instructions involved in calling check_equals_, and then determine how many # bytes are used to represent them in memory. This will be the skip length. # (!) instruction_to_skip_length = 5 """ 然后我们需要在在@project.hook语句之后书写我们的模拟函数。然后如上题一致, 我们利用使用 state.memory 的 .load(addr, size)接口读出buffer处的内存数据,与答案进行比较 """ @project.hook(check_equals_called_address, length=instruction_to_skip_length) defskip_check_equals_(state): # Determine the address where user input is stored. It is passed as a # parameter ot the check_equals_ function. Then, load the string. Reminder: # int check_equals_(char* to_check, int length) { ... user_input_buffer_address = 0x804A054# :integer, probably hexadecimal user_input_buffer_length = 16
# Reminder: state.memory.load will read the stored value at the address # user_input_buffer_address of byte length user_input_buffer_length. # It will return a bitvector holding the value. This value can either be # symbolic or concrete, depending on what was stored there in the program. user_input_string = state.memory.load( user_input_buffer_address, user_input_buffer_length ) # Determine the string this function is checking the user input against. # It's encoded in the name of this function; decompile the program to find # it. check_against_string = 'XKSPZSJKJYQCQXZV'# :string
# gcc uses eax to store the return value, if it is an integer. We need to # set eax to 1 if check_against_string == user_input_string and 0 otherwise. # However, since we are describing an equation to be used by z3 (not to be # evaluated immediately), we cannot use Python if else syntax. Instead, we # have to use claripy's built in function that deals with if statements. # claripy.If(expression, ret_if_true, ret_if_false) will output an # expression that evaluates to ret_if_true if expression is true and # ret_if_false otherwise. # Think of it like the Python "value0 if expression else value1". """ 这个函数是利用EAX寄存器作为返回值,然后成功则返回1,不成功则返回0, 还需要注意在构建符号位向量的时候EAX寄存器是32位寄存器 """ state.regs.eax = claripy.If( user_input_string == check_against_string, claripy.BVV(1, 32), claripy.BVV(0, 32) )
if simulation.found: solution_state = simulation.found[0]
# Since we are allowing Angr to handle the input, retrieve it by printing # the contents of stdin. Use one of the early levels as a reference. solution = solution_state.posix.dumps(0) print(solution) else: raise Exception('Could not find the solution')
# This challenge is similar to the previous one. It operates under the same # premise that you will have to replace the check_equals_ function. In this # case, however, check_equals_ is called so many times that it wouldn't make # sense to hook where each one was called. Instead, use a SimProcedure to write # your own check_equals_ implementation and then hook the check_equals_ symbol # to replace all calls to scanf with a call to your SimProcedure. # # You may be thinking: # Why can't I just use hooks? The function is called many times, but if I hook # the address of the function itself (rather than the addresses where it is # called), I can replace its behavior everywhere. Furthermore, I can get the # parameters by reading them off the stack (with memory.load(regs.esp + xx)), # and return a value by simply setting eax! Since I know the length of the # function in bytes, I can return from the hook just before the 'ret' # instruction is called, which will allow the program to jump back to where it # was before it called my hook. # If you thought that, then congratulations! You have just invented the idea of # SimProcedures! Instead of doing all of that by hand, you can let the already- # implemented SimProcedures do the boring work for you so that you can focus on # writing a replacement function in a Pythonic way. # As a bonus, SimProcedures allow you to specify custom calling conventions, but # unfortunately it is not covered in this CTF.
# Define a class that inherits angr.SimProcedure in order to take advantage # of Angr's SimProcedures. """ 这里前面的部分都可以直接照抄上面一题的代码,关键是定义一个继承angr.SimProcedure的类,以利用Angr的SimProcedures。 SimProcedure用Python编写的我们自己的函数代替了原来函数。 除了用Python编写之外,该函数的行为与用C编写的任何函数基本相同。 self之后的任何参数都将被视为要替换的函数的参数, 参数将是符号位向量。 另外,Python可以以常用的Python方式返回,Angr将以与原来函数相同的方式对待它 """ classReplacementCheckEquals(angr.SimProcedure): # A SimProcedure replaces a function in the binary with a simulated one # written in Python. Other than it being written in Python, the function # acts largely the same as any function written in C. Any parameter after # 'self' will be treated as a parameter to the function you are replacing. # The parameters will be bitvectors. Additionally, the Python can return in # the ususal Pythonic way. Angr will treat this in the same way it would # treat a native function in the binary returning. An example: # # int add_if_positive(int a, int b) { # if (a >= 0 && b >= 0) return a + b; # else return 0; # } # # could be simulated with... # # class ReplacementAddIfPositive(angr.SimProcedure): # def run(self, a, b): # if a >= 0 and b >=0: # return a + b # else: # return 0 # # Finish the parameters to the check_equals_ function. Reminder: # int check_equals_AABBCCDDEEFFGGHH(char* to_check, int length) { ... # (!) """ check_equals_AABBCCDDEEFFGGHH(char* to_check, int length)函数的第一个参数是待检测字符串首地址指针, 然后就是字符串的长度,接下来我们就可以开始书写我们的模拟函数 """ defrun(self, to_check, length): # We can almost copy and paste the solution from the previous challenge. # Hint: Don't look up the address! It's passed as a parameter. # (!) user_input_buffer_address = to_check user_input_buffer_length = length
# Note the use of self.state to find the state of the system in a # SimProcedure. user_input_string = self.state.memory.load( user_input_buffer_address, user_input_buffer_length )
check_against_string = 'ORSDDWXHZURJRBDH' # Finally, instead of setting eax, we can use a Pythonic return statement # to return the output of this function. # Hint: Look at the previous solution. return claripy.If( user_input_string == check_against_string, claripy.BVV(1, 32), claripy.BVV(0, 32) )
# Hook the check_equals symbol. Angr automatically looks up the address # associated with the symbol. Alternatively, you can use 'hook' instead # of 'hook_symbol' and specify the address of the function. To find the # correct symbol, disassemble the binary. # (!) check_equals_symbol = 'check_equals_ORSDDWXHZURJRBDH'# :string project.hook_symbol(check_equals_symbol, ReplacementCheckEquals())
# This time, the solution involves simply replacing scanf with our own version, # since Angr does not support requesting multiple parameters with scanf.
classReplacementScanf(angr.SimProcedure): # Finish the parameters to the scanf function. Hint: 'scanf("%u %u", ...)'. # (!) defrun(self, format_string, scanf0_address, scanf1_address): scanf0 = claripy.BVS('scanf0', 32) scanf1 = claripy.BVS('scanf1', 32)
# The scanf function writes user input to the buffers to which the # parameters point. # 这里Scanf是要向内存写入数据的,于是我们利用使用 state.memory 的 .store(addr, val) 接口 # 将符号位向量写入两个字符串的内存区域 self.state.memory.store(scanf0_address, scanf0, endness=project.arch.memory_endness) self.state.memory.store(scanf1_address, scanf1, endness=project.arch.memory_endness)
# Now, we want to 'set aside' references to our symbolic values in the # globals plugin included by default with a state. You will need to # store multiple bitvectors. You can either use a list, tuple, or multiple # keys to reference the different bitvectors. # (!) self.state.globals['solution0'] = scanf0 self.state.globals['solution1'] = scanf1
# When you construct a simulation manager, you will want to enable Veritesting: # project.factory.simgr(initial_state, veritesting=True) # Hint: use one of the first few levels' solutions as a reference. import angr import sys import claripy
# This challenge is the exact same as the first challenge, except that it was # compiled as a static binary. Normally, Angr automatically replaces standard # library functions with SimProcedures that work much more quickly. # # Here are a few SimProcedures Angr has already written for you. They implement # standard library functions. You will not need all of them: # angr.SIM_PROCEDURES['libc']['malloc'] # angr.SIM_PROCEDURES['libc']['fopen'] # angr.SIM_PROCEDURES['libc']['fclose'] # angr.SIM_PROCEDURES['libc']['fwrite'] # angr.SIM_PROCEDURES['libc']['getchar'] # angr.SIM_PROCEDURES['libc']['strncmp'] # angr.SIM_PROCEDURES['libc']['strcmp'] # angr.SIM_PROCEDURES['libc']['scanf'] # angr.SIM_PROCEDURES['libc']['printf'] # angr.SIM_PROCEDURES['libc']['puts'] # angr.SIM_PROCEDURES['libc']['exit'] # angr.SIM_PROCEDURES['glibc']['__libc_start_main'] # # As a reminder, you can hook functions with something similar to: # project.hook(malloc_address, angr.SIM_PROCEDURES['libc']['malloc']()) # # There are many more, see: # https://github.com/angr/angr/tree/master/angr/procedures/libc # # Additionally, note that, when the binary is executed, the main function is not # the first piece of code called. In the _start function, __libc_start_main is # called to start your program. The initialization that occurs in this function # can take a long time with Angr, so you should replace it with a SimProcedure. # angr.SIM_PROCEDURES['glibc']['__libc_start_main'] # Note 'glibc' instead of 'libc'. import angr import sys import claripy
# The shared library has the function validate, which takes a string and returns # either true (1) or false (0). The binary calls this function. If it returns # true, the program prints "Good Job." otherwise, it prints "Try again." # # Note: When you run this script, make sure you run it on # lib14_angr_shared_library.so, not the executable. This level is intended to # teach how to analyse binary formats that are not typical executables.
# The shared library is compiled with position-independent code. You will need # to specify the base address. All addresses in the shared library will be # base + offset, where offset is their address in the file. # (!) base = 0x4000000 project = angr.Project(path_to_binary, load_options={ 'main_opts' : { 'custom_base_addr' : base } })
# Initialize any symbolic values here; you will need at least one to pass to # the validate function. buffer_pointer = claripy.BVV(0x3000000, 32)
# Begin the state at the beginning of the validate function, as if it was # called by the program. Determine the parameters needed to call validate and # replace 'parameters...' with bitvectors holding the values you wish to pass. # Recall that 'claripy.BVV(value, size_in_bits)' constructs a bitvector # initialized to a single value. # Remember to add the base value you specified at the beginning to the # function address! # Hint: int validate(char* buffer, int length) { ... # Another hint: the password is 8 bytes long. # (!) validate_function_address = base + 0x6d7 initial_state = project.factory.call_state(validate_function_address, buffer_pointer, claripy.BVV(8, 32))
# You will need to add code to inject a symbolic value into the program at the # end of the function that constrains eax to equal true (value of 1) just # before the function returns. There are multiple ways to do this: # 1. Use a hook. # 2. Search for the address just before the function returns and then # constrain eax (this may require putting code elsewhere) password = claripy.BVS('password', 8*8) initial_state.memory.store(buffer_pointer, password)
simulation = project.factory.simgr(initial_state)
success_address = base + 0x783 simulation.explore(find=success_address)
if simulation.found: solution_state = simulation.found[0] # Determine where the program places the return value, and constrain it so # that it is true. Then, solve for the solution and print it. # (!) solution_state.add_constraints(solution_state.regs.eax != 0) solution = solution_state.solver.eval(password,cast_to=bytes) print(solution) else: raise Exception('Could not find the solution')
# This binary takes both an integer and a string as a parameter. A certain # integer input causes the program to reach a buffer overflow with which we can # read a string from an arbitrary memory location. Our goal is to use Angr to # search the program for this buffer overflow and then automatically generate # an exploit to read the string "Good Job." # # What is the point of reading the string "Good Job."? # This CTF attempts to replicate a simplified version of a possible vulnerability # where a user can exploit the program to print a secret, such as a password or # a private key. In order to keep consistency with the other challenges and to # simplify the challenge, the goal of this program will be to print "Good Job." # instead. # # The general strategy for crafting this script will be to: # 1) Search for calls of the 'puts' function, which will eventually be exploited # to print out "Good Job." # 2) Determine if the first parameter of 'puts', a pointer to the string to be # printed, can be controlled by the user to be set to the location of the # "Good Job." string. # 3) Solve for the input that prints "Good Job." # # Note: The script is structured to implement step #2 before #1.
# Some of the source code for this challenge: # # #include <stdio.h> # #include <stdlib.h> # #include <string.h> # #include <stdint.h> # # // This will all be in .rodata # char msg[] = "${ description }$"; # char* try_again = "Try again."; # char* good_job = "Good Job."; # uint32_t key; # # void print_msg() { # printf("%s", msg); # } # # uint32_t complex_function(uint32_t input) { # ... # } # # struct overflow_me { # char buffer[16]; # char* to_print; # }; # # int main(int argc, char* argv[]) { # struct overflow_me locals; # locals.to_print = try_again; # # print_msg(); # # printf("Enter the password: "); # scanf("%u %20s", &key, locals.buffer); # # key = complex_function(key); # # switch (key) { # case ?: # puts(try_again); # break; # # ... # # case ?: # // Our goal is to trick this call to puts to print the "secret # // password" (which happens, in our case, to be the string # // "Good Job.") # puts(locals.to_print); # break; # # ... # } # # return 0; # }
# You can either use a blank state or an entry state; just make sure to start # at the beginning of the program. # (!) initial_state = initial_state = project.factory.entry_state()
# The bitvector.chop(bits=n) function splits the bitvector into a Python # list containing the bitvector in segments of n bits each. In this case, # we are splitting them into segments of 8 bits (one byte.) for char in scanf1.chop(bits=8): # Ensure that each character in the string is printable. An interesting # experiment, once you have a working solution, would be to run the code # without constraining the characters to the capital letters. # Even though the solution will technically work without this, it's more # difficult to enter in a solution that contains character you can't # copy, paste, or type into your terminal or the web form that checks # your solution. # If you are using the web form to submit answers, your solution must be # entirely alphanumeric except for spaces. # (!) self.state.add_constraints(char >= 'A', char <= 'Z')
# Warning: Endianness only applies to integers. If you store a string in # memory and treat it as a little-endian integer, it will be backwards. scanf0_address = param0 self.state.memory.store(scanf0_address, scanf0, endness=project.arch.memory_endness) scanf1_address = param1 self.state.memory.store(scanf1_address, scanf1, endness=project.arch.memory_endness)
# We will call this whenever puts is called. The goal of this function is to # determine if the pointer passed to puts is controllable by the user, such # that we can rewrite it to point to the string "Good Job." defcheck_puts(state): # Recall that puts takes one parameter, a pointer to the string it will # print. If we load that pointer from memory, we can analyse it to determine # if it can be controlled by the user input in order to point it to the # location of the "Good Job." string. # # Treat the implementation of this function as if puts was just called. # The stack, registers, memory, etc should be set up as if the x86 call # instruction was just invoked (but, of course, the function hasn't copied # the buffers yet.) # The stack will look as follows: # ... # esp + 7 -> /----------------\ # esp + 6 -> | puts | # esp + 5 -> | parameter | # esp + 4 -> \----------------/ # esp + 3 -> /----------------\ # esp + 2 -> | return | # esp + 1 -> | address | # esp -> \----------------/ # # Hint: Look at level 08, 09, or 10 to review how to load a value from a # memory address. Remember to use the correct endianness in the future when # loading integers; it has been included for you here. # (!) puts_parameter = state.memory.load(state.regs.esp + 4, 4, endness=project.arch.memory_endness)
# The following function takes a bitvector as a parameter and checks if it # can take on more than one value. While this does not necessary tell us we # have found an exploitable state, it is a strong indication that the # bitvector we checked may be controllable by the user. # Use it to determine if the pointer passed to puts is symbolic. # (!) if state.se.symbolic(puts_parameter): # Determine the location of the "Good Job." string. We want to print it # out, and we will do so by attempting to constrain the puts parameter to # equal it. (Hint: look at .rodata). # Hint: use 'objdump -s <binary>' to look for the string's address. # (!) good_job_string_address = 0x484F4A47# :integer, probably hexadecimal
# Create an expression that will test if puts_parameter equals # good_job_string_address. If we add this as a constraint to our solver, # it will try and find an input to make this expression true. Take a look # at level 08 to remind yourself of the syntax of this. # (!) is_vulnerable_expression = puts_parameter == good_job_string_address # :boolean bitvector expression
# Have Angr evaluate the state to determine if all the constraints can # be met, including the one we specified above. If it can be satisfied, # we have found our exploit! # # When doing this, however, we do not want to edit our state in case we # have not yet found what we are looking for. To test if our expression # is satisfiable without editing the original, we need to clone the state. copied_state = state.copy()
# We can now play around with the copied state without changing the # original. We need to add our vulnerable expression as a state to test it. # Look at level 08 and compare this call to how it is called there. copied_state.add_constraints(is_vulnerable_expression)
# Finally, we test if we can satisfy the constraints of the state. if copied_state.satisfiable(): # Before we return, let's add the constraint to the solver for real. state.add_constraints(is_vulnerable_expression) returnTrue else: returnFalse else: # not state.se.symbolic(???) returnFalse
simulation = project.factory.simgr(initial_state)
# In order to determine if we have found a vulnerable call to 'puts', we need # to run the function check_puts (defined above) whenever we reach a 'puts' # call. To do this, we will look for the place where the instruction pointer, # state.addr, is equal to the beginning of the puts function. defis_successful(state): # We are looking for puts. Check that the address is at the (very) beginning # of the puts function. Warning: while, in theory, you could look for # any address in puts, if you execute any instruction that adjusts the stack # pointer, the stack diagram above will be incorrect. Therefore, it is # recommended that you check for the very beginning of puts. # (!) puts_address = 0x8048370 if state.addr == puts_address: # Return True if we determine this call to puts is exploitable. return check_puts(state) else: # We have not yet found a call to puts; we should continue! returnFalse
simulation.explore(find=is_successful)
if simulation.found: solution_state = simulation.found[0] (scanf0, scanf1) = solution_state.globals['solutions'] solution0 = (solution_state.solver.eval(scanf0,cast_to=bytes)) solution1 = (solution_state.solver.eval(scanf1,cast_to=bytes)) print(solution0 + b' ' + solution1) else: raise Exception('Could not find the solution')
# Essentially, the program does the following: # # scanf("%d %20s", &key, user_input); # ... # // if certain unknown conditions are true... # strncpy(random_buffer, user_input); # ... # if (strncmp(secure_buffer, reference_string)) { # // The secure_buffer does not equal the reference string. # puts("Try again."); # } else { # // The two are equal. # puts("Good Job."); # } # # If this program has no bugs in it, it would _always_ print "Try again." since # user_input copies into random_buffer, not secure_buffer. # # The question is: can we find a buffer overflow that will allow us to overwrite # the random_buffer pointer to point to secure_buffer? (Spoiler: we can, but we # will need to use Angr.) # # We want to identify a place in the binary, when strncpy is called, when we can: # 1) Control the source contents (not the source pointer!) # * This will allow us to write arbitrary data to the destination. # 2) Control the destination pointer # * This will allow us to write to an arbitrary location. # If we can meet both of those requirements, we can write arbitrary data to an # arbitrary location. Finally, we need to contrain the source contents to be # equal to the reference_string and the destination pointer to be equal to the # secure_buffer.
# You can either use a blank state or an entry state; just make sure to start # at the beginning of the program. initial_state = project.factory.entry_state()
# In this challenge, we want to check strncpy to determine if we can control # both the source and the destination. It is common that we will be able to # control at least one of the parameters, (such as when the program copies a # string that it received via stdin). defcheck_strncpy(state): # The stack will look as follows: # ... ________________ # esp + 15 -> / \ # esp + 14 -> | param2 | # esp + 13 -> | len | # esp + 12 -> \________________/ # esp + 11 -> / \ # esp + 10 -> | param1 | # esp + 9 -> | src | # esp + 8 -> \________________/ # esp + 7 -> / \ # esp + 6 -> | param0 | # esp + 5 -> | dest | # esp + 4 -> \________________/ # esp + 3 -> / \ # esp + 2 -> | return | # esp + 1 -> | address | # esp -> \________________/ # (!) strncpy_src = state.memory.load(state.regs.esp + 8, 4, endness=project.arch.memory_endness) strncpy_dest = state.memory.load(state.regs.esp + 4, 4, endness=project.arch.memory_endness) strncpy_len = state.memory.load(state.regs.esp + 12, 4, endness=project.arch.memory_endness)
# We need to find out if src is symbolic, however, we care about the # contents, rather than the pointer itself. Therefore, we have to load the # the contents of src to determine if they are symbolic. # Hint: How many bytes is strncpy copying? # (!) src_contents = state.memory.load(strncpy_src, strncpy_len)
# Our goal is to determine if we can write arbitrary data to an arbitrary # location. This means determining if the source contents are symbolic # (arbitrary data) and the destination pointer is symbolic (arbitrary # destination). # (!) if state.solver.symbolic(src_contents) and state.solver.symbolic(strncpy_dest): # Use ltrace to determine the reference string. Decompile the binary to # determine the address of the buffer it checks the password against. Our # goal is to overwrite that buffer to store the password. # (!) password_string = 'NDYNWEUJ'# :string buffer_address = 0x57584344# :integer, probably in hexadecimal
# Create an expression that tests if the first n bytes is length. Warning: # while typical Python slices (array[start:end]) will work with bitvectors, # they are indexed in an odd way. The ranges must start with a high value # and end with a low value. Additionally, the bits are indexed from right # to left. For example, let a bitvector, b, equal 'ABCDEFGH', (64 bits). # The following will read bit 0-7 (total of 1 byte) from the right-most # bit (the end of the string). # b[7:0] == 'H' # To access the beginning of the string, we need to access the last 16 # bits, or bits 48-63: # b[63:48] == 'AB' # In this specific case, since we don't necessarily know the length of the # contents (unless you look at the binary), we can use the following: # b[-1:-16] == 'AB', since, in Python, -1 is the end of the list, and -16 # is the 16th element from the end of the list. The actual numbers should # correspond with the length of password_string. # (!) does_src_hold_password = src_contents[-1:-64] == password_string # Create an expression to check if the dest parameter can be set to # buffer_address. If this is true, then we have found our exploit! # (!) does_dest_equal_buffer_address = strncpy_dest == buffer_address
# In the previous challenge, we copied the state, added constraints to the # copied state, and then determined if the constraints of the new state # were satisfiable. Since that pattern is so common, Angr implemented a # parameter 'extra_constraints' for the satisfiable function that does the # exact same thing: if state.satisfiable(extra_constraints=(does_src_hold_password, does_dest_equal_buffer_address)): state.add_constraints(does_src_hold_password, does_dest_equal_buffer_address) returnTrue else: returnFalse else: # not state.se.symbolic(???) returnFalse