Rev:ollvm混淆控制流平坦化去除

前言:这几天刷逆向题时遇到了一道带ollvm混淆的题目,ollvm在pwn题里也出现过,这里主要介绍一下如何去除ollvm混淆

ollvm的简介:
ollvm(Obfuscator-LLVM)的出现,是为了加强代码混淆,防止恶意破解;其中的控制流平坦化会把原本的控制流改写成由分发器驱动的大循环,看起来就像加了很多层循环
SW[IJICW(HVG`7UR3[O{VUF.png

可以看到比一般的程序庞大,这就是控制流平坦化,这个情况可以用deflat.py脚本去除的
deflat.py:

#!/usr/bin/env python3

import angr
import pyvex
import claripy
import sys
sys.path.append('/azuoti/deflat-master')
import am_graph
import struct
from collections import defaultdict

import logging
logging.getLogger('angr.state_plugins.symbolic_memory').setLevel(logging.ERROR)
# logging.getLogger('angr.sim_manager').setLevel(logging.DEBUG)
    
def get_relevant_nop_nodes(supergraph):
    """Split the supergraph's nodes into blocks to keep and blocks to NOP out.

    Reads the module-level ``pre_dispatcher_node``, ``prologue_node`` and
    ``retn_node``.

    Returns:
        A ``(relevant_nodes, nop_nodes)`` tuple of lists.  A node is
        "relevant" when it has an edge to the pre-dispatcher and is larger
        than 8 bytes.  Every other node goes to ``nop_nodes`` unless its
        address is that of the prologue, the return block or the
        pre-dispatcher itself.
    """
    kept, discarded = [], []
    for candidate in supergraph.nodes():
        # node.size is compared directly because it is cheaper than
        # building a block object just to measure it.
        if supergraph.has_edge(candidate, pre_dispatcher_node) and candidate.size > 8:
            kept.append(candidate)
        elif candidate.addr not in (prologue_node.addr,
                                    retn_node.addr,
                                    pre_dispatcher_node.addr):
            discarded.append(candidate)
    return kept, discarded


def symbolic_execution(start_addr, hook_addr=None, modify=None, inspect=False):
    """Execute symbolically from ``start_addr`` until a relevant block is hit.

    Reads the module-level ``project`` and ``relevant_block_addrs`` and may
    overwrite the module-level ``modify_value``.

    Args:
        start_addr: Address at which the blank state starts executing.
        hook_addr: Optional address to hook (5 bytes long) with a procedure
            that does nothing except remove the hook again; the caller
            passes the address of a ``call`` instruction here.
        modify: Optional value stored into ``modify_value``; it is what the
            statement breakpoint writes into the ITE condition temporary.
        inspect: When true, install a statement breakpoint that forces the
            condition of the first ITE expression it meets to
            ``modify_value`` and then clears all statement breakpoints.

    Returns:
        The address of the first active state whose address is in
        ``relevant_block_addrs`` (checked after each step), or ``None`` when
        the simulation runs out of active states first.
    """

    def retn_procedure(state):
        # `state.solver` replaces the deprecated `state.se` alias.
        ip = state.solver.eval(state.regs.ip)
        # One-shot hook: unregister so later runs execute the real code.
        project.unhook(ip)
        return

    def statement_inspect(state):
        expressions = list(
            state.scratch.irsb.statements[state.inspect.statement].expressions)
        if expressions and isinstance(expressions[0], pyvex.expr.ITE):
            # Overwrite the temp holding the ITE condition so the chosen
            # branch of the conditional move is taken.
            state.scratch.temps[expressions[0].cond.tmp] = modify_value
            # Only the first ITE matters; drop the breakpoint afterwards.
            state.inspect._breakpoints['statement'] = []

    global modify_value
    # Identity checks: `!=` would call the argument's own __ne__, which
    # claripy values overload.
    if hook_addr is not None:
        project.hook(hook_addr, retn_procedure, length=5)
    if modify is not None:
        modify_value = modify

    state = project.factory.blank_state(
        addr=start_addr, remove_options={angr.sim_options.LAZY_SOLVES})
    if inspect:
        state.inspect.b('statement',
                        when=angr.state_plugins.inspect.BP_BEFORE,
                        action=statement_inspect)

    sm = project.factory.simulation_manager(state)
    # Step once before checking so that start_addr itself (which may be a
    # relevant block) is not reported as its own successor.
    sm.step()
    while sm.active:
        for active_state in sm.active:
            if active_state.addr in relevant_block_addrs:
                return active_state.addr
        sm.step()
    # No path reached a relevant block.
    return None

def fill_nop(data, start_addr, length):
    """Overwrite ``length`` bytes of ``data`` from ``start_addr`` with NOPs.

    The NOP byte comes from the module-level ``opcode`` table.  Bytes are
    written one index at a time, so ``data`` never changes length.
    """
    nop_byte = ord(opcode['nop'])
    for position in range(start_addr, start_addr + length):
        data[position] = nop_byte

def fill_jmp_offset(data, start, offset):
    """Write ``offset`` into ``data[start:start + 4]`` as a little-endian
    signed 32-bit integer, one byte at a time."""
    for delta, byte in enumerate(struct.pack('<i', offset)):
        data[start + delta] = byte
    
def patch_byte(data, offset, value):
    """Store the one-character ``value`` at ``data[offset]`` as an integer.

    ``data`` is expected to be a bytearray (not a str), hence the ``ord``.
    """
    byte_value = ord(value)
    data[offset] = byte_value

if __name__ == '__main__':
    # Script entry point: recover the real control flow of one flattened
    # function and write a patched copy of the binary to
    # "<filename>_recovered".
    if len(sys.argv) != 3:
        print('Usage: python deflat.py filename function_address(hex)')
        sys.exit(0)
    
    # Opcode bytes used when patching.  Condition suffixes map to the second
    # byte of the 6-byte `0F 8x rel32` conditional jump; 'j' is its 0x0F
    # prefix, 'jmp' is the 5-byte `E9 rel32` jump and 'nop' the 1-byte NOP.
    opcode = {'a':'\x87', 'ae': '\x83', 'b':'\x82', 'be':'\x86', 'c':'\x82', 'e':'\x84', 'z':'\x84', 'g':'\x8F', 
              'ge':'\x8D', 'l':'\x8C', 'le':'\x8E', 'na':'\x86', 'nae':'\x82', 'nb':'\x83', 'nbe':'\x87', 'nc':'\x83',
              'ne':'\x85', 'ng':'\x8E', 'nge':'\x8C', 'nl':'\x8D', 'nle':'\x8F', 'no':'\x81', 'np':'\x8B', 'ns':'\x89',
              'nz':'\x85', 'o':'\x80', 'p':'\x8A', 'pe':'\x8A', 'po':'\x8B', 's':'\x88', 'nop':'\x90', 'jmp':'\xE9', 'j':'\x0F'}
    
    filename = sys.argv[1]
    start = int(sys.argv[2], 16)

    project = angr.Project(filename, load_options={'auto_load_libs': False})
    cfg = project.analyses.CFGFast(normalize=True)  # do normalize to avoid overlapping blocks
    target_function = cfg.functions.get(start)
    if target_function is None:
        print('No function found at %#x' % start)
        sys.exit(-1)
    # A super transition graph is a graph that looks like IDA Pro's CFG
    supergraph = am_graph.to_supergraph(target_function.transition_graph)

    # Mapped base with the low 12 bits cleared; subtracted from virtual
    # addresses below to index into the raw file bytes.
    # NOTE(review): this assumes file offset == virtual address - base for
    # the patched code -- confirm for the target binary.
    base_addr = project.loader.main_object.mapped_base >> 12 << 12

    # get prologue_node and retn_node
    prologue_node = None
    retn_node = None
    for node in supergraph.nodes():
        if supergraph.in_degree(node) == 0:
            prologue_node = node
        if supergraph.out_degree(node) == 0:
            retn_node = node
    
    if prologue_node is None or prologue_node.addr != start:
        print("Something must be wrong...")
        sys.exit(-1)
    
    # The prologue's first successor is taken as the main dispatcher; its
    # first predecessor other than the prologue is the pre-dispatcher.
    main_dispatcher_node = list(supergraph.successors(prologue_node))[0]
    pre_dispatcher_node = None
    for node in supergraph.predecessors(main_dispatcher_node):
        if node.addr != prologue_node.addr:
            pre_dispatcher_node = node
            break

    # Fail with a message instead of a NameError further down.
    if retn_node is None or pre_dispatcher_node is None:
        print('Could not locate the return block or the pre-dispatcher block')
        sys.exit(-1)
    
    relevant_nodes, nop_nodes = get_relevant_nop_nodes(supergraph)
    print('*******************relevant blocks************************')
    print('prologue: %#x' % start)
    print('main_dispatcher: %#x' % main_dispatcher_node.addr)
    print('pre_dispatcher: %#x' % pre_dispatcher_node.addr)
    print('retn: %#x' % retn_node.addr)
    relevant_block_addrs = [node.addr for node in relevant_nodes]
    print('relevant_blocks:', [hex(addr) for addr in relevant_block_addrs])

    print('*******************symbolic execution*********************')
    relevants = relevant_nodes
    relevants.append(prologue_node)
    # Snapshot before the return block is added: execution starts from every
    # relevant block and the prologue, but never from the return block.
    relevants_without_retn = list(relevants)
    relevants.append(retn_node)
    relevant_block_addrs.extend([prologue_node.addr, retn_node.addr])

    flow = defaultdict(list)
    modify_value = None
    patch_instrs = {}
    for relevant in relevants_without_retn:
        print('-------------------dse %#x---------------------' % relevant.addr)
        block = project.factory.block(relevant.addr, size=relevant.size)
        has_branches = False
        hook_addr = None
        for ins in block.capstone.insns:
            if ins.insn.mnemonic.startswith('cmov'):
                # A conditional move marks a block with two real successors.
                patch_instrs[relevant] = ins
                has_branches = True
            elif ins.insn.mnemonic.startswith('call'):
                # Calls are hooked so symbolic execution does not enter them.
                hook_addr = ins.insn.address
        if has_branches:
            # First entry: successor with the condition forced to 1,
            # second entry: successor with the condition forced to 0.
            flow[relevant].append(symbolic_execution(relevant.addr, hook_addr, claripy.BVV(1, 1), True))
            flow[relevant].append(symbolic_execution(relevant.addr, hook_addr, claripy.BVV(0, 1), True))
        else:
            flow[relevant].append(symbolic_execution(relevant.addr, hook_addr))
        if None in flow[relevant]:
            # symbolic_execution found no successor; stop here with a clear
            # message rather than failing later inside hex(None).
            print('Symbolic execution from %#x did not reach any relevant block' % relevant.addr)
            sys.exit(-1)
            
    print('************************flow******************************')
    for k, v in flow.items():
        print('%#x: ' % k.addr, [hex(child) for child in v])
    
    # print retn_node flow. Actually, it's [].
    print('%#x: ' % retn_node.addr, [])

    print('************************patch*****************************')
    with open(filename, 'rb') as origin:
        # Attention: can't transform to str by calling decode() directly. so use bytearray instead.
        origin_data = bytearray(origin.read())
        origin_data_len = len(origin_data)

    recovery_file = filename + '_recovered'

    # patch irrelevant blocks
    for nop_node in nop_nodes:
        fill_nop(origin_data, nop_node.addr - base_addr, nop_node.size)
    
    # remove unnecessary control flows
    for parent, childs in flow.items():
        if len(childs) == 1:
            parent_block = project.factory.block(parent.addr, size=parent.size)
            last_instr = parent_block.capstone.insns[-1]
            file_offset = last_instr.address - base_addr
            # patch the last jmp instruction
            # NOTE(review): 1 + 4 bytes are written regardless of
            # last_instr.size, so this assumes the instruction is at least
            # 5 bytes long.
            patch_byte(origin_data, file_offset, opcode['jmp'])
            file_offset += 1
            fill_nop(origin_data, file_offset, last_instr.size - 1)
            # rel32 is relative to the end of the 5-byte jmp.
            fill_jmp_offset(origin_data, file_offset, childs[0] - last_instr.address - 5)
        else:
            instr = patch_instrs[parent]
            file_offset = instr.address - base_addr
            # patch instructions starting from `cmovx` to the end of block
            # NOTE(review): 6 + 5 bytes are written below, so this assumes at
            # least 11 bytes between the cmov and the end of the block.
            fill_nop(origin_data, file_offset, parent.addr + parent.size - base_addr - file_offset)
            # `jcc childs[0]` using the cmov's own condition suffix ...
            patch_byte(origin_data, file_offset, opcode['j'])
            patch_byte(origin_data, file_offset + 1, opcode[instr.mnemonic[4:]])
            fill_jmp_offset(origin_data, file_offset + 2, childs[0] - instr.address - 6)
            file_offset += 6
            # ... followed by an unconditional `jmp childs[1]`.
            patch_byte(origin_data, file_offset, opcode['jmp'])
            fill_jmp_offset(origin_data, file_offset + 1, childs[1] - (instr.address + 6) - 5)
    
    assert len(origin_data) == origin_data_len, "Error: size of data changed!!!"
    # The output file is opened only once patching has succeeded, and inside
    # `with`, so a failure leaves neither a truncated file nor an open handle.
    with open(recovery_file, 'wb') as recovery:
        recovery.write(origin_data)
    print('Successful! The recovered file: %s' % recovery_file)

直接用这个脚本就可以去除了,运行命令:python3 deflat.py binary main_addr 即可(main_addr 为十六进制的函数地址)
LN{D)LURSVB09J{INW${1N5.png

这是去除后的代码,看着比之前容易分析多了
下面开始我们的正常逆向:

// Hex-Rays pseudocode of main() after the control-flow flattening was removed.
// It reads a string, transforms it in six 8-byte groups with 64 shift/XOR
// rounds each, and compares the 0x30-byte result with the data at unk_402170.
// Most vNN locals below are only assigned and never read in this listing.
__int64 __fastcall main(__int64 a1, char **a2, char **a3)
{
  signed __int64 v4; // [rsp+1E0h] [rbp-110h]
  signed int j; // [rsp+1E8h] [rbp-108h]
  signed int i; // [rsp+1ECh] [rbp-104h]
  signed int k; // [rsp+1ECh] [rbp-104h]
  char s1[48]; // [rsp+1F0h] [rbp-100h]
  char s[60]; // [rsp+220h] [rbp-D0h]
  unsigned int v10; // [rsp+25Ch] [rbp-94h]
  char *v11; // [rsp+260h] [rbp-90h]
  int v12; // [rsp+26Ch] [rbp-84h]
  bool v13; // [rsp+272h] [rbp-7Eh]
  unsigned __int8 v14; // [rsp+273h] [rbp-7Dh]
  int v15; // [rsp+274h] [rbp-7Ch]
  char *v16; // [rsp+278h] [rbp-78h]
  int v17; // [rsp+284h] [rbp-6Ch]
  int v18; // [rsp+288h] [rbp-68h]
  bool v19; // [rsp+28Fh] [rbp-61h]
  char *v20; // [rsp+290h] [rbp-60h]
  int v21; // [rsp+298h] [rbp-58h]
  bool v22; // [rsp+29Fh] [rbp-51h]
  __int64 v23; // [rsp+2A0h] [rbp-50h]
  bool v24; // [rsp+2AFh] [rbp-41h]
  __int64 v25; // [rsp+2B0h] [rbp-40h]
  __int64 v26; // [rsp+2B8h] [rbp-38h]
  __int64 v27; // [rsp+2C0h] [rbp-30h]
  __int64 v28; // [rsp+2C8h] [rbp-28h]
  int v29; // [rsp+2D0h] [rbp-20h]
  int v30; // [rsp+2D4h] [rbp-1Ch]
  char *v31; // [rsp+2D8h] [rbp-18h]
  int v32; // [rsp+2E0h] [rbp-10h]
  int v33; // [rsp+2E4h] [rbp-Ch]
  bool v34; // [rsp+2EBh] [rbp-5h]

  // v10 is the return value and stays 0.
  v10 = 0;
  // Zero the first 0x30 bytes of the input buffer and the whole output buffer.
  memset(s, 0, 0x30uLL);
  memset(s1, 0, 0x30uLL);
  printf("Input:", 0LL);
  v11 = s;
  // Read the input string into s. The arguments after s are not consumed by
  // the "%s" format; presumably decompiler/obfuscation artifacts.
  __isoc99_scanf("%s", s, (dword_603054 - 1), 3788079310LL);
  // Scan indices 0..63 of s and terminate the string at the first byte equal
  // to 10 ('\n').
  for ( i = 0; ; ++i )
  {
    v12 = i;
    v13 = i < 64;
    if ( i >= 64 )
      break;
    v14 = s[i];
    v15 = v14;
    if ( v14 == 10 )
    {
      v16 = &s[i];
      *v16 = 0;
      break;
    }
    v17 = i + 1;
  }
  // Six groups at byte offsets 8*j: 6 * 8 = 0x30 bytes, the memcmp length.
  for ( j = 0; ; ++j )
  {
    v18 = j;
    v19 = j < 6;
    if ( j >= 6 )
      break;
    v20 = s;
    // Load group j into the signed 64-bit v4 (presumably an 8-byte read; the
    // decompiler dropped the pointer cast).
    v4 = *&s[8 * j];
    // 64 rounds: double v4, and when v4 was negative (top bit set) also XOR
    // the doubled value with 0xB0004B7679FA26B3.
    for ( k = 0; ; ++k )
    {
      v21 = k;
      v22 = k < 64;
      if ( k >= 64 )
        break;
      v23 = v4;
      v24 = v4 < 0;
      if ( v4 >= 0 )
      {
        v27 = v4;
        v28 = 2 * v4;
        v4 *= 2LL;
      }
      else
      {
        v25 = 2 * v4;
        v26 = 2 * v4;
        v4 = 2 * v4 ^ 0xB0004B7679FA26B3LL;
      }
      v29 = k;
    }
    v30 = 8 * j;
    // Store the transformed value at s1 + 8*j (presumably an 8-byte store).
    v31 = &s1[8 * j];
    *v31 = v4;
    v32 = j + 1;
  }
  // The input is accepted when all 0x30 transformed bytes equal the constant
  // data at unk_402170.
  v33 = memcmp(s1, &unk_402170, 0x30uLL);
  v34 = v33 != 0;
  if ( v33 != 0 )
    puts("Wrong!");
  else
    puts("Correct!");
  return v10;
}

通过逆向分析,发现程序把输入按8字节分成6组,每组做64轮类似CRC64的移位异或运算(最高位为1时,左移一位后再异或0xB0004B7679FA26B3;否则只左移一位),最后与unk_402170处的0x30字节比较。我们只需要把这个过程逆着算回去即可,那就把整个步骤逆向写成脚本,用的网上一位师傅的脚本,还是懒人一个,爬.....
re.py:

# Expected 64-bit values, one per 8-byte group of the flag.
secret = [0xBC8FF26D43536296, 0x520100780530EE16, 0x4DC0B5EA935F08EC,
          0x342B90AFD853F450, 0x8B250EBCAA2C3681, 0x55759F81A2C68AE4]
# Constant XORed in by the forward transform whenever the top bit was set.
key = 0xB0004B7679FA26B3

pieces = []
for word in secret:
    # Undo the 64 forward rounds.  The key is odd, so a set low bit means
    # this round XORed the key in, i.e. the value was negative before it.
    for _ in range(64):
        if word & 1:
            # Remove the key, shift back, and restore the lost sign bit.
            word = ((word ^ key) >> 1) | 0x8000000000000000
        else:
            word >>= 1
    # Debug output: recovered value, its low byte (number and character),
    # and the remaining upper bytes.
    print(hex(word))
    print(word & 0xFF)
    print(chr(word & 0xFF))
    print(word >> 8)
    # The 8 bytes of the recovered value, least significant first, are the
    # next 8 characters of the flag.
    pieces.append(word.to_bytes(8, 'little').decode('latin-1'))

flag = "".join(pieces)
print(flag)

总结:这个题,学到了如何去除ollvm混淆代码,以及类似CRC的移位异或算法的一些逆向(解密)操作

本文链接:

http://azly.top/index.php/archives/28/
1 + 3 =
快来做第一个评论的人吧~