IDA Python 自动化检测格式化字符串漏洞

IDA Python是IDA的一个插件，可以利用Python借助IDA的反汇编结果来实现自动化静态分析，当然还有很多高级功能，比如自动化patch等等。常用的module有idautils、idc、idaapi等。

格式化字符串漏洞是old school hacking技术中一个经典的漏洞，主要的原因在于printf家族的函数中format可控，导致攻击者可以利用format中一些字符来完成泄漏信息、控制PC指针等操作。由于这个漏洞很好检测，在编译时就可以被发现，因此现在基本已经见不到了（CTF除外）。

这里分享一下我写的检测脚本。

脚本

# date: 2019-03-05
# author: thinkycx
# info: This is a baby IDA Python script that aims to find format string vulnerabilities automatically.
# References: 
#           1. https://cartermgj.github.io/2017/10/10/ida-python/
#           2. IDA Python 初学者指南  作者：Alexander Hanel 翻译:foyjog
# usage:
#           1. shift+F2 and import this script
#           2. Run...
#
# Detect case:
'''
x64:
    vulnerable case 1:                              # 检测前5条指令中是否存在mov rdi，并且参数是否是寄存器
        lea     rax, [rbp+buf]
        mov     rdi, rax        ; format
        mov     eax, 0
        call    _printf
    vulnerable case 2:                              # 同上
        mov     rax, [rbp+buf]
        mov     rdi, rax        ; format
        mov     eax, 0
        call    _printf 
    not vulnerable case 1:                          # 白名单情况，mov edi，offset format
        lea     rax, [rbp+buf]
        mov     rsi, rax
        mov     edi, offset format ; "%s"
        mov     eax, 0
        call    _printf

x86:
    vulnerable case 1:                              # 检测 前一条push指令的操作数是否是 o_reg
        lea     eax, [ebp+buf]
        push    eax             ; format
        call    _printf 
    vulnerable case 2:                              # 检测 前一条push指令的操作数是否是 o_displ
        sub     esp, 0Ch
        push    [ebp+buf]       ; format
        call    _printf
    not vulnerable case 1:                          # 白名单情况，push offset
        add     esp, 10h
        sub     esp, 8
        lea     eax, [ebp+buf]
        push    eax
        push    offset format   ; "%s"
        call    _printf


'''

def get_printf_plt():
    '''
        Return the address of the function IDA names '.printf' (the printf
        PLT stub), or idc.BADADDR when the database has no such function.

        The original version left its result variable unassigned when no
        function matched, so the final return raised UnboundLocalError.
    '''
    for func_ea in idautils.Functions():                # every function start address in the database
        if idc.GetFunctionName(func_ea) == '.printf':
            return func_ea
    return idc.BADADDR                                  # no printf stub in this binary
        
  
def find_fsb(printf_plt, bits):
    '''
        Report call sites of printf whose format argument does not look like
        a constant string.

        printf_plt: address of the printf PLT stub (see get_printf_plt).
        bits:       64 or 32; any other value is reported as unsupported.

        For every cross reference to printf_plt the instructions just before
        the referencing address are inspected:
          * 64-bit: walk back at most 5 instructions to the nearest mov/lea
            whose destination is rdi/edi (first argument register).
          * 32-bit: look at the single preceding instruction, expected to be
            the push of the first argument.
        Findings are printed; nothing is returned.
    '''
    max_back = 5                                        # how many instructions to walk back on x64
    xrefs = list(idautils.XrefsTo(printf_plt, flags=0))
    # Single-argument print(...) produces the same output under the
    # Python 2 print statement and the Python 3 print function.
    print('[+] find printf@plt xref number:  %d' % len(xrefs))

    if bits not in (32, 64):
        print('not supported yet...:(')
        return

    for xref in xrefs:
        print('[+] call printf addr: %s' % hex(xref.frm))
        if bits == 64:
            _check_x64_call(xref.frm, max_back)
        else:
            _check_x86_call(xref.frm)


def _report_fsb(addr):
    '''Print the warning line for the suspicious argument-loading instruction at addr.'''
    print("[!] Might find format string attack .... 0x%x %s !!! " % (addr, idc.GetDisasm(addr)))


def _check_x64_call(call_addr, max_back):
    '''
        Inspect the nearest mov/lea into rdi/edi within max_back instructions
        before call_addr and report it when the source is not a constant.

        The scan stops at that nearest load: anything further back was
        overwritten before the call, so flagging it (as the original loop
        did by never breaking) produces false positives.
    '''
    addr = call_addr
    for _ in range(max_back):
        addr = idc.PrevHead(addr)
        if addr == idc.BADADDR:                         # ran out of instructions
            return
        mnem = idc.GetMnem(addr)
        if mnem not in ('mov', 'lea'):
            continue
        if idc.GetOpnd(addr, 0) not in ('rdi', 'edi'):
            continue
        src_type = idc.GetOpType(addr, 1)
        if mnem == 'mov':
            # mov rdi, <reg>: format comes from a computed value.
            # mov edi, offset fmt is an immediate and is left alone.
            if src_type == idc.o_reg:
                _report_fsb(addr)
        else:
            # lea rdi, [reg+disp] / [reg]: address of a non-static buffer.
            # lea rdi, fmt (direct memory reference) is left alone.
            if src_type in (idc.o_displ, idc.o_phrase):
                _report_fsb(addr)
        return


def _check_x86_call(call_addr):
    '''
        Inspect the instruction right before call_addr; if it is a push of a
        register or of a base+displacement memory operand, report it.
        push offset fmt is an immediate operand and is therefore not reported.
    '''
    addr = idc.PrevHead(call_addr)
    if addr == idc.BADADDR or idc.GetMnem(addr) != 'push':
        return
    if idc.GetOpType(addr, 0) in (idc.o_reg, idc.o_displ):
        _report_fsb(addr)
                


def check_arch():
    '''
        Return the bitness reported by IDA's inf structure:
        64 if is_64bit(), otherwise 32 if is_32bit(), otherwise 16.
    '''
    inf = idaapi.get_inf_structure()
    if inf.is_64bit():
        return 64
    return 32 if inf.is_32bit() else 16
    

def main():
    '''
        Script driver: locate the printf PLT stub, determine the bitness of
        the database, then scan every printf call site.
    '''
    # Each print passes one pre-formatted string, so the output is the same
    # as the comma-separated Python 2 print statements it replaces.
    print('=================================== easy format string vulnerablity check...')

    plt_addr = get_printf_plt()
    print('[*] printf@plt : %s' % hex(plt_addr))

    arch_bits = check_arch()
    print('[*] Arch : %s' % arch_bits)

    find_fsb(plt_addr, arch_bits)


# Run the scan when the file is executed as a script.
if __name__ == '__main__':
    main()

检测效果

参考

IDA Python 初学者指南作者：Alexander Hanel 翻译:foyjog （可在看雪上下载到）
https://cartermgj.github.io/2017/10/10/ida-python/