## 2022/11/08 針對 Dofloo 單一個樣本，4 種 ASG 的結果進行分析

In [1]:
import pandas as pd
import pickle
import sys
from collections import Counter
from pathlib import Path

嘗試過從上層資料夾 import 模組，但這樣 build() 內的相對路徑會出錯，所以還是直接把模組複製到這個資料夾來測試。
- https://www.geeksforgeeks.org/python-import-from-parent-directory/
- https://stackoverflow.com/questions/714063/importing-modules-from-parent-folder
- https://stackoverflow.com/questions/39125532/file-does-not-exist-in-jupyter-notebook

In [2]:
# from cc_module import build
# from cc_module import AttackGraph, Node, Edge, FileTable
# from cc_module_cust import build
from cc_module_cust import AttackGraph, Node, Edge, FileTable # 增加 AttackGraph.step_list

### Excel 1: 包裝成 function，印出統計結果 (Node)
結果直接輸出到txt中喔!

In [3]:
def translate_type(s: str) -> str:
    """Map a short node-type code to its display category.

    'p' -> 'proc', 'f' -> 'file', 'n' -> 'net', 'm' and 'm_addr' -> 'mem'.
    Any other code yields 'other'.
    """
    category_by_code = dict(p='proc', f='file', n='net', m='mem', m_addr='mem')
    try:
        return category_by_code[s]
    except KeyError:
        return 'other'

In [8]:
def analysis_graph(asg: AttackGraph) -> pd.DataFrame:
    """Print summary statistics of a graph and return per-node counts.

    Printed to the current ``sys.stdout``: the number of nodes and steps,
    a count of nodes per raw ``node.type``, how often each node name and
    each edge name occurs in ``asg.step_list``, and the number of distinct
    edge names.

    Args:
        asg: Object whose ``graph`` mapping is keyed by nodes exposing
            ``name`` and ``type``, and whose ``step_list`` holds
            ``(src_node, dst_node, edgename)`` triples.

    Returns:
        A DataFrame with one row per node name and the columns
        ``node_name``, ``type`` (mapped through ``translate_type``) and
        ``appearance`` (number of times the name is a step endpoint).
    """
    node_set = [node.name for node in asg.graph.keys()]
    print('# of nodes:',len(node_set))
    print('# of edges:',len(asg.step_list),'(edges equal to steps)\n')

    # Pre-seed the known type codes so they are printed even when absent.
    nodeType_counter = Counter({n:0 for n in ['f','c','n','p','m_addr','pipe','else']})
    # Seed the name -> category map from the graph itself: a node that is in
    # the graph but never appears in a step would otherwise raise KeyError
    # when the result table is built below.
    node_type = {}
    for node in asg.graph.keys():
        nodeType_counter[node.type] += 1
        node_type[node.name] = translate_type(node.type)
    print('# of node type:\n',nodeType_counter)

    # Every graph node starts at 0 so nodes without steps still get a row.
    node_counter = Counter({n:0 for n in node_set})
    edge_counter = Counter()
    for (src_node, dst_node, edgename) in asg.step_list:
        node_counter[src_node.name] += 1
        node_counter[dst_node.name] += 1
        edge_counter[edgename] += 1
        node_type[src_node.name] = translate_type(src_node.type)
        node_type[dst_node.name] = translate_type(dst_node.type)
    print('\nfrequency of node:\n',node_counter)
    print('\nfrequency of edge:\n',edge_counter)
    print('\n# of syscall',len(edge_counter))

    # One row per node: name, translated type, number of appearances.
    rows = [[name, node_type[name], count] for name, count in node_counter.items()]
    return pd.DataFrame(rows, columns=['node_name', 'type', 'appearance'])


- 這邊直接寫入檔案 V1.txt, V2.txt ...。也生成一個 node 的統計資訊 excel 檔。
- https://stackoverflow.com/questions/7152762/how-to-redirect-print-output-to-a-file

In [9]:
# Alternative kept for reference: load a single pickle by hand.
# filename = 'Dofloo_V1_ALL有畫出-Memory-and-System.pkl'
# with open(filename, 'rb') as inp:
#     asg_all = pickle.load(inp)

from contextlib import redirect_stdout

filename_lst = ['Dofloo_V1_ALL_Memory_and_System.pkl','Dofloo_V2_ALL_Memory_sink.pkl',]
    # 'Dofloo_V3_ALL_wo_system.pkl','Dofloo_V4_ALL_wo_system_memory.pkl']
node_stat_lst = []

for i,filename in enumerate(filename_lst):
    # Everything analysis_graph prints goes to 1115_Dofloo_V<n>.txt.
    # redirect_stdout restores sys.stdout and the `with` closes the report
    # file even if loading or analysing the graph raises.
    with open(f'1115_Dofloo_V{i+1}.txt', 'w') as f, redirect_stdout(f):
        # NOTE(review): pickle.load can execute arbitrary code; only use it
        # on pickle files you produced yourself.
        with open(f'1115_dofloo_V1~V2_pkl_cust/{filename}', 'rb') as inp:
            asg_object = pickle.load(inp)
            df = analysis_graph(asg_object)
            node_stat_lst.append(df)

# Write the per-node statistics to Excel, one sheet per graph version.
# Careful: an existing file with the same name is overwritten.
with pd.ExcelWriter("1115_node_statistics.xlsx") as writer:
    for sheet_no, frame in enumerate(node_stat_lst, start=1):
        frame.to_excel(writer, sheet_name=f"ver{sheet_no}")

### Excel 2: 列印出圖中的 Triplet (edge)
用來觀察 x -> A 的 x 有哪些，又執行了甚麼動作。 
 
- 影響系統的函數關鍵字：set、change、create（因為 create 也是在系統中多出一個新的東西，所以 open() 也包含在這裡）。

  ```python
  affect_sys = ["set_thread_area", "setsid", "fcntl", "fchown", "fchmod", "rename", "umask", "set_tid_address", "futex",
                "set_robust_list", "arch_prctl", "execve", "exit_group", "clone", "write", "connect", "send", "brk", "socket", "mprotect", "mmap", "openat", "mmap2", "munmap", "open", "close"]
  ```

- 有些函數不會影響系統，但會取得系統資訊，所以還是蠻重要的（例如 uname）。

  ```python
  non_affect_sys = ["ugetrlimit", "readlink", "getcwd", "uname", "waitpid", "getuid", "getgid", "getppid",
                    "getegid", "wait4", "prlimit64", "getpid", "geteuid", "statfs", "_newselect", "getsockopt",
                    "stat", "access", "fstat", "ioctl", "time", "fstat64", "nanosleep", "lseek", "read", ]
  ```

In [10]:
def is_important(edgename: str) -> bool:
    '''Tell whether a syscall is one that changes the system; True if so.'''
    affect_sys = (
        "set_thread_area", "setsid", "fcntl", "fchown", "fchmod", "rename",
        "umask", "set_tid_address", "futex", "set_robust_list", "arch_prctl",
        "execve", "exit_group", "clone", "write", "connect", "send", "brk",
        "socket", "mprotect", "mmap", "openat", "mmap2", "munmap", "open",
        "close",
    )
    return edgename in affect_sys

def analysis_triplate(asg: AttackGraph) -> pd.DataFrame:
    """Build a table with one row per step (source node, edge, target node).

    Args:
        asg: Object whose ``graph`` mapping is keyed by nodes exposing
            ``name`` and ``type``, whose ``edges`` mapping is keyed by
            ``(src_node, dst_node, _)`` tuples, and whose ``step_list`` holds
            ``(src_node, dst_node, edgename)`` triples.

    Returns:
        A DataFrame with the columns ``step``, ``src_node``, ``type``,
        ``deg``, ``edge``, ``dst_node``, ``type``, ``deg``, ``importance``.
        The labels ``type`` and ``deg`` appear twice on purpose: once for
        the source side and once for the destination side. ``step`` is the
        1-based position of the triple in ``asg.step_list``; ``importance``
        is 1 when ``is_important(edgename)`` is true, else 0.
    """
    # Degree of a node name = how many times it is an endpoint in asg.edges.
    node_counter = Counter({node.name: 0 for node in asg.graph.keys()})
    for (src_node, dst_node, _), _edge in asg.edges.items():
        node_counter[src_node.name] += 1
        node_counter[dst_node.name] += 1

    triplate_lst = []
    # Number the steps from the step list itself. The index used to be the
    # leftover variable of the degree loop above, which gave every row the
    # same number and raised NameError when asg.edges was empty.
    for step, (src_node, dst_node, edgename) in enumerate(asg.step_list, start=1):
        src = [src_node.name, translate_type(src_node.type), node_counter[src_node.name]]
        dst = [dst_node.name, translate_type(dst_node.type), node_counter[dst_node.name]]
        imp = 1 if is_important(edgename) else 0  # does this syscall change the system?
        # The two endpoints of a 'read' step are swapped in the output row.
        if edgename == 'read':
            src, dst = dst, src
        triplate_lst.append([step, *src, edgename, *dst, imp])

    return pd.DataFrame(
        triplate_lst,
        columns=['step','src_node', 'type', 'deg', 'edge', 'dst_node', 'type', 'deg','importance'],
    )

# Single-file variant kept for reference:
# filename = 'Dofloo_V4_ALL_wo_system_memory.pkl'
# with open(filename, 'rb') as inp:
#     asg_all = pickle.load(inp)
# df = analysis_triplate(asg_all)

filename_lst = [
    'Dofloo_V1_ALL_Memory_and_System.pkl',
    'Dofloo_V2_ALL_Memory_sink.pkl',
    # 'Dofloo_V3_ALL_wo_system.pkl',
    # 'Dofloo_V4_ALL_wo_system_memory.pkl',
]
triplate_stat_lst = []
node_type_set = set()

# Load each pickled graph and collect its step table.
for i, filename in enumerate(filename_lst):
    with open(f'1115_dofloo_V1~V2_pkl_cust/{filename}', 'rb') as inp:
        asg_object = pickle.load(inp)
        df = analysis_triplate(asg_object)
        triplate_stat_lst.append(df)

# One sheet per graph version: ver1, ver2, ...
with pd.ExcelWriter("1115_triplate_statistics.xlsx") as writer:
    for sheet_no, frame in enumerate(triplate_stat_lst, start=1):
        frame.to_excel(writer, sheet_name=f"ver{sheet_no}")

- 使用 xlwings 套件（以 Python 操作 Excel，功能類似 VBA），將 importance 為 1 的資料列上色。

In [None]:
# ! pip install xlwings

In [11]:
import string
def char(idx: int) -> str:
    '''Convert a 0-based column index to Excel column letters.

    0 -> 'A', 1 -> 'B', ..., 25 -> 'Z', 26 -> 'AA', 27 -> 'AB', ...

    Raises:
        ValueError: if ``idx`` is negative.
    '''
    if idx < 0:
        raise ValueError(f"column index must be non-negative, got {idx}")
    letters = []
    remaining = idx + 1  # column letters form a 1-based bijective base-26 number
    while remaining > 0:
        remaining, digit = divmod(remaining - 1, 26)
        letters.append(string.ascii_uppercase[digit])
    return "".join(reversed(letters))

In [12]:
import xlwings as xw
from xlwings.constants import Direction
# Open the workbook '1115_triplate_statistics.xlsx' and, on every sheet,
# highlight each data row whose 'importance' cell equals 1.
workbook = xw.Book('1115_triplate_statistics.xlsx')
sheets = workbook.sheets

for i,sheet in enumerate(sheets):
    # Size of the data area, measured from the header cell B1.
    lenx = sheet.range("B1").end(Direction.xlDown).row - 1 # last row reached going down from B1, minus the header row
    colx = sheet.range("B1").end(Direction.xlToRight).column - 1 # last column reached going right from B1, as a 0-based index for char()
    # Header texts from column B up to the last column found above.
    titles = sheet.range(f"B1:{char(colx)}1").value
    # titles starts at column B, hence the +1 to get the 0-based sheet column.
    # NOTE(review): the original note said 'importance' is column H; with a
    # nine-column table starting at B it would be column J. Confirm.
    col_imp = char(titles.index('importance') + 1) # letter of the 'importance' column

    # Data rows start at sheet row 2 (row 1 holds the titles).
    for row in range(2, lenx+2):
        # if importance cell == 1, color that row
        if sheet.range(f"{col_imp}{row}").value == 1:
            # print('yes')
            sheet.range(f"B{row}:{col_imp}{row}").color = (255, 255, 204) # light yellow
# Save the colouring back into the same workbook file.
workbook.save()