In [1]:
from openpyxl import load_workbook, Workbook
import os
import pickle
import pandas as pd
import numpy as np
from numpy import nan as NaN
import re
import math

%run CollectForestInfo.ipynb

In [2]:
# calculate profile's maximum score in each family
def profileScoreInEachFamily(profileScoreInFamilyBTs):
    """Select, per process and per family, the best behavior-tree score.

    Args:
        profileScoreInFamilyBTs: dict mapping process name -> dict mapping
            family name -> iterable of (score, gsaLen, modelLen) tuples.

    Returns:
        dict mapping process name -> dict mapping family name -> the winning
        (score, gsaLen, modelLen) tuple. The running best starts at (0, 0, 0);
        a candidate replaces it when its score is strictly higher, or when the
        score ties and its gsaLen is strictly longer.
    """
    bestByProcess = dict()
    for procName, scoresByFamily in profileScoreInFamilyBTs.items():
        bestByFamily = dict()
        for family, candidates in scoresByFamily.items():
            best = (0, 0, 0)
            for candidate in candidates:
                bestScore, bestGsaLen, _ = best
                candScore, candGsaLen, candModelLen = candidate
                isHigher = float(candScore) > float(bestScore)
                # Ties on score are broken in favour of the longer alignment.
                isLongerTie = (not isHigher
                               and float(candScore) == float(bestScore)
                               and candGsaLen > bestGsaLen)
                if isHigher or isLongerTie:
                    best = (candScore, candGsaLen, candModelLen)
            bestByFamily[family] = best
        bestByProcess[procName] = bestByFamily
    return bestByProcess

In [3]:
def createScoreBox(result_data_dir, ignoreFamilys, weight_dict):
    """Build a table holding each process's best alignment score per family.

    Every file found in ``result_data_dir`` is unpickled and read as
    ``{process: {behaviorTreeName: (gsaLen, commSeq)}}``. A behavior-tree name
    is split on '_': field 0 is the family name and field 2 is the tree name.

    Args:
        result_data_dir: directory containing the pickled alignment results.
        ignoreFamilys: collection of family names whose trees are skipped.
        weight_dict: ``{family: {tree: (modelWeight, modelLength,
            modelMemberCount)}}`` as returned by ``readModelFiles``.

    Returns:
        pandas DataFrame indexed by process name with one '<family> Score'
        column per family; each cell is the string 'gsaLen/modelLen=score'
        where score is the ratio truncated to two decimals.

    Raises:
        KeyError: if a tree in the results is missing from ``weight_dict``.
    """
    scoreBox = pd.DataFrame()
    for resPkl in os.listdir(result_data_dir):
        # NOTE(security): pickle.load can execute arbitrary code; only point
        # this at result files produced by a trusted pipeline.
        with open(os.path.join(result_data_dir, resPkl), 'rb') as fHandle:
            result = pickle.load(fHandle)

        # calculate profile's score with each behavior tree
        # record with family name
        profileScoreInFamilyBTs = dict()
        for proc, btResults in result.items():
            familyBTScores = dict()
            for bt, alignment in btResults.items():
                nameParts = bt.split('_')
                famName = nameParts[0]
                if famName in ignoreFamilys:
                    continue  # skip unwanted family
                trName = nameParts[2]
                gsaLen, _ = alignment
                _, modelLength, modelMemberCount = weight_dict[famName][trName]

                # filter trees: same thresholds readModelFiles uses to decide
                # whether a family has any usable tree
                if modelLength > 10 and modelMemberCount > 2:
                    profileInBT = gsaLen / modelLength  # gsa ratio score for bt
                    familyBTScores.setdefault(famName, []).append(
                        (profileInBT, gsaLen, modelLength))
            profileScoreInFamilyBTs[proc] = familyBTScores

        # calculate profile's maximum score in each family
        profileFamilyScores = profileScoreInEachFamily(profileScoreInFamilyBTs)

        # record profile score of family to scorebox
        for proc, familyScores in profileFamilyScores.items():
            for famName, scoreInfo in familyScores.items():
                score, gsaLen, modelLen = scoreInfo
                # Truncate to two decimals. The inner round() removes binary
                # floating-point noise first, so that e.g. 29/100 (stored as
                # 28.999999999999996 after *100) is reported as 0.29, not 0.28.
                score = math.floor(round(score * 100, 6)) / 100
                outputSentence = str(gsaLen) + '/' + str(modelLen) + '=' + str(score)
                scoreBox.loc[proc, famName + ' Score'] = outputSentence
    return scoreBox

def assignGroundTruth(scoreBox, sample_truth_dict):
    """Add a 'GroundTruthLabel' column to ``scoreBox``.

    The sample name of each row is the part of its index label before the
    first '_'; the label is looked up in ``sample_truth_dict``.

    Args:
        scoreBox: DataFrame indexed by process-name strings.
        sample_truth_dict: dict mapping sample name -> ground-truth label.

    Returns:
        The same ``scoreBox`` object (modified in place).

    Raises:
        KeyError: if a sample name has no entry in ``sample_truth_dict``.
    """
    # Use the mapping that was passed in rather than a notebook-level global,
    # and resolve every label before touching the frame so a missing sample
    # cannot leave the column half-filled.
    labels = [sample_truth_dict[proc.split('_')[0]] for proc in scoreBox.index]
    scoreBox['GroundTruthLabel'] = labels
    return scoreBox

def getTruthScore(processScores, groundTruth):
    """Return the score of the first (family, score) pair whose family equals
    ``groundTruth``, or None when no pair matches."""
    matchingScores = (score for famName, score in processScores
                      if groundTruth == famName)
    return next(matchingScores, None)

def exactMatch(threshold, processScores, groundTruth):
    """Classify a sample by its top-scoring family (or families, on a tie).

    Args:
        threshold: minimum score the top entry must reach to decide at all.
        processScores: non-empty list of (family, score) pairs; the first
            entry is taken as the maximum, so the list is expected to be
            sorted by score in descending order.
        groundTruth: the sample's true family name.

    Returns:
        3 (undecided) if the top score is below ``threshold``;
        1 (match) if ``groundTruth`` is among the families sharing the top
        score; 2 (mismatch) otherwise.
    """
    _, topScore = processScores[0]
    if topScore < threshold:
        return 3

    # Several families may share the maximum score; the truth only has to be
    # one of them.
    tiedFamilies = {famName for famName, score in processScores
                    if score == topScore}
    return 1 if groundTruth in tiedFamilies else 2

def effectiveMatch(threshold, processScores, groundTruth):
    """Classify a sample by whether its true family is among all candidates.

    Every family with at least one score >= ``threshold`` is a candidate.

    Args:
        threshold: minimum score for a family to become a candidate.
        processScores: list of (family, score) pairs, in any order; may be
            empty.
        groundTruth: the sample's true family name.

    Returns:
        3 (undecided) if no family reaches ``threshold`` (including the case
        of an empty ``processScores``); 1 (match) if ``groundTruth`` is a
        candidate; 2 (mismatch) otherwise.
    """
    # prepare the prospective candidate set
    candidateSet = {pair[0] for pair in processScores if pair[1] >= threshold}

    if not candidateSet:
        return 3
    return 1 if groundTruth in candidateSet else 2

def getMatchResult(scoreBox, threshold, matchMethod):
    """Decide Match / Mismatch / Undecided for every sample in ``scoreBox``.

    Rows of ``scoreBox`` are processes; the sample name is the part of the
    index label before the first '_'. Scores of all processes belonging to the
    same sample are pooled before matching.

    Args:
        scoreBox: DataFrame with '<family> Score' columns whose cells look
            like 'gsaLen/modelLen=score', plus a 'GroundTruthLabel' column.
        threshold: score threshold forwarded to the match function.
        matchMethod: 'Effective_Match' or 'Exact_Match'.

    Returns:
        DataFrame indexed by sample name with columns 'MatchType',
        'PredictFamily', 'GroundTruth', 'MaxScore' and 'TruthScore'.
        For a match, 'PredictFamily' is the ground truth; otherwise it is the
        family of the highest pooled score.

    Raises:
        ValueError: if ``matchMethod`` is not one of the two known methods.
    """
    # there are 'Exact-Match' and 'Effective-Match' method.
    matchers = {'Effective_Match': effectiveMatch, 'Exact_Match': exactMatch}
    if matchMethod not in matchers:
        raise ValueError("Unknown matchMethod %r; expected one of %s"
                         % (matchMethod, sorted(matchers)))
    matcher = matchers[matchMethod]

    # Select score columns by name instead of assuming exactly 28 families
    # sitting at column positions 0..27.
    scoreColumns = [col for col in scoreBox.columns
                    if str(col).endswith(' Score')]

    sample_scores = dict()
    sample_truth = dict()
    for procName, row in scoreBox.iterrows():
        sampleName = procName.split('_')[0]
        truthLabel = row['GroundTruthLabel']
        if sampleName not in sample_scores:
            sample_scores[sampleName] = []
            sample_truth[sampleName] = truthLabel

        for col in scoreColumns:
            cell = row[col]
            if pd.isna(cell):
                continue  # this process has no score recorded for the family
            famName = col.split(' ')[0]
            sample_scores[sampleName].append((famName, float(cell.split('=')[1])))

    records = []
    for sampleName, pooledScores in sample_scores.items():
        truth = sample_truth[sampleName]
        processScores = sorted(pooledScores, key=lambda pair: pair[1], reverse=True)
        truthScore = getTruthScore(processScores, truth)

        # result: 1-match, 2-mismatch, 3-undecided
        if processScores:
            maxScoreFamily, maxScore = processScores[0]
            matchResult = matcher(threshold, processScores, truth)
        else:
            # No usable score at all for this sample: nothing can be decided.
            maxScoreFamily, maxScore = None, None
            matchResult = 3

        if matchResult == 1:
            matchType, predictFamily = 'Match', truth
        elif matchResult == 2:
            matchType, predictFamily = 'Mismatch', maxScoreFamily
        else:
            matchType, predictFamily = 'Undecided', maxScoreFamily
        records.append((matchType, predictFamily, truth, maxScore, truthScore))

    # Build the table once instead of enlarging it cell by cell.
    return pd.DataFrame(
        records,
        index=list(sample_scores.keys()),
        columns=['MatchType', 'PredictFamily', 'GroundTruth', 'MaxScore', 'TruthScore'],
    )

In [4]:
def readModelFiles(modelBaseDirectory):
    """Collect per-tree model statistics for every family directory.

    Each sub-directory ``<tag>`` of ``modelBaseDirectory`` is expected to
    contain ``pickle/<tag>_intermediate.pickle`` and
    ``pickle/<tag>_residual.pickle``, which are handed to ``CollectForestInfo``
    (defined outside this cell; presumably it loads one forest per family).
    The family name is the part of ``<tag>`` before the first '_'.

    Args:
        modelBaseDirectory: directory holding one sub-directory per family
            (with or without a trailing slash).

    Returns:
        (ignoreFamilys, weight_dict) where ``weight_dict`` is
        ``{family: {treeName: (memberCount / forestMemberCount,
        len(representative API sequence), memberCount)}}`` and
        ``ignoreFamilys`` is the set of families without any tree whose
        sequence length is > 10 and whose member count is > 2.
    """
    weight_dict = dict()
    for tag in os.listdir(modelBaseDirectory):
        familyDir = os.path.join(modelBaseDirectory, tag)
        if not os.path.isdir(familyDir):
            continue  # stray files next to the family directories are not models

        pickleDir = os.path.join(familyDir, 'pickle')
        interPkl = os.path.join(pickleDir, tag + "_intermediate.pickle")
        residualPkl = os.path.join(pickleDir, tag + "_residual.pickle")
        forestInfo = CollectForestInfo(interPkl,
                                       residualPkl,
                                       True)  # one pickle is a forest

        forestMemberCount = forestInfo.getForestMemberCount()
        familyTrees = dict()
        for treeName in forestInfo.getTreeRootNameList():
            memberCount = len(forestInfo.getTreeMembers(treeName))
            repSeq = forestInfo.getRepAPISeq(treeName)
            familyTrees[treeName] = (memberCount / forestMemberCount,
                                     len(repSeq), memberCount)
        weight_dict[tag.split("_")[0]] = familyTrees

    # A family is ignored when none of its trees passes the usability filter
    # (sequence length > 10 and more than 2 members).
    ignoreFamilys = set()
    for fName, trs in weight_dict.items():
        if not any(info[1] > 10 and info[2] > 2 for info in trs.values()):
            ignoreFamilys.add(fName)

    print("=== Finish building model ===")

    return ignoreFamilys, weight_dict

In [5]:
# --- configuration ---------------------------------------------------------
modelBaseDir = "output/top3_party_0622/"
resultDir = "output/omg_testing/11939_onlyone_detect/global_align/"
# resultDir = "output/testingResult/11939_0622_top3/global_align/"
myDB = '11939data/no_consensus_one_detected/'
# myDB = '11939data/top3_party_thesis_test/'
# Bound to a name (not only passed as a keyword) because the plotting cell
# further down reads `threshold` when it builds the output file name.
threshold = 0.8
matchMethod = 'Effective_Match'  # 'Exact_Match' or 'Effective_Match'

# --- build models and score every test process -----------------------------
ignoreFamilys, weight_dict = readModelFiles(modelBaseDir)
baseScoreBox = createScoreBox(resultDir, ignoreFamilys, weight_dict)

# --- ground truth: each family directory lists its traces; the first six
# characters of a trace file name identify the sample -----------------------
testFamilies = os.listdir(myDB)
truth_dict = dict()
for famName in testFamilies:
    famPath = os.path.join(myDB, famName)
    for trace in os.listdir(famPath):
        shaName = trace[0:6]
        truth_dict[shaName] = famName

scoreBox_withTruth = assignGroundTruth(baseScoreBox, truth_dict)

matchResultTable = getMatchResult(scoreBox_withTruth, threshold=threshold, matchMethod=matchMethod)

# value_counts() omits categories that never occur, so look them up with a
# default instead of indexing (which would raise KeyError).
matchTypeStatistic = matchResultTable['MatchType'].value_counts()
matchCtr = matchTypeStatistic.get('Match', 0)
mismatchCtr = matchTypeStatistic.get('Mismatch', 0)
undecidedCtr = matchTypeStatistic.get('Undecided', 0)
totalSamples = matchCtr + mismatchCtr + undecidedCtr
print(matchTypeStatistic)
print("Match Rate:", matchCtr/totalSamples if totalSamples else float('nan'))
matchResultTable

=== Finish building model ===
Match        27
Mismatch     26
Undecided    13
Name: MatchType, dtype: int64
Match Rate: 0.409090909091


Unnamed: 0,MatchType,PredictFamily,GroundTruth,MaxScore,TruthScore
0b0bc2,Undecided,allaple,berbew,0.61,0.19
0d4a1a,Mismatch,allaple,chir,1.00,0.39
3b720c,Match,expiro,expiro,1.00,0.98
0cc1fe,Undecided,allaple,fakeav,0.61,0.42
0d325e,Match,fakeav,fakeav,1.00,0.95
0dbd93,Match,fakeav,fakeav,1.00,0.95
00feef,Mismatch,allaple,mabezat,1.00,0.07
0efa91,Match,mydoom,mydoom,1.00,0.98
e86fcc,Match,parite,parite,1.00,1.00
1ce77e,Match,ramnit,ramnit,1.00,0.82


### Create Confusion Matrix and Do Evaluation
##### Calculate 'Precision', 'Recall', and 'F1'

In [None]:
# One confusion matrix per family. Families are taken from the ground truth
# AND from the decided predictions: a family that is predicted but never
# occurs as ground truth still needs an entry for its false positives.
decidedRows = matchResultTable[matchResultTable['MatchType'] != 'Undecided']
family_confusion = dict()
for family in list(matchResultTable['GroundTruth'].values) + list(decidedRows['PredictFamily'].values):
    if family not in family_confusion:
        family_confusion[family] = {'TP':0, 'FP':0, 'TN':0, 'FN':0}

## create confusion matrix for each family
for sample, row in matchResultTable.iterrows():
    truthLabel = row['GroundTruth']
    matchType = row['MatchType']

    if matchType == 'Undecided':
        # Nothing was predicted: the sample is a miss for its own family and
        # a true negative for every other family.
        predictLabel = None
        family_confusion[truthLabel]['FN'] += 1
    else:
        predictLabel = row['PredictFamily']
        if predictLabel == truthLabel:
            family_confusion[truthLabel]['TP'] += 1
        else:
            family_confusion[truthLabel]['FN'] += 1
            family_confusion[predictLabel]['FP'] += 1

    for famName in family_confusion.keys():
        if famName != truthLabel and famName != predictLabel:
            family_confusion[famName]['TN'] += 1

recalls = []
precisions = []
fs = []
families = sorted(family_confusion.keys())
for famName in families:
    matrix = family_confusion[famName]
    tp = matrix['TP']
    fp = matrix['FP']
    fn = matrix['FN']

    # An undefined ratio (empty denominator) is reported as 0.0.
    if (tp+fp) != 0:
        precision = tp/(tp+fp)
    else:
        precision = 0.0
    if (tp+fn) != 0:
        recall = tp/(tp+fn)
    else:
        recall = 0.0
    if recall!=0 and precision!=0:
        f1 = 2 / ((1/recall) + (1/precision))  # harmonic mean
    else:
        f1 = 0.0

    # Stored as two-decimal strings; later cells convert them back to float.
    recalls.append("{0:.2f}".format(recall))
    precisions.append("{0:.2f}".format(precision))
    fs.append("{0:.2f}".format(f1))

### Create a dataframe table (dashboard)
evaTable = pd.DataFrame({'Precision': precisions, 'Recall': recalls, 'F1': fs},
                        index=families,
                        columns=['Precision', 'Recall', 'F1'])

print(evaTable.shape)
evaTable

In [None]:
def averOfList(myList):
    """Return the arithmetic mean of ``myList`` after converting every item
    with ``float()`` (so numeric strings are accepted).

    Raises ZeroDivisionError when ``myList`` is empty.
    """
    total = sum(float(item) for item in myList)
    return total / len(myList)

# Macro averages over families, computed from the two-decimal strings built
# in the evaluation cell (averOfList converts them back to float).
aver_precision, aver_recall, aver_f1 = map(averOfList, (precisions, recalls, fs))

print("Average Precision:", aver_precision)
print("Average Recall:", aver_recall)
print("Average F1:", aver_f1)

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt

### Draw the visualization graph for precision/recall/f1
families = sorted(family_confusion.keys())
x = families
y = precisions ## precisions/recalls/fs
yinterval = 0.2
plt.bar(x, y, color='skyblue')
plt.xticks(rotation='vertical')
plt.tight_layout()
for i in range(len(x)):
    plt.text(x=i-1, y=y[i], s=y[i], rotation=20)
plt.savefig('Report/Aries/precisions_'+str(threshold)+'.png')