# Output Csv

In [None]:
class OutputCsv:
    """Write the stage/motif CSV and the resource-frequency CSV of one family.

    Both files are produced as a side effect of construction, inside the
    family directory reported by ``claStageMatrix.getFamPath()``:

    * ``<famPath>/<fam_name>_stageMoti.csv``
    * ``<famPath>/<fam_name>_totalRes.csv``

    where ``fam_name`` is the last ``/``-separated component of ``famPath``.

    Parameters
    ----------
    claMotiAnaly : object providing ``getMotiDict()`` and ``getWinResFreq()``.
    claStageMatrix : object providing ``getFamPath()``, ``getStageMatrix()``,
        ``getMoti_seg2lb()`` and ``getMoti_lb2seg()``.
    claOutStage : object providing ``getStage2Hk()``.

    Raises
    ------
    IOError / OSError
        If either output file cannot be opened for writing.
    KeyError
        If a stage-matrix segment has no label in ``seg2lb`` or ``lb2seg``
        is not keyed ``'M1' .. 'Mn'``.
    """

    # Values fetched from the collaborators in __init__.
    motiDict = None   # only its length is written (as "comMotif")
    resFreq = None    # resource -> sequence; [0] and [1] are written with %d, [2] as text

    path_stageMotiCsv = None
    path_totalResCsv = None

    stageMatrix = None  # key -> iterable of segments (each convertible to a tuple)
    seg2lb = None       # tuple(segment) -> motif label
    lb2seg = None       # motif label 'M<i>' -> sequence of elements

    stage2HkDi = None   # only its keys and its length are used here

    def __init__(self, claMotiAnaly, claStageMatrix,claOutStage):
        self.motiDict = claMotiAnaly.getMotiDict()
        self.resFreq = claMotiAnaly.getWinResFreq()

        famPath = claStageMatrix.getFamPath()
        fam_name = famPath.split('/')[-1]
        self.path_stageMotiCsv = famPath + '/' + fam_name + '_stageMoti.csv'
        self.path_totalResCsv = famPath + '/' + fam_name + '_totalRes.csv'

        self.stageMatrix = claStageMatrix.getStageMatrix()
        self.seg2lb = claStageMatrix.getMoti_seg2lb()
        self.lb2seg = claStageMatrix.getMoti_lb2seg()

        self.stage2HkDi = claOutStage.getStage2Hk()

        self.__saveStageMotiCsv()
        self.__saveTotalResCsv()

    #===private function

    def __saveStageMotiCsv(self):  # stage2HkDi, stageMatrix, lb2seg, seg2lb, motiDict
        """Write the stage matrix (as motif labels) followed by each motif's elements."""
        stage_num = len(self.stage2HkDi)
        if stage_num == 0:
            print('\t0 stage')
        # One comma per stage column so the motif listing starts to the right
        # of the matrix; a single comma is kept when there are no stages.
        # (The previous reduce()-based computation produced the same string for
        # one or more stages but raised TypeError on an empty stage dict and
        # printed "0 stage" when there was exactly one stage.)
        space = ',' * max(stage_num, 1)

        with open(self.path_stageMotiCsv, 'w') as o:
            # Header row: an empty first cell, then one "stage <key>" cell per stage.
            for s in self.stage2HkDi:
                o.write(',stage ' + str(s))
            o.write('\n')

            # One row per stage-matrix key; every segment is shown by its label.
            for hk in self.stageMatrix:
                o.write("%s" % (hk))
                for segment in self.stageMatrix[hk]:
                    o.write(',%s' % self.seg2lb[tuple(segment)])
                o.write('\n')

            # NOTE(review): the "- 1" is inherited from the original code; its
            # reason is not visible in this file - confirm before changing.
            totalMotifAPInum = sum(len(self.lb2seg[lb]) for lb in self.lb2seg) - 1
            o.write('%s  %s  %s\n' % ('idMotif: ' + str(len(self.lb2seg)),
                                      'comMotif: ' + str(len(self.motiDict)),
                                      '#MotifAPIcalls: ' + str(totalMotifAPInum)))

            # Motif listing: the label and first element share a line, the
            # remaining elements follow one per line under the element column.
            for idx in range(1, len(self.lb2seg) + 1):
                label = 'M' + str(idx)
                element_tuple = self.lb2seg[label]
                o.write("%s,%s" % (space, label))
                o.write(",%s" % (element_tuple[0]))
                o.write('\n')
                for element in element_tuple[1:]:
                    o.write('%s,,%s\n' % (space, element))

        print('--- 1.1 output StageMotiCsv---')

    def __saveTotalResCsv(self): # resFreq
        """Write one row per resource, highest frequency first, ties by name ascending."""
        # Two stable passes: order by resource name, then by frequency
        # descending (reverse=True keeps equal-frequency rows in name order).
        by_name = sorted(self.resFreq.items(), key=lambda kv: kv[0])
        sortedItem = sorted(by_name, key=lambda kv: kv[1][0], reverse=True)

        with open(self.path_totalResCsv, 'w') as o:
            o.write('%s,%s,%s,%s\n' % ('Resource', 'freq', 'cat #', 'category'))
            for k, v in sortedItem:
                # Commas inside the category text would split the CSV cell.
                o.write('%s,%d,%d,%s\n' % (k, v[0], v[1], str(v[2]).replace(',', '/ ')))
            o.write('\n%s,%d' % ('totalRes', len(self.resFreq)))

        print('--- 1.2 output TotalResCsv---')
    

# Output ComMotifGraph

In [None]:
import matplotlib.pyplot as plt
import pygraphviz as pgv

In [None]:
class OutputComMotiAnaly:
    """Write the common-motif reports of one family.

    On construction the directory ``<famPath>/<fam_name>_final common motif/``
    is created if missing and three outputs are written into it:

    1. ``<fam_name>_Timline- category graph.png`` - per-motif API-category
       scatter plot above a bar chart of motif lengths (matplotlib).
    2. ``<fam_name>_<motif key>.png`` - one Graphviz picture per motif with
       one node per API category that contains at least one SC_API call.
    3. ``<fam_name>_resources in API category stat.csv`` - resource counts
       per category, broken down by def-chain length.

    Parameters
    ----------
    claMotiAnaly : object providing ``getMotiDict()``, ``getType2API()``,
        ``getScAPI()``, ``getWinCat2Res()`` and ``getWinRes2point()``.
    claStageMatrix : object providing ``getFamPath()``.

    Notes
    -----
    Motif keys are parsed as ``<one char><integer>_<rest>`` and ordered by
    that integer. Each motif value is iterated as 2-item entries whose
    second item is a string starting with the API name followed by ``#``.
    """

    # The trailing digits name the outputs (1, 2, 3 above) that read the attribute.
    motiDict = None    #12
    type2API = None    #123
    ScAPI = None       #2
    winCat2res = None  #3
    winRes2poli = None #3

    fam_name = None
    path_comMotif_dir = None

    def __init__(self, claMotiAnaly, claStageMatrix):
        self.motiDict = claMotiAnaly.getMotiDict()
        self.type2API = claMotiAnaly.getType2API()
        self.ScAPI = claMotiAnaly.getScAPI()
        self.winCat2res = claMotiAnaly.getWinCat2Res()
        self.winRes2poli = claMotiAnaly.getWinRes2point()

        famPath = claStageMatrix.getFamPath()
        self.fam_name = famPath.split('/')[-1]
        self.path_comMotif_dir = famPath + '/' + self.fam_name + '_final common motif/'
        if not os.path.isdir(self.path_comMotif_dir):
            os.makedirs(self.path_comMotif_dir)

        self.__saveCatGraph()
        self.__saveComMotiDetailAPI()
        self.__saveResAPIcatStatCsv()

    #===private function
    def __orderedMotifKeys(self):
        """Return the motif keys ordered by the integer between char 1 and the first '_'."""
        return sorted(self.motiDict, key=lambda k: int(k[1:k.index('_')]))

    def __api2catMap(self):
        """Map every API name to the first category (in dict order) that lists it."""
        api2cat = {}
        for cat in self.type2API:
            for api in self.type2API[cat]:
                api2cat.setdefault(api, cat)
        return api2cat

    @staticmethod
    def __categoryOf(api2cat, api):
        """Look up the category of *api*; IndexError (as before) when it is unlisted."""
        try:
            return api2cat[api]
        except KeyError:
            raise IndexError('API %r is not listed in any category of type2API' % (api,))

    def __saveCatGraph(self):
        """Plot, per motif, how many calls fall in each API category plus the motif length."""
        motif_dict = self.motiDict
        if not motif_dict:
            # Nothing to plot; max() over an empty sequence below would raise.
            print('\t0 common motif')
            return

        ordered_keys = self.__orderedMotifKeys()
        api2cat = self.__api2catMap()
        motif_lens = [len(motif_dict[k]) for k in ordered_keys]

        # [short tag, y position, colour] per category; index matches cat_slot.
        cat2color_dict = [['lib',1,'r'], ['pro',2,'y'], ['fil',3,'b'], ['reg',4,'k'], ['net',5,'g']]
        cat_slot = {'Library': 0, 'Process': 1, 'File': 2, 'Registry': 3, 'Network': 4}

        fig = plt.figure(num=None, figsize=(18, 6), dpi=80, facecolor='w', edgecolor='k')
        # Upper three quarters: category scatter plot.  (A redundant
        # plt.subplot(4,1,1) used to precede this call; the grid axes overlapped
        # and replaced it on the matplotlib versions that auto-removed
        # overlapping axes, and it would stay visible on those that do not.)
        plt.subplot2grid((4,1), (0,0), rowspan=3)
        plt.title('API category graph', fontsize=16)
        plt.ylabel('Category')
        plt.xlim(0, len(motif_dict)+2)
        plt.ylim(0, 6)

        # Count the calls of each category for every motif (by position).
        stage2cat_dict = {}
        for i, k in enumerate(ordered_keys):
            plot_size = [0, 0, 0, 0, 0]  # library, process, file, registry, network
            for _t, a in motif_dict[k]:
                api = a.split('#PR#')[0]
                cat = self.__categoryOf(api2cat, api)
                if cat in cat_slot:
                    plot_size[cat_slot[cat]] += 1
            stage2cat_dict[i] = plot_size

        biggest = max(max(sizes) for sizes in stage2cat_dict.values())
        if biggest > 10:
            scatterMax = int(round(biggest + 10, -1))  # round to the tens place
        else:
            scatterMax = biggest + 10
        # Floor division keeps the bucket width the original computed under
        # Python 2, where int / int already truncated before math.ceil ran.
        scatterInterval = scatterMax // 3
        scatterRange = list(range(10, scatterMax + scatterInterval, scatterInterval))

        for s in stage2cat_dict:
            # Snap every count up to the upper bound of its bucket so the
            # marker sizes match the legend entries; 0 and exact bounds stay.
            sizes = stage2cat_dict[s]
            lower = 0
            for upper in scatterRange:
                sizes = [upper if lower < x < upper else x for x in sizes]
                lower = upper
            for size, (_tag, y_pos, colour) in zip(sizes, cat2color_dict):
                plt.scatter(s+1, y_pos, c=colour, s=size*5, marker='o')

        legendList = []
        lower = 0
        for upper in scatterRange:
            legendList.append(plt.scatter([], [], c='w', marker='o', s=upper*5,
                                          label=str(lower+1)+' - '+str(upper)))
            lower = upper
        plt.legend(handles=legendList, scatterpoints=1, labelspacing=1.3, borderpad=1)
        plt.yticks(range(1,6), ['Library', 'Process', 'File', 'Registry', 'Network'])
        plt.tick_params(       # remove the unneeded x tick marks and labels
            axis='x',          # changes apply to the x-axis
            which='both',      # both major and minor ticks are affected
            bottom=False,      # ticks along the bottom edge are off
            top=False,         # ticks along the top edge are off
            labelbottom=False) # booleans: the string 'off' is truthy on newer matplotlib

        # Lower quarter: the length of every motif drawn as a vertical bar.
        plt.subplot2grid((4,1), (3,0))
        y_Max = max(motif_lens) + 5
        y_interval = 20 if y_Max > 100 else 10
        plt.yticks(range(0, y_Max, y_interval))

        x_Max = len(motif_dict) + 2
        x_interval = 5 if x_Max > 10 else 1
        plt.xticks(range(0, x_Max, x_interval))

        plt.xlim(0, x_Max)
        plt.ylim(0, y_Max)

        # vlines(x, ymin, ymax) with three flat sequences of equal length,
        # e.g. ([1, 2], [0, 0], [5, 43]).
        plt.vlines(list(range(1, len(motif_dict)+1)),
                   [0]*len(motif_dict),
                   motif_lens)
        for i, length in enumerate(motif_lens):
            plt.annotate(str(length), xy=(i+1, length), size='small')

        #spacing
        plt.xlabel('Timeline (ordered in stage index)')
        plt.ylabel('API numbers')
        plt.tight_layout()
        plt.savefig(self.path_comMotif_dir + self.fam_name + '_Timline- category graph' + '.png', dpi=300)
        plt.close(fig)  # release the figure; one is created per family

        print('--- 2.1 output CatGraph---')

    # For every common motif draw one cluster: a node per API category showing
    # the SC_APIs used in that category and how often each was called.
    def __saveComMotiDetailAPI(self):
        """Render one Graphviz PNG per motif summarising its SC_API calls by category."""
        motif_dict = self.motiDict
        type2API_dict = self.type2API
        SC_API = self.ScAPI
        api2cat = self.__api2catMap()

        # Per category, the APIs that are SC_APIs (denominator of "x out of y").
        type2SC_APIList_dict = {cat: [api for api in type2API_dict[cat] if api in SC_API]
                                for cat in type2API_dict}
        fill_colours = {'Library': '#ff00000f',
                        'Process': '#ffff000f',
                        'File': '#0000ff0f',
                        'Registry': '#00ffff0f',
                        'Network': '#00ff000f'}

        for k in self.__orderedMotifKeys():
            g = pgv.AGraph()
            category_dict = {cat: [] for cat in type2API_dict}
            for _t, a in motif_dict[k]:
                api = a[:a.index('#')]
                # Looked up before the SC_API test, so an API missing from
                # every category still raises, as it always did.
                category = self.__categoryOf(api2cat, api)
                if api in SC_API and category in fill_colours:
                    category_dict[category].append(api)

            for cat in category_dict:
                used_apis = category_dict[cat]
                if not used_apis:
                    continue

                call_count = {}
                for a in used_apis:
                    call_count[a] = call_count.get(a, 0) + 1

                node_id = len(g.nodes()) + 1
                g.add_node(node_id, label=cat)
                node = g.get_node(node_id)
                # Graphviz paints fillcolor only when style is 'filled'; the
                # original assigned 'filled' to fillcolor, so nodes stayed unfilled.
                node.attr['style'] = 'filled'
                node.attr['fillcolor'] = fill_colours[cat]

                # "<cat>(<calls>)", "(<distinct> out of <SC_APIs in cat> SC_APIs)",
                # then one "<api>: <calls>" line per API in sorted order.
                label = (cat + '(' + str(len(used_apis)) + ')\n'
                         + '(' + str(len(call_count))
                         + ' out of ' + str(len(type2SC_APIList_dict[cat])) + ' SC_APIs)\n')
                for a in sorted(call_count):
                    label += a + ': ' + str(call_count[a]) + '\n'
                node.attr['label'] = label

            g.add_subgraph(list(range(1, len(g.nodes())+1)),
                           name='cluster_SC_API',
                           style='rounded',
                           label=k + ' (API len:' + str(len(motif_dict[k])) + ')')
            g.draw(self.path_comMotif_dir + self.fam_name + '_' + k + '.png', format='png', prog='dot')

        print('--- 2.2 output ComMotiDetailAPI---')

    # Present the resource statistics as a table.
    def __saveResAPIcatStatCsv(self):
        """Write, per API category, how many resources have each def-chain length.

        The "length" of a resource is the number of distinct entries in
        ``winRes2poli[resource]``; column headers show that number minus one.
        """
        type2API_dict = self.type2API
        winCat2res_dict = self.winCat2res
        winRes2poli_dict = self.winRes2poli

        defchainLen_set = set()
        # category -> {distinct-entry count: number of resources with that count}
        type2defchainLen2num_dict = {}
        for cat in type2API_dict:
            defchainLen2num_dict = {}
            for rsrc in set(winCat2res_dict[cat]):  # each resource of the category once
                chain_len = len(set(winRes2poli_dict[rsrc]))
                defchainLen2num_dict[chain_len] = defchainLen2num_dict.get(chain_len, 0) + 1
            defchainLen_set.update(defchainLen2num_dict)
            type2defchainLen2num_dict[cat] = defchainLen2num_dict

        ordered_lens = sorted(defchainLen_set)

        # Resources with at least two entries, duplicates included
        # (e.g. [(1, 4), (1, 4)] counts here although it has one distinct entry).
        total = sum(1 for var in winRes2poli_dict if len(winRes2poli_dict[var]) >= 2)

        # Resources with at least two *distinct* entries, summed over categories.
        totalLen_noSameCat = 0

        out_path = self.path_comMotif_dir + self.fam_name + '_resources in API category stat.csv'
        with open(out_path, 'w') as f:
            # first row
            f.write('Category,Resource #,')
            for defchainLen in ordered_lens:
                f.write('Len=' + str(defchainLen-1) + ',')
            f.write('\n')

            # one row per category
            for cat in type2API_dict:
                len2num = type2defchainLen2num_dict[cat]
                f.write(cat + ',' + str(sum(len2num.values())) + ',')
                for defchainLen in ordered_lens:
                    if defchainLen not in len2num:
                        f.write('0,')
                    else:
                        f.write(str(len2num[defchainLen]) + ',')
                        if defchainLen-1 > 0:
                            totalLen_noSameCat += len2num[defchainLen]
                f.write('\n')

            f.write('\ntotal resource,' + str(len(winRes2poli_dict)) + '\n')
            f.write('defchainLen!=0_sameCat ,' + str(total) + '\n')
            f.write('defchainLen!=0_nosameCat ,' + str(totalLen_noSameCat) + '\n')

        print('--- 2.3 output ResAPIcatStatCsv---')
        

