# Data Cutting

In [2]:
import os
from tqdm import tqdm
import numpy as np
from keras import Input, Model, models
import tensorflow as tf
from keras.layers import GRU, Bidirectional, Dropout, TimeDistributed, Dense, Activation, GlobalAveragePooling1D, Reshape, MaxPooling1D, Multiply

In [1]:
def DataCutting(irpath, labelpath, datafile):
    """数据切割

    :param irpath: 待切割ir数据地址
    :param labelpath: 待切割标签地址
    :param datafile: 切割后数据存放文件夹
    :return:
    """

    #切割完后存放标签的地址
    train_label_path = datafile + '/train_label.txt'
    test_label_path = datafile + '/test_label.txt'
    validation_label_path = datafile + '/validation_label.txt'
    #切割完后存放IR向量地址
    train_IR_path = datafile + '/train_IR.txt'
    test_IR_path = datafile + '/test_IR.txt'
    validation_IR_path = datafile + '/validation_IR.txt'

    #切割方式为每十行都把数据划分为6:2:2，分别为训练集、验证集、测试集
    #pbar用于进度条显示
    with tqdm(total=os.path.getsize(labelpath)) as pbar:
        with open(labelpath, 'r') as f1:
            train_label = open(train_label_path, 'a', encoding='UTF-8')
            test_label = open(test_label_path, 'a', encoding='UTF-8')
            validation_label = open(validation_label_path, 'a', encoding='UTF-8')
            t = 0
            train_num = 0
            test_num = 0
            validation_num = 0
            for line in f1:
                t += 1
                pbar.update(len(line))
                if t <= 6:
                    train_label.write(line)
                    train_num += 1
                elif t <= 8:
                    test_label.write(line)
                    test_num += 1
                elif t <= 10:
                    validation_label.write(line)
                    validation_num += 1
                    if t == 10:
                        t = 0

            train_label.close()
            test_label.close()
            validation_label.close()
            print("train_num: " + str(train_num))
            print("test_num: " + str(test_num))
            print("validation_num: " + str(validation_num))

    with tqdm(total=os.path.getsize(irpath)) as pbar:
        with open(irpath, 'r') as f1:
            train_IR = open(train_IR_path, 'a', encoding='UTF-8')
            test_IR = open(test_IR_path, 'a', encoding='UTF-8')
            validation_IR = open(validation_IR_path, 'a', encoding='UTF-8')
            t = 0
            k = 0
            for line in f1:
                pbar.update(len(line))
                if t < 6:
                    train_IR.write(line)
                elif t < 8:
                    test_IR.write(line)
                elif t < 10:
                    validation_IR.write(line)
                if line.find('#') != -1:
                    t += 1
                    k += 1
                    if t == 10:
                        t = 0

            print("data_sum: " + str(k))

            train_IR.close()
            test_IR.close()
            validation_IR.close()


#原始数据地址
label_path = '../data/label_Juliet2.txt'
IR_path = '../data/data_Juliet2.txt'
#切割后数据存放文件夹
data_file = '../data/Juliet'
DataCutting(IR_path, label_path, data_file)

100%|██████████| 24235/24235 [00:00<00:00, 215043.60it/s]


train_num: 600
test_num: 200
validation_num: 200


100%|██████████| 293765750/293765750 [00:03<00:00, 79619590.45it/s]

data_sum: 1000





# Data Generator

## TrainDataGenerator

In [11]:

def TrainDataGenerator(data_path, label_path, batch_size=64, maxlen=1000):
    """训练数据集生成器

    :param data_path: 数据集路径
    :param label_path: 行号标签路径
    :param batch_size: 批次大小
    :param maxlen: 时间步维度，即最大保留多少行号
    :return: 一个generator,形式为([dataSet,matrixSet], labelSet)
        dataSet:IR向量数据集
        matrixSet:注意力矩阵
        labelSet:行号标签
    """

    fd = open(data_path)
    fl = open(label_path)
    datas = fd.readlines()
    datas_tag = 0
    labels = fl.readlines()
    # print(len(datas), len(labels))
    iter_num = int(len(labels) / batch_size)
    print(iter_num)
    i = 0
    while iter_num:
        irLine = []  #每行ir向量
        irList = []  #ir切片向量列表
        labelList = []  #label列表
        vulList = []  #漏洞列表
        matrixList = []
        label_line = labels[i:i + batch_size]

        for line in label_line:
            line = line.strip()
            a = line.split()
            a = list(map(float, a))
            if a[0] != 0:
                vulList.append(a)
                labelList.append(1)
            else:
                vulList.append(0)
                labelList.append(0)

        for vp in range(len(vulList)):
            #先求漏洞行号标注在一个一维向量上
            if not vulList[vp]:
                attentionLine = [1] * maxlen
            else:
                attentionLine = [0] * maxlen
                for vul in vulList[vp]:
                    if int(vul) > maxlen:
                        continue
                    attentionLine[int(vul) - 1] = 1
            #再将其转化为矩阵
            attentionmatrix = np.diag(attentionLine)
            matrixList.append(attentionmatrix)

        while len(irList) < batch_size:
            line = datas[datas_tag]
            datas_tag += 1
            #逐行遍历：行内字段按'\t'分隔符分隔，转换为列表
            line = line.strip()
            a = line.split('\t')
            if '#' not in a[0]:
                a = list(map(float, a))
                irLine.append(a)
            else:
                x = [0 for t in range(300)]
                while len(irLine) < maxlen:
                    irLine.append(x)
                irList.append(irLine)
                irLine = []
                continue

        dataSet = np.array(irList)
        labelSet = np.array(labelList)
        matrixSet = np.array(matrixList)
        # print(i)
        # print(dataSet.ndim)
        if dataSet.ndim != 3:
            i += batch_size
            iter_num -= 1
            if iter_num == 0:
                iter_num = int(len(labels) / batch_size)
                datas_tag = 0
                i = 0
            continue
        yield [dataSet, matrixSet], labelSet
        i += batch_size

        iter_num -= 1
        if iter_num == 0:
            iter_num = int(len(labels) / batch_size)
            datas_tag = 0
            i = 0

## TestDataGenerator

In [12]:
def TestDataGenerator(data_path, label_path, batch_size=64, maxlen=1000):
    """测试数据集生成器

    :param data_path: 数据集路径
    :param label_path: 行号标签路径
    :param batch_size: 批次大小
    :param maxlen: 时间步维度，即最大保留多少行号
    :return: 一个generator,形式为([dataSet,matrixSet], labelSet,vulList)
        dataSet:IR向量数据集
        matrixSet:注意力矩阵
        labelSet:行号01标签
        vulList:行号标签

    """

    fd = open(data_path)
    fl = open(label_path)
    datas = fd.readlines()
    labels = fl.readlines()
    # print(len(datas), len(labels))
    iter_num = int(len(labels) / batch_size)
    print(iter_num)
    datas_tag = 0
    i = 0
    while iter_num:
        irLine = []  #每行ir向量
        irList = []  #ir切片向量列表
        labelList = []  #label列表
        vulList = []  #漏洞列表
        matrixList = []
        label_line = labels[i:i + batch_size]

        for line in label_line:
            line = line.strip()
            a = line.split()
            a = list(map(float, a))
            if a[0] != 0:
                vulList.append(a)
                labelList.append(1)
            else:
                vulList.append(0)
                labelList.append(0)

        for vp in range(len(vulList)):
            #先求漏洞行号标注在一个一维向量上
            if not vulList[vp]:
                attentionLine = [1] * maxlen
            else:
                attentionLine = [0] * maxlen
                for vul in vulList[vp]:
                    if int(vul) > maxlen:
                        continue
                    attentionLine[int(vul) - 1] = 1
            #再将其转化为矩阵
            attentionmatrix = np.diag(attentionLine)
            matrixList.append(attentionmatrix)

        while len(irList) < batch_size:
            line = datas[datas_tag]
            datas_tag += 1
            #逐行遍历：行内字段按'\t'分隔符分隔，转换为列表
            line = line.strip()
            a = line.split('\t')
            if '#' not in a[0]:
                a = list(map(float, a))
                irLine.append(a)
            else:
                x = [0 for t in range(300)]
                while len(irLine) < maxlen:
                    irLine.append(x)
                irList.append(irLine)
                irLine = []
                continue

        dataSet = np.array(irList)
        labelSet = np.array(labelList)
        matrixSet = np.array(matrixList)
        # print(i)
        # print(dataSet.ndim)
        if dataSet.ndim != 3:
            i += batch_size
            iter_num -= 1
            if iter_num == 0:
                iter_num = int(len(labels) / batch_size)
                datas_tag = 0
                i = 0
            continue
        yield [dataSet, matrixSet], labelSet, vulList
        i += batch_size

        iter_num -= 1
        if iter_num == 0:
            iter_num = int(len(labels) / batch_size)
            datas_tag = 0
            i = 0

## cs

In [12]:
train_IR_path = '../data/Juliet/train_IR.txt'
test_IR_path = '../data/Juliet/test_IR.txt'
validation_IR_path = '../data/Juliet/validation_IR.txt'

train_label_path = '../data/Juliet/train_label.txt'
test_label_path = '../data/Juliet/test_label.txt'
validation_label_path = '../data/Juliet/validation_label.txt'

label_path = '../data/label_Juliet2.txt'
data_path = '../data/data_Juliet2.txt'

for i, ([data, attMatrix], label) in enumerate(TrainDataGenerator(train_IR_path, train_label_path, batch_size=64)):
    print(i, data.shape, label.shape, attMatrix.shape)

9
0 (64, 1000, 300) (64,) (64, 1000, 1000)
1 (64, 1000, 300) (64,) (64, 1000, 1000)


KeyboardInterrupt: 

# Model Build

In [4]:
from keras.layers import Masking


def build_model(maxlen, dropout, units):
    """

    :param maxlen:最大时间步(最大行号)
    :param dropout:随机让神经元停止工作的概率
    :param units:GRU神经元数量
    :return:
    """
    inputs = Input(shape=(maxlen, 300))
    mask_1 = Masking(mask_value=0.0, name='mask_1')(inputs)
    bgru_1 = Bidirectional(GRU(units=units,
                               activation='tanh',
                               recurrent_activation='sigmoid',
                               return_sequences=True),
                           name='bgru_1')(mask_1)
    dropout_1 = Dropout(rate=dropout, name='dropout_1')(bgru_1)
    bgru_2 = Bidirectional(GRU(units=units,
                               activation='tanh',
                               recurrent_activation='sigmoid',
                               return_sequences=True),
                           name='bgru_2')(dropout_1)
    dropout_2 = Dropout(rate=dropout, name='dropout_2')(bgru_2)

    dense_1 = TimeDistributed(Dense(1), name='dense1')(dropout_2)
    activation_1 = Activation('sigmoid', name='activation_1')(dense_1)

    att_Matrix_1 = Input(shape=(maxlen, maxlen), name='att_Matrix')
    multiply_1 = Multiply(name='multiply_1')([att_Matrix_1, activation_1])
    reshape_1 = Reshape((1, maxlen ** 2))(multiply_1)

    k_max_1 = MaxPooling1D(pool_size=maxlen ** 2, data_format='channels_first')(reshape_1)
    average_1 = GlobalAveragePooling1D(name='average_1')(k_max_1)

    model = Model(inputs=[inputs, att_Matrix_1], outputs=average_1)
    model.summary()

    model.compile(optimizer="adam",
                  loss="binary_crossentropy",
                  metrics=["accuracy",
                           "Precision",
                           "Recall",
                           "TruePositives",
                           "TrueNegatives",
                           "FalsePositives",
                           "FalseNegatives"])
    return model


model = build_model(maxlen=1000, dropout=0.4, units=128)

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 1000, 300)]  0                                            
__________________________________________________________________________________________________
mask_1 (Masking)                (None, 1000, 300)    0           input_1[0][0]                    
__________________________________________________________________________________________________
bgru_1 (Bidirectional)          (None, 1000, 64)     64128       mask_1[0][0]                     
__________________________________________________________________________________________________
dropout_1 (Dropout)             (None, 1000, 64)     0           bgru_1[0][0]                     
______________________________________________________________________________________________

# Model Train

In [5]:
def train_model(model, TrainGenerator, ValidGenerator):
    callbacks = [
        tf.keras.callbacks.ModelCheckpoint(
            # 模型路径
            filepath='./model/model_{epoch:02d}-{val_accuracy:.2f}.h5',
            # 是否保存最佳
            save_best_only=True,
            # 监控指标
            monitor='val_accuracy',
            # 进度条类型
            verbose=1
        )
    ]

    model.fit(TrainGenerator,
              steps_per_epoch=9,  #每批次总步数
              epochs=4,  #批次数
              validation_data=ValidGenerator,  #验证集
              validation_steps=5,  #每次验证步数
              validation_batch_size=64,  #验证集每批次数据大小
              verbose=1,
              callbacks=callbacks)


train_IR_path = '../data/Juliet/train_IR.txt'
validation_IR_path = '../data/Juliet/validation_IR.txt'

train_label_path = '../data/Juliet/train_label.txt'
validation_label_path = '../data/Juliet/validation_label.txt'

tdg = TrainDataGenerator(train_IR_path, train_label_path, batch_size=64)
vdg = TrainDataGenerator(validation_IR_path, validation_label_path, batch_size=64)
train_model(model, tdg, vdg)

9
Epoch 1/4

Epoch 00001: val_accuracy improved from -inf to 0.66250, saving model to ./model\model_01-0.66.h5
Epoch 2/4





Epoch 00002: val_accuracy improved from 0.66250 to 0.69063, saving model to ./model\model_02-0.69.h5
Epoch 3/4

Epoch 00003: val_accuracy did not improve from 0.69063
Epoch 4/4

Epoch 00004: val_accuracy did not improve from 0.69063


# Model Test

In [24]:

def get_predict_line(value_sequence, threshold_value=0.5):
    value_sequence = list(np.array(value_sequence))
    vs = len(value_sequence) - 1
    while value_sequence[vs] == value_sequence[-1]:
        vs -= 1
    value_sequence = value_sequence[:vs + 2]
    # print(value_sequence)
    dict={}
    for i in range(len(value_sequence)):
        dict[i]=value_sequence[i]
    # print(dict)
    #按照value_sequence值大小排序
    dict=sorted(dict.items(), key = lambda kv:(kv[1], kv[0]),reverse=True)
    # print(dict)
    predict_line = []
    t=0
    for d in dict:
        t+=1
        if t>20:
            break
        if d[1] > threshold_value:
            predict_line.append(d[0])
    return predict_line


def test_model(model, datapath, labelpath, result_path):
    TP, TN, FP, FN = 0, 0, 0, 0
    TP_l, TN_l, FP_l, FN_l = 0, 0, 0, 0
    loU_list = []
    partial_model = Model(inputs=model.layers[0].input, outputs=model.layers[8].output)
    test_data = TestDataGenerator(datapath, labelpath, batch_size=64)
    batch_size = 64
    iter_num = 92
    for i in range(3):
        print("epochs: " + str(i))
        td = next(test_data)
        output_test = partial_model([td[0][0]], training=False)
        label = td[1]
        vul_line = td[2]
        # print(output_test.shape,label.shape,len(vul_line))
        for j in range(batch_size):
            predict_line = get_predict_line(output_test[j])
            print(predict_line,label[j],vul_line[j])
            # print(output_test[i])
            # print(predict_line)
            # print(label[j])
            if predict_line:
                label_pred = 1
            else:
                label_pred = 0
            # print(label_pred,label[j])
            if label_pred == 0 and label[j] == 0:
                TN += 1
                TN_l += 1
            if label_pred == 0 and label[j] == 1:
                FN += 1
                FN_l += 1
            if label_pred == 1 and label[j] == 0:
                FP += 1
                FP_l += 1
            if label_pred == 1 and label[j] == 1:
                TP += 1
                flag_l = False
                for pred in predict_line:
                    if pred in vul_line[j]:
                        flag_l = True
                        break
                if flag_l:
                    TP_l += 1
                else:
                    FN_l += 1
                overlap_line = list(set(predict_line).intersection(set(vul_line[j])))
                union_line = list(set(predict_line).union(set(vul_line[j])))
                loU = len(overlap_line) / len(union_line)
                loU_list.append(loU)
        print('TP:' + str(TP) + ' FP:' + str(FP) + ' FN:' + str(FN) + ' TN:' + str(TN))
        print('TP_l:' + str(TP_l) + ' FP_l:' + str(FP_l) + ' FN_l:' + str(FN_l) + ' TN_l:' + str(TN_l))
    FPR = FP / (FP + TN)
    FNR = FN / (TP + FN)
    accuracy = (TP + TN) / (TP + TN + FP + FN)
    precision = TP / (TP + FP)
    recall = TP / (TP + FN)
    F1_score = (2 * precision * recall) / (precision + recall)

    FPR_line = FP_l / (FP_l + TN_l)
    FNR_line = FN_l / (TP_l + FN_l)
    accuracy_line = (TP_l + TN_l) / (TP_l + FP_l + FN_l + TN_l)
    precision_line = TP_l / (TP_l + FP_l)
    recall_line = TP_l / (TP_l + FN_l)
    F1_score_line = (2 * precision_line * recall_line) / (precision_line + recall_line)

    loU = np.mean(loU_list)
    # print('test_samples_num: ' + str(len(data)) + '\n')
    print('TP:' + str(TP) + ' FP:' + str(FP) + ' FN:' + str(FN) + ' TN:' + str(TN) + '\n')
    print('FPR: ' + str(FPR) + '\n')
    print('FNR: ' + str(FNR) + '\n')
    print('accuracy: ' + str(accuracy) + '\n')
    print('precision: ' + str(precision) + '\n')
    print('recall: ' + str(recall) + '\n')
    print('F1_score: ' + str(F1_score) + '\n\n')

    print('TP_l:' + str(TP_l) + ' FP_l:' + str(FP_l) + ' FN_l:' + str(FN_l) + ' TN:' + str(TN_l) + '\n')
    print('FPR_location: ' + str(FPR_line) + '\n')
    print('FNR_location: ' + str(FNR_line) + '\n')
    print('accuracy_location: ' + str(accuracy_line) + '\n')
    print('precision_location: ' + str(precision_line) + '\n')
    print('recall_location: ' + str(recall_line) + '\n')
    print('F1_score_location: ' + str(F1_score_line) + '\n\n')

    print('loU: ' + str(loU) + '\n')

    with open(result_path, 'a') as fwrite:
        # fwrite.write('test_samples_num: ' + str(len(data)) + '\n')
        fwrite.write('TP:' + str(TP) + ' FP:' + str(FP) + ' FN:' + str(FN) + ' TN:' + str(TN) + '\n')
        fwrite.write('FPR: ' + str(FPR) + '\n')
        fwrite.write('FNR: ' + str(FNR) + '\n')
        fwrite.write('accuracy: ' + str(accuracy) + '\n')
        fwrite.write('precision: ' + str(precision) + '\n')
        fwrite.write('recall: ' + str(recall) + '\n')
        fwrite.write('F1_score: ' + str(F1_score) + '\n\n')

        fwrite.write('TP_l:' + str(TP_l) + ' FP_l:' + str(FP_l) + ' FN_l:' + str(FN_l) + ' TN:' + str(TN_l) + '\n')
        fwrite.write('FPR_location: ' + str(FPR_line) + '\n')
        fwrite.write('FNR_location: ' + str(FNR_line) + '\n')
        fwrite.write('accuracy_location: ' + str(accuracy_line) + '\n')
        fwrite.write('precision_location: ' + str(precision_line) + '\n')
        fwrite.write('recall_location: ' + str(recall_line) + '\n')
        fwrite.write('F1_score_location: ' + str(F1_score_line) + '\n\n')

        fwrite.write('loU: ' + str(loU) + '\n')


modelPath = './model2/model_09-0.95.h5'
resultPath = './result/result_model_09_0.95_validation.txt'

# test_IR_path = '../data/Juliet/test_IR.txt'
# test_label_path = '../data/Juliet/test_label.txt'
test_IR_path = '../data/Juliet/validation_IR.txt'
test_label_path = '../data/Juliet/validation_label.txt'

model = models.load_model(modelPath)
test_model(model, test_IR_path, test_label_path, resultPath)


epochs: 0
3
[] 0 0
[754, 143, 58, 325, 586, 283, 670, 628, 712, 456, 544, 367, 100, 15, 241, 186, 499, 411, 412, 500] 0 0
[] 0 0
[] 0 0
[] 0 0
[] 0 0
[4] 1 [5.0, 6.0, 7.0]
[] 0 0
[] 0 0
[] 0 0
[] 0 0
[] 0 0
[] 0 0
[] 0 0
[] 1 [12.0, 13.0, 14.0]
[] 0 0
[] 0 0
[] 0 0
[759, 581, 63, 148, 320, 675, 278, 717, 15, 451, 539, 236, 362, 105, 633, 191, 494, 406, 407, 495] 1 [6.0, 7.0, 8.0, 62.0, 63.0, 64.0, 111.0, 112.0, 113.0, 161.0, 162.0, 163.0, 210.0, 211.0, 212.0, 263.0, 264.0, 265.0, 312.0, 313.0, 314.0, 361.0, 362.0, 363.0, 410.0, 411.0, 412.0, 460.0, 461.0, 462.0, 513.0, 514.0, 515.0, 562.0, 563.0, 564.0, 615.0, 616.0, 617.0, 664.0, 665.0, 666.0, 712.0, 713.0, 714.0, 726.0, 727.0, 728.0, 775.0, 776.0, 777.0, 824.0, 825.0, 826.0, 873.0, 874.0, 875.0]
[] 0 0
[] 0 0
[] 0 0
[] 0 0
[] 0 0
[] 0 0
[] 0 0
[] 0 0
[4] 1 [5.0, 6.0, 7.0]
[] 0 0
[] 0 0
[754, 143, 58, 315, 586, 712, 670, 628, 273, 544, 231, 357, 100, 15, 456, 401, 499, 186, 402, 500] 1 [6.0, 7.0, 8.0, 56.0, 57.0, 58.0, 105.0, 106.0, 1

 ## cs

### 模型中间结果测试

In [14]:
modelPath = './model2/model_09-0.95.h5'

test_IR_path = '../data/Juliet/test_IR.txt'
test_label_path = '../data/Juliet/test_label.txt'
model = models.load_model(modelPath)

test_data = TestDataGenerator(test_IR_path, test_label_path, batch_size=64)
td = next(test_data)
partial_model = Model(inputs=model.layers[0].input, outputs=model.layers[8].output)
output_test = partial_model([td[0][0]], training=False)
label = td[1]
vul_line = td[2]
print(output_test.shape, label.shape, len(vul_line))


3
(64, 1000, 1) (64,) 64


In [15]:
print(output_test[1])

tf.Tensor(
[[0.04940239]
 [0.04741302]
 [0.0427826 ]
 [0.0496155 ]
 [0.05577952]
 [0.05368701]
 [0.13170964]
 [0.9797673 ]
 [0.9982405 ]
 [0.9979354 ]
 [0.9975699 ]
 [0.9967884 ]
 [0.9962789 ]
 [0.9956728 ]
 [0.9949349 ]
 [0.14615348]
 [0.14615348]
 [0.14615348]
 [0.14615348]
 [0.14615348]
 [0.14615348]
 [0.14615348]
 [0.14615348]
 [0.14615348]
 [0.14615348]
 [0.14615348]
 [0.14615348]
 [0.14615348]
 [0.14615348]
 [0.14615348]
 [0.14615348]
 [0.14615348]
 [0.14615348]
 [0.14615348]
 [0.14615348]
 [0.14615348]
 [0.14615348]
 [0.14615348]
 [0.14615348]
 [0.14615348]
 [0.14615348]
 [0.14615348]
 [0.14615348]
 [0.14615348]
 [0.14615348]
 [0.14615348]
 [0.14615348]
 [0.14615348]
 [0.14615348]
 [0.14615348]
 [0.14615348]
 [0.14615348]
 [0.14615348]
 [0.14615348]
 [0.14615348]
 [0.14615348]
 [0.14615348]
 [0.14615348]
 [0.14615348]
 [0.14615348]
 [0.14615348]
 [0.14615348]
 [0.14615348]
 [0.14615348]
 [0.14615348]
 [0.14615348]
 [0.14615348]
 [0.14615348]
 [0.14615348]
 [0.14615348]
 [0.14615

In [18]:
print(get_predict_line(output_test[6]))

[13, 14, 15, 16, 17, 18, 19, 20, 21, 12, 11]


In [46]:
def dictionairy():

    # 声明字典
    key_value ={}

    # 初始化
    key_value[2] = 56
    key_value[1] = 2
    key_value[5] = 12
    key_value[4] = 24
    key_value[6] = 18
    key_value[3] = 323


    print ("按值(value)排序:")
    dict=sorted(key_value.items(), key = lambda kv:(kv[1], kv[0]),reverse=True)
    print(dict)
    for i in dict:
        print(i[0],i[1])
dictionairy()

按值(value)排序:
[(3, 323), (2, 56), (4, 24), (6, 18), (5, 12), (1, 2)]
3 323
2 56
4 24
6 18
5 12
1 2


In [61]:
print(label)
print(vul_line[4])
# print(output_test[1])
predict_line = get_predict_line(output_test[7])
print(predict_line)
if predict_line:
    label_pred = 1
else:
    label_pred = 0
print(label_pred)

[0 1 0 0 0 0 1 1 0 0 0 1 1 0 0 1 1 0 0 1 0 0 1 1 0 0 1 0 1 1 0 0 0 0 1 0 0
 0 1 0 0 0 1 1 0 0 0 1 0 1 0 0 0 1 1 1 0 0 0 1 0 0 1 1]
0
[754, 58, 325, 586, 670, 628, 283, 712, 456, 544]
1


### 模型最终结果显示

In [6]:
td = next(test_data)
output = model([td[0]], training=False)
label = td[1]
print(label)
print(output)


NameError: name 'test_data' is not defined

In [12]:
test_data2 = TrainDataGenerator(test_IR_path, test_label_path, batch_size=64)
model.evaluate(test_data2, batch_size=64, steps=100)

15
  9/100 [=>............................] - ETA: 4:04 - loss: 0.1923 - accuracy: 0.9306 - precision: 0.9638 - recall: 0.7917 - true_positives: 133.0000 - true_negatives: 403.0000 - false_positives: 5.0000 - false_negatives: 35.0000

KeyboardInterrupt: 

# Model Predict

In [25]:
def loadData(datapath):
    irLine = []  # 每行ir向量
    irList = []  # ir切片向量列表
    print('Data processing progress:')
    # 打开文件：以二进制读模式、utf-8格式的编码方式打开
    with tqdm(total=os.path.getsize(datapath)) as pbar:
        with open(datapath, 'r', encoding='utf-8') as frData:
            for line in frData:
                pbar.update(len(line))
                # 逐行遍历：行内字段按'\t'分隔符分隔，转换为列表
                line = line.strip()
                a = line.split('\t')
                if '#' not in a[0]:
                    a = list(map(float, a))
                    irLine.append(a)
                else:
                    x = [0 for i in range(300)]
                    while len(irLine) < 1000:
                        irLine.append(x)
                    irList.append(irLine)
                    irLine = []
                    continue
    return np.array(irList)

def get_predict_line(value_sequence, threshold_value=0.5):
    value_sequence = list(np.array(value_sequence))
    vs = len(value_sequence) - 1
    while value_sequence[vs] == value_sequence[-1]:
        vs -= 1
    value_sequence = value_sequence[:vs + 2]
    # print(value_sequence)
    dict={}
    for i in range(len(value_sequence)):
        dict[i]=value_sequence[i]
    # print(dict)
    #按照value_sequence值大小排序
    dict=sorted(dict.items(), key = lambda kv:(kv[1], kv[0]),reverse=True)
    # print(dict)
    predict_line = []
    t=0
    for d in dict:
        t+=1
        if t>20:
            break
        if d[1] > threshold_value:
            predict_line.append(d[0])
    return predict_line


def model_predict(model, datapath):
    data=loadData(datapath)
    partial_model = Model(inputs=model.layers[0].input, outputs=model.layers[8].output)
    output_test = partial_model([data], training=False)
    # print(output_test.shape)
    predict_line=[]
    for j in range(output_test.shape[0]):
        predict_line.append(get_predict_line(output_test[j]))
    return predict_line


modelPath = './model2/model_09-0.95.h5'

test_IR_path = '../data/data2.txt'

model = models.load_model(modelPath)
result=model_predict(model, test_IR_path)
print(result)

Data processing progress:


100%|██████████| 21834705/21834705 [00:00<00:00, 31180411.77it/s]


[[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [],