In [13]:
from kan import KAN
from collections import Counter
from sklearn.model_selection import KFold, cross_val_score, train_test_split
import matplotlib.pyplot as plt
from sklearn.datasets import make_moons
import torch
from sklearn.utils import shuffle
import random
import numpy as np
# torch.set_default_dtype(torch.float32)

In [14]:
def read_fasta(file_path):
    """Parse a FASTA-like file into parallel lists of sequences and labels.

    Header lines start with '>' and must contain at least one '|'; the text
    between the first and second '|' is parsed as the integer label. All
    non-blank lines between one header and the next are joined into a single
    sequence, so both one-line and line-wrapped records are supported.

    Args:
        file_path: Path of the text file to read.

    Returns:
        (protein_list, label_list): sequences as strings and labels as ints,
        both in file order.

    Raises:
        IndexError: if a header line contains no '|'.
        ValueError: if the label field is not an integer.
    """
    protein_list = []
    label_list = []
    chunks = []  # pieces of the record currently being read

    def flush():
        # Emit the sequence collected so far (if any) as one entry.
        if chunks:
            protein_list.append(''.join(chunks))
            chunks.clear()

    with open(file_path, 'r') as file:
        for line in file:
            line = line.strip()
            if not line:
                # Blank lines used to be stored as empty sequences, which
                # later caused divisions by zero in the composition features.
                continue
            if line.startswith('>'):
                flush()
                # Extract the label from the header.
                label_list.append(int(line.split('|')[1]))
            else:
                # Collect (part of) the protein sequence.
                chunks.append(line)
    flush()

    return protein_list, label_list

# Example file paths for the training file and the test file.
# NOTE(review): these are absolute, machine-specific paths.
train_path = '/tmp/pycharm_project_763/raw/ACP.txt'
test_path = '/tmp/pycharm_project_763/raw/ACPT.txt'

# Parse each file into (sequences, labels).
sequences, y = read_fasta(file_path=train_path)
sequences_test, y_test = read_fasta(file_path=test_path)

In [15]:
def calculate_aac(protein_list):
    """Compute the amino-acid composition (AAC) of each sequence.

    For every sequence, returns the fraction of its length taken up by each of
    the 20 residues in the fixed order 'ACDEFGHIKLMNPQRSTVWY'. Characters
    outside that alphabet are not counted but still contribute to the length,
    so a row may sum to less than 1.

    Args:
        protein_list: Iterable of sequences (strings of one-letter codes).

    Returns:
        A list with one 20-element list of floats per input sequence. An empty
        sequence yields a row of zeros instead of raising ZeroDivisionError.
    """
    amino_acids = 'ACDEFGHIKLMNPQRSTVWY'

    aac_list = []
    for protein_sequence in protein_list:
        sequence_length = len(protein_sequence)
        if sequence_length == 0:
            # No residues: composition is defined as all zeros.
            aac_list.append([0.0] * len(amino_acids))
            continue

        # Count every character once, then read out the 20 tracked residues.
        counts = Counter(protein_sequence)
        aac_list.append([counts[aa] / sequence_length for aa in amino_acids])

    return aac_list
# AAC feature rows for the training and test sequences.
aac_train, aac_test = calculate_aac(sequences), calculate_aac(sequences_test)

In [21]:
aac_train[0]

[0.13513513513513514,
 0.0,
 0.02702702702702703,
 0.02702702702702703,
 0.02702702702702703,
 0.10810810810810811,
 0.0,
 0.13513513513513514,
 0.1891891891891892,
 0.02702702702702703,
 0.0,
 0.02702702702702703,
 0.02702702702702703,
 0.08108108108108109,
 0.02702702702702703,
 0.0,
 0.02702702702702703,
 0.10810810810810811,
 0.02702702702702703,
 0.0]

In [16]:
# Hand-written per-residue property tables, one dict per property, keyed by
# one-letter amino-acid code (20 entries each).
# NOTE(review): the variable names look like AAindex accession IDs - confirm the
# values against the AAindex database. In the visible notebook these dicts are
# only referenced from commented-out code.
NAKH900113 = {
    'A': 1.61, 'L': 1.37,
    'R': 0.40, 'K': 0.62,
    'N': 0.73, 'M': 1.59,
    'D': 0.75, 'F': 1.24,
    'C': 0.37, 'P': 0.67,
    'Q': 0.61, 'S': 0.68,
    'E': 1.50, 'T': 0.92,
    'G': 3.12, 'W': 1.63,
    'H': 0.46, 'Y': 0.67,
    'I': 1.61, 'V': 1.30
}
KRIW710101={
    'A': 4.60, 'L': 3.25,
    'R': 6.50, 'K': 7.90,
    'N': 5.90, 'M': 1.40,
    'D': 5.70, 'F': 3.20,
    'C': -1.00, 'P': 7.00,
    'Q': 6.10, 'S': 5.25,
    'E': 5.60, 'T': 4.80,
    'G': 7.60, 'W': 4.00,
    'H': 4.50, 'Y': 4.35,
    'I': 2.60, 'V': 3.40
}
HUTJ700103={
    'A': 154.33, 'L': 232.30,
    'R': 341.01, 'K': 300.46,
    'N': 207.90, 'M': 202.65,
    'D': 194.91, 'F': 204.74,
    'C': 219.79, 'P': 179.93,
    'Q': 235.51, 'S': 174.06,
    'E': 223.16, 'T': 205.80,
    'G': 127.90, 'W': 237.01,
    'H': 242.54, 'Y': 229.15,
    'I': 233.21, 'V': 207.60
}
ZIMJ680103={
    'A': 0.00, 'L': 0.13,
    'R': 52.00, 'K': 49.50,
    'N': 3.38, 'M': 1.43,
    'D': 49.70, 'F': 0.35,
    'C': 1.48, 'P': 1.58,
    'Q': 3.53, 'S': 1.67,
    'E': 49.90, 'T': 1.66,
    'G': 0.00, 'W': 2.10,
    'H': 51.60, 'Y': 1.61,
    'I': 0.13, 'V': 0.13
}
TANS770104= {
    'A': 1.194, 'L': 0.595,
    'R': 0.795, 'K': 1.060,
    'N': 0.659, 'M': 0.831,
    'D': 1.056, 'F': 0.377,
    'C': 0.678, 'P': 3.159,
    'Q': 1.290, 'S': 1.444,
    'E': 0.928, 'T': 1.172,
    'G': 1.015, 'W': 0.452,
    'H': 0.611, 'Y': 0.816,
    'I': 0.603, 'V': 0.640
}
CEDJ970105={
    'A': 8.3, 'L': 7.4,
    'R': 8.7, 'K': 7.9,
    'N': 3.7, 'M': 2.3,
    'D': 4.7, 'F': 2.7,
    'C': 1.6, 'P': 6.9,
    'Q': 4.7, 'S': 8.8,
    'E': 6.5, 'T': 5.1,
    'G': 6.3, 'W': 0.7,
    'H': 2.1, 'Y': 2.4,
    'I': 3.7, 'V': 5.3
}
QIAN880127={
    'A': -0.05, 'L': 0.04,
    'R': 0.06, 'K': -0.42,
    'N': 0.00, 'M': 0.25,
    'D': 0.15, 'F': 0.09,
    'C': 0.30, 'P': 0.31,
    'Q': -0.08, 'S': -0.11,
    'E': -0.02, 'T': -0.06,
    'G': -0.14, 'W': 0.19,
    'H': -0.07, 'Y': 0.33,
    'I': 0.26, 'V': 0.04
}
LEVM760107={
    'A': 0.025, 'L': 0.19,
    'R': 0.20, 'K': 0.19,
    'N': 0.10, 'M': 0.19,
    'D': 0.10, 'F': 0.39,
    'C': 0.10, 'P': 0.17,
    'Q': 0.10, 'S': 0.025,
    'E': 0.10, 'T': 0.10,
    'G': 0.025, 'W': 0.56,
    'H': 0.10, 'Y': 0.39,
    'I': 0.19, 'V': 0.15
}
# Number in brackets after each name (same values and order as selected_indices,
# presumably the row index in aaindex1.csv - TODO confirm):
# NAKH900113 [200], KRIW710101 [146], HUTJ700103 [117], ZIMJ680103 [399],
# TANS770104 [368], CEDJ970105 [459], QIAN880127 [283], LEVM760107 [158]
def autocorrelation(protein_list, lag, AAindex_list, aa_order=None):
    """Lag-`lag` autocovariance of physicochemical properties, per sequence.

    For each sequence and each property a numeric profile is built, centred on
    its mean, and the products of values `lag` positions apart are summed and
    divided by `len(sequence) - lag`.

    How the profile is built depends on the property entry:

    * dict (residue -> value): the sequence is translated residue by residue,
      so the feature reflects the actual sequence.
    * list/tuple with `aa_order` given: `aa_order[i]` names the residue that
      the i-th value belongs to; the entry is then used like a dict.
    * list/tuple without `aa_order` (legacy call style): the raw property row
      itself is used as the profile, exactly as before.
      NOTE(review): in this mode the sequence only contributes its length, so
      all sequences get the same value up to the 1/(n - lag) factor. Pass
      dicts or `aa_order` to get sequence-dependent features.

    Missing values (None, or residues absent from the lookup) count as 0.

    Args:
        protein_list: Iterable of sequences (strings of one-letter codes).
        lag: Non-negative distance between the two positions of each product.
        AAindex_list: Iterable of property entries (dicts or value lists).
        aa_order: Optional string/sequence of residue codes giving the column
            order of list-style property entries.

    Returns:
        A list with one row per sequence; each row holds one float per
        property. A value is 0.0 when the sequence or the profile is too
        short to form any pair at the given lag.
    """
    # Resolve each property entry to a residue lookup once, not per sequence.
    properties = list(AAindex_list)
    lookups = []
    for prop in properties:
        if isinstance(prop, dict):
            lookups.append(prop)
        elif aa_order is not None:
            lookups.append(dict(zip(aa_order, prop)))
        else:
            lookups.append(None)  # legacy: use the raw row as the profile

    features = []
    for sequence in protein_list:
        n = len(sequence)
        row = []
        for prop, lookup in zip(properties, lookups):
            raw = prop if lookup is None else [lookup.get(aa) for aa in sequence]
            values = np.array([0 if v is None else v for v in raw], dtype=float)

            pairs = values.size - lag
            if pairs <= 0 or n - lag <= 0:
                # No pair of positions exists; avoid dividing by zero or a
                # negative count.
                row.append(0.0)
                continue

            centred = values - values.mean()
            # Explicit end index: a `[:-lag]` slice would be empty for lag == 0.
            autocorr = np.sum(centred[:pairs] * centred[lag:])
            row.append(float(autocorr / (n - lag)))

        features.append(row)

    return features

# Per-residue lookup table; nothing else in this cell reads it.
hydrophobicity = {
    'A': 1.8, 'C': 2.5, 'D': -3.5, 'E': -3.5, 'F': 2.8, 'G': -0.4, 'H': -3.2, 'I': 4.5,
    'K': -3.9, 'L': 3.8, 'M': 1.9, 'N': -3.5, 'P': -1.6, 'Q': -3.5, 'R': -4.5, 'S': -0.8,
    'T': -0.7, 'V': 4.2, 'W': -0.9, 'Y': -1.3
}

lag = 1  # step size between the two positions being compared
import pandas as pd
import csv

# AAindex table: the first column is assumed to hold a description and the
# remaining columns the per-amino-acid physicochemical property values.
csv_file_path = 'aaindex1.csv'

with open(csv_file_path, mode='r', encoding='utf-8') as file:
    csv_reader = csv.reader(file)
    # Keep every row minus its first cell; values stay as strings for now.
    data = [row[1:] for row in csv_reader]
# Drop the first row (presumably the header).
data = data[1:]

# Convert cells to float; 'NA' and empty cells become None.
AAindex_data = [
    [None if value in ('NA', '') else float(value) for value in row]
    for row in data
]

# Rows to keep, in this order:
# NAKH900113 [200], KRIW710101 [146], HUTJ700103 [117], ZIMJ680103 [399],
# TANS770104 [368], CEDJ970105 [459], QIAN880127 [283], LEVM760107 [158]
selected_indices = [200, 146, 117, 399, 368, 459, 283, 158]
# Indices beyond the end of the table are skipped silently.
AAindex_data = [AAindex_data[i] for i in selected_indices if i < len(AAindex_data)]
# AAindex_list = [NAKH900113, KRIW710101, HUTJ700103, ZIMJ680103, TANS770104, CEDJ970105, QIAN880127, LEVM760107]

part_autocorrelation_train = autocorrelation(sequences, lag, AAindex_data)
part_autocorrelation_test = autocorrelation(sequences_test, lag, AAindex_data)
len(part_autocorrelation_train), len(part_autocorrelation_test[0])

(500, 8)

In [17]:
# `scipy.stats.stats` is a deprecated private namespace; the public
# `scipy.stats` module exposes the same `kurtosis` and `skew`.
from scipy import stats as st


def AAcal(seqcont):
    """Summarise each feature row with 12 descriptive statistics.

    For every row `r` in `seqcont` the output row contains, in this order:

    * of `r` itself: mean, kurtosis, variance, skewness;
    * of the lag-1 differences `r[j] - r[j + 1]`: mean, variance, kurtosis,
      skewness;
    * of the lag-2 differences `r[k] - r[k + 2]`: mean, variance, kurtosis,
      skewness.

    Note that the first group lists kurtosis before variance, while the two
    difference groups list variance before kurtosis.

    Args:
        seqcont: Iterable of numeric rows (lists or 1-D arrays).

    Returns:
        A list with one 12-element list per input row. Rows too short to form
        differences produce NaN entries (with NumPy/SciPy warnings).
    """
    v = []
    for vtar in seqcont:
        values = np.asarray(vtar, dtype=float)

        # Sequence-order-correlation terms: differences at distance 1 and 2.
        diff1 = values[:-1] - values[1:]
        diff2 = values[:-2] - values[2:]

        v.append([
            np.mean(values), st.kurtosis(values), np.var(values), st.skew(values),
            np.mean(diff1), np.var(diff1), st.kurtosis(diff1), st.skew(diff1),
            np.mean(diff2), np.var(diff2), st.kurtosis(diff2), st.skew(diff2),
        ])
    return v

# Summarise each row of autocorrelation features into 12 statistics.
AAC_2_train, AAC_2_test = (
    AAcal(rows) for rows in (part_autocorrelation_train, part_autocorrelation_test)
)

In [18]:
def calculate_GAAC(protein_list):
    """Compute the grouped amino-acid composition (GAAC) of each sequence.

    Residues are assigned to five groups (aliphatic, aromatic, positively
    charged, negatively charged, uncharged); each output value is the share of
    the sequence length that falls into one group. Characters belonging to no
    group are not counted but still contribute to the length.

    Args:
        protein_list: Iterable of sequences (strings of one-letter codes).

    Returns:
        A list with one 5-element list of floats per sequence, ordered
        G1..G5. An empty sequence yields a row of zeros instead of raising
        ZeroDivisionError.
    """
    # Amino-acid groups.
    groups = {
        'G1': 'GAVLMI',  # aliphatic
        'G2': 'FYW',     # aromatic
        'G3': 'KRH',     # positively charged
        'G4': 'DE',      # negatively charged
        'G5': 'STCPNQ'   # uncharged
    }
    # Reverse lookup so each residue is classified with one dict access.
    residue_to_group = {
        aa: group_name
        for group_name, group_aa in groups.items()
        for aa in group_aa
    }

    results = []
    for protein in protein_list:
        total_length = len(protein)
        if total_length == 0:
            results.append([0.0] * len(groups))
            continue

        group_counts = dict.fromkeys(groups, 0)
        for aa in protein:
            group_name = residue_to_group.get(aa)
            if group_name is not None:
                group_counts[group_name] += 1

        # Convert counts to frequencies, keeping the G1..G5 order.
        results.append([count / total_length for count in group_counts.values()])

    return results

# GAAC features for both sequence lists, stored as 2-D NumPy arrays.
gaac_train = np.array(calculate_GAAC(sequences))
gaac_test = np.array(calculate_GAAC(sequences_test))
gaac_train.shape

(500, 5)

In [22]:
gaac_train[0]

array([0.51351351, 0.05405405, 0.21621622, 0.05405405, 0.16216216])

In [19]:
# Final feature matrix: AAC columns followed by GAAC columns (column-wise
# concatenation). AAC_2_train / AAC_2_test are not concatenated here.
train_encodings=np.concatenate((aac_train,gaac_train),axis=1)
test_encodings=np.concatenate((aac_test,gaac_test),axis=1)

In [20]:
train_encodings[0]

array([0.13513514, 0.        , 0.02702703, 0.02702703, 0.02702703,
       0.10810811, 0.        , 0.13513514, 0.18918919, 0.02702703,
       0.        , 0.02702703, 0.02702703, 0.08108108, 0.02702703,
       0.        , 0.02702703, 0.10810811, 0.02702703, 0.        ,
       0.51351351, 0.05405405, 0.21621622, 0.05405405, 0.16216216])

In [6]:
from sklearn.utils import shuffle
# NOTE(review): this split re-partitions the training file only. It overwrites
# the y_test labels read from the separate test file, and test_encodings is not
# used here - confirm that the file-based test set is meant to be ignored.
# (An earlier variant shuffled train_encodings/test_encodings with
# random_state=42 instead of splitting.)
X_train, X_test, y_train, y_test = train_test_split(train_encodings, y, random_state=42)

# Convert each split exactly once. Building a tensor and then passing it to
# torch.tensor() again makes PyTorch emit a copy-construct UserWarning.
X_train = torch.tensor(np.asarray(X_train), dtype=torch.float32)
X_test = torch.tensor(np.asarray(X_test), dtype=torch.float32)
# Labels become float32 column vectors of shape (n, 1).
y_train = torch.tensor(np.asarray(y_train), dtype=torch.float32).view(-1, 1)
y_test = torch.tensor(np.asarray(y_test), dtype=torch.float32).view(-1, 1)

  del sys.path[0]
  
  from ipykernel import kernelapp as app
  


In [7]:
# Bundle the four tensors under the keys that model.fit(dataset, ...) receives.
dataset = {
    'train_input': X_train,
    'test_input': X_test,
    'train_label': y_train,
    'test_label': y_test,
}
X_train.shape

torch.Size([375, 25])

In [8]:
X_train[0]

tensor([0.0000, 0.0370, 0.0370, 0.0000, 0.0370, 0.0741, 0.0000, 0.1481, 0.2222,
        0.1111, 0.0370, 0.0000, 0.0000, 0.0000, 0.1481, 0.0370, 0.0741, 0.0370,
        0.0000, 0.0000, 0.4074, 0.0370, 0.3704, 0.0370, 0.1481])

In [9]:
y_train

tensor([[0.],
        [1.],
        [0.],
        [0.],
        [1.],
        [0.],
        [0.],
        [1.],
        [1.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [1.],
        [0.],
        [0.],
        [1.],
        [0.],
        [1.],
        [1.],
        [0.],
        [0.],
        [1.],
        [1.],
        [0.],
        [0.],
        [0.],
        [0.],
        [1.],
        [1.],
        [0.],
        [0.],
        [1.],
        [0.],
        [0.],
        [1.],
        [1.],
        [0.],
        [1.],
        [1.],
        [0.],
        [0.],
        [1.],
        [0.],
        [0.],
        [0.],
        [1.],
        [0.],
        [0.],
        [0.],
        [1.],
        [0.],
        [0.],
        [1.],
        [1.],
        [0.],
        [1.],
        [1.],
        [1.],
        [0.],
        [1.],
        [0.],
        [0.],
        [0.],
        [1.],
        [1.],
        [1.],
        [1.],
        [0.],
        [0.],
      

In [10]:
# Build the KAN model. The first width entry (25) matches the number of feature
# columns shown for X_train above (torch.Size([375, 25])); the last entry gives
# a single output per sample.
# NOTE(review): grid=7 and k=9 are presumably the spline grid size and spline
# order of the `kan` package - confirm against the installed version.
model = KAN(width=[25,5,1], grid=7, k=9, seed=42)
from sklearn.metrics import roc_auc_score, confusion_matrix, matthews_corrcoef
def _evaluate_split(inputs, labels, auc_name):
    """Score the module-level `model` on one split, print metrics, return accuracy.

    Args:
        inputs: Feature tensor for the split.
        labels: Label tensor of shape (n, 1) holding 0/1 values.
        auc_name: Text used for the AUC field in the printed line.

    Returns:
        A 0-dim tensor with the accuracy at threshold 0.5 - the same value
        that is printed as ACC.
    """
    # One forward pass, without building an autograd graph.
    with torch.no_grad():
        pred = model(inputs)
    pred_labels = (pred > 0.5).float()

    truth = labels.cpu()
    scores = pred.cpu()
    hard = pred_labels.cpu()

    auc = roc_auc_score(truth, scores)

    # Confusion matrix; labels=[0, 1] keeps it 2x2 even if one class is absent
    # from this split, so the four-way unpack cannot fail.
    tn, fp, fn, tp = confusion_matrix(truth, hard, labels=[0, 1]).ravel()

    # Specificity (SP)
    sp = tn / (tn + fp) if (tn + fp) > 0 else 0

    acc = (tp + tn) / (tp + tn + fp + fn)

    # Sensitivity (SN)
    sn = tp / (tp + fn) if (tp + fn) > 0 else 0

    # Matthews Correlation Coefficient (MCC)
    mcc = matthews_corrcoef(truth, hard)

    print(f"ACC: {acc:.4f}, {auc_name}: {auc:.4f}, SP: {sp:.4f}, MCC: {mcc:.4f}, SN: {sn:.4f}")
    # Same 0.5 threshold as the printed ACC. torch.round() would map outputs
    # above 1.5 or below -0.5 to values that match neither class.
    return torch.mean((pred_labels[:, 0] == labels[:, 0]).float())


def train_acc():
    """Metric callback: print metrics for the training split, return its accuracy."""
    return _evaluate_split(X_train, y_train, "AUC")


def test_acc():
    """Metric callback: print metrics for the test split, return its accuracy."""
    return _evaluate_split(X_test, y_test, "AUC(test)")

# Fit the model with LBFGS for 30 steps. train_acc and test_acc are passed as
# metrics; the recorded log shows one printed line from each per step.
# Earlier experiments kept for reference: model.train(...) with steps=10, and
# regularisation settings lamb=0.001 / 0.005 / 0.1, lamb_entropy=4., lamb_l1=2.5.
# NOTE(review): in the recorded log the train accuracy reaches 1.0 while the
# test accuracy stays between roughly 0.62 and 0.74, and train_loss jumps back
# up at steps 6, 11, 16, 21 and 26 - presumably a periodic grid update inside
# fit(); confirm against the library.
results = model.fit(dataset, opt="LBFGS", steps=30, metrics=(train_acc, test_acc));
# print(results['train_acc'][-1], results['test_acc'][-1])

checkpoint directory created: ./model
saving model version 0.0


| train_loss: 3.10e-01 | test_loss: 4.91e-01 | reg: 0.00e+00 | :   3%| | 1/30 [00:01<00:36,  1.25s/i

ACC: 0.8800, AUC: 0.9514, SP: 0.8254, MCC: 0.7650, SN: 0.9355
ACC: 0.6720, AUC(test): 0.7538, SP: 0.6066, MCC: 0.3440, SN: 0.7344


| train_loss: 2.26e-01 | test_loss: 5.12e-01 | reg: 0.00e+00 | :   7%| | 2/30 [00:01<00:20,  1.35it/

ACC: 0.9573, AUC: 0.9932, SP: 0.9418, MCC: 0.9152, SN: 0.9731
ACC: 0.7440, AUC(test): 0.7456, SP: 0.7377, MCC: 0.4877, SN: 0.7500


| train_loss: 1.72e-01 | test_loss: 4.92e-01 | reg: 0.00e+00 | :  10%| | 3/30 [00:02<00:16,  1.61it/

ACC: 0.9867, AUC: 0.9984, SP: 0.9841, MCC: 0.9733, SN: 0.9892
ACC: 0.7280, AUC(test): 0.7659, SP: 0.7541, MCC: 0.4575, SN: 0.7031


| train_loss: 1.35e-01 | test_loss: 5.04e-01 | reg: 0.00e+00 | :  13%|▏| 4/30 [00:02<00:14,  1.80it/

ACC: 0.9947, AUC: 0.9999, SP: 0.9947, MCC: 0.9893, SN: 0.9946
ACC: 0.7360, AUC(test): 0.7533, SP: 0.7377, MCC: 0.4720, SN: 0.7344


| train_loss: 1.11e-01 | test_loss: 5.19e-01 | reg: 0.00e+00 | :  17%|▏| 5/30 [00:03<00:13,  1.86it/

ACC: 1.0000, AUC: 1.0000, SP: 1.0000, MCC: 1.0000, SN: 1.0000
ACC: 0.7040, AUC(test): 0.7500, SP: 0.7377, MCC: 0.4101, SN: 0.6719


| train_loss: 2.85e-01 | test_loss: 4.71e-01 | reg: 0.00e+00 | :  20%|▏| 6/30 [00:03<00:13,  1.79it/

ACC: 0.9067, AUC: 0.9706, SP: 0.8624, MCC: 0.8168, SN: 0.9516
ACC: 0.7120, AUC(test): 0.7684, SP: 0.7213, MCC: 0.4243, SN: 0.7031


| train_loss: 2.03e-01 | test_loss: 5.27e-01 | reg: 0.00e+00 | :  23%|▏| 7/30 [00:04<00:12,  1.91it/

ACC: 0.9787, AUC: 0.9969, SP: 0.9683, MCC: 0.9576, SN: 0.9892
ACC: 0.6880, AUC(test): 0.7398, SP: 0.7049, MCC: 0.3768, SN: 0.6719


| train_loss: 1.65e-01 | test_loss: 5.43e-01 | reg: 0.00e+00 | :  27%|▎| 8/30 [00:04<00:11,  1.97it/

ACC: 0.9840, AUC: 0.9993, SP: 0.9735, MCC: 0.9682, SN: 0.9946
ACC: 0.6640, AUC(test): 0.7234, SP: 0.6721, MCC: 0.3283, SN: 0.6562


| train_loss: 1.36e-01 | test_loss: 5.67e-01 | reg: 0.00e+00 | :  30%|▎| 9/30 [00:05<00:10,  1.99it/

ACC: 0.9920, AUC: 0.9999, SP: 0.9841, MCC: 0.9841, SN: 1.0000
ACC: 0.6400, AUC(test): 0.7075, SP: 0.6393, MCC: 0.2799, SN: 0.6406


| train_loss: 1.17e-01 | test_loss: 5.89e-01 | reg: 0.00e+00 | :  33%|▎| 10/30 [00:05<00:09,  2.01it

ACC: 0.9973, AUC: 1.0000, SP: 0.9947, MCC: 0.9947, SN: 1.0000
ACC: 0.6480, AUC(test): 0.6934, SP: 0.6557, MCC: 0.2963, SN: 0.6406


| train_loss: 2.27e-01 | test_loss: 5.07e-01 | reg: 0.00e+00 | :  37%|▎| 11/30 [00:06<00:10,  1.89it

ACC: 0.9600, AUC: 0.9936, SP: 0.9524, MCC: 0.9201, SN: 0.9677
ACC: 0.6880, AUC(test): 0.7579, SP: 0.6557, MCC: 0.3754, SN: 0.7188


| train_loss: 1.67e-01 | test_loss: 5.26e-01 | reg: 0.00e+00 | :  40%|▍| 12/30 [00:06<00:09,  1.99it

ACC: 0.9893, AUC: 0.9996, SP: 0.9894, MCC: 0.9787, SN: 0.9892
ACC: 0.7040, AUC(test): 0.7569, SP: 0.7377, MCC: 0.4101, SN: 0.6719


| train_loss: 1.39e-01 | test_loss: 5.59e-01 | reg: 0.00e+00 | :  43%|▍| 13/30 [00:07<00:08,  1.99it

ACC: 0.9973, AUC: 0.9999, SP: 0.9947, MCC: 0.9947, SN: 1.0000
ACC: 0.6640, AUC(test): 0.7428, SP: 0.7541, MCC: 0.3369, SN: 0.5781


| train_loss: 1.15e-01 | test_loss: 5.87e-01 | reg: 0.00e+00 | :  47%|▍| 14/30 [00:07<00:07,  2.06it

ACC: 0.9973, AUC: 1.0000, SP: 1.0000, MCC: 0.9947, SN: 0.9946
ACC: 0.6160, AUC(test): 0.7106, SP: 0.6885, MCC: 0.2375, SN: 0.5469


| train_loss: 1.01e-01 | test_loss: 5.92e-01 | reg: 0.00e+00 | :  50%|▌| 15/30 [00:07<00:07,  2.14it

ACC: 1.0000, AUC: 1.0000, SP: 1.0000, MCC: 1.0000, SN: 1.0000
ACC: 0.6400, AUC(test): 0.7075, SP: 0.7213, MCC: 0.2871, SN: 0.5625


| train_loss: 2.40e-01 | test_loss: 5.33e-01 | reg: 0.00e+00 | :  53%|▌| 16/30 [00:08<00:06,  2.06it

ACC: 0.9600, AUC: 0.9899, SP: 0.9577, MCC: 0.9200, SN: 0.9624
ACC: 0.7280, AUC(test): 0.7346, SP: 0.7377, MCC: 0.4563, SN: 0.7188


| train_loss: 1.78e-01 | test_loss: 5.23e-01 | reg: 0.00e+00 | :  57%|▌| 17/30 [00:08<00:06,  2.10it

ACC: 0.9813, AUC: 0.9993, SP: 0.9735, MCC: 0.9628, SN: 0.9892
ACC: 0.7120, AUC(test): 0.7561, SP: 0.7213, MCC: 0.4243, SN: 0.7031


| train_loss: 1.42e-01 | test_loss: 5.52e-01 | reg: 0.00e+00 | :  60%|▌| 18/30 [00:09<00:05,  2.02it

ACC: 0.9947, AUC: 0.9999, SP: 0.9894, MCC: 0.9894, SN: 1.0000
ACC: 0.6720, AUC(test): 0.7518, SP: 0.7377, MCC: 0.3495, SN: 0.6094


| train_loss: 1.20e-01 | test_loss: 5.78e-01 | reg: 0.00e+00 | :  63%|▋| 19/30 [00:10<00:05,  2.02it

ACC: 0.9973, AUC: 1.0000, SP: 0.9947, MCC: 0.9947, SN: 1.0000
ACC: 0.6720, AUC(test): 0.7474, SP: 0.7705, MCC: 0.3546, SN: 0.5781


| train_loss: 1.06e-01 | test_loss: 5.77e-01 | reg: 0.00e+00 | :  67%|▋| 20/30 [00:10<00:04,  2.00it

ACC: 0.9973, AUC: 1.0000, SP: 1.0000, MCC: 0.9947, SN: 0.9946
ACC: 0.6640, AUC(test): 0.7387, SP: 0.7541, MCC: 0.3369, SN: 0.5781


| train_loss: 2.26e-01 | test_loss: 5.10e-01 | reg: 0.00e+00 | :  70%|▋| 21/30 [00:11<00:04,  1.97it

ACC: 0.9680, AUC: 0.9943, SP: 0.9524, MCC: 0.9365, SN: 0.9839
ACC: 0.7280, AUC(test): 0.7626, SP: 0.7377, MCC: 0.4563, SN: 0.7188


| train_loss: 1.68e-01 | test_loss: 5.28e-01 | reg: 0.00e+00 | :  73%|▋| 22/30 [00:11<00:04,  1.98it

ACC: 0.9920, AUC: 0.9995, SP: 0.9894, MCC: 0.9840, SN: 0.9946
ACC: 0.7280, AUC(test): 0.7651, SP: 0.7541, MCC: 0.4575, SN: 0.7031


| train_loss: 1.33e-01 | test_loss: 5.69e-01 | reg: 0.00e+00 | :  77%|▊| 23/30 [00:12<00:03,  1.99it

ACC: 0.9920, AUC: 0.9999, SP: 0.9894, MCC: 0.9840, SN: 0.9946
ACC: 0.7040, AUC(test): 0.7643, SP: 0.7705, MCC: 0.4140, SN: 0.6406


| train_loss: 1.15e-01 | test_loss: 5.84e-01 | reg: 0.00e+00 | :  80%|▊| 24/30 [00:12<00:03,  1.99it

ACC: 0.9973, AUC: 1.0000, SP: 0.9947, MCC: 0.9947, SN: 1.0000
ACC: 0.6880, AUC(test): 0.7585, SP: 0.7541, MCC: 0.3817, SN: 0.6250


| train_loss: 1.03e-01 | test_loss: 5.78e-01 | reg: 0.00e+00 | :  83%|▊| 25/30 [00:12<00:02,  2.05it

ACC: 0.9973, AUC: 1.0000, SP: 0.9947, MCC: 0.9947, SN: 1.0000
ACC: 0.6800, AUC(test): 0.7556, SP: 0.7541, MCC: 0.3668, SN: 0.6094


| train_loss: 2.35e-01 | test_loss: 4.99e-01 | reg: 0.00e+00 | :  87%|▊| 26/30 [00:14<00:02,  1.49it

ACC: 0.9493, AUC: 0.9911, SP: 0.9418, MCC: 0.8988, SN: 0.9570
ACC: 0.7280, AUC(test): 0.7672, SP: 0.7541, MCC: 0.4575, SN: 0.7031


| train_loss: 1.82e-01 | test_loss: 5.43e-01 | reg: 0.00e+00 | :  90%|▉| 27/30 [00:14<00:01,  1.64it

ACC: 0.9893, AUC: 0.9990, SP: 0.9841, MCC: 0.9787, SN: 0.9946
ACC: 0.7120, AUC(test): 0.7579, SP: 0.7377, MCC: 0.4254, SN: 0.6875


| train_loss: 1.50e-01 | test_loss: 5.58e-01 | reg: 0.00e+00 | :  93%|▉| 28/30 [00:15<00:01,  1.71it

ACC: 0.9920, AUC: 0.9998, SP: 0.9894, MCC: 0.9840, SN: 0.9946
ACC: 0.7280, AUC(test): 0.7656, SP: 0.7869, MCC: 0.4611, SN: 0.6719


| train_loss: 1.27e-01 | test_loss: 5.96e-01 | reg: 0.00e+00 | :  97%|▉| 29/30 [00:15<00:00,  1.84it

ACC: 0.9947, AUC: 0.9999, SP: 0.9947, MCC: 0.9893, SN: 0.9946
ACC: 0.7200, AUC(test): 0.7623, SP: 0.8033, MCC: 0.4490, SN: 0.6406


| train_loss: 1.14e-01 | test_loss: 6.01e-01 | reg: 4.21e+01 | : 100%|█| 30/30 [00:16<00:00,  1.85it

ACC: 1.0000, AUC: 1.0000, SP: 1.0000, MCC: 1.0000, SN: 1.0000
ACC: 0.7120, AUC(test): 0.7602, SP: 0.7869, MCC: 0.4314, SN: 0.6406
saving model version 0.1





In [None]:
import numpy as np
from sklearn import datasets
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, roc_auc_score, roc_curve

# Linear SVM baseline. It uses its own variable names so that re-running this
# cell does not flip the splits back and forth, and so that `model` keeps
# pointing at the KAN that train_acc()/test_acc() read.
svm_model = SVC(kernel='linear', probability=True)  # probability=True enables predict_proba for the AUC

# NOTE(review): as in the original cell, the roles of the two splits are
# swapped - the SVM is fitted on X_test/y_test and evaluated on
# X_train/y_train. Confirm this is intended.
svm_X_train, svm_X_test = np.asarray(X_test), np.asarray(X_train)
# sklearn expects 1-D label vectors; the tensors are (n, 1) columns.
svm_y_train, svm_y_test = np.asarray(y_test).ravel(), np.asarray(y_train).ravel()

# Train the model.
svm_model.fit(svm_X_train, svm_y_train)

# Predict hard labels.
y_train_pred = svm_model.predict(svm_X_train)
y_test_pred = svm_model.predict(svm_X_test)

# Accuracy.
train_accuracy = accuracy_score(svm_y_train, y_train_pred)
test_accuracy = accuracy_score(svm_y_test, y_test_pred)

# AUC from the probability of the second class in svm_model.classes_.
y_train_prob = svm_model.predict_proba(svm_X_train)[:, 1]
y_test_prob = svm_model.predict_proba(svm_X_test)[:, 1]
train_auc = roc_auc_score(svm_y_train, y_train_prob)
test_auc = roc_auc_score(svm_y_test, y_test_prob)

# Report the results.
print(f"Training Accuracy: {train_accuracy:.2f}")
print(f"Training AUC: {train_auc:.2f}")
print(f"Test Accuracy: {test_accuracy:.2f}")
print(f"Test AUC: {test_auc:.2f}")