In [1]:
import os
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from torch.utils.data import Subset, Dataset, DataLoader


class MoannaDataset(Dataset):
    """Feature matrix and integer labels read from two CSV files.

    Features and labels are paired by row position, not by index value.
    NOTE(review): confirm that both files list the samples in the same order.

    Attributes set by ``__init__``:
        data:  2-D NumPy array holding every non-index column of ``data_file``.
        label: 1-D NumPy integer array, one entry per row of ``label_file``.
    """

    def __init__(self, data_file, label_file):
        """Load both CSV files into NumPy arrays.

        Args:
            data_file: CSV with a header row; its first column is used as the
                row index and the remaining columns become the features.
            label_file: CSV with a header row; its first column is used as the
                row index and the label is taken from position 1 of the
                remaining columns (the third column of the file), cast to int.

        Raises:
            ValueError: if the two files do not contain the same number of rows.
        """
        features = pd.read_csv(data_file, sep=',', header=0, index_col=0)
        clinical = pd.read_csv(label_file, header=0, index_col=0)

        # Rows are matched positionally, so differing row counts would leave
        # some samples without a label (or the reverse); fail early instead.
        if len(features) != len(clinical):
            raise ValueError(
                "data_file has {} rows but label_file has {} rows".format(
                    len(features), len(clinical)
                )
            )

        self.data = features.values
        self.label = clinical.values[:, 1].astype(int)

    def __len__(self):
        """Return the number of labelled samples."""
        return len(self.label)

    def __getitem__(self, idx):
        """Return the ``(feature_row, label)`` pair stored at position ``idx``."""
        return self.data[idx], self.label[idx]



In [3]:
# Directory holding the input CSVs. It can be overridden with the
# MOANNA_DATA_DIR environment variable so the notebook is not tied to one
# machine; the default is the original location.
DATA_DIR = Path(os.environ.get('MOANNA_DATA_DIR', '/home/intern/WarmUpProject'))
data_file = str(DATA_DIR / 'p-tcga_binary_mutation.csv')
label_file = str(DATA_DIR / 'pp-TCGA-clinical.csv')

dataset = MoannaDataset(data_file, label_file)

# Plain NumPy views of the features and labels for scikit-learn.
X = dataset.data
y = dataset.label
assert len(X) == len(y), "features and labels must have the same number of rows"

# Hold out 30% of the samples as the test split; the fixed seed keeps the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [20]:
# Baseline: linear-kernel SVM trained on the training split
# (fit() returns the estimator itself, so construction and training chain).
clf = svm.SVC(kernel='linear').fit(X_train, y_train)

# Predict the test split and report plain accuracy.
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.39628277464321277


In [27]:
# Number of predictions held in y_pred (whichever cell assigned it last).
y_pred.shape[0]

3013

In [4]:
# Build an RBF-kernel SVM (stopping tolerance tol=0.1) and train it on the
# training split in one step.
clf = svm.SVC(
    C=1.0,
    kernel='rbf',
    gamma='scale',
    tol=0.1,
    class_weight=None,
).fit(X_train, y_train)

# Predict labels for the test split.
y_pred = clf.predict(X_test)

# Accuracy is the only metric computed in this cell.
acc = accuracy_score(y_test, y_pred)

# Report it as a percentage with two decimals.
acc_percent = acc * 100
print("Accuracy: {:.2f}%".format(acc_percent))


Accuracy: 32.79%


In [5]:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

# Hyperparameter grid. `gamma` has no effect on the linear kernel, so it is
# searched only for RBF. This covers the same distinct models as a full
# C x gamma x kernel product while skipping six redundant linear candidates.
param_grid = [
    {'kernel': ['linear'], 'C': [0.1, 1, 10]},
    {'kernel': ['rbf'], 'C': [0.1, 1, 10], 'gamma': [0.01, 0.1, 1]},
]

# Hold out 20% of the training split; the grid search only sees the other 80%.
# NOTE(review): X_val / y_val are not used in this cell, and GridSearchCV
# already runs its own 5-fold cross-validation. If nothing else needs the
# hold-out, fitting the search on X_train / y_train would use all the data.
X_train_cv, X_val, y_train_cv, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

# Named `base_svc` rather than `svm`: rebinding `svm` would shadow the
# `sklearn.svm` module that other cells call as `svm.SVC(...)`, breaking them
# whenever they are re-run after this cell.
base_svc = SVC()

# 5-fold cross-validated search, scored by accuracy.
grid_search = GridSearchCV(estimator=base_svc, param_grid=param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train_cv, y_train_cv)

# Best hyperparameters and their mean cross-validated accuracy.
print('Best parameters:', grid_search.best_params_)
print('Best score:', grid_search.best_score_)

# Retrain on the full training split with the winning hyperparameters.
# Unpacking the dict works whether or not it contains a `gamma` entry
# (it does not when the linear kernel wins).
svm_best = SVC(**grid_search.best_params_)
svm_best.fit(X_train, y_train)

# Evaluate the retrained model on the untouched test split.
y_pred = svm_best.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print('Test set accuracy:', accuracy)

Best parameters: {'C': 10, 'gamma': 0.01, 'kernel': 'rbf'}
Best score: 0.395412732305259
Test set accuracy: 0.42183869897112514
