In [1]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, roc_curve, auc, roc_auc_score
from sklearn.svm import SVC
import numpy as np
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error



In [2]:
y_train = pd.read_csv("Y_train.csv")
y_train= y_train["mort_icu"]
y_valid = pd.read_csv("Y_valid.csv")
y_valid = y_valid["mort_icu"]

X_train = pd.read_csv("X_train_new.csv")
X_train = X_train.dropna(axis = 1)
X_valid = pd.read_csv("X_valid_new.csv")
X_valid = X_valid.dropna(axis = 1)

In [6]:
model = SVC(class_weight = 'balanced', C = 2, kernel = "rbf", gamma = 'auto', probability = True)
model.fit(X_train, y_train)
y_pred_valid = model.predict_proba(X_valid)[:, 1]
y_pred_train = model.predict_proba(X_train)[:, 1]
print('AUC of train:' , roc_auc_score(y_train, y_pred_train, average = "macro", sample_weight = None))
print('AUC of valid:' , roc_auc_score(y_valid, y_pred_valid, average = "macro", sample_weight = None))

AUC of train: 0.9998911739784424
AUC of valid: 0.853624289872764


In [3]:
for c in [0.4, 0.5, 0.6, 0.7, 0.8, 0.9]:
    model = SVC(class_weight = 'balanced', C = c, kernel = "rbf", gamma = 'auto', probability = True)
    model.fit(X_train, y_train)
    y_pred_valid = model.predict_proba(X_valid)[:, 1]
    y_pred_train = model.predict_proba(X_train)[:, 1]
    print('C: ', c)
    print('AUC of train:' , roc_auc_score(y_train, y_pred_train, average = "macro", sample_weight = None))
    print('AUC of valid:' , roc_auc_score(y_valid, y_pred_valid, average = "macro", sample_weight = None))

C:  0.4
AUC of train: 0.9845720854160782
AUC of valid: 0.8557517371707593
C:  0.5
AUC of train: 0.9896912436039753
AUC of valid: 0.8562549884971125
C:  0.6
AUC of train: 0.9929693450601997
AUC of valid: 0.854972651298183
C:  0.7
AUC of train: 0.9949759791911615
AUC of valid: 0.853552396826142
C:  0.8
AUC of train: 0.9962746924324929
AUC of valid: 0.8531181041363444
C:  0.9
AUC of train: 0.9974024158746215
AUC of valid: 0.8531181041363444


In [None]:
for c in [0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]:
    model = SVC(class_weight = 'balanced', C = c, kernel = "rbf", gamma = 'auto', probability = True)
    model.fit(X_train, y_train)
    y_pred_valid = model.predict_proba(X_valid)[:, 1]
    y_pred_train = model.predict_proba(X_train)[:, 1]
    print('C: ', c)
    print('AUC of train:' , roc_auc_score(y_train, y_pred_train, average = "macro", sample_weight = None))
    print('AUC of valid:' , roc_auc_score(y_valid, y_pred_valid, average = "macro", sample_weight = None))

C:  0.05
AUC of train: 0.8942725511189349
AUC of valid: 0.8201177285318558
C:  0.1
AUC of train: 0.9277884960228057
AUC of valid: 0.8364917367012534
C:  0.2
AUC of train: 0.9596882352554796
AUC of valid: 0.8486812761162495
C:  0.3
AUC of train: 0.9758092260399491
AUC of valid: 0.8537049861495846


In [7]:
for c in [0.001, 0.01, 1, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2, 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 2.8, 2.9, 3, 3.2, 3.4, 3.6, 4]:
    model = SVC(class_weight = 'balanced', C = c, kernel = "rbf", gamma = 'auto', probability = True)
    model.fit(X_train, y_train)
    y_pred_valid = model.predict_proba(X_valid)[:, 1]
    y_pred_train = model.predict_proba(X_train)[:, 1]
    print('C: ', c)
    print('AUC of train:' , roc_auc_score(y_train, y_pred_train, average = "macro", sample_weight = None))
    print('AUC of valid:' , roc_auc_score(y_valid, y_pred_valid, average = "macro", sample_weight = None))

C:  0.001
AUC of train: 0.7554090237317972
AUC of valid: 0.730896990469036
C:  0.01
AUC of train: 0.8065351838836597
AUC of valid: 0.7651562279919246
C:  1
AUC of train: 0.997997884960228
AUC of valid: 0.8533866026574017
C:  1.1
AUC of train: 0.9984709707507077
AUC of valid: 0.8537827480163389
C:  1.2
AUC of train: 0.9989693844708788
AUC of valid: 0.8542478520118315
C:  1.3
AUC of train: 0.9992444184625894
AUC of valid: 0.8540541809474622
C:  1.4
AUC of train: 0.9993586043364884
AUC of valid: 0.8540072303864031
C:  1.5
AUC of train: 0.9994950703809042
AUC of valid: 0.8541451476595145
C:  1.6
AUC of train: 0.999733794000381
AUC of valid: 0.8542097046809709
C:  1.7
AUC of train: 0.9997699204561128
AUC of valid: 0.8540893938682568
C:  1.8
AUC of train: 0.9997969561818935
AUC of valid: 0.8541656885299779
C:  1.9
AUC of train: 0.99981603095052
AUC of valid: 0.8538370346025635
C:  2
AUC of train: 0.9998911739784424
AUC of valid: 0.8535186511103808
C:  2.1
AUC of train: 0.9999029183607785
AUC

In [8]:
for c in [4.1, 4.2, 4.3, 4.4, 4.5, 4.6, 4.7, 4.8, 4.9, 5, 5.1, 5.2, 5.3, 5.4, 5.5, 5.6, 5.7, 5.8, 5.9, 6, 6.2, 6.4, 6.6, 6.8, 7, 8, 9, 11]:
    model = SVC(class_weight = 'balanced', C = c, kernel = "rbf", gamma = 'auto', probability = True)
    model.fit(X_train, y_train)
    y_pred_valid = model.predict_proba(X_valid)[:, 1]
    y_pred_train = model.predict_proba(X_train)[:, 1]
    print('C: ', c)
    print('AUC of train:' , roc_auc_score(y_train, y_pred_train, average = "macro", sample_weight = None))
    print('AUC of valid:' , roc_auc_score(y_valid, y_pred_valid, average = "macro", sample_weight = None))

C:  4.1
AUC of train: 0.9999997372621401
AUC of valid: 0.8431293722709986
C:  4.2
AUC of train: 0.999999842357284
AUC of valid: 0.8426539978402742
C:  4.3
AUC of train: 0.999999842357284
AUC of valid: 0.8421844922296822
C:  4.4
AUC of train: 0.9999998949048561
AUC of valid: 0.8418411662519366
C:  4.5
AUC of train: 0.9999998949048561
AUC of valid: 0.8415301187849193
C:  4.6
AUC of train: 0.999999947452428
AUC of valid: 0.8411779895769755
C:  4.7
AUC of train: 0.999999947452428
AUC of valid: 0.8409432367716794
C:  4.8
AUC of train: 0.9999999474524282
AUC of valid: 0.8407525001173765
C:  4.9
AUC of train: 0.9999999474524282
AUC of valid: 0.8405529602328748
C:  5
AUC of train: 0.999999947452428
AUC of valid: 0.8402419127658576
C:  5.1
AUC of train: 0.999999947452428
AUC of valid: 0.8399719470397671
C:  5.2
AUC of train: 1.0
AUC of valid: 0.8398281609465232
C:  5.3
AUC of train: 1.0
AUC of valid: 0.839646227522419
C:  5.4
AUC of train: 1.0
AUC of valid: 0.8396139490116906
C:  5.5
AUC of tra

In [9]:
for c in [13, 15, 17, 19, 20, 25]:
    model = SVC(class_weight = 'balanced', C = c, kernel = "rbf", gamma = 'auto', probability = True)
    model.fit(X_train, y_train)
    y_pred_valid = model.predict_proba(X_valid)[:, 1]
    y_pred_train = model.predict_proba(X_train)[:, 1]
    print('C: ', c)
    print('AUC of train:' , roc_auc_score(y_train, y_pred_train, average = "macro", sample_weight = None))
    print('AUC of valid:' , roc_auc_score(y_valid, y_pred_valid, average = "macro", sample_weight = None))

C:  13
AUC of train: 1.0
AUC of valid: 0.8344083055542515
C:  15
AUC of train: 1.0
AUC of valid: 0.8339006526127988
C:  17
AUC of train: 1.0
AUC of valid: 0.8335690642753181
C:  19
AUC of train: 1.0
AUC of valid: 0.833231607117705
C:  20
AUC of train: 1.0
AUC of valid: 0.8331142307150571
C:  25
AUC of train: 1.0
AUC of valid: 0.8328824123198273


In [5]:
X_train = pd.read_csv("X_train_filtered2.csv")
X_train = X_train.drop(labels = ['Unnamed: 0'], axis = 1)
y_train = pd.read_csv("Y_train.csv")
y_train = y_train.drop(labels = ['Unnamed: 0'], axis = 1)
y_train = y_train["mort_icu"]

X_valid = pd.read_csv("X_valid_filtered2.csv")
X_valid = X_valid.drop(labels = ['Unnamed: 0'], axis = 1)
y_valid = pd.read_csv("Y_valid.csv")
y_valid = y_valid.drop(labels = ['Unnamed: 0'], axis = 1)
y_valid = y_valid["mort_icu"]

In [5]:
model = SVC(class_weight='balanced', C = 5.5, kernel = "rbf", gamma = 'auto')
model.fit(X_train, y_train)
y_pred_train = model.predict(X_train)
y_pred_valid = model.predict(X_valid)
print('AUC of train:' , roc_auc_score(y_train, y_pred_train, average = "macro", sample_weight = None))
print('AUC of valid:' , roc_auc_score(y_valid, y_pred_valid, average = "macro", sample_weight = None))

AUC of train: 1.0
AUC of valid: 0.5


In [2]:
X_train = pd.read_csv("X_train_filtered2.csv")
X_train = X_train.drop(labels = ['Unnamed: 0'], axis = 1)
y_train = pd.read_csv("Y_train_T2.csv")
y_train = y_train.drop(labels = ['Unnamed: 0'], axis = 1)
y_train = y_train["los_icu"]

X_valid = pd.read_csv("X_valid_filtered2.csv")
X_valid = X_valid.drop(labels = ['Unnamed: 0'], axis = 1)
y_valid = pd.read_csv("Y_valid_T2.csv")
y_valid = y_valid.drop(labels = ['Unnamed: 0'], axis = 1)
y_valid = y_valid["los_icu"]
#X_train = X_train.drop(labels = ['hemoglobin_min', 'red blood cell count_min', 'chloride_mean', 'blood urea nitrogen_max', 'red blood cell count_min', 'co2 (etco2, pco2, etc.)_mean', 'co2_mean', 'mean blood pressure_mean','phosphorous_mean', 'lactic acid_min', 'lymphocytes_mean', 'positive end-expiratory pressure_mean', 'plateau pressure_max', 'cardiac index_min', 'systemic vascular resistance_max','cholesterol_max','cholesterol hdl_min','cardiac output fick_min','cholesterol ldl_max','pulmonary artery pressure mean_mean','chloride urine_mean','lymphocytes atypical_max','pulmonary capillary wedge pressure_max','troponin-i_max','total protein urine_max','venous pvo2_mean','post void residual_mean','red blood cell count csf_max','monocytes csl_max','lymphocytes body fluid_mean','lymphocytes ascites_mean','red blood cell count ascites_max','eosinophils_mean','total protein_mean','lactate dehydrogenase pleural_max','lymphocytes pleural_mean','red blood cell count pleural_max','calcium urine_mean','albumin urine_max','albumin ascites_mean','lymphocytes percent_mean','albumin pleural_mean','creatinine ascites_max','creatinine pleural_mean','lymphocytes atypical csl_mean','creatinine body fluid_max'], axis = 1)
#X_valid = X_valid.drop(labels = ['hemoglobin_min', 'red blood cell count_min', 'chloride_mean', 'blood urea nitrogen_max', 'red blood cell count_min', 'co2 (etco2, pco2, etc.)_mean', 'co2_mean', 'mean blood pressure_mean','phosphorous_mean', 'lactic acid_min', 'lymphocytes_mean', 'positive end-expiratory pressure_mean', 'plateau pressure_max', 'cardiac index_min', 'systemic vascular resistance_max','cholesterol_max','cholesterol hdl_min','cardiac output fick_min','cholesterol ldl_max','pulmonary artery pressure mean_mean','chloride urine_mean','lymphocytes atypical_max','pulmonary capillary wedge pressure_max','troponin-i_max','total protein urine_max','venous pvo2_mean','post void residual_mean','red blood cell count csf_max','monocytes csl_max','lymphocytes body fluid_mean','lymphocytes ascites_mean','red blood cell count ascites_max','eosinophils_mean','total protein_mean','lactate dehydrogenase pleural_max','lymphocytes pleural_mean','red blood cell count pleural_max','calcium urine_mean','albumin urine_max','albumin ascites_mean','lymphocytes percent_mean','albumin pleural_mean','creatinine ascites_max','creatinine pleural_mean','lymphocytes atypical csl_mean','creatinine body fluid_max'], axis = 1)

In [12]:
X_train2 = X_train.append(X_valid)
y_train2 = y_train.append(y_valid)

  X_train2 = X_train.append(X_valid)
  y_train2 = y_train.append(y_valid)


In [4]:
#scaler = MinMaxScaler((-1, 1))
#scaler.fit(X_train)
#X_train = scaler.transform(X_train)
#scaler.fit(X_valid)
#X_valid = scaler.transform(X_valid)

In [41]:
svm = SVC(class_weight='balanced')
svm.fit(X_train, y_train)
y_pred_train = svm.predict(X_train)
y_pred_valid = svm.predict(X_valid)

In [50]:
print('train: \n', classification_report(y_train,y_pred_train))
print('valid: \n', classification_report(y_valid,y_pred_valid))
con_mat_train = confusion_matrix(y_train, y_pred_train)
print('train: \n', con_mat_train)
con_mat_valid = confusion_matrix(y_valid, y_pred_valid)
print('valid: \n', con_mat_valid)
print('AUC of valid:' , roc_auc_score(y_valid, y_pred_valid, average="macro", sample_weight=None))

train: 
               precision    recall  f1-score   support

           0       0.99      0.92      0.96     15535
           1       0.48      0.93      0.63      1225

    accuracy                           0.92     16760
   macro avg       0.74      0.93      0.79     16760
weighted avg       0.96      0.92      0.93     16760

valid: 
               precision    recall  f1-score   support

           0       0.97      0.91      0.94      2242
           1       0.29      0.55      0.38       152

    accuracy                           0.89      2394
   macro avg       0.63      0.73      0.66      2394
weighted avg       0.93      0.89      0.90      2394

train: 
 [[14289  1246]
 [   83  1142]]
valid: 
 [[2041  201]
 [  68   84]]
AUC of valid: 0.7314897413024085


In [None]:
C = [1e-2, 1e-1, 1, 10]
kernel = ["linear"]
for i in C:
    for j in kernel:
        model = SVC(class_weight='balanced', C = i, kernel = j)
        model.fit(X_train, y_train)
        y_pred_valid = model.predict(X_valid)
        print('C: ', i, ', kernel: ', j)
        print('AUC of valid:' , roc_auc_score(y_valid, y_pred_valid, average="macro", sample_weight=None))

C:  0.01 , kernel:  linear
AUC of valid: 0.7298171275646743
C:  0.1 , kernel:  linear
AUC of valid: 0.7196699375557538
C:  1 , kernel:  linear
AUC of valid: 0.5002230151650312
C:  10 , kernel:  linear
AUC of valid: 0.5


In [5]:
kernel2 = ["rbf","sigmoid", 'poly']
gamma = ['scale', 'auto']
C = [1e-3, 1e-2, 1e-1, 1, 10]
for i in C:
    for j in kernel2:
        for k in gamma:
            model = SVC(class_weight = 'balanced', C = i, kernel = j, gamma = k)
            model.fit(X_train, y_train)
            y_pred_valid = model.predict(X_valid)
            print('C: ', i, ', kernel: ', j, ', gamma: ', k)
            print('AUC of valid:' , roc_auc_score(y_valid, y_pred_valid, average = "macro", sample_weight = None))

C:  0.001 , kernel:  rbf , gamma:  scale
AUC of valid: 0.6260593220338984
C:  0.001 , kernel:  rbf , gamma:  auto
AUC of valid: 0.6733385370205174
C:  0.001 , kernel:  sigmoid , gamma:  scale
AUC of valid: 0.651092774308653
C:  0.001 , kernel:  sigmoid , gamma:  auto
AUC of valid: 0.6052074041034791
C:  0.001 , kernel:  poly , gamma:  scale
AUC of valid: 0.5549732381801963
C:  0.001 , kernel:  poly , gamma:  auto
AUC of valid: 0.523193577163247
C:  0.01 , kernel:  rbf , gamma:  scale
AUC of valid: 0.6197033898305084
C:  0.01 , kernel:  rbf , gamma:  auto
AUC of valid: 0.7076271186440678
C:  0.01 , kernel:  sigmoid , gamma:  scale
AUC of valid: 0.6505909901873328
C:  0.01 , kernel:  sigmoid , gamma:  auto
AUC of valid: 0.6838202497769847
C:  0.01 , kernel:  poly , gamma:  scale
AUC of valid: 0.6325267618198037
C:  0.01 , kernel:  poly , gamma:  auto
AUC of valid: 0.5751003568242641
C:  0.1 , kernel:  rbf , gamma:  scale
AUC of valid: 0.7133697591436217
C:  0.1 , kernel:  rbf , gamma:  a

In [4]:
C = [1e-3]
kernel = ["linear"]
for i in C:
    for j in kernel:
        model = SVC(class_weight = 'balanced', C = i, kernel = j)
        model.fit(X_train, y_train)
        y_pred_valid = model.predict(X_valid)
        print('C: ', i, ', kernel: ', j)
        print('AUC of valid:' , roc_auc_score(y_valid, y_pred_valid, average = "macro", sample_weight = None))

C:  0.001 , kernel:  linear
AUC of valid: 0.7164919714540589


In [9]:
gamma = ['auto', 10, 1, 0.1, 0.01,0.001]
for j in gamma:
    model = SVC(class_weight = 'balanced', C = 2, kernel = "rbf", gamma = auto)
    model.fit(X_train, y_train)
    y_pred_valid = model.predict(X_valid)
    print('C: 1, kernel: rbf, gamma: ', j)
    print('AUC of valid:' , roc_auc_score(y_valid, y_pred_valid, average = "macro", sample_weight = None))

C: 1, kernel: rbf, gamma:  auto
AUC of valid: 0.7604817127564674
C: 1, kernel: rbf, gamma:  10
AUC of valid: 0.5
C: 1, kernel: rbf, gamma:  1
AUC of valid: 0.5
C: 1, kernel: rbf, gamma:  0.1
AUC of valid: 0.5
C: 1, kernel: rbf, gamma:  0.01
AUC of valid: 0.7604817127564674
C: 1, kernel: rbf, gamma:  0.001
AUC of valid: 0.7389049955396967


In [15]:
C = [1, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2]
gamma = [0.006, 0.007, 0.008, 0.009, 0.01, 0.02, 0.03, 0.04]
for i in C:
    model = SVC(class_weight = 'balanced', C = i, kernel = "rbf", gamma = 'auto')
    model.fit(X_train, y_train)
    y_pred_valid = model.predict(X_valid)
    y_pred_train = model.predict(X_train)
    print('C: ', i, ', kernel: rbf, gamma: auto')
    print('AUC of train:' , roc_auc_score(y_train, y_pred_train, average = "macro", sample_weight = None))
    print('AUC of valid:' , roc_auc_score(y_valid, y_pred_valid, average = "macro", sample_weight = None))

C:  1 , kernel: rbf, gamma: auto
AUC of train: 0.8772268018890851
AUC of valid: 0.7604817127564674
C:  1.1 , kernel: rbf, gamma: auto
AUC of train: 0.880328290955906
AUC of valid: 0.7591993755575379
C:  1.2 , kernel: rbf, gamma: auto
AUC of train: 0.8835702134088266
AUC of valid: 0.7629906333630687
C:  1.3 , kernel: rbf, gamma: auto
AUC of train: 0.8874251032888212
AUC of valid: 0.762600356824264
C:  1.4 , kernel: rbf, gamma: auto
AUC of train: 0.8902486156999009
AUC of valid: 0.7610950044603033
C:  1.5 , kernel: rbf, gamma: auto
AUC of train: 0.8926317794578404
AUC of valid: 0.7675066904549509
C:  1.6 , kernel: rbf, gamma: auto
AUC of train: 0.8961531236247317
AUC of valid: 0.7675624442462088
C:  1.7 , kernel: rbf, gamma: auto
AUC of train: 0.8982890510565347
AUC of valid: 0.7693465655664585
C:  1.8 , kernel: rbf, gamma: auto
AUC of train: 0.8998237028960281
AUC of valid: 0.7680642283675291
C:  1.9 , kernel: rbf, gamma: auto
AUC of train: 0.9027657100819085
AUC of valid: 0.77341659232

In [33]:
C = np.linspace(2, 9, 71)
for i in C:
    model = SVC(class_weight = 'balanced', C = i, kernel = "rbf", gamma = 'auto')
    model.fit(X_train, y_train)
    y_pred_valid = model.predict(X_valid)
    y_pred_train = model.predict(X_train)
    print('C: ', i, ', kernel: rbf, gamma: auto')
    print('AUC of train:' , roc_auc_score(y_train, y_pred_train, average = "macro", sample_weight = None))
    print('AUC of valid:' , roc_auc_score(y_valid, y_pred_valid, average = "macro", sample_weight = None))
    print()

C:  2.0 , kernel: rbf, gamma: auto
AUC of train: 0.9060617565339621
AUC of valid: 0.7739183764495986

C:  2.1 , kernel: rbf, gamma: auto
AUC of train: 0.9078333979230572
AUC of valid: 0.7691793041926851

C:  2.2 , kernel: rbf, gamma: auto
AUC of train: 0.9108295291080706
AUC of valid: 0.7716324710080286

C:  2.3 , kernel: rbf, gamma: auto
AUC of train: 0.9139002778452868
AUC of valid: 0.7701271186440678

C:  2.4 , kernel: rbf, gamma: auto
AUC of train: 0.9154890536839132
AUC of valid: 0.7710191793041926

C:  2.5 , kernel: rbf, gamma: auto
AUC of train: 0.9166696662572335
AUC of valid: 0.7651092774308653

C:  2.6 , kernel: rbf, gamma: auto
AUC of train: 0.9179146496062216
AUC of valid: 0.7591993755575379

C:  2.7 , kernel: rbf, gamma: auto
AUC of train: 0.9189767674047411
AUC of valid: 0.7596454058876003

C:  2.8 , kernel: rbf, gamma: auto
AUC of train: 0.9205011724676997
AUC of valid: 0.7596454058876003

C:  2.9 , kernel: rbf, gamma: auto
AUC of train: 0.921703723652319
AUC of valid: 0

KeyboardInterrupt: 

In [5]:
gamma = [0.006, 0.007, 0.008, 0.009, 0.01, 0.02, 0.03, 0.04]
for i in gamma:
    model = SVC(class_weight = 'balanced', C = 2, kernel = "rbf", gamma = i)
    model.fit(X_train, y_train)
    y_pred_valid = model.predict(X_valid)
    y_pred_train = model.predict(X_train)
    print('C: 2, kernel: rbf, gamma: ', i)
    print('AUC of train:' , roc_auc_score(y_train, y_pred_train, average = "macro", sample_weight = None))
    print('AUC of valid:' , roc_auc_score(y_valid, y_pred_valid, average = "macro", sample_weight = None))
    print()

C: 2, kernel: rbf, gamma:  0.006
AUC of train: 0.8663614090631424
AUC of valid: 0.7550735950044603

C: 2, kernel: rbf, gamma:  0.007
AUC of train: 0.8775691493204941
AUC of valid: 0.7584745762711864

C: 2, kernel: rbf, gamma:  0.008
AUC of train: 0.887081310799183
AUC of valid: 0.7625446030330063

C: 2, kernel: rbf, gamma:  0.009
AUC of train: 0.8954757854219899
AUC of valid: 0.7635481712756468

C: 2, kernel: rbf, gamma:  0.01
AUC of train: 0.9060617565339621
AUC of valid: 0.7739183764495986

C: 2, kernel: rbf, gamma:  0.02
AUC of train: 0.9642130015829957
AUC of valid: 0.6427297056199822



KeyboardInterrupt: 

In [6]:
gamma = [0.0095, 0.01, 0.011, 0.012]
for i in gamma:
    model = SVC(class_weight = 'balanced', C = 2, kernel = "rbf", gamma = i)
    model.fit(X_train, y_train)
    y_pred_valid = model.predict(X_valid)
    y_pred_train = model.predict(X_train)
    print('C: 2, kernel: rbf, gamma: ', i)
    print('AUC of train:' , roc_auc_score(y_train, y_pred_train, average = "macro", sample_weight = None))
    print('AUC of valid:' , roc_auc_score(y_valid, y_pred_valid, average = "macro", sample_weight = None))
    print()

C: 2, kernel: rbf, gamma:  0.0095
AUC of train: 0.8997271467325263
AUC of valid: 0.7695695807314897

C: 2, kernel: rbf, gamma:  0.01
AUC of train: 0.9060617565339621
AUC of valid: 0.7739183764495986

C: 2, kernel: rbf, gamma:  0.011
AUC of train: 0.915939649113588
AUC of valid: 0.7666703835860837

C: 2, kernel: rbf, gamma:  0.012
AUC of train: 0.92124143638788
AUC of valid: 0.7476583407671721



In [13]:
X_test = pd.read_csv("X_test_filtered.csv")
X_test = X_test.drop(labels = ['Unnamed: 0'], axis = 1)
X_test = X_test.iloc[:, 0:100]
model = SVC(class_weight = 'balanced', C = 2, kernel = "rbf", gamma = 'auto')
model.fit(X_train2, y_train2)
y_pred_test = model.predict(X_test)
pd.DataFrame(y_pred_test).to_csv("y_test.csv")

In [7]:
model = SVR(C = 2, kernel = "rbf", gamma = 'auto')
model.fit(X_train, y_train)
y_pred_valid = model.predict(X_valid)
y_pred_train = model.predict(X_train)
print('C = ', i)
print('AUC of train:' , mean_squared_error(y_train, y_pred_train, squared=False))
print('AUC of valid:' , mean_squared_error(y_valid, y_pred_valid, squared=False))

AUC of valid: 1.7817279021483168
AUC of valid: 1.8872166693854993


In [8]:
X_test = pd.read_csv("X_test_filtered.csv")
X_test = X_test.drop(labels = ['Unnamed: 0'], axis = 1)
X_test = X_test.iloc[:, 0:100]
#model = SVC(class_weight = 'balanced', C = 2, kernel = "rbf", gamma = 'auto')
#model.fit(X_train2, y_train2)
y_pred_test = model.predict(X_test)
pd.DataFrame(y_pred_test).to_csv("y_test.csv")

In [9]:
for i in [1.9, 2.1]:
    model = SVR(C = i, kernel = "rbf", gamma = 'auto')
    model.fit(X_train, y_train)
    y_pred_valid = model.predict(X_valid)
    y_pred_train = model.predict(X_train)
    print('C = ', i)
    print('AUC of train:' , mean_squared_error(y_train, y_pred_train, squared=False))
    print('AUC of valid:' , mean_squared_error(y_valid, y_pred_valid, squared=False))

C =  1.9
AUC of train: 1.7863966338295922
AUC of valid: 1.8884642351853336
C =  2.1
AUC of train: 1.7771979947852412
AUC of valid: 1.8863074940689841


In [10]:
for i in [2.2, 2.3]:
    model = SVR(C = i, kernel = "rbf", gamma = 'auto')
    model.fit(X_train, y_train)
    y_pred_valid = model.predict(X_valid)
    y_pred_train = model.predict(X_train)
    print('C = ', i)
    print('AUC of train:' , mean_squared_error(y_train, y_pred_train, squared=False))
    print('AUC of valid:' , mean_squared_error(y_valid, y_pred_valid, squared=False))

C =  2.2
AUC of train: 1.7727960584724272
AUC of valid: 1.8855053698967823
C =  2.3
AUC of train: 1.7684419266766331
AUC of valid: 1.884800434072471


In [11]:
for i in [2.4, 2.5, 2.6, 2.7, 2.8, 2.9, 3]:
    model = SVR(C = i, kernel = "rbf", gamma = 'auto')
    model.fit(X_train, y_train)
    y_pred_valid = model.predict(X_valid)
    y_pred_train = model.predict(X_train)
    print('C = ', i)
    print('AUC of train:' , mean_squared_error(y_train, y_pred_train, squared=False))
    print('AUC of valid:' , mean_squared_error(y_valid, y_pred_valid, squared=False))

C =  2.4
AUC of train: 1.764208423414914
AUC of valid: 1.8841586097921286


KeyboardInterrupt: 

In [12]:
model = SVR(C = 3, kernel = "rbf", gamma = 'auto')
model.fit(X_train, y_train)
y_pred_valid = model.predict(X_valid)
y_pred_train = model.predict(X_train)
print('AUC of train:' , mean_squared_error(y_train, y_pred_train, squared=False))
print('AUC of valid:' , mean_squared_error(y_valid, y_pred_valid, squared=False))

AUC of train: 1.741019722731009
AUC of valid: 1.881016241093991


In [13]:
model = SVR(C = 5, kernel = "rbf", gamma = 'auto')
model.fit(X_train, y_train)
y_pred_valid = model.predict(X_valid)
y_pred_train = model.predict(X_train)
print('AUC of train:' , mean_squared_error(y_train, y_pred_train, squared=False))
print('AUC of valid:' , mean_squared_error(y_valid, y_pred_valid, squared=False))

AUC of train: 1.6820556310056973
AUC of valid: 1.8726880429285626


In [14]:
model = SVR(C = 8, kernel = "rbf", gamma = 'auto')
model.fit(X_train, y_train)
y_pred_valid = model.predict(X_valid)
y_pred_train = model.predict(X_train)
print('AUC of train:' , mean_squared_error(y_train, y_pred_train, squared=False))
print('AUC of valid:' , mean_squared_error(y_valid, y_pred_valid, squared=False))

AUC of train: 1.6172414245874978
AUC of valid: 1.8854966546413001


In [15]:
for i in [5.5, 6, 6.5, 7, 7.5]:
    model = SVR(C = i, kernel = "rbf", gamma = 'auto')
    model.fit(X_train, y_train)
    y_pred_valid = model.predict(X_valid)
    y_pred_train = model.predict(X_train)
    print('AUC of train:' , mean_squared_error(y_train, y_pred_train, squared=False))
    print('AUC of valid:' , mean_squared_error(y_valid, y_pred_valid, squared=False))

AUC of train: 1.669678210587689
AUC of valid: 1.8726108275880462
AUC of train: 1.6579465078835374
AUC of valid: 1.873914504957696
AUC of train: 1.6468921560963385
AUC of valid: 1.8758820556653253
AUC of train: 1.6364448913519967
AUC of valid: 1.8783340344287045
AUC of train: 1.626596343150119
AUC of valid: 1.8816278947108849


In [17]:
for e in [0.01, 0.1, 1, 10]:
    model = SVR(C = 5.5, kernel = "rbf", gamma = 'auto', epsilon = e)
    model.fit(X_train, y_train)
    y_pred_valid = model.predict(X_valid)
    y_pred_train = model.predict(X_train)
    print(e)
    print('RMSE of train:' , mean_squared_error(y_train, y_pred_train, squared=False))
    print('RMSE of valid:' , mean_squared_error(y_valid, y_pred_valid, squared=False))

0.01
RMSE of train: 1.6738532941633857
RMSE of valid: 1.8751272643541537
0.1
RMSE of train: 1.669678210587689
RMSE of valid: 1.8726108275880462
1
RMSE of train: 1.6315556467200638
RMSE of valid: 1.9343909516970126
10
RMSE of train: 3.0145682832800524
RMSE of valid: 3.035915191230261


In [18]:
for e in [0.05, 0.15, 0.2, 0.25, 0.3]:
    model = SVR(C = 5.5, kernel = "rbf", gamma = 'auto', epsilon = e)
    model.fit(X_train, y_train)
    y_pred_valid = model.predict(X_valid)
    y_pred_train = model.predict(X_train)
    print(e)
    print('RMSE of train:' , mean_squared_error(y_train, y_pred_train, squared=False))
    print('RMSE of valid:' , mean_squared_error(y_valid, y_pred_valid, squared=False))

0.05
RMSE of train: 1.672069153950644
RMSE of valid: 1.8739128858535758
0.15
RMSE of train: 1.6671770462464301
RMSE of valid: 1.8721883519902816
0.2
RMSE of train: 1.6652287821985716
RMSE of valid: 1.8718239334830864
0.25
RMSE of train: 1.6628007183600995
RMSE of valid: 1.8711518399691
0.3
RMSE of train: 1.6597833234213262
RMSE of valid: 1.8703705105625084


In [19]:
model = SVR(C = 5.5, kernel = "rbf", gamma = 'auto', epsilon = 0.4)
model.fit(X_train, y_train)
y_pred_valid = model.predict(X_valid)
y_pred_train = model.predict(X_train)
print('RMSE of train:' , mean_squared_error(y_train, y_pred_train, squared=False))
print('RMSE of valid:' , mean_squared_error(y_valid, y_pred_valid, squared=False))

0.35
RMSE of train: 1.6571319862582894
RMSE of valid: 1.8699867165072306
0.4
RMSE of train: 1.65442354452938
RMSE of valid: 1.8703181268847604
0.45
RMSE of train: 1.651274770434508
RMSE of valid: 1.8718776319741255
0.5
RMSE of train: 1.6485814510814358
RMSE of valid: 1.8746697233326381


In [6]:
model = SVR(C = 5.5, kernel = "rbf", gamma = 'auto', epsilon = 0.4)
model.fit(X_train, y_train)
y_pred_valid = model.predict(X_valid)
y_pred_train = model.predict(X_train)
print('RMSE of train:' , mean_squared_error(y_train, y_pred_train, squared=False))
print('RMSE of valid:' , mean_squared_error(y_valid, y_pred_valid, squared=False))

RMSE of train: 0.4000331355378851
RMSE of valid: 0.42749308984261675
