In [6]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import MinMaxScaler
from sklearn.pipeline import Pipeline
import numpy as np
import pandas as pd

## Read Data

In [2]:
# Load the train and test samples from the working directory and report their shapes.
train, test = (pd.read_csv(csv_name) for csv_name in ("train_sample.csv", "test_sample.csv"))
print(train.shape, test.shape)

(1000, 4) (1000, 3)


In [3]:
# Preview the first five training rows (head() defaults to n=5).
train.head()

Unnamed: 0,ID,x0,x1,class
0,0,-1.536908,1.047249,0
1,1,1.024568,15.164733,0
2,2,0.82066,1.628443,0
3,3,11.545803,3.518345,0
4,4,12.60762,5.557497,0


In [4]:
# Preview the first five test rows (head() defaults to n=5).
test.head()

Unnamed: 0,ID,x0,x1
0,0,22.076855,-2.78519
1,1,7.905336,3.132435
2,2,-4.586772,14.183051
3,3,22.194153,16.717894
4,4,8.273831,19.694569


In [5]:
# Build the model inputs for the MinMaxScaler + SVC pipeline.
# Features are selected by name for BOTH frames so train and test always share
# the same columns in the same order; dropping ['class', 'ID'] from train would
# silently keep any extra column that test does not provide.
feature_columns = ['x0', 'x1']
X_train = train[feature_columns]
y_train = train['class']  # target labels
X_test = test[feature_columns]

## Create pipeline

In [7]:
# Two-step pipeline: min-max feature scaling followed by a support vector classifier.
# The step names ('scaler', 'svc') are the prefixes used to address their
# hyper-parameters, e.g. 'svc__C'.
pipe = Pipeline(steps=[('scaler', MinMaxScaler()), ('svc', SVC())])

## Grid Search

In [8]:
# Search space for the 'svc' pipeline step:
#   gamma -> 6 log-spaced values from 1e-4 to 1e1
#   C     -> 10 evenly spaced values from 1 to 500
param_grid = {
    'svc__gamma': np.logspace(start=-4, stop=1, num=6),
    'svc__C': np.linspace(start=1, stop=500, num=10),
}

In [10]:
# Exhaustive search over param_grid with 5-fold cross-validation, fitting the
# whole pipeline (scaler + SVC) on each fold. n_jobs=-1 runs the fits in
# parallel; verbose=10 logs progress for each fit.
grid_search = GridSearchCV(
    estimator=pipe,
    param_grid=param_grid,
    cv=5,
    n_jobs=-1,
    verbose=10,
)
grid_search.fit(X_train, y_train)

# Keep the winning hyper-parameters and their cross-validated score for reporting.
best_params, best_score = grid_search.best_params_, grid_search.best_score_

Fitting 5 folds for each of 60 candidates, totalling 300 fits


## Predict and Save Submission

In [11]:
# Destination of the submission file (relative to the working directory).
submission_file_path = 'submission.csv'

# Label the test rows with the estimator selected by the grid search.
test['class'] = grid_search.predict(X_test)

# Write only the ID and predicted class, without the DataFrame index.
test.loc[:, ['ID', 'class']].to_csv(submission_file_path, index=False)

# Show the selected hyper-parameters, their CV score, and the output path.
(best_params, best_score, submission_file_path)

({'svc__C': 56.44444444444444, 'svc__gamma': 1.0}, 0.907, 'submission.csv')