In [1]:
import os
import sys
import pandas as pd
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import time

#Optimization
from sklearn.model_selection import GridSearchCV


In [2]:
# Loading train and test data CSV files
train_data = pd.read_csv(os.path.join(r'F:\Kabir\CSUN\R','training_data_final.csv'))
test_data = pd.read_csv(os.path.join(r'F:\Kabir\CSUN\R','testing_data_final.csv'))

In [3]:
train_data.head(2)

Unnamed: 0,sentiment_class,aake,aapki,aapse,abhorrent,ability,abject,able,abomination,abrasive,...,zoo,zoom,zoomedin,zoomers,zooming,zz,zürich,äckligt,åt,punctuation_percent
0,negative,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.26
1,positive,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [4]:
test_data.head(2)

Unnamed: 0,sentiment_class,aake,aapki,aapse,abhorrent,ability,abject,able,abomination,abrasive,...,zoo,zoom,zoomedin,zoomers,zooming,zz,zürich,äckligt,åt,punctuation_percent
0,positive,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.06
1,neutral,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.08


In [5]:
# train_data and test_data split of TF-IDF transformed features
X_train = train_data.drop(columns=['sentiment_class', 'punctuation_percent'])  
y_train = train_data['sentiment_class']
X_test = test_data.drop(columns=['sentiment_class', 'punctuation_percent'])  
y_test = test_data['sentiment_class']


In [6]:
# Initialize SVM model
svm_model = SVC()

In [7]:
#Hyperparameterized tuning
start_time = time.time()
param_grid = {
    'C': [ 1, 2, 10],  # Regularization parameter
    'kernel': ['linear', 'rbf', 'poly'],  # Kernel type
    'gamma': ['scale', 'auto']  # Kernel coefficient for 'rbf' and 'poly'
}

# Initialize GridSearchCV
grid_search = GridSearchCV(estimator=svm_model, param_grid=param_grid, cv=5,
                           scoring='accuracy', n_jobs=-1)

# Perform grid search
grid_search.fit(X_train, y_train)
print("Time taken for Hypertuning : {} seconds".format(round(time.time()-start_time, 3)))

Time taken for Hypertuning : 1224.936 seconds


In [8]:
# Get the best hyperparameters
best_params = grid_search.best_params_
print("Best Hyperparameters:", best_params)

Best Hyperparameters: {'C': 2, 'gamma': 'scale', 'kernel': 'linear'}


In [9]:
X_test.head(2)

Unnamed: 0,aake,aapki,aapse,abhorrent,ability,abject,able,abomination,abrasive,abso,...,zone,zoo,zoom,zoomedin,zoomers,zooming,zz,zürich,äckligt,åt
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [10]:
# Get the best model
best_model = grid_search.best_estimator_

# Make predictions on the test data using the best model
predictions = best_model.predict(X_test)

# Evaluate the best model
accuracy = accuracy_score(y_test, predictions)
print("Accuracy:", accuracy)

Accuracy: 0.7191176470588235


In [11]:
from joblib import dump

dump(best_model, r'F:\Kabir\CSUN\R\svm_model.pkl')

['F:\\Kabir\\CSUN\\R\\svm_model.pkl']