In [15]:
import pandas as pd
import numpy as np
import mlflow
import mlflow.sklearn

In [16]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report,precision_score,recall_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

In [17]:
dataset = pd.read_csv('C:\mlflow\Globant_mlflow\survey lung cancer.csv')

In [18]:
dataset.head()

Unnamed: 0,GENDER,AGE,SMOKING,YELLOW_FINGERS,ANXIETY,PEER_PRESSURE,CHRONIC DISEASE,FATIGUE,ALLERGY,WHEEZING,ALCOHOL CONSUMING,COUGHING,SHORTNESS OF BREATH,SWALLOWING DIFFICULTY,CHEST PAIN,LUNG_CANCER
0,M,69,1,2,2,1,1,2,1,2,2,2,2,2,2,YES
1,M,74,2,1,1,1,2,2,2,1,1,1,2,2,2,YES
2,F,59,1,1,1,2,1,2,1,2,1,2,2,1,2,NO
3,M,63,2,2,2,1,1,1,1,1,2,1,1,2,2,NO
4,F,63,1,2,1,1,1,1,1,2,1,2,2,1,1,NO


In [19]:
dataset.columns

Index(['GENDER', 'AGE', 'SMOKING', 'YELLOW_FINGERS', 'ANXIETY',
       'PEER_PRESSURE', 'CHRONIC DISEASE', 'FATIGUE ', 'ALLERGY ', 'WHEEZING',
       'ALCOHOL CONSUMING', 'COUGHING', 'SHORTNESS OF BREATH',
       'SWALLOWING DIFFICULTY', 'CHEST PAIN', 'LUNG_CANCER'],
      dtype='object')

In [20]:
encoder = LabelEncoder()
dataset['GENDER'] = encoder.fit_transform(dataset['GENDER'])
dataset['LUNG_CANCER'] = encoder.fit_transform(dataset['LUNG_CANCER'])

In [21]:
#split the dataset
x = dataset.iloc[:,:-1].values
y = dataset.iloc[:,-1].values

In [22]:
dataset.head()

Unnamed: 0,GENDER,AGE,SMOKING,YELLOW_FINGERS,ANXIETY,PEER_PRESSURE,CHRONIC DISEASE,FATIGUE,ALLERGY,WHEEZING,ALCOHOL CONSUMING,COUGHING,SHORTNESS OF BREATH,SWALLOWING DIFFICULTY,CHEST PAIN,LUNG_CANCER
0,1,69,1,2,2,1,1,2,1,2,2,2,2,2,2,1
1,1,74,2,1,1,1,2,2,2,1,1,1,2,2,2,1
2,0,59,1,1,1,2,1,2,1,2,1,2,2,1,2,0
3,1,63,2,2,2,1,1,1,1,1,2,1,1,2,2,0
4,0,63,1,2,1,1,1,1,1,2,1,2,2,1,1,0


In [23]:
# Normalize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(x)

In [24]:

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.3, random_state=42)

In [25]:
def evaluate_model(model, X_test, y_test):
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    #report = classification_report(y_test, y_pred)
    precision = precision_score(y_test, y_pred,pos_label=1)
    recall = recall_score(y_test, y_pred, pos_label=1)
    return accuracy,precision,recall

In [26]:
# Hyperparameter values to try
kernels = ['linear', 'poly', 'sigmoid']
degrees = [2, 3, 4]
C_values = [0.001, 0.01, 0.1, 1]

In [27]:
mlflow.set_experiment(experiment_name="exp_1")


2024/05/30 12:22:00 INFO mlflow.tracking.fluent: Experiment with name 'exp_1' does not exist. Creating a new experiment.


<Experiment: artifact_location='file:///c:/mlflow/Globant_mlflow/mlruns/771130435585652042', creation_time=1717051920256, experiment_id='771130435585652042', last_update_time=1717051920256, lifecycle_stage='active', name='exp_1', tags={}>

In [28]:
# Iterate over all combinations of hyperparameters
for kernel in kernels:
    for degree in degrees:
        for C in C_values:
            # Start the run
            with mlflow.start_run():
                # Initialize and fit the model
                svm_model = SVC(kernel=kernel, probability=True, degree=degree, C=C)
                svm_model.fit(X_train, y_train)

                # Evaluate the model
                svm_accuracy, svm_p, svm_r = evaluate_model(svm_model, X_test, y_test)
                print(f"SVM Accuracy with kernel={kernel}, degree={degree}, C={C}: {svm_accuracy}")

                # Log parameters
                mlflow.log_param("kernel", kernel)
                mlflow.log_param("degree", degree)
                mlflow.log_param("C", C)

                # Log metrics
                mlflow.log_metric("accuracy", svm_accuracy)
                mlflow.log_metric("precision", svm_p)
                mlflow.log_metric("recall", svm_r)

                # Log the model
                mlflow.sklearn.log_model(svm_model, "model")

SVM Accuracy with kernel=linear, degree=2, C=0.001: 0.9247311827956989




SVM Accuracy with kernel=linear, degree=2, C=0.01: 0.9247311827956989




SVM Accuracy with kernel=linear, degree=2, C=0.1: 0.978494623655914




SVM Accuracy with kernel=linear, degree=2, C=1: 0.967741935483871




SVM Accuracy with kernel=linear, degree=3, C=0.001: 0.9247311827956989




SVM Accuracy with kernel=linear, degree=3, C=0.01: 0.9247311827956989




SVM Accuracy with kernel=linear, degree=3, C=0.1: 0.978494623655914




SVM Accuracy with kernel=linear, degree=3, C=1: 0.967741935483871




SVM Accuracy with kernel=linear, degree=4, C=0.001: 0.9247311827956989




SVM Accuracy with kernel=linear, degree=4, C=0.01: 0.9247311827956989




SVM Accuracy with kernel=linear, degree=4, C=0.1: 0.978494623655914




SVM Accuracy with kernel=linear, degree=4, C=1: 0.967741935483871




SVM Accuracy with kernel=poly, degree=2, C=0.001: 0.9247311827956989




SVM Accuracy with kernel=poly, degree=2, C=0.01: 0.9247311827956989




SVM Accuracy with kernel=poly, degree=2, C=0.1: 0.9247311827956989




SVM Accuracy with kernel=poly, degree=2, C=1: 0.946236559139785




SVM Accuracy with kernel=poly, degree=3, C=0.001: 0.9247311827956989




SVM Accuracy with kernel=poly, degree=3, C=0.01: 0.9247311827956989




SVM Accuracy with kernel=poly, degree=3, C=0.1: 0.9247311827956989




SVM Accuracy with kernel=poly, degree=3, C=1: 0.978494623655914




SVM Accuracy with kernel=poly, degree=4, C=0.001: 0.9247311827956989




SVM Accuracy with kernel=poly, degree=4, C=0.01: 0.9247311827956989




SVM Accuracy with kernel=poly, degree=4, C=0.1: 0.9247311827956989




SVM Accuracy with kernel=poly, degree=4, C=1: 0.967741935483871




SVM Accuracy with kernel=sigmoid, degree=2, C=0.001: 0.9247311827956989




SVM Accuracy with kernel=sigmoid, degree=2, C=0.01: 0.9247311827956989




SVM Accuracy with kernel=sigmoid, degree=2, C=0.1: 0.9247311827956989




SVM Accuracy with kernel=sigmoid, degree=2, C=1: 0.967741935483871




SVM Accuracy with kernel=sigmoid, degree=3, C=0.001: 0.9247311827956989




SVM Accuracy with kernel=sigmoid, degree=3, C=0.01: 0.9247311827956989




SVM Accuracy with kernel=sigmoid, degree=3, C=0.1: 0.9247311827956989




SVM Accuracy with kernel=sigmoid, degree=3, C=1: 0.967741935483871




SVM Accuracy with kernel=sigmoid, degree=4, C=0.001: 0.9247311827956989




SVM Accuracy with kernel=sigmoid, degree=4, C=0.01: 0.9247311827956989




SVM Accuracy with kernel=sigmoid, degree=4, C=0.1: 0.9247311827956989




SVM Accuracy with kernel=sigmoid, degree=4, C=1: 0.967741935483871


