In [10]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.metrics import classification_report, accuracy_score, mean_squared_error, mean_absolute_error

In [2]:
# Read the dataset from the Excel workbook into a DataFrame.
data = pd.read_excel("After_Pre_Processing.xlsx")

# Pull out the two target columns first ...
y_S1Q061 = data['S1Q061nor']  # target S1Q061 (recoded to class labels in a later cell)
y_S1Q06P1 = data['S1Q06_P1nor']  # target S1Q06P1 (used as the regression target)

# ... then build the feature matrix from everything that is left once both
# targets and the 'USER' column are removed.
# NOTE(review): 'USER' is presumably a respondent identifier — confirm.
X = data.drop(columns=['S1Q061nor', 'S1Q06_P1nor', 'USER'])

In [3]:
# Convert values of the S1Q061 target to class labels: 0 stays 0, 0.5 -> 1, 1 -> 2.
# The recoded series is derived from the original column in `data` rather than
# from `y_S1Q061` itself, so this cell is idempotent: re-running it always gives
# the same result. Recoding `y_S1Q061` in place would, on a second run, map the
# already-recoded 1 to 2 and silently merge two classes.
y_S1Q061 = data['S1Q061nor'].replace({0.5: 1, 1: 2})

In [4]:
# Split the data into training (80%) and testing (20%) sets.
# X and both targets are split in ONE call so that every returned array shares
# the same row partition by construction. Two separate calls would overwrite
# X_train / X_test and keep the targets aligned only because both calls happen
# to use the same random_state and the same X.
(
    X_train, X_test,
    y_train_S1Q061, y_test_S1Q061,
    y_train_S1Q06P1, y_test_S1Q06P1,
) = train_test_split(X, y_S1Q061, y_S1Q06P1, test_size=0.2, random_state=42)

In [5]:
# Classification model for the S1Q061 target: build a decision tree with a
# fixed seed and train it on the training split in one step (fit() returns the
# estimator itself), then predict class labels for the held-out rows.
dt_classifier = DecisionTreeClassifier(random_state=42).fit(X_train, y_train_S1Q061)
y_pred_classifier = dt_classifier.predict(X_test)

In [6]:
# Regression model for the S1Q06P1 target: build a decision tree regressor with
# a fixed seed and train it on the training split in one step (fit() returns
# the estimator itself), then predict values for the held-out rows.
dt_regressor = DecisionTreeRegressor(random_state=42).fit(X_train, y_train_S1Q06P1)
y_pred_regressor = dt_regressor.predict(X_test)

In [7]:
# Report how well the classifier's predictions match the held-out S1Q061
# labels: per-class precision / recall / F1 first, then overall accuracy.
print("Performance metrics for classification (variable S1Q061):")
clf_report = classification_report(y_true=y_test_S1Q061, y_pred=y_pred_classifier)
print(clf_report)
clf_accuracy = accuracy_score(y_true=y_test_S1Q061, y_pred=y_pred_classifier)
print("Accuracy:", clf_accuracy)


Performance metrics for classification (variable S1Q061):
              precision    recall  f1-score   support

         0.0       0.50      0.56      0.53         9
         1.0       0.82      0.86      0.84        36
         2.0       1.00      0.67      0.80         9

    accuracy                           0.78        54
   macro avg       0.77      0.69      0.72        54
weighted avg       0.79      0.78      0.78        54

Accuracy: 0.7777777777777778


In [11]:
# Report the regressor's error on the held-out S1Q06P1 values.
print("Performance metrics for regression (variable S1Q06P1):")

# Mean squared error between the true and predicted target values.
mse = mean_squared_error(y_true=y_test_S1Q06P1, y_pred=y_pred_regressor)
print("Mean Squared Error:", mse)

# Mean absolute error between the true and predicted target values.
mae = mean_absolute_error(y_true=y_test_S1Q06P1, y_pred=y_pred_regressor)
print("Mean Absolute Error:", mae)

Performance metrics for regression (variable S1Q06P1):
Mean Squared Error: 0.005511279949926053
Mean Absolute Error: 0.040108989197531045
