In [None]:
#load dataframe

import pandas as pd

# Semicolon-delimited CSV holding the dataset; the path is bound to a name so
# it is easy to spot and change.
csv_path = 'dataset_pca_3_components_personality.csv'
df = pd.read_csv(csv_path, sep=';')

# Keep the preview as the cell's last expression so the notebook renders it.
df.head()

Unnamed: 0,PC1,PC2,PC3,Personality
0,0.425156,0.024034,0.255252,0.0
1,-0.822265,-0.213964,0.008904,1.0
2,-0.545634,-0.053308,0.213832,1.0
3,0.908568,-0.042458,0.072104,0.0
4,0.404915,0.079302,0.065595,0.0


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

# Target is the 'Personality' label; every remaining column is used as a predictor.
# NOTE(review): the preview of `df` lists an 'Unnamed: 0' column. If that is a
# saved row index rather than a display artifact, it is being fed to the model
# as a feature here; confirm against the CSV.
y = df['Personality']
X = df.drop(columns='Personality')

# Stratified 5-fold splitter, shuffled with a fixed seed so folds are repeatable.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Scores computed with average='weighted' on every fold, keyed by the
# column name they get in the results table.
weighted_scorers = {
    'Precision': precision_score,
    'Recall': recall_score,
    'F1 Score': f1_score,
}

# One dict of scores per fold is collected here.
metrics = []

for fold, (train_index, test_index) in enumerate(skf.split(X, y)):
    print(f"\nFold {fold + 1}")

    # Slice out this fold's training and held-out partitions by position.
    X_train = X.iloc[train_index]
    y_train = y.iloc[train_index]
    X_test = X.iloc[test_index]
    y_test = y.iloc[test_index]

    # Fit a fresh classifier on the training partition and predict the held-out rows.
    # `model` is rebound on every iteration, so after the loop it refers to the
    # estimator fitted on the last fold.
    model = GradientBoostingClassifier(random_state=42).fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Confusion matrix for this fold
    cm = confusion_matrix(y_test, y_pred)
    print("Confusion Matrix:")
    print(cm)

    # Assemble this fold's row: fold number (1-based), accuracy, then the
    # weighted scores in the same order as the result columns.
    fold_scores = {'Fold': fold + 1, 'Accuracy': accuracy_score(y_test, y_pred)}
    for label, scorer in weighted_scorers.items():
        fold_scores[label] = scorer(y_test, y_pred, average='weighted')
    metrics.append(fold_scores)

# Per-fold table
metrics_df = pd.DataFrame(metrics)

# Column-wise mean of the score columns, reshaped into a single labelled row.
avg_metrics = metrics_df.drop(columns='Fold').mean().to_frame().T
avg_metrics['Fold'] = 'Average'

# Per-fold rows followed by the average row
final_results = pd.concat([metrics_df, avg_metrics], ignore_index=True)

# Show the combined table
print("\nFinal Results per Fold and Average:")
print(final_results)



Fold 1
Matriz de Confusión:
[[261  20]
 [ 19 200]]

Fold 2
Matriz de Confusión:
[[260  21]
 [ 15 204]]

Fold 3
Matriz de Confusión:
[[262  19]
 [ 15 204]]

Fold 4
Matriz de Confusión:
[[253  27]
 [ 21 198]]

Fold 5
Matriz de Confusión:
[[261  19]
 [ 20 199]]

Resultados Finales por Fold y Promedio:
       Fold  Accuracy  Precision    Recall  F1 Score
0         1  0.922000   0.922046  0.922000  0.922019
1         2  0.928000   0.928465  0.928000  0.928098
2         3  0.932000   0.932248  0.932000  0.932064
3         4  0.903808   0.904329  0.903808  0.903937
4         5  0.921844   0.921812  0.921844  0.921824
5  Promedio  0.921530   0.921780  0.921530  0.921588


In [None]:
import pickle

# Persist the fitted estimator bound to `model` by the previous cell.
# NOTE(review): the previous cell rebinds `model` inside the cross-validation
# loop, so what gets saved is the estimator fitted on the last fold's training
# partition (4 of the 5 folds), not on the full dataset. Confirm that this is
# the model intended for reuse.
filename = 'modelo.pkl'

# Open the file in a context manager so the handle is flushed and closed even
# if pickling raises, instead of leaving an anonymous open file object behind.
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)

print(f"Model saved to {filename}")

Model saved to modelo.pkl


In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import GradientBoostingClassifier

# Load the dataset (semicolon-delimited CSV).
df = pd.read_csv("dataset_pca_3_components_personality.csv", sep=';')

# Separate features and target.
# The feature columns are listed explicitly rather than taken as "everything
# except the target": the probe further down in this cell builds its samples
# with exactly these three columns, and the estimator must be fitted on the
# same set for predict() to accept them. Any extra column in the CSV (for
# example a saved row index) is therefore ignored instead of becoming a feature.
feature_columns = ['PC1', 'PC2', 'PC3']
X = df[feature_columns]
y = df['Personality']

# Train the model on the full dataset (no hold-out in this cell).
model = GradientBoostingClassifier(random_state=42)
model.fit(X, y)

# Probe the fitted model on a regular 30 x 30 x 30 grid over [-3, 3] in each
# component and keep the first few grid points that are classified as 1.
# NOTE(review): the previewed rows of the dataset have component values within
# roughly +/-1, so much of this grid may lie outside the training data; confirm
# that [-3, 3] is the intended search range.
values = np.linspace(-3, 3, 30)
max_examples = 5

# indexing='ij' followed by a C-order ravel enumerates the grid with PC1
# varying slowest and PC3 fastest, i.e. the same order as three nested loops
# over PC1, then PC2, then PC3.
pc1_grid, pc2_grid, pc3_grid = np.meshgrid(values, values, values, indexing='ij')
grid = pd.DataFrame({
    'PC1': pc1_grid.ravel(),
    'PC2': pc2_grid.ravel(),
    'PC3': pc3_grid.ravel(),
})

# Score every grid point in one batched call instead of building a one-row
# DataFrame and calling predict() separately for each of up to 27,000 points.
grid_pred = model.predict(grid)

# First `max_examples` grid points (in enumeration order) predicted as class 1,
# stored as (pc1, pc2, pc3) tuples. The list is shorter, possibly empty, when
# fewer positives exist on the grid.
positive = grid.loc[grid_pred == 1].head(max_examples)
found = list(positive.itertuples(index=False, name=None))

print("Examples combinations that prediction is 1:")
for combo in found:
    print(f"PC1: {combo[0]:.2f}, PC2: {combo[1]:.2f}, PC3: {combo[2]:.2f}")


Ejemplos de combinaciones que predicen 1:
PC1: -0.93, PC2: -3.00, PC3: -3.00
PC1: -0.93, PC2: -3.00, PC3: -2.79
PC1: -0.93, PC2: -3.00, PC3: -2.59
PC1: -0.93, PC2: -3.00, PC3: -2.38
PC1: -0.93, PC2: -3.00, PC3: -2.17
