CONFIGURACIÓN INICIAL

In [20]:
# Verificar versión de Python y recursos disponibles
import sys
print("Versión de Python:", sys.version)

# Verificar GPU disponible (opcional)
import tensorflow as tf
print("GPU disponible:", tf.config.list_physical_devices('GPU'))

# Instalar librerías adicionales si es necesario
!pip install seaborn plotly
!pip install -U kaleido # Guardar grafico interactivo

# Importaciones estándar
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os # Control carpetas
import warnings
import getpass # Token Github
import shutil
from google.colab import drive # Montar Mi unidad
import plotly.express as px # Generar gráfico interactivo
# Silence every warning for the rest of the session
warnings.filterwarnings(action='ignore')

# Plot defaults: stock matplotlib style, seaborn "husl" palette,
# 10x6 figures and size-12 text
plt.style.use('default')
sns.set_palette("husl")
plt.rcParams.update({
    'figure.figsize': (10, 6),
    'font.size': 12,
})

# Mount Google Drive at /content/drive
drive.mount('/content/drive')

print("Entorno configurado correctamente en Google Colab")

Versión de Python: 3.11.13 (main, Jun  4 2025, 08:57:29) [GCC 11.4.0]
GPU disponible: []
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Entorno configurado correctamente en Google Colab


In [None]:
# Clonar y trabajar con repositorio Github

# Datos de usuario
email = "veronica.ochoa@uees.edu.ec"
username = "veritochoah"
token = getpass.getpass("Token de GitHub:")
repository = "UEES-IA-Semana1-Grupo2-VO"
origen = "veritochoah"
ruta_repo = f"/content/drive/MyDrive/IA/{repository}"

# Configuración de Git
!git config --global user.email "{email}"
!git config --global user.name "{username}"

# Eliminar carpeta si ya existe
if os.path.exists(f"/content/drive/MyDrive/IA/{repository}"):
    shutil.rmtree(f"/content/drive/MyDrive/IA/{repository}")
    %cd /content


# Clonar el repositorio desde GitHub con el token
!git clone https://{username}:{token}@github.com/{origen}/{repository}.git "{ruta_repo}"
%cd "{ruta_repo}"

# Función para guardar automáticamente el notebook en GitHub
def save_to_github(notebook_name, commit_message):
    """Stage, commit and push everything in the current working directory.

    The git commands run in the process's current directory, so the caller
    must already be inside the repository. They are executed with argument
    lists (no shell), so a commit message containing quotes, ``$`` or other
    shell metacharacters is passed to git verbatim.

    Args:
        notebook_name: Label used only in the printed status message.
        commit_message: Message for the commit.

    Returns:
        None. The combined output of each git command is printed, followed by
        a success message when ``git push`` exits with status 0 or an error
        message otherwise.
    """
    import subprocess

    def _git(*args):
        # Run one git sub-command, echo its output, and return its exit status.
        completed = subprocess.run(["git", *args], capture_output=True, text=True)
        output = f"{completed.stdout}{completed.stderr}".strip()
        if output:
            print(output)
        return completed.returncode

    _git("add", ".")
    # `git commit` exits non-zero when there is nothing new to commit; that is
    # not fatal, so the push is attempted regardless.
    _git("commit", "-m", commit_message)
    if _git("push", "origin", "main") == 0:
        print(f"{notebook_name} guardado correctamente en GitHub")
    else:
        print(f"Error: no se pudo subir {notebook_name} a GitHub (git push falló)")

Modelos de clasificación con Scikit-learn


In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score



# Folder inside the repository (on Drive) where the figures are saved
repo = "UEES-IA-Semana1-Grupo2-VO"
project_folder = "03_Machine_Learning_Basico"
repo_images_folder = os.path.join("/content/drive/MyDrive/IA", repo, project_folder, "images")
os.makedirs(repo_images_folder, exist_ok=True)

# Load the Titanic dataset
titanic = sns.load_dataset('titanic')

# Predictors and target. `.copy()` makes X an independent frame, so the edits
# below never write into a slice of `titanic`.
features = ['pclass', 'sex', 'age', 'sibsp', 'parch', 'fare', 'embarked']
numeric_features = ['age', 'fare', 'sibsp', 'parch']
X = titanic[features].copy()
y = titanic['survived']

# Encode sex as an integer column (plain column assignment replaces the
# column, so it ends up with an integer dtype instead of object)
X['sex'] = X['sex'].map({'male': 0, 'female': 1}).astype(int)

# Decide the train/test rows BEFORE learning anything from the data, so the
# imputation values and the scaling statistics come from training rows only
# (fitting them on all rows leaks test-set information into training).
train_idx, test_idx = train_test_split(X.index.to_numpy(), test_size=0.2, random_state=42)

# Fill missing values with the training median (age) and training mode (embarked)
X = X.fillna({
    'age': X.loc[train_idx, 'age'].median(),
    'embarked': X.loc[train_idx, 'embarked'].mode()[0],
})

# One-hot encode the categorical column on the full frame so that train and
# test always share exactly the same columns
X = pd.get_dummies(X, columns=['embarked'], drop_first=True)

# Scale the numeric features: fit on the training rows, apply to every row
scaler = StandardScaler()
scaler.fit(X.loc[train_idx, numeric_features])
X[numeric_features] = scaler.transform(X[numeric_features])

# Materialise the split
X_train, X_test = X.loc[train_idx], X.loc[test_idx]
y_train, y_test = y.loc[train_idx], y.loc[test_idx]

# Classifiers to compare, each with a fixed seed
models = {
    'Logistic Regression': LogisticRegression(random_state=42),
    'Decision Tree': DecisionTreeClassifier(random_state=42),
    'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42),
}

# Fit every classifier on the training split and score it on the test split
model_results = {}
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    model_results[name] = {'model': model, 'accuracy': accuracy, 'y_pred': y_pred}
    print(f"{name} Accuracy: {accuracy:.4f}")

# Parallel lists (same order as `models`) used by the bar chart
model_names = list(model_results)
accuracies = [entry['accuracy'] for entry in model_results.values()]

# Bar chart of the accuracy of each model, saved to the images folder
fig, ax = plt.subplots(figsize=(8, 5))
sns.barplot(x=model_names, y=accuracies, palette='Blues_d', ax=ax)
ax.set_title('Precisión de Modelos de Clasificación', fontsize=14, pad=15)
ax.set_ylabel('Precisión (Accuracy)', fontsize=12)
ax.set_xlabel('Modelo', fontsize=12)
ax.set_ylim(0, 1)
# Write the accuracy as a percentage just above each bar
for position, value in enumerate(accuracies):
    ax.text(position, value + 0.01, f'{value:.2%}', ha='center', fontsize=10)
fig.savefig(os.path.join(repo_images_folder, 'accuracy_comparison.png'), bbox_inches='tight')
plt.show()

Modelos de Regresión y Evaluación

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import precision_score, recall_score, f1_score, mean_squared_error
import pandas as pd

# Detailed evaluation of the fitted Logistic Regression
log_reg_result = model_results['Logistic Regression']
log_reg = log_reg_result['model']
y_pred = log_reg_result['y_pred']
y_pred_proba = log_reg.predict_proba(X_test)[:, 1]  # probability of the positive class

# Metric table: (label, value) pairs, then split into the two columns
metric_rows = [
    ('Accuracy', log_reg_result['accuracy']),
    ('Precision', precision_score(y_test, y_pred)),
    ('Recall', recall_score(y_test, y_pred)),
    ('F1-Score', f1_score(y_test, y_pred)),
    ('MSE (Probabilidades)', mean_squared_error(y_test, y_pred_proba)),
]
metrics = {
    'Métrica': [label for label, _value in metric_rows],
    'Valor': [value for _label, value in metric_rows],
}
metrics_df = pd.DataFrame(metrics)
print("Resumen de Métricas (Logistic Regression):")
print(metrics_df.to_string(index=False))

# Histogram of the predicted probabilities, with the 0.5 threshold marked
fig, ax = plt.subplots(figsize=(8, 5))
sns.histplot(y_pred_proba, kde=True, color='blue', bins=30, ax=ax)
ax.set_title('Distribución de Probabilidades de Supervivencia\n(Logistic Regression)', fontsize=14, pad=15)
ax.set_xlabel('Probabilidad de Supervivencia', fontsize=12)
ax.set_ylabel('Número de Pasajeros', fontsize=12)
ax.axvline(x=0.5, color='red', linestyle='--', label='Umbral (0.5)')
ax.legend()
fig.savefig(
    os.path.join("/content/drive/MyDrive/IA/UEES-IA-Semana1-Grupo2-VO/03_Machine_Learning_Basico/images", "proba_distribution.png"),
    bbox_inches='tight',
)
plt.show()

Comparación de Algoritmos y Métricas

In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd

# Compute the same four test-set metrics for every trained model
metrics = {'Model': [], 'Accuracy': [], 'Precision': [], 'Recall': [], 'F1-Score': []}
for name, result in model_results.items():
    y_pred = result['y_pred']
    metrics['Model'].append(name)
    metrics['Accuracy'].append(accuracy_score(y_test, y_pred))
    metrics['Precision'].append(precision_score(y_test, y_pred))
    metrics['Recall'].append(recall_score(y_test, y_pred))
    metrics['F1-Score'].append(f1_score(y_test, y_pred))

metrics_df = pd.DataFrame(metrics)
print("Comparación de Modelos:")
print(metrics_df.to_string(index=False))

# Grouped bar chart: one group per model, one bar per metric
fig, ax = plt.subplots(figsize=(10, 6))
metrics_melted = metrics_df.melt(id_vars='Model', var_name='Métrica', value_name='Valor')
sns.barplot(x='Model', y='Valor', hue='Métrica', data=metrics_melted, palette='Blues', ax=ax)
ax.set_title('Comparación de Métricas de Modelos de Clasificación', fontsize=14, pad=15)
ax.set_ylabel('Puntuación', fontsize=12)
ax.set_xlabel('Modelo', fontsize=12)
ax.set_ylim(0, 1)
# Label the bars through the bar containers rather than `ax.patches`:
# depending on the seaborn version, `ax.patches` can also hold zero-height
# rectangles created for the legend, which would get a stray "0.00%" label.
for container in ax.containers:
    ax.bar_label(
        container,
        labels=[f'{value:.2%}' for value in container.datavalues],
        padding=2,
        fontsize=8,
    )
ax.legend(title='Métrica')
# Same images folder that the classification cell created
fig.savefig(os.path.join(repo_images_folder, "model_comparison.png"), bbox_inches='tight')
plt.show()

# One confusion-matrix heatmap per model, each saved as its own PNG
for name, result in model_results.items():
    cm = confusion_matrix(y_test, result['y_pred'])
    fig, ax = plt.subplots(figsize=(6, 5))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=False, ax=ax)
    ax.set_title(f'Matriz de Confusión: {name}\n(TP: Sobrevivieron correctamente predichos, FP: No sobrevivieron predichos como sobrevivientes)', fontsize=12, pad=15)
    ax.set_xlabel('Predicho', fontsize=10)
    ax.set_ylabel('Verdadero', fontsize=10)
    # File name derived from the model name: lower case, spaces -> underscores
    image_name = f"confusion_matrix_{name.lower().replace(' ', '_')}.png"
    fig.savefig(
        os.path.join("/content/drive/MyDrive/IA/UEES-IA-Semana1-Grupo2-VO/03_Machine_Learning_Basico/images", image_name),
        bbox_inches='tight',
    )
    plt.show()

In [None]:
# List the contents of the images folder (long format)
!ls -l /content/drive/MyDrive/IA/UEES-IA-Semana1-Grupo2-VO/03_Machine_Learning_Basico/images

CÓDIGO GITHUB

In [None]:
# Copy this notebook into the cloned Git repository
notebook_name = "03_Machine_Learning_Basico"

# Project folder inside the repository (created if missing)
notebook_path = f"/content/drive/MyDrive/IA/{repository}/{notebook_name}"
os.makedirs(notebook_path, exist_ok=True)

# Images sub-folder of the project folder (created if missing)
notebook_path_images = os.path.join(notebook_path, "images")
os.makedirs(notebook_path_images, exist_ok=True)

# Copy with shutil instead of `!cp`: the paths need no shell quoting, and a
# failed copy raises an exception instead of passing unnoticed, so a stale
# notebook is not committed afterwards.
shutil.copy(f"/content/drive/MyDrive/Colab Notebooks/{notebook_name}.ipynb", notebook_path)


In [None]:
# Run the save step (git add / commit / push) for this notebook
save_to_github(notebook_name, "Notebook 2 completo")