In [None]:
import ipywidgets as widgets
from IPython.display import display, clear_output
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score

In [None]:
# Load the cleaned TraficoGT tweets and convert the 'datetime_gt' column to
# pandas datetimes in one chained expression.
df = pd.read_csv('data/traficogt_clean.csv').assign(
    datetime_gt=lambda frame: pd.to_datetime(frame['datetime_gt'])
)

In [None]:
def prepare_classification_data(df, features=None, test_size=0.3, random_state=42):
    """Build the binary engagement target and split the data for training.

    The engagement of a row is the sum of its 'likeCount', 'retweetCount' and
    'replyCount' columns. A row is labelled 1 (high engagement) when its
    engagement is strictly greater than the median engagement, otherwise 0.

    Args:
        df: DataFrame holding the three counter columns and every column
            named in ``features``. It is copied, never modified.
        features: Column names used as model inputs. Defaults to
            ['hora', 'dia', 'mes_num', 'retweetCount'].
        test_size: Forwarded to ``train_test_split`` (default 0.3).
        random_state: Forwarded to ``train_test_split`` (default 42).

    Returns:
        The ``train_test_split`` result: X_train, X_test, y_train, y_test.
    """
    engagement_columns = ['likeCount', 'retweetCount', 'replyCount']
    if features is None:
        features = ['hora', 'dia', 'mes_num', 'retweetCount']

    df_model = df.copy()

    # A row-wise sum skips NaN, so a missing counter counts as 0 (the same
    # convention applied to the features below). Adding the columns with `+`
    # would make the whole engagement NaN, which silently labels the row as
    # low engagement and excludes it from the median.
    df_model['engagement'] = df_model[engagement_columns].sum(axis=1)
    df_model['high_engagement'] = (df_model['engagement'] > df_model['engagement'].median()).astype(int)

    # NOTE(review): with the default features, 'retweetCount' is both a model
    # input and a component of the target, so reported accuracy is inflated by
    # target leakage. Pass a different `features` list to evaluate without it.
    X = df_model[features].fillna(0)
    y = df_model['high_engagement']

    return train_test_split(X, y, test_size=test_size, random_state=random_state)

def train_models(X_train, X_test, y_train, y_test):
    """Fit every candidate classifier and collect its test-set evaluation.

    Args:
        X_train, y_train: Data each classifier is fitted on.
        X_test, y_test: Data each fitted classifier is evaluated on.

    Returns:
        Dict keyed by model display name. Each value is a dict with the
        fitted estimator ('model'), its test predictions ('predictions'),
        the accuracy score ('accuracy') and the confusion matrix
        ('confusion_matrix').
    """
    candidates = [
        ('Logistic Regression', LogisticRegression(max_iter=1000, random_state=42)),
        ('Decision Tree', DecisionTreeClassifier(random_state=42)),
        ('Random Forest', RandomForestClassifier(n_estimators=100, random_state=42)),
    ]

    def _fit_and_score(estimator):
        # Train on the training split, then score on the held-out split.
        estimator.fit(X_train, y_train)
        predicted = estimator.predict(X_test)
        return {
            'model': estimator,
            'predictions': predicted,
            'accuracy': accuracy_score(y_test, predicted),
            'confusion_matrix': confusion_matrix(y_test, predicted),
        }

    return {label: _fit_and_score(estimator) for label, estimator in candidates}

In [None]:
# Split once, keep the four pieces as notebook-level names (y_test is read
# again by the dashboard callbacks), then fit and evaluate every classifier.
_split = prepare_classification_data(df)
X_train, X_test, y_train, y_test = _split
models_results = train_models(*_split)

In [None]:
# Dashboard header.
title = widgets.HTML(value="<h1 style='text-align: center; color: #2c3e50;'>Dashboard de Análisis TraficoGT</h1>")

# Distinct filter values in order of first appearance in df, and the names of
# the trained models; each list is computed once and reused below.
_month_names = list(df['mes_nombre'].unique())
_weekday_names = list(df['dia_semana'].unique())
_model_names = list(models_results.keys())

# Exploration filters: every month and every weekday starts selected.
month_selector = widgets.SelectMultiple(
    options=_month_names,
    value=_month_names,
    description='Meses:',
    disabled=False,
    layout=widgets.Layout(width='300px', height='100px'),
)

day_selector = widgets.SelectMultiple(
    options=_weekday_names,
    value=_weekday_names,
    description='Días:',
    disabled=False,
    layout=widgets.Layout(width='300px', height='100px'),
)

# Single-model view: starts on the first trained model.
model_selector = widgets.Dropdown(
    options=_model_names,
    value=_model_names[0],
    description='Modelo:',
    layout=widgets.Layout(width='400px'),
)

# Comparison view: every model starts selected.
comparison_selector = widgets.SelectMultiple(
    options=_model_names,
    value=_model_names,
    description='Modelos:',
    layout=widgets.Layout(width='400px', height='100px'),
)

# One output area per dashboard tab; the update_* callbacks render into them.
exploration_output, models_output, comparison_output = (
    widgets.Output(),
    widgets.Output(),
    widgets.Output(),
)

In [None]:
def update_exploration(change):
    """Redraw the exploration charts for the current month/day selection.

    Used as the observer callback of ``month_selector`` and ``day_selector``
    and also called directly with ``None``. ``change`` is ignored: the current
    selection is read from the widgets themselves. All output goes into
    ``exploration_output``, which is cleared first.

    Renders four charts from the filtered rows of ``df``: tweets per weekday,
    tweets per hour, engagement per month and the ten most active users.
    Prints a message instead when nothing is selected or no row matches.
    """
    weekday_order = ['Lunes', 'Martes', 'Miércoles', 'Jueves', 'Viernes', 'Sábado', 'Domingo']
    engagement_columns = ['likeCount', 'retweetCount', 'replyCount']

    with exploration_output:
        clear_output(wait=True)

        selected_months = list(month_selector.value)
        selected_days = list(day_selector.value)

        if not selected_months or not selected_days:
            print("Seleccione al menos un mes y un día")
            return

        df_filtered = df[df['mes_nombre'].isin(selected_months) & df['dia_semana'].isin(selected_days)]

        # A valid selection can still match no rows; say so instead of
        # drawing four empty charts.
        if df_filtered.empty:
            print("No hay datos para los filtros seleccionados")
            return

        # Reindex so the bars always follow the Monday-to-Sunday order.
        tweets_por_dia = df_filtered.groupby('dia_semana').size().reindex(weekday_order)
        fig1 = px.bar(x=tweets_por_dia.index, y=tweets_por_dia.values, title="Tweets por Día de la Semana", labels={'x': 'Día', 'y': 'Cantidad'}, color=tweets_por_dia.values, color_continuous_scale='Blues')
        fig1.show()

        tweets_por_hora = df_filtered.groupby('hora').size()
        fig2 = px.line(x=tweets_por_hora.index, y=tweets_por_hora.values, title="Tweets por Hora del Día", labels={'x': 'Hora', 'y': 'Cantidad'}, markers=True)
        fig2.update_traces(line_color='#1f77b4')
        fig2.show()

        engagement_por_mes = df_filtered.groupby('mes_nombre')[engagement_columns].sum()
        # groupby sorts month names alphabetically; reorder them by 'mes_num'
        # so the chart reads chronologically.
        # NOTE(review): assumes 'mes_num' is the calendar month number that
        # corresponds to 'mes_nombre' -- confirm against the cleaning step.
        month_order = (
            df_filtered[['mes_num', 'mes_nombre']]
            .dropna()
            .sort_values('mes_num')['mes_nombre']
            .drop_duplicates()
            .tolist()
        )
        # Months without a usable 'mes_num' keep their place at the end so no
        # data disappears from the chart.
        month_order += [month for month in engagement_por_mes.index if month not in month_order]
        engagement_por_mes = engagement_por_mes.reindex(month_order).rename_axis('mes_nombre')
        fig3 = px.bar(engagement_por_mes, x=engagement_por_mes.index, y=engagement_columns, title="Engagement por Mes", labels={'value': 'Cantidad', 'variable': 'Tipo', 'mes_nombre': 'Mes'}, barmode='group')
        fig3.show()

        top_users = df_filtered['user.username'].value_counts().head(10)
        fig4 = px.bar(x=top_users.values, y=top_users.index, orientation='h', title="Top 10 Usuarios más Activos", labels={'x': 'Tweets', 'y': 'Usuario'}, color=top_users.values, color_continuous_scale='Teal')
        # Horizontal bars are stacked bottom-up, so without this the most
        # active user would be drawn at the bottom of the ranking.
        fig4.update_layout(yaxis={'categoryorder': 'total ascending'})
        fig4.show()

def update_models(change):
    """Render the explanation and metrics of the currently selected model.

    Used as the observer callback of ``model_selector`` and also called
    directly with ``None``. ``change`` is ignored: the selected model is read
    from the widget. All output goes into ``models_output``, which is cleared
    first.

    Shows a static HTML description of the model setup, the stored accuracy,
    the stored confusion matrix as a heatmap and the classification report
    computed from ``y_test`` and the stored predictions.
    """
    # Display names for the two target classes on the confusion-matrix axes.
    class_names = {0: 'Bajo (0)', 1: 'Alto (1)'}

    with models_output:
        clear_output(wait=True)

        explanation = """
        <h3>Objetivo del Modelo</h3>
        <p>Este modelo intenta predecir si un tweet sobre tráfico tendrá <b>alto engagement</b> (muchos likes, retweets y replies)
        basándose en características temporales y en el número de retweets que recibe en tiempo real.</p>

        <h4>Features utilizados:</h4>
        <ul>
            <li><b>Hora del tweet</b> (0-23): La hora del día en que se publica</li>
            <li><b>Día de la semana</b> (1-7): Lunes, martes, etc.</li>
            <li><b>Mes del año</b> (1-12): El mes en que se publica</li>
            <li><b>Retweet Count</b>: Cantidad de retweets recibidos</li>
        </ul>

        <h4>Variable objetivo:</h4>
        <ul>
            <li><b>Alto engagement</b> (1): Engagement por encima de la mediana</li>
            <li><b>Bajo engagement</b> (0): Engagement igual o por debajo de la mediana</li>
        </ul>
        """

        display(widgets.HTML(value=explanation))

        selected_model = model_selector.value
        selected_results = models_results[selected_model]
        accuracy = selected_results['accuracy']
        predictions = selected_results['predictions']

        print(f"\nAccuracy: {accuracy:.2%}")
        print("Porcentaje de predicciones correctas del modelo en datos de prueba")
        print("\n")

        cm = selected_results['confusion_matrix']
        # confusion_matrix orders its rows and columns by the sorted labels
        # that occur in the true or predicted values; rebuild that order so
        # each axis tick names a class.
        present_labels = sorted(set(y_test) | set(predictions))
        tick_labels = [class_names.get(label, str(label)) for label in present_labels]
        fig_cm = px.imshow(cm, x=tick_labels, y=tick_labels, text_auto=True, title=f"Matriz de Confusión - {selected_model}", labels=dict(x="Predicción", y="Real", color="Cantidad"), color_continuous_scale='Blues')
        # Force categorical axes: left numeric, the 2x2 heatmap shows
        # meaningless fractional ticks between the classes.
        fig_cm.update_xaxes(type='category')
        fig_cm.update_yaxes(type='category')
        fig_cm.show()

        report = classification_report(y_test, predictions, output_dict=True)
        df_report = pd.DataFrame(report).transpose()
        print("\nMétricas del Modelo:")
        display(df_report)

def update_comparison(change):
    """Render the accuracy table and bar chart for the selected models.

    Used as the observer callback of ``comparison_selector`` and also called
    directly with ``None``. ``change`` is ignored: the selection is read from
    the widget. All output goes into ``comparison_output``, which is cleared
    first. Prints a message instead when no model is selected.
    """
    with comparison_output:
        clear_output(wait=True)

        chosen_models = list(comparison_selector.value)
        if len(chosen_models) == 0:
            print("Seleccione al menos un modelo")
            return

        # One row per selected model, in selection order.
        df_comparison = pd.DataFrame([
            {'Modelo': name, 'Accuracy': models_results[name]['accuracy']}
            for name in chosen_models
        ])

        print("Tabla Comparativa:")
        display(df_comparison)
        print("\n")

        accuracy_chart = px.bar(
            df_comparison,
            x='Modelo',
            y='Accuracy',
            title="Comparación de Accuracy entre Modelos",
            color='Accuracy',
            color_continuous_scale='Viridis',
        )
        accuracy_chart.show()

In [None]:
# Re-render the matching dashboard section whenever a selector's value changes.
for _selector, _callback in (
    (month_selector, update_exploration),
    (day_selector, update_exploration),
    (model_selector, update_models),
    (comparison_selector, update_comparison),
):
    _selector.observe(_callback, names='value')

In [None]:
# Draw every section once with the default selections; the callbacks ignore
# their argument, so None stands in for the change event.
for _render in (update_exploration, update_models, update_comparison):
    _render(None)

In [None]:
# Each tab stacks a heading, its selector widget(s) and its output area.
exploration_tab = widgets.VBox(children=[
    widgets.HTML(value="<h3>Filtros de Exploración</h3>"),
    widgets.HBox(children=[month_selector, day_selector]),
    exploration_output,
])

models_tab = widgets.VBox(children=[
    widgets.HTML(value="<h3>Seleccionar Modelo</h3>"),
    model_selector,
    models_output,
])

comparison_tab = widgets.VBox(children=[
    widgets.HTML(value="<h3>Seleccionar Modelos para Comparar</h3>"),
    comparison_selector,
    comparison_output,
])

# Assemble the tab container and title each tab by position.
tab = widgets.Tab(children=[exploration_tab, models_tab, comparison_tab])
_tab_titles = ('Exploración de Datos', 'Modelos Predictivos', 'Comparación de Modelos')
for _position, _label in enumerate(_tab_titles):
    tab.set_title(_position, _label)

# Show the header first, then the tabbed dashboard.
display(title, tab)