In [6]:
from src.training.training import train_baselines
from src.data.preprocess import load_data, encode_labels, split_data
from src.evaluation.metrics_report import generate_report  


In [7]:

# Dataset CSV passed to train_baselines() and load_data() in main() below.
# The path is relative, so it resolves against the kernel's working directory.
CSV_PATH = "data/mental_health.csv"

def main():
    """Train the TF-IDF baselines and write test-set reports for SVM and LogReg.

    Steps:
      1. Train the baselines with ``train_baselines(CSV_PATH)``.
      2. Rebuild the test split by re-running ``load_data`` ->
         ``encode_labels`` -> ``split_data`` on the same CSV.
      3. Predict with each baseline's vectorizer/classifier pair and hand the
         predictions to ``generate_report`` (``output_dir="reports"``).

    The mapping returned by each ``generate_report`` call is printed; the
    function itself returns nothing.
    """
    # 1) Train only the baselines (fast).
    baseline_results = train_baselines(CSV_PATH)

    # 2) Rebuild test_df with the same steps train_baselines is expected to use.
    # NOTE(review): this assumes encode_labels/split_data are deterministic, so
    # the split and the integer label encoding match what the baselines were
    # trained with — TODO confirm (e.g. fixed random seed inside split_data).
    df = load_data(CSV_PATH)
    # The re-fitted encoder is discarded; class names are taken from the
    # encoder stored in baseline_results instead.
    df, _ = encode_labels(df)
    _, _, test_df = split_data(df)

    # Inputs shared by both reports.
    texts = test_df["clean_text"].tolist()
    y_true = test_df["label"].tolist()
    target_names = baseline_results["label_encoder"].classes_.tolist()

    # 3) TF-IDF + SVM report (no scores are passed for this model).
    vec_svm = baseline_results["vec_svm"]
    clf_svm = baseline_results["clf_svm"]
    y_pred_svm = clf_svm.predict(vec_svm.transform(texts)).tolist()

    res_svm = generate_report(
        y_true=y_true,
        y_pred=y_pred_svm,
        y_scores=None,
        target_names=target_names,
        output_dir="reports",
        prefix="tfidf_svm_test"
    )
    print("SVM report files:", res_svm)

    # 4) TF-IDF + LogisticRegression (exposes predict_proba) — scores optional.
    vec_lr = baseline_results["vec_logreg"]
    clf_lr = baseline_results["clf_logreg"]
    # Vectorize once and reuse the matrix for both predict and predict_proba.
    features_lr = vec_lr.transform(texts)
    y_pred_lr = clf_lr.predict(features_lr).tolist()
    try:
        y_scores_lr = clf_lr.predict_proba(features_lr)
    except Exception as exc:
        # Best effort: still produce the report without scores, but say why
        # instead of hiding the failure.
        print(f"predict_proba unavailable ({exc!r}); LogReg report will omit scores")
        y_scores_lr = None

    res_lr = generate_report(
        y_true=y_true,
        y_pred=y_pred_lr,
        y_scores=y_scores_lr,
        target_names=target_names,
        output_dir="reports",
        prefix="tfidf_logreg_test"
    )
    print("LogReg report files:", res_lr)

# Run the baseline pipeline when this cell executes. Inside a notebook kernel
# __name__ is "__main__", so the guard is always true here; it only matters if
# the cell is copied into an importable module.
if __name__ == "__main__":
    main()


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


SVM report files: {'metrics_json': 'reports\\tfidf_svm_test_metrics.json', 'classification_csv': 'reports\\tfidf_svm_test_classification_report.csv', 'confusion_png': 'reports\\tfidf_svm_test_confusion_matrix.png'}
LogReg report files: {'metrics_json': 'reports\\tfidf_logreg_test_metrics.json', 'classification_csv': 'reports\\tfidf_logreg_test_classification_report.csv', 'confusion_png': 'reports\\tfidf_logreg_test_confusion_matrix.png', 'roc_png': 'reports\\tfidf_logreg_test_roc.png', 'pr_png': 'reports\\tfidf_logreg_test_pr.png', 'calibration_png': 'reports\\tfidf_logreg_test_calibration.png'}


In [8]:

import torch
from src.data.preprocess import load_data, encode_labels, split_data, texts_to_sequences
from src.evaluation.metrics_report import generate_report
from src.training.training import train_rnn  # solo si quieres entrenar aquí

# Dataset CSV passed to train_rnn() and load_data() in main() below. Same value
# as the CSV_PATH assigned in the previous cell; relative to the kernel's
# working directory.
CSV_PATH = "data/mental_health.csv"

def main(max_len=70, batch_size=256):
    """Train a one-epoch LSTM and write a test-set report from its predictions.

    Args:
        max_len: Sequence length passed to ``texts_to_sequences``.
            NOTE(review): presumably has to equal the length used inside
            ``train_rnn`` — TODO confirm.
        batch_size: Number of test sequences sent through the model per
            forward pass during inference.

    The mapping returned by ``generate_report`` is printed; the function
    itself returns nothing.
    """
    rnn_res = train_rnn(CSV_PATH, model_type="lstm", epochs=1)
    model = rnn_res["model"]
    word2idx = rnn_res["word2idx"]
    label_encoder = rnn_res["label_encoder"]

    # Rebuild test_df by re-running the preprocessing steps on the same CSV.
    # NOTE(review): assumes encode_labels/split_data are deterministic so this
    # split and label encoding match what train_rnn used — TODO confirm.
    df = load_data(CSV_PATH)
    df, _ = encode_labels(df)
    _, _, test_df = split_data(df)

    # Convert the cleaned test texts to index sequences.
    test_seqs = texts_to_sequences(test_df["clean_text"].tolist(), word2idx, max_len)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()

    # Predict in mini-batches: the full tensor stays on the CPU and only one
    # slice at a time is moved to the device, so memory use is bounded by
    # batch_size rather than by the size of the test split.
    all_inputs = torch.tensor(test_seqs, dtype=torch.long)
    prob_chunks = []
    with torch.no_grad():
        for batch_inputs in all_inputs.split(batch_size):
            logits = model(batch_inputs.to(device))
            prob_chunks.append(torch.softmax(logits, dim=-1).cpu())
    probs = torch.cat(prob_chunks).numpy()

    y_true = test_df["label"].tolist()
    y_pred = probs.argmax(axis=1).tolist()

    res = generate_report(
        y_true=y_true,
        y_pred=y_pred,
        y_scores=probs,
        target_names=label_encoder.classes_.tolist(),
        output_dir="reports",
        prefix="rnn_lstm_test"
    )
    print("RNN report files:", res)

# Run the LSTM pipeline when this cell executes. Inside a notebook kernel
# __name__ is "__main__", so the guard is always true here. Note that this
# cell's main() replaces the main() defined in the earlier cell.
if __name__ == "__main__":
    main()


Epoch 1/1: 100%|██████████| 1153/1153 [01:24<00:00, 13.61it/s]
Evaluating RNN: 100%|██████████| 247/247 [00:02<00:00, 86.43it/s]


Epoch 1: train_loss=1.0412, val_acc=0.6754, val_f1=0.5241


Evaluating RNN: 100%|██████████| 247/247 [00:03<00:00, 79.59it/s]


Test metrics: 0.6778438567632544 0.5282711240183233
RNN report files: {'metrics_json': 'reports\\rnn_lstm_test_metrics.json', 'classification_csv': 'reports\\rnn_lstm_test_classification_report.csv', 'confusion_png': 'reports\\rnn_lstm_test_confusion_matrix.png', 'roc_png': 'reports\\rnn_lstm_test_roc.png', 'pr_png': 'reports\\rnn_lstm_test_pr.png', 'calibration_png': 'reports\\rnn_lstm_test_calibration.png'}
