# 最初から機械学習でやってみる

In [4]:
!pip install lightgbm

Collecting lightgbm
  Downloading lightgbm-4.5.0-py3-none-win_amd64.whl.metadata (17 kB)
Downloading lightgbm-4.5.0-py3-none-win_amd64.whl (1.4 MB)
   ---------------------------------------- 0.0/1.4 MB ? eta -:--:--
   ---------------------------------------- 1.4/1.4 MB 7.6 MB/s eta 0:00:00
Installing collected packages: lightgbm
Successfully installed lightgbm-4.5.0



[notice] A new release of pip is available: 24.2 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [7]:
import os
import json
import time
import glob
from typing import TypeVar, Type, List, Optional
from dataclasses import dataclass
from pydantic import BaseModel
import pandas as pd
import numpy as np
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
from openai import AzureOpenAI
from tqdm import tqdm
from typing import List, Dict, Optional, Any, Tuple

In [8]:
csv_files = glob.glob('*.csv')
print(csv_files)

['extracted_500_first_20241130_検索結果1204.csv', 'pubmed_true_data.csv']


In [9]:
df = pd.read_csv(csv_files[0])
# Assuming your dataframe is named 'df'
df['included'] = df['notes'].astype(str).str.contains('Included')

print(df['included'].value_counts())
print(f'不一致の数: {df[df["included"] == True]["notes"].str.contains("Excluded").sum()}')
#予測の元となる変数

import json

def create_json_text(row):
    return json.dumps({
        'title': str(row['title']),
        'abstract': str(row['abstract'])
    })

df['tiab'] = df.apply(create_json_text, axis=1)

included
False    493
True       7
Name: count, dtype: int64
不一致の数: 0


In [5]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import confusion_matrix, classification_report
import optuna
from lightgbm import LGBMClassifier

def prepare_text_data(df, text_column='tiab', label_column='included'):
    """テキストデータの前処理とTF-IDF変換"""
    texts = df[text_column].fillna('')

    vectorizer = TfidfVectorizer(
        max_features=10000,
        min_df=2,
        max_df=0.95,
        ngram_range=(1, 2)
    )

    X_vec = vectorizer.fit_transform(texts)
    y = df[label_column]

    class_weights = dict(zip(
        y.unique(),
        [1 / (len(y) * (y == label).mean()) for label in y.unique()]
    ))

    return X_vec, y, vectorizer, class_weights

def fbeta_score_custom(y_true, y_pred, beta=1):
    """カスタムF-betaスコアの計算"""
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred, labels=[0,1]).ravel()
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
    if precision == 0.0 and recall == 0.0:
        return 0.0
    fbeta = (1 + beta**2) * (precision * recall) / ((beta**2 * precision) + recall)
    return fbeta

def calculate_detailed_metrics(y_true, y_pred):
    """詳細な評価指標の計算と表示"""
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred, labels=[0,1]).ravel()

    sensitivity = tp / (tp + fn) if (tp + fn) > 0 else 0
    specificity = tn / (tn + fp) if (tn + fp) > 0 else 0
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0
    f1 = 2 * (precision * sensitivity) / (precision + sensitivity) if (precision + sensitivity) > 0 else 0

    print("\n=== 詳細な評価指標 ===")
    print(f"感度 (Sensitivity/Recall): {sensitivity:.3f}")
    print(f"特異度 (Specificity): {specificity:.3f}")
    print(f"適合率 (Precision): {precision:.3f}")
    print(f"F1スコア: {f1:.3f}")

    print("\n=== 混同行列 ===")
    print("                  Predicted")
    print("                  Negative  Positive")
    print(f"Actual Negative    {tn:^8} {fp:^8}")
    print(f"      Positive    {fn:^8} {tp:^8}")

    return {
        'sensitivity': sensitivity,
        'specificity': specificity,
        'precision': precision,
        'f1': f1,
        'confusion_matrix': {'tn': tn, 'fp': fp, 'fn': fn, 'tp': tp}
    }

def evaluate_model(X_vec, y, params, threshold, class_weights, beta=2):
    """モデルの評価（交差検証）"""
    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    scores = []
    all_predictions = []
    all_true_values = []

    for train_idx, val_idx in skf.split(X_vec, y):
        X_tr = X_vec[train_idx]
        X_val = X_vec[val_idx]
        y_tr = y.iloc[train_idx]
        y_val = y.iloc[val_idx]

        model = LGBMClassifier(
            **params,
            random_state=42,
            class_weight=class_weights
        )

        model.fit(X_tr, y_tr)
        y_prob = model.predict_proba(X_val)[:, 1]
        y_pred = (y_prob >= threshold).astype(int)

        score = fbeta_score_custom(y_val, y_pred, beta=beta)
        scores.append(score)

        all_predictions.extend(y_pred)
        all_true_values.extend(y_val)

    if len(scores) == 5:
        print("\n=== 交差検証全体の評価 ===")
        metrics = calculate_detailed_metrics(all_true_values, all_predictions)
        print("\nClassification Report:")
        print(classification_report(all_true_values, all_predictions))

    return np.mean(scores)

def objective(trial, X_vec, y, class_weights, beta=2):
    """Optunaの目的関数"""
    params = {
        "num_leaves": trial.suggest_int("num_leaves", 8, 128),
        "learning_rate": trial.suggest_float("learning_rate", 1e-3, 0.3, log=True),
        "n_estimators": trial.suggest_int("n_estimators", 50, 500),
        "reg_lambda": trial.suggest_float("reg_lambda", 1e-3, 10.0, log=True),
        "min_child_samples": trial.suggest_int("min_child_samples", 5, 100),
        "verbosity": -1
    }
    threshold = trial.suggest_float("threshold", 0.1, 0.9)

    score = evaluate_model(X_vec, y, params, threshold, class_weights, beta=beta)
    return score

def train_final_model(df, beta_value=2, n_trials=50):
    """完全なトレーニングパイプライン"""
    # データの準備
    X_vec, y, vectorizer, class_weights = prepare_text_data(df)

    print(f"データセットの形状: {X_vec.shape}")
    print(f"クラスの分布:\n{y.value_counts(normalize=True)}")
    print(f"クラスの重み: {class_weights}")

    # ハイパーパラメータの最適化
    study = optuna.create_study(direction="maximize")
    study.optimize(
        lambda trial: objective(trial, X_vec, y, class_weights, beta=beta_value),
        n_trials=n_trials
    )

    best_params = study.best_trial.params
    threshold = best_params.pop("threshold")

    print("\nBest parameters:", best_params)
    print("Best threshold:", threshold)

    final_score = evaluate_model(X_vec, y, best_params, threshold, class_weights, beta=beta_value)
    print(f"\nFinal Mean F{beta_value} Score:", final_score)

    # 最終モデルの学習
    final_model = LGBMClassifier(
        **best_params,
        random_state=42,
        class_weight=class_weights
    )
    final_model.fit(X_vec, y)

    # 最終評価
    print("\n=== 最終モデルの評価（全データ） ===")
    y_prob = final_model.predict_proba(X_vec)[:, 1]
    y_pred = (y_prob >= threshold).astype(int)
    final_metrics = calculate_detailed_metrics(y, y_pred)

    return final_model, vectorizer, threshold, best_params

def predict_new_text(text, model, vectorizer, threshold):
    """新しいテキストの予測"""
    X_new = vectorizer.transform([text])
    prob = model.predict_proba(X_new)[0, 1]
    prediction = prob >= threshold
    return prediction, prob



In [10]:
# 使用例
if __name__ == "__main__":
    # データの読み込み
    #df = pd.read_csv('your_data.csv')  # tiab列とincluded列を含むデータ

    # モデルのトレーニング
    model, vectorizer, threshold, best_params = train_final_model(
        df,
        beta_value=4,  # より高い値で再現率を重視
        n_trials=50
    )


[I 2024-12-07 06:39:34,639] A new study created in memory with name: no-name-791c9cec-4e66-48ea-b25e-7be700f78b6d


データセットの形状: (500, 10000)
クラスの分布:
included
False    0.986
True     0.014
Name: proportion, dtype: float64
クラスの重み: {np.False_: np.float64(0.002028397565922921), np.True_: np.float64(0.14285714285714285)}


[I 2024-12-07 06:39:35,456] Trial 0 finished with value: 0.0 and parameters: {'num_leaves': 90, 'learning_rate': 0.019446585785254194, 'n_estimators': 116, 'reg_lambda': 0.04347690116313084, 'min_child_samples': 52, 'threshold': 0.5241715322992514}. Best is trial 0 with value: 0.0.



=== 交差検証全体の評価 ===

=== 詳細な評価指標 ===
感度 (Sensitivity/Recall): 0.000
特異度 (Specificity): 0.959
適合率 (Precision): 0.000
F1スコア: 0.000

=== 混同行列 ===
                  Predicted
                  Negative  Positive
Actual Negative      473       20   
      Positive       7        0    

Classification Report:
              precision    recall  f1-score   support

       False       0.99      0.96      0.97       493
        True       0.00      0.00      0.00         7

    accuracy                           0.95       500
   macro avg       0.49      0.48      0.49       500
weighted avg       0.97      0.95      0.96       500



[I 2024-12-07 06:39:36,247] Trial 1 finished with value: 0.0 and parameters: {'num_leaves': 104, 'learning_rate': 0.14019770800548456, 'n_estimators': 217, 'reg_lambda': 0.04551234981347055, 'min_child_samples': 57, 'threshold': 0.18956718191486843}. Best is trial 0 with value: 0.0.



=== 交差検証全体の評価 ===

=== 詳細な評価指標 ===
感度 (Sensitivity/Recall): 0.000
特異度 (Specificity): 0.984
適合率 (Precision): 0.000
F1スコア: 0.000

=== 混同行列 ===
                  Predicted
                  Negative  Positive
Actual Negative      485       8    
      Positive       7        0    

Classification Report:
              precision    recall  f1-score   support

       False       0.99      0.98      0.98       493
        True       0.00      0.00      0.00         7

    accuracy                           0.97       500
   macro avg       0.49      0.49      0.49       500
weighted avg       0.97      0.97      0.97       500



[I 2024-12-07 06:39:37,658] Trial 2 finished with value: 0.0 and parameters: {'num_leaves': 112, 'learning_rate': 0.03569322138499483, 'n_estimators': 406, 'reg_lambda': 0.009621643725100748, 'min_child_samples': 33, 'threshold': 0.3634034197736611}. Best is trial 0 with value: 0.0.



=== 交差検証全体の評価 ===

=== 詳細な評価指標 ===
感度 (Sensitivity/Recall): 0.000
特異度 (Specificity): 0.998
適合率 (Precision): 0.000
F1スコア: 0.000

=== 混同行列 ===
                  Predicted
                  Negative  Positive
Actual Negative      492       1    
      Positive       7        0    

Classification Report:
              precision    recall  f1-score   support

       False       0.99      1.00      0.99       493
        True       0.00      0.00      0.00         7

    accuracy                           0.98       500
   macro avg       0.49      0.50      0.50       500
weighted avg       0.97      0.98      0.98       500



[I 2024-12-07 06:39:38,346] Trial 3 finished with value: 0.08499999999999999 and parameters: {'num_leaves': 9, 'learning_rate': 0.16471326795226046, 'n_estimators': 325, 'reg_lambda': 1.5809384663283297, 'min_child_samples': 88, 'threshold': 0.411111185799643}. Best is trial 3 with value: 0.08499999999999999.



=== 交差検証全体の評価 ===

=== 詳細な評価指標 ===
感度 (Sensitivity/Recall): 0.143
特異度 (Specificity): 0.917
適合率 (Precision): 0.024
F1スコア: 0.041

=== 混同行列 ===
                  Predicted
                  Negative  Positive
Actual Negative      452       41   
      Positive       6        1    

Classification Report:
              precision    recall  f1-score   support

       False       0.99      0.92      0.95       493
        True       0.02      0.14      0.04         7

    accuracy                           0.91       500
   macro avg       0.51      0.53      0.50       500
weighted avg       0.97      0.91      0.94       500



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
[I 2024-12-07 06:39:39,049] Trial 4 finished with value: 0.19096133751306166 and parameters: {'num_leaves': 25, 'learning_rate': 0.005068716131682012, 'n_estimators': 172, 'reg_lambda': 1.2924327076115225, 'min_child_samples': 34, 'threshold': 0.11600014327317476}. Best is trial 4 with value: 0.19096133751306166.



=== 交差検証全体の評価 ===

=== 詳細な評価指標 ===
感度 (Sensitivity/Recall): 1.000
特異度 (Specificity): 0.000
適合率 (Precision): 0.014
F1スコア: 0.028

=== 混同行列 ===
                  Predicted
                  Negative  Positive
Actual Negative       0       493   
      Positive       0        7    

Classification Report:
              precision    recall  f1-score   support

       False       0.00      0.00      0.00       493
        True       0.01      1.00      0.03         7

    accuracy                           0.01       500
   macro avg       0.01      0.50      0.01       500
weighted avg       0.00      0.01      0.00       500



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
[I 2024-12-07 06:39:39,588] Trial 5 finished with value: 0.0 and parameters: {'num_leaves': 44, 'learning_rate': 0.013454999751983568, 'n_estimators': 122, 'reg_lambda': 0.0010178678855207268, 'min_child_samples': 87, 'threshold': 0.8995471396132101}. Best is trial 4 with value: 0.19096133751306166.



=== 交差検証全体の評価 ===

=== 詳細な評価指標 ===
感度 (Sensitivity/Recall): 0.000
特異度 (Specificity): 1.000
適合率 (Precision): 0.000
F1スコア: 0.000

=== 混同行列 ===
                  Predicted
                  Negative  Positive
Actual Negative      493       0    
      Positive       7        0    

Classification Report:
              precision    recall  f1-score   support

       False       0.99      1.00      0.99       493
        True       0.00      0.00      0.00         7

    accuracy                           0.99       500
   macro avg       0.49      0.50      0.50       500
weighted avg       0.97      0.99      0.98       500



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
[I 2024-12-07 06:39:40,451] Trial 6 finished with value: 0.19096133751306166 and parameters: {'num_leaves': 66, 'learning_rate': 0.0010146410871570467, 'n_estimators': 78, 'reg_lambda': 0.04506961498536548, 'min_child_samples': 15, 'threshold': 0.21215007592202256}. Best is trial 4 with value: 0.19096133751306166.



=== 交差検証全体の評価 ===

=== 詳細な評価指標 ===
感度 (Sensitivity/Recall): 1.000
特異度 (Specificity): 0.000
適合率 (Precision): 0.014
F1スコア: 0.028

=== 混同行列 ===
                  Predicted
                  Negative  Positive
Actual Negative       0       493   
      Positive       0        7    

Classification Report:
              precision    recall  f1-score   support

       False       0.00      0.00      0.00       493
        True       0.01      1.00      0.03         7

    accuracy                           0.01       500
   macro avg       0.01      0.50      0.01       500
weighted avg       0.00      0.01      0.00       500



[I 2024-12-07 06:39:41,486] Trial 7 finished with value: 0.0 and parameters: {'num_leaves': 87, 'learning_rate': 0.24326138254481244, 'n_estimators': 427, 'reg_lambda': 0.37989490323223657, 'min_child_samples': 61, 'threshold': 0.5222490407881408}. Best is trial 4 with value: 0.19096133751306166.



=== 交差検証全体の評価 ===

=== 詳細な評価指標 ===
感度 (Sensitivity/Recall): 0.000
特異度 (Specificity): 0.996
適合率 (Precision): 0.000
F1スコア: 0.000

=== 混同行列 ===
                  Predicted
                  Negative  Positive
Actual Negative      491       2    
      Positive       7        0    

Classification Report:
              precision    recall  f1-score   support

       False       0.99      1.00      0.99       493
        True       0.00      0.00      0.00         7

    accuracy                           0.98       500
   macro avg       0.49      0.50      0.50       500
weighted avg       0.97      0.98      0.98       500



[I 2024-12-07 06:39:42,067] Trial 8 finished with value: 0.0 and parameters: {'num_leaves': 20, 'learning_rate': 0.08024015496556205, 'n_estimators': 220, 'reg_lambda': 0.07857196857779807, 'min_child_samples': 74, 'threshold': 0.6130310101537486}. Best is trial 4 with value: 0.19096133751306166.



=== 交差検証全体の評価 ===

=== 詳細な評価指標 ===
感度 (Sensitivity/Recall): 0.000
特異度 (Specificity): 0.992
適合率 (Precision): 0.000
F1スコア: 0.000

=== 混同行列 ===
                  Predicted
                  Negative  Positive
Actual Negative      489       4    
      Positive       7        0    

Classification Report:
              precision    recall  f1-score   support

       False       0.99      0.99      0.99       493
        True       0.00      0.00      0.00         7

    accuracy                           0.98       500
   macro avg       0.49      0.50      0.49       500
weighted avg       0.97      0.98      0.98       500



[I 2024-12-07 06:39:43,403] Trial 9 finished with value: 0.0 and parameters: {'num_leaves': 128, 'learning_rate': 0.13317802839398646, 'n_estimators': 482, 'reg_lambda': 0.951252143237476, 'min_child_samples': 41, 'threshold': 0.7525370135253405}. Best is trial 4 with value: 0.19096133751306166.



=== 交差検証全体の評価 ===

=== 詳細な評価指標 ===
感度 (Sensitivity/Recall): 0.000
特異度 (Specificity): 0.998
適合率 (Precision): 0.000
F1スコア: 0.000

=== 混同行列 ===
                  Predicted
                  Negative  Positive
Actual Negative      492       1    
      Positive       7        0    

Classification Report:
              precision    recall  f1-score   support

       False       0.99      1.00      0.99       493
        True       0.00      0.00      0.00         7

    accuracy                           0.98       500
   macro avg       0.49      0.50      0.50       500
weighted avg       0.97      0.98      0.98       500



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
[I 2024-12-07 06:39:45,384] Trial 10 finished with value: 0.19096133751306166 and parameters: {'num_leaves': 36, 'learning_rate': 0.0032726491457276, 'n_estimators': 289, 'reg_lambda': 9.939560329579486, 'min_child_samples': 9, 'threshold': 0.30428639524719936}. Best is trial 4 with value: 0.19096133751306166.



=== 交差検証全体の評価 ===

=== 詳細な評価指標 ===
感度 (Sensitivity/Recall): 1.000
特異度 (Specificity): 0.000
適合率 (Precision): 0.014
F1スコア: 0.028

=== 混同行列 ===
                  Predicted
                  Negative  Positive
Actual Negative       0       493   
      Positive       0        7    

Classification Report:
              precision    recall  f1-score   support

       False       0.00      0.00      0.00       493
        True       0.01      1.00      0.03         7

    accuracy                           0.01       500
   macro avg       0.01      0.50      0.01       500
weighted avg       0.00      0.01      0.00       500



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
[I 2024-12-07 06:39:46,212] Trial 11 finished with value: 0.19096133751306166 and parameters: {'num_leaves': 67, 'learning_rate': 0.0010000620892218278, 'n_estimators': 51, 'reg_lambda': 8.720993399644971, 'min_child_samples': 13, 'threshold': 0.1077382866773492}. Best is trial 4 with value: 0.19096133751306166.



=== 交差検証全体の評価 ===

=== 詳細な評価指標 ===
感度 (Sensitivity/Recall): 1.000
特異度 (Specificity): 0.000
適合率 (Precision): 0.014
F1スコア: 0.028

=== 混同行列 ===
                  Predicted
                  Negative  Positive
Actual Negative       0       493   
      Positive       0        7    

Classification Report:
              precision    recall  f1-score   support

       False       0.00      0.00      0.00       493
        True       0.01      1.00      0.03         7

    accuracy                           0.01       500
   macro avg       0.01      0.50      0.01       500
weighted avg       0.00      0.01      0.00       500



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
[I 2024-12-07 06:39:47,142] Trial 12 finished with value: 0.19096133751306166 and parameters: {'num_leaves': 55, 'learning_rate': 0.0031469182410976174, 'n_estimators': 146, 'reg_lambda': 0.007962846428624075, 'min_child_samples': 24, 'threshold': 0.2385330044957842}. Best is trial 4 with value: 0.19096133751306166.



=== 交差検証全体の評価 ===

=== 詳細な評価指標 ===
感度 (Sensitivity/Recall): 1.000
特異度 (Specificity): 0.000
適合率 (Precision): 0.014
F1スコア: 0.028

=== 混同行列 ===
                  Predicted
                  Negative  Positive
Actual Negative       0       493   
      Positive       0        7    

Classification Report:
              precision    recall  f1-score   support

       False       0.00      0.00      0.00       493
        True       0.01      1.00      0.03         7

    accuracy                           0.01       500
   macro avg       0.01      0.50      0.01       500
weighted avg       0.00      0.01      0.00       500



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
[I 2024-12-07 06:39:47,717] Trial 13 finished with value: 0.19096133751306166 and parameters: {'num_leaves': 29, 'learning_rate': 0.0011795696295497505, 'n_estimators': 50, 'reg_lambda': 0.3069420830671977, 'min_child_samples': 24, 'threshold': 0.11609173020844653}. Best is trial 4 with value: 0.19096133751306166.



=== 交差検証全体の評価 ===

=== 詳細な評価指標 ===
感度 (Sensitivity/Recall): 1.000
特異度 (Specificity): 0.000
適合率 (Precision): 0.014
F1スコア: 0.028

=== 混同行列 ===
                  Predicted
                  Negative  Positive
Actual Negative       0       493   
      Positive       0        7    

Classification Report:
              precision    recall  f1-score   support

       False       0.00      0.00      0.00       493
        True       0.01      1.00      0.03         7

    accuracy                           0.01       500
   macro avg       0.01      0.50      0.01       500
weighted avg       0.00      0.01      0.00       500



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
[I 2024-12-07 06:39:48,559] Trial 14 finished with value: 0.19096133751306166 and parameters: {'num_leaves': 62, 'learning_rate': 0.004151131363024696, 'n_estimators': 186, 'reg_lambda': 0.012739400553732343, 'min_child_samples': 39, 'threshold': 0.22808845035683561}. Best is trial 4 with value: 0.19096133751306166.



=== 交差検証全体の評価 ===

=== 詳細な評価指標 ===
感度 (Sensitivity/Recall): 1.000
特異度 (Specificity): 0.000
適合率 (Precision): 0.014
F1スコア: 0.028

=== 混同行列 ===
                  Predicted
                  Negative  Positive
Actual Negative       0       493   
      Positive       0        7    

Classification Report:
              precision    recall  f1-score   support

       False       0.00      0.00      0.00       493
        True       0.01      1.00      0.03         7

    accuracy                           0.01       500
   macro avg       0.01      0.50      0.01       500
weighted avg       0.00      0.01      0.00       500



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
[I 2024-12-07 06:39:50,743] Trial 15 finished with value: 0.19096133751306166 and parameters: {'num_leaves': 46, 'learning_rate': 0.006146930935597213, 'n_estimators': 103, 'reg_lambda': 2.4013272988965513, 'min_child_samples': 5, 'threshold': 0.39586033553061734}. Best is trial 4 with value: 0.19096133751306166.



=== 交差検証全体の評価 ===

=== 詳細な評価指標 ===
感度 (Sensitivity/Recall): 1.000
特異度 (Specificity): 0.000
適合率 (Precision): 0.014
F1スコア: 0.028

=== 混同行列 ===
                  Predicted
                  Negative  Positive
Actual Negative       0       493   
      Positive       0        7    

Classification Report:
              precision    recall  f1-score   support

       False       0.00      0.00      0.00       493
        True       0.01      1.00      0.03         7

    accuracy                           0.01       500
   macro avg       0.01      0.50      0.01       500
weighted avg       0.00      0.01      0.00       500



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
[I 2024-12-07 06:39:51,651] Trial 16 finished with value: 0.19096133751306166 and parameters: {'num_leaves': 80, 'learning_rate': 0.001940721539042067, 'n_estimators': 175, 'reg_lambda': 0.24756819633300517, 'min_child_samples': 24, 'threshold': 0.2877933567524519}. Best is trial 4 with value: 0.19096133751306166.



=== 交差検証全体の評価 ===

=== 詳細な評価指標 ===
感度 (Sensitivity/Recall): 1.000
特異度 (Specificity): 0.000
適合率 (Precision): 0.014
F1スコア: 0.028

=== 混同行列 ===
                  Predicted
                  Negative  Positive
Actual Negative       0       493   
      Positive       0        7    

Classification Report:
              precision    recall  f1-score   support

       False       0.00      0.00      0.00       493
        True       0.01      1.00      0.03         7

    accuracy                           0.01       500
   macro avg       0.01      0.50      0.01       500
weighted avg       0.00      0.01      0.00       500



[I 2024-12-07 06:39:53,035] Trial 17 finished with value: 0.26115942028985506 and parameters: {'num_leaves': 13, 'learning_rate': 0.008459750491622507, 'n_estimators': 233, 'reg_lambda': 0.002056247488548144, 'min_child_samples': 17, 'threshold': 0.16508139075842654}. Best is trial 17 with value: 0.26115942028985506.



=== 交差検証全体の評価 ===

=== 詳細な評価指標 ===
感度 (Sensitivity/Recall): 0.429
特異度 (Specificity): 0.903
適合率 (Precision): 0.059
F1スコア: 0.103

=== 混同行列 ===
                  Predicted
                  Negative  Positive
Actual Negative      445       48   
      Positive       4        3    

Classification Report:
              precision    recall  f1-score   support

       False       0.99      0.90      0.94       493
        True       0.06      0.43      0.10         7

    accuracy                           0.90       500
   macro avg       0.52      0.67      0.52       500
weighted avg       0.98      0.90      0.93       500



[I 2024-12-07 06:39:53,952] Trial 18 finished with value: 0.2268958543983822 and parameters: {'num_leaves': 8, 'learning_rate': 0.00923046784793834, 'n_estimators': 276, 'reg_lambda': 0.00217502064941912, 'min_child_samples': 47, 'threshold': 0.12055449615877487}. Best is trial 17 with value: 0.26115942028985506.



=== 交差検証全体の評価 ===

=== 詳細な評価指標 ===
感度 (Sensitivity/Recall): 0.429
特異度 (Specificity): 0.824
適合率 (Precision): 0.033
F1スコア: 0.062

=== 混同行列 ===
                  Predicted
                  Negative  Positive
Actual Negative      406       87   
      Positive       4        3    

Classification Report:
              precision    recall  f1-score   support

       False       0.99      0.82      0.90       493
        True       0.03      0.43      0.06         7

    accuracy                           0.82       500
   macro avg       0.51      0.63      0.48       500
weighted avg       0.98      0.82      0.89       500



[I 2024-12-07 06:39:54,838] Trial 19 finished with value: 0.0 and parameters: {'num_leaves': 10, 'learning_rate': 0.01066186343862808, 'n_estimators': 336, 'reg_lambda': 0.0010703351270563526, 'min_child_samples': 71, 'threshold': 0.6069541994568448}. Best is trial 17 with value: 0.26115942028985506.



=== 交差検証全体の評価 ===

=== 詳細な評価指標 ===
感度 (Sensitivity/Recall): 0.000
特異度 (Specificity): 0.984
適合率 (Precision): 0.000
F1スコア: 0.000

=== 混同行列 ===
                  Predicted
                  Negative  Positive
Actual Negative      485       8    
      Positive       7        0    

Classification Report:
              precision    recall  f1-score   support

       False       0.99      0.98      0.98       493
        True       0.00      0.00      0.00         7

    accuracy                           0.97       500
   macro avg       0.49      0.49      0.49       500
weighted avg       0.97      0.97      0.97       500



[I 2024-12-07 06:39:55,712] Trial 20 finished with value: 0.1 and parameters: {'num_leaves': 18, 'learning_rate': 0.034015055057351674, 'n_estimators': 258, 'reg_lambda': 0.00353516958956239, 'min_child_samples': 48, 'threshold': 0.32620587114654886}. Best is trial 17 with value: 0.26115942028985506.



=== 交差検証全体の評価 ===

=== 詳細な評価指標 ===
感度 (Sensitivity/Recall): 0.143
特異度 (Specificity): 0.996
適合率 (Precision): 0.333
F1スコア: 0.200

=== 混同行列 ===
                  Predicted
                  Negative  Positive
Actual Negative      491       2    
      Positive       6        1    

Classification Report:
              precision    recall  f1-score   support

       False       0.99      1.00      0.99       493
        True       0.33      0.14      0.20         7

    accuracy                           0.98       500
   macro avg       0.66      0.57      0.60       500
weighted avg       0.98      0.98      0.98       500



[I 2024-12-07 06:39:56,830] Trial 21 finished with value: 0.21794871794871798 and parameters: {'num_leaves': 26, 'learning_rate': 0.008422094352208628, 'n_estimators': 249, 'reg_lambda': 0.0025689803019393444, 'min_child_samples': 32, 'threshold': 0.14796039097198874}. Best is trial 17 with value: 0.26115942028985506.



=== 交差検証全体の評価 ===

=== 詳細な評価指標 ===
感度 (Sensitivity/Recall): 0.429
特異度 (Specificity): 0.876
適合率 (Precision): 0.047
F1スコア: 0.085

=== 混同行列 ===
                  Predicted
                  Negative  Positive
Actual Negative      432       61   
      Positive       4        3    

Classification Report:
              precision    recall  f1-score   support

       False       0.99      0.88      0.93       493
        True       0.05      0.43      0.08         7

    accuracy                           0.87       500
   macro avg       0.52      0.65      0.51       500
weighted avg       0.98      0.87      0.92       500



[I 2024-12-07 06:39:57,824] Trial 22 finished with value: 0.25003142677561285 and parameters: {'num_leaves': 35, 'learning_rate': 0.00820281098675724, 'n_estimators': 269, 'reg_lambda': 0.0030845012114937603, 'min_child_samples': 45, 'threshold': 0.17056070587287456}. Best is trial 17 with value: 0.26115942028985506.



=== 交差検証全体の評価 ===

=== 詳細な評価指標 ===
感度 (Sensitivity/Recall): 0.429
特異度 (Specificity): 0.872
適合率 (Precision): 0.045
F1スコア: 0.082

=== 混同行列 ===
                  Predicted
                  Negative  Positive
Actual Negative      430       63   
      Positive       4        3    

Classification Report:
              precision    recall  f1-score   support

       False       0.99      0.87      0.93       493
        True       0.05      0.43      0.08         7

    accuracy                           0.87       500
   macro avg       0.52      0.65      0.50       500
weighted avg       0.98      0.87      0.92       500



[I 2024-12-07 06:39:58,709] Trial 23 finished with value: 0.0 and parameters: {'num_leaves': 36, 'learning_rate': 0.02974250236715857, 'n_estimators': 303, 'reg_lambda': 0.0032708032077740407, 'min_child_samples': 63, 'threshold': 0.19526316304338343}. Best is trial 17 with value: 0.26115942028985506.



=== 交差検証全体の評価 ===

=== 詳細な評価指標 ===
感度 (Sensitivity/Recall): 0.000
特異度 (Specificity): 0.980
適合率 (Precision): 0.000
F1スコア: 0.000

=== 混同行列 ===
                  Predicted
                  Negative  Positive
Actual Negative      483       10   
      Positive       7        0    

Classification Report:
              precision    recall  f1-score   support

       False       0.99      0.98      0.98       493
        True       0.00      0.00      0.00         7

    accuracy                           0.97       500
   macro avg       0.49      0.49      0.49       500
weighted avg       0.97      0.97      0.97       500



[I 2024-12-07 06:39:59,828] Trial 24 finished with value: 0.0 and parameters: {'num_leaves': 10, 'learning_rate': 0.018702128942471353, 'n_estimators': 360, 'reg_lambda': 0.018664753079613367, 'min_child_samples': 44, 'threshold': 0.25872132698862244}. Best is trial 17 with value: 0.26115942028985506.



=== 交差検証全体の評価 ===

=== 詳細な評価指標 ===
感度 (Sensitivity/Recall): 0.000
特異度 (Specificity): 0.982
適合率 (Precision): 0.000
F1スコア: 0.000

=== 混同行列 ===
                  Predicted
                  Negative  Positive
Actual Negative      484       9    
      Positive       7        0    

Classification Report:
              precision    recall  f1-score   support

       False       0.99      0.98      0.98       493
        True       0.00      0.00      0.00         7

    accuracy                           0.97       500
   macro avg       0.49      0.49      0.49       500
weighted avg       0.97      0.97      0.97       500



[I 2024-12-07 06:40:00,425] Trial 25 finished with value: 0.08499999999999999 and parameters: {'num_leaves': 37, 'learning_rate': 0.00766596496200018, 'n_estimators': 215, 'reg_lambda': 0.005014043109481592, 'min_child_samples': 70, 'threshold': 0.4194066822452613}. Best is trial 17 with value: 0.26115942028985506.



=== 交差検証全体の評価 ===

=== 詳細な評価指標 ===
感度 (Sensitivity/Recall): 0.143
特異度 (Specificity): 0.911
適合率 (Precision): 0.022
F1スコア: 0.038

=== 混同行列 ===
                  Predicted
                  Negative  Positive
Actual Negative      449       44   
      Positive       6        1    

Classification Report:
              precision    recall  f1-score   support

       False       0.99      0.91      0.95       493
        True       0.02      0.14      0.04         7

    accuracy                           0.90       500
   macro avg       0.50      0.53      0.49       500
weighted avg       0.97      0.90      0.93       500



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
[I 2024-12-07 06:40:01,078] Trial 26 finished with value: 0.19096133751306166 and parameters: {'num_leaves': 18, 'learning_rate': 0.002339561447222897, 'n_estimators': 273, 'reg_lambda': 0.00202223975205052, 'min_child_samples': 97, 'threshold': 0.16380388960443337}. Best is trial 17 with value: 0.26115942028985506.



=== 交差検証全体の評価 ===

=== 詳細な評価指標 ===
感度 (Sensitivity/Recall): 1.000
特異度 (Specificity): 0.000
適合率 (Precision): 0.014
F1スコア: 0.028

=== 混同行列 ===
                  Predicted
                  Negative  Positive
Actual Negative       0       493   
      Positive       0        7    

Classification Report:
              precision    recall  f1-score   support

       False       0.00      0.00      0.00       493
        True       0.01      1.00      0.03         7

    accuracy                           0.01       500
   macro avg       0.01      0.50      0.01       500
weighted avg       0.00      0.01      0.00       500



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
[I 2024-12-07 06:40:02,777] Trial 27 finished with value: 0.0 and parameters: {'num_leaves': 49, 'learning_rate': 0.012001858248549968, 'n_estimators': 366, 'reg_lambda': 0.0017415778968961721, 'min_child_samples': 21, 'threshold': 0.4652897904274305}. Best is trial 17 with value: 0.26115942028985506.



=== 交差検証全体の評価 ===

=== 詳細な評価指標 ===
感度 (Sensitivity/Recall): 0.000
特異度 (Specificity): 1.000
適合率 (Precision): 0.000
F1スコア: 0.000

=== 混同行列 ===
                  Predicted
                  Negative  Positive
Actual Negative      493       0    
      Positive       7        0    

Classification Report:
              precision    recall  f1-score   support

       False       0.99      1.00      0.99       493
        True       0.00      0.00      0.00         7

    accuracy                           0.99       500
   macro avg       0.49      0.50      0.50       500
weighted avg       0.97      0.99      0.98       500



[I 2024-12-07 06:40:03,631] Trial 28 finished with value: 0.0 and parameters: {'num_leaves': 8, 'learning_rate': 0.06386605666326206, 'n_estimators': 238, 'reg_lambda': 0.017791844103979576, 'min_child_samples': 54, 'threshold': 0.34878122799569}. Best is trial 17 with value: 0.26115942028985506.



=== 交差検証全体の評価 ===

=== 詳細な評価指標 ===
感度 (Sensitivity/Recall): 0.000
特異度 (Specificity): 0.992
適合率 (Precision): 0.000
F1スコア: 0.000

=== 混同行列 ===
                  Predicted
                  Negative  Positive
Actual Negative      489       4    
      Positive       7        0    

Classification Report:
              precision    recall  f1-score   support

       False       0.99      0.99      0.99       493
        True       0.00      0.00      0.00         7

    accuracy                           0.98       500
   macro avg       0.49      0.50      0.49       500
weighted avg       0.97      0.98      0.98       500



[I 2024-12-07 06:40:04,551] Trial 29 finished with value: 0.0 and parameters: {'num_leaves': 18, 'learning_rate': 0.024910174515516047, 'n_estimators': 288, 'reg_lambda': 0.006374682928715207, 'min_child_samples': 49, 'threshold': 0.6028268637573894}. Best is trial 17 with value: 0.26115942028985506.



=== 交差検証全体の評価 ===

=== 詳細な評価指標 ===
感度 (Sensitivity/Recall): 0.000
特異度 (Specificity): 0.996
適合率 (Precision): 0.000
F1スコア: 0.000

=== 混同行列 ===
                  Predicted
                  Negative  Positive
Actual Negative      491       2    
      Positive       7        0    

Classification Report:
              precision    recall  f1-score   support

       False       0.99      1.00      0.99       493
        True       0.00      0.00      0.00         7

    accuracy                           0.98       500
   macro avg       0.49      0.50      0.50       500
weighted avg       0.97      0.98      0.98       500



[I 2024-12-07 06:40:06,084] Trial 30 finished with value: 0.0 and parameters: {'num_leaves': 32, 'learning_rate': 0.016826745910833503, 'n_estimators': 391, 'reg_lambda': 0.02212036554417828, 'min_child_samples': 30, 'threshold': 0.24492582291992515}. Best is trial 17 with value: 0.26115942028985506.



=== 交差検証全体の評価 ===

=== 詳細な評価指標 ===
感度 (Sensitivity/Recall): 0.000
特異度 (Specificity): 0.978
適合率 (Precision): 0.000
F1スコア: 0.000

=== 混同行列 ===
                  Predicted
                  Negative  Positive
Actual Negative      482       11   
      Positive       7        0    

Classification Report:
              precision    recall  f1-score   support

       False       0.99      0.98      0.98       493
        True       0.00      0.00      0.00         7

    accuracy                           0.96       500
   macro avg       0.49      0.49      0.49       500
weighted avg       0.97      0.96      0.97       500



[I 2024-12-07 06:40:07,233] Trial 31 finished with value: 0.22317948717948716 and parameters: {'num_leaves': 25, 'learning_rate': 0.00856805416338782, 'n_estimators': 251, 'reg_lambda': 0.002585283323072764, 'min_child_samples': 30, 'threshold': 0.15360563083644982}. Best is trial 17 with value: 0.26115942028985506.



=== 交差検証全体の評価 ===

=== 詳細な評価指標 ===
感度 (Sensitivity/Recall): 0.429
特異度 (Specificity): 0.886
適合率 (Precision): 0.051
F1スコア: 0.091

=== 混同行列 ===
                  Predicted
                  Negative  Positive
Actual Negative      437       56   
      Positive       4        3    

Classification Report:
              precision    recall  f1-score   support

       False       0.99      0.89      0.94       493
        True       0.05      0.43      0.09         7

    accuracy                           0.88       500
   macro avg       0.52      0.66      0.51       500
weighted avg       0.98      0.88      0.92       500



[I 2024-12-07 06:40:08,804] Trial 32 finished with value: 0.2717868338557993 and parameters: {'num_leaves': 23, 'learning_rate': 0.007359871973223338, 'n_estimators': 313, 'reg_lambda': 0.0016436001202953976, 'min_child_samples': 17, 'threshold': 0.15471637546727893}. Best is trial 32 with value: 0.2717868338557993.



=== 交差検証全体の評価 ===

=== 詳細な評価指標 ===
感度 (Sensitivity/Recall): 0.429
特異度 (Specificity): 0.921
適合率 (Precision): 0.071
F1スコア: 0.122

=== 混同行列 ===
                  Predicted
                  Negative  Positive
Actual Negative      454       39   
      Positive       4        3    

Classification Report:
              precision    recall  f1-score   support

       False       0.99      0.92      0.95       493
        True       0.07      0.43      0.12         7

    accuracy                           0.91       500
   macro avg       0.53      0.67      0.54       500
weighted avg       0.98      0.91      0.94       500



[I 2024-12-07 06:40:10,547] Trial 33 finished with value: 0.2737960285328706 and parameters: {'num_leaves': 16, 'learning_rate': 0.005526042854038906, 'n_estimators': 308, 'reg_lambda': 0.004693688645642403, 'min_child_samples': 17, 'threshold': 0.18502952006595624}. Best is trial 33 with value: 0.2737960285328706.



=== 交差検証全体の評価 ===

=== 詳細な評価指標 ===
感度 (Sensitivity/Recall): 0.429
特異度 (Specificity): 0.892
適合率 (Precision): 0.054
F1スコア: 0.095

=== 混同行列 ===
                  Predicted
                  Negative  Positive
Actual Negative      440       53   
      Positive       4        3    

Classification Report:
              precision    recall  f1-score   support

       False       0.99      0.89      0.94       493
        True       0.05      0.43      0.10         7

    accuracy                           0.89       500
   macro avg       0.52      0.66      0.52       500
weighted avg       0.98      0.89      0.93       500



[I 2024-12-07 06:40:12,495] Trial 34 finished with value: 0.34689189189189185 and parameters: {'num_leaves': 43, 'learning_rate': 0.00597124763190807, 'n_estimators': 325, 'reg_lambda': 0.005217512945281993, 'min_child_samples': 18, 'threshold': 0.1922612484366543}. Best is trial 34 with value: 0.34689189189189185.



=== 交差検証全体の評価 ===

=== 詳細な評価指標 ===
感度 (Sensitivity/Recall): 0.571
特異度 (Specificity): 0.895
適合率 (Precision): 0.071
F1スコア: 0.127

=== 混同行列 ===
                  Predicted
                  Negative  Positive
Actual Negative      441       52   
      Positive       3        4    

Classification Report:
              precision    recall  f1-score   support

       False       0.99      0.89      0.94       493
        True       0.07      0.57      0.13         7

    accuracy                           0.89       500
   macro avg       0.53      0.73      0.53       500
weighted avg       0.98      0.89      0.93       500



[I 2024-12-07 06:40:14,595] Trial 35 finished with value: 0.13076923076923078 and parameters: {'num_leaves': 54, 'learning_rate': 0.006010718993309947, 'n_estimators': 321, 'reg_lambda': 0.005531217352971148, 'min_child_samples': 17, 'threshold': 0.2801501068492047}. Best is trial 34 with value: 0.34689189189189185.



=== 交差検証全体の評価 ===

=== 詳細な評価指標 ===
感度 (Sensitivity/Recall): 0.143
特異度 (Specificity): 0.955
適合率 (Precision): 0.043
F1スコア: 0.067

=== 混同行列 ===
                  Predicted
                  Negative  Positive
Actual Negative      471       22   
      Positive       6        1    

Classification Report:
              precision    recall  f1-score   support

       False       0.99      0.96      0.97       493
        True       0.04      0.14      0.07         7

    accuracy                           0.94       500
   macro avg       0.52      0.55      0.52       500
weighted avg       0.97      0.94      0.96       500



[I 2024-12-07 06:40:17,516] Trial 36 finished with value: 0.2529104823747681 and parameters: {'num_leaves': 42, 'learning_rate': 0.004085512668432545, 'n_estimators': 346, 'reg_lambda': 0.02907289908555193, 'min_child_samples': 6, 'threshold': 0.20832733316180913}. Best is trial 34 with value: 0.34689189189189185.



=== 交差検証全体の評価 ===

=== 詳細な評価指標 ===
感度 (Sensitivity/Recall): 0.571
特異度 (Specificity): 0.734
適合率 (Precision): 0.030
F1スコア: 0.056

=== 混同行列 ===
                  Predicted
                  Negative  Positive
Actual Negative      362      131   
      Positive       3        4    

Classification Report:
              precision    recall  f1-score   support

       False       0.99      0.73      0.84       493
        True       0.03      0.57      0.06         7

    accuracy                           0.73       500
   macro avg       0.51      0.65      0.45       500
weighted avg       0.98      0.73      0.83       500



[I 2024-12-07 06:40:19,382] Trial 37 finished with value: 0.18006968641114982 and parameters: {'num_leaves': 21, 'learning_rate': 0.001782839559873618, 'n_estimators': 309, 'reg_lambda': 0.0013257000845019965, 'min_child_samples': 12, 'threshold': 0.36446058715909346}. Best is trial 34 with value: 0.34689189189189185.



=== 交差検証全体の評価 ===

=== 詳細な評価指標 ===
感度 (Sensitivity/Recall): 0.286
特異度 (Specificity): 0.939
適合率 (Precision): 0.062
F1スコア: 0.103

=== 混同行列 ===
                  Predicted
                  Negative  Positive
Actual Negative      463       30   
      Positive       5        2    

Classification Report:
              precision    recall  f1-score   support

       False       0.99      0.94      0.96       493
        True       0.06      0.29      0.10         7

    accuracy                           0.93       500
   macro avg       0.53      0.61      0.53       500
weighted avg       0.98      0.93      0.95       500



[I 2024-12-07 06:40:21,063] Trial 38 finished with value: 0.0 and parameters: {'num_leaves': 14, 'learning_rate': 0.01501946916951155, 'n_estimators': 420, 'reg_lambda': 0.009982674924904959, 'min_child_samples': 19, 'threshold': 0.18559035269940244}. Best is trial 34 with value: 0.34689189189189185.



=== 交差検証全体の評価 ===

=== 詳細な評価指標 ===
感度 (Sensitivity/Recall): 0.000
特異度 (Specificity): 0.988
適合率 (Precision): 0.000
F1スコア: 0.000

=== 混同行列 ===
                  Predicted
                  Negative  Positive
Actual Negative      487       6    
      Positive       7        0    

Classification Report:
              precision    recall  f1-score   support

       False       0.99      0.99      0.99       493
        True       0.00      0.00      0.00         7

    accuracy                           0.97       500
   macro avg       0.49      0.49      0.49       500
weighted avg       0.97      0.97      0.97       500



[I 2024-12-07 06:40:22,248] Trial 39 finished with value: 0.2772619047619048 and parameters: {'num_leaves': 101, 'learning_rate': 0.004367210355305156, 'n_estimators': 381, 'reg_lambda': 0.0700029772634674, 'min_child_samples': 36, 'threshold': 0.27153830391106837}. Best is trial 34 with value: 0.34689189189189185.



=== 交差検証全体の評価 ===

=== 詳細な評価指標 ===
感度 (Sensitivity/Recall): 0.571
特異度 (Specificity): 0.789
適合率 (Precision): 0.037
F1スコア: 0.070

=== 混同行列 ===
                  Predicted
                  Negative  Positive
Actual Negative      389      104   
      Positive       3        4    

Classification Report:
              precision    recall  f1-score   support

       False       0.99      0.79      0.88       493
        True       0.04      0.57      0.07         7

    accuracy                           0.79       500
   macro avg       0.51      0.68      0.47       500
weighted avg       0.98      0.79      0.87       500



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
[I 2024-12-07 06:40:23,401] Trial 40 finished with value: 0.0 and parameters: {'num_leaves': 104, 'learning_rate': 0.00510987633181791, 'n_estimators': 387, 'reg_lambda': 0.07667977607541962, 'min_child_samples': 37, 'threshold': 0.8105464482532754}. Best is trial 34 with value: 0.34689189189189185.



=== 交差検証全体の評価 ===

=== 詳細な評価指標 ===
感度 (Sensitivity/Recall): 0.000
特異度 (Specificity): 1.000
適合率 (Precision): 0.000
F1スコア: 0.000

=== 混同行列 ===
                  Predicted
                  Negative  Positive
Actual Negative      493       0    
      Positive       7        0    

Classification Report:
              precision    recall  f1-score   support

       False       0.99      1.00      0.99       493
        True       0.00      0.00      0.00         7

    accuracy                           0.99       500
   macro avg       0.49      0.50      0.50       500
weighted avg       0.97      0.99      0.98       500



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
[I 2024-12-07 06:40:24,766] Trial 41 finished with value: 0.19096133751306166 and parameters: {'num_leaves': 74, 'learning_rate': 0.0028405150934051712, 'n_estimators': 374, 'reg_lambda': 0.0050929345414829985, 'min_child_samples': 25, 'threshold': 0.14420911364717875}. Best is trial 34 with value: 0.34689189189189185.



=== 交差検証全体の評価 ===

=== 詳細な評価指標 ===
感度 (Sensitivity/Recall): 1.000
特異度 (Specificity): 0.000
適合率 (Precision): 0.014
F1スコア: 0.028

=== 混同行列 ===
                  Predicted
                  Negative  Positive
Actual Negative       0       493   
      Positive       0        7    

Classification Report:
              precision    recall  f1-score   support

       False       0.00      0.00      0.00       493
        True       0.01      1.00      0.03         7

    accuracy                           0.01       500
   macro avg       0.01      0.50      0.01       500
weighted avg       0.00      0.01      0.00       500



[I 2024-12-07 06:40:27,296] Trial 42 finished with value: 0.20954927878141533 and parameters: {'num_leaves': 101, 'learning_rate': 0.004134278285705069, 'n_estimators': 440, 'reg_lambda': 0.1499947463161744, 'min_child_samples': 11, 'threshold': 0.20655249299267336}. Best is trial 34 with value: 0.34689189189189185.



=== 交差検証全体の評価 ===

=== 詳細な評価指標 ===
感度 (Sensitivity/Recall): 1.000
特異度 (Specificity): 0.071
適合率 (Precision): 0.015
F1スコア: 0.030

=== 混同行列 ===
                  Predicted
                  Negative  Positive
Actual Negative       35      458   
      Positive       0        7    

Classification Report:
              precision    recall  f1-score   support

       False       1.00      0.07      0.13       493
        True       0.02      1.00      0.03         7

    accuracy                           0.08       500
   macro avg       0.51      0.54      0.08       500
weighted avg       0.99      0.08      0.13       500



[I 2024-12-07 06:40:29,386] Trial 43 finished with value: 0.21781781781781784 and parameters: {'num_leaves': 127, 'learning_rate': 0.005830880788701414, 'n_estimators': 344, 'reg_lambda': 0.01068000232892843, 'min_child_samples': 16, 'threshold': 0.2677719861350309}. Best is trial 34 with value: 0.34689189189189185.



=== 交差検証全体の評価 ===

=== 詳細な評価指標 ===
感度 (Sensitivity/Recall): 0.286
特異度 (Specificity): 0.935
適合率 (Precision): 0.059
F1スコア: 0.098

=== 混同行列 ===
                  Predicted
                  Negative  Positive
Actual Negative      461       32   
      Positive       5        2    

Classification Report:
              precision    recall  f1-score   support

       False       0.99      0.94      0.96       493
        True       0.06      0.29      0.10         7

    accuracy                           0.93       500
   macro avg       0.52      0.61      0.53       500
weighted avg       0.98      0.93      0.95       500



[I 2024-12-07 06:40:31,276] Trial 44 finished with value: 0.0 and parameters: {'num_leaves': 120, 'learning_rate': 0.012699301621210104, 'n_estimators': 462, 'reg_lambda': 0.035127971755879256, 'min_child_samples': 28, 'threshold': 0.3161913431413885}. Best is trial 34 with value: 0.34689189189189185.



=== 交差検証全体の評価 ===

=== 詳細な評価指標 ===
感度 (Sensitivity/Recall): 0.000
特異度 (Specificity): 0.974
適合率 (Precision): 0.000
F1スコア: 0.000

=== 混同行列 ===
                  Predicted
                  Negative  Positive
Actual Negative      480       13   
      Positive       7        0    

Classification Report:
              precision    recall  f1-score   support

       False       0.99      0.97      0.98       493
        True       0.00      0.00      0.00         7

    accuracy                           0.96       500
   macro avg       0.49      0.49      0.49       500
weighted avg       0.97      0.96      0.97       500



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
[I 2024-12-07 06:40:33,045] Trial 45 finished with value: 0.19096133751306166 and parameters: {'num_leaves': 23, 'learning_rate': 0.00641609794688929, 'n_estimators': 206, 'reg_lambda': 0.0015019936946440947, 'min_child_samples': 9, 'threshold': 0.10900622223182982}. Best is trial 34 with value: 0.34689189189189185.



=== 交差検証全体の評価 ===

=== 詳細な評価指標 ===
感度 (Sensitivity/Recall): 1.000
特異度 (Specificity): 0.000
適合率 (Precision): 0.014
F1スコア: 0.028

=== 混同行列 ===
                  Predicted
                  Negative  Positive
Actual Negative       0       493   
      Positive       0        7    

Classification Report:
              precision    recall  f1-score   support

       False       0.00      0.00      0.00       493
        True       0.01      1.00      0.03         7

    accuracy                           0.01       500
   macro avg       0.01      0.50      0.01       500
weighted avg       0.00      0.01      0.00       500



[I 2024-12-07 06:40:34,665] Trial 46 finished with value: 0.2900067008474765 and parameters: {'num_leaves': 90, 'learning_rate': 0.004189407386493158, 'n_estimators': 321, 'reg_lambda': 0.06018664283874203, 'min_child_samples': 19, 'threshold': 0.2272995353003402}. Best is trial 34 with value: 0.34689189189189185.



=== 交差検証全体の評価 ===

=== 詳細な評価指標 ===
感度 (Sensitivity/Recall): 0.714
特異度 (Specificity): 0.653
適合率 (Precision): 0.028
F1スコア: 0.055

=== 混同行列 ===
                  Predicted
                  Negative  Positive
Actual Negative      322      171   
      Positive       2        5    

Classification Report:
              precision    recall  f1-score   support

       False       0.99      0.65      0.79       493
        True       0.03      0.71      0.05         7

    accuracy                           0.65       500
   macro avg       0.51      0.68      0.42       500
weighted avg       0.98      0.65      0.78       500



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
[I 2024-12-07 06:40:36,163] Trial 47 finished with value: 0.19096133751306166 and parameters: {'num_leaves': 97, 'learning_rate': 0.0014432450854579765, 'n_estimators': 323, 'reg_lambda': 0.12675981938386, 'min_child_samples': 22, 'threshold': 0.22007862436976477}. Best is trial 34 with value: 0.34689189189189185.



=== 交差検証全体の評価 ===

=== 詳細な評価指標 ===
感度 (Sensitivity/Recall): 1.000
特異度 (Specificity): 0.000
適合率 (Precision): 0.014
F1スコア: 0.028

=== 混同行列 ===
                  Predicted
                  Negative  Positive
Actual Negative       0       493   
      Positive       0        7    

Classification Report:
              precision    recall  f1-score   support

       False       0.00      0.00      0.00       493
        True       0.01      1.00      0.03         7

    accuracy                           0.01       500
   macro avg       0.01      0.50      0.01       500
weighted avg       0.00      0.01      0.00       500



[I 2024-12-07 06:40:37,564] Trial 48 finished with value: 0.2644444444444444 and parameters: {'num_leaves': 90, 'learning_rate': 0.003627606694375339, 'n_estimators': 398, 'reg_lambda': 0.05397920294256693, 'min_child_samples': 35, 'threshold': 0.44880202746350556}. Best is trial 34 with value: 0.34689189189189185.



=== 交差検証全体の評価 ===

=== 詳細な評価指標 ===
感度 (Sensitivity/Recall): 0.429
特異度 (Specificity): 0.933
適合率 (Precision): 0.083
F1スコア: 0.140

=== 混同行列 ===
                  Predicted
                  Negative  Positive
Actual Negative      460       33   
      Positive       4        3    

Classification Report:
              precision    recall  f1-score   support

       False       0.99      0.93      0.96       493
        True       0.08      0.43      0.14         7

    accuracy                           0.93       500
   macro avg       0.54      0.68      0.55       500
weighted avg       0.98      0.93      0.95       500



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
[I 2024-12-07 06:40:39,120] Trial 49 finished with value: 0.19096133751306166 and parameters: {'num_leaves': 86, 'learning_rate': 0.002332079954749238, 'n_estimators': 302, 'reg_lambda': 0.5089387875482961, 'min_child_samples': 14, 'threshold': 0.2474268203108433}. Best is trial 34 with value: 0.34689189189189185.



=== 交差検証全体の評価 ===

=== 詳細な評価指標 ===
感度 (Sensitivity/Recall): 1.000
特異度 (Specificity): 0.000
適合率 (Precision): 0.014
F1スコア: 0.028

=== 混同行列 ===
                  Predicted
                  Negative  Positive
Actual Negative       0       493   
      Positive       0        7    

Classification Report:
              precision    recall  f1-score   support

       False       0.00      0.00      0.00       493
        True       0.01      1.00      0.03         7

    accuracy                           0.01       500
   macro avg       0.01      0.50      0.01       500
weighted avg       0.00      0.01      0.00       500


Best parameters: {'num_leaves': 43, 'learning_rate': 0.00597124763190807, 'n_estimators': 325, 'reg_lambda': 0.005217512945281993, 'min_child_samples': 18}
Best threshold: 0.1922612484366543

=== 交差検証全体の評価 ===

=== 詳細な評価指標 ===
感度 (Sensitivity/Recall): 0.571
特異度 (Specificity): 0.895
適合率 (Precision): 0.071
F1スコア: 0.127

=== 混同行列 ===
                  Predicted
       

In [11]:
import joblib


In [12]:

# モデルと関連オブジェクトの保存
model_data = {
    'model': model,
    'vectorizer': vectorizer,
    'threshold': threshold,
    'best_params': best_params
}
joblib.dump(model_data, 'model_data.joblib')
print("\nモデルと関連オブジェクトを 'model_data.joblib' に保存しました。")


モデルと関連オブジェクトを 'model_data.joblib' に保存しました。


In [20]:
# 保存したモデルデータをロード
model_data = joblib.load('model_data.joblib')

# 各オブジェクトの取り出し
loaded_model = model_data['model']
loaded_vectorizer = model_data['vectorizer']
loaded_threshold = model_data['threshold']
loaded_best_params = model_data['best_params']

print("モデルと関連オブジェクトをロードしました。")


モデルと関連オブジェクトをロードしました。


In [14]:
df2 = pd.read_csv(csv_files[1])

In [18]:
df2.columns = ["PMID", "title", "abstract"]

In [19]:
df2["tiab"] = df2.apply(create_json_text, axis=1)

In [22]:
df2.shape

(10, 4)

In [21]:
# テキストデータの前処理とベクトル化
X_new = loaded_vectorizer.transform(df2["tiab"].fillna(''))

print(f"新しいデータの形状: {X_new.shape}")

新しいデータの形状: (10, 10000)


In [23]:
# 予測確率の取得
y_prob_new = loaded_model.predict_proba(X_new)[:, 1]

# 閾値を適用してクラスラベルを決定
y_pred_new = (y_prob_new >= loaded_threshold).astype(int)

# 予測結果をデータフレームに追加
df2['prediction'] = y_pred_new
df2['probability'] = y_prob_new

print("予測を実行し、結果をデータフレームに追加しました。")


予測を実行し、結果をデータフレームに追加しました。


In [24]:
# 予測結果の表示
print("\n=== 予測結果 ===")
print(df2[['tiab', 'prediction', 'probability']])

# クラス分布の確認
print("\nクラスの分布:")
print(df2['prediction'].value_counts())

# 確率の統計情報
print("\n予測確率の統計情報:")
print(df2['probability'].describe())



=== 予測結果 ===
                                                tiab  prediction  probability
0  {"title": "A multicenter, randomized, controll...           1     0.312545
1  {"title": "Transfusion requirements after head...           1     0.211932
2  {"title": "Restrictive vs Liberal Transfusion ...           1     0.206408
3  {"title": "Liberal or Restrictive Transfusion ...           1     0.290118
4  {"title": "Effect of Transfusion on Mortality ...           1     0.233342
5  {"title": "Transfusion-related risk of seconda...           1     0.351916
6  {"title": "Impact of Blood Product Transfusion...           1     0.269007
7  {"title": "Transfusion practice and blood stre...           1     0.291831
8  {"title": "Anemia, transfusions and hospital o...           1     0.298781
9  {"title": "Transfusion of platelets, but not o...           0     0.084553

クラスの分布:
prediction
1    9
0    1
Name: count, dtype: int64

予測確率の統計情報:
count    10.000000
mean      0.255043
std       0.075384