<a href="https://colab.research.google.com/github/ykitaguchi77/FundusPhoto/blob/main/Metabo2024_final_prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Metabo2024_final_prediction**

In [None]:
# Import necessary libraries
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import cross_val_score

# Mount Google Drive so the image folders, model weights and CSV files stored
# under /content/drive/MyDrive are reachable from this Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


### **Preprocess images**

In [None]:
#テスト画像を384pxに整形

import os
import cv2
from tqdm import tqdm

def crop_center_image(cv2_img, crop_size=1170):
    """Crop a centred square of at most ``crop_size`` pixels per side.

    Args:
        cv2_img: Image array whose first two axes are (height, width). Any
            trailing axes (e.g. colour channels) are kept as they are.
        crop_size: Requested side length of the square crop, in pixels.

    Returns:
        The centred square slice of ``cv2_img``. When the image is smaller
        than ``crop_size`` in either dimension, the largest centred square
        that fits inside the image is returned instead.
    """
    # Only the first two axes matter, so 2-D (grayscale) arrays work too.
    height, width = cv2_img.shape[:2]

    # Clamp the side length to the image: without this the start index goes
    # negative for small images and the slice silently wraps around, yielding
    # a wrong (or nearly empty) crop.
    side = min(crop_size, height, width)

    # Top-left corner of the crop, measured from the image centre.
    start_x = width // 2 - side // 2
    start_y = height // 2 - side // 2

    return cv2_img[start_y:start_y + side, start_x:start_x + side]

def resize_image(cv2_img, size=(384, 384)):
    """Return ``cv2_img`` resized with ``cv2.resize``.

    ``size`` is forwarded unchanged as the second positional argument of
    ``cv2.resize`` and defaults to ``(384, 384)``.
    """
    return cv2.resize(cv2_img, size)

def process_images(input_dir, output_dir):
    """Centre-crop and resize every image in ``input_dir`` into ``output_dir``.

    Files ending in .png/.jpg/.jpeg (case-insensitive) are read with
    ``cv2.imread``, passed through ``crop_center_image`` and ``resize_image``,
    and written to ``output_dir`` under the same file name. Problems with an
    individual file are printed and the loop moves on to the next file.

    Args:
        input_dir: Directory containing the source images.
        output_dir: Directory receiving the processed images; created when it
            does not exist yet.
    """
    # exist_ok avoids the separate existence check and its race condition.
    os.makedirs(output_dir, exist_ok=True)

    # Collect the image files to process.
    image_files = [
        f for f in os.listdir(input_dir)
        if f.lower().endswith(('.png', '.jpg', '.jpeg'))
    ]

    # tqdm shows a progress bar while the files are processed.
    for filename in tqdm(image_files, desc="Processing images"):
        img_path = os.path.join(input_dir, filename)
        img = cv2.imread(img_path)

        # cv2.imread signals an unreadable file by returning None.
        if img is None:
            print(f"Could not read image: {filename}")
            continue

        try:
            cropped_img = crop_center_image(img)
            resized_img = resize_image(cropped_img)

            output_path = os.path.join(output_dir, filename)
            # cv2.imwrite reports a failed write through its boolean return
            # value rather than an exception, so check it explicitly.
            if not cv2.imwrite(output_path, resized_img):
                print(f"Error processing {filename}: could not write {output_path}")
        except Exception as e:
            print(f"Error processing {filename}: {str(e)}")

# Input and output directory paths
input_dir = "/content/drive/MyDrive/Deep_learning/Fundus_metabolic/images_test"
output_dir = "/content/drive/MyDrive/Deep_learning/Fundus_metabolic/images_test_384px"

# Run the image preprocessing
process_images(input_dir, output_dir)

Processing images:   1%|          | 5/500 [00:19<32:51,  3.98s/it]


KeyboardInterrupt: 

# **Inference images**

In [None]:
# Inputs for the inference cells below: the preprocessed 384px test images,
# the model checkpoint, and the answer-template CSV that receives predictions.
test_images_dir = "/content/drive/MyDrive/Deep_learning/Fundus_metabolic/images_test_384px"
model_path = "/content/drive/MyDrive/Deep_learning/Fundus_metabolic/final_prediction/mets.pth"
csv_path = "/content/drive/MyDrive/Deep_learning/Fundus_metabolic/解答用ファイル.csv"

In [None]:
# 必要ライブラリ読み込み
!pip install timm==0.5.4 --q #timmのバージョンが違うとうまくstate dictできないので注意！！！
import random
import timm
import copy
import torchvision.transforms as transforms
import torch.utils.data as data
from torch.utils.data import Dataset, DataLoader
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
from timm.scheduler import CosineLRScheduler
import numpy as np
from PIL import Image
from tqdm.notebook import tqdm
import os
import glob
import pandas as pd
import torch
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.metrics import mean_squared_error, r2_score
import time
import matplotlib.pyplot as plt
%matplotlib inline

# Set device: use the first CUDA GPU when one is available, otherwise the CPU.
# `device` is a module-level global read by load_model and predict_image.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Load and prepare the model
def load_model(model_path):
    """Build the Swin classifier and load its weights from ``model_path``.

    Args:
        model_path: Path to a checkpoint file containing the model state dict.

    Returns:
        The ``swin_base_patch4_window12_384`` model with a single output,
        moved to the module-level ``device`` and set to evaluation mode.
    """
    print("Loading model...")
    model = timm.create_model('swin_base_patch4_window12_384', num_classes=1, pretrained=False)
    # map_location makes the tensors land on the selected device, so a
    # checkpoint written on a GPU still loads when `device` is the CPU.
    state_dict = torch.load(model_path, map_location=device)
    model.load_state_dict(state_dict)
    model = model.to(device)
    model.eval()
    return model

# Transform for test images (same as validation transform).
# Converts the PIL image to a tensor and normalises each channel with the
# mean/std values commonly used for ImageNet-pretrained models. No resizing
# happens here, so images are fed to the model at whatever size they have on
# disk (the preprocessing step writes them at 384x384).
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

def predict_image(model, image_path, transform):
    """Classify a single image file.

    The image is opened as RGB, passed through ``transform``, given a batch
    dimension and moved to the module-level ``device``. The model output is
    squashed with a sigmoid and thresholded at 0.5.

    Returns:
        ``(prediction, prob)`` where ``prob`` is the sigmoid output as a float
        and ``prediction`` is 1 when ``prob`` exceeds 0.5, otherwise 0.
    """
    # Read the file and turn it into a one-item batch on the target device.
    rgb_image = Image.open(image_path).convert('RGB')
    batch = transform(rgb_image).unsqueeze(0).to(device)

    # Forward pass without gradient tracking.
    with torch.no_grad():
        logit = model(batch)
        prob = torch.sigmoid(logit).item()

    prediction = int(prob > 0.5)
    return prediction, prob

def main(test_images_dir, model_path, csv_path):
    """Predict every image in ``test_images_dir`` and fill in the answer CSV.

    Args:
        test_images_dir: Directory holding the preprocessed test images.
        model_path: Checkpoint path handed to ``load_model``.
        csv_path: Answer-template CSV. It is read, updated and then
            overwritten in place.

    Rows are matched on the template's ``File`` column (falling back to a
    ``filename`` column when ``File`` is absent). A row matches an image when
    it holds either the full file name or the name without its extension.

    Raises:
        KeyError: If the CSV has neither a ``File`` nor a ``filename`` column.
    """
    label_column = 'MetabolicSyndrome_0=No_1=Yes'

    # Start timing
    start_time = time.time()

    # Overall progress: model load, CSV load, inference, save
    pbar_main = tqdm(total=4, desc='Overall Progress', position=0)

    # Load model
    model = load_model(model_path)
    pbar_main.update(1)

    # Load answer template
    print("\nLoading answer template...")
    df = pd.read_csv(csv_path)
    # Resolve the identifier column once, up front. Looking up a missing
    # column inside the loop raised the same KeyError for every image and
    # left the CSV without any prediction.
    id_column = next((c for c in ('File', 'filename') if c in df.columns), None)
    if id_column is None:
        pbar_main.close()
        raise KeyError(
            f"Expected a 'File' or 'filename' column in {csv_path}, "
            f"found {df.columns.tolist()}"
        )
    pbar_main.update(1)

    # Get all image files (case-insensitive, like the preprocessing step)
    image_files = [
        f for f in os.listdir(test_images_dir)
        if f.lower().endswith(('.jpg', '.png', '.jpeg'))
    ]
    total_images = len(image_files)
    print(f"\nFound {total_images} images to process")

    # Progress bar for the per-image loop
    pbar_images = tqdm(total=total_images, desc='Processing Images',
                       position=1, leave=True)

    positive_preds = 0
    processed_count = 0

    for image_file in image_files:
        image_path = os.path.join(test_images_dir, image_file)
        basename = os.path.splitext(image_file)[0]

        try:
            prediction, _ = predict_image(model, image_path, transform)

            # Accept either the full file name or the bare stem in the CSV.
            mask = df[id_column].isin([image_file, basename])
            if mask.any():
                df.loc[mask, label_column] = prediction

            # Count only images that went through completely, so the running
            # percentage below never divides by zero.
            processed_count += 1
            positive_preds += prediction

            pbar_images.set_description(
                f'Processed: {processed_count}/{total_images} | '
                f'Positive: {positive_preds} ({(positive_preds/processed_count*100):.1f}%)'
            )

        except Exception as e:
            print(f"\nError processing {image_file}: {str(e)}")

        finally:
            pbar_images.update(1)

    pbar_images.close()
    pbar_main.update(1)

    # Writing ints into an empty (NaN) column leaves it as float; the nullable
    # integer dtype saves 0/1 instead of 0.0/1.0 and tolerates unfilled rows.
    if label_column in df.columns:
        df[label_column] = df[label_column].astype('Int64')

    # Save results
    print("\nSaving results...")
    df.to_csv(csv_path, index=False)
    pbar_main.update(1)

    # Final statistics
    end_time = time.time()
    processing_time = end_time - start_time

    print("\nInference completed!")
    print(f"Total processing time: {processing_time:.2f} seconds")
    if processed_count:
        negative_preds = processed_count - positive_preds
        print(f"Average time per image: {processing_time/processed_count:.2f} seconds")
        print(f"Total images processed: {processed_count}/{total_images}")
        print(f"Positive predictions: {positive_preds} ({(positive_preds/processed_count*100):.1f}%)")
        print(f"Negative predictions: {negative_preds} ({(negative_preds/processed_count*100):.1f}%)")
    else:
        print("No images were processed successfully.")

    pbar_main.close()

# Entry point: runs the inference when this cell is executed.
# The three paths repeat the values assigned in the path-configuration cell.
if __name__ == "__main__":
    test_images_dir = "/content/drive/MyDrive/Deep_learning/Fundus_metabolic/images_test_384px"
    model_path = "/content/drive/MyDrive/Deep_learning/Fundus_metabolic/final_prediction/mets.pth"
    csv_path = "/content/drive/MyDrive/Deep_learning/Fundus_metabolic/解答用ファイル.csv"

    main(test_images_dir, model_path, csv_path)

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/431.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m431.5/431.5 kB[0m [31m32.8 MB/s[0m eta [36m0:00:00[0m
[?25hUsing device: cuda:0


Overall Progress:   0%|          | 0/4 [00:00<?, ?it/s]

Loading model...


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]
  model.load_state_dict(torch.load(model_path))



Loading answer template...

Found 500 images to process


Processing Images:   0%|          | 0/500 [00:00<?, ?it/s]


Error processing img06334262_00_1R.jpg: 'filename'

Error processing img07327628_00_1R.jpg: 'filename'

Error processing img07046869_00_1R.jpg: 'filename'

Error processing img04471264_00_1R.jpg: 'filename'

Error processing img01376448_00_1R.jpg: 'filename'

Error processing img06569321_00_1R.jpg: 'filename'

Error processing img01931470_00_1R.jpg: 'filename'

Error processing img03621168_00_1R.jpg: 'filename'

Error processing img09748120_00_1R.jpg: 'filename'

Error processing img08116743_00_1R.jpg: 'filename'

Error processing img01119364_00_1R.jpg: 'filename'

Error processing img05562039_00_1R.jpg: 'filename'

Error processing img00396378_00_1R.jpg: 'filename'

Error processing img05101416_00_1R.jpg: 'filename'

Error processing img05901321_00_1R.jpg: 'filename'

Error processing img03714772_00_1R.jpg: 'filename'

Error processing img08458833_00_1R.jpg: 'filename'

Error processing img06112930_00_1R.jpg: 'filename'

Error processing img08698608_00_1R.jpg: 'filename'

Error proce

In [None]:
import torch
import torchvision.transforms as transforms
import timm
from PIL import Image
import os
import pandas as pd
from tqdm.auto import tqdm
import torch.nn as nn
import time

# Set device: use the first CUDA GPU when one is available, otherwise the CPU.
# `device` is a module-level global read by load_model and predict_image.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

def load_model(model_path):
    """Build the Swin classifier and load its weights from ``model_path``.

    Args:
        model_path: Path to a checkpoint file containing the model state dict.

    Returns:
        The ``swin_base_patch4_window12_384`` model with a single output,
        moved to the module-level ``device`` and set to evaluation mode.
    """
    print("Loading model...")
    model = timm.create_model('swin_base_patch4_window12_384', num_classes=1, pretrained=False)
    # map_location makes the tensors land on the selected device, so a
    # checkpoint written on a GPU still loads when `device` is the CPU.
    state_dict = torch.load(model_path, map_location=device)
    model.load_state_dict(state_dict)
    model = model.to(device)
    model.eval()
    return model

# Inference-time transform: convert the PIL image to a tensor and normalise
# each channel with the mean/std values commonly used for ImageNet-pretrained
# models. No resizing happens here, so images reach the model at whatever
# size they have on disk.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

def predict_image(model, image_path, transform):
    """Classify a single image file.

    The image is opened as RGB, passed through ``transform``, given a batch
    dimension and moved to the module-level ``device``. The model output is
    squashed with a sigmoid and thresholded at 0.5.

    Returns:
        ``(prediction, prob)`` where ``prob`` is the sigmoid output as a float
        and ``prediction`` is the int 1 when ``prob`` exceeds 0.5, otherwise 0.
    """
    # Read the file and turn it into a one-item batch on the target device.
    rgb_image = Image.open(image_path).convert('RGB')
    batch = transform(rgb_image).unsqueeze(0).to(device)

    # Forward pass without gradient tracking.
    with torch.no_grad():
        logit = model(batch)
        prob = torch.sigmoid(logit).item()

    # Plain Python int, never a bool.
    prediction = 1 if prob > 0.5 else 0
    return prediction, prob

def main(test_images_dir, model_path, csv_path):
    """Predict every test image and write a filled-in copy of the answer CSV.

    Args:
        test_images_dir: Directory holding the preprocessed test images.
        model_path: Checkpoint path handed to ``load_model``.
        csv_path: Answer-template CSV; it is only read. Its ``File`` column is
            matched against the image file names (including extension).

    The filled-in table is written to a fixed path under ``final_prediction``,
    so the template at ``csv_path`` is left untouched. Rows without a
    prediction keep a missing value in the label column.
    """
    file_column = 'File'
    label_column = 'MetabolicSyndrome_0=No_1=Yes'
    start_time = time.time()

    # load_model prints its own "Loading model..." message.
    model = load_model(model_path)

    # Load answer template
    print("\nLoading answer template...")
    df = pd.read_csv(csv_path)
    print(f"CSV columns: {df.columns.tolist()}")

    # Get all image files (case-insensitive, like the preprocessing step)
    image_files = [
        f for f in os.listdir(test_images_dir)
        if f.lower().endswith(('.jpg', '.png', '.jpeg'))
    ]
    total_images = len(image_files)
    print(f"\nFound {total_images} images to process")

    pbar = tqdm(total=total_images, desc='Processing Images')
    positive_preds = 0
    processed_count = 0

    # file name -> 0/1 prediction, for images that were processed successfully
    predictions = {}

    for image_file in image_files:
        image_path = os.path.join(test_images_dir, image_file)

        try:
            prediction, _ = predict_image(model, image_path, transform)
        except Exception as e:
            print(f"\nError processing {image_file}: {str(e)}")
        else:
            predictions[image_file] = int(prediction)
            positive_preds += predictions[image_file]
            processed_count += 1
            pbar.set_description(
                f'Processed: {processed_count}/{total_images} | '
                f'Positive: {positive_preds} ({(positive_preds/processed_count*100):.1f}%)'
            )
        finally:
            pbar.update(1)

    pbar.close()

    # Look up all rows at once instead of iterating over the DataFrame.
    predicted = df[file_column].map(predictions)
    matched = predicted.notna()
    updated_count = int(matched.sum())
    df.loc[matched, label_column] = predicted[matched].astype(int)

    # Nullable integer dtype: a plain astype(int) raises as soon as one row
    # has no prediction (NaN), e.g. after a failed or missing image.
    df[label_column] = df[label_column].astype('Int64')

    # Save results
    output_path = "/content/drive/MyDrive/Deep_learning/Fundus_metabolic/final_prediction/解答用ファイル.csv"
    print(f"\nSaving results to: {output_path}")
    print(f"Updated {updated_count} entries in the CSV file")
    df.to_csv(output_path, index=False)

    # Final statistics
    end_time = time.time()
    processing_time = end_time - start_time

    if processed_count > 0:
        negative_preds = processed_count - positive_preds
        print("\nInference completed!")
        print(f"Total processing time: {processing_time:.2f} seconds")
        print(f"Average time per image: {processing_time/processed_count:.2f} seconds")
        print(f"Total images processed successfully: {processed_count}")
        print(f"Positive predictions: {positive_preds} ({(positive_preds/processed_count*100):.1f}%)")
        print(f"Negative predictions: {negative_preds} ({(negative_preds/processed_count*100):.1f}%)")
        print(f"Entries updated in CSV: {updated_count}")

        # Print matching statistics
        print(f"\nMatching statistics:")
        print(f"Total entries in CSV: {len(df)}")
        print(f"Total images processed: {len(predictions)}")
        print(f"Successful matches: {updated_count}")
        print(f"Unmatched entries: {len(df) - updated_count}")
    else:
        print("\nNo images were processed successfully.")

# Entry point: runs the inference when this cell is executed.
# csv_path is the answer template that main() reads; main() writes its result
# to a separate file under final_prediction/.
if __name__ == "__main__":
    test_images_dir = "/content/drive/MyDrive/Deep_learning/Fundus_metabolic/images_test_384px"
    model_path = "/content/drive/MyDrive/Deep_learning/Fundus_metabolic/final_prediction/mets.pth"
    csv_path = "/content/drive/MyDrive/Deep_learning/Fundus_metabolic/解答用ファイル.csv"

    main(test_images_dir, model_path, csv_path)

Using device: cuda:0
Loading model...
Loading model...


  model.load_state_dict(torch.load(model_path))



Loading answer template...
CSV columns: ['File', 'MetabolicSyndrome_0=No_1=Yes']

Found 500 images to process


Processing Images:   0%|          | 0/500 [00:00<?, ?it/s]


Saving results to: /content/drive/MyDrive/Deep_learning/Fundus_metabolic/final_prediction/解答用ファイル.csv
Updated 500 entries in the CSV file

Inference completed!
Total processing time: 32.02 seconds
Average time per image: 0.06 seconds
Total images processed successfully: 500
Positive predictions: 221 (44.2%)
Negative predictions: 279 (55.8%)
Entries updated in CSV: 500

Matching statistics:
Total entries in CSV: 500
Total images processed: 500
Successful matches: 500
Unmatched entries: 0


# **Seek best practice**

・/content/drive/MyDrive/Deep_learning/Fundus_metabolic/images_whole_384pxの画像をinference

・/content/drive/MyDrive/Deep_learning/Fundus_metabolic/models/finetuned_model/predicted_data.csvに"pred"の列を作って予測の結果（METsが0か1か）を記載

・予測年齢別のMETsの分布率とpredの正解率を、10歳刻みで計算してグラフに

※予測年齢のデータ(age_prediction)およびMETsのgroundtruth(METS)はpredicted_data.csv内に記載されています

In [None]:
import torch
import torchvision.transforms as transforms
import timm
from PIL import Image
import os
import pandas as pd
import numpy as np
from tqdm.auto import tqdm
import matplotlib.pyplot as plt
import seaborn as sns

# Set device: use the first CUDA GPU when one is available, otherwise the CPU.
# `device` is a module-level global read by load_model and predict_image.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

def load_model(model_path):
    """Build the Swin classifier and load its weights from ``model_path``.

    Args:
        model_path: Path to a checkpoint file containing the model state dict.

    Returns:
        The ``swin_base_patch4_window12_384`` model with a single output,
        moved to the module-level ``device`` and set to evaluation mode.
    """
    print("Loading model...")
    model = timm.create_model('swin_base_patch4_window12_384', num_classes=1, pretrained=False)
    # map_location makes the tensors land on the selected device, so a
    # checkpoint written on a GPU still loads when `device` is the CPU.
    state_dict = torch.load(model_path, map_location=device)
    model.load_state_dict(state_dict)
    model = model.to(device)
    model.eval()
    return model

# Inference-time transform: convert the PIL image to a tensor and normalise
# each channel with the mean/std values commonly used for ImageNet-pretrained
# models. No resizing happens here, so images reach the model at whatever
# size they have on disk.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

def predict_image(model, image_path, transform):
    """Classify a single image file.

    The image is opened as RGB, passed through ``transform``, given a batch
    dimension and moved to the module-level ``device``. The model output is
    squashed with a sigmoid and thresholded at 0.5.

    Returns:
        ``(prediction, prob)`` where ``prob`` is the sigmoid output as a float
        and ``prediction`` is the int 1 when ``prob`` exceeds 0.5, otherwise 0.
    """
    # Read the file and turn it into a one-item batch on the target device.
    rgb_image = Image.open(image_path).convert('RGB')
    batch = transform(rgb_image).unsqueeze(0).to(device)

    # Forward pass without gradient tracking.
    with torch.no_grad():
        logit = model(batch)
        prob = torch.sigmoid(logit).item()

    prediction = 1 if prob > 0.5 else 0
    return prediction, prob

def get_age_group(age):
    """Return the 10-year bucket label for ``age``, e.g. ``45.3`` -> ``"40-49"``.

    Args:
        age: Age in years, as an int or a float.

    Returns:
        A label of the form ``"<start>-<start + 9>"`` with integer bounds, or
        ``"unknown"`` when ``age`` is missing (``None`` or NaN).
    """
    if pd.isna(age):
        return "unknown"
    # Floor division on a float keeps the ".0" (40.0), which would leak into
    # the label as "40.0-49.0"; convert to int before formatting.
    decade_start = int(age // 10) * 10
    return f"{decade_start}-{decade_start + 9}"

def main():
    """Predict the whole image set and report METs rate / accuracy by age group.

    Steps:
      1. Run the model on every image in ``images_whole_384px``.
      2. Store the 0/1 result in a ``pred`` column of ``predicted_data.csv``,
         matching on the ``filename`` column (rows without a prediction get a
         missing value).
      3. Bucket ``age_prediction`` into 10-year groups and compute, per group,
         the share of ``METS == 1`` and the share of rows where ``pred``
         equals ``METS``.
      4. Plot both percentages per group and overwrite the CSV in place,
         including the added ``pred`` and ``age_group`` columns.
    """
    # Path settings
    test_images_dir = "/content/drive/MyDrive/Deep_learning/Fundus_metabolic/images_whole_384px"
    model_path = "/content/drive/MyDrive/Deep_learning/Fundus_metabolic/final_prediction/mets.pth"
    csv_path = "/content/drive/MyDrive/Deep_learning/Fundus_metabolic/models/finetuned_model/predicted_data.csv"

    # Load the model
    model = load_model(model_path)

    # Load the CSV file
    df = pd.read_csv(csv_path)
    print("Original DataFrame shape:", df.shape)

    # Collect the image files (case-insensitive extension match)
    image_files = [
        f for f in os.listdir(test_images_dir)
        if f.lower().endswith(('.jpg', '.png', '.jpeg'))
    ]
    total_images = len(image_files)
    print(f"\nFound {total_images} images to process")

    # Run inference. Iterating through tqdm advances the bar for every image,
    # including the ones that fail, and closes it when the loop ends.
    predictions = {}
    for image_file in tqdm(image_files, desc='Processing Images'):
        image_path = os.path.join(test_images_dir, image_file)
        try:
            prediction, _ = predict_image(model, image_path, transform)
        except Exception as e:
            print(f"\nError processing {image_file}: {str(e)}")
            continue
        # Keyed by the file name exactly as it appears on disk.
        predictions[image_file] = prediction

    # Add the predictions to the DataFrame (nullable int keeps missing rows)
    df['pred'] = df['filename'].map(predictions).astype('Int64')

    # Create the age groups
    df['age_group'] = df['age_prediction'].apply(get_age_group)

    # Order the groups by the ages they contain rather than by label text:
    # sorted as strings, "100-109" would come before "20-29".
    group_order = (
        df.groupby('age_group')['age_prediction'].min().sort_values().index
    )

    # Per-group statistics
    age_stats = []
    for group in group_order:
        group_data = df[df['age_group'] == group]

        # Share of rows with METS == 1
        mets_rate = (group_data['METS'] == 1).mean() * 100

        # Share of rows where the prediction equals the ground truth
        accuracy = (group_data['METS'] == group_data['pred']).mean() * 100

        age_stats.append({
            'age_group': group,
            'mets_rate': mets_rate,
            'accuracy': accuracy,
            'count': len(group_data)
        })

    age_stats_df = pd.DataFrame(age_stats)

    # Build the chart on a single figure; an extra plt.figure() call here
    # would only leave an empty figure behind.
    x = np.arange(len(age_stats_df['age_group']))
    width = 0.35

    fig, ax1 = plt.subplots(figsize=(12, 6))

    # METs rate (bars)
    ax1.bar(x - width/2, age_stats_df['mets_rate'], width,
            label='METs Rate (%)', color='skyblue')

    # Prediction accuracy (bars)
    ax1.bar(x + width/2, age_stats_df['accuracy'], width,
            label='Prediction Accuracy (%)', color='lightgreen')

    # Axis labels, title, ticks and legend
    ax1.set_xlabel('Age Group')
    ax1.set_ylabel('Percentage (%)')
    ax1.set_title('METs Rate and Prediction Accuracy by Age Group')
    ax1.set_xticks(x)
    ax1.set_xticklabels(age_stats_df['age_group'])
    ax1.legend()

    # Show the sample count of each group
    for i, count in enumerate(age_stats_df['count']):
        ax1.text(i, 5, f'n={count}', ha='center')

    # Display the chart
    fig.tight_layout()
    plt.show()

    # Save the results
    print("\nSaving results...")
    df.to_csv(csv_path, index=False)

    # Per-group statistics
    print("\nAge Group Statistics:")
    print(age_stats_df.to_string(index=False))

    # Overall statistics
    print("\nOverall Statistics:")
    print(f"Total samples: {len(df)}")
    print(f"Overall METs Rate: {(df['METS'] == 1).mean() * 100:.2f}%")
    print(f"Overall Prediction Accuracy: {(df['METS'] == df['pred']).mean() * 100:.2f}%")

# Entry point: runs the whole-dataset inference and age-group analysis when
# this cell is executed.
if __name__ == "__main__":
    main()

Using device: cuda:0
Loading model...


  model.load_state_dict(torch.load(model_path))


Original DataFrame shape: (5000, 8)

Found 5000 images to process


Processing Images:   0%|          | 0/5000 [00:00<?, ?it/s]