<a href="https://colab.research.google.com/github/ykitaguchi77/FundusPhoto/blob/main/FundusPhoto_sample_Pytorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
from __future__ import print_function, division
!pip install torch_optimizer

import torch
import torch.nn as nn
import torch_optimizer as optim
import torch.utils.data as data
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
import time
import os
import copy
import math
import shutil
import csv
import pandas as pd
import glob
from PIL import Image


#サポートパッチのインポート
from google.colab.patches import cv2_imshow
import cv2

#あればGPUを使用
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

#google driveをcolabolatoryにマウント
from google.colab import drive
drive.mount('/content/drive')



Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [8]:
DATASET_PATH = '/content/drive/MyDrive/Deep_learning/FundusPhoto'
os.chdir(DATASET_PATH)

TRAIN_FOLDER_NAME = 'cropped_img' #TESTイメージのフォルダ
VAL_FOLDER_NAME = 'cropped_img' #VALイメージのフォルダ

FILENAME_LABELCSV = 'age_train.csv' #年齢の値のcsv
FILENAME_RESULTCSV = 'result.csv' #年齢推定結果を書き出すcsv
imagesize_process = (128,128)  #処理時の画像サイズ→小さすぎるような気がする？

NET_NAME = "RepVGG-A2-train"
MODEL_PATH = "/content/drive/MyDrive/Deep_learning/RepVGG-A2-train.pth"
#OPTIMIZER_PATH = "./optimizer_multi.pth"
LOG_PATH = "./log_multi.txt"
ROC_PATH = "./roc_multi.png"
CHECKPOINT_COUNT = 10
EPOCH = 100
PATIENCE = 20 #early stopping patience; how long to wait after last time validation loss improved.
BATCH_SIZE = 16


# transforms param
PX = 224
TRAIN_NORMALIZE_PARAM = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]
TRAIN_CROP_SCALE =(0.75,1.0)
TRAIN_BRIGHTNESS_PARAM = 0.2
TRAIN_CONTRAST_PARAM = 0.1
TRAIN_SATURATION_PARAM = 0.1
TRAIN_RANDOM_ROTATION = 3
TRAIN_HUE_PARAM = 0.02
VAL_NORMALIZE_PARAM = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]

In [13]:
#csvファイルを開く
df_labelcsv = pd.read_csv(FILENAME_LABELCSV)

#csvファイルを表示
print(df_labelcsv)

#ID,ageの列の値をリストとして取り出す
df_filename = df_labelcsv['filename'].values
df_age = df_labelcsv['age'].values

#CSVファイル内の画像数
print(len(df_labelcsv))

#画像フォルダ内の画像数
len(os.listdir(DATASET_PATH +"/"+ TRAIN_FOLDER_NAME))



                   filename  age
0     img00085008_00_1R.jpg   61
1     img00085024_00_1R.jpg   29
2     img00241280_10_1R.jpg   51
3     img00265140_00_1R.jpg   29
4     img00265140_00_2L.jpg   29
...                     ...  ...
1409  img76791392_10_1R.jpg   38
1410  img76843122_10_1R.jpg   49
1411  img76843122_11_1R.jpg   49
1412  img76888512_00_1R.jpg   74
1413  img76888512_00_2L.jpg   74

[1414 rows x 2 columns]
1414


1414

In [None]:
folder_path = DATASET_PATH +"/" + TRAIN_FOLDER_NAME
img_name = os.listdir(folder_path)[1000]
print(img_name)

age_temp = df_labelcsv[df_labelcsv['filename'] == img_name].iloc[0,1]
print(age_temp)

image_path = os.path.join(folder_path, img_name)
print(image_path)

img21443464_02_1R.jpg
27
/content/drive/MyDrive/Deep_learning/FundusPhoto/cropped_img/img21443464_02_1R.jpg


In [20]:
class SimpleImageDataset(Dataset):
    def __init__(self, folder_path, csv_path, transform):
        self.transform = transform
        self.folder_path = folder_path
        self.item_paths = []
        self.item_dict = {}
        self.age = []

        for i in range(len(df_labelcsv)):
              img_name = os.listdir(self.folder_path)[i]
              age_temp = df_labelcsv[df_labelcsv['filename'] == img_name].iloc[0,1] #age
              self.age.append(float(age_temp)/100)

              img_path = os.path.join(self.folder_path, img_name)
              self.item_paths.append(image_path)
              #self.item_dict[image_path] = self.age


    def __len__(self):
        return len(self.item_paths)

    def __getitem__(self, idx):
        image_path = self.item_paths[idx]
        pilr_image = Image.open(image_path).convert("RGB")
        tensor_image = self.transform(pilr_image)
        target = torch.tensor([age[idx]])      
        return tensor_image, target


train_data_transforms = transforms.Compose([
                transforms.RandomResizedCrop(PX, scale=TRAIN_CROP_SCALE),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                transforms.Normalize(TRAIN_NORMALIZE_PARAM[0], TRAIN_NORMALIZE_PARAM[1])])
val_data_transforms = transforms.Compose([
                transforms.Resize(PX),
                transforms.ToTensor(),
                transforms.Normalize(VAL_NORMALIZE_PARAM[0], VAL_NORMALIZE_PARAM[1])]) 

train_dataset = SimpleImageDataset(os.path.join(DATASET_PATH, TRAIN_FOLDER_NAME), os.path.join(DATASET_PATH, FILENAME_LABELCSV), train_data_transforms)
val_dataset = SimpleImageDataset(os.path.join(DATASET_PATH, VAL_FOLDER_NAME), os.path.join(DATASET_PATH, FILENAME_LABELCSV),  val_data_transforms)
train_loader = DataLoader(train_dataset, batch_size = BATCH_SIZE, shuffle = True)
val_loader = DataLoader(val_dataset, batch_size = BATCH_SIZE, shuffle = False)

print(TRAIN_FOLDER_NAME + "_dataset_size：" + str(len(train_dataset)))

cropped_img_dataset_size：1414


In [None]:
from torchvision import datasets
from torch.utils.data.dataset import Subset

trainvaltest_dataset = 
n_samples = len()

In [21]:
print(train_dataset[1])

(tensor([[[-2.1179, -2.1179, -2.1179,  ..., -2.1179, -2.1179, -2.1179],
         [-2.1179, -2.1179, -2.1179,  ..., -2.1179, -2.1179, -2.1179],
         [-2.1179, -2.1179, -2.1179,  ..., -2.1179, -2.1179, -2.1179],
         ...,
         [-2.1179, -2.1179, -2.1179,  ..., -2.1179, -2.1179, -2.1179],
         [-2.1179, -2.1179, -2.1179,  ..., -2.1179, -2.1179, -2.1179],
         [-2.1179, -2.1179, -2.1179,  ..., -2.1179, -2.1179, -2.1179]],

        [[-2.0357, -2.0357, -2.0357,  ..., -2.0357, -2.0357, -2.0357],
         [-2.0357, -2.0357, -2.0357,  ..., -2.0357, -2.0357, -2.0357],
         [-2.0357, -2.0357, -2.0357,  ..., -2.0357, -2.0357, -2.0357],
         ...,
         [-2.0357, -2.0357, -2.0357,  ..., -2.0357, -2.0357, -2.0357],
         [-2.0357, -2.0357, -2.0357,  ..., -2.0357, -2.0357, -2.0357],
         [-2.0357, -2.0357, -2.0357,  ..., -2.0357, -2.0357, -2.0357]],

        [[-1.8044, -1.8044, -1.8044,  ..., -1.8044, -1.8044, -1.8044],
         [-1.8044, -1.8044, -1.8044,  ..., -

In [None]:
#csvファイルを開く
df_labelcsv = pd.read_csv(FILENAME_LABELCSV)

#csvファイルを表示
print(df_labelcsv)

#ID,ageの列の値をリストとして取り出す
labelcsv_filename = df_labelcsv['filename'].values
labelcsv_age = df_labelcsv['age'].values

#データ準備
#画像読み込み
image_org, image_org_filenames = load_images('./img_train/', (192,128), 'Color')
print("images loaded...")

#画像clippingし左右の黒帯部分を除く
image_org_clip = image_org[:, :, 32:160, ...]

In [None]:
#画像それぞれについて，画像ファイル名の中にcsvのIDの値が含まれていたらageをラベルとして取り出す
label_org = get_label(image_org_filenames, labelcsv_filename, labelcsv_age)

#ラベルが-1(csvに年齢がなかった)のラベルと画像を削除する
label_org = np.asarray(label_org, dtype=np.float32)
label_mask = np.where(label_org >= 0, True, False)
image = image_org_clip[label_mask]
image_filenames = np.array(image_org_filenames)[label_mask]
label = label_org[label_mask]

if len(image) != len(label):
    print('Numbers of train images and labels not match. Something wrong.')

#ラベル不一致を除外した後のイメージ総数
print(len(image))

1013


In [None]:
# ディレクトリ内の画像を読み込む
# inputpath: ディレクトリのパス, imagesize: 画像サイズ, type_color: ColorかGray


def load_images(inputpath, imagesize, type_color):
    imglist = []
    filenamelist = []

    for root, dirs, files in os.walk(inputpath):
        t=1
        for fn in sorted(files):
            bn, ext = os.path.splitext(fn)
            if ext not in [".bmp", ".BMP", ".jpg", ".JPG", ".jpeg", ".JPEG", ".png", ".PNG"]:
                continue
            # if 'R.jpg' not in fn:
            #     continue

            filename = os.path.join(root, fn)
            
            if type_color == 'Color':
                # カラー画像の場合
                testimage = cv2.imread(filename, cv2.IMREAD_COLOR)
                # サイズ変更
                height, width = testimage.shape[:2]
                testimage = cv2.resize(testimage, imagesize, interpolation = cv2.INTER_AREA)  #主に縮小するのでINTER_AREA使用
                testimage = np.asarray(testimage, dtype=np.float32)
                # 色チャンネル，高さ，幅に入れ替え．data_format="channels_first"を使うとき必要
                #testimage = testimage.transpose(2, 0, 1)
                # チャンネルをbgrからrgbの順に変更
                #testimage = testimage[:,:,::-1]

                #cv2_imshow(testimage)
                print(str(t))
                t+=1

            
            elif type_color == 'Gray':
                # グレースケール画像の場合
                testimage = cv2.imread(filename, cv2.IMREAD_GRAYSCALE)
                # サイズ変更
                height, width = testimage.shape[:2]
                testimage = cv2.resize(testimage, imagesize, interpolation = cv2.INTER_AREA)  #主に縮小するのでINTER_AREA使用
                # チャンネルの次元がないので1次元追加する
                testimage = np.asarray(testimage, dtype=np.float32).reshape((imagesize[1], imagesize[0], 1))
                # チャンネル，高さ，幅に入れ替え．data_format="channels_first"を使うとき必要
                #testimage = testimage.transpose(2, 0, 1)

            imglist.append(testimage)
            filenamelist.append(fn)
    imgsdata = np.asarray(imglist, dtype=np.float32)

    return imgsdata, filenamelist  # 画像リストとファイル名のリストを返す



#読み込んだ画像ファイル名リストに対して正解年齢リストを作成して返す
#読み込んだ画像ファイル名リスト，csvのファイル名リスト，csvの年齢リストを受け取り，ファイル名が一致したら年齢を割り当てる
def get_label(image_filenames, label_filename, label_age):
    labels = []
    for i in range(len(image_filenames)):
        labelfound = False
        for j in range(len(label_filename)):
            if image_filenames[i] == label_filename[j]:
                labels.append(label_age[j])
                labelfound = True
                break
        
        #csv中に画像に該当するageがなかった
        if labelfound == False:
            labels.append(-1)    #目印としてラベルを-1にする
            print('Age data is not found for %s'%(image_filenames[i]))
            
    return labels

In [None]:
os.listdir("./crop_img")

FileNotFoundError: ignored

In [None]:
#trainデータをtrainとtestデータに分割（8:2）
image_train, image_test, label_train, label_test = train_test_split(image, label, test_size=0.2)

print('Data load finished')
print('Data numbers for train: ' + repr(len(image_train)) + ', test: ' + repr(len(image_test)))



NameError: ignored

In [None]:
#trainデータをtrainとtestデータに分割（8:2）
image_train, image_test, label_train, label_test = train_test_split(image, label, test_size=0.2)

print('Data load finished')
print('Data numbers for train: ' + repr(len(image_train)) + ', test: ' + repr(len(image_test)))


#値の正規化
image_train /= 255.0
image_test /= 255.0

label_train /= 100.0 #年齢は100で割る
label_test /= 100.0


#%%
#年齢推定モデル
def model_cnn1():
    input_img = Input(shape=(imagesize_process[1], imagesize_process[0], 3))
    
    x = Conv2D(16, kernel_size=3, strides=1, activation='relu', padding='same')(input_img)
    x = BatchNormalization()(x)
    x = Conv2D(16, kernel_size=3, strides=1, activation='relu', padding='same')(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D(pool_size=2, strides=2)(x)
    
    x = Conv2D(32, kernel_size=3, strides=1, activation='relu', padding='same')(x)
    x = BatchNormalization()(x)
    x = Conv2D(32, kernel_size=3, strides=1, activation='relu', padding='same')(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D(pool_size=2, strides=2)(x)
    
    x = Conv2D(64, kernel_size=3, strides=1, activation='relu', padding='same')(x)
    x = BatchNormalization()(x)
    x = Conv2D(64, kernel_size=3, strides=1, activation='relu', padding='same')(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D(pool_size=2, strides=2)(x)
    
    x = Conv2D(128, kernel_size=3, strides=1, activation='relu', padding='same')(x)
    x = BatchNormalization()(x)
    x = Conv2D(128, kernel_size=3, strides=1, activation='relu', padding='same')(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D(pool_size=2, strides=2)(x)
    
    x = Conv2D(256, kernel_size=3, strides=1, activation='relu', padding='same')(x)
    x = BatchNormalization()(x)
    x = Conv2D(256, kernel_size=3, strides=1, activation='relu', padding='same')(x)
    x = BatchNormalization()(x)

    
    x = Flatten()(x)
    
    x = Dense(64, activation='linear')(x)
    x = Dropout(0.1)(x)
    x = Dense(8, activation='linear')(x)
    x = Dropout(0.1)(x)
    
    x = Dense(1, activation='linear')(x)
    
    model = Model(inputs=input_img, outputs=x)
    
    return model


model = model_cnn1()

#モデル構造表示
#print(model.summary())


#学習の設定
from tensorflow.keras.optimizers import Adam
adam = Adam(lr=0.00001)
model.compile(loss='mean_absolute_error', optimizer='adam', metrics=['mae'])

#学習
training = model.fit(image_train, label_train,
                    epochs=50, batch_size=6, shuffle=True, validation_data=(image_test, label_test), verbose=1)


#学習済みモデルをファイルに保存
#モデル
json_string = model.to_json()
open('model.json', 'w').write(json_string)
#重み
model.save_weights('weight.hdf5')


#学習履歴表示
def plot_history(history):
    plt.plot(history.history['mae'])
    plt.plot(history.history['val_mae'])
    plt.title('mean absolute error')
    plt.xlabel('epoch')
    plt.ylabel('mae')
    plt.yscale('log')
    plt.legend(['mae', 'val_mae'], loc='lower right')
    plt.show()
    
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('model loss')
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.yscale('log')
    plt.legend(['loss', 'val_loss'], loc='lower right')
    plt.show()
    
plot_history(training)


#testデータを用いた推定
result = model.predict(image_test, verbose=1)

#値の範囲を元に戻す
label_test = label_test*100.0
result = result*100.0

result_mse = mean_squared_error(label_test, result)
print('MSE : %.2f'%(result_mse ** 0.5))


#推定結果をcsvファイルに書き出し
f = open(FILENAME_RESULTCSV, 'w')
writer = csv.writer(f, lineterminator='\n')

savedata = ['true_age', 'estimate_age']
writer.writerow(savedata)

for i in range(len(label_test)):
    savedata = [label_test[i], result[i][0]]
    writer.writerow(savedata)

f.close()

KeyboardInterrupt: ignored