<a href="https://colab.research.google.com/github/ykitaguchi77/grav_bootcamp/blob/master/YOLOv5_dataset_to_bootcamp.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **YOLOv5用データセット --> Bootcamp用セット作成**

In [1]:
# Mount Google Drive so that the dataset folders under /content/drive are reachable.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [16]:
import pandas as pd
import glob
import os


for dataset in ["train", "valid"]:
    # Root of one split; it holds a labels/ folder and an images/ folder.
    dataset_path = f"/content/drive/MyDrive/Deep_learning/GO_extended_dataset/periocular_for_YOLO_training/{dataset}" #train or valid 

    # One record per label file, so the three CSV columns can never get out of
    # step. (Appending one label per *line* breaks the DataFrame constructor as
    # soon as a label file contains zero or several lines.)
    records = []

    # sorted(): os.listdir() returns entries in arbitrary order; sorting makes
    # the CSV row order reproducible.
    for file_name in sorted(os.listdir(f"{dataset_path}/labels")):
        if not file_name.endswith('.txt'):
            continue
        file_path = f"{dataset_path}/labels/{file_name}"
        # splitext keeps any dots inside the stem, unlike split(".")[0].
        name = os.path.splitext(file_name)[0]

        # The label is the first token of the first non-blank line of the file.
        label = None
        with open(file_path, 'r') as f:
            for line in f:
                tokens = line.split()
                if tokens:
                    label = tokens[0]
                    break
        if label is None:
            # Empty label file: there is no label to record, so leave it out.
            print(f"skipped (no label): {file_path}")
            continue

        records.append({
            'image_path': f"{dataset_path}/images/{name}.JPG",
            'name': name.split("-")[0],  # patient ID: the part before the first "-"
            'label': label,
        })

    # Build the table for this split.
    df = pd.DataFrame(records, columns=['image_path', 'name', 'label'])
    print(f"{dataset}: {len(df)}")

    # Save the table next to the split folders.
    df.to_csv(f'/content/drive/MyDrive/Deep_learning/GO_extended_dataset/periocular_for_YOLO_training/{dataset}_list.csv', index=False)




train: 2649
valid: 664


# **Valid_dataset --> hum_evalへ**

validデータセットから、なるべく患者が被らないように66×3セットの人力評価用データセットを作成する

In [5]:
import pandas as pd

# Load the listing of the validation split from its CSV file.
valid_list_csv = '/content/drive/MyDrive/Deep_learning/GO_extended_dataset/periocular_for_YOLO_training/valid_list.csv'
df = pd.read_csv(valid_list_csv)
# Optional inspection helpers, left disabled:
# df = df.sort_values('name') # sort by name
# df = df.sort_values('label') # sort by label
# df = df.reset_index() # renumber the index
# df = df.drop('index', axis=1) # drop the old index column
# df

In [6]:
import pandas as pd

def extract_dataset(df, n_per_label=33, random_state=1):
    """Draw a label-balanced set of rows with at most one row per ``name``.

    One row is picked at random for every distinct ``name``; from those
    candidates ``n_per_label`` rows with ``label == 0`` and ``n_per_label``
    rows with ``label == 1`` are sampled.

    Args:
        df: DataFrame with at least ``name`` and ``label`` columns and a
            unique index.
        n_per_label: Number of rows to draw for each label (default 33).
        random_state: Seed used for every random draw, so that a call is
            reproducible (default 1).

    Returns:
        Tuple ``(label_0_rows, label_1_rows, df_remain)``. The drawn rows keep
        their index labels from ``df``. ``df_remain`` is ``df`` without the
        drawn rows; other rows sharing a drawn ``name`` stay in it.

    Raises:
        ValueError: If the index of ``df`` is not unique, or if fewer than
            ``n_per_label`` distinct names are available for a label.
    """
    if not df.index.is_unique:
        raise ValueError("extract_dataset requires a DataFrame with a unique index")

    # Shuffle, then keep the first row seen for each name. This picks one random
    # row per name while preserving the index labels of df, which is what lets
    # the drawn rows be removed from df exactly further down.
    candidates = df.sample(frac=1, random_state=random_state).drop_duplicates(subset="name")

    # Rows with label 0
    label_0_rows = candidates[candidates["label"] == 0]

    # Rows with label 1
    label_1_rows = candidates[candidates["label"] == 1]

    for label_value, rows in ((0, label_0_rows), (1, label_1_rows)):
        if len(rows) < n_per_label:
            raise ValueError(
                f"need {n_per_label} distinct names with label {label_value}, "
                f"found {len(rows)}"
            )

    # Random draw for each label
    random_label_0_rows = label_0_rows.sample(n=n_per_label, random_state=random_state)
    random_label_1_rows = label_1_rows.sample(n=n_per_label, random_state=random_state)

    # Remove the drawn rows by index label. (DataFrame.isin with a DataFrame
    # argument also aligns on index labels, so it cannot be used on rows whose
    # index has been reset.)
    picked_index = random_label_0_rows.index.union(random_label_1_rows.index)
    df_remain = df.drop(index=picked_index)

    return random_label_0_rows, random_label_1_rows, df_remain


# Draw three rounds of rows from the dataset; each call returns the label-0
# rows ("cont"), the label-1 rows ("grav") and the rows that are left over.
# The shrinking pool is kept in its own variable so that `df` is not
# overwritten: re-running this cell starts from the full list again instead
# of from an already reduced one.
remaining = df
step0_cont, step0_grav, remaining = extract_dataset(remaining)
step1_cont, step1_grav, remaining = extract_dataset(remaining)
step2_cont, step2_grav, remaining = extract_dataset(remaining)

# Shuffle the rows of each round (fixed seeds keep the order reproducible).
phase1 = pd.concat([step0_cont, step0_grav]).sample(frac=1, random_state=2).reset_index(drop=True)
phase2 = pd.concat([step1_cont, step1_grav]).sample(frac=1, random_state=3).reset_index(drop=True)
phase3 = pd.concat([step2_cont, step2_grav]).sample(frac=1, random_state=4).reset_index(drop=True)

# **評価用CSVを作成**

In [7]:
def make_idxs(phase):
    """Return the 66 row IDs of one phase: "phase{phase}_00" ... "phase{phase}_65"."""
    prefix = f"phase{phase}_"
    row_ids = []
    for number in range(66):
        # Zero-pad the running number to two digits.
        row_ids.append(prefix + f"{number:02d}")
    return row_ids


# Flatten the three phases into parallel lists: row ID, label and image path.
phase_frames = [phase1, phase2, phase3]
idxs = [row_id for phase_no in range(len(phase_frames)) for row_id in make_idxs(phase_no)]
labels = [label for frame in phase_frames for label in frame["label"].tolist()]
paths = [image_path for frame in phase_frames for image_path in frame["image_path"].tolist()]

In [8]:
# Evaluation table: three filled columns plus one (still empty) column per participant.
participants = ["ishihara", "ohagi", "kinoshita", "takamine", "hayashi", "fukuyama", "makino", "motomura", "iwasaki", "komatsu", "koizumi", "kitaguchi", "YOLOv5"] 
columns = ["idx", "label", "path", *participants]

df = pd.DataFrame(index=[], columns=columns)
for column_name, values in (("idx", idxs), ("label", labels), ("path", paths)):
    df[column_name] = values

In [10]:
#参考 https://github.com/ykitaguchi77/GravCont_classification_colab/blob/master/%E3%80%90%E8%A9%95%E4%BE%A1%E7%94%A8%E3%80%91TrainingSet%E3%81%AE%E3%83%AA%E3%82%B9%E3%83%88CSV%E3%81%A8%E7%95%AA%E5%8F%B7%E4%BB%98%E3%81%8D%E7%94%BB%E5%83%8F%E3%82%92%E4%BD%9C%E6%88%90_colab.ipynb

import os
import shutil

bootcamp_dir = "/content/drive/MyDrive/Deep_learning/GO_extended_dataset/grav_bootcamp_from_YOLOv5"
img_save_dir = f"{bootcamp_dir}/images"  # where the numbered images are saved
csv_dir = bootcamp_dir                   # where the image-list CSVs are written
pdf_save_dir = bootcamp_dir              # where the PDFs of the images in number order are saved
zip_save_dir = bootcamp_dir              # where the zipped image / answer-CSV bundles are saved
reply_dir = f"{bootcamp_dir}/reply"      # folder for the returned CSVs

# The image folder is wiped and rebuilt on every run; the other folders are
# only created when missing. The loop variable is not called `dir`, so the
# builtin dir() stays usable in the notebook.
for out_dir in [csv_dir, img_save_dir, pdf_save_dir, zip_save_dir, reply_dir]:
    if out_dir == img_save_dir and os.path.exists(out_dir):
        shutil.rmtree(out_dir)
    os.makedirs(out_dir, exist_ok=True)

# One sub-folder per phase inside the image folder.
for phase in [0,1,2]:
    os.makedirs(f"{img_save_dir}/phase_{phase}", exist_ok=True)


In [None]:
# Copy each selected image into its phase folder, renamed to its row ID.
for idx, path in zip(df["idx"], df["path"]):
    print(idx,path)
    # "phase0_00" -> "0": the text between the "phase" prefix and the "_".
    # (Taking the single character idx[5] only works for one-digit phases.)
    phase_no = idx.split("_")[0][len("phase"):]
    shutil.copy(path, f"{img_save_dir}/phase_{phase_no}/{idx}.jpg")


In [14]:
import zipfile
import os

def zip_directory(directory_path, zip_file_path, password):
    """Write every file under ``directory_path`` into a deflate-compressed zip.

    Entries are stored with paths relative to ``directory_path``. If the
    archive itself lies inside ``directory_path`` it is left out.

    Args:
        directory_path: Folder whose files are archived (walked recursively).
        zip_file_path: Path of the zip file to create (overwritten if present).
        password: Accepted for interface compatibility only. The standard
            library ``zipfile`` module cannot create encrypted archives
            (``ZipFile.setpassword`` only sets the password used when
            *extracting*), so the resulting zip is NOT password protected.
            A ``RuntimeWarning`` is emitted when a password is given so the
            caller is not misled.
    """
    import warnings

    if password:
        warnings.warn(
            "zip_directory: the standard-library zipfile module cannot encrypt "
            "archives; the password is ignored and the zip is NOT protected.",
            RuntimeWarning,
            stacklevel=2,
        )

    archive_abspath = os.path.abspath(zip_file_path)
    with zipfile.ZipFile(zip_file_path, 'w') as zip_file:
        for root, dirs, files in os.walk(directory_path):
            dirs.sort()  # fixed traversal order -> reproducible archive layout
            for file in sorted(files):
                file_path = os.path.join(root, file)
                # Never add the archive that is currently being written.
                if os.path.abspath(file_path) == archive_abspath:
                    continue
                zip_file.write(file_path, os.path.relpath(file_path, directory_path), compress_type=zipfile.ZIP_DEFLATED)

# NOTE: the working directory stays changed after this cell, so later cells
# that use relative file names (e.g. "phase_0.pdf") write into img_save_dir.
os.chdir(img_save_dir) 
# Zip folders only: on a re-run the listing also contains earlier outputs
# (*.zip, *.pdf), which must not be "zipped" into empty archives.
for entry in sorted(os.listdir()):
    if os.path.isdir(entry):
        zip_directory(entry, f"{entry}.zip", 'bootcamp')

In [19]:
# Display the evaluation table built above (idx / label / path plus one column per participant).
df

Unnamed: 0,idx,label,path,ishihara,ohagi,kinoshita,takamine,hayashi,fukuyama,makino,motomura,iwasaki,komatsu,koizumi,kitaguchi,YOLOv5
0,phase0_00,0.0,/content/drive/MyDrive/Deep_learning/GO_extend...,,,,,,,,,,,,,
1,phase0_01,0.0,/content/drive/MyDrive/Deep_learning/GO_extend...,,,,,,,,,,,,,
2,phase0_02,1.0,/content/drive/MyDrive/Deep_learning/GO_extend...,,,,,,,,,,,,,
3,phase0_03,0.0,/content/drive/MyDrive/Deep_learning/GO_extend...,,,,,,,,,,,,,
4,phase0_04,1.0,/content/drive/MyDrive/Deep_learning/GO_extend...,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
193,phase2_61,1.0,/content/drive/MyDrive/Deep_learning/GO_extend...,,,,,,,,,,,,,
194,phase2_62,0.0,/content/drive/MyDrive/Deep_learning/GO_extend...,,,,,,,,,,,,,
195,phase2_63,0.0,/content/drive/MyDrive/Deep_learning/GO_extend...,,,,,,,,,,,,,
196,phase2_64,1.0,/content/drive/MyDrive/Deep_learning/GO_extend...,,,,,,,,,,,,,


In [23]:
# Save the complete table (all columns) as the parent CSV.
parent_csv_path = f"{csv_dir}/hum_eval_parent.csv"
df.to_csv(parent_csv_path, index=False)

In [24]:
# Second CSV: the same table without the label and path columns and without
# the kitaguchi and YOLOv5 columns.
hidden_columns = ["label", "path", "kitaguchi", "YOLOv5"]
df_eval = df.drop(columns=hidden_columns)
eval_csv_path = f"{csv_dir}/hum_eval.csv"
df_eval.to_csv(eval_csv_path, index=False)

# **評価用画像を作成**

In [None]:
import cv2
from google.colab.patches import cv2_imshow
import shutil
import glob

# save_path = img_save_dir

# # If save_path exists, delete it and create it anew
# try:
#     shutil.rmtree(save_path)
#     os.makedirs(save_path)
# except FileNotFoundError:
#     os.makedirs(save_path)
#     pass

# # Collect the image paths that correspond to the CSV

# img_path_list = []
# for i in range(len(df3)):
#     img_name = df3.iloc[i,0]
#     img_class = df3.iloc[i,1]
#     img_path_list.append(test_dir + "/" +img_class+ "/" + img_name)

# print(img_path_list)

def write_text(image_path, text, save_path):
    """Draw ``text`` onto an image and save the result.

    Args:
        image_path: Path of the image to read.
        text: String drawn in white with its anchor at pixel (20, 60).
        save_path: Path of the output file; may equal ``image_path`` to
            overwrite the image in place.

    Returns:
        The image array after the text has been drawn on it.

    Raises:
        FileNotFoundError: If ``cv2.imread`` cannot read ``image_path``
            (it returns ``None`` instead of raising).
        OSError: If ``cv2.imwrite`` reports that the file was not written.
    """
    img = cv2.imread(image_path)
    if img is None:
        # Fail with the offending path instead of a cryptic putText error.
        raise FileNotFoundError(f"cv2.imread could not read image: {image_path}")
    # Add the text
    cv2.putText(img, text, (20, 60),
               cv2.FONT_ITALIC, 1.5,
               (255,255,255), 4, cv2.LINE_AA)
    # imwrite signals failure through its return value, not an exception.
    if not cv2.imwrite(save_path, img):
        raise OSError(f"cv2.imwrite could not write image: {save_path}")
    return img

for phase in [0,1,2]:
    # Not named `dir`, so the builtin dir() stays usable in the notebook.
    phase_dir = f"{img_save_dir}/phase_{phase}"
    # sorted(): process and preview the images in file-name order rather than
    # in the arbitrary order glob returns.
    for path in sorted(glob.glob(f"{phase_dir}/*")):
        # The file stem (e.g. "phase0_00") is the text written onto the image;
        # the image is overwritten in place.
        stem = os.path.splitext(os.path.basename(path))[0]
        im = write_text(path, stem, path)
        cv2_imshow(im)


# **画像を連結して印刷用のPDFファイルにする**

In [17]:
!pip install img2pdf --q
import os
import img2pdf
from PIL import Image # img2pdfと一緒にインストールされたPillowを使います

for phase in [0,1,2]:
    # Relative name: the PDF is written into the current working directory.
    pdfFileName = f"phase_{phase}.pdf"
    path = f"{img_save_dir}/phase_{phase}/*"
    ext = ".jpg"

    # sorted(): glob returns files in arbitrary order, but the pages have to
    # follow the image numbers (phaseN_00, phaseN_01, ...); the zero-padded
    # names sort correctly as plain strings.
    image_files = sorted(i for i in glob.glob(path) if i.endswith(ext))

    with open(pdfFileName, "wb") as pdf_file:
      pdf_file.write(img2pdf.convert(image_files))

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/97.8 KB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m97.8/97.8 KB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.3/2.3 MB[0m [31m28.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.4/3.4 MB[0m [31m54.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for img2pdf (setup.py) ... [?25l[?25hdone


In [18]:
##PDFにパスワードをかける

#input
!pip install PyPDF4
from PyPDF4 import PdfFileReader
from PyPDF4 import PdfFileWriter

# Password applied to every encrypted PDF.
# NOTE(review): this password is hard-coded in the notebook; consider reading
# it from getpass or an environment variable before sharing the file.
password = "6903"

for phase in [0,1,2]:
    # PDF to protect, and the name of the encrypted copy written next to it.
    pdf = f"phase_{phase}.pdf"
    encrypted_pdf = pdf.replace('.pdf','') + r'_encrypt.pdf'

    # Open the source inside a `with` block so the handle is always closed.
    # It is kept open until write() has finished, because the copied pages
    # presumably still read from the source file at that point.
    with open(pdf, "rb") as pdf_open:
        pdf_reader = PdfFileReader(pdf_open)
        pdf_writer = PdfFileWriter()

        # Copy every page of the source PDF into the writer.
        for cp in range(pdf_reader.numPages):
            pdf_writer.addPage(pdf_reader.getPage(cp))

        # Set the password
        pdf_writer.encrypt(password)

        # Write the encrypted copy
        with open(encrypted_pdf, 'wb') as f:
            pdf_writer.write(f)

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting PyPDF4
  Downloading PyPDF4-1.27.0.tar.gz (63 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.9/63.9 KB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: PyPDF4
  Building wheel for PyPDF4 (setup.py) ... [?25l[?25hdone
  Created wheel for PyPDF4: filename=PyPDF4-1.27.0-py3-none-any.whl size=61248 sha256=5bee5acf6956ce1039a5f0b806e3488a562781b4d1a14e0d360811a7584e0202
  Stored in directory: /root/.cache/pip/wheels/f0/79/75/d130281ec9996a2551dbdd1836aa4beb376d53f8cdca49b4b0
Successfully built PyPDF4
Installing collected packages: PyPDF4
Successfully installed PyPDF4-1.27.0


