In [1]:
import torch
import open_clip
from open_clip import tokenizer
from deep_translator import GoogleTranslator
import os
import shutil
import random
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
# Seed Python's `random` module so the `random.sample` draws made later in
# this notebook start from a fixed generator state.
random.seed(1234)

In [2]:
# Default to the CPU and switch to the GPU only when CUDA is usable.
device = 'cpu'
if torch.cuda.is_available():
    device = 'cuda'

# Larger of the two CLIP models used in this notebook (ViT-H-14).
# The second return value is not used here and is discarded.
high_model, _, high_preprocess = open_clip.create_model_and_transforms(
    'ViT-H-14',
    pretrained='laion2b_s32b_b79k',
)
# Inference only: move the weights to `device` and switch to eval mode.
high_model = high_model.to(device).eval()

In [3]:
# Smaller CLIP model (ViT-B-32); the second return value is discarded.
low_model, _, low_preprocess = open_clip.create_model_and_transforms(
    'ViT-B-32',
    pretrained='laion2b_s34b_b79k',
)
# Inference only: move the weights to `device` and switch to eval mode.
low_model = low_model.to(device).eval()

In [None]:
# Japanese -> English translator (deep_translator's GoogleTranslator).
translator = GoogleTranslator(source='ja', target='en')

# Class prompts written in Japanese, keyed by class name.
# Keys starting with "物体" describe scenes that contain at least one object;
# all other keys describe scenes without any object.
# NOTE(review): "darkB" and "lightB" carry exactly the same prompt text, so the
# models cannot tell these two classes apart - confirm this is intended.
descriptions_ja = {
    "darkA": "緑色が濃い草が生えているが、白色の物体や青色の物体、黒色の物体は落ちていない",
    "darkB": "細い草がたくさん生えているが、白色の物体や青色の物体、黒色の物体は落ちていない",
    "lightA": "緑色が薄い草が生えているが、白色の物体や青色の物体、黒色の物体は落ちていない",
    "lightB": "細い草がたくさん生えているが、白色の物体や青色の物体、黒色の物体は落ちていない",
    "whiteA":"灰色の砂浜の上に砂が積もっていて草も生えておらず、白色の物体や青色の物体、黒色の物体は落ちていない",
    "whiteB":"灰色の砂浜の上に砂が積もっていて少しだけ草が生えているが、白色の物体や青色の物体、黒色の物体は落ちていない",
    "物体A":"緑色が濃い草が生えている上に、白色の物体や青色の物体、黒色の物体の少なくとも１つが落ちている",
    "物体B":"細い草がたくさん生えている上に、白色の物体や青色の物体、黒色の物体の少なくとも１つが落ちている",
    "物体C":"灰色の砂浜の上に緑色が薄い草が生えている上に、白色の物体や青色の物体、黒色の物体の少なくとも１つが落ちている",
    "物体D":"灰色の砂浜の上に細い草が生えている上に、白色の物体や青色の物体、黒色の物体の少なくとも１つが落ちている",
    "物体E":"灰色の砂浜の上に砂が積もっていて草も生えていないが、白色の物体や青色の物体、黒色の物体の少なくとも１つが落ちている",
    "物体F":"灰色の砂浜の上に砂が積もっていて少しだけ草が生えている上に、白色の物体や青色の物体、黒色の物体の少なくとも１つが落ちている",
    "物体G":"灰色の砂浜の上に青色の物体、草ではない緑色の物体、黒色の物体の少なくとも１つが落ちている"
}


def _english_key(key):
    """Return a stable English class key for a Japanese-side key.

    Keys are program identifiers (later cells test for the "Object" prefix),
    so they are converted locally instead of being sent to the online
    translator, whose output for short strings is not guaranteed to be stable.

    Args:
        key: A key of `descriptions_ja`.

    Returns:
        "Object <suffix>" for keys starting with "物体"; any other key unchanged.
    """
    object_prefix = "物体"
    if key.startswith(object_prefix):
        return "Object " + key[len(object_prefix):]
    return key


# Only the prompt texts go through the translation service.
descriptions = {
    _english_key(key): translator.translate(value)
    for key, value in descriptions_ja.items()
}

# A dict comprehension silently drops entries whose keys collide; fail loudly.
assert len(descriptions) == len(descriptions_ja), "class keys collided after renaming"

# Show the translated dictionary.
print(descriptions)

{'darkA': 'There is dark green grass growing, but no white, blue or black objects have fallen.', 'darkB': 'There is a lot of thin grass growing, but no white, blue or black objects have fallen.', 'Light A': 'There is light green grass growing, but no white, blue or black objects have fallen.', 'Light B': 'There is a lot of thin grass growing, but no white, blue or black objects have fallen.', 'whiteA': 'The beach is white with piled-up sand and no grass growing on it, and there are no white, blue or black objects on it.', 'White B': 'There is sand piled up on the white beach and a little grass growing on it, but there are no white, blue or black objects on the beach.', 'Object A': 'There is dark green grass growing and at least one of the following objects is lying on the ground: white, blue, or black.', 'Object B': 'There is a lot of thin grass growing, and at least one of the following objects is lying around: white, blue, or black.', 'Object C': 'A beach with white sand, light green

In [None]:
# Folder whose sub-folders hold the source images.
source_base_path = "imgs/update_img"
# Folder that receives the sampled copies.
destination_path = "imgs/clip_test"
# Maximum number of images drawn from each sub-folder.
images_per_subfolder = 15

# Create the destination folder if it does not exist yet.
os.makedirs(destination_path, exist_ok=True)

# Collect the sub-folders. os.listdir() returns entries in arbitrary order, so
# the listing is sorted to make the seeded sampling below reproducible.
subfolders = sorted(
    f for f in os.listdir(source_base_path)
    if os.path.isdir(os.path.join(source_base_path, f))
)

# Copy a random sample of images out of every sub-folder.
for subfolder in subfolders:
    subfolder_path = os.path.join(source_base_path, subfolder)
    images = sorted(
        f for f in os.listdir(subfolder_path)
        if os.path.isfile(os.path.join(subfolder_path, f))
    )

    # random.sample() raises ValueError when asked for more items than exist,
    # so small folders contribute everything they have.
    sample_size = min(images_per_subfolder, len(images))
    selected_images = random.sample(images, sample_size)

    for image in selected_images:
        source_file = os.path.join(subfolder_path, image)
        # Prefix with the sub-folder name so files from different folders
        # cannot overwrite each other in the flat destination folder.
        destination_file = os.path.join(destination_path, f"{subfolder}_{image}")

        # Copy the image.
        shutil.copy(source_file, destination_file)

In [6]:
clip_test_path = destination_path
result_path = "imgs/clip_test_result"
# Create the folder that receives the result figures.
os.makedirs(result_path, exist_ok=True)

# The prompts are the same for every image, so tokenize and encode them once
# instead of once per image.
labels = list(descriptions.keys())
texts = list(descriptions.values())
text_tokens = open_clip.tokenize(texts).to(device)

with torch.no_grad():
    high_text_embedding = high_model.encode_text(text_tokens)
    low_text_embedding = low_model.encode_text(text_tokens)
    # L2-normalize so the dot products below are cosine similarities.
    high_text_embedding = high_text_embedding / high_text_embedding.norm(dim=-1, keepdim=True)
    low_text_embedding = low_text_embedding / low_text_embedding.norm(dim=-1, keepdim=True)

# Predict and plot every image in the test folder.
for image_name in os.listdir(clip_test_path):
    image_path = os.path.join(clip_test_path, image_name)
    if not os.path.isfile(image_path):
        continue

    # Load the image and apply each model's own preprocessing.
    image = Image.open(image_path).convert("RGB")
    high_input = high_preprocess(image).unsqueeze(0).to(device)
    low_input = low_preprocess(image).unsqueeze(0).to(device)

    with torch.no_grad():
        high_img_embedding = high_model.encode_image(high_input)
        low_img_embedding = low_model.encode_image(low_input)
        high_img_embedding = high_img_embedding / high_img_embedding.norm(dim=-1, keepdim=True)
        low_img_embedding = low_img_embedding / low_img_embedding.norm(dim=-1, keepdim=True)

        # Scaled cosine similarity -> probability distribution over the prompts.
        high_probs = (100 * high_img_embedding @ high_text_embedding.T).softmax(dim=-1)
        low_probs = (100 * low_img_embedding @ low_text_embedding.T).softmax(dim=-1)

    # Left panel: the image; right panel: both models' class probabilities.
    fig, ax = plt.subplots(1, 2, figsize=(12, 6))

    ax[0].imshow(image)
    ax[0].axis('off')
    ax[0].set_title(f"Image: {image_name}")

    # The axis is labelled in percent, so convert the 0-1 probabilities.
    high_values = 100 * high_probs.squeeze(0).cpu().numpy()
    low_values = 100 * low_probs.squeeze(0).cpu().numpy()
    x = np.arange(len(labels))
    width = 0.35
    ax[1].bar(x - width/2, high_values, width, label='High Model', color='blue', alpha=0.5)
    ax[1].bar(x + width/2, low_values, width, label='Low Model', color='green', alpha=0.5)
    ax[1].set_title("Model Predictions")
    ax[1].set_ylabel("Probability (%)")
    ax[1].legend()
    ax[1].set_xticks(x)
    ax[1].set_xticklabels(labels, rotation=45, ha='right')

    # Save through the figure object; the tight bounding box keeps the rotated
    # tick labels from being cut off. Close the figure to release its memory.
    result_image_path = os.path.join(result_path, f"{image_name}_result.png")
    fig.savefig(result_image_path, bbox_inches="tight")
    plt.close(fig)

print("予測結果の保存が完了しました。")

予測結果の保存が完了しました。


In [7]:
# Folder whose sub-folders hold the images, and the root folder for results.
source_base_path = "imgs/update_img"
result_base_path = "imgs/clip_result"

# Create one result tree per model.
os.makedirs(result_base_path, exist_ok=True)
high_result_path = os.path.join(result_base_path, "high")
low_result_path = os.path.join(result_base_path, "low")
os.makedirs(high_result_path, exist_ok=True)
os.makedirs(low_result_path, exist_ok=True)

# Collect the sub-folders.
subfolders = [f for f in os.listdir(source_base_path) if os.path.isdir(os.path.join(source_base_path, f))]


def _is_object_label(label):
    """Return True when a class key denotes a scene that contains an object.

    Object classes are keyed "物体X" before translation and "Object X" after
    it; the comparison ignores case so a differently-cased key still matches.
    """
    return label.casefold().startswith(("object", "物体"))


# The prompts are the same for every image, so tokenize and encode them once
# instead of once per image.
labels = list(descriptions.keys())
texts = list(descriptions.values())
text_tokens = open_clip.tokenize(texts).to(device)

with torch.no_grad():
    high_text_embedding = high_model.encode_text(text_tokens)
    low_text_embedding = low_model.encode_text(text_tokens)
    # L2-normalize so the dot products below are cosine similarities; without
    # this, prompts with a larger embedding norm are favoured by the argmax.
    high_text_embedding = high_text_embedding / high_text_embedding.norm(dim=-1, keepdim=True)
    low_text_embedding = low_text_embedding / low_text_embedding.norm(dim=-1, keepdim=True)

# Predict every image and sort it into <model>/<sub-folder>/yes or .../no.
for subfolder in subfolders:
    subfolder_path = os.path.join(source_base_path, subfolder)
    high_yes_path = os.path.join(high_result_path, subfolder, "yes")
    high_no_path = os.path.join(high_result_path, subfolder, "no")
    low_yes_path = os.path.join(low_result_path, subfolder, "yes")
    low_no_path = os.path.join(low_result_path, subfolder, "no")
    for out_dir in (high_yes_path, high_no_path, low_yes_path, low_no_path):
        os.makedirs(out_dir, exist_ok=True)

    for image_name in os.listdir(subfolder_path):
        image_path = os.path.join(subfolder_path, image_name)
        if not os.path.isfile(image_path):
            continue

        # Load the image and apply each model's own preprocessing.
        image = Image.open(image_path).convert("RGB")
        high_input = high_preprocess(image).unsqueeze(0).to(device)
        low_input = low_preprocess(image).unsqueeze(0).to(device)

        with torch.no_grad():
            high_img_embedding = high_model.encode_image(high_input)
            low_img_embedding = low_model.encode_image(low_input)
            high_img_embedding = high_img_embedding / high_img_embedding.norm(dim=-1, keepdim=True)
            low_img_embedding = low_img_embedding / low_img_embedding.norm(dim=-1, keepdim=True)

            # Scaled cosine similarity -> probability distribution over the prompts.
            high_probs = (100 * high_img_embedding @ high_text_embedding.T).softmax(dim=-1)
            low_probs = (100 * low_img_embedding @ low_text_embedding.T).softmax(dim=-1)

        # Most likely class per model; .item() gives a plain int list index.
        high_label = labels[high_probs.argmax(dim=-1).item()]
        low_label = labels[low_probs.argmax(dim=-1).item()]

        # Copy the image into each model's "yes" (object present) or "no" folder.
        high_target = high_yes_path if _is_object_label(high_label) else high_no_path
        shutil.copy(image_path, os.path.join(high_target, image_name))

        low_target = low_yes_path if _is_object_label(low_label) else low_no_path
        shutil.copy(image_path, os.path.join(low_target, image_name))

print("画像の分類と保存が完了しました。")

画像の分類と保存が完了しました。
