In [None]:
# 加载模型预测
from tensorflow import keras
# Size passed as `target_size` when images are preprocessed for this model.
input_shape = (224, 448)

# Earlier checkpoint, kept for reference:
# model_path = 'models\\model_20230616193622.h5'
model_path = 'models\\model_20230714155707.h5'
# Load the saved Keras model from the checkpoint path above.
model = keras.models.load_model(model_path)

### 测试集


In [None]:
# 图片预测 分类
import pandas as pd
import shutil
import os
from tqdm import tqdm_notebook as tqdm
from utils.utils import check_dir_exists, walkdir, tqdm_file_count
from utils.tf_utils import preprocess_image

# Folder whose images are scored by the model and sorted in place into
# per-class sub-folders.
dest_dir = r"C:\Users\yhk\Downloads\sdxlLora_homeland_female\1"

print(tqdm_file_count(dest_dir))
dir_name = os.path.basename(dest_dir)
# The prediction log is written to the current working directory.
csv_file = f"{dir_name}_预测结果.csv"
# Scores strictly above this value are treated as class 1.
threshold = 0.5
result_data = []

# NOTE(review): files are moved into sub-folders of the tree being walked;
# whether a re-run picks up already-sorted files depends on how `walkdir`
# enumerates paths -- confirm.
for file_path in tqdm(walkdir(dest_dir), total=tqdm_file_count(dest_dir)):
    root = os.path.dirname(file_path)
    file_name = os.path.basename(file_path)
    # Only image files are scored; everything else is skipped.
    if not file_path.lower().endswith(('.jpg', '.jpeg', '.png')):
        continue
    # presumably returns a batch of one image ready for the model -- TODO confirm
    img_array = preprocess_image(file_path, target_size=input_shape)
    # Score the image with the trained model.
    predictions = model.predict(img_array)
    if predictions[0] > threshold:
        predict_class = 1
        sub_dir_name = '可用'
    else:
        predict_class = 0
        sub_dir_name = '不可用'
    copy_dir = os.path.join(root, sub_dir_name)
    # presumably creates the folder when it is missing -- TODO confirm
    check_dir_exists(copy_dir)
    copy_img_path = os.path.join(copy_dir, file_name)
    # Never overwrite a file that is already present in the class folder.
    if not os.path.exists(copy_img_path):
        try:
            shutil.move(file_path, copy_img_path)
        except Exception as e:
            # Best effort: report the failure and carry on with the next file.
            print(e)
    # Log every scored image. Previously this append lived inside the
    # `except` branch, so only images whose move failed reached the CSV.
    result_data.append([file_path, predict_class, predictions[0]])
df = pd.DataFrame(result_data, columns=[
                  "img_path",  "predict_class", 'predictions'])
df.to_csv(csv_file, index=False)
print(tqdm_file_count(dest_dir))

In [None]:
# 随机选取500张
import os
import random
# Number of files to draw from the combined source folders.
sample_size = 500
dest_dir_list = ["C:\\Users\\Administrator\\Desktop\\workspace\\stablediffusion\\跑图结果\\0609随机男",
                 "C:\\Users\\Administrator\\Desktop\\workspace\\stablediffusion\\跑图结果\\0609随机女"]
# Gather the full path of every file found anywhere below the source folders.
file_list = [
    os.path.join(root, file)
    for dest_dir in dest_dir_list
    for root, _, files in os.walk(dest_dir)
    for file in files
]

# random.sample raises ValueError when asked for more items than exist, so
# cap the request at the number of files actually found.
if len(file_list) < sample_size:
    print(f"only {len(file_list)} files found, sampling all of them")
result_list = random.sample(file_list, min(sample_size, len(file_list)))

In [None]:
# 计算图片md5
import hashlib
import os
from utils import walkdir, tqdm_file_count
from tqdm import tqdm


def check_file_dup(dest_dir):
    """Delete byte-identical duplicate files under ``dest_dir``.

    Every path produced by ``walkdir(dest_dir)`` is read in full and hashed
    with MD5. The first path seen for a digest is kept; any later path with
    the same digest is removed from disk with ``os.remove`` and reported on
    stdout together with the path that was kept.

    WARNING: this function is destructive -- duplicates are deleted, not
    just reported.

    Args:
        dest_dir: Folder to scan (presumably walked recursively by
            ``walkdir`` -- TODO confirm).

    Returns:
        list[str]: The distinct MD5 hex digests, in first-seen order.
    """
    first_seen = {}
    unique_digests = []
    progress = tqdm(walkdir(dest_dir), total=tqdm_file_count(dest_dir))
    for img_path in progress:
        with open(img_path, 'rb') as fp:
            md5 = hashlib.md5(fp.read()).hexdigest()
        if md5 in first_seen:
            # Same content as an earlier file: drop this copy and log it.
            os.remove(img_path)
            print(f"img_path:{img_path} md5:{md5} {first_seen[md5]}")
            continue
        first_seen[md5] = img_path
        unique_digests.append(md5)

    return unique_digests

In [None]:
# De-duplicate each class folder of the training set by MD5.
# Note: check_file_dup deletes the duplicate files it finds.
train_data_dir = 'C:\\Users\\Administrator\\Desktop\\workspace\\慧凯训练筛图模型\\训练集_COPY'
dir_list = ["0", "1"]
for class_name in dir_list:
    check_file_dup(os.path.join(train_data_dir, class_name))

In [None]:
# Find images whose content appears in both class folders.
# Each call also removes duplicates inside its own folder.
md5_list_0 = check_file_dup("C:\\Users\\Administrator\\Desktop\\workspace\\慧凯训练筛图模型\\训练集_COPY\\0")
md5_list_1 = check_file_dup("C:\\Users\\Administrator\\Desktop\\workspace\\慧凯训练筛图模型\\训练集_COPY\\1")
# Digests present in both folders (set intersection).
inner_list = set(md5_list_0) & set(md5_list_1)
print(inner_list)

In [None]:
# 删除不可用里面重复图片
import os
import hashlib
# Remove from class folder "0" every file whose MD5 is in `inner_list`.
# `inner_list` is not defined in this cell; it must already exist in the
# kernel before this cell runs.
train_data_dir = 'C:\\Users\\Administrator\\Desktop\\workspace\\慧凯训练筛图模型\\训练集_COPY'
# Not used below; kept so the cell defines the same names as before.
md5_map_0 = {}
for root, _, files in os.walk(os.path.join(train_data_dir, "0")):
    for file in files:
        img_path = os.path.join(root, file)
        with open(img_path, 'rb') as fp:
            md5 = hashlib.md5(fp.read()).hexdigest()
        if md5 not in inner_list:
            continue
        # Delete the copy from class "0" and log what was removed.
        os.remove(img_path)
        print(f"img_path:{img_path} md5:{md5}")

In [None]:
# 随机选取200张作为验证集
import glob
import random
import os
import shutil
# Move a random subset of one training class into a separate hold-out folder.
train_data_dir = 'C:\\Users\\Administrator\\Desktop\\workspace\\慧凯训练筛图模型\\训练集_COPY'
class_dir = "1"
# Destination folder for the held-out images of this class.
valid_dir = os.path.join(
    'C:\\Users\\Administrator\\Desktop\\workspace\\慧凯训练筛图模型\\测试集', class_dir)
number_of_valid_images = 200
# Collect every entry in the class folder whose name contains a dot.
image_files = glob.glob(os.path.join(train_data_dir, class_dir, "*.*"))
# random.sample raises ValueError when asked for more items than are
# available, so cap the request at the number of images actually found.
valid_images = random.sample(
    image_files, min(number_of_valid_images, len(image_files)))
os.makedirs(valid_dir, exist_ok=True)
# Move the selected images out of the training folder.
for image in valid_images:
    shutil.move(image, os.path.join(valid_dir, os.path.basename(image)))
# Report the number actually moved, which may be below the requested count.
print(f"已从训练集中随机选取了 {len(valid_images)} 张图片作为验证集。")