In [1]:
import os
import random
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets
import torchvision.transforms as transforms
from PIL import Image
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])

In [6]:
import fnmatch
import glob
data_dir = 'D:/face/lfw'
name_list = os.listdir(data_dir)
face_names = [name for name in os.listdir(data_dir) if len(glob.glob(os.path.join(data_dir, name, '*.jpg'))) >= 50]
face_names_mapping = {}
js = 0
for name in face_names:
    face_names_mapping[name] = js
    js+=1
print(len(face_names))
def find_images(directory, pattern='*.jpg'):
    for root, dirs, files in os.walk(directory):
        for file in fnmatch.filter(files, pattern):
            if '_'.join(file.split('_')[:-1]) in face_names:
                yield file

# 指定你要搜索的文件夹路径
directory_path = data_dir

# 保存所有找到的图片路径的列表
imgs_list = list(find_images(directory_path))
print(face_names_mapping)

12
{'Ariel_Sharon': 0, 'Colin_Powell': 1, 'Donald_Rumsfeld': 2, 'George_W_Bush': 3, 'Gerhard_Schroeder': 4, 'Hugo_Chavez': 5, 'Jacques_Chirac': 6, 'Jean_Chretien': 7, 'John_Ashcroft': 8, 'Junichiro_Koizumi': 9, 'Serena_Williams': 10, 'Tony_Blair': 11}


In [104]:
class LFWDataset(Dataset):
    def __init__(self, imgs, transform=None):
        self.transform = transform
        self.labels = [face_names_mapping['_'.join(img.split('_')[:-1])] for img in imgs]
    def __len__(self):
        return len(self.labels)
    def __getitem__(self, idx):
        img_path = os.path.join(data_dir, '_'.join(imgs_list[idx].split('_')[:-1]), imgs_list[idx])
        image = Image.open(img_path)
        if self.transform:
            image = self.transform(image)
        label = self.labels[idx]
        return image, label

dataset = LFWDataset(imgs_list, transform=transform)

In [105]:
random.seed(0)
rand_list = [i for i in range(len(dataset))]
random.shuffle(rand_list)
length = int(len(dataset) * 0.8)
train_idx, val_idx = rand_list[:length], rand_list[length:]
train_dataset = LFWDataset([imgs_list[i] for i in train_idx], transform=transform)
val_dataset = LFWDataset([imgs_list[i] for i in val_idx], transform=transform)
print(len(train_dataset),len(val_dataset))

5976 1494


In [106]:
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size = batch_size,shuffle=True)
val_loader = DataLoader(val_dataset, batch_size = batch_size,shuffle=False)

In [107]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import torch.nn.functional as F
from cho_models.resnet import resnet50
model = resnet50(include_top=True, num_classes=len(face_names))


In [108]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [109]:
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [110]:
from tqdm import tqdm

num_epochs = 30
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for images, labels in tqdm(train_loader):
        images = images.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        
        # Calculate accuracy
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        
    epoch_loss = running_loss / len(train_loader)
    accuracy = 100 * correct / total
    print(f'Epoch {epoch + 1}, Loss: {epoch_loss:.4f}, Accuracy: {accuracy:.2f}%')
    model.eval()  # 设置模型为评估模式
    correct = 0
    total = 0
    with torch.no_grad():  # 在不需要计算梯度的情况下运行
        for images, labels in val_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)  # 获取每个输出的最大值及其索引
            total += labels.size(0)  # 累加总数
            correct += (predicted == labels).sum().item()  # 累加正确预测的数量

    accuracy = 100 * correct / total  # 计算准确率
    print(f'Validation Accuracy: {accuracy}%')


100%|██████████| 187/187 [00:21<00:00,  8.67it/s]


Epoch 1, Loss: 4.4320, Accuracy: 13.25%
Validation Accuracy: 13.989290495314592%


100%|██████████| 187/187 [00:20<00:00,  8.96it/s]


Epoch 2, Loss: 4.2662, Accuracy: 14.07%
Validation Accuracy: 14.190093708165998%


100%|██████████| 187/187 [00:21<00:00,  8.77it/s]


Epoch 3, Loss: 4.2705, Accuracy: 13.89%
Validation Accuracy: 13.788487282463185%


100%|██████████| 187/187 [00:22<00:00,  8.40it/s]


Epoch 4, Loss: 4.2485, Accuracy: 14.01%
Validation Accuracy: 14.123159303882195%


100%|██████████| 187/187 [00:22<00:00,  8.17it/s]


Epoch 5, Loss: 4.2206, Accuracy: 14.16%
Validation Accuracy: 14.257028112449799%


 60%|█████▉    | 112/187 [00:14<00:09,  7.91it/s]


KeyboardInterrupt: 