## Art Dating

#### Students
- Zhenbang Chen
- Zhenjia Chen

### Setup

Import packages and dependencies, then load the dataset for categorization.

In [3]:
import os
import copy
from PIL import Image
rootpath = "."

In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
# You might not have tqdm, which gives you nice progress bars
!pip install tqdm
from tqdm.auto import tqdm
import os
import copy
# Report the library versions this notebook is running against.
print("PyTorch Version: ", torch.__version__)
print("Torchvision Version: ", torchvision.__version__)
# Choose the compute device: the first CUDA GPU when one is available,
# otherwise fall back to the CPU (and say so, since training will be slow).
if torch.cuda.is_available():
    device = torch.device("cuda:0")
    print("Using the GPU!")
else:
    device = torch.device("cpu")
    print("WARNING: Could not find GPU! Using CPU only")

PyTorch Version:  1.3.1
Torchvision Version:  0.4.2
Using the GPU!


### Model Initialization

In [7]:
def initialize_model(num_classes, resume_from=None):
    """Build a ResNet-50 whose final layer outputs ``num_classes`` scores.

    The network is created without pretrained weights and its ``fc`` layer is
    replaced by a fresh ``nn.Linear`` with ``num_classes`` outputs.

    Args:
        num_classes: Number of output units for the replaced ``fc`` layer.
        resume_from: Optional path to a ``state_dict`` saved with
            ``torch.save``. When given, the weights are loaded into the model;
            the checkpoint must match this architecture and ``num_classes``.

    Returns:
        The constructed ``nn.Module``. It is not moved to any device here;
        callers do that themselves (e.g. ``.to(device)``).
    """
    # Same architecture whether or not we resume, so build it once.
    model_ft = models.resnet50(pretrained=False)
    in_features = model_ft.fc.in_features
    model_ft.fc = nn.Linear(in_features, num_classes)

    if resume_from:
        # map_location="cpu" lets a checkpoint saved from a GPU be restored on
        # a machine without CUDA; the caller moves the model afterwards.
        state_dict = torch.load(resume_from, map_location="cpu")
        model_ft.load_state_dict(state_dict)

    return model_ft

### Tests

In [8]:
# Preprocessing applied to every image before it reaches the ResNet:
# resize, centre-crop to 224, convert to a tensor, then normalise each of the
# three channels with the mean/std values expected by pretrained ResNet models.
# (An earlier variant only applied ToTensor + Normalize(mean=0.5, std=0.5).)
transform = transforms.Compose([
    transforms.Resize(size=256),
    transforms.CenterCrop(size=224),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])

In [9]:
# One ImageFolder per split; the class label of each image comes from the
# sub-directory it lives in, and all splits share the same preprocessing.

# Training split
art_train = datasets.ImageFolder(
    root="./data/art_culture_train",
    transform=transform,
)

# Validation split
art_val = datasets.ImageFolder(
    root="./data/art_culture_val",
    transform=transform,
)

# Test split
art_test = datasets.ImageFolder(
    root="./data/art_culture_test",
    transform=transform,
)

In [10]:
# Sampler that draws (with replacement) a tenth of the training set per epoch.
# It is not used by the loaders below; to train on the subsample instead, build
# the training loader with `sampler=random_sampler, shuffle=False`.
random_sampler = torch.utils.data.RandomSampler(
    art_train,
    replacement=True,
    num_samples=int(len(art_train) / 10),
)

# Training batches are reshuffled every epoch.
art_train_loader = torch.utils.data.DataLoader(
    art_train,
    batch_size=8,
    shuffle=True,
    num_workers=4,
    pin_memory=True,
)

# Validation and test batches keep the dataset order.
art_val_loader = torch.utils.data.DataLoader(
    art_val,
    batch_size=8,
    shuffle=False,
    num_workers=4,
    pin_memory=True,
)

art_test_loader = torch.utils.data.DataLoader(
    art_test,
    batch_size=8,
    shuffle=False,
    num_workers=4,
    pin_memory=True,
)

### Initialization and Methods

In [11]:
# Build the 11-class culture classifier and move it to the selected device.
# (Earlier experiments assembled a resnet18/resnet50 by hand here, swapping
# conv1 and fc manually; initialize_model is used instead.)
model = initialize_model(num_classes=11)
model = model.to(device)

In [12]:
# Training method
def train(net, optim, criterion, train_loader):
    """Run one optimisation pass over ``train_loader``.

    Args:
        net: Model to train; switched to training mode and updated in place.
        optim: Optimizer that owns ``net``'s parameters.
        criterion: Loss callable invoked as ``criterion(output, label)``.
        train_loader: Iterable yielding ``(images, labels)`` batches.

    Returns:
        None.
    """
    net.train()
    for batch_images, batch_labels in tqdm(train_loader):
        # Batches arrive on the CPU; move them to the module-level device.
        inputs = batch_images.to(device)
        targets = batch_labels.to(device)

        # Reset gradients left over from the previous step.
        optim.zero_grad()

        # Forward pass, loss, and backward pass.
        batch_loss = criterion(net(inputs), targets)
        batch_loss.backward()

        # Update parameters.
        optim.step()

In [13]:
# Evaluation method
def evaluate(net, val_loader, top_n=1):
    """Compute top-``top_n`` accuracy of ``net`` over ``val_loader``.

    A sample counts as correct when its label is among the ``top_n``
    highest-scoring outputs.

    Args:
        net: Model to evaluate; switched to eval mode.
        val_loader: Iterable yielding ``(images, labels)`` batches, with
            ``labels`` a 1-D tensor of class indices.
        top_n: How many of the highest-scoring classes to accept. Values
            larger than the number of model outputs are clamped to it.

    Returns:
        Fraction of correctly classified samples in ``[0, 1]``; ``0.0`` when
        the loader yields no samples.

    Raises:
        ValueError: If ``top_n`` is smaller than 1.
    """
    if top_n < 1:
        raise ValueError("top_n must be at least 1, got {}".format(top_n))

    total = 0
    correct = 0
    net.eval()

    for image_cpu, label_cpu in tqdm(val_loader):
        # Move image and label to the module-level device
        image = image_cpu.to(device)
        label = label_cpu.to(device)

        # Don't track gradients for performance in evaluation
        with torch.no_grad():
            scores = net(image)

            # topk raises when k exceeds the size of the class dimension.
            k = min(top_n, scores.size(-1))

            # Indices of the k best-scoring classes, one row per sample.
            prediction = scores.topk(k=k, dim=-1)[1]

        # Total number in batch
        total += image.size(0)

        # label.unsqueeze(1) broadcasts across the k columns. The indices in
        # a row are distinct, so each sample contributes at most one match.
        correct += (prediction == label.unsqueeze(1)).sum().item()

    # Guard against an empty loader instead of dividing by zero.
    return correct / total if total else 0.0

In [14]:
# Validation method
def validate(net, optim, criterion, val_loader):
    """Compute the per-sample average loss and top-1 accuracy of ``net``.

    Args:
        net: Model to validate; switched to eval mode.
        optim: Unused. Kept so existing call sites keep working; validation
            never computes gradients, so there is nothing to clear.
        criterion: Loss callable invoked as ``criterion(output, label)``. If
            it exposes ``reduction == "sum"`` its value is taken as a batch
            total; otherwise it is treated as a batch mean.
        val_loader: Iterable yielding ``(images, labels)`` batches.

    Returns:
        ``(average_loss, accuracy)`` where ``average_loss`` is the loss per
        sample and ``accuracy`` is a fraction in ``[0, 1]``. Both are ``0.0``
        when the loader yields no samples.
    """
    total = 0
    running_loss = 0.0
    correct = 0
    net.eval()

    # A mean-reduced loss must be weighted by the batch size before summing;
    # otherwise dividing by the sample count shrinks it by about the batch size.
    loss_is_batch_total = getattr(criterion, "reduction", "mean") == "sum"

    for image_cpu, label_cpu in tqdm(val_loader):
        # Move image and label to the module-level device
        image = image_cpu.to(device)
        label = label_cpu.to(device)

        # Don't track gradients for performance in validation
        with torch.no_grad():
            output = net(image)
            prediction = output.argmax(dim=-1)
            loss = criterion(output, label)

        batch_size = image.size(0)
        total += batch_size

        if loss_is_batch_total:
            running_loss += loss.item()
        else:
            running_loss += loss.item() * batch_size

        # Number correct in batch
        correct += (prediction == label).sum().item()

    # Guard against an empty loader instead of dividing by zero.
    if total == 0:
        return 0.0, 0.0

    return running_loss / total, correct / total

### Training

In [15]:
# Plain SGD over every model parameter.
optimizer = optim.SGD(model.parameters(), lr=0.01)

# Classification loss.
criterion = nn.CrossEntropyLoss()

# Multiply the learning rate by 0.1 every 4 scheduler steps.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=4, gamma=0.1)
# Alternative that was tried: shrink the learning rate when the monitored
# metric stops improving.
# scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer=optimizer,
#                                                  mode="min",
#                                                  factor=0.2,
#                                                  patience=1,
#                                                  verbose=True,
#                                                  threshold=0.01)

In [None]:
# Start training
num_epochs = 15

print("start")

# Periodic checkpoints are written under ./models; make sure it exists.
os.makedirs("./models", exist_ok=True)

# Start from the current weights so a usable state_dict is always available,
# even if validation accuracy never rises above zero.
best_state_dict = copy.deepcopy(model.state_dict())
best_val_acc = 0
for epoch in range(num_epochs):
    print("Epoch {}".format(epoch))

    train(model, optimizer, criterion, art_train_loader)
    val_loss, val_acc = validate(model, optimizer, criterion, art_val_loader)

    # Only ReduceLROnPlateau takes a metric. For epoch-based schedulers such
    # as StepLR the positional argument of step() is an epoch index, so
    # passing the validation loss would overwrite the epoch counter.
    if isinstance(scheduler, optim.lr_scheduler.ReduceLROnPlateau):
        scheduler.step(val_loss)
    else:
        scheduler.step()

    # Keep a copy of the weights with the best validation accuracy so far.
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        best_state_dict = copy.deepcopy(model.state_dict())

    print("Val Loss - Epoch {}: {}".format(epoch, val_loss))
    # validate() returns a fraction; scale it to match the "%" in the message.
    print("Val Acc - Epoch {}: {}%".format(epoch, val_acc * 100))

    # Every fourth epoch (except the first) checkpoint the best weights so far.
    if epoch % 4 == 0 and epoch != 0:
        torch.save(best_state_dict, "./models/art_culture_temp_epoch" + str(epoch))

# Accuracy of the final-epoch weights (not necessarily the best ones).
print("Done! {}%".format(evaluate(model, art_val_loader) * 100))

start
Epoch 0


HBox(children=(IntProgress(value=0, max=668), HTML(value='')))

In [None]:
# Persist the best weights found during training.
# NOTE(review): the file name mentions lr0001/step3/epoch12, while the
# optimizer, scheduler, and loop cells use lr=0.01, step_size=4, and 15 epochs.
# Confirm which run this name is meant to describe.
torch.save(
    obj=best_state_dict,
    f="./models/art_culture_steplr0001_step3_gamma01_epoch12_",
)

### Testing

In [None]:
# Restore a trained 11-class model from disk and move it to the device.
# NOTE(review): this checkpoint name differs from the one written by the
# save cell; presumably it comes from a separate ReduceLROnPlateau run.
model = initialize_model(
    num_classes=11,
    resume_from="./models/art_culture_platlr0001_fac02_pat1_thres001_epoch30",
)
model = model.to(device)

# Top-3 accuracy on the held-out test split.
test_acc = evaluate(model, art_test_loader, top_n=3)
print("Done!", test_acc)

### Miscellaneous

In [None]:
# Class index -> culture name. The position in the tuple is the class index.
culture_categories = dict(enumerate((
    "american",
    "british",
    "chinese",
    "dutch",
    "flemish",
    "french",
    "german",
    "indian",
    "italian",
    "japanese",
    "spanish",
)))

In [None]:
# Test on one specific example

# from torch.autograd import Variable
import matplotlib.pyplot as plt

def load_image(image_name):
    """Read an image file, preprocess it, show it, and return it as a batch.

    The file is opened with PIL, converted to RGB, and passed through the
    module-level ``transform``. The transformed tensor is drawn with
    ``plt.imshow``, so the plot shows the normalised values rather than the
    original colours.

    Args:
        image_name: Path of the image file to open.

    Returns:
        The transformed image with an extra leading batch dimension; the
        underlying tensor has ``requires_grad`` enabled.
    """
    rgb_image = Image.open(image_name).convert("RGB")
    tensor = transform(rgb_image)

    # imshow wants channels last, the transform produces channels first.
    plt.imshow(tensor.permute(1, 2, 0))

    # Fresh copy that tracks gradients, then add the batch dimension.
    tracked = tensor.clone().detach().requires_grad_(True)
    return tracked.unsqueeze(0)


def classify_image(model, categories, image_name):
    """Rank every category for one image, best match first.

    Args:
        model: Classifier to run; switched to eval mode.
        categories: Mapping from class index to category name.
        image_name: Path of the image file, loaded through ``load_image``.

    Returns:
        A list of 1-tuples ``(category_name,)`` ordered from the highest to
        the lowest model score.
    """
    model.eval()
    image = load_image(image_name).to(device)

    # load_image returns a tensor that requires grad. Only the ranking is
    # needed here, so skip building the autograd graph for the forward pass.
    with torch.no_grad():
        result_vec = model(image)

    # Class indices of the first (only) batch row, sorted by descending score.
    results = result_vec.sort(descending=True)[1][0].tolist()

    return [(categories[index],) for index in results]

# Restore the trained culture classifier and move it to the device.
model = initialize_model(
    num_classes=11,
    resume_from="./models/art_culture_platlr0001_fac02_pat1_thres001_epoch30",
)
model = model.to(device)

# Only used by the commented-out dataset example at the bottom of this cell.
culture = "chinese"
image_index = "103"

# Rank the cultures for a single hand-picked image.
ranking = classify_image(model, culture_categories, "test_images/grant_wood.jpg")
print(ranking)

# To classify an image from the filtered dataset instead:
# print(classify_image(model,
#                      culture_categories,
#                      "./data/art_culture_all_filtered/" + culture + "/" + str(image_index) + ".jpg"))

In [None]:
# Validation images for the letters model, using the same preprocessing.
letters_val = torchvision.datasets.ImageFolder(root="./data/text_val",
                                               transform=transform)

letters_val_loader = torch.utils.data.DataLoader(dataset=letters_val,
                                                 batch_size=512,
                                                 shuffle=False,
                                                 num_workers=4,
                                                 pin_memory=True)

# Initialize an untrained ResNet-18; its weights come from the checkpoint below.
model = torchvision.models.resnet18(pretrained=False)
# model = torchvision.models.resnet50(pretrained=False)

# Replace the first convolution with a 3-channel 7x7 / stride-2 layer.
model.conv1 = nn.Conv2d(in_channels=3,
                        out_channels=64,
                        kernel_size=(7,7),
                        stride=(2,2),
                        padding=(3,3),
                        bias=False)

# Set number of output classes
in_features = model.fc.in_features
out_features = 26
model.fc = nn.Linear(in_features, out_features)

model = model.to(device)
# map_location=device lets a checkpoint saved from a GPU be restored on a
# machine without CUDA, and puts the tensors where the model lives.
model.load_state_dict(torch.load("./models/letter_model_lr01_gamma015_e12",
                                 map_location=device))

# Top-1 accuracy on the letters validation split, as a percentage.
val_acc = evaluate(model, letters_val_loader) * 100
print("Done!", val_acc)