<a href="https://colab.research.google.com/github/wzj207/2020fst/blob/master/water_check.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
torch.__version__, torch.cuda.is_available()

In [None]:
import requests
import os
import time

In [None]:
species = 'Passer domesticus'
species_folder = os.path.join('images', species)

file_path_1 = 'House x Italian Sparrow (hybrid) - Passer domesticus x italiae.txt'
def get_ids_from_txt(file_path):
    """Read a comma-separated list of asset ids from a text file.

    Surrounding whitespace (including the trailing newline most editors
    append) is stripped from every id and empty entries are dropped, so the
    ids can be interpolated into URLs and file names safely.

    Args:
        file_path: Path of the text file holding the ids.

    Returns:
        list[str]: The ids in file order (duplicates are kept). The number of
        ids is also printed.
    """
    with open(file_path, encoding='utf-8') as f:
        raw = f.read()
    # "123, 456,\n" -> ['123', '456'] instead of ['123', ' 456', '\n'].
    ids = [token.strip() for token in raw.split(',')]
    ids = [token for token in ids if token]
    print(len(ids))
    return ids
# Load the asset ids listed in the text file named by file_path_1.
ids_1 = get_ids_from_txt(file_path_1)


# NOTE(review): these names mention "RockPigeon_Columbalivia", but `species`
# and `file_path_1` above refer to sparrows (Passer domesticus) — the names
# look like leftovers from an earlier run; confirm before relying on them.
assertIdsListRockPigeon_Columbalivia = ids_1
# The set drops duplicate ids so each asset is only considered once.
assertIdsSetRockPigeon_Columbalivia = set(assertIdsListRockPigeon_Columbalivia)

# Create the per-species output folder; exist_ok avoids an error on re-runs.
os.makedirs(species_folder, exist_ok=True)

current_list = assertIdsListRockPigeon_Columbalivia
current_set = assertIdsSetRockPigeon_Columbalivia
# Comparing the two lengths shows how many duplicate ids the file contains.
print(len(current_list))
print(len(current_set))

In [None]:
def get_images(current_set):
    """Download the image of every asset id into ``species_folder``.

    Ids whose ``<id>.jpg`` file already exists are skipped *before* any
    request is made, so re-running the cell only fetches missing images.
    A failed or timed-out request is reported and the loop continues with
    the next id instead of aborting the whole download.

    Args:
        current_set: Iterable of asset ids; each id is formatted into the
            download URL and used as the file name.
    """
    for asset_id in current_set:
        save_fn = os.path.join(species_folder, str(asset_id) + '.jpg')
        if not os.path.exists(save_fn):
            url_of_single_image = f'https://cdn.download.ams.birds.cornell.edu/api/v1/asset/{asset_id}/1200'
            try:
                # Without a timeout a stalled connection would hang forever.
                response = requests.get(url_of_single_image, timeout=30)
            except requests.RequestException as err:
                print(f'Request failed: {err}, url: {url_of_single_image}')
            else:
                if response.ok:  # .status_code == 200:
                    print(f'Code: {response.status_code}, url: {url_of_single_image}')
                    with open(save_fn, 'wb') as f:
                        f.write(response.content)
            # Short pause between requests so the server is not hammered.
            print('wait for 50 ms...')
            time.sleep(0.05)
        print(f'{len(os.listdir(species_folder))} images has/have been saved.')

get_images(current_set)

In [None]:
from google.colab import drive
import shutil
path = '/content/drive/'
drive.mount(path)
os.listdir(path)

In [None]:
os.listdir(path+'/MyDrive/Colab Notebooks/datadir')

In [None]:
shutil.copytree(path+'/MyDrive/Colab Notebooks/datadir', 'datadir')

In [None]:
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, models, transforms
import os
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
# Root folder expected to contain one sub-folder per split
# ('train', 'val', 'test'), each laid out for datasets.ImageFolder.
data_dir = './datadir'


def _eval_transform():
    """Build the deterministic pipeline shared by the 'val' and 'test' splits."""
    return transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        # Presumably the ImageNet channel statistics expected by torchvision's
        # pretrained models — confirm.
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ])


# One preprocessing pipeline per split. Only 'train' applies random
# augmentation (flip, small rotation, colour jitter).
data_transforms = {
    'train': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(5),
        # NOTE(review): ColorJitter() is called without arguments; with
        # torchvision's defaults this looks like a no-op — confirm.
        transforms.ColorJitter(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ]),
    'val': _eval_transform(),
    'test': _eval_transform(),
}

# Load the corresponding folders, pairing each split with its pipeline.
image_datasets = {
    split: datasets.ImageFolder(os.path.join(data_dir, split), data_transforms[split])
    for split in ('train', 'val', 'test')
}

In [None]:
len(image_datasets['train']),len(image_datasets['val']),len(image_datasets['test'])

In [None]:
# os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE'
# Use the first GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


class ImageClassificationBase(nn.Module):
    """Shared training/validation logic for single-output binary classifiers.

    Subclasses implement ``forward``. Because the loss is
    ``F.binary_cross_entropy``, ``forward`` must return probabilities in
    [0, 1] with shape ``(batch, 1)``.
    """

    def _prepare_batch(self, batch):
        """Move a ``(images, targets)`` batch to ``device``.

        Targets are cast to float and reshaped to ``(batch, 1)`` so they match
        the model output. ``.float()`` keeps the tensor on whatever device it
        is on, so this works on CPU as well as on CUDA (the previous
        ``torch.cuda.FloatTensor`` cast failed without a GPU).
        """
        images, targets = batch
        images = images.to(device)
        targets = targets.to(device)
        targets = torch.reshape(targets.float(), (len(targets), 1))
        return images, targets

    def training_step(self, batch):
        """Return the binary cross-entropy loss of one training batch."""
        images, targets = self._prepare_batch(batch)
        out = self(images)
        loss = F.binary_cross_entropy(out, targets)
        return loss

    def validation_step(self, batch):
        """Return detached loss and F-score tensors for one validation batch."""
        images, targets = self._prepare_batch(batch)
        out = self(images)  # Generate predictions
        loss = F.binary_cross_entropy(out, targets)  # Calculate loss
        score = F_score(out, targets)
        return {'val_loss': loss.detach(), 'val_score': score.detach()}

    def validation_epoch_end(self, outputs):
        """Average the per-batch results of ``validation_step``.

        Args:
            outputs: Non-empty list of dicts with tensor entries
                ``'val_loss'`` and ``'val_score'``.

        Returns:
            dict: ``{'val_loss': float, 'val_score': float}`` holding the
            unweighted means over the batches.
        """
        batch_losses = [x['val_loss'] for x in outputs]
        epoch_loss = torch.stack(batch_losses).mean()  # Combine losses
        batch_scores = [x['val_score'] for x in outputs]
        epoch_score = torch.stack(batch_scores).mean()  # Combine scores
        return {'val_loss': epoch_loss.item(), 'val_score': epoch_score.item()}

    def epoch_end(self, epoch, result):
        """Print a one-line summary of an epoch.

        ``result`` must provide ``'lrs'`` (its last entry is shown),
        ``'train_loss'``, ``'val_loss'`` and ``'val_score'``.
        """
        print("Epoch [{}], last_lr: {:.4f}, train_loss: {:.4f}, val_loss: {:.4f}, val_score: {:.4f}".format(
            epoch, result['lrs'][-1], result['train_loss'], result['val_loss'], result['val_score']))


class HabitatElementRegResnet50(ImageClassificationBase):
    """ResNet-50 backbone with a single sigmoid output for binary prediction.

    The torchvision pretrained network is loaded and its final fully
    connected layer is replaced by a one-unit linear layer; ``forward``
    applies a sigmoid so the output is a probability of shape ``(batch, 1)``.
    """

    def __init__(self):
        super().__init__()
        # Use a pretrained model
        self.network = models.resnet50(pretrained=True)
        # Replace last layer
        num_ftrs = self.network.fc.in_features
        self.network.fc = nn.Linear(num_ftrs, 1)

    def forward(self, xb):
        """Return the sigmoid of the network output for the batch ``xb``."""
        return torch.sigmoid(self.network(xb))

    def freeze(self):
        """Make only the final ``fc`` layer trainable.

        The attribute autograd reads is ``requires_grad``; the previous
        spelling ``require_grad`` merely created an unused attribute, so
        nothing was actually frozen.
        """
        for param in self.network.parameters():
            param.requires_grad = False
        for param in self.network.fc.parameters():
            param.requires_grad = True

    def unfreeze(self):
        """Make every layer of the network trainable again."""
        for param in self.network.parameters():
            param.requires_grad = True

In [None]:
print(device)
model = HabitatElementRegResnet50().to(device)
sample = torch.randn(1, 3, 224, 224).to(device)
print(model(sample))

In [None]:
@torch.no_grad()
def evaluate(model, val_loader):
    """Run one validation pass and return the aggregated metrics.

    The model is switched to eval mode and autograd is disabled for the whole
    pass, so no computation graph is built or kept in memory while
    validating.

    Args:
        model: Module exposing ``validation_step(batch)`` and
            ``validation_epoch_end(outputs)``.
        val_loader: Iterable of validation batches.

    Returns:
        Whatever ``model.validation_epoch_end`` returns for the list of
        per-batch results.
    """
    model.eval()
    outputs = [model.validation_step(batch) for batch in val_loader]
    return model.validation_epoch_end(outputs)
def fit_one_cycle(epochs, max_lr, model, train_loader, val_loader,
                  weight_decay=0, grad_clip=None, opt_func=torch.optim.SGD):
    """Train ``model`` with a one-cycle learning-rate schedule.

    Args:
        epochs: Number of passes over ``train_loader``.
        max_lr: Peak learning rate, passed to the optimizer and ``OneCycleLR``.
        model: Module exposing ``training_step``, ``validation_step``,
            ``validation_epoch_end`` and ``epoch_end``.
        train_loader: Iterable of training batches; must support ``len()``.
        val_loader: Iterable of validation batches.
        weight_decay: Weight decay forwarded to ``opt_func``.
        grad_clip: If truthy, gradients are clipped element-wise to
            ``[-grad_clip, grad_clip]`` before every optimizer step.
        opt_func: Optimizer constructor, called as
            ``opt_func(params, lr, weight_decay=...)``.

    Returns:
        list[dict]: One result dict per epoch containing the validation
        metrics plus ``'train_loss'`` (mean batch loss) and ``'lrs'``
        (learning rate used for each batch).
    """
    torch.cuda.empty_cache()
    history = []

    # Set up custom optimizer with weight decay
    optimizer = opt_func(model.parameters(), max_lr, weight_decay=weight_decay)
    # Set up one-cycle learning rate scheduler
    sched = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr, epochs=epochs,
                                                steps_per_epoch=len(train_loader))

    for epoch in range(epochs):
        # Training Phase
        model.train()
        train_losses = []
        lrs = []
        for batch in tqdm(train_loader):
            loss = model.training_step(batch)
            # Keep only the detached value: storing the live loss would hold
            # every batch's autograd graph in memory until the epoch ends.
            train_losses.append(loss.detach())
            loss.backward()

            # Gradient clipping
            if grad_clip:
                nn.utils.clip_grad_value_(model.parameters(), grad_clip)

            optimizer.step()
            optimizer.zero_grad()

            # Record & update learning rate
            lrs.append(get_lr(optimizer))
            sched.step()

        # Validation phase
        result = evaluate(model, val_loader)
        result['train_loss'] = torch.stack(train_losses).mean().item()
        result['lrs'] = lrs
        model.epoch_end(epoch, result)
        history.append(result)
    return history
def get_lr(optimizer):
    """Return the learning rate of the optimizer's first parameter group.

    Returns ``None`` when the optimizer has no parameter groups.
    """
    first_lr = (group['lr'] for group in optimizer.param_groups)
    return next(first_lr, None)

In [None]:
# Wrap each split in a DataLoader. Training and validation are served in
# mini-batches of 32 images (training data is shuffled); the test split is
# served one image at a time, in dataset order.
train_loader = torch.utils.data.DataLoader(image_datasets['train'], batch_size=32, shuffle=True)
val_loader = torch.utils.data.DataLoader(image_datasets['val'], batch_size=32, shuffle=False)
test_loader = torch.utils.data.DataLoader(image_datasets['test'], batch_size=1, shuffle=False)

In [None]:
def F_score(output, label, threshold=0.5, beta=1):
    """Compute a sample-averaged F-beta score.

    Both tensors are binarised with ``threshold``. True positives, false
    positives and false negatives are counted per row (dimension 1), then
    precision and recall are averaged over the rows before being combined.

    NOTE(review): because the averaging is per row, with a single output
    column a row only contributes when it is a true positive, so the result
    differs from the usual dataset-level F1 (e.g. sklearn's ``f1_score``).
    Confirm this is the intended metric.

    Args:
        output: 2-D tensor of predicted scores, one row per sample.
        label: 2-D tensor of targets with the same shape as ``output``.
        threshold: Values strictly above it count as positive.
        beta: Weight of recall relative to precision (1 gives F1).

    Returns:
        0-dim float tensor with the score.
    """
    pred = output > threshold
    truth = label > threshold

    # Row-wise confusion counts. True negatives do not enter the formula, so
    # they are not computed.
    TP = (pred & truth).sum(1).float()
    FP = (pred & ~truth).sum(1).float()
    FN = (~pred & truth).sum(1).float()

    # The small epsilon keeps the divisions defined for rows with no positives.
    precision = torch.mean(TP / (TP + FP + 1e-12))
    recall = torch.mean(TP / (TP + FN + 1e-12))
    f_beta = (1 + beta**2) * precision * recall / (beta**2 * precision + recall + 1e-12)
    return f_beta.mean(0)

In [None]:
# Intended to restrict training to the model's final layer.
model.freeze()

# Hyper-parameters forwarded to fit_one_cycle below.
epochs = 100
# Peak learning rate of the one-cycle schedule.
max_lr = 0.01
# Gradients are clipped element-wise to [-grad_clip, grad_clip].
grad_clip = 0.1
weight_decay = 1e-4
opt_func = torch.optim.Adam

# start_time = time.time()

# Collect the per-epoch result dicts returned by fit_one_cycle.
history = []
history += fit_one_cycle(epochs, max_lr, model, train_loader, val_loader,
                          grad_clip=grad_clip,
                          weight_decay=weight_decay,
                          opt_func=opt_func)

In [None]:
torch.save(model, 'resnet50_100epochs_model.pt')

In [None]:
torch.save(model.state_dict(),'resnet50_100epochs_model_state.pt')

In [None]:
def plot_scores(history):
    """Plot the validation score of every epoch, save the figure and show it.

    Args:
        history: Sequence of per-epoch result dicts with a ``'val_score'`` key.
    """
    val_scores = []
    for epoch_result in history:
        val_scores.append(epoch_result['val_score'])

    plt.plot(val_scores, '-x')
    plt.title('F1 score vs. No. of epochs')
    plt.xlabel('epoch')
    plt.ylabel('score')
    plt.savefig("CNN_scores_no_augmentation")
    plt.show()

def plot_losses(history):
    """Plot training and validation loss per epoch, save the figure and show it.

    Args:
        history: Sequence of per-epoch result dicts. ``'val_loss'`` is
            required; a missing ``'train_loss'`` is plotted as ``None``.
    """
    training_curve = []
    validation_curve = []
    for epoch_result in history:
        training_curve.append(epoch_result.get('train_loss'))
        validation_curve.append(epoch_result['val_loss'])

    # Blue crosses: training loss; red crosses: validation loss.
    plt.plot(training_curve, '-bx')
    plt.plot(validation_curve, '-rx')
    plt.title('Loss vs. No. of epochs')
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.legend(['Training', 'Validation'])
    plt.savefig("CNN_losses_no_augmentation")
    plt.show()

def plot_lrs(history):
    """Plot the learning rate used for every batch, save the figure and show it.

    Args:
        history: Sequence of per-epoch result dicts; each may carry an
            ``'lrs'`` list (treated as empty when missing).
    """
    per_epoch_lrs = [epoch_result.get('lrs', []) for epoch_result in history]
    # Flatten the per-epoch lists into one batch-indexed series.
    all_lrs = np.concatenate(per_epoch_lrs)

    plt.plot(all_lrs)
    plt.title('Learning Rate vs. Batch no.')
    plt.xlabel('Batch no.')
    plt.ylabel('Learning rate')
    plt.savefig("CNN_lrs_no_augmentation")
    plt.show()

In [None]:
plot_lrs(history)

In [None]:
plot_losses(history)

In [None]:
plot_scores(history)

In [None]:
# Reload the fully pickled model written by torch.save(model, ...) and run a
# random input through it as a smoke test.
# NOTE(review): loading a whole pickled module unpickles arbitrary objects and
# presumably needs the model class to be defined in this session — only load
# trusted files and confirm this works with the installed torch.load defaults.
model_load = torch.load('resnet50_100epochs_model.pt')
sample = torch.randn(1, 3, 224, 224).to(device)
model_load(sample)

In [None]:
@torch.no_grad()
def predict_dl(dl, model, threshold=0.5):
    """Predict binary labels for every sample served by a data loader.

    The model is moved to ``device`` and put in eval mode. Predictions and
    labels are concatenated across batches, so any batch size works (the
    previous per-batch ``y.item()`` only worked with ``batch_size=1``).

    Args:
        dl: Iterable yielding ``(inputs, labels)`` tensor batches.
        model: Module returning one probability per sample.
        threshold: Probabilities strictly above it are predicted as 1.

    Returns:
        tuple[list[int], list[int]]: Predicted labels and true labels, in
        loader order. Both lists are empty when the loader yields nothing.
    """
    torch.cuda.empty_cache()
    batch_probs = []
    batch_y = []
    model = model.to(device)
    model.eval()
    for xb, yb in tqdm(dl):
        xb = xb.to(device)
        probs = model(xb)
        batch_probs.append(probs.cpu().detach())
        batch_y.append(yb.cpu())
    if not batch_probs:
        # torch.cat raises on an empty list; an empty loader has no results.
        return [], []
    predicted = (torch.cat(batch_probs) > threshold).reshape(-1)
    actual = torch.cat(batch_y).reshape(-1)
    return [int(p) for p in predicted.tolist()], [int(y) for y in actual.tolist()]

In [None]:
torch.cuda.is_available()

In [None]:
dl = test_loader
model = model_load
p1, y1 = predict_dl(dl, model, threshold=0.5)

In [None]:
dl = test_loader
# NOTE(review): `model = model` is a no-op. If the cells are run top to
# bottom, `model` was already rebound to `model_load` by the previous
# prediction cell, so p2/y2 come from the reloaded model again rather than
# from the originally trained in-memory model — confirm what was intended.
model = model
p2, y2 = predict_dl(dl, model, threshold=0.5)

In [None]:
for y, yy in zip(y1, y2):
  if y != yy:
    print(y, yy)

In [None]:
actual_label = test_loader.dataset.targets
# actual_label
for yb, l in zip(y1, actual_label):
  if yb != l:
    print(yb, l)

In [None]:
test_preds = p1

In [None]:
from sklearn.metrics import f1_score, accuracy_score, confusion_matrix, classification_report

# Dataset-level metrics computed by scikit-learn from the true labels and
# the thresholded predictions in test_preds.
f1 = f1_score(actual_label, test_preds)
# The notebook's own F_score expects 2-D tensors, so both label lists are
# reshaped to one column per sample before the call.
f_score = float(np.array(F_score(torch.tensor(np.array(test_preds).reshape(len(test_preds), 1)), torch.tensor(np.array(actual_label).reshape(len(actual_label), 1)))))
accuracy = accuracy_score(actual_label, test_preds)
cm = confusion_matrix(actual_label, test_preds)
report = classification_report(actual_label, test_preds)

print("Model F-Score (Test Data): ", f_score)
print("Model F1-Score (Test Data): ", f1)
print("Model Accuracy: ", accuracy)
print("Confusion Matrix:\n", cm)
print("\nClassification Report:\n", report)

In [None]:
# Plot Confusion Matrix
import pandas as pd
import seaborn as sns
df_cm = pd.DataFrame(cm, index = [i for i in "01"], columns = [i for i in "01"])
plt.figure(figsize = (10,7))
sns.set(font_scale=1.4)
sns.heatmap(df_cm, cmap="Oranges", annot=True, annot_kws={"size": 16})
plt.title("Plot of Confusion Matrix")

plt.savefig("ResNet50_CM")
plt.show()

In [None]:
# Rebuild the architecture and restore the weights saved with
# torch.save(model.state_dict(), ...).
model2 = HabitatElementRegResnet50()
ckpt = 'resnet50_100epochs_model_state.pt'
# `device` is a torch.device, which never compares equal to a plain string,
# so `device != 'cpu'` was always True. Compare the device type instead.
is_cuda = (device.type != 'cpu')
# map_location=device remaps the stored tensors onto the device in use, so a
# checkpoint written on a GPU can also be loaded on a CPU-only machine.
checkpoint = torch.load(ckpt, map_location=device)
model2.load_state_dict(checkpoint)
print('Loaded model from [{}].'.format(ckpt))

In [None]:
# torch.save(opt_func())
opt_func

In [None]:
dl = test_loader
model = model2
p3, y3 = predict_dl(dl, model, threshold=0.5)

In [None]:
test_preds = p3
f1 = f1_score(actual_label, test_preds)
f_score = float(np.array(F_score(torch.tensor(np.array(test_preds).reshape(len(test_preds), 1)), torch.tensor(np.array(actual_label).reshape(len(actual_label), 1)))))
accuracy = accuracy_score(actual_label, test_preds)
cm = confusion_matrix(actual_label, test_preds)
report = classification_report(actual_label, test_preds)

print("Model F-Score (Test Data): ", f_score)
print("Model F1-Score (Test Data): ", f1)
print("Model Accuracy: ", accuracy)
print("Confusion Matrix:\n", cm)
print("\nClassification Report:\n", report)

In [None]:
df_cm = pd.DataFrame(cm, index = [i for i in "01"], columns = [i for i in "01"])
plt.figure(figsize = (10,7))
sns.set(font_scale=1.4)
sns.heatmap(df_cm, cmap="Oranges", annot=True, annot_kws={"size": 16})
plt.title("Plot of Confusion Matrix")

plt.savefig("ResNet50_CM")
plt.show()

In [None]:
from PIL import Image

# Classify every image in the with_water test folder with model2 and count
# how many predictions fall on each side of the 0.5 threshold.
with_water_dir = './datadir/test/with_water'
waters = os.listdir(with_water_dir)
zeros = 0
ones = 0
model2.eval()  # make sure layers run in inference mode
with torch.no_grad():  # predictions need no gradients
    for water in waters:
        # Open from the same directory that was listed, and convert to RGB so
        # grayscale/RGBA files fit the 3-channel Normalize transform (this
        # matches the without_water loop).
        sample_img = Image.open(os.path.join(with_water_dir, water)).convert('RGB')
        sample_tensor = data_transforms['test'](sample_img).unsqueeze(0).to(device)
        p = int(model2(sample_tensor) > 0.5)
        if p == 0:
            zeros += 1
        else:
            ones += 1
print('zeros:',zeros, 'ones:', ones)

In [None]:
# Classify every image in the without_water test folder with model2 and
# count how many predictions fall on each side of the 0.5 threshold.
without_water_dir = './datadir/test/without_water'
nowaters = os.listdir(without_water_dir)
zeros = 0
ones = 0
model2.eval()  # make sure layers run in inference mode
with torch.no_grad():  # predictions need no gradients
    for nowater in nowaters:
        # Open from the same directory that was listed instead of a separate
        # hard-coded absolute path, so both always refer to the same files.
        img = Image.open(os.path.join(without_water_dir, nowater)).convert('RGB')
        sample_tensor = data_transforms['test'](img).unsqueeze(0).to(device)
        p = int(model2(sample_tensor) > 0.5)
        if p == 0:
            zeros += 1
        else:
            ones += 1
print('zeros:',zeros, 'ones:', ones)

In [None]:
# Class index 0: with_water
# Class index 1: without_water

In [None]:
# Copy model2's weights into a freshly constructed network and evaluate the
# copy on the test loader.
model3 = HabitatElementRegResnet50()
# NOTE(review): is_cuda is not used anywhere in this cell; also, `device` is a
# torch.device, so comparing it with the string 'cpu' likely does not do what
# the name suggests — confirm.
is_cuda = (device != 'cpu')
checkpoint = model2.state_dict()
model3.load_state_dict(checkpoint)

dl = test_loader
# NOTE(review): `model` is rebound to model2 here, but the prediction below
# is made with model3.
model = model2
p4, y4 = predict_dl(dl, model3, threshold=0.5)

test_preds = p4
f1 = f1_score(actual_label, test_preds)
f_score = float(np.array(F_score(torch.tensor(np.array(test_preds).reshape(len(test_preds), 1)), torch.tensor(np.array(actual_label).reshape(len(actual_label), 1)))))
accuracy = accuracy_score(actual_label, test_preds)
cm = confusion_matrix(actual_label, test_preds)
report = classification_report(actual_label, test_preds)

print("Model F-Score (Test Data): ", f_score)
print("Model F1-Score (Test Data): ", f1)
print("Model Accuracy: ", accuracy)
print("Confusion Matrix:\n", cm)
print("\nClassification Report:\n", report)

In [None]:
from google.colab import drive
drive.mount('/content/drive')