<h2><center>Digital Africa Plantation Counting Challenge</center></h2>


*About the problem*
> Palm oil is an edible vegetable oil derived from the mesocarp (reddish pulp) of the fruit of the oil palms. The oil is used in food manufacturing, beauty products, and as biofuel.

*Objective of this challenge*
> The objective of this challenge is to create a semi-supervised machine learning algorithm to count the number of palm oil trees in an image.


This will help farmers determine the number of trees on their plots and estimate crop yield. The semi-supervised nature of the solution will allow it to be applied to other plantations, such as banana palms.



In [None]:
# Import libraries
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import random
import os
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import utils
from sklearn.model_selection import train_test_split
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
from torch.optim import lr_scheduler
import cv2
import shutil
import time
import copy
import torch.nn.init as init
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Set seed for reproducibility
SEED = 12345

In [None]:
# load files
# data_path is the directory holding the competition files; '' means the current directory.
data_path = ''

# os.path.join is used (rather than string concatenation) so that data_path works
# with or without a trailing path separator.
# NOTE: the name `train` is rebound further down by `def train(...)`; the split cell
# must therefore consume this frame before the training-function cell is run.
train = pd.read_csv(os.path.join(data_path, 'Train.csv'))
test = pd.read_csv(os.path.join(data_path, 'Test.csv'))
sample_submission = pd.read_csv(os.path.join(data_path, 'SampleSubmission.csv'))

# Unzip images into the local 'TreeImages' directory
shutil.unpack_archive(os.path.join(data_path, 'TreeImages.zip'), 'TreeImages')

# Preview train
train.head()

Unnamed: 0,ImageId,Target
0,Id_jdqw9hlv6j.png,14.0
1,Id_6xtrolmuvc.png,18.0
2,Id_2m49sj3xd9.png,0.0
3,Id_9jwg5pcnn4.png,28.0
4,Id_vnm6e8n0p3.png,21.0


In [None]:
# Preview the first rows of the test frame (the preview shows an ImageId column and no Target)
test.head()

Unnamed: 0,ImageId
0,Id_ohk78h9ld8.png
1,Id_eeyj2u4j7y.png
2,Id_wsd7vx2ifa.png
3,Id_6vfneamaoh.png
4,Id_9wil3575fv.png


In [None]:
# Preview the sample submission: one row per ImageId with a Target column
# (Target is 0 in the rows shown)
sample_submission.head()

Unnamed: 0,ImageId,Target
0,Id_ohk78h9ld8.png,0
1,Id_eeyj2u4j7y.png,0
2,Id_wsd7vx2ifa.png,0
3,Id_6vfneamaoh.png,0
4,Id_9wil3575fv.png,0


## Linear Network

In [None]:
# Hold out 20% of the labelled rows (seeded split) and renumber each part from 0.
# `image_test` is the held-out part; it is used as the validation set further down.
image_train, image_test = (
    part.reset_index(drop=True)
    for part in train_test_split(train, test_size=.2, random_state=SEED)
)
image_train.shape, image_test.shape

((1601, 2), (401, 2))

In [None]:
# Dataloader
class TreeCountingDataset(Dataset):
    """Dataset yielding ``(flattened_image, label)`` pairs for the linear baseline.

    Args:
        imagesPath: directory that contains the image files.
        imagesName: file names, indexed positionally via ``.iloc`` (e.g. a pandas Series).
        imagesLabel: labels aligned with ``imagesName``, also indexed via ``.iloc``.
    """

    def __init__(self, imagesPath, imagesName, imagesLabel):
        self.imagespath, self.imagesname, self.imageslabels = (
            imagesPath,
            imagesName,
            imagesLabel,
        )

    def __len__(self):
        """Number of samples, i.e. the number of file names."""
        return len(self.imagesname)

    def __getitem__(self, idx):
        """Read image ``idx`` from disk and return it flattened, with its label.

        Returns:
            A float tensor of shape ``(1, n_values)`` holding every value of the
            image, and a float scalar tensor holding the label.
        """
        file_name = self.imagesname.iloc[idx]
        pixels = plt.imread(os.path.join(self.imagespath, file_name))
        # Collapse the whole image into a single row vector.
        flat_pixels = pixels.reshape((1, -1))
        target = self.imageslabels.iloc[idx]
        return torch.from_numpy(flat_pixels).float(), torch.tensor(target).float()

In [None]:
# Network
class ConvCountingNet(nn.Module):
    """Single-layer linear regressor followed by an ELU activation.

    Despite its name, the network contains no convolution: it is one
    ``nn.Linear`` layer mapping a flattened image to a single value.

    Args:
        in_features: length of the flattened input vector. Defaults to
            ``1024*1024*3``, the size this network was originally hard-coded for.
    """

    def __init__(self, in_features=1024*1024*3):
        super().__init__()
        self.linear1 = nn.Linear(in_features, 1)

    def forward(self, image):
        """Return ``elu(linear1(image))``; the last dimension of the result has size 1."""
        return F.elu(self.linear1(image))

In [None]:
# Weight initialization function
def weight_init(m):
    """Re-initialise an ``nn.Linear`` module in place; leave any other module untouched.

    Intended for ``model.apply(weight_init)``. The weight gets Xavier-normal
    values and the bias, when the layer has one, gets standard-normal values.
    """
    if not isinstance(m, nn.Linear):
        return
    init.xavier_normal_(m.weight.data)
    # A layer built with bias=False has no bias tensor to initialise.
    if m.bias is not None:
        init.normal_(m.bias.data)

In [None]:
def train(model, criterion, lr, dataloaders, device, epochs):
    """Train ``model`` and return it loaded with its best validation weights.

    Every epoch runs a 'train' phase followed by a 'val' phase. After each
    'val' phase the weights are snapshotted if the validation loss improved;
    the best snapshot is loaded back into ``model`` before returning.

    Args:
        model: the ``nn.Module`` to optimise (modified in place).
        criterion: loss callable taking ``(outputs, targets)``. The printed
            RMSE is the square root of the sample-weighted mean of its value,
            so it is only a true RMSE for a mean-squared-error criterion.
        lr: initial learning rate for Adam.
        dataloaders: mapping with 'train' and 'val' entries; each is iterable,
            supports ``len()``, and yields ``(image, target)`` batches.
        device: device the batches are moved to.
        epochs: number of epochs to run.

    Returns:
        The same ``model`` object, carrying the best validation-phase weights.
    """
    best_model_wts = copy.deepcopy(model.state_dict())
    best_loss = float('inf')

    # A fresh optimizer and scheduler are created on every call.
    optimizer = optim.Adam(model.parameters(), lr=lr)
    # The scheduler is stepped once per training batch, so T_max is expressed
    # in batches: ten passes over the training loader.
    scheduler = lr_scheduler.CosineAnnealingLR(optimizer, 10*len(dataloaders['train']))

    for epoch in range(epochs):
        print('Epoch {}/{}'.format(epoch+1, epochs))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            n_samples = 0

            # Iterate over data.
            for image, biomasse in dataloaders[phase]:
                image = image.to(device)
                biomasse = biomasse.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward; track history only in the training phase
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(image)
                    # Models here emit (B, 1) or (B, 1, 1) while targets are (B,).
                    # Left as-is, the loss would broadcast the two against each
                    # other and compare every prediction with every target in
                    # the batch, so align the shapes first.
                    if outputs.shape != biomasse.shape and outputs.numel() == biomasse.numel():
                        outputs = outputs.reshape(biomasse.shape)
                    loss = criterion(outputs, biomasse)
                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()
                        scheduler.step()

                # statistics: weight each batch loss by its size ...
                batch_size = image.size(0)
                running_loss += loss.item() * batch_size
                n_samples += batch_size

            # ... and normalise by the number of samples (not the number of batches).
            # An empty phase yields NaN, which never counts as an improvement.
            epoch_loss = running_loss / n_samples if n_samples else float('nan')

            print('{} RMSE: {:.4f} '.format(
                phase,np.sqrt(epoch_loss )))

            # deep copy the model
            if phase == 'val' and epoch_loss < best_loss:
                best_loss = epoch_loss
                best_model_wts = copy.deepcopy(model.state_dict())
        print()
    model.load_state_dict(best_model_wts)

    return model

In [None]:
# Use the first CUDA device when available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Set all seeds: Python's `random` module is imported by this notebook as well,
# so it is seeded together with torch and NumPy.
random.seed(SEED)
torch.manual_seed(SEED)
np.random.seed(SEED)

In [None]:
# Directory the image archive was unpacked into
imagespath = "TreeImages"

# File names (X_*) and tree counts (y_*) for the training and held-out parts
X_train, y_train = image_train["ImageId"], image_train["Target"]
X_test, y_test = image_test["ImageId"], image_test["Target"]

# File names of the unlabelled competition test set
test_ = test["ImageId"]

In [None]:
# One dataset and one loader per phase; only the training loader shuffles.
image_datasets = {
    phase: TreeCountingDataset(imagespath, names, labels)
    for phase, names, labels in (('train', X_train, y_train), ('val', X_test, y_test))
}

dataloaders = {
    phase: DataLoader(image_datasets[phase], batch_size=10, shuffle=(phase == 'train'), num_workers=2)
    for phase in ('train', 'val')
}

# Linear baseline: move it to the device, then re-initialise its linear layer.
model_ft = ConvCountingNet().to(device)
model_ft.apply(weight_init)

criterion = nn.MSELoss()

dataset_sizes = {phase: len(dataset) for phase, dataset in image_datasets.items()}

# train a model on this data
model = train(model_ft, criterion, 0.001, dataloaders, device, epochs=20)

Epoch 1/20
----------
train RMSE: 324.0706 
val RMSE: 55.1872 

Epoch 2/20
----------
train RMSE: 54.6798 
val RMSE: 55.1846 

Epoch 3/20
----------
train RMSE: 54.6884 
val RMSE: 55.1816 

Epoch 4/20
----------
train RMSE: 54.6836 
val RMSE: 55.1783 

Epoch 5/20
----------
train RMSE: 54.6662 
val RMSE: 55.1751 

Epoch 6/20
----------
train RMSE: 54.6871 
val RMSE: 55.1726 

Epoch 7/20
----------
train RMSE: 54.6775 
val RMSE: 55.1707 

Epoch 8/20
----------
train RMSE: 54.6622 
val RMSE: 55.1695 

Epoch 9/20
----------
train RMSE: 54.6935 
val RMSE: 55.1691 

Epoch 10/20
----------
train RMSE: 54.6728 
val RMSE: 55.1690 

Epoch 11/20
----------
train RMSE: 54.6716 
val RMSE: 55.1689 

Epoch 12/20
----------
train RMSE: 54.6723 
val RMSE: 55.1682 

Epoch 13/20
----------
train RMSE: 54.6758 
val RMSE: 55.1663 

Epoch 14/20
----------
train RMSE: 54.6787 
val RMSE: 55.1625 

Epoch 15/20
----------
train RMSE: 54.6783 
val RMSE: 55.1562 

Epoch 16/20
----------
train RMSE: 54.6615 
val 

## Use pre-trained model

In [None]:
# Dataloader
class TreeCountingDataset(Dataset):
    """Dataset yielding ``(channels_first_image, label)`` pairs.

    This definition replaces the earlier class of the same name: instead of
    flattening the image it moves the last axis of the array read from disk
    to the front.

    Args:
        imagesPath: directory that contains the image files.
        imagesName: file names, indexed positionally via ``.iloc`` (e.g. a pandas Series).
        imagesLabel: labels aligned with ``imagesName``, also indexed via ``.iloc``.
    """

    def __init__(self, imagesPath, imagesName, imagesLabel):
        self.imagespath, self.imagesname, self.imageslabels = (
            imagesPath,
            imagesName,
            imagesLabel,
        )

    def __len__(self):
        """Number of samples, i.e. the number of file names."""
        return len(self.imagesname)

    def __getitem__(self, idx):
        """Read image ``idx`` from disk and return it with its label.

        Returns:
            A float tensor with axes reordered as ``(2, 0, 1)`` relative to the
            array read from disk, and a float scalar tensor holding the label.
        """
        file_name = self.imagesname.iloc[idx]
        pixels = plt.imread(os.path.join(self.imagespath, file_name))
        # Move the last axis to the front (presumably H x W x C -> C x H x W; confirm against the data).
        channels_first = pixels.transpose(2, 0, 1)
        target = self.imageslabels.iloc[idx]
        return torch.from_numpy(channels_first).float(), torch.tensor(target).float()

In [None]:
def get_net():
    """Build a pretrained ResNet-34 whose final layer is replaced by a 1-output regressor.

    Returns:
        The torchvision ResNet-34 (loaded with ``pretrained=True``) with ``fc``
        swapped for a new ``Linear(in_features, 1)`` layer whose weight is
        Xavier-uniform initialised.
    """
    backbone = models.resnet34(pretrained=True)

    # New regression head, sized to the features the original fc layer consumed.
    n_features = backbone.fc.in_features
    head = torch.nn.Linear(n_features, 1)
    torch.nn.init.xavier_uniform_(head.weight)

    backbone.fc = head
    return backbone

In [None]:
# One dataset and one loader per phase; only the training loader shuffles.
image_datasets = {
    phase: TreeCountingDataset(imagespath, names, labels)
    for phase, names, labels in (('train', X_train, y_train), ('val', X_test, y_test))
}

dataloaders = {
    phase: DataLoader(image_datasets[phase], batch_size=10, shuffle=(phase == 'train'), num_workers=2)
    for phase in ('train', 'val')
}

# call pretrained model and move it to the device
model_ft = get_net().to(device)

criterion = nn.MSELoss()

dataset_sizes = {phase: len(dataset) for phase, dataset in image_datasets.items()}

# train a model on this data
model = train(model_ft, criterion, 0.001, dataloaders, device, epochs=20)

Epoch 1/20
----------
train RMSE: 42.9559 
val RMSE: 43.2239 

Epoch 2/20
----------
train RMSE: 42.5608 
val RMSE: 43.1920 

Epoch 3/20
----------
train RMSE: 42.3230 
val RMSE: 47.4972 

Epoch 4/20
----------
train RMSE: 42.5981 
val RMSE: 43.3827 

Epoch 5/20
----------
train RMSE: 42.1833 
val RMSE: 43.0780 

Epoch 6/20
----------
train RMSE: 41.9739 
val RMSE: 43.2641 

Epoch 7/20
----------
train RMSE: 41.8615 
val RMSE: 43.2771 

Epoch 8/20
----------
train RMSE: 41.6049 
val RMSE: 43.2438 

Epoch 9/20
----------
train RMSE: 41.5280 
val RMSE: 43.4544 

Epoch 10/20
----------
train RMSE: 41.1089 
val RMSE: 43.2490 

Epoch 11/20
----------
train RMSE: 41.2888 
val RMSE: 43.2320 

Epoch 12/20
----------
train RMSE: 41.4844 
val RMSE: 43.2302 

Epoch 13/20
----------
train RMSE: 41.3138 
val RMSE: 43.2010 

Epoch 14/20
----------
train RMSE: 41.5237 
val RMSE: 43.1628 

Epoch 15/20
----------
train RMSE: 41.2512 
val RMSE: 43.5362 

Epoch 16/20
----------
train RMSE: 41.7289 
val R

In [None]:
# Predict on test set
# The dataset class requires labels, so zeros are passed as placeholders and
# discarded when each batch is unpacked below.
test_dataset = TreeCountingDataset(imagespath,test_, pd.Series(np.zeros(test.shape[0])))
# shuffle=False keeps predictions in the same order as the rows of `test`.
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=10, shuffle=False, num_workers=2)

model.eval()

preds = []
with torch.no_grad():
  for images, _ in test_dataloader:
    # Send the batch to the notebook-wide `device` (the one the model was moved to)
    # rather than a hard-coded 'cuda:0', so this cell also runs on CPU-only machines.
    pred = model(images.to(device))
    # Keep the first entry along the second axis of each prediction row.
    preds.extend(pred.detach().cpu().numpy()[:, 0])

In [None]:
# Prepare submission file: pair each test ImageId with its prediction.
# The pairing is positional, which relies on the test loader having been built with shuffle=False.
sub = pd.DataFrame({'ImageId': test.ImageId, 'Target': preds})
sub.head()

Unnamed: 0,ImageId,Target
0,Id_ohk78h9ld8.png,10.082159
1,Id_eeyj2u4j7y.png,9.732224
2,Id_wsd7vx2ifa.png,10.020741
3,Id_6vfneamaoh.png,9.991602
4,Id_9wil3575fv.png,9.968956


In [None]:
# Create csv file, download and upload to Zindi
# index=False keeps the DataFrame's row index out of the written file.
sub.to_csv('BaselineSubmission.csv', index = False)