In [None]:
# importing necessary libraries

import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
import numpy as np
import cv2
from tqdm import tqdm
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D
from sklearn.model_selection import train_test_split
from keras.preprocessing import image
from keras import backend as k
from keras.preprocessing.image import ImageDataGenerator

In [None]:
# Load the training label table (train_classes.csv) into a DataFrame.

train_df = pd.read_csv(
    '../input/planets-dataset/planet/planet/train_classes.csv'
)

In [None]:
# Explore the distribution of the training labels.
from collections import Counter, defaultdict

# Each row's 'tags' string holds space-separated labels; split it into a list.
labels = train_df['tags'].apply(lambda tags: tags.split(' '))

# Occurrence count of every individual label. Counter is a dict subclass that
# keeps keys in first-insertion order, so the key order is the same as the one
# produced by counting in a hand-written nested loop.
counts = Counter(tag for row_tags in labels for tag in row_tags)

# Distinct labels in order of first appearance. This order determines the
# integer index each label receives when label_map is built from tag_list.
tag_list = list(counts)


In [None]:
# Build a lookup from each label to its integer position in tag_list.
label_map = dict(zip(tag_list, range(len(tag_list))))
label_map

In [None]:
# One-hot encode the training labels and load every training image as a
# 64x64 pixel array.

train_image_template = '../input/planets-dataset/planet/planet/train-jpg/{}.jpg'

X_train, y_train = [], []
for img_name, label in tqdm(train_df.values, miniters=1000):
    # Multi-hot target vector: one slot per known label (sized from label_map
    # rather than a hard-coded 17 so it cannot drift from the label set).
    target = np.zeros(len(label_map))
    for tag in label.split(' '):
        target[label_map[tag]] = 1

    img_path = train_image_template.format(img_name)
    pixels = cv2.imread(img_path)
    # cv2.imread reports a missing/unreadable file by returning None instead
    # of raising; stop here with the offending path rather than letting
    # cv2.resize fail with an opaque error.
    if pixels is None:
        raise FileNotFoundError('Could not read image: {}'.format(img_path))

    X_train.append(cv2.resize(pixels, (64, 64)))
    y_train.append(target)

In [None]:
# Stack the per-image arrays into one array and divide by 255 to rescale the
# pixel values. float32 is requested explicitly: the default result of the
# division would be float64, which doubles the memory footprint of the image
# array without adding anything useful for training.
X_train = np.array(X_train, dtype=np.float32) / 255.0
y_train = np.array(y_train)
print(X_train.shape)
print(y_train.shape)

In [None]:
# Hold out 20% of the training samples as a validation set.

from sklearn.model_selection import train_test_split

X_train, x_val, y_train, y_val = train_test_split(
    X_train,
    y_train,
    test_size=0.2,
    shuffle=True,
    random_state=1,  # fixed seed so the split is reproducible
)

print(X_train.shape, y_train.shape, x_val.shape, y_val.shape)

In [None]:
from keras import backend
 
# calculate fbeta score for multi-class/label classification
def fbeta(y_true, y_pred, beta=2):
    """Return the averaged F-beta score of ``y_pred`` against ``y_true``.

    Predictions are clipped to [0, 1]; true positives, false positives and
    false negatives are then obtained by clipping and rounding the element-wise
    product/differences and summing them along axis 1. Precision and recall
    are derived from those sums, combined into an F-beta value per entry of
    the reduced tensor, and finally averaged with ``backend.mean``.

    Args:
        y_true: ground-truth tensor (presumably 0/1 indicators -- confirm
            against the caller).
        y_pred: predicted scores; must be broadcast-compatible with ``y_true``.
        beta: weight of recall relative to precision (default 2).

    Returns:
        The mean F-beta score as a backend tensor.
    """
    eps = backend.epsilon()
    beta_sq = beta ** 2

    # Keep the predictions inside the [0, 1] range before counting.
    clipped_pred = backend.clip(y_pred, 0, 1)

    def _count(values):
        # Sum along axis 1 of the entries that round to 1 after clipping.
        return backend.sum(backend.round(backend.clip(values, 0, 1)), axis=1)

    true_pos = _count(y_true * clipped_pred)
    false_pos = _count(clipped_pred - y_true)
    false_neg = _count(y_true - clipped_pred)

    # epsilon keeps every denominator strictly positive.
    precision = true_pos / (true_pos + false_pos + eps)
    recall = true_pos / (true_pos + false_neg + eps)

    scores = (1 + beta_sq) * (precision * recall) / (beta_sq * precision + recall + eps)
    return backend.mean(scores)

In [None]:

          
# Base model. Feel free to try out other architectures and ideas to improve
# the fbeta score.

# Layers and optimizers are taken from tensorflow.keras -- the same package
# that provides `keras.Sequential` in this notebook -- rather than from the
# standalone `keras` package, so the model is never assembled from classes
# belonging to two different Keras installations.
from tensorflow.keras.layers import Conv2D, Dense, Flatten, MaxPooling2D, BatchNormalization, Dropout
from tensorflow.keras.optimizers import Adam, RMSprop

# One output unit per column of the one-hot targets (instead of a literal 17).
num_labels = y_train.shape[1]

model = keras.Sequential()

# Three convolution + max-pooling stages on 64x64x3 inputs.
model.add(Conv2D(64, kernel_size=5, strides=2, activation="relu", input_shape=(64, 64, 3)))
model.add(MaxPooling2D())

model.add(Conv2D(128, (3, 3), activation="relu"))
model.add(MaxPooling2D())

model.add(Conv2D(256, (3, 3), activation="relu"))
model.add(MaxPooling2D())

# Dense classification head with dropout.
model.add(Flatten())

model.add(Dense(512, activation="relu"))
model.add(Dropout(0.5))
# Sigmoid (not softmax): every label gets its own independent score.
model.add(Dense(num_labels, activation="sigmoid"))

model.compile(loss="binary_crossentropy", optimizer=Adam(), metrics=[fbeta])

# Keep the returned History object so the per-epoch values remain accessible
# after training instead of being discarded.
history = model.fit(X_train, y_train, validation_data=(x_val, y_val), epochs=50, batch_size=128)

In [None]:
# Print the layer-by-layer summary of the model built above.
model.summary()

In [None]:
# Persist the model's weights to an HDF5 file.
# NOTE(review): newer Keras releases may require the file name to end in
# `.weights.h5` -- confirm against the installed version.
model.save_weights('Amazon_Model2.h5')

In [None]:
# Drop the names bound to the training arrays so their memory can be reclaimed.
del X_train, y_train

In [None]:
import gc

# Actually invoke the collector: a bare `gc.collect` (without parentheses)
# only evaluates to the function object and frees nothing.
gc.collect()

#### Making predictions

In [None]:
# Load the sample submission table and preview its first rows.
submission_sample = pd.read_csv(
    '../input/planets-dataset/planet/planet/sample_submission.csv'
)
submission_sample.head()

In [None]:
# Split the submission rows by position: the first 40669 rows and the rest.
test_img = submission_sample.iloc[:40669]
files = submission_sample.iloc[40669:]

In [None]:
# Load every test image as a 64x64 pixel array, in submission-row order.

def _read_resized(img_path, size=(64, 64)):
    """Read the image at ``img_path`` with OpenCV and resize it to ``size``.

    Raises:
        FileNotFoundError: if OpenCV cannot read the file.
    """
    pixels = cv2.imread(img_path)
    # cv2.imread returns None (it does not raise) for a missing/unreadable file.
    if pixels is None:
        raise FileNotFoundError('Could not read image: {}'.format(img_path))
    return cv2.resize(pixels, size)


# The first 40669 submission rows are read from test-jpg, the remaining rows
# from test-jpg-additional.
test_sources = [
    (submission_sample[:40669], '../input/planets-dataset/planet/planet/test-jpg/{}.jpg'),
    (submission_sample[40669:], '../input/planets-dataset/test-jpg-additional/test-jpg-additional/{}.jpg'),
]

X_test = []
for rows, path_template in test_sources:
    # Each row unpacks into (image name, second column); only the name is used.
    for img_name, _ in tqdm(rows.values, miniters=1000):
        X_test.append(_read_resized(path_template.format(img_name)))

# float32 is requested explicitly: the default float64 result of the division
# would double the memory needed for the stacked test images.
X_test = np.array(X_test, dtype=np.float32) / 255

In [None]:
import gc

# Call the collector; `gc.collect` without parentheses is a no-op expression
# that merely references the function.
gc.collect()

In [None]:
# Predict on the test images and keep the output as the single entry of a
# list of prediction arrays.
p_test = model.predict(X_test)
y_pred = [p_test]
y_pred

In [None]:
# Column names for the prediction table, ordered by the integer index each
# label holds in label_map (the mapping used to one-hot encode the training
# targets). Deriving the list instead of typing it out by hand guarantees that
# model output column k is always named after the label encoded at position k.
labels1 = sorted(label_map, key=label_map.get)
labels1

In [None]:
# Add up all prediction arrays in y_pred element-wise (starting from a copy of
# the first one) and wrap the total in a DataFrame with one column per label.
result = np.array(y_pred[0])
for extra_pred in y_pred[1:]:
    result += np.array(extra_pred)
result = pd.DataFrame(result, columns=labels1)
result

In [None]:
# Translate the probability predictions into space-separated label strings.

# A label is assigned to an image when its score is strictly above this value.
threshold = 0.2

# One vectorised comparison over the whole table (rows = images, columns =
# labels) replaces slicing, applying and transposing a one-row DataFrame for
# every single image.
above_threshold = result.values > threshold
label_names = result.columns.values

# For each image, join the names of the selected labels in column order; an
# image with no score above the threshold yields an empty string.
preds = [
    ' '.join(label_names[row_mask])
    for row_mask in tqdm(above_threshold, miniters=1000)
]

In [None]:
# Swap the contents of the 'tags' column for the predicted label strings.
submission_sample = submission_sample.assign(tags=preds)
submission_sample.head()

In [None]:
# Write the submission DataFrame to a CSV file; index=False keeps the row
# index out of the output.
submission_sample.to_csv('Amazon Project_2.csv', index=False)