In [4]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
cd /content/drive/.shortcut-targets-by-id/1rRUZuKu_DvqrEfrad09qFlSUyUsVZj-C/DL

/content/drive/.shortcut-targets-by-id/1rRUZuKu_DvqrEfrad09qFlSUyUsVZj-C/DL


In [1]:
import pandas as pd                                     # Data analysis and manipultion tool
import numpy as np                                      # Fundamental package for linear algebra and multidimensional arrays
import tensorflow as tf                                 # Deep Learning Tool
import os                                               # OS module in Python provides a way of using operating system dependent functionality
import cv2                                              # Library for image processing
from sklearn.model_selection import train_test_split    # For splitting the data into train and validation set

In [6]:
labels = pd.read_csv("weather/Training_set.csv")   # loading the labels
labels.head()           # will display the first five rows in labels dataframe

Unnamed: 0,filename,label
0,Image_1.jpg,sunrise
1,Image_2.jpg,shine
2,Image_3.jpg,cloudy
3,Image_4.jpg,shine
4,Image_5.jpg,sunrise


In [7]:
labels.tail()            # will display the last five rows in labels dataframe

Unnamed: 0,filename,label
1043,Image_1044.jpg,foggy
1044,Image_1045.jpg,sunrise
1045,Image_1046.jpg,cloudy
1046,Image_1047.jpg,rainy
1047,Image_1048.jpg,sunrise


In [9]:
file_paths = [[fname, 'weather/train/' + fname] for fname in labels['filename']]

In [10]:
# Confirm if number of images is same as number of labels given
if len(labels) == len(file_paths):
    print('Number of labels i.e. ', len(labels), 'matches the number of filenames i.e. ', len(file_paths))
else:
    print('Number of labels does not match the number of filenames')

Number of labels i.e.  1048 matches the number of filenames i.e.  1048


In [11]:
images = pd.DataFrame(file_paths, columns=['filename', 'filepaths'])
images.head()

Unnamed: 0,filename,filepaths
0,Image_1.jpg,weather/train/Image_1.jpg
1,Image_2.jpg,weather/train/Image_2.jpg
2,Image_3.jpg,weather/train/Image_3.jpg
3,Image_4.jpg,weather/train/Image_4.jpg
4,Image_5.jpg,weather/train/Image_5.jpg


In [12]:
train_data = pd.merge(images, labels, how = 'inner', on = 'filename')
train_data.head()       

Unnamed: 0,filename,filepaths,label
0,Image_1.jpg,weather/train/Image_1.jpg,sunrise
1,Image_2.jpg,weather/train/Image_2.jpg,shine
2,Image_3.jpg,weather/train/Image_3.jpg,cloudy
3,Image_4.jpg,weather/train/Image_4.jpg,shine
4,Image_5.jpg,weather/train/Image_5.jpg,sunrise


In [13]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
train_data['label'] = le.fit_transform(train_data['label'])

In [14]:
data = []     # initialize an empty numpy array
image_size = 100      # image size taken is 100 here. one can take other size too
for i in range(len(train_data)):
  
  img_array = cv2.imread(train_data['filepaths'][i], cv2.IMREAD_GRAYSCALE)   # converting the image to gray scale

  new_img_array = cv2.resize(img_array, (image_size, image_size))      # resizing the image array
  data.append([new_img_array, train_data['label'][i]])

In [15]:
# image pixels of a image
data[5]

[array([[129, 130, 131, ..., 129, 128, 127],
        [129, 130, 131, ..., 129, 128, 128],
        [129, 130, 131, ..., 130, 130, 129],
        ...,
        [ 14,  14,  14, ...,  18,  18,  18],
        [ 16,  13,  14, ...,  17,  17,  17],
        [ 24,  11,  13, ...,  16,  17,  17]], dtype=uint8), 0]

In [16]:
np.random.shuffle(data)

In [17]:
x = []
y = []
for image in data:
  x.append(image[0])
  y.append(image[1])

# converting x & y to numpy array as they are list
x = np.array(x)
y = np.array(y)

In [18]:
np.unique(y, return_counts=True)

(array([0, 1, 2, 3, 4]), array([210, 210, 209, 174, 245]))

In [19]:
x =  x.reshape(-1, 100, 100, 1)

In [20]:
# split the data
X_train, X_val, y_train, y_val = train_test_split(x,y,test_size=0.3, random_state = 42)

In [21]:
cnn = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu', input_shape=(100, 100, 1)),
    tf.keras.layers.MaxPooling2D((2, 2)),
    
    tf.keras.layers.Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    
    # tf.keras.layers.Flatten(input_shape=(100, 100, 1)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(5, activation='softmax')
])

In [22]:
cnn.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

In [23]:
cnn.fit(X_train, y_train, epochs=200, batch_size=1)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

<keras.callbacks.History at 0x7f6430210750>

In [24]:
cnn.evaluate(X_val, y_val)



[116.1197280883789, 0.3460317552089691]

In [26]:
# Loading the order of the image's name that has been provided
test_image_order = pd.read_csv("weather/Testing_set.csv")
test_image_order.head()

Unnamed: 0,filename
0,Image_1.jpg
1,Image_2.jpg
2,Image_3.jpg
3,Image_4.jpg
4,Image_5.jpg


In [27]:
file_paths = [[fname, 'weather/test/' + fname] for fname in test_image_order['filename']]

In [28]:
# Confirm if number of images is same as number of labels given
if len(test_image_order) == len(file_paths):
    print('Number of image names i.e. ', len(test_image_order), 'matches the number of file paths i.e. ', len(file_paths))
else:
    print('Number of image names does not match the number of filepaths')

Number of image names i.e.  450 matches the number of file paths i.e.  450


In [29]:
test_images = pd.DataFrame(file_paths, columns=['filename', 'filepaths'])
test_images.head()

Unnamed: 0,filename,filepaths
0,Image_1.jpg,weather/test/Image_1.jpg
1,Image_2.jpg,weather/test/Image_2.jpg
2,Image_3.jpg,weather/test/Image_3.jpg
3,Image_4.jpg,weather/test/Image_4.jpg
4,Image_5.jpg,weather/test/Image_5.jpg


In [30]:
test_pixel_data = []     # initialize an empty numpy array
image_size = 100      # image size taken is 100 here. one can take other size too
for i in range(len(test_images)):
  
  img_array = cv2.imread(test_images['filepaths'][i], cv2.IMREAD_GRAYSCALE)   # converting the image to gray scale

  new_img_array = cv2.resize(img_array, (image_size, image_size))      # resizing the image array

  test_pixel_data.append(new_img_array)

In [31]:
test_pixel_data = np.array(test_pixel_data)

In [32]:
test_pixel_data =  test_pixel_data.reshape(-1, 100, 100, 1)

In [33]:
pred = cnn.predict(test_pixel_data)

In [34]:
# The predicted values are the probabilities value
pred[0]

array([0., 0., 1., 0., 0.], dtype=float32)

In [35]:
prediction = []
for value in pred:
  prediction.append(np.argmax(value))

In [36]:
predictions = le.inverse_transform(prediction)

In [37]:
res = pd.DataFrame({'filename': test_images['filename'], 'label': predictions})  # prediction is nothing but the final predictions of your model on input features of your new unseen test data
res.to_csv("submission.csv", index = False)      # the csv file will be saved locally on the same location where this notebook is located.

In [38]:
res = pd.DataFrame({'filename': test_images['filename'], 'label': predictions})  # prediction is nothing but the final predictions of your model on input features of your new unseen test data
res.to_csv("submission.csv", index = False) 

# To download the csv file locally
from google.colab import files        
files.download('submission.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>