In [86]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [87]:
# Load the training data
train_df = pd.read_csv('train_v2.csv')

train_df

Unnamed: 0,image_name,tags
0,train_0,haze primary
1,train_1,agriculture clear primary water
2,train_2,clear primary
3,train_3,clear primary
4,train_4,agriculture clear habitation primary road
...,...,...
40474,train_40474,clear primary
40475,train_40475,cloudy
40476,train_40476,agriculture clear primary
40477,train_40477,agriculture clear primary road


In [88]:
# Add .jpg to the image_name
train_df['image_name'] = train_df['image_name'] + '.jpg'

train_df

Unnamed: 0,image_name,tags
0,train_0.jpg,haze primary
1,train_1.jpg,agriculture clear primary water
2,train_2.jpg,clear primary
3,train_3.jpg,clear primary
4,train_4.jpg,agriculture clear habitation primary road
...,...,...
40474,train_40474.jpg,clear primary
40475,train_40475.jpg,cloudy
40476,train_40476.jpg,agriculture clear primary
40477,train_40477.jpg,agriculture clear primary road


In [89]:
# Fix tags column
train_df['tags'] = train_df['tags'].apply(lambda entry: entry.split(' '))

# Get a list of all unique labels
all_labels = set()
for tags in train_df['tags']:
    all_labels.update(tags)

# Create a dictionary to map labels to integers
label_to_int = {label: i for i, label in enumerate(all_labels)}
num_classes = len(label_to_int)

# Convert labels to one-hot encoded vectors
train_df['labels'] = train_df['tags'].apply(lambda entry: [label_to_int[label] for label in entry])

# Create separate columns for each label
for label in all_labels:
    train_df[label] = train_df['tags'].apply(lambda entry: int(label in entry))

train_df

Unnamed: 0,image_name,tags,labels,conventional_mine,road,agriculture,artisinal_mine,primary,habitation,haze,cultivation,selective_logging,partly_cloudy,slash_burn,blooming,cloudy,water,bare_ground,blow_down,clear
0,train_0.jpg,"[haze, primary]","[6, 4]",0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0
1,train_1.jpg,"[agriculture, clear, primary, water]","[2, 16, 4, 13]",0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,1
2,train_2.jpg,"[clear, primary]","[16, 4]",0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1
3,train_3.jpg,"[clear, primary]","[16, 4]",0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1
4,train_4.jpg,"[agriculture, clear, habitation, primary, road]","[2, 16, 5, 4, 1]",0,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40474,train_40474.jpg,"[clear, primary]","[16, 4]",0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1
40475,train_40475.jpg,[cloudy],[12],0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
40476,train_40476.jpg,"[agriculture, clear, primary]","[2, 16, 4]",0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1
40477,train_40477.jpg,"[agriculture, clear, primary, road]","[2, 16, 4, 1]",0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1


In [90]:
# Split the data into training and validation sets
train_set, val_set = train_test_split(train_df, test_size = 0.2, random_state = 42)

In [91]:
# Define the image size and batch size
image_size = (128, 128)
batch_size = 32

In [92]:
# Create an image data generator with augmentation
train_datagen = ImageDataGenerator(
    rescale=1./255,
    horizontal_flip=True,
    vertical_flip=True,
    rotation_range=40,
    zoom_range=0.2,
    shear_range=0.2)

In [93]:
# Load and preprocess the training images
train_generator = train_datagen.flow_from_dataframe(
    dataframe = train_set,
    directory = 'training-images/',
    x_col = 'image_name',
    y_col = list(all_labels),
    target_size = image_size,
    class_mode = 'raw',
    batch_size = batch_size,
    subset = 'training',
    interpolation = 'nearest')

train_generator

Found 32383 validated image filenames.


<keras.preprocessing.image.DataFrameIterator at 0x1432f2a70>

In [94]:
# Load and preprocess the validation images
val_generator = train_datagen.flow_from_dataframe(
    dataframe = train_df,
    directory = 'training-images/',
    x_col = 'image_name',
    y_col = list(all_labels),
    target_size = image_size,
    class_mode = 'raw',
    batch_size = batch_size,
    subset = 'training',
    interpolation = 'nearest')

val_generator

Found 40479 validated image filenames.


<keras.preprocessing.image.DataFrameIterator at 0x1430ffac0>

In [95]:
# Create the CNN model
model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(32, (3, 3), activation = 'relu', input_shape = (image_size[0], image_size[1], 3)),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Conv2D(128, (3, 3), activation = 'relu'),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Conv2D(128, (3, 3), activation = 'relu'),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(512, activation = 'relu'),
    tf.keras.layers.Dense(num_classes, activation = 'sigmoid')
])

# Compile the model
model.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])

In [96]:
# Train the model
model.fit_generator(
    train_generator,
    steps_per_epoch = train_generator.samples//train_generator.batch_size,
    epochs = 30,
    validation_data=val_generator, validation_steps = val_generator.samples//val_generator.batch_size,
    verbose = 1)

  model.fit_generator(


Epoch 1/30


2023-06-06 01:24:56.406029: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]




2023-06-06 01:31:18.492562: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x1430a48b0>

In [99]:
# Exporting happens here
model.save('model-60')



INFO:tensorflow:Assets written to: model-60/assets


INFO:tensorflow:Assets written to: model-60/assets
