In [6]:
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import os
import sklearn.model_selection

# SYDE 522 Final Project

## By Anirudh Kilambi and Youngwoo Lee

The purpose of this project is to apply techniques used in class to detect and identify faults and defects found in sewer pipelines. To accomplish this, we used training data found from the 2022 VideoPipe challenge for Real-World Video Understanding for Urban Pipe Inspection. For this project, we investigated the effectiveness of Convolutional Neural Networks (CNN) and Support Vector Machines (SVM) in order to come to an experimental conclusion on which model is most effective for this task.

## Data Loading

In [16]:
raw_data_path = "data/raw_images/"
categorization = [("4736", 0), ("2383", 0), ("d20017", 1), ("26852", 1)]

folders = os.listdir(raw_data_path)
sewer_data = {}
sewer_data["data"] = []
sewer_data["target"] = []

for folder, cat in categorization:
    images = os.listdir(f"{raw_data_path}{folder}/")
    for image in images:
        sewer_data["data"].append(f"{raw_data_path}{folder}/{image}")
        sewer_data["target"].append(cat)

    
x = sewer_data["data"]
y = sewer_data["target"]

x_trainvalid, x_test, y_trainvalid, y_test = sklearn.model_selection.train_test_split(
    x, y, test_size=0.2, shuffle=True, random_state=0
)

x_train, x_valid, y_train, y_valid = sklearn.model_selection.train_test_split(
    x, y, test_size=0.2, shuffle=True, random_state=0
)

print(len(y), len(y_train,), len(y_test), len(y_valid))
print(x[1].split("/")[2])

1066 852 214 214
4736


In [17]:
def moveFiles(x,y,type):
    xpath = f"data/{type}/0/"
    ypath = f"data/{type}/1/"
    if not os.path.exists(xpath):
        os.makedirs(xpath)
    if not os.path.exists(ypath):
        os.makedirs(ypath)
    
    for i in range(len(x)):
        x_split = x[i].split("/")
        img_name = x_split[2] + x_split[3]

        if y[i] == 0:
            os.system(f"cp {x[i]} {xpath}/{img_name}")
        else:
            os.system(f"cp {x[i]} {ypath}/{img_name}")

moveFiles(x_train, y_train, "train")
moveFiles(x_test, y_test, "test")
moveFiles(x_valid, y_valid, "valid")

In [28]:
train_ds = tf.keras.utils.image_dataset_from_directory(
    directory='data/train/',
    labels='inferred',
    label_mode='categorical',
    batch_size=1000,
    image_size=(1280, 720))

test_ds = tf.keras.utils.image_dataset_from_directory(
    directory='data/test/',
    labels='inferred',
    label_mode='categorical',
    batch_size=1000,
    image_size=(1280, 720))

valid_ds = tf.keras.utils.image_dataset_from_directory(
    directory='data/valid/',
    labels='inferred',
    label_mode='categorical',
    batch_size=1000,
    image_size=(1280, 720))

print(valid_ds)

Found 852 files belonging to 2 classes.
Found 214 files belonging to 2 classes.
Found 214 files belonging to 2 classes.
<_PrefetchDataset element_spec=(TensorSpec(shape=(None, 1280, 720, 3), dtype=tf.float32, name=None), TensorSpec(shape=(None, 2), dtype=tf.float32, name=None))>


## Part 1: Convolutional Neural Networks 

In [34]:
model = tf.keras.models.Sequential([
  tf.keras.layers.InputLayer(input_shape = (1280, 720, 3)),
  tf.keras.layers.Flatten(input_shape=(1280, 720, 3)),   
  tf.keras.layers.Dense(32, activation='relu'),    # 32 neurons in the middle "hidden" layer
  tf.keras.layers.Dense(2, activation='relu')     # 10 outputs (one for each category)
])

def my_loss(y_true, y_predict):
    return (y_true-y_predict)**2

model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=0.1), # use stochastic gradient descent
              loss='categorical_crossentropy',
              metrics=['accuracy']  # in addition to the loss, also compute the categorization accuracy
             )

model.fit(train_ds, epochs=5, validation_data=valid_ds)

plt.figure(figsize=(12,5))
plt.subplot(1, 2, 1)
plt.plot(model.history.history['accuracy'], c='k')
plt.ylabel('training accuracy')
plt.xlabel('epochs')
plt.twinx()
plt.plot(model.history.history['loss'], c='b')
plt.ylabel('training loss (error)')
plt.title('training')

plt.subplot(1, 2, 2)
plt.plot(model.history.history['val_accuracy'], c='k')
plt.ylabel('testing accuracy')
plt.xlabel('epochs')
plt.twinx()
plt.plot(model.history.history['val_loss'], c='b')
plt.ylabel('testing loss (error)')
plt.title('testing')
plt.tight_layout()
plt.show()

Epoch 1/5
