# CarNet

Welcome to the final assignment in this course. It's been a long journey, but you are now ready to unleash the powers of neural networks at any task. In this assignment, we will be working with a collection of photos related to driving vehicles.

In [3]:
import pandas as pd
import numpy as np
import math
import random
from PIL import Image
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
import json
sns.set_context('talk')
%config InlineBackend.figure_format = 'retina'

# Download Data

The dataset consists of 2 segments: 
* train: the metadata file contains the file names and the count of different objects in each image.
* score: the metadata file contains the file names of images to be used for the final predictions.

Target variables (in this order):
1. signal
2. vehicle

These target variables are defined as follows:
* signal =  traffic light + stop sign
* vehicle = car + bus + truck + train + motorcycle + bicycle + airplane + boat

In [4]:
# !aws s3 cp s3://danylo-ucla/carnet_dataset.zip ./
# !unzip -u -q carnet_dataset.zip

In [17]:
train_metadata = pd.read_csv('carnet_dataset/train/metadata.csv')
train_metadata.head()

Unnamed: 0,car,bus,truck,train,motorcycle,bicycle,airplane,boat,traffic light,stop sign,vehicle,signal,file_name
0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,1.0,000000000064.jpg
1,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,000000000073.jpg
2,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,000000000074.jpg
3,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,000000000081.jpg
4,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,000000000086.jpg


In [23]:
# Score metadata defines the expected order of the photos in the submitted predictions file.

score_metadata = pd.read_csv('carnet_dataset/score/metadata.csv')
score_metadata.head()

Unnamed: 0,file_name
0,000000000071.jpg
1,000000000149.jpg
2,000000000260.jpg
3,000000000307.jpg
4,000000000690.jpg


In [19]:
# Data size

print(train_metadata.shape)
print(score_metadata.shape)

(23228, 13)
(5766, 1)


In [6]:
# Display random image
# random.seed(43)

# i = random.randint(0, train_metadata.shape[0])

# file_name = train_metadata['file_name'].iloc[i]
# vehicles = train_metadata['vehicle'].iloc[i]
# signals = train_metadata['signal'].iloc[i] 

# plt.title(f'Vehicles: {vehicles}, Signals: {signals}')
# plt.imshow(Image.open(f'carnet_dataset/train/images/{file_name}'))
# plt.xticks([])
# plt.yticks([])

# Load in Object Detection Results

In [4]:
file_paths = ['detector_result_train_merged.json', 'detector_result_train_extra.json']
file_jsons = None
for file_path in file_paths:
    with open(file_path, 'r') as file:
        file_json = json.load(file)
        if file_jsons == None:
            file_jsons = file_json
        else:
            file_jsons.update(file_json)
    

In [5]:
file_jsons.keys()

dict_keys(['0', '1', '2', '3', '4', '5', '6'])

In [6]:
# Specifying the file path to save the JSON file
# file_path = 'detector_result_train1.json'
file_path = 'detector_result_train_extra_merged.json'

# Saving the dictionary to a JSON file
with open(file_path, 'w') as json_file:
    json.dump(file_jsons, json_file)

In [36]:
# file_path = 'detector_result_train_merged.json'
file_path = 'detector_result_train_extra_merged.json'
file_jsons = None
with open(file_path, 'r') as file:
    file_jsons = json.load(file)

# Heuristic Approach using 7 Object Detection Models

Looping through confidence score threshold from 20% to 50% using either the maximum or the mode predictions

In [75]:
num_images = 3000
detector_outputs = []
for i in range(0, num_images):
    outputs = []
    for j in range(0, len(file_jsons)):
        detection_classes = tf.convert_to_tensor(np.array(file_jsons[str(j)][str(i)]['detection_classes']['value']))
        detection_scores = tf.convert_to_tensor(np.array(file_jsons[str(j)][str(i)]['detection_scores']['value']))
        outputs.append((detection_classes, detection_scores))
    detector_outputs.append(outputs)

In [76]:
print(len(detector_outputs))  # number of images
print(len(detector_outputs[0]))  # number of detectors

3000
7


In [25]:
from collections import Counter
from utils.mscoco import load_class_map


def get_counts(threshold):
    class_map = load_class_map()
    num_images = 3000
    signals = ['traffic light', 'stop sign']
    vehicles = ['car', 'bus', 'truck', 'train', 'motorcycle', 'bicycle', 'airplane', 'boat']
    train_pred_max = []
    train_pred_mode = []

    for i in range(0, num_images):
        model_outputs = detector_outputs[i]
        image_pred = []

        for j in range(0, 7):
            detection_classes, detection_scores = model_outputs[j]
            classes_np = detection_classes[0].numpy()
            scores_np = detection_scores[0].numpy()

            # Store the scores > 0.5
            scores_g50_indices = np.where(scores_np > threshold)
            scores_g50 = scores_np[scores_g50_indices]

            # Store the classes with scores > 0.5
            classes_g50 = classes_np[scores_g50_indices]

            # Turn the classes into labels
            class_labels = class_map.loc[classes_g50].reset_index(drop=True)
            class_scores = pd.Series(scores_g50).rename('score')

            class_predictions = pd.concat([class_labels, class_scores], axis=1)
            class_predictions = class_predictions.sort_values('score', ascending=False)
            class_labels_pred = class_predictions['label'].value_counts()

            # Check if the classes are in the vehicles or signal labels, and keep counts
            veh_ct = class_labels_pred.loc[(class_labels_pred.index.isin(vehicles))].sum()
            sig_ct = class_labels_pred.loc[(class_labels_pred.index.isin(signals))].sum()
            image_pred.append([veh_ct, sig_ct])

        # Get the maximum across models
        max_veh_ct = max(m[0] for m in image_pred)
        max_sig_ct = max(m[1] for m in image_pred)

        # Get the most frequent counts across models
        veh_ct_list = [m[0] for m in image_pred]
        sig_ct_list = [m[1] for m in image_pred]
        veh_ct_counter = Counter(veh_ct_list)
        mode_veh_ct = veh_ct_counter.most_common(1)[0][0]
        sig_ct_counter = Counter(sig_ct_list)
        mode_sig_ct = sig_ct_counter.most_common(1)[0][0]

        # print(image_pred)
        train_pred_max.append([max_veh_ct, max_sig_ct])
        train_pred_mode.append([mode_veh_ct, mode_sig_ct])
        
    return (train_pred_max, train_pred_mode)

In [14]:
def calculate_mse(real_label, pred_label):
    if len(real_label) != len(pred_label):
        raise ValueError("Lists must be of equal length")
        
    sq_diff = [(a-b) **2 for a, b in zip(real_label, pred_label)]
    mse = sum(sq_diff) / len(sq_diff)
    
    return mse

def calculate_avg_mse(mse1, mse2):
    return 1/2 * mse1 + 1/2 * mse2

In [20]:
# Split into train, val and test sets 
# train_data, validate_data, test_data = np.split(train_metadata.sample(frac=1, random_state=43), [int(.8*len(train_metadata)), int(.9*len(train_metadata))])

train_data = train_metadata[:3000]
print(train_data.shape)

(3000, 13)


In [22]:
train_veh_labels = train_data['vehicle'].tolist()

In [23]:
train_sig_labels = train_data['signal'].tolist()

In [26]:
%%time
lowest = ""
lowest_mse = 100000
for i in range (20, 50):
    train_pred_max, train_pred_mode = get_counts(.01*i)
    
    train_y_hat = pd.DataFrame(train_pred_max, columns=['vehicle', 'signal'])
    agg_vehicles_pred = train_y_hat['vehicle'].tolist()
    agg_signals_pred = train_y_hat['signal'].tolist()
    veh_mse = calculate_mse(train_veh_labels, agg_vehicles_pred)
    sig_mse = calculate_mse(train_sig_labels, agg_signals_pred)
    avg_mse = calculate_avg_mse(veh_mse, sig_mse)
    if (avg_mse < lowest_mse):
        lowest = f"{i} max: {avg_mse}"
        lowest_mse = avg_mse
    print(i, "max", "->", veh_mse, sig_mse, avg_mse)
    
    train_y_hat = pd.DataFrame(train_pred_mode, columns=['vehicle', 'signal'])
    agg_vehicles_pred = train_y_hat['vehicle'].tolist()
    agg_signals_pred = train_y_hat['signal'].tolist()
    veh_mse = calculate_mse(train_veh_labels, agg_vehicles_pred)
    sig_mse = calculate_mse(train_sig_labels, agg_signals_pred)
    avg_mse = calculate_avg_mse(veh_mse, sig_mse)
    if (avg_mse < lowest_mse):
        lowest = f"{i} mode: {avg_mse}"
        lowest_mse = avg_mse
    print(i, "mode", "->", veh_mse, sig_mse, avg_mse)
    print()
print(lowest)

20 max -> 387.4866666666667 8.96 198.22333333333333
20 mode -> 8.400333333333334 1.4223333333333332 4.911333333333333

21 max -> 323.15766666666667 7.4286666666666665 165.29316666666668
21 mode -> 8.395 1.4136666666666666 4.904333333333333

22 max -> 265.8566666666667 6.306666666666667 136.08166666666668
22 mode -> 8.059666666666667 1.4026666666666667 4.731166666666667

23 max -> 214.97666666666666 5.450333333333333 110.2135
23 mode -> 8.407333333333334 1.4376666666666666 4.9225

24 max -> 174.04433333333333 4.755333333333334 89.39983333333333
24 mode -> 7.694 1.454 4.574

25 max -> 138.72466666666668 4.214666666666667 71.46966666666667
25 mode -> 9.007666666666667 1.4453333333333334 5.226500000000001

26 max -> 111.268 3.5566666666666666 57.412333333333336
26 mode -> 7.794333333333333 1.454 4.6241666666666665

27 max -> 88.458 3.0456666666666665 45.75183333333333
27 mode -> 8.159 1.4813333333333334 4.820166666666667

28 max -> 69.68666666666667 2.5736666666666665 36.13016666666667
28 

# Blender Model on top of 7 Object Detection Models

In [89]:
num_images = 3000
detector_outputs = []
for i in range(0, num_images):
    dc_tensor = None
    ds_tensor = None
    db_tensor = None
    for j in range(0, len(file_jsons)):
        curr_dc_tensor = tf.convert_to_tensor(np.array(file_jsons[str(j)][str(i)]['detection_classes']['value']))
        curr_ds_tensor = tf.convert_to_tensor(np.array(file_jsons[str(j)][str(i)]['detection_scores']['value']))
        curr_db_tensor = tf.convert_to_tensor(np.array(file_jsons[str(j)][str(i)]['detection_boxes']['value']))
        curr_dc_tensor = tf.slice(curr_dc_tensor, [0,0], [1,30])  # 100
        curr_ds_tensor = tf.slice(curr_ds_tensor, [0,0], [1,30])
        curr_db_tensor = tf.slice(curr_db_tensor, [0,0,0], [1,30,4])
        if dc_tensor is None:
            dc_tensor = curr_dc_tensor
            ds_tensor = curr_ds_tensor
            db_tensor = curr_db_tensor
        else:
            dc_tensor = tf.concat([dc_tensor, curr_dc_tensor], 1)
            ds_tensor = tf.concat([ds_tensor, curr_ds_tensor], 1)
            db_tensor = tf.concat([db_tensor, curr_db_tensor], 1)
    detector_outputs.append((dc_tensor, ds_tensor, db_tensor))

In [38]:
print(len(detector_outputs))

3000


In [46]:
print(len(file_jsons))

7


In [39]:
train_veh_labels = train_data['vehicle'].tolist()

In [40]:
train_sig_labels = train_data['signal'].tolist()

In [41]:
train_car_labels = train_data['car'].tolist()

In [66]:
num_images = len(detector_outputs)
labels = train_car_labels

def build_generator_labeled():
    def generator():
        for i in range(0, num_images):

            current_output = detector_outputs[i]
            model_input = (tf.reshape(current_output[0], [210]), tf.reshape(current_output[1], [210]) ) # tf.reshape(current_output[2], [210, 4]

            model_output = tf.convert_to_tensor(np.array([labels[i]]))

            yield (model_input, model_output)

    return generator

# See Tensorflow Dataset
# https://www.tensorflow.org/api_docs/python/tf/data/Dataset
def build_dataset_labeled():
    scores_signature = tf.TensorSpec(shape=(210,), dtype=tf.float32)  # type: ignore
    # boxes_signature = tf.TensorSpec(shape=(210,4), dtype=tf.float32)

    model_input = (scores_signature, scores_signature) #, boxes_signature
    model_output = tf.TensorSpec(shape=(1,), dtype=tf.int32)  # type: ignore

    dataset_signature = (model_input, model_output)

    dataset = tf.data.Dataset.from_generator(
        build_generator_labeled(), 
        output_signature=dataset_signature
    )

    return dataset

In [67]:
a = build_dataset_labeled()

In [68]:
print(next(iter(a)))

((<tf.Tensor: shape=(210,), dtype=float32, numpy=
array([85.,  3.,  8., 13., 85., 85.,  3.,  1.,  3., 13., 85.,  1.,  8.,
       85.,  3.,  1.,  1.,  3.,  3.,  1.,  3.,  3.,  2.,  8.,  6.,  3.,
        3.,  2.,  3.,  3., 85.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,
        3.,  3.,  3., 85.,  3.,  3.,  3.,  1.,  3.,  3.,  3.,  8.,  3.,
        3.,  8.,  8.,  3.,  1.,  3.,  3.,  3., 85.,  3.,  8.,  3.,  3.,
       65.,  3.,  3.,  8., 44.,  3., 44.,  3.,  3.,  6.,  3.,  3.,  3.,
       65., 81., 65.,  8.,  1.,  1.,  3.,  3.,  3.,  8.,  8., 63., 85.,
        3.,  3.,  3.,  3.,  8.,  3.,  3.,  3.,  3.,  3.,  3., 15.,  8.,
        3.,  3.,  8.,  3.,  3.,  3.,  3.,  2.,  3.,  8.,  8.,  6.,  3.,
        3.,  3.,  3., 85.,  3., 13.,  8.,  3.,  8.,  3.,  3.,  8.,  3.,
        3.,  3.,  3.,  3., 14.,  3.,  8., 14.,  3.,  3.,  3.,  8.,  3.,
        3.,  8.,  1.,  3.,  3., 85.,  1., 85.,  3.,  3.,  3.,  3.,  8.,
        3.,  3.,  3.,  8.,  3.,  3.,  3.,  3.,  3., 85.,  3.,  3.,  3.,
        3., 64

In [69]:
# Split into train, validate, and test sets 

train_size = 2000 
validate_size = 500
test_size = 500

train_set = a.take(train_size)
validate_set = a.skip(train_size).take(validate_size)
test_set = a.skip(train_size).skip(validate_size)

In [70]:
train_set_batch = train_set.batch(16)
validate_set_batch = validate_set.batch(16)
test_set_batch = test_set.batch(16)

In [71]:
inputs = [
    tf.keras.layers.Input((210,)),
    tf.keras.layers.Input((210,)),
    # tf.keras.layers.Input((120,4)),
]

# Process classes
x_classes = tf.keras.layers.Dense(64, activation='relu')(inputs[0])
# x_classes = tf.keras.layers.Dense(64, activation='relu')(x_classes)
x_classes = tf.keras.layers.BatchNormalization()(x_classes)
x_classes = tf.keras.layers.Dropout(0.2)(x_classes)
# x_classes = tf.keras.layers.Dense(64, activation='relu')(x_classes)

# Process scores
x_scores = tf.keras.layers.Dense(64, activation='relu')(inputs[1])
# x_scores = tf.keras.layers.Dense(64, activation='relu')(x_scores)
x_scores = tf.keras.layers.BatchNormalization()(x_scores)
x_scores = tf.keras.layers.Dropout(0.2)(x_scores)
# x_scores = tf.keras.layers.Dense(64, activation='relu')(x_scores)

# Process boxes
# x_boxes = tf.keras.layers.Flatten()(inputs[2])
# x_boxes = tf.keras.layers.Dense(128, activation='relu')(x_boxes)
# x_boxes = tf.keras.layers.Dense(64, activation='relu')(x_boxes)
# x_boxes = tf.keras.layers.BatchNormalization()(x_boxes)
# x_boxes = tf.keras.layers.Dropout(0.2)(x_boxes)
# x_boxes = tf.keras.layers.Dense(64, activation='relu')(x_boxes)

# Combine
outputs = tf.keras.layers.Concatenate(axis=-1)([x_classes, x_scores])
outputs = tf.keras.layers.Dense(64, activation='relu')(outputs)
# outputs = tf.keras.layers.BatchNormalization()(outputs)
# outputs = tf.keras.layers.Dropout(0.2)(outputs)
outputs = tf.keras.layers.Dense(1, activation='sigmoid')(outputs)

# Overall model
model = tf.keras.models.Model(inputs=inputs, outputs=outputs)

model.summary()

Model: "model_3"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_7 (InputLayer)           [(None, 210)]        0           []                               
                                                                                                  
 input_8 (InputLayer)           [(None, 210)]        0           []                               
                                                                                                  
 dense_12 (Dense)               (None, 64)           13504       ['input_7[0][0]']                
                                                                                                  
 dense_13 (Dense)               (None, 64)           13504       ['input_8[0][0]']                
                                                                                            

In [72]:
# Compile the model
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3), 
              loss='mean_squared_error',
              metrics=['mse'])

In [73]:
epochs = 10
batch_size = 128
verbose = 1

# Use early_stopping
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)

model.fit(
        train_set_batch,
        epochs=epochs,
        batch_size=batch_size,
        validation_data=validate_set_batch,
        callbacks=[early_stopping],
        verbose=verbose
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f3341242b90>

In [74]:
test_eval = model.evaluate(test_set_batch)
print("Test MSE:", test_eval)

Test MSE: [8.7991943359375, 8.7991943359375]


# Saving the model and predictions

In [6]:
%%time 
import tensorflow_hub as hub

faster_rcnn_inception_resnet_640 = hub.load("https://tfhub.dev/tensorflow/faster_rcnn/inception_resnet_v2_640x640/1")
# faster_rcnn_inception_resnet_1024 = hub.load("https://tfhub.dev/tensorflow/faster_rcnn/inception_resnet_v2_1024x1024/1")
faster_rcnn_resnet152 = hub.load("https://tfhub.dev/tensorflow/faster_rcnn/resnet152_v1_640x640/1")
# faster_rcnn_resnet101 = hub.load("https://tfhub.dev/tensorflow/faster_rcnn/resnet101_v1_640x640/1")
# faster_rcnn_resnet50 = hub.load("https://tfhub.dev/tensorflow/retinanet/resnet50_v1_fpn_640x640/1")
# ssd_mobilenet_v1_fpn = hub.load("https://tfhub.dev/tensorflow/ssd_mobilenet_v1/fpn_640x640/1")
ssd_mobilenet_v2 = hub.load("https://tfhub.dev/tensorflow/ssd_mobilenet_v2/2")
ssd_mobilenet_v2_fpnlite = hub.load("https://tfhub.dev/tensorflow/ssd_mobilenet_v2/fpnlite_320x320/1")

CPU times: user 59 s, sys: 3.03 s, total: 1min 2s
Wall time: 1min 10s


In [7]:
# Generate score data

def build_generator_score():
    metadata = pd.read_csv('carnet_dataset/score/metadata.csv')
    
    for _, row in metadata.iterrows():
        scoring_path = 'carnet_dataset/score/images/' + row['file_name']
        model_input = np.array(Image.open(scoring_path)).astype(np.uint8)
        yield model_input

def build_dataset_score() -> tf.data.Dataset:
    model_input = tf.TensorSpec(shape=(224, 224, 3), dtype=tf.uint8)  # type: ignore

    dataset_signature = model_input

    dataset = tf.data.Dataset.from_generator(
        build_generator_score, 
        output_signature=dataset_signature
    )

    return dataset

In [8]:
# Batch score set 
score_batch = build_dataset_score().batch(1)
print(next(iter(score_batch)).shape)

(1, 224, 224, 3)


In [9]:
# Extract score images for detector
score_images = []

for score_image in score_batch:
    score_images.append(np.expand_dims(np.array(score_image), 0))
    
score_images = tf.concat(score_images, axis=0)
print(type(score_images))

<class 'tensorflow.python.framework.ops.EagerTensor'>


In [10]:
score_images.shape

TensorShape([5766, 1, 224, 224, 3])

In [11]:
print(next(iter(score_images)).shape)

(1, 224, 224, 3)


In [None]:
%%time
modules = [faster_rcnn_inception_resnet_640, ssd_mobilenet_v2, ssd_mobilenet_v2_fpnlite, faster_rcnn_resnet152]

i=0
results_list = []
for mod in modules:
    print(f'Object detection model: {i}')
    results = [mod(x) for x in score_images]
    results_list.append(results)
    print(f'# of images: {len(results)}')
    i+=1
    
print(len(results_list))

Object detection model: 0
# of images: 5766
Object detection model: 1
# of images: 5766
Object detection model: 2


ResourceExhaustedError: Graph execution error:

2 root error(s) found.
  (0) RESOURCE_EXHAUSTED:  OOM when allocating tensor with shape[1,96,160,160] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[{{node ssd_mobile_net_v2fpn_keras_feature_extractor/functional_1/block_1_expand_BN/FusedBatchNormV3}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.

	 [[StatefulPartitionedCall/Postprocessor/BatchMultiClassNonMaxSuppression/MultiClassNonMaxSuppression/Reshape_8/_50]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.

  (1) RESOURCE_EXHAUSTED:  OOM when allocating tensor with shape[1,96,160,160] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[{{node ssd_mobile_net_v2fpn_keras_feature_extractor/functional_1/block_1_expand_BN/FusedBatchNormV3}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.

0 successful operations.
0 derived errors ignored. [Op:__inference_restored_function_body_445307]

In [57]:
%%time
modules = [faster_rcnn_resnet152, ssd_mobilenet_v2_fpnlite]

i=0
results_list = []
for mod in modules:
    print(f'Object detection model: {i}')
    results = [mod(x) for x in score_images]
    results_list.append(results)
    print(f'# of images: {len(results)}')
    i+=1
    
print(len(results_list))

Object detection model: 0
# of images: 5766
Object detection model: 1


ResourceExhaustedError: Graph execution error:

2 root error(s) found.
  (0) RESOURCE_EXHAUSTED:  OOM when allocating tensor with shape[1,96,160,160] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[{{node ssd_mobile_net_v2fpn_keras_feature_extractor/functional_1/block_1_expand_BN/FusedBatchNormV3}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.

	 [[StatefulPartitionedCall/Postprocessor/BatchMultiClassNonMaxSuppression/MultiClassNonMaxSuppression/Reshape_8/_50]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.

  (1) RESOURCE_EXHAUSTED:  OOM when allocating tensor with shape[1,96,160,160] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[{{node ssd_mobile_net_v2fpn_keras_feature_extractor/functional_1/block_1_expand_BN/FusedBatchNormV3}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.

0 successful operations.
0 derived errors ignored. [Op:__inference_restored_function_body_445307]

In [12]:
%%time

# ssd_mobilenet_v1_fpn = hub.load("https://tfhub.dev/tensorflow/ssd_mobilenet_v1/fpn_640x640/1")
modules = [ssd_mobilenet_v2_fpnlite]

i=0
results_list = []
for mod in modules:
    print(f'Object detection model: {i}')
    results = [mod(x) for x in score_images]
    results_list.append(results)
    print(f'# of images: {len(results)}')
    i+=1
    
print(len(results_list))

Object detection model: 0
# of images: 5766
1
CPU times: user 32min 49s, sys: 3min 49s, total: 36min 39s
Wall time: 2min 43s


In [13]:
print(len(results_list[0])) # number of images
print(len(results_list[0][0].keys())) # number of items for each image
print(results_list[0][0].keys())

5766
8
dict_keys(['raw_detection_scores', 'detection_anchor_indices', 'raw_detection_boxes', 'num_detections', 'detection_classes', 'detection_scores', 'detection_boxes', 'detection_multiclass_scores'])


In [14]:
num_images = len(results_list[0])
detector_result_dict = {}
for i in list(range(num_images)):
    detector_result_dict[i] = {'detection_classes': {},
                               'detection_boxes': {},
                               'detection_scores': {}
                              }
    
    detector_result_dict[i]['detection_classes']['shape'] = results_list[0][i]['detection_classes'].shape.as_list()
    detector_result_dict[i]['detection_classes']['value'] = results_list[0][i]['detection_classes'].numpy().tolist()
    
    detector_result_dict[i]['detection_boxes']['shape'] = results_list[0][i]['detection_boxes'].shape.as_list()
    detector_result_dict[i]['detection_boxes']['value'] = results_list[0][i]['detection_boxes'].numpy().tolist()
    
    detector_result_dict[i]['detection_scores']['shape'] = results_list[0][i]['detection_scores'].shape.as_list()
    detector_result_dict[i]['detection_scores']['value'] = results_list[0][i]['detection_scores'].numpy().tolist()

In [15]:
print(len(detector_result_dict.keys()))

5766


In [16]:
# Specifying the file path to save the JSON file
# file_path = 'detector_result_score1.json'
# file_path = 'detector_result_score3.json'
file_path = 'detector_result_score4.json'

# Saving the dictionary to a JSON file
with open(file_path, 'w') as json_file:
    json.dump(detector_result_dict, json_file)

In [50]:
num_images = len(results_list[0])
detector_result_dict = {}
for i in list(range(num_images)):
    detector_result_dict[i] = {'detection_classes': {},
                               'detection_boxes': {},
                               'detection_scores': {}
                              }
    
    detector_result_dict[i]['detection_classes']['shape'] = results_list[1][i]['detection_classes'].shape.as_list()
    detector_result_dict[i]['detection_classes']['value'] = results_list[1][i]['detection_classes'].numpy().tolist()
    
    detector_result_dict[i]['detection_boxes']['shape'] = results_list[1][i]['detection_boxes'].shape.as_list()
    detector_result_dict[i]['detection_boxes']['value'] = results_list[1][i]['detection_boxes'].numpy().tolist()
    
    detector_result_dict[i]['detection_scores']['shape'] = results_list[1][i]['detection_scores'].shape.as_list()
    detector_result_dict[i]['detection_scores']['value'] = results_list[1][i]['detection_scores'].numpy().tolist()

In [52]:
print(len(detector_result_dict.keys()))

# Specifying the file path to save the JSON file
file_path = 'detector_result_score2.json'

# Saving the dictionary to a JSON file
with open(file_path, 'w') as json_file:
    json.dump(detector_result_dict, json_file)

5766


In [12]:
%%time

# ssd_mobilenet_v1_fpn = hub.load("https://tfhub.dev/tensorflow/ssd_mobilenet_v1/fpn_640x640/1")
modules = [ssd_mobilenet_v2_fpnlite]

i=0
results_list = []
for mod in modules:
    print(f'Object detection model: {i}')
    results = [mod(x) for x in score_images]
    results_list.append(results)
    print(f'# of images: {len(results)}')
    i+=1
    
print(len(results_list))

Object detection model: 0
# of images: 5766
1
CPU times: user 32min 49s, sys: 3min 49s, total: 36min 39s
Wall time: 2min 43s


In [13]:
print(len(results_list[0])) # number of images
print(len(results_list[0][0].keys())) # number of items for each image
print(results_list[0][0].keys())

5766
8
dict_keys(['raw_detection_scores', 'detection_anchor_indices', 'raw_detection_boxes', 'num_detections', 'detection_classes', 'detection_scores', 'detection_boxes', 'detection_multiclass_scores'])


In [27]:
file_path = 'detector_result_score1.json'

with open(file_path, 'r') as file:
    detector_result_score1 = json.load(file)

In [28]:
print(len(detector_result_score1.keys()))

5766


In [29]:
detector_result_score1['1']

{'detection_classes': {'shape': [1, 100],
  'value': [[38.0,
    38.0,
    1.0,
    1.0,
    38.0,
    1.0,
    1.0,
    1.0,
    1.0,
    38.0,
    3.0,
    1.0,
    1.0,
    1.0,
    1.0,
    19.0,
    1.0,
    1.0,
    1.0,
    38.0,
    38.0,
    1.0,
    38.0,
    38.0,
    21.0,
    8.0,
    1.0,
    38.0,
    1.0,
    38.0,
    1.0,
    1.0,
    1.0,
    38.0,
    1.0,
    1.0,
    16.0,
    3.0,
    38.0,
    1.0,
    38.0,
    19.0,
    1.0,
    1.0,
    38.0,
    38.0,
    1.0,
    1.0,
    38.0,
    1.0,
    38.0,
    1.0,
    1.0,
    38.0,
    1.0,
    1.0,
    1.0,
    1.0,
    38.0,
    2.0,
    1.0,
    38.0,
    16.0,
    1.0,
    1.0,
    38.0,
    21.0,
    3.0,
    1.0,
    1.0,
    1.0,
    2.0,
    2.0,
    16.0,
    15.0,
    3.0,
    1.0,
    1.0,
    62.0,
    1.0,
    62.0,
    21.0,
    1.0,
    1.0,
    62.0,
    1.0,
    19.0,
    1.0,
    38.0,
    16.0,
    19.0,
    19.0,
    38.0,
    3.0,
    16.0,
    16.0,
    1.0,
    1.0,
    1.0,
    1.0]]},
 'det

In [11]:
# tf.convert_to_tensor(np.array(detector_result_score1['1']['detection_classes']['value']))

file_paths = ["detector_result_score"+str(i)+".json" for i in range(1, 5)]
file_jsons = {}
for i in range(0,4):
    file_path = file_paths[i]
    with open(file_path, 'r') as file:
        file_jsons[str(i)] = json.load(file)

file_path = 'detector_result_score_merged.json'

# Saving the dictionary to a JSON file
with open(file_path, 'w') as json_file:
    json.dump(file_jsons, json_file)

In [4]:
file_paths = ["detector_result_score_merged.json", "detector_result_score_merged2.json"]
file_jsons = {}
for file_path in file_paths:
    with open(file_path, 'r') as file:
        file_jsons.update(json.load(file))

file_path = 'detector_result_score_merged_final.json'

# Saving the dictionary to a JSON file
with open(file_path, 'w') as json_file:
    json.dump(file_jsons, json_file)

In [3]:
file_paths = ["detector_result_score_extra_"+str(i)+".json" for i in range(1, 4)]
file_jsons = None
for file_path in file_paths:
    with open(file_path, 'r') as file:
        file_json = json.load(file)
        if file_jsons is None:
            file_jsons = file_json
        else:
            for i in range(4, 7):
                file_jsons[str(i)].update(file_json[str(i)])

file_path = 'detector_result_score_merged2.json'

# Saving the dictionary to a JSON file
with open(file_path, 'w') as json_file:
    json.dump(file_jsons, json_file)

In [5]:
file_path = 'detector_result_score_merged_final.json'
with open(file_path, 'r') as file:
    file_jsons = json.load(file)

In [16]:
file_jsons.keys()
for i in range(0, 7):
    print(file_jsons[str(i)]['0']['detection_scores']['value'][0][0])

0.9995840191841125
0.5798070430755615
0.9975948929786682
0.5478035807609558
0.9971489310264587
0.5973495244979858
0.9974767565727234


In [17]:
detector_outputs = []
for i in range(0, 5766):
    outputs = []
    for j in range(0, len(file_jsons)):
        detection_classes = tf.convert_to_tensor(np.array(file_jsons[str(j)][str(i)]['detection_classes']['value']))
        detection_scores = tf.convert_to_tensor(np.array(file_jsons[str(j)][str(i)]['detection_scores']['value']))
        outputs.append((detection_classes, detection_scores))
    detector_outputs.append(outputs)

In [18]:
print(len(detector_outputs))
print(len(detector_outputs[0]))

5766
7


In [20]:
from utils.mscoco import load_class_map
class_map = load_class_map()

signals = ['traffic light', 'stop sign']
vehicles = ['car', 'bus', 'truck', 'train', 'motorcycle', 'bicycle', 'airplane', 'boat']
confidence_score_threshold = 0.43 # 0.42 using the first 4 detectors

score_pred = []

for i in range(0, 5766):
    model_outputs = detector_outputs[i]
    image_pred = []
    
    for j in range(0, 7):
        detection_classes, detection_scores = model_outputs[j]
        classes_np = detection_classes[0].numpy()
        scores_np = detection_scores[0].numpy()

        # Store the scores > threshold
        scores_g50_indices = np.where(scores_np > confidence_score_threshold)
        scores_g50 = scores_np[scores_g50_indices]
        
        # Store the classes with scores > threshold
        classes_g50 = classes_np[scores_g50_indices]
        
        # Turn the classes into labels
        class_labels = class_map.loc[classes_g50].reset_index(drop=True)
        class_scores = pd.Series(scores_g50).rename('score')
        
        class_predictions = pd.concat([class_labels, class_scores], axis=1)
        class_predictions = class_predictions.sort_values('score', ascending=False)
        class_labels_pred = class_predictions['label'].value_counts()
        
        # Check if the classes are in the vehicles or signal labels, and keep counts
        veh_ct = class_labels_pred.loc[(class_labels_pred.index.isin(vehicles))].sum()
        sig_ct = class_labels_pred.loc[(class_labels_pred.index.isin(signals))].sum()
        image_pred.append([veh_ct, sig_ct])
        
        # Get the maximum across the models
        max_veh_ct = max(m[0] for m in image_pred)
        max_sig_ct = max(m[1] for m in image_pred)
    
    # print(image_pred)
    score_pred.append([max_veh_ct, max_sig_ct])
    # print(score_pred)
    # break

In [21]:
print(len(score_pred))

5766


In [22]:
score_y_hat = pd.DataFrame(score_pred, columns=['vehicle', 'signal'])
score_y_hat = score_y_hat[['signal','vehicle']]
score_y_hat.head()

Unnamed: 0,signal,vehicle
0,0,2
1,0,0
2,0,1
3,0,1
4,0,1


In [24]:
# Save your predictions on the Score segment as a Pandas data frame into a variable named `score_y_hat`.
# The data frame should contain 2 columns: signal and vehicle.

# Use the following asserts to check the type and shape of the final predictions.
assert type(score_y_hat) == pd.DataFrame
assert score_y_hat.shape == (score_metadata.shape[0], 2)
assert (score_y_hat.columns == ['signal', 'vehicle']).all()

# Use the following code to save the final predictions.
import os 
model_dir = 'carnet_model'
os.makedirs(model_dir, exist_ok=True)
score_y_hat.to_parquet(f'{model_dir}/score_y_hat.parquet')

In [27]:
# EOF