In [None]:
# export locations of the scripts
from export_locations import export_locations

In [None]:
import pandas as pd
# Disable pandas' chained-assignment (SettingWithCopy) warning for the whole notebook.
pd.options.mode.chained_assignment = None  # default='warn'
import matplotlib.pyplot as plt
import sys
# Extend the import path with the dictionary folder (presumably where sign_names_dict lives — TODO confirm).
sys.path.append("../sign_recognition/dict/") 

In [None]:
from sign_names_dict import sign_name_carolo_dict

def convert_to_carolo_csv(df, conversion_dict):
    """Keep the convertible annotations and translate their class ids.

    Args:
        df: Annotation frame with at least the columns ``ClassId`` and ``Roi.X1``.
        conversion_dict: Mapping from the dataset's class ids to the target
            class ids. Rows whose ``ClassId`` is not a key of it are dropped.

    Returns:
        A new DataFrame holding the remaining rows with replaced ``ClassId``
        values. The frame passed in is left untouched.

    Side effects:
        Displays the first converted rows and shows a bar chart with the
        number of instances per class (labelled via ``sign_name_carolo_dict``).
    """
    # Take an explicit copy and assign the replaced column back. Calling
    # ``replace(..., inplace=True)`` on a column of the filtered frame is
    # chained assignment, which pandas does not guarantee to write through
    # (it is a no-op under copy-on-write).
    converted = df[df['ClassId'].isin(conversion_dict)].copy()
    converted['ClassId'] = converted['ClassId'].replace(conversion_dict)
    display(converted.head())

    # Separate copy with readable names, only used for the overview plot.
    df_visual = converted.copy()
    df_visual['ClassId'] = df_visual['ClassId'].replace(sign_name_carolo_dict)
    grouped_classes = df_visual.groupby('ClassId').count()['Roi.X1']  # Group the dataframe by classes.
    grouped_classes.plot(kind='bar', figsize=(9,4),  title='Amount of Instances per Class', legend=False)
    plt.show()
    return converted

# GTSDB

In [None]:
from sign_names_dict import sign_name_GTSRB_full_dict

In [None]:
# Load the GTSDB ground truth; `df` keeps the class ids exactly as stored in the csv.
df = pd.read_csv('/data/Images/Datasets/GTSDB/gt.csv')
# Readable copy for inspection. The replaced column is assigned back explicitly:
# `df_named['ClassId'].replace(..., inplace=True)` is chained assignment and is
# not guaranteed to update `df_named` (it does nothing under copy-on-write).
df_named = df.copy()
df_named['ClassId'] = df_named['ClassId'].replace(sign_name_GTSRB_full_dict)
df_named.head()  # prints top 5 rows

In [None]:
# Count the annotations of every class and show the counts as a bar chart.
grouped_classes = df_named.groupby('ClassId')['Roi.X1'].count()
grouped_classes.plot(
    kind='bar',
    figsize=(9, 4),
    title='Amount of Instances per Class',
    legend=False,
)
plt.show()

In [None]:
# Preview the frame as loaded; only the copy `df_named` had its class ids replaced.
df.head()

In [None]:
from sign_names_dict import gtsrb_to_carolo

# Keep only the annotations whose class id is a key of gtsrb_to_carolo.
# NOTE(review): the ids are only filtered here and never translated through
# gtsrb_to_carolo, while the other datasets also replace them — confirm this is intended.
df = df.loc[df['ClassId'].isin(gtsrb_to_carolo)]
df.head()

In [None]:
# Readable copy of the filtered frame for the overview plot. The replaced column
# is assigned back explicitly; an in-place replace on `df_named_cleaned['ClassId']`
# is chained assignment and is not guaranteed to update the frame.
df_named_cleaned = df.copy()
df_named_cleaned['ClassId'] = df_named_cleaned['ClassId'].replace(sign_name_carolo_dict)
grouped_classes = df_named_cleaned.groupby('ClassId').count()['Roi.X1']  # Group the dataframe by classes.
grouped_classes.plot(kind='bar', figsize=(9,4),  title='Amount of Instances per Class', legend=False)
plt.show()

In [None]:
# Write the filtered GTSDB annotations (without the index column) to the configured export path.
df.to_csv(export_locations['GTSDB'], sep=',', index=False)

# LISA and LISA-TS-Extended

Only usable signs are the stop signs.


In [None]:
def convert_LISA_to_carolo(df):
    """Reduce a LISA annotation frame to the carolo columns.

    Only the filename, the four corner coordinates and the annotation tag are
    kept (in that order); the LISA column names are replaced by ``Roi.X1``,
    ``Roi.Y1``, ``Roi.X2``, ``Roi.Y2`` and ``ClassId``. The input frame is
    not modified.
    """
    lisa_to_carolo_columns = {
        'Upper left corner X': 'Roi.X1',
        'Upper left corner Y': 'Roi.Y1',
        'Lower right corner X': 'Roi.X2',
        'Lower right corner Y': 'Roi.Y2',
        'Annotation tag': 'ClassId',
    }
    # 'Filename' keeps its name, so it is selected but not part of the rename map.
    wanted_columns = ['Filename'] + list(lisa_to_carolo_columns)
    return df[wanted_columns].rename(columns=lisa_to_carolo_columns)

## LISA

In [None]:
# Load the LISA annotation csv (read with pandas' default comma separator).
df = pd.read_csv('/data/Images/Datasets/LISA/TS/LISA_filtered.csv')
df.head()  # prints top 5 rows

In [None]:
from sign_names_dict import LISA_to_carolo

# Rename the LISA columns, keep and convert the classes listed in LISA_to_carolo, then export.
lisa_df = convert_to_carolo_csv(convert_LISA_to_carolo(df), LISA_to_carolo)
lisa_df.to_csv(export_locations['LISA'], sep=',', index=False)

In [None]:
# Report how many annotations are left in the converted LISA frame.
print('LISA Contains', len(lisa_df['ClassId']), 'stop signs')

## LISA-Extended

Same deal, just stop signs

In [None]:
# Load the annotation csv of the LISA extension (read with pandas' default comma separator).
lisa_extended_df = pd.read_csv('/data/Images/Datasets/LISA/training/allTrainingAnnotations.csv')
lisa_extended_df.head()  # prints top 5 rows

In [None]:
from sign_names_dict import LISA_to_carolo

# Bring the extended set into the carolo layout before exporting, exactly as done
# for LISA. Without this step the raw frame was exported and the `ClassId` column
# read in the next cell never existed. The guard keeps the cell re-runnable and
# leaves a frame that already has a `ClassId` column untouched.
if 'ClassId' not in lisa_extended_df.columns:
    lisa_extended_df = convert_to_carolo_csv(
        convert_LISA_to_carolo(lisa_extended_df), LISA_to_carolo
    )
lisa_extended_df.to_csv(export_locations['LISA_EXTENDED'], sep=',', index=False)

In [None]:
# Report the number of entries in the ClassId column of the LISA-Extended frame.
print('LISA Extended Contains', len(lisa_extended_df['ClassId']), 'stop signs')

# BTSD

Contains most of our classes except for turn signals and speed zones.

## Testing set

In [None]:
# Load the BTSD test annotations (read with pandas' default comma separator).
df = pd.read_csv('/data/Images/Datasets/BTSDB/BelgiumTSD_annotations/BTSD_testing_GTclear.txt')

In [None]:
# remove trailing class added by comma and superclass
df = df.drop(['SuperclassId', 'Unnamed: 7'], axis=1)
# regex=False makes '.jp2' a literal match on every pandas version. With the old
# default (regex=True before pandas 2.0) the dot matched any character.
df['Filename'] = df['Filename'].str.replace('.jp2', '.png', regex=False)
df.head()

In [None]:
from sign_names_dict import BTSD_to_carolo
# Keep and convert the classes listed in BTSD_to_carolo; the helper also displays a preview and a class histogram.
btsd_test_df = convert_to_carolo_csv(df, BTSD_to_carolo)

## Training set

In [None]:
# Load the BTSD training annotations and drop the columns that are not exported.
df = pd.read_csv('/data/Images/Datasets/BTSDB/BelgiumTSD_annotations/BTSD_training_GTclear.txt')
df = df.drop(['SuperclassId', 'Unnamed: 7'], axis=1)
# regex=False makes '.jp2' a literal match on every pandas version. With the old
# default (regex=True before pandas 2.0) the dot matched any character.
df['Filename'] = df['Filename'].str.replace('.jp2', '.png', regex=False)
df.head()

In [None]:
from sign_names_dict import BTSD_to_carolo
# Same conversion as for the test split, applied to the training annotations.
btsd_train_df = convert_to_carolo_csv(df, BTSD_to_carolo)

In [None]:
# Stack the converted training and test annotations into one frame.
combined_btsd_df = pd.concat([btsd_train_df, btsd_test_df])
# NOTE(review): the export key is 'BTSD_TRAINING' although the frame holds both splits — confirm the naming.
combined_btsd_df.to_csv(export_locations['BTSD_TRAINING'], sep=',', index=False)

# Swedish Traffic Sign Dataset (STS)

Sweden also uses yellow as the base color, same as the US, but has some more usable signs overall.

In [None]:
from sign_names_dict import STS_to_carolo

def read_sts_annotation(filepath):
    """Parse an STS annotation text file into a DataFrame.

    Every non-blank line is expected to look like ``<filename>:<sign>;<sign>;...``
    where each sign is a comma separated record. Lines without any sign and
    ``MISC_SIGNS`` entries produce no rows.

    Args:
        filepath: Path of the annotation text file.

    Returns:
        DataFrame with the columns ``Filename``, ``Roi.X1``, ``Roi.Y1``,
        ``Roi.X2``, ``Roi.Y2`` and ``ClassId``, one row per parsed sign.
        Parsing stops early (after printing an error) at a line whose
        filename part is empty.
    """
    data = []
    with open(filepath, 'r') as file:
        lines = file.read().split('\n')

    for line in lines:
        # Skip blank lines instead of cutting a fixed number of trailing entries:
        # the former ``lines[0:-2]`` dropped the last annotation line whenever the
        # file did not end with exactly one empty line.
        if not line.strip():
            continue

        # partition() also copes with lines that contain no ':' at all.
        filename, _, sign_part = line.partition(':')
        filename = filename.strip()
        if filename == '':
            print('ERROR: file incorrect!')
            break

        for sign_entry in sign_part.split(';'):
            # Empty entries come from the ';' terminators; the former
            # ``[0:-2]`` slice also removed the last real sign of a line
            # that ends with a single ';'.
            if not sign_entry.strip():
                continue
            sign_info = [field.strip() for field in sign_entry.split(',')]

            # skip misc signs with no info
            if sign_info[0] == 'MISC_SIGNS':
                continue

            sign_name = sign_info[-1]
            # The box is read from the four fields before the last two, counted
            # from the end of the record (original note: "tl positions are second").
            x1 = float(sign_info[-3])
            y1 = float(sign_info[-4])
            x2 = float(sign_info[-5])
            y2 = float(sign_info[-6])
            data.append([filename, x1, y1, x2, y2, sign_name])

    df = pd.DataFrame(data, columns=['Filename','Roi.X1','Roi.Y1','Roi.X2','Roi.Y2','ClassId'])
    return df

In [None]:
# Parse the first STS annotation file into a DataFrame.
df = read_sts_annotation('/data/Images/Datasets/STS/annotations_1.txt')
df.head()

In [None]:
# Keep and convert the classes listed in STS_to_carolo for the first annotation file.
sts_annotations_1_df = convert_to_carolo_csv(df, STS_to_carolo)

In [None]:
# Parse the second STS annotation file; `df` is rebound to the new frame.
df = read_sts_annotation('/data/Images/Datasets/STS/annotations_2.txt')
df.head()

In [None]:
# Keep and convert the classes listed in STS_to_carolo for the second annotation file.
sts_annotations_2_df = convert_to_carolo_csv(df, STS_to_carolo)

In [None]:
# Stack both converted STS parts and export them without the index column.
combined_sts_df = pd.concat([sts_annotations_1_df, sts_annotations_2_df])
combined_sts_df.to_csv(export_locations['STS'], sep=',', index=False)

# Dataset of Italian Traffic Signs (DITS)

For some reason no annotations have been uploaded for the detection subset, and the published detection subset looks more like GTSRB (cropped windows), even though it looks fine in their paper (and in the test data). In the paper the authors promise to "improve soon".

# Russian Traffic Sign Dataset 

The subclass annotations were merged into a single file for each of the three dataset parts; the bash script used for this is in the dataset root.

In [None]:
import os

def parse_rtsb_filenames(filepath, base_dir='/data/Images/Datasets/rtsd-public/detection/'):
    """Collect the image names listed for one RTSD sub-dataset.

    Args:
        filepath: Folder of the sub-dataset, relative to ``base_dir``.
        base_dir: Root folder that contains the sub-dataset folders. Defaults
            to the location used throughout this notebook.

    Returns:
        List with the lines of ``train_filenames.txt`` followed by the lines
        of ``test_filenames.txt`` (line endings removed).
    """
    files = []
    for list_name in ('train_filenames.txt', 'test_filenames.txt'):
        with open(os.path.join(base_dir, filepath, list_name), 'r') as f:
            files.extend(f.read().splitlines())
    return files

In [None]:
def convert_rtsb_df(rtsb_csv_path):
    """Load an RTSD annotation csv and bring it into the carolo column layout.

    The csv columns are renamed, the second box corner is computed from the
    first corner plus the box width/height, and the result is reduced to
    ``Filename, Width, Height, Roi.X1, Roi.Y1, Roi.X2, Roi.Y2, ClassId``.
    The first rows of the result are displayed before it is returned.
    """
    carolo_column_names = {
        'filename': 'Filename',
        'x_from': 'Roi.X1',
        'y_from': 'Roi.Y1',
        'width': 'Width',
        'height': 'Height',
        'sign_class': 'ClassId',
    }
    annotations = pd.read_csv(rtsb_csv_path).rename(columns=carolo_column_names)

    # The second corner has to be derived before Width/Height are overwritten.
    annotations['Roi.X2'] = annotations['Roi.X1'] + annotations['Width']
    annotations['Roi.Y2'] = annotations['Roi.Y1'] + annotations['Height']

    # From here on Width/Height hold the same fixed values for every row
    # (presumably the frame resolution — TODO confirm).
    annotations['Width'] = 1280
    annotations['Height'] = 720

    carolo_layout = ['Filename', 'Width', 'Height', 'Roi.X1', 'Roi.Y1', 'Roi.X2', 'Roi.Y2', 'ClassId']
    result = annotations[carolo_layout]
    display(result.head())
    return result

In [None]:
from sign_names_dict import rtsd_to_carolo

rtsb_merged_paths = ['/data/Images/Datasets/rtsd-public/detection/rtsd-d1-gt/merged.csv',
                     '/data/Images/Datasets/rtsd-public/detection/rtsd-d2-gt/merged.csv',
                     '/data/Images/Datasets/rtsd-public/detection/rtsd-d3-gt/merged.csv']

rtsb_folder_prefixes = ['rtsd-d1-', 'rtsd-d2-', 'rtsd-d3-']

# Original note: all datasets contain all labels and just the image name lists
# differ for each subdataset. Each part is nevertheless read from its own csv so
# that the data matches the path printed below.
annotation_dfs = []
for rtsb_merged_path, rtsb_folder_prefix in zip(rtsb_merged_paths, rtsb_folder_prefixes):
    print('Processing csv: {} corresponding to folder: {}'.format(rtsb_merged_path, rtsb_folder_prefix))
    # Use the loop variable; the csv of the first part used to be read on every iteration.
    df = convert_rtsb_df(rtsb_merged_path)
    # Keep only the annotations whose image belongs to this sub-dataset.
    filenames = parse_rtsb_filenames(rtsb_folder_prefix + 'gt')
    # Explicit copy so the column assignment below works on an independent frame.
    df = df[df['Filename'].isin(filenames)].copy()
    # Prefix the image name with the frames folder of the sub-dataset.
    df['Filename'] = rtsb_folder_prefix + 'frames/' + df['Filename'].astype('str')
    df = convert_to_carolo_csv(df, rtsd_to_carolo)
    annotation_dfs.append(df.copy())

combined_rtsd_df = pd.concat(annotation_dfs)
combined_rtsd_df.to_csv(export_locations['RTSD'], sep=',', index=False)

 ## Mapillary Traffic Sign Detection Dataset (MTSD)
 
 Contains pretty much all traffic signs from the StVO (the German road traffic regulations), so we can just use these labels directly. 
 
 Images are annotated in a per-image json format.
 
 The class list is quite hidden, but can be found [here](https://www.mapillary.com/developer/api-documentation/#traffic-signs). Traffic signs are split into "appearance groups". These are not fully exhaustive, however: for instance, the speed limit zone signs are generalized even though they differ between the US and European countries.

In [None]:
# Root folder of the MTSD data; its 'images' and 'annotations' sub-folders are read below.
MTSD_BASE_PATH = "/data_2/Datasets/Signs/MTSD"

In [None]:
import json
import glob
import os
import json

from sign_names_dict import MTSD_to_carolo


def write_mapillary_annotations(data, base_path=None, label_map=None):
    """Append the convertible MTSD annotations to ``data``.

    Args:
        data: Dict of lists with the keys ``Filename``, ``Roi.X1``, ``Roi.Y1``,
            ``Roi.X2``, ``Roi.Y2`` and ``ClassId``. It is extended in place.
        base_path: Folder containing the ``images`` and ``annotations``
            sub-folders. Defaults to ``MTSD_BASE_PATH``.
        label_map: Mapping from MTSD labels to target class ids; objects with
            other labels are skipped. Defaults to ``MTSD_to_carolo``.

    Returns:
        The same ``data`` dict that was passed in.

    Raises:
        ValueError: If an annotation file has no image with the same file stem.
    """
    if base_path is None:
        base_path = MTSD_BASE_PATH
    if label_map is None:
        label_map = MTSD_to_carolo

    # Pair files by their stem instead of zipping two sorted listings: the zip
    # silently mispaired files as soon as an image had no annotation, and the
    # former check only compared the first character of both paths.
    images_by_stem = {
        os.path.splitext(os.path.basename(image_path))[0]: image_path
        for image_path in glob.glob(os.path.join(base_path, 'images/*'))
    }

    for annotation_path in sorted(glob.glob(os.path.join(base_path, 'annotations/*'))):
        stem = os.path.splitext(os.path.basename(annotation_path))[0]
        if stem not in images_by_stem:
            raise ValueError('No image found for annotation file: {}'.format(annotation_path))
        image_path = images_by_stem[stem]

        with open(annotation_path, 'r') as f:
            annotations = json.load(f)

        for annotation in annotations['objects']:
            label = annotation['label']
            if label not in label_map:
                continue
            # Accept both layouts: coordinates nested under 'bbox' (as in the
            # published MTSD JSON — NOTE(review): confirm against the local
            # files) or stored directly on the object as assumed before.
            bbox = annotation.get('bbox', annotation)
            data['Filename'].append(image_path)
            data['Roi.X1'].append(bbox['xmin'])
            data['Roi.Y1'].append(bbox['ymin'])
            data['Roi.X2'].append(bbox['xmax'])
            data['Roi.Y2'].append(bbox['ymax'])
            data['ClassId'].append(label_map[label])
    return data

In [None]:
# Collect all convertible MTSD annotations column-wise, then export them.
data = {'Filename': [], 'Roi.X1': [], 'Roi.Y1': [], 'Roi.X2': [], 'Roi.Y2': [], 'ClassId': []}
data = write_mapillary_annotations(data)
# pandas is only imported under the alias `pd`; `pandas.DataFrame` raised a NameError.
mtsd_df = pd.DataFrame(data)
mtsd_df.to_csv(export_locations['MTSD'], sep=',', index=False)