In [None]:
# export locations of the scripts
from export_locations import export_locations

# GTSDB

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
import sys
sys.path.append("../sign_recognition/dict/") 
from sign_names_dict import sign_name_GTSRB_full_dict

In [None]:
# Load the GTSDB ground truth and build a second frame whose ClassId values are
# mapped through sign_name_GTSRB_full_dict (used for the labelled plot below).
df = pd.read_csv('/data/Images/Datasets/GTSDB/gt.csv')
df_named = df.copy()
# Assign the result back instead of calling replace(inplace=True) on the column
# selection: the chained in-place form acts on a temporary object under pandas
# Copy-on-Write and would leave df_named unchanged.
df_named['ClassId'] = df_named['ClassId'].replace(sign_name_GTSRB_full_dict)
df_named.head()  # prints top 5 rows

In [None]:
# Count the annotations per class label and draw the distribution as a bar chart.
grouped_classes = df_named.groupby('ClassId')['Roi.X1'].count()
grouped_classes.plot.bar(figsize=(9, 4), title='Amount of Instances per Class', legend=False)
plt.show()

In [None]:
# Preview the first rows of the GTSDB frame with its original ClassId values.
df.head(n=5)

In [None]:
from sign_names_dict import gtsrb_to_carolo
# Keep only the rows whose ClassId appears in gtsrb_to_carolo. The bare
# `df.head()` that preceded the filter was removed: it was not the last
# expression of the cell, so it never displayed anything.
# .copy() detaches the result from the unfiltered frame so later column
# assignments cannot raise SettingWithCopyWarning.
# NOTE(review): unlike the BTSD/STS cells, ClassId is not remapped through
# gtsrb_to_carolo before plotting/exporting — confirm the GTSRB ids are already
# the Carolo ids.
df = df[df['ClassId'].isin(gtsrb_to_carolo)].copy()
df.head()

In [None]:
from sign_names_dict import sign_name_carolo_dict
# Build a labelled copy of the filtered GTSDB frame and plot instances per class.
df_named_cleaned = df.copy()
# Plain assignment instead of the chained replace(inplace=True), which modifies
# a temporary under pandas Copy-on-Write and is deprecated in pandas 2.x.
df_named_cleaned['ClassId'] = df_named_cleaned['ClassId'].replace(sign_name_carolo_dict)
grouped_classes = df_named_cleaned.groupby('ClassId').count()['Roi.X1']  # Group the dataframe by classes.
grouped_classes.plot(kind='bar', figsize=(9,4),  title='Amount of Instances per Class', legend=False)
plt.show()

In [None]:
# Write the filtered GTSDB annotations to the configured export location.
df.to_csv(path_or_buf=export_locations['GTSDB'], index=False, sep=',')

# LISA and LISA-TS-Extended

Only usable signs are the stop signs.

## LISA

In [None]:
# Read the pre-filtered LISA annotation table and preview it.
df = pd.read_csv(filepath_or_buffer='/data/Images/Datasets/LISA/TS/LISA_filtered.csv')
df.head(n=5)  # prints top 5 rows

In [None]:
from sign_names_dict import LISA_to_carolo
# Bring the LISA annotations into the common Filename / Roi.* / ClassId layout,
# translate the annotation tags through LISA_to_carolo and export the result.
carolo_df = df[
    ['Filename', 'Upper left corner X', 'Upper left corner Y',
     'Lower right corner X', 'Lower right corner Y', 'Annotation tag']
].rename(
    columns={
        'Upper left corner X': 'Roi.X1',
        'Upper left corner Y': 'Roi.Y1',
        'Lower right corner X': 'Roi.X2',
        'Lower right corner Y': 'Roi.Y2',
        'Annotation tag': 'ClassId',
    }
)
# Assign the replaced column back: the chained replace(inplace=True) used before
# acts on a temporary under pandas Copy-on-Write and would export unmapped tags.
carolo_df['ClassId'] = carolo_df['ClassId'].replace(LISA_to_carolo)
carolo_df.to_csv(export_locations['LISA'], sep=',', index=False)
carolo_df.head()

In [None]:
# Report how many annotation rows were exported for LISA.
print('LISA Contains', len(carolo_df['ClassId']), 'stop signs')

## LISA-Extended

Same as for LISA: only the stop signs are usable.

In [None]:
# Read the LISA-Extended training annotations and preview them.
df = pd.read_csv(filepath_or_buffer='/data/Images/Datasets/LISA/training/allTrainingAnnotations.csv')
df.head(n=5)  # prints top 5 rows

In [None]:
# Bring the LISA-Extended annotations into the common Filename / Roi.* / ClassId
# layout, keep only the tags that LISA_to_carolo knows, translate them and export.
carolo_df = df[
    ['Filename', 'Upper left corner X', 'Upper left corner Y',
     'Lower right corner X', 'Lower right corner Y', 'Annotation tag']
].rename(
    columns={
        'Upper left corner X': 'Roi.X1',
        'Upper left corner Y': 'Roi.Y1',
        'Lower right corner X': 'Roi.X2',
        'Lower right corner Y': 'Roi.Y2',
        'Annotation tag': 'ClassId',
    }
)
# This file is the complete annotation set, so rows whose tag has no entry in
# LISA_to_carolo are dropped first; otherwise they would be exported with their
# original string tag and be counted as stop signs by the following cell.
carolo_df = carolo_df[carolo_df['ClassId'].isin(LISA_to_carolo)].copy()
# Assign the replaced column back: the chained replace(inplace=True) used before
# acts on a temporary under pandas Copy-on-Write and would export unmapped tags.
carolo_df['ClassId'] = carolo_df['ClassId'].replace(LISA_to_carolo)
carolo_df.to_csv(export_locations['LISA_EXTENDED'], sep=',', index=False)
carolo_df.head()

In [None]:
# Report how many annotation rows were exported for LISA-Extended.
print('LISA Extended Contains', len(carolo_df['ClassId']), 'stop signs')

# BTSD

Contains most of our classes except for turn signals and speed zones.

## Testing set

In [None]:
# Read the BTSD testing ground truth.
df = pd.read_csv(
    filepath_or_buffer='/data/Images/Datasets/BTSDB/BelgiumTSD_annotations/BTSD_testing_GTclear.txt'
)

In [None]:
# Discard the superclass column and the unnamed column that the trailing comma
# of each row produces.
df = df.drop(columns=['SuperclassId', 'Unnamed: 7'])
df.head(n=5)

In [None]:
from sign_names_dict import BTSD_to_carolo
# Keep only the rows whose ClassId appears in BTSD_to_carolo and translate the ids.
# .copy() plus plain assignment replaces the chained
# `df['ClassId'].replace(..., inplace=True)` on a filtered slice, which raises
# SettingWithCopyWarning and leaves df unchanged under pandas Copy-on-Write.
df = df[df['ClassId'].isin(BTSD_to_carolo)].copy()
df['ClassId'] = df['ClassId'].replace(BTSD_to_carolo)

# Separate copy labelled through sign_name_carolo_dict, used only for the plot.
df_visual = df.copy()
df_visual['ClassId'] = df_visual['ClassId'].replace(sign_name_carolo_dict)
grouped_classes = df_visual.groupby('ClassId').count()['Roi.X1']  # Group the dataframe by classes.
grouped_classes.plot(kind='bar', figsize=(9,4),  title='Amount of Instances per Class', legend=False)
plt.show()

In [None]:
# Write the converted BTSD testing annotations to the configured export location.
df.to_csv(path_or_buf=export_locations['BTSD_TESTING'], index=False, sep=',')

## Training set

In [None]:
# Read the BTSD training ground truth and drop the two columns that are not
# exported (SuperclassId and the unnamed trailing column).
df = pd.read_csv(
    '/data/Images/Datasets/BTSDB/BelgiumTSD_annotations/BTSD_training_GTclear.txt'
).drop(columns=['SuperclassId', 'Unnamed: 7'])
df.head(n=5)

In [None]:
# Keep only the rows whose ClassId appears in BTSD_to_carolo and translate the ids.
# .copy() plus plain assignment replaces the chained
# `df['ClassId'].replace(..., inplace=True)` on a filtered slice, which raises
# SettingWithCopyWarning and leaves df unchanged under pandas Copy-on-Write.
df = df[df['ClassId'].isin(BTSD_to_carolo)].copy()
df['ClassId'] = df['ClassId'].replace(BTSD_to_carolo)

# Separate copy labelled through sign_name_carolo_dict, used only for the plot.
df_visual = df.copy()
df_visual['ClassId'] = df_visual['ClassId'].replace(sign_name_carolo_dict)
grouped_classes = df_visual.groupby('ClassId').count()['Roi.X1']  # Group the dataframe by classes.
grouped_classes.plot(kind='bar', figsize=(9,4),  title='Amount of Instances per Class', legend=False)
plt.show()

In [None]:
# Write the converted BTSD training annotations to the configured export location.
df.to_csv(path_or_buf=export_locations['BTSD_TRAINING'], index=False, sep=',')

# Swedish Traffic Sign Dataset (STS)

Sweden also uses yellow as the base colour, the same as the US, but there are somewhat more usable signs overall.

In [None]:
# parse the dataset
def read_sts_annotation(filepath, trailing_lines_to_skip=2, trailing_signs_to_skip=2):
    """Parse an STS annotation text file into a DataFrame of bounding boxes.

    Every line is read as ``<filename>:<sign>;<sign>;...``. A sign is a
    comma-separated record whose last field is the sign name; counted from the
    end, the record is read as ``..., x2, y2, x1, y1, <type>, <name>``, i.e.
    the top-left corner is the second coordinate pair.

    Args:
        filepath: Path of the annotation file.
        trailing_lines_to_skip: Number of entries dropped from the end of the
            newline-split file content. Defaults to 2, the value that was
            previously hard-coded.
        trailing_signs_to_skip: Number of entries dropped from the end of the
            ``;``-split sign list of every line. Defaults to 2, the value that
            was previously hard-coded.

    Returns:
        pandas.DataFrame with the columns Filename, Roi.X1, Roi.Y1, Roi.X2,
        Roi.Y2 and ClassId, one row per parsed sign.
    """
    data = []
    with open(filepath, 'r') as file:
        lines = file.read().split('\n')

    # NOTE(review): with the default of 2, the last real line of a file that ends
    # with a single newline is skipped, and so is the last sign of a line that
    # ends with a single ';'. Verify against the annotation files and lower the
    # two skip counts if data is being lost.
    for line in lines[:max(len(lines) - trailing_lines_to_skip, 0)]:
        split_line = line.split(':')
        filename = str(split_line[0]).lstrip()
        if filename == '':
            print('ERROR: file incorrect!')
            break

        # A line without ':' or with nothing after it carries no annotations.
        if len(split_line) < 2 or split_line[1] == '':
            continue

        split_signs = split_line[1].split(';')
        for split_sign in split_signs[:max(len(split_signs) - trailing_signs_to_skip, 0)]:
            sign_info = split_sign.split(',')

            # Skip misc signs with no info. The field is stripped so that an entry
            # preceded by whitespace is recognised as well instead of failing on
            # the coordinate lookups below.
            if sign_info[0].strip() == 'MISC_SIGNS':
                continue

            sign_name = str(sign_info[-1]).lstrip()
            # Top-left positions are second: (x2, y2) sit at [-6], [-5] and
            # (x1, y1) at [-4], [-3]. The earlier version read x2 from [-5] and
            # y2 from [-6], which swapped the bottom-right coordinates.
            x1 = float(sign_info[-4].lstrip())
            y1 = float(sign_info[-3].lstrip())
            x2 = float(sign_info[-6].lstrip())
            y2 = float(sign_info[-5].lstrip())
            data.append([filename, x1, y1, x2, y2, sign_name])

    df = pd.DataFrame(data, columns=['Filename','Roi.X1','Roi.Y1','Roi.X2','Roi.Y2','ClassId'])
    return df

In [None]:
# Parse the first STS annotation file and preview the result.
df = read_sts_annotation(filepath='/data/Images/Datasets/STS/annotations_1.txt')
df.head(n=5)

In [None]:
from sign_names_dict import STS_to_carolo
# Keep only the rows whose ClassId appears in STS_to_carolo and translate the ids.
# .copy() plus plain assignment replaces the chained
# `df['ClassId'].replace(..., inplace=True)` on a filtered slice, which raises
# SettingWithCopyWarning and leaves df unchanged under pandas Copy-on-Write.
df = df[df['ClassId'].isin(STS_to_carolo)].copy()
df['ClassId'] = df['ClassId'].replace(STS_to_carolo)

# Separate copy labelled through sign_name_carolo_dict, used only for the plot.
df_visual = df.copy()
df_visual['ClassId'] = df_visual['ClassId'].replace(sign_name_carolo_dict)
grouped_classes = df_visual.groupby('ClassId').count()['Roi.X1']  # Group the dataframe by classes.
grouped_classes.plot(kind='bar', figsize=(9,4),  title='Amount of Instances per Class', legend=False)
plt.show()

In [None]:
# Store the converted first STS part next to its source annotation file.
df.to_csv(path_or_buf='/data/Images/Datasets/STS/annotations_1_carolo.csv', index=False, sep=',')

In [None]:
# Parse the second STS annotation file and preview the result.
df = read_sts_annotation(filepath='/data/Images/Datasets/STS/annotations_2.txt')
df.head(n=5)

In [None]:
# Keep only the rows whose ClassId appears in STS_to_carolo and translate the ids.
# .copy() plus plain assignment replaces the chained
# `df['ClassId'].replace(..., inplace=True)` on a filtered slice, which raises
# SettingWithCopyWarning and leaves df unchanged under pandas Copy-on-Write.
df = df[df['ClassId'].isin(STS_to_carolo)].copy()
df['ClassId'] = df['ClassId'].replace(STS_to_carolo)

# Separate copy labelled through sign_name_carolo_dict, used only for the plot.
df_visual = df.copy()
df_visual['ClassId'] = df_visual['ClassId'].replace(sign_name_carolo_dict)
grouped_classes = df_visual.groupby('ClassId').count()['Roi.X1']  # Group the dataframe by classes.
grouped_classes.plot(kind='bar', figsize=(9,4),  title='Amount of Instances per Class', legend=False)
plt.show()

In [None]:
# Write the converted second STS part to the configured export location.
df.to_csv(path_or_buf=export_locations['STS'], index=False, sep=',')

# Dataset of Italian Traffic Signs (DITS)

For some reason, no annotations have been uploaded for the detection subset, and the detection subset is more like GTSRB (cropped windows), even though it looks fine in their paper (and in the test data). In the paper the authors promise to "improve soon".

# Russian Traffic Sign Dataset 

The subclass annotations were merged into a single file for each of the three dataset parts; the bash script used for this is in the dataset root.

In [None]:
from IPython.display import display

def convert_rtsb_df(rtsb_csv_path):
    """Load a merged RTSD annotation CSV and convert it to the common box layout.

    The source columns filename, x_from, y_from, width, height and sign_class
    are renamed to Filename, Roi.X1, Roi.Y1, Width, Height and ClassId, and the
    bottom-right corner (Roi.X2, Roi.Y2) is derived as origin plus extent.

    Args:
        rtsb_csv_path: Path of the merged annotation CSV.

    Returns:
        pandas.DataFrame with the columns Filename, Width, Height, Roi.X1,
        Roi.Y1, Roi.X2, Roi.Y2 and ClassId. Its first rows are also displayed.
    """
    df = pd.read_csv(rtsb_csv_path).rename(
        columns={
            'filename': 'Filename',
            'x_from': 'Roi.X1',
            'y_from': 'Roi.Y1',
            'width': 'Width',
            'height': 'Height',
            'sign_class': 'ClassId',
        }
    )
    df['Roi.X2'] = df['Roi.X1'] + df['Width']
    df['Roi.Y2'] = df['Roi.Y1'] + df['Height']
    # NOTE(review): Width/Height keep the box extent read from the source file.
    # The earlier draft of this cell contained unfinished `df['Width']=` and
    # `df['Height']=` assignments (and an unfinished 'filename' rename target);
    # if they were meant to hold the image size instead, set them here.
    df = df[['Filename', 'Width', 'Height', 'Roi.X1', 'Roi.Y1', 'Roi.X2', 'Roi.Y2', 'ClassId']]
    display(df.head())
    return df

In [None]:
def display_dataset_eval(df_in):
    """Filter a frame to the classes in rtsd_to_carolo, remap them and plot counts.

    Args:
        df_in: DataFrame with at least the columns ClassId and Roi.X1.

    Returns:
        A new DataFrame containing only the rows whose ClassId appears in
        rtsd_to_carolo, with ClassId translated through that mapping. The
        frame passed in is not modified.
    """
    # .copy() plus plain assignment replaces the chained
    # `df_in['ClassId'].replace(..., inplace=True)` on a filtered slice, which
    # raises SettingWithCopyWarning and has no effect under pandas Copy-on-Write.
    df_in = df_in[df_in['ClassId'].isin(rtsd_to_carolo)].copy()
    df_in['ClassId'] = df_in['ClassId'].replace(rtsd_to_carolo)

    # Separate copy labelled through sign_name_carolo_dict, used only for the plot.
    df_visual = df_in.copy()
    df_visual['ClassId'] = df_visual['ClassId'].replace(sign_name_carolo_dict)
    grouped_classes = df_visual.groupby('ClassId').count()['Roi.X1']  # Group the dataframe by classes.
    grouped_classes.plot(kind='bar', figsize=(9,4),  title='Amount of Instances per Class', legend=False)
    plt.show()
    return df_in

In [None]:
from sign_names_dict import rtsd_to_carolo

# Since all datasets contain all labels and just the image name lists differ for
# each subdataset, only the first one is evaluated; the others stay disabled.
rtsb_merged_paths = [
    '/data/Images/Datasets/rtsd-public/detection/rtsd-d1-gt/merged.csv',
    # '/data/Images/Datasets/rtsd-public/detection/rtsd-d2-gt/merged.csv',
    # '/data/Images/Datasets/rtsd-public/detection/rtsd-d3-gt/merged.csv',
]

for rtsb_merged_path in rtsb_merged_paths:
    print(rtsb_merged_path)
    # Convert the file of the current iteration. The loop used to re-read
    # rtsb_merged_paths[0] on every pass, so enabling further paths would have
    # printed their names but processed the first file again.
    df = convert_rtsb_df(rtsb_merged_path)
    df = display_dataset_eval(df)

In [None]:
# Intermediate export next to the source annotations. The call was previously
# left unterminated (a syntax error) and named only the directory; the file name
# is chosen by analogy with the STS export ('annotations_1_carolo.csv').
# NOTE(review): confirm the intended target file name.
df.to_csv('/data/Images/Datasets/rtsd-public/detection/rtsd-d1-gt/merged_carolo.csv', sep=',', index=False)

In [None]:
# Write the converted RTSD annotations to the configured export location.
df.to_csv(path_or_buf=export_locations['RTSD'], index=False, sep=',')