In [1]:
from fastai.vision.all import *
from fastbook import *
from fastai.vision.widgets import *


# Exploring the data
The DeepShip dataset was downloaded from Google Drive and extracted manually into the `DeepShip` folder.

In [2]:
# Root folder of the manually extracted DeepShip dataset, relative to the
# notebook's working directory.
# NOTE: never assign to `Path.absolute` -- it is a pathlib method, and replacing
# it with a string makes every later `.absolute()` call in this kernel fail
# with "TypeError: 'str' object is not callable".
path = Path('DeepShip')

List the folders inside the dataset.

In [3]:
# Keep only the sub-directories of the dataset root (loose files are skipped);
# their names are used as the class labels below.
classes = list(filter(lambda name: os.path.isdir(path/name), os.listdir(path)))
classes

['Tug', 'Cargo', 'Tanker', 'Passengership']

Get all sound files, to see if we can label based on their path.

In [4]:
# Collect every `.wav` file found under the dataset root.
fns = get_files(path, extensions='.wav')
fns

(#609) [Path('DeepShip/Tug/20171202-37/071515.wav'),Path('DeepShip/Tug/20171106a-5/162112.wav'),Path('DeepShip/Tug/20171217a-59/050224.wav'),Path('DeepShip/Tug/20171118c-15/185844.wav'),Path('DeepShip/Tug/20171219a-60/210546.wav'),Path('DeepShip/Tug/20171202a-38/145149.wav'),Path('DeepShip/Tug/20171229b-68/200347.wav'),Path('DeepShip/Tug/20171124-22/012051.wav'),Path('DeepShip/Tug/20171223b-63/233721.wav'),Path('DeepShip/Tug/20171118b-14/161506.wav')...]

Sound files are stored inside timestamped id folders `date-id`, inside label folders `Label`. <br>
The sound filenames vary a bit, but they are organized more or less in this form: `Label/date-id/time.wav`
<br>We can now write a simple label function based on the parent folders.

## Adding some label functions, based on path and parent folder

In [None]:
def label_func(p : Path):
    """Return the label of a sound file: the name of its grandparent folder.

    For `Label/date-id/time.wav` this is `Label`. A path with fewer than two
    parent folders yields an empty string.
    """
    recording_dir = pathlib.PurePath(p).parent
    return recording_dir.parent.name

In [None]:
def path_to_id(p : Path):
    """Return `(class_name, recording_id)` parsed from a sound file path.

    The class name is the grandparent folder name. The recording id is the
    text after the last `-` in the parent folder name, returned as a string.
    If the parent folder name contains no `-`, the id is the integer `-1`.
    """
    recording_dir = pathlib.PurePath(p).parent
    cls = recording_dir.parent.name
    # rpartition splits on the LAST dash; `dash` is empty when there is none.
    _, dash, suffix = recording_dir.name.rpartition('-')
    if not dash:
        return (cls, -1)
    return (cls, suffix)

In [None]:
# Spot-check the parser on one file from the listing (the index 203 has no
# special meaning visible in this notebook).
path_to_id(fns[203])

## Adding metadata
The DeepShip dataset includes metadata as CSV files under each class folder (`Class/class-metafile`). <br>
We can read these into pandas DataFrames.

In [None]:
# Column names applied to the first seven fields of each metafile.
# NOTE(review): the meaning of the last column is unconfirmed, hence 'distance?'.
columns = ['id', 'class', 'name', 'date', 'time', 'seconds', 'distance?']

def _read_class_metadata(label):
    """Read `<label>/<label lowercased>-metafile` and tag every row with `label`.

    Only the first `len(columns)` fields of the file are kept, and they are
    named after `columns`.
    """
    metafile = path/label/f'{label.lower()}-metafile'
    frame = pd.read_csv(metafile, names=columns, usecols=list(range(len(columns))))
    frame['label'] = label
    return frame

# `DataFrame.append` was removed in pandas 2.0, and growing a frame inside a
# loop copies it on every iteration. Read each class once and concatenate in a
# single step instead. As with the old `append` calls, each class keeps its own
# 0..n-1 row index, so index labels repeat across classes.
df = pd.concat([_read_class_metadata(label) for label in classes])

In [None]:
# Summary statistics of the combined metadata frame.
df.describe()

Now we can replace class id with class name, for easier lookup.

In [None]:
# Display the combined metadata frame, including the added `label` column.
df

In [None]:
# Save the combined metadata to a single CSV with a header row; the row index
# is not written.
df.to_csv('deepShip.csv', header=True, index=False)