In [555]:
import os
import shlex
import subprocess
import sys
import tempfile
import wave

import numpy as np
import pandas as pd
from scipy.io import wavfile

# Conversion between types
This part and the following sections require the [SoX package](http://sox.sourceforge.net/) (Sound eXchange). On macOS, run
```
brew install sox
```

In [556]:
def wav_to_data(wav_filepath):
  """Read a WAV file and return only its sample array.

  The sample rate returned by `scipy.io.wavfile.read` is discarded.
  """
  _, samples = wavfile.read(wav_filepath)
  return samples

In [557]:
def sphere_to_data(sphere_filepath):
  """Convert a NIST SPHERE file to WAV with SoX and return its samples.

  Args:
    sphere_filepath: path to the NIST SPHERE file.
  Returns:
    The sample array read back from the converted WAV file
    (the return value of `wav_to_data`).
  Raises:
    ValueError: if SoX cannot be executed or the conversion fails.
  """
  # Use a private temporary directory instead of the shared '/tmp/haha.wav':
  # with a fixed path, a failed conversion would silently return the samples
  # left over from a previous call.
  with tempfile.TemporaryDirectory() as tmp_dir:
    tmp_filepath = os.path.join(tmp_dir, 'converted.wav')
    # Argument list (no shell), so paths containing spaces or shell
    # metacharacters are passed to SoX verbatim.
    cmd = ['sox', '-t', 'sph', sphere_filepath, tmp_filepath]
    try:
      subprocess.run(cmd, check=True,
                     stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except OSError:
      # The `sox` executable could not be started (typically: not installed).
      raise ValueError("Converting SPHERE files requires SoX installed. For Max OS, run 'brew install sox'.")
    except subprocess.CalledProcessError as err:
      # SoX ran but reported an error (missing or invalid input file, ...).
      raise ValueError("SoX failed to convert '{}': {}".format(
          sphere_filepath, err.stderr.decode(errors='replace').strip()))
    # Read the samples before the temporary directory is removed.
    return wav_to_data(tmp_filepath)
    
def sphere_to_bytes(sphere_filepath):
  """Return the raw bytes of a file that follow its first 1024 bytes.

  The first 1024 bytes are read and discarded (presumably the fixed-size
  SPHERE header -- TODO confirm); everything after them is returned.
  """
  skipped_prefix_size = 1024
  with open(sphere_filepath, 'rb') as sphere_file:
    sphere_file.read(skipped_prefix_size)  # discard the leading bytes
    return sphere_file.read()

# Play audio files

In [558]:
PLAY_CMD = 'play' # For Mac OS with SoX installed

def play_audio_from_file(audio_filename, cmd=PLAY_CMD):
  """Play an audio file with an external command-line player.

  Args:
    audio_filename: path of the audio file to play.
    cmd: shell command of the player; the file name is appended to it as
      the last argument.
  """
  # Quote the file name so that paths containing spaces or shell
  # metacharacters reach the player as a single argument. The exit status
  # is still ignored (best effort).
  os.system(cmd + ' ' + shlex.quote(audio_filename))

In [559]:
def play_audio_from_bytes(data_in_bytes, nchannels=1, sampwidth=2,
                          framerate=16000, comptype='NONE',
                          compname='not compressed'):
  """Play raw audio samples by writing them to a temporary WAV file.

  Args:
    data_in_bytes: the raw sample data. Anything that is not already a
      `bytes` object is converted with `bytes(...)`.
    nchannels: number of audio channels.
    sampwidth: sample width in bytes.
    framerate: sampling frequency in Hz.
    comptype: compression type passed to `wave.Wave_write.setcomptype`.
    compname: human-readable compression name passed along with `comptype`.
  """
  if not isinstance(data_in_bytes, bytes):
    data_in_bytes = bytes(data_in_bytes)

  # Create a unique tmp file (a fixed shared path could be clobbered by
  # another call or another process).
  tmp_fd, tmp_filepath = tempfile.mkstemp(suffix='.wav')
  os.close(tmp_fd)

  try:
    with wave.open(tmp_filepath, 'w') as f:

      # Set audio file parameters
      f.setcomptype(comptype, compname)
      f.setframerate(framerate)
      f.setnchannels(nchannels)
      f.setsampwidth(sampwidth)
      # One frame holds one sample for every channel.
      f.setnframes(len(data_in_bytes) // (sampwidth * nchannels))

      # Write data
      f.writeframes(data_in_bytes)

    play_audio_from_file(tmp_filepath)
  finally:
    # Delete the tmp file, even if writing or playback failed
    os.remove(tmp_filepath)

In [560]:
filename1 = 'example1.sph' # NIST SPHERE file
filename2 = 'example2.sph' # NIST SPHERE file

In [83]:
data1 = sphere_to_data(filename1)
data1

array([ 1, -1,  2, ..., -1, -5, -8], dtype=int16)

In [84]:
data2 = sphere_to_data(filename2)
data2

array([-1,  0,  1, ...,  3,  1, -2], dtype=int16)

In [85]:
# Play audio
play_audio_from_bytes(data1)

In [10]:
play_audio_from_bytes(data2)

In [11]:
# Combine 2 audio files by summing their samples.
# Zero-pad whichever signal is shorter so both have the same length
# (np.pad rejects a negative pad width, so padding only data2 fails when
# data2 is the longer recording).
mix_length = max(data1.size, data2.size)
padded_data1 = np.pad(data1, (0, mix_length - data1.size), mode='constant')
padded_data2 = np.pad(data2, (0, mix_length - data2.size), mode='constant')
play_audio_from_bytes(padded_data1 + padded_data2)

In [12]:
# Play an interval
play_audio_from_bytes(data1[10000:20000])

# Dealing with TIMIT dataset

In [8]:
%ls ../../datasets/speech/timit/

[1m[34mCONVERT[m[m/    README.txt  [1m[34mSPHERE[m[m/     [1m[34mTIMIT[m[m/


In [43]:
timit_dir = '../../datasets/speech/timit/'

In [10]:
os.listdir(timit_dir)

['.DS_Store', 'SPHERE', 'README.txt', 'TIMIT', 'CONVERT']

In [301]:
# traverse training set of TIMIT
path = os.path.join(timit_dir, 'TIMIT', 'train')

# One tuple per audio file:
# (dirpath, wav file, phonetic label file, word label file,
#  sentence label file, data_type, region, speaker, gender)
li = []

for (dirpath, dirnames, filenames) in os.walk(path):
  for filename in filenames:
    if filename.endswith('.WAV'):
      dir_split = os.path.normpath(dirpath).split(os.sep)
      # The last three components are <data_type>/<region>/<speaker>.
      # Index from the end so the result does not depend on how deeply
      # timit_dir itself is nested.
      data_type, region, speaker = dir_split[-3:]
      gender = speaker[0]  # first character of the speaker id
      basename = os.path.splitext(filename)[0]
      phonetic_label = basename + '.PHN'
      word_label = basename + '.WRD'
      sentence_label = basename + '.TXT'
      li.append((dirpath, filename, phonetic_label, word_label, sentence_label, data_type, region, speaker, gender))

In [302]:
# Verify that all files found are indeed valid files on the disk

# Collect every missing file first: only then can we tell whether
# everything is really fine.
missing_files = []
for x in li:
  for idx in range(1,5):  # wav file + the three label files
    filepath = os.path.abspath(os.path.join(x[0], x[idx]))
    if not os.path.isfile(filepath):
      missing_files.append(filepath)

if missing_files:
  for filepath in missing_files:
    print("Holy shit! The file {} doesn't exist!".format(filepath))
else:
  print("It's all good. All files exist on the disk.")

It's all good. All files exist on the disk.


In [303]:
li[0]

('../../datasets/speech/timit/TIMIT/train/DR4/MMDM0',
 'SI681.WAV',
 'SI681.PHN',
 'SI681.WRD',
 'SI681.TXT',
 'train',
 'DR4',
 'MMDM0',
 'M')

In [304]:
# Convert to pandas dataframes

# Column names, in the same order as the fields of each tuple in `li`.
timit_columns = ['dirpath', 'wavfile', 'phonetic_label', 'word_label',
                 'sentence_label', 'data_type', 'region', 'speaker', 'gender']

timit_df = pd.DataFrame({
    column: [record[position] for record in li]
    for position, column in enumerate(timit_columns)
})

In [561]:
timit_df.head()

Unnamed: 0,data_type,dirpath,gender,phonetic_label,region,sentence_label,speaker,wavfile,word_label
0,train,../../datasets/speech/timit/TIMIT/train/DR4/MMDM0,M,SI681.PHN,DR4,SI681.TXT,MMDM0,SI681.WAV,SI681.WRD
1,train,../../datasets/speech/timit/TIMIT/train/DR4/MMDM0,M,SA2.PHN,DR4,SA2.TXT,MMDM0,SA2.WAV,SA2.WRD
2,train,../../datasets/speech/timit/TIMIT/train/DR4/MMDM0,M,SX411.PHN,DR4,SX411.TXT,MMDM0,SX411.WAV,SX411.WRD
3,train,../../datasets/speech/timit/TIMIT/train/DR4/MMDM0,M,SA1.PHN,DR4,SA1.TXT,MMDM0,SA1.WAV,SA1.WRD
4,train,../../datasets/speech/timit/TIMIT/train/DR4/MMDM0,M,SX231.PHN,DR4,SX231.TXT,MMDM0,SX231.WAV,SX231.WRD


In [65]:
# Save...
timit_df.to_csv('timit_files_info.csv', index=False)

# and load
timit_df = pd.read_csv('timit_files_info.csv')

In [None]:
# and load
timit_df = pd.read_csv('timit_files_info.csv')

In [86]:
# and we see we get the same thing back
timit_df.head()

Unnamed: 0,data_type,dirpath,gender,phonetic_label,region,sentence_label,speaker,wavfile,word_label
0,train,../../datasets/speech/timit/TIMIT/train/DR4/MMDM0,M,SI681.PHN,DR4,SI681.TXT,MMDM0,SI681.WAV,SI681.WRD
1,train,../../datasets/speech/timit/TIMIT/train/DR4/MMDM0,M,SA2.PHN,DR4,SA2.TXT,MMDM0,SA2.WAV,SA2.WRD
2,train,../../datasets/speech/timit/TIMIT/train/DR4/MMDM0,M,SX411.PHN,DR4,SX411.TXT,MMDM0,SX411.WAV,SX411.WRD
3,train,../../datasets/speech/timit/TIMIT/train/DR4/MMDM0,M,SA1.PHN,DR4,SA1.TXT,MMDM0,SA1.WAV,SA1.WRD
4,train,../../datasets/speech/timit/TIMIT/train/DR4/MMDM0,M,SX231.PHN,DR4,SX231.TXT,MMDM0,SX231.WAV,SX231.WRD


## Extract features and labels on phonetic level

In [562]:
def parse_label_file(label_file):
  """Parse a label file in TIMIT dataset to a pandas.DataFrame object.

  Every non-blank line has the form `<begin> <end> <label>`, where the
  label is everything after the second space (it may contain spaces).

  Args:
    label_file: path to the label file.
  Returns:
    a pandas.DataFrame object containing 4 columns: begin, end, label
    (categorical) and label_file (the path given as argument).
  """
  begins, ends, labels = [], [], []
  with open(label_file, 'r') as f:
    for line in f:
      if not line.strip():
        continue  # ignore blank lines (e.g. at the end of the file)
      # Strip only the line terminator: dropping the last character
      # unconditionally would truncate the final label of a file that
      # does not end with a newline.
      fields = line.rstrip('\r\n').split(' ', 2)
      begins.append(int(fields[0]))
      ends.append(int(fields[1]))
      labels.append(fields[2] if len(fields) > 2 else '')
  df = pd.DataFrame({'begin': begins, 'end': ends, 'label': labels})
  df['label'] = df['label'].astype('category')
  df['label_file'] = label_file
  return df

In [563]:
label_file = os.path.join(li[0][0], li[0][2])
label_file

'../../datasets/speech/timit/TIMIT/train/DR4/MMDM0/SI681.PHN'

In [564]:
haha = parse_label_file(label_file)
haha.head()

Unnamed: 0,begin,end,label,label_file
0,0,1960,h#,../../datasets/speech/timit/TIMIT/train/DR4/MM...
1,1960,2466,w,../../datasets/speech/timit/TIMIT/train/DR4/MM...
2,2466,3480,ix,../../datasets/speech/timit/TIMIT/train/DR4/MM...
3,3480,4000,dcl,../../datasets/speech/timit/TIMIT/train/DR4/MM...
4,4000,5960,s,../../datasets/speech/timit/TIMIT/train/DR4/MM...


In [565]:
merged_df.index

Int64Index([], dtype='int64')

In [567]:
# Annotation level to parse; selects the `<level>_label` column of timit_df.
level='phonetic'

# NOTE: `li` is rebound here to a list of DataFrames, one per label file.
li = []

# Build the full path of each label file and parse it.
for index, row in timit_df.iterrows():
  label_file = os.path.join(row['dirpath'], row[level + '_label'])
  li.append(parse_label_file(label_file))

In [568]:
phonetic_df = pd.concat(li, ignore_index=True)

In [569]:
phonetic_df

Unnamed: 0,begin,end,label,label_file
0,0,1960,h#,../../datasets/speech/timit/TIMIT/train/DR4/MM...
1,1960,2466,w,../../datasets/speech/timit/TIMIT/train/DR4/MM...
2,2466,3480,ix,../../datasets/speech/timit/TIMIT/train/DR4/MM...
3,3480,4000,dcl,../../datasets/speech/timit/TIMIT/train/DR4/MM...
4,4000,5960,s,../../datasets/speech/timit/TIMIT/train/DR4/MM...
5,5960,7480,ah,../../datasets/speech/timit/TIMIT/train/DR4/MM...
6,7480,7880,tcl,../../datasets/speech/timit/TIMIT/train/DR4/MM...
7,7880,9400,ch,../../datasets/speech/timit/TIMIT/train/DR4/MM...
8,9400,9960,ix,../../datasets/speech/timit/TIMIT/train/DR4/MM...
9,9960,10680,n,../../datasets/speech/timit/TIMIT/train/DR4/MM...


In [571]:
timit_df['label_file'] = timit_df.apply(lambda row:
      os.path.join(row['dirpath'], row[level + '_label']), axis=1)

In [572]:
merged_df = pd.merge(phonetic_df, timit_df, on='label_file')

In [258]:
useful_df = merged_df[['begin', 'end', 'label', 'dirpath', 'wavfile', 'gender', 'region', 'data_type']]
useful_df.head()

Unnamed: 0,begin,end,label,dirpath,wavfile,gender,region,data_type
0,0,1960,h#,../../datasets/speech/timit/TIMIT/train/DR4/MMDM0,SI681.WAV,M,DR4,train
1,1960,2466,w,../../datasets/speech/timit/TIMIT/train/DR4/MMDM0,SI681.WAV,M,DR4,train
2,2466,3480,ix,../../datasets/speech/timit/TIMIT/train/DR4/MMDM0,SI681.WAV,M,DR4,train
3,3480,4000,dcl,../../datasets/speech/timit/TIMIT/train/DR4/MMDM0,SI681.WAV,M,DR4,train
4,4000,5960,s,../../datasets/speech/timit/TIMIT/train/DR4/MMDM0,SI681.WAV,M,DR4,train


## Convert categorical labels to integers

In [309]:
labels_df = merged_df[['gender', 'region', 'label']]\
              .astype({'label':'category', 
                       'gender':'category', 
                       'region':'category'})
labels_df.head()

Unnamed: 0,gender,region,label
0,M,DR4,h#
1,M,DR4,w
2,M,DR4,ix
3,M,DR4,dcl
4,M,DR4,s


In [493]:
def total_num_class(categorical_labels_df):
  """Return the total number of categories summed over all columns.

  Every column is accessed through the `.cat` accessor, so all columns
  must have a categorical dtype.
  """
  frame = categorical_labels_df
  return sum(len(frame[name].cat.categories) for name in frame.columns)

total_num_class(labels_df)

def cat_to_num(categorical_labels_df):
  """Convert a DataFrame of categorical columns to integer codes.

  This extends index encoding to several columns at once: each column is
  replaced by its category codes, shifted by the total number of
  categories of the columns that precede it, so that codes of different
  columns do not collide. Output columns are named `label0`, `label1`, ...

  NOTE(review): pandas encodes missing values with code -1; after the shift
  such a code would coincide with a code of the preceding column -- confirm
  the input has no missing values.
  """
  frame = categorical_labels_df
  category_counts = [len(frame[name].cat.categories) for name in frame.columns]
  offset = 0
  shifted_columns = []
  for position, (name, count) in enumerate(zip(frame.columns, category_counts)):
    assert str(frame[name].dtype) == 'category'
    # Shift the codes of this column past those of all previous columns.
    shifted = (frame[name].cat.codes + offset).rename('label' + str(position))
    shifted_columns.append(shifted)
    offset += count
  return pd.concat(shifted_columns, axis=1)
  
cat_to_num(labels_df).head()

71

In [308]:
def cat_to_num(categorical_labels_df):
  """Convert a DataFrame of categorical columns to integer codes.

  This extends index encoding to several columns at once: each column is
  replaced by its category codes, shifted by the total number of
  categories of the columns that precede it, so that codes of different
  columns do not collide. Output columns are named `label0`, `label1`, ...

  NOTE(review): pandas encodes missing values with code -1; after the shift
  such a code would coincide with a code of the preceding column -- confirm
  the input has no missing values.
  """
  frame = categorical_labels_df
  category_counts = [len(frame[name].cat.categories) for name in frame.columns]
  offset = 0
  shifted_columns = []
  for position, (name, count) in enumerate(zip(frame.columns, category_counts)):
    assert str(frame[name].dtype) == 'category'
    # Shift the codes of this column past those of all previous columns.
    shifted = (frame[name].cat.codes + offset).rename('label' + str(position))
    shifted_columns.append(shifted)
    offset += count
  return pd.concat(shifted_columns, axis=1)
  
cat_to_num(labels_df).head()

Unnamed: 0,label0,label1,label2
0,1,5,37
1,1,5,67
2,1,5,41
3,1,5,23
4,1,5,58


In [295]:
def has_repeated_labels(labels_df):
  """Tell whether at least one row contains the same value more than once."""
  def _row_has_duplicate(row):
    # A set drops duplicates, so a size mismatch reveals a repeated value.
    return len(row) != len(set(row))

  duplicate_flags = labels_df.apply(_row_has_duplicate, axis=1)
  return any(duplicate_flags)

In [313]:
# Check if some row has repeated index (a conflict).
# This step can be a bit slow
translated_labels_df = cat_to_num(labels_df)
has_repeated_labels(translated_labels_df)

False

In [318]:
useful_df.shape

(177080, 8)

In [316]:
translated_labels_df.shape

(177080, 3)

## Get interval data

In [321]:
def get_interval_data(wav_filepath, begin, end):
  """Return the samples `begin` (inclusive) to `end` (exclusive) of a SPHERE file.

  The whole file is decoded with `sphere_to_data` before being sliced.
  """
  samples = sphere_to_data(wav_filepath)
  interval = slice(begin, end)
  return samples[interval]

def get_interval_data_from_row(row):
  """Return the audio samples of the interval described by `row`.

  `row` must provide the keys `dirpath`, `wavfile`, `begin` and `end`.
  """
  return get_interval_data(
      os.path.join(row['dirpath'], row['wavfile']),
      row['begin'],
      row['end'])

In [424]:
for index, row in merged_df.iterrows():
  sample_interval = get_interval_data_from_row(row)
  if index > 42:
    print(row)
    break

begin                                                         13200
end                                                           14360
label                                                             k
label_file        ../../datasets/speech/timit/TIMIT/train/DR4/MM...
data_type                                                     train
dirpath           ../../datasets/speech/timit/TIMIT/train/DR4/MMDM0
gender                                                            M
phonetic_label                                              SA2.PHN
region                                                          DR4
sentence_label                                              SA2.TXT
speaker                                                       MMDM0
wavfile                                                     SA2.WAV
word_label                                                  SA2.WRD
Name: 43, dtype: object


In [425]:
# Play the cut audio to see whether it corresponds to the label
play_audio_from_bytes(sample_interval)

In [394]:
len(haha)

1890

# Convert TIMIT to AutoDLDataset (TFRecord)

In [88]:
import tensorflow as tf

In [426]:
time_series = sample_interval

In [434]:
# Helper functions

def _int64_feature(value):
  """Wrap `value`, a list of integers, in a `tf.train.Feature`."""
  int64_list = tf.train.Int64List(value=value)
  return tf.train.Feature(int64_list=int64_list)

def _bytes_feature(value):
  """Wrap `value`, a list of bytes, in a `tf.train.Feature`."""
  bytes_list = tf.train.BytesList(value=value)
  return tf.train.Feature(bytes_list=bytes_list)

def _float_feature(value):
  """Wrap `value`, a list of floats, in a `tf.train.Feature`."""
  float_list = tf.train.FloatList(value=value)
  return tf.train.Feature(float_list=float_list)

def _feature_list(feature):
  """Wrap `feature`, a list of `tf.train.Feature`, in a `tf.train.FeatureList`."""
  wrapped = tf.train.FeatureList(feature=feature)
  return wrapped

In [433]:
filename = 'timit-train-phonetic'

In [436]:
type(time_series)

numpy.ndarray

In [None]:
time

In [486]:
timit_tfrecord_filename = 'timit-train-phonetic'

In [488]:
def time_series_to_sequence_example_df(merged_df, labels_df, filename):
  """Write one SequenceExample per row of `merged_df` to a TFRecord file.

  Every row designates an interval of samples in an audio file. The samples
  are stored as a feature list under the key `0_dense_input` (one float
  feature per sample); the row index, the integer labels and a score of 1
  per label are stored in the context.

  Args:
    merged_df: a pd.DataFrame object containing columns: `dirpath`, `wavfile`, `begin`, `end`
    labels_df: a pd.DataFrame object only containing integer labels
    filename: a string
  Returns:
    None. Save a TFRecord to filename
  Raises:
    ValueError: if the two DataFrames do not have the same number of rows.
  """
  num_examples = merged_df.shape[0]
  num_labels = labels_df.shape[0] # number of lines of labels
  if num_examples != num_labels:
    raise ValueError("Number of examples {:d} does not match number of labels {:d}."\
                     .format(num_examples, num_labels))

  # Decoding a file means a full SoX conversion. Keep the most recently
  # decoded file so that consecutive rows referring to the same audio file
  # do not trigger the conversion again.
  cached_filepath = None
  cached_data = None

  feature_label_generator = zip(merged_df.iterrows(), labels_df.iterrows())
  print("Writing to: {}... Total number of examples: {:d}".format(filename, num_examples))
  with tf.python_io.TFRecordWriter(filename) as writer:
    for (index, feature_row), (_, label_row) in feature_label_generator:
      if index % 1000 == 0:
        print("Writing example of index: ", index)
      le = len(label_row) # number of labels in this line
      label_array = label_row.values

      wav_filepath = os.path.join(feature_row['dirpath'], feature_row['wavfile'])
      if wav_filepath != cached_filepath:
        cached_data = sphere_to_data(wav_filepath)
        cached_filepath = wav_filepath
      feature_array = cached_data[feature_row['begin']: feature_row['end']]
      feature_list = [_float_feature([x]) for x in feature_array]

      context = tf.train.Features(
            feature={
                'id': _int64_feature([index]), # use index as id
                'label_index': _int64_feature(label_array),
                'label_score': _float_feature([1]*le)
            })
      feature_lists = tf.train.FeatureLists(
          feature_list={
          '0_dense_input': _feature_list(feature_list)
          })
      sequence_example = tf.train.SequenceExample(
          context=context,
          feature_lists=feature_lists)
      writer.write(sequence_example.SerializeToString())

In [485]:
time_series_to_sequence_example_df(merged_df, translated_labels_df, timit_tfrecord_filename)

Writing to: timit-train-phonetic... Total number of examples: 177080
Writing example of index:  0
Writing example of index:  100
Writing example of index:  200
Writing example of index:  300
Writing example of index:  400
Writing example of index:  500
Writing example of index:  600
Writing example of index:  700
Writing example of index:  800
Writing example of index:  900


In [437]:
def time_series_to_sequence_example(li_time_series, labels, filename):
  """Convert a list of time series (Numpy array) to TFRecords
  following SequenceExample proto.

  Every time series is stored as a feature list under the key
  `0_dense_input` (one float feature per value); its position in the
  input, its integer labels and a score of 1 per label are stored in the
  context.

  Args:
    li_time_series: a generator(or list) of numpy array
    labels: a generator(or list) of lists (or tuples) of integers
    filename: a string
  Returns:
    None. Save a TFRecord to filename
  Raises:
    ValueError: if the number of time series and of label lines differ.
  """
  # Materialize the inputs so that generators are accepted, as documented
  # (len() cannot be called on a generator).
  li_time_series = list(li_time_series)
  labels = list(labels)

  num_examples = len(li_time_series)
  num_labels = len(labels) # number of lines of labels
  if num_examples != num_labels:
    raise ValueError("Number of examples {:d} does not match number of labels {:d}."\
                     .format(num_examples, num_labels))

  print("Writing: {}... Total number of examples: {:d}".format(filename, num_examples))
  with tf.python_io.TFRecordWriter(filename) as writer:
    for index, (time_series, label_line) in enumerate(zip(li_time_series, labels)):
      if index % 1000 == 0:
        print("Writing example of index: ", index)
      le = len(label_line) # number of labels in this line
      # _float_feature expects a list, hence one single-element list per value.
      feature_list = [_float_feature([x]) for x in time_series]
      context = tf.train.Features(
            feature={
                'id': _int64_feature([index]), # use index as id
                'label_index': _int64_feature(label_line),
                'label_score': _float_feature([1]*le)
            })
      feature_lists = tf.train.FeatureLists(
          feature_list={
          '0_dense_input': _feature_list(feature_list)
          })
      sequence_example = tf.train.SequenceExample(
          context=context,
          feature_lists=feature_lists)
      writer.write(sequence_example.SerializeToString())

In [489]:
def print_first_sequence_example(path_to_tfrecord):
  """Print the first record of a TFRecord file, parsed as a SequenceExample.

  Nothing is printed if the file contains no record.
  """
  records = tf.python_io.tf_record_iterator(path_to_tfrecord)
  for serialized in records:
    print(tf.train.SequenceExample.FromString(serialized))
    return  # only the first record is of interest

In [596]:
os.path.pardir('hahahah/haha')

'haha'

In [490]:
print_first_sequence_example(timit_tfrecord_filename)

context {
  feature {
    key: "id"
    value {
      int64_list {
        value: 0
      }
    }
  }
  feature {
    key: "label_index"
    value {
      int64_list {
        value: 1
        value: 5
        value: 37
      }
    }
  }
  feature {
    key: "label_score"
    value {
      float_list {
        value: 1.0
        value: 1.0
        value: 1.0
      }
    }
  }
}
feature_lists {
  feature_list {
    key: "0_dense_input"
    value {
      feature {
        float_list {
          value: -7.0
        }
      }
      feature {
        float_list {
          value: 2.0
        }
      }
      feature {
        float_list {
          value: 1.0
        }
      }
      feature {
        float_list {
          value: 1.0
        }
      }
      feature {
        float_list {
          value: -3.0
        }
      }
      feature {
        float_list {
          value: 0.0
        }
      }
      feature {
        float_list {
          value: -1.0
        }
      }
      feature 

## Test this file with dataset.py

In [494]:
dataset_dir = 'timit-train'
os.path.isdir(dataset_dir)

True

In [498]:
%ls ../../tfrecord_format/autodl-format-definition/

data.proto   data_pb2.py  dataset.py


In [500]:
autodl_def_dir = "../../tfrecord_format/autodl-format-definition/"
sys.path.append(autodl_def_dir)

In [516]:
from dataset import AutoDLDataset

def test_autodldataset(dataset_dir):
  """Read one batch from an AutoDLDataset and print a summary of it.

  Args:
    dataset_dir: directory of the dataset, passed to `AutoDLDataset`.
  Returns:
    A tuple (features, labels) holding the evaluated values of one batch.
  """
  autodl_dataset = AutoDLDataset(dataset_dir)
  autodl_dataset.init()
  dataset = autodl_dataset.get_dataset()
  iterator = dataset.make_one_shot_iterator()
  features, labels = iterator.get_next()
  with tf.Session() as sess:
    # Fetch both tensors in a single run: evaluating them separately would
    # advance the iterator twice and pair the features of one batch with
    # the labels of the next one.
    val_features, val_labels = sess.run([features, labels])
    print("Successfully parsed the dataset!")
    print("Feature shape: {}, label shape: {}".format(val_features.shape, val_labels.shape))
    print("Features: ", val_features)
    print("Labels: ", val_labels)
  return val_features, val_labels

In [517]:
val_features, val_labels = test_autodldataset(dataset_dir)

INFO:tensorflow:Number of training files: 1.
Successfully parsed the dataset!
Feature shape: (30, 2000, 1, 1), label shape: (30, 71)
Features:  [[[[  -7.]]

  [[   2.]]

  [[   1.]]

  ...

  [[   0.]]

  [[   0.]]

  [[   0.]]]


 [[[   9.]]

  [[  11.]]

  [[   8.]]

  ...

  [[   0.]]

  [[   0.]]

  [[   0.]]]


 [[[ -86.]]

  [[ -92.]]

  [[-105.]]

  ...

  [[   0.]]

  [[   0.]]

  [[   0.]]]


 ...


 [[[ 104.]]

  [[ 102.]]

  [[  93.]]

  ...

  [[   0.]]

  [[   0.]]

  [[   0.]]]


 [[[  -6.]]

  [[  86.]]

  [[ -64.]]

  ...

  [[   0.]]

  [[   0.]]

  [[   0.]]]


 [[[  11.]]

  [[  -5.]]

  [[ -17.]]

  ...

  [[   0.]]

  [[   0.]]

  [[   0.]]]]
Labels:  [[0. 1. 0. ... 0. 0. 0.]
 [0. 1. 0. ... 0. 0. 0.]
 [0. 1. 0. ... 0. 0. 0.]
 ...
 [0. 1. 0. ... 0. 0. 0.]
 [0. 1. 0. ... 0. 0. 0.]
 [0. 1. 0. ... 0. 0. 0.]]


In [537]:
def generate_labels_array(test_dataset_dir):
  """Collect all labels of a dataset and save them as a `.solution` file.

  The dataset is read once (no repetition) in batches of 10000 examples.
  The labels are written, one example per line, to
  `<test_dataset_dir>/<directory name>.solution`.

  Args:
    test_dataset_dir: directory of the dataset, passed to `AutoDLDataset`.
  Returns:
    A numpy array holding the labels of all examples.
  """
  autodl_dataset = AutoDLDataset(test_dataset_dir)
  autodl_dataset.init(batch_size=10000, repeat=False)
  dataset = autodl_dataset.get_dataset()
  iterator = dataset.make_one_shot_iterator()
  _, labels = iterator.get_next()  # only the labels are needed

  li_batch = []
  num_batch = 0
  # NOTE(review): should_stop() presumably becomes true once the
  # non-repeating dataset is exhausted -- confirm.
  with tf.train.MonitoredTrainingSession() as sess:
    while not sess.should_stop():
        label_batch = sess.run(labels)
        li_batch.append(label_batch)
        num_batch += 1
        print("Num Batch: ", num_batch)

  all_labels = np.concatenate(li_batch)
  # Name the file after the last path component only, so that a trailing
  # separator or parent directories in the argument do not end up in the
  # file name.
  dataset_name = os.path.basename(os.path.normpath(test_dataset_dir))
  solution_path = os.path.join(test_dataset_dir, dataset_name + '.solution')
  np.savetxt(solution_path, all_labels, fmt='%.0f')
  return all_labels

In [538]:
haha = generate_labels_array('timit-train')

INFO:tensorflow:Number of training files: 1.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
Num Batch:  1


In [526]:
haha.shape

(1000, 71)

# Aggregate all to have a global dataset formatter

In [542]:
timit_dir

'../../datasets/speech/timit/'

In [None]:
os.path.isfx

In [None]:
if not num_examples or index < num_examples

In [541]:
1 < 0

False

In [576]:
merged_df[merged_df['data_type'] == 'train']

Unnamed: 0,begin,end,label,label_file,data_type,dirpath,gender,phonetic_label,region,sentence_label,speaker,wavfile,word_label
0,0,1960,h#,../../datasets/speech/timit/TIMIT/train/DR4/MM...,train,../../datasets/speech/timit/TIMIT/train/DR4/MMDM0,M,SI681.PHN,DR4,SI681.TXT,MMDM0,SI681.WAV,SI681.WRD
1,1960,2466,w,../../datasets/speech/timit/TIMIT/train/DR4/MM...,train,../../datasets/speech/timit/TIMIT/train/DR4/MMDM0,M,SI681.PHN,DR4,SI681.TXT,MMDM0,SI681.WAV,SI681.WRD
2,2466,3480,ix,../../datasets/speech/timit/TIMIT/train/DR4/MM...,train,../../datasets/speech/timit/TIMIT/train/DR4/MMDM0,M,SI681.PHN,DR4,SI681.TXT,MMDM0,SI681.WAV,SI681.WRD
3,3480,4000,dcl,../../datasets/speech/timit/TIMIT/train/DR4/MM...,train,../../datasets/speech/timit/TIMIT/train/DR4/MMDM0,M,SI681.PHN,DR4,SI681.TXT,MMDM0,SI681.WAV,SI681.WRD
4,4000,5960,s,../../datasets/speech/timit/TIMIT/train/DR4/MM...,train,../../datasets/speech/timit/TIMIT/train/DR4/MMDM0,M,SI681.PHN,DR4,SI681.TXT,MMDM0,SI681.WAV,SI681.WRD
5,5960,7480,ah,../../datasets/speech/timit/TIMIT/train/DR4/MM...,train,../../datasets/speech/timit/TIMIT/train/DR4/MMDM0,M,SI681.PHN,DR4,SI681.TXT,MMDM0,SI681.WAV,SI681.WRD
6,7480,7880,tcl,../../datasets/speech/timit/TIMIT/train/DR4/MM...,train,../../datasets/speech/timit/TIMIT/train/DR4/MMDM0,M,SI681.PHN,DR4,SI681.TXT,MMDM0,SI681.WAV,SI681.WRD
7,7880,9400,ch,../../datasets/speech/timit/TIMIT/train/DR4/MM...,train,../../datasets/speech/timit/TIMIT/train/DR4/MMDM0,M,SI681.PHN,DR4,SI681.TXT,MMDM0,SI681.WAV,SI681.WRD
8,9400,9960,ix,../../datasets/speech/timit/TIMIT/train/DR4/MM...,train,../../datasets/speech/timit/TIMIT/train/DR4/MMDM0,M,SI681.PHN,DR4,SI681.TXT,MMDM0,SI681.WAV,SI681.WRD
9,9960,10680,n,../../datasets/speech/timit/TIMIT/train/DR4/MM...,train,../../datasets/speech/timit/TIMIT/train/DR4/MMDM0,M,SI681.PHN,DR4,SI681.TXT,MMDM0,SI681.WAV,SI681.WRD


In [574]:
merged_df.shape

(177080, 13)

In [584]:
labels_df.columns

Index(['gender', 'region', 'label'], dtype='object')

In [585]:
yo = labels_df['region'].cat

In [586]:
yo.categories

Index(['DR1', 'DR2', 'DR3', 'DR4', 'DR5', 'DR6', 'DR7', 'DR8'], dtype='object')

In [591]:
bool('a')

True

In [598]:
'hha/hahah/'.split(os.path.sep)

['hha', 'hahah', '']