[View in Colaboratory](https://colab.research.google.com/github/youqad/Neurorobotics_Intent-Recognition/blob/master/TP_Intent_Recognition.ipynb)

# TP: Intent recognition

# *Ex1*: Automatic detection of speaker’s intention from supra-segmental features

## 1. Extraction of prosodic features ($f_0$ and energy)

In [0]:
import urllib.request
import numpy as np
import random
import pandas as pd
from google.colab import files as google_files

In [0]:
def list_from_URL(file_URL, function_applied=None):
  """Download a text resource and return its lines as a list.

  Every line is decoded as UTF-8 and stripped of trailing whitespace
  (which includes the line terminator).

  Args:
    file_URL: URL handed to `urllib.request.urlopen`.
    function_applied: optional callable applied to each decoded, stripped
      line; its return value is stored in place of the line.

  Returns:
    A list with one entry per line of the resource, in order.
  """
  # The context manager closes the response even if decoding or the
  # callback raises; the original left the connection open.
  with urllib.request.urlopen(file_URL) as response:
    lines_bytes = response.readlines()

  if function_applied is None:
    return [raw.decode("utf-8").rstrip() for raw in lines_bytes]

  return [function_applied(raw.decode("utf-8").rstrip()) for raw in lines_bytes]

# Fetch the list of file names, then drop duplicates.
# dict.fromkeys keeps the first occurrence of each name in its original
# position, so the order is the same on every run; list(set(...)) returned
# the names in an arbitrary order that can change between sessions.
filenames = list_from_URL('https://raw.githubusercontent.com/youqad/Neurorobotics_Intent-Recognition/master/filenames.txt')
filenames = list(dict.fromkeys(filenames))

In [0]:
# # /!\ NO NEED TO EXECUTE THIS CELL AGAIN !!!
# 
# files = []
# indices = []
# 
# for file in filenames:
# 
#     URL_f0 = 'https://raw.githubusercontent.com/youqad/Neurorobotics_Intent-Recognition/master/data_files/{}.f0'.format(file)
#     file_dicts = [{key:val for key, val in zip(['time', 'f0'], map(float, l.split()))} for l in list_from_URL(URL_f0)]
# 
#     URL_en = 'https://raw.githubusercontent.com/youqad/Neurorobotics_Intent-Recognition/master/data_files/{}.en'.format(file)
#     for l, d in zip(list_from_URL(URL_en), file_dicts):
#       d["file"] = file
#       d["en"] = float(l.split()[1])
# 
#     files.extend(file_dicts)
# 
# # What `files` looks like:
# # files = [ 
# #           {"file": "cy0001at", "time": 0.02, "f0": 0., "en": 0.},
# #           {"file": "cy0001at", "time": 1.28, "f0": 0., "en": 0.},
# #           ...
# #           {"file": "li1450at", "time": 0.02, "f0": 0., "en": 0.},
# #           {"file": "li1450at", "time": 1.56, "f0": 404., "en": 65.}
# #         ]
# 
# pd.DataFrame(files).to_csv('data.csv', encoding='utf-8', index=False) # To reuse it next time
# google_files.download('data.csv')

In [21]:
# Load the training data (columns `en`, `f0`, `time`), indexed by `file`.
df = pd.read_csv(
    'https://raw.githubusercontent.com/youqad/Neurorobotics_Intent-Recognition/master/data.csv',
    index_col='file',
)
df.head()

Unnamed: 0_level_0,en,f0,time
file,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
cy0234pw,0.0,0.0,0.02
cy0234pw,0.0,0.0,0.04
cy0234pw,59.0,0.0,0.06
cy0234pw,59.0,0.0,0.08
cy0234pw,58.0,0.0,0.1


In [29]:
# Show the names of the data columns (`file` is the index, not a column).
print(df.columns.values)

# Per-file mean of every column; only the first rows are displayed.
df.groupby('file').mean().head()

['en' 'f0' 'time']


Unnamed: 0_level_0,en,f0,time
file,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
cy0001at,47.296875,110.609375,0.65
cy0002at,47.337079,105.640449,0.9
cy0005at,47.296875,110.609375,0.65
cy0006at,47.337079,105.640449,0.9
cy0007pw,52.313725,92.284314,1.03


In [30]:
# Per-file maximum of every column; only the first rows are displayed.
df.groupby('file').max().head()

Unnamed: 0_level_0,en,f0,time
file,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
cy0001at,73.0,402.0,1.28
cy0002at,76.0,430.0,1.78
cy0005at,73.0,402.0,1.28
cy0006at,76.0,430.0,1.78
cy0007pw,71.0,257.0,2.04


In [31]:
# Per-file variance of every column; only the first rows are displayed.
df.groupby('file').var().head()

Unnamed: 0_level_0,en,f0,time
file,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
cy0001at,327.323165,27607.511657,0.138667
cy0002at,269.11236,27108.528345,0.267
cy0005at,327.323165,27607.511657,0.138667
cy0006at,269.11236,27108.528345,0.267
cy0007pw,228.455057,10372.542128,0.3502


In [32]:
# Per-file median of every column; only the first rows are displayed.
df.groupby('file').median().head()

Unnamed: 0_level_0,en,f0,time
file,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
cy0001at,41.5,0.0,0.65
cy0002at,41.0,0.0,0.9
cy0005at,41.5,0.0,0.65
cy0006at,41.0,0.0,0.9
cy0007pw,52.0,0.0,1.03


In [34]:
# Per-file first and third quartiles (0.25 and 0.75 quantiles) of every
# column; the quantile becomes a second index level in the result.
df.groupby('file').quantile([.25, .75]).head()

Unnamed: 0_level_0,Unnamed: 1_level_0,en,f0,time
file,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
cy0001at,0.25,40.0,0.0,0.335
cy0001at,0.75,67.0,331.0,0.965
cy0002at,0.25,40.0,0.0,0.46
cy0002at,0.75,62.0,251.0,1.34
cy0005at,0.25,40.0,0.0,0.335


In [38]:
# Summary statistics computed per file, keyed by the name each one gets in
# the result. Strings name pandas' built-in reductions; the callables
# receive the values of one column for one file as a Series.
agg_features = {
    'mean': 'mean',
    'max': 'max',
    'range': lambda x: x.max() - x.min(),
    'var': 'var',
    'median': 'median',
    '1st_quartile': lambda x: x.quantile(.25),
    '3rd_quartile': lambda x: x.quantile(.75),
    'mean_absolute_local_derivate': lambda x: x.diff().abs().mean()
}

# Two deprecated pandas idioms are avoided here:
#   * columns are selected with a list (`[['f0', 'en']]`) rather than the
#     tuple-style `['f0', 'en']`;
#   * each statistic is aggregated on its own and the results are
#     concatenated under the statistic's name, instead of handing `.agg` a
#     {new_name: function} renaming dict (the source of the warning the
#     original call printed).
# The result keeps the same column layout: statistic name on the outer
# level, `f0` / `en` on the inner level.
per_file = df.groupby('file')[['f0', 'en']]
pd.concat(
    {name: per_file.agg(func) for name, func in agg_features.items()},
    axis=1,
)

  return super(DataFrameGroupBy, self).aggregate(arg, *args, **kwargs)


Unnamed: 0_level_0,mean,mean,max,max,range,range,var,var,median,median,1st_quartile,1st_quartile,3rd_quartile,3rd_quartile,mean_absolute_local_derivate,mean_absolute_local_derivate
Unnamed: 0_level_1,f0,en,f0,en,f0,en,f0,en,f0,en,f0,en,f0,en,f0,en
file,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2
cy0001at,110.609375,47.296875,402.0,73.0,402.0,73.0,27607.511657,327.323165,0.0,41.5,0.0,40.00,331.00,67.00,23.777778,3.809524
cy0002at,105.640449,47.337079,430.0,76.0,430.0,76.0,27108.528345,269.112360,0.0,41.0,0.0,40.00,251.00,62.00,15.636364,3.272727
cy0005at,110.609375,47.296875,402.0,73.0,402.0,73.0,27607.511657,327.323165,0.0,41.5,0.0,40.00,331.00,67.00,23.777778,3.809524
cy0006at,105.640449,47.337079,430.0,76.0,430.0,76.0,27108.528345,269.112360,0.0,41.0,0.0,40.00,251.00,62.00,15.636364,3.272727
cy0007pw,92.284314,52.313725,257.0,71.0,257.0,71.0,10372.542128,228.455057,0.0,52.0,0.0,41.00,189.50,66.00,13.683168,2.970297
cy0008pw,78.431373,47.725490,250.0,70.0,250.0,70.0,9930.090196,321.963137,0.0,43.0,0.0,41.00,192.00,64.50,26.440000,3.960000
cy0009pw,69.065789,49.473684,243.0,74.0,243.0,74.0,8927.182281,260.839298,0.0,42.0,0.0,40.75,182.25,66.00,12.853333,3.520000
cy0010pw,29.196078,46.049020,221.0,77.0,221.0,77.0,4696.178994,165.789652,0.0,42.0,0.0,41.00,0.00,50.75,15.267327,3.306931
cy0011pw,110.743590,53.653846,230.0,71.0,230.0,71.0,9290.400932,258.125375,172.0,62.0,0.0,41.25,192.50,66.00,7.506494,2.337662
cy0012pw,74.539474,50.250000,224.0,67.0,224.0,67.0,7363.451754,241.523333,0.0,42.5,0.0,41.00,167.00,66.00,6.346667,2.320000


In [0]:
def statistics(data_matrix, column=1, ddof=0):
  """Compute eight summary statistics for one column of every file.

  Args:
    data_matrix: iterable of 2-D array-likes, one per file.
    column: index of the column to summarise. Defaults to 1, the column
      this function has always used.
    ddof: delta degrees of freedom forwarded to `np.var`. The default 0
      gives the population variance (NumPy's default); pass 1 for the
      sample variance, which is what pandas' `var` computes.

  Returns:
    A NumPy array with one row per file whose columns are, in order:
    mean, maximum, range (max - min), variance, median, first quartile,
    third quartile, and mean absolute difference between consecutive
    values. A file with a single row has no consecutive differences, so
    its last statistic is NaN.
  """
  output = []

  for file in data_matrix:
    # Slice the column once instead of once per statistic.
    values = np.asarray(file)[:, column]

    maximum = np.max(values)
    # Named `value_range` so the builtin `range` is not shadowed.
    value_range = maximum - np.min(values)

    output.append([
        np.mean(values),
        maximum,
        value_range,
        np.var(values, ddof=ddof),
        np.median(values),
        np.percentile(values, 25),
        np.percentile(values, 75),
        np.mean(np.abs(np.diff(values))),
    ])

  return np.array(output)

# One statistics matrix per measurement type, keyed by 'f0' and 'en'.
# Built in a single expression: the empty-list placeholders that used to be
# assigned first were always overwritten by the loop.
# NOTE(review): `files` must map 'f0' / 'en' to the per-file arrays expected
# by `statistics`; that structure is not built in the visible cells -- confirm.
features = {ext: statistics(files[ext]) for ext in ['f0', 'en']}

In [5]:
# Display the statistics arrays stored under 'f0' and 'en'.
features

{'en': array([[60.94252874, 83.        , 83.        , ..., 59.        ,
         63.5       ,  3.06976744],
        [49.5826087 , 86.        , 86.        , ..., 40.        ,
         64.5       ,  3.66666667],
        [70.02      , 91.        , 91.        , ..., 57.        ,
         83.        ,  4.64646465],
        ...,
        [66.72580645, 85.        , 85.        , ..., 55.        ,
         82.        ,  4.45901639],
        [50.609375  , 83.        , 83.        , ..., 40.        ,
         66.5       ,  3.7480315 ],
        [55.31304348, 81.        , 81.        , ..., 42.        ,
         69.5       ,  3.92982456]]),
 'f0': array([[ 34.94252874, 258.        , 258.        , ...,   0.        ,
           0.        ,  12.51162791],
        [114.76521739, 508.        , 508.        , ...,   0.        ,
         337.        ,  29.9122807 ],
        [115.3       , 501.        , 501.        , ...,   0.        ,
         192.        ,  38.32323232],
        ...,
        [105.01612903, 2

In [0]:
# Question 3
#
# Explanation: a sound is either voiced (like a vowel) or unvoiced (like a
# consonant). The rows are split on whether column 1 is zero so that the two
# groups can be compared to see which one carries more information about the
# speaker's intent.
#
# NOTE(review): `data` is a placeholder -- replace it with the real variable.
# The code assumes a 2-D NumPy array whose column 1 holds f0 -- TODO confirm.

# Boolean-mask indexing; `.loc` is a pandas accessor and does not exist on
# NumPy arrays.
voiced = data[data[:, 1] != 0]
unvoiced = data[data[:, 1] == 0]


# Question 4
#
# Shuffle the rows, then keep the first 60% for training and the rest for
# testing.

tot_sz = len(data)  # `length` is not a Python builtin

train_sz = round(tot_sz * 0.6)
# np.random.shuffle permutes the rows in place. random.shuffle swaps elements
# through views on a multi-dimensional array, which can duplicate some rows
# and lose others.
np.random.shuffle(data)
train_data = data[:train_sz]
test_data = data[train_sz:]
  

  


In [0]:
x = np.array([[1, 3], 
              [0, 5],
              [0, 5]])


#np.mean(np.abs(np.diff(x, axis=0)), axis=0)
np.mean(np.abs(np.diff(x, axis=0)), axi

array([0.5, 1. ])