This file contains code to quantify differences in song structure and sequence based on sequence linearity, consistency and stereotypy [Sakata and Brainard 2006] and transition entropy [Scharff and Nottebohm 1991].

Date: 13/10/22

In [1]:
import math
import pandas
import os
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt

In [2]:
#Function to find the indices of all occurrences of 'End' in the syllable labels (the returned list always starts with 0)
def find_end_indices(list, element):
    """Return the positions of `element` in `list`, preceded by a leading 0.

    The result always begins with 0, whether or not the first item equals
    `element`, and is followed by the index of every item that compares
    equal to `element`, in ascending order.
    """
    matches = [position for position, item in enumerate(list) if item == element]
    return [0] + matches

#Define a function to pick song bouts that are long in length - Returns a list of data frames
def motif_finder(bouts_list, min_syllables=6):
    """Return the bouts that have at least `min_syllables` syllable rows.

    Args:
        bouts_list: list of bout data frames, each with a 'Syll Label' column.
        min_syllables: bouts whose 'Syll Label' column has fewer entries than
            this are dropped. Defaults to 6, the previously hard-coded cut-off.

    Returns:
        A new list with the retained bouts in their original order.
        `bouts_list` itself is left untouched.
    """
    # Building a filtered list avoids deleting by index from a shrinking list
    # (which needed every index to be shifted first) and drops the unused
    # average-length computation, which warned on an empty input.
    return [bout for bout in bouts_list if len(bout['Syll Label']) >= min_syllables]

#Define a function to pick song bouts that have more than 3 unique syllables in the bout
#Returns a list of data frames
def motif_finder_unisyl(bouts_list, min_unique_labels=6):
    """Return the bouts that contain at least `min_unique_labels` distinct labels.

    Args:
        bouts_list: list of bout data frames, each with a 'Syll Label' column.
        min_unique_labels: bouts with fewer distinct (non-missing) values in
            'Syll Label' than this are dropped. Defaults to 6, the previously
            hard-coded cut-off.

    Returns:
        A new list with the retained bouts in their original order.
        `bouts_list` itself is left untouched.
    """
    # The default of 6 follows the original reasoning: 'Start' + 'End' + 3
    # unique syllables = 5 labels, and such bouts are treated as call bouts
    # and removed.
    # NOTE(review): this only holds if the bouts passed in still contain their
    # 'Start' and 'End' rows - confirm against the caller.
    return [
        bout for bout in bouts_list
        if bout['Syll Label'].nunique() >= min_unique_labels
    ]

#Combines all the dataframes in a list
def concatenate_bout_list(bout_list):
    """Stack all bout data frames and put a synthetic 'Start' row on top.

    Args:
        bout_list: list of data frames with 15 columns each (the 'Start' row
            below is hard-coded to that width).

    Returns:
        A new data frame rebuilt from the stacked values, so it carries
        default integer column labels and a fresh index. Row 0 is the
        'Start' row; the bouts follow in list order. An empty `bout_list`
        yields a frame holding only the 'Start' row.
    """
    # Placeholder row: zeros everywhere except 'Start' in the third column.
    start_row = [0, 0, 'Start', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
    if len(bout_list) == 0:
        # pandas.concat rejects an empty sequence; return just the marker row.
        return pandas.DataFrame(np.array([start_row], dtype=object))
    # One concat call instead of re-concatenating inside a loop: avoids copying
    # the growing frame on every iteration and avoids concatenating with an
    # empty frame.
    combined = pandas.concat(list(bout_list))
    return pandas.DataFrame(np.insert(combined.values, 0, start_row, axis=0))

#To extract the column-wise mean value (only works when all rows have the same length, i.e. a 2-D array!)
def mean_value(data):
    """Return the mean of every column of a 2-D array as a list.

    `data` is indexed as rows x columns; the number of columns is taken from
    the first row, so `data` must contain at least one row.
    """
    n_columns = len(data[0])
    return [np.mean([row[col] for row in data]) for col in range(n_columns)]

#To calculate inter-note intervals
def intervals(song_list):
    """Return, for every bout, the gaps between consecutive syllables.

    Each gap is the onset of a syllable minus the offset of the syllable
    before it, read from the ' Syll Onset (ms)' and ' Syll Offset (ms)'
    columns. A bout with n rows contributes a list of n - 1 gaps (empty for
    bouts with fewer than two rows).
    """
    gaps_per_bout = []
    for bout in song_list:
        onsets = np.array(bout[' Syll Onset (ms)'])
        offsets = np.array(bout[' Syll Offset (ms)'])
        # Pair every onset (from the second row on) with the previous offset.
        gaps = [nxt - prev for nxt, prev in zip(onsets[1:], offsets[:-1])]
        gaps_per_bout.append(gaps)
    return gaps_per_bout

In [3]:
# Start the per-bird summary table: an empty frame with one column per
# quantifier, written out so that only the header line is in the CSV.
data = {
    column: []
    for column in (
        'Bird name',
        'Species',
        'No. of bouts',
        'Avg. No. of Syll per song',
        'Song duration',
        'Avg. repeats of first syll',
        'Minimum motif length',
    )
}

df = pandas.DataFrame(data)
df.to_csv('02. Syll_quantifiers.csv', index=False)

In [4]:
# Bird identifiers, grouped by species prefix. The processing loops pair
# these with the input files by position (labels[file_idx]).
labels = [
    # BCC
    'BCC_M01', 'BCC_M03', 'BCC_M08', 'BCC_M09',
    'BCC_M11', 'BCC_M19', 'BCC_M21', 'BCC_M22',
    # BF
    'BF_brn24pnk13', 'BF_brn25pnk14', 'BF_brn35pnk21', 'BF_brn36pnk24',
    'BF_org27ylw19',
    # JF
    'JF_red28blu13', 'JF_ylw14gry00', 'JF_ylw20gry09', 'JF_ylw22gry08',
    'JF_ylw25gry11',
    # ZF
    'ZF_grn21org41', 'ZF_org01wht58', 'ZF_org11pnk05', 'ZF_pnk93pnk91',
    'ZF_red15ylw15', 'ZF_red77pnk45', 'ZF_ylw67brn42', 'ZF_ylw95ylw29',
]

# Species code of every bird: the part of its label before the first '_'.
sp_label = [bird.split('_')[0] for bird in labels]


In [19]:
directory = 'D:\\4th Year\\Semester 7\\BI4313 Sem Project\\IN-comparative-analysis\\IN-comparative-analysis\\test\\Bout2_Start_End'

# Collect the path of every regular file in that directory.
# os.scandir yields entries in arbitrary order, but the processing loops pair
# file number i with labels[i], so the paths are sorted to make that pairing
# reproducible.
# NOTE(review): this assumes the file names sort in the same order as the
# (alphabetically ordered) labels list - confirm against the directory.
# The `with` block closes the directory iterator once the listing is done.
with os.scandir(directory) as entries:
    song_bout_files = sorted(entry.path for entry in entries if entry.is_file())

In [6]:

# For every bird file: split the table into bouts, compute the summary
# quantifiers and append one row to '02. Syll_quantifiers.csv'.
for file_idx, bout_file in enumerate(song_bout_files):
    # The file is read without a header row; the 15 column names are given here.
    data = pandas.read_csv(bout_file, header=None,
      names = ['FileName', 'Syll #', 'Syll Label',
      ' Syll Onset (ms)', ' Syll Offset (ms)', ' Syll Duration (sec)', ' Mean Frequency (Hz)',
      ' Entropy', 'Log Amplitude (dB)', 'Pitch Goodness', 'FrequencyModulation',
      'AmplitudeModulation', 'EntropyVariance', 'Fundamental Frequency (Hz)', 'RMS Amplitude'])

    all_syl = data['Syll Label'].tolist()
    # Boundaries: a leading 0 followed by the row position of every 'End'.
    end_indices = find_end_indices(all_syl, 'End')

    # Each bout starts two rows after a boundary and stops just before the
    # next 'End' row, so the 'End' row itself is left out.
    # NOTE(review): the +2 presumably skips the 'End' row and the 'Start' row
    # that follows it; for the first bout it skips rows 0 and 1 - confirm
    # against the file layout.
    song_bouts = [
        data.iloc[end_indices[i] + 2 : end_indices[i + 1], :]
        for i in range(len(end_indices) - 1)
    ]

    # No length filtering here: every bout is used.
    song_motif = song_bouts

    #Number of bouts
    n_bouts = len(song_motif)

    #No. of syllables in a song (mean over bouts, truncated to an integer)
    syll_num = [len(bout['Syll Label']) for bout in song_motif]
    avg_syl_num = int(np.mean(syll_num))

    #Avg no. of times the first syllable repeats
    repeat_num = []
    for bout in song_motif:
        syllable_list = bout['Syll Label'].tolist()
        # Position of the first label that differs from the opening label,
        # i.e. the length of the opening run. When the bout never leaves its
        # first syllable, the run is the whole bout; previously this case
        # reused the previous bout's count (or raised NameError on the very
        # first bout).
        first_syllable_repeats = next(
            (t for t in range(1, len(syllable_list))
             if syllable_list[t] != syllable_list[0]),
            len(syllable_list))
        repeat_num.append(first_syllable_repeats)
    avg_repeat_num = np.mean(repeat_num)

    #Song duration: last offset minus first onset, divided by 1000
    #(ms -> s, going by the column names) and rounded to 2 decimals
    song_dur = []
    for bout in song_motif:
        onset = bout[' Syll Onset (ms)'].tolist()
        offset = bout[' Syll Offset (ms)'].tolist()
        t_start = float(onset[0])
        t_end = float(offset[-1])
        song_dur.append(round((t_end - t_start) / 1000, 2))
    avg_song_dur = np.mean(song_dur)

    # Length of the shortest bout of this bird.
    mml = int(np.min(syll_num))

    data = {
    'Bird name' : [labels[file_idx]],
    'Species' : [sp_label[file_idx]],
    'No. of bouts' : [n_bouts],
    'Avg. No. of Syll per song' : [avg_syl_num],
    'Song duration' : [avg_song_dur],
    'Avg. repeats of first syll' : [avg_repeat_num],
    'Minimum motif length' : [mml]
    }

    # Append this bird's row below the header that was written earlier.
    df = pandas.DataFrame(data)
    df.to_csv('02. Syll_quantifiers.csv', mode = 'a', index=False, header=False)

In [20]:
# Four empty tables (amplitude, frequency, syllable duration, inter-note
# interval) that share one layout: bird, species and one column per syllable
# position '#1'..'#5'. Each is written out so its CSV holds only the header.
# (Positions '#6'..'#10' are currently not used.)
amp_data, freq_data, dur_data, int_data = (
    {column: [] for column in ('Bird name', 'Species', '#1', '#2', '#3', '#4', '#5')}
    for _ in range(4)
)

amp_df = pandas.DataFrame(amp_data)
amp_df.to_csv('03. Avg amplitude values.csv', index=False)

freq_df = pandas.DataFrame(freq_data)
freq_df.to_csv('04. Avg frequency values.csv', index=False)

dur_df = pandas.DataFrame(dur_data)
dur_df.to_csv('05. Avg syllable duration values.csv', index=False)

int_df = pandas.DataFrame(int_data)
int_df.to_csv('06. Avg internote interval values.csv', index=False)

In [21]:
def _leading_values(bouts, column, count):
    # First `count` entries of `column` from every bout, one row per bout.
    return np.array([bout[column].tolist()[:count] for bout in bouts])


def _feature_row(bird, species, values):
    # One-row table: bird, species, then '#1', '#2', ... for each value.
    row = {'Bird name' : [bird], 'Species' : [species]}
    for position, value in enumerate(values, start=1):
        row['#' + str(position)] = [value]
    return row


# For every bird file: keep the long bouts, average four acoustic features
# over bouts for the first `mml` syllable positions, and append one row per
# feature to the matching CSV.
for file_idx, bout_file in enumerate(song_bout_files):
    # The file is read without a header row; the 15 column names are given here.
    data = pandas.read_csv(bout_file, header=None,
      names = ['FileName', 'Syll #', 'Syll Label',
      ' Syll Onset (ms)', ' Syll Offset (ms)', ' Syll Duration (sec)', ' Mean Frequency (Hz)',
      ' Entropy', 'Log Amplitude (dB)', 'Pitch Goodness', 'FrequencyModulation',
      'AmplitudeModulation', 'EntropyVariance', 'Fundamental Frequency (Hz)', 'RMS Amplitude'])

    all_syl = data['Syll Label'].tolist()
    # Boundaries: a leading 0 followed by the row position of every 'End'.
    end_indices = find_end_indices(all_syl, 'End')

    # Each bout starts two rows after a boundary and stops just before the
    # next 'End' row, so the 'End' row itself is left out.
    # NOTE(review): the +2 presumably skips the 'End' row and the 'Start' row
    # that follows it - confirm against the file layout.
    song_bouts = [
        data.iloc[end_indices[i] + 2 : end_indices[i + 1], :]
        for i in range(len(end_indices) - 1)
    ]

    # Drop short bouts. motif_finder keeps bouts with at least 6 syllables,
    # so every retained bout has the 5 leading values and 5 leading gaps
    # needed below and the stacked arrays are rectangular.
    song_motif = motif_finder(song_bouts)

    #mml - number of leading syllable positions that are averaged; fixed at 5
    #to match the '#1'..'#5' columns of the output tables
    mml = 5

    bird = labels[file_idx]
    species = sp_label[file_idx]

    #Average amplitude values
    amp_10syl = _leading_values(song_motif, 'Log Amplitude (dB)', mml)
    avg_amp = mean_value(amp_10syl)
    amp_data = _feature_row(bird, species, avg_amp)
    amp_df = pandas.DataFrame(amp_data)
    amp_df.to_csv('03. Avg amplitude values.csv', mode='a', index=False, header=False)

    #Average frequency values
    freq_10syl = _leading_values(song_motif, ' Mean Frequency (Hz)', mml)
    avg_freq = mean_value(freq_10syl)
    freq_data = _feature_row(bird, species, avg_freq)
    freq_df = pandas.DataFrame(freq_data)
    freq_df.to_csv('04. Avg frequency values.csv', mode='a', index=False, header=False)

    #Average syllable duration values
    dur_10syl = _leading_values(song_motif, ' Syll Duration (sec)', mml)
    avg_dur = mean_value(dur_10syl)
    dur_data = _feature_row(bird, species, avg_dur)
    dur_df = pandas.DataFrame(dur_data)
    dur_df.to_csv('05. Avg syllable duration values.csv', mode='a', index=False, header=False)

    #Average internote interval values
    internote_int = intervals(song_motif)
    # Truncate each bout's gap list BEFORE stacking. Bouts differ in length,
    # so stacking the full lists first builds a ragged array, which NumPy
    # warns about and newer releases reject with ValueError.
    int_10syl = np.array([gaps[:mml] for gaps in internote_int])
    avg_int = mean_value(int_10syl)
    int_data = _feature_row(bird, species, avg_int)
    int_df = pandas.DataFrame(int_data)
    int_df.to_csv('06. Avg internote interval values.csv', mode='a', index=False, header=False)

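# [Cell output] Source line quoted by a warning printed when the cell above was run
# (presumably NumPy's ragged-array VisibleDeprecationWarning; the message text itself is missing):
#   int_all_syl = np.array([np.array(i) for i in internote_int])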


In [9]:
directory = 'D:\\4th Year\\Semester 7\\BI4313 Sem Project\\IN-comparative-analysis\\IN-comparative-analysis\\test\\Bout_trans_prob_processed'

# Collect the path of every regular file in that directory.
# os.scandir yields entries in arbitrary order, but the processing loop pairs
# file number i with labels[i], so the paths are sorted to make that pairing
# reproducible.
# NOTE(review): this assumes the file names sort in the same order as the
# (alphabetically ordered) labels list - confirm against the directory.
# The `with` block closes the directory iterator once the listing is done.
with os.scandir(directory) as entries:
    bout_tp_files = sorted(entry.path for entry in entries if entry.is_file())

In [17]:
# Start the start/self transition-probability table: an empty frame with the
# five output columns, written out so the CSV holds only the header line.
data = {
    name: []
    for name in [
        'Bird name',
        'Species',
        'Syll label',
        'Start transition prob',
        'Self-transition prob',
    ]
}

df = pandas.DataFrame(data)
df.to_csv('07. Start-Self Transition Prob.csv', index = False)

In [18]:
#Saves the beginning bout probability and self-transition probability of ALL syllables of all birds
#IMPORTANT: This includes the 'Start' and 'End' "notes" for all birds

# For every transition-probability file, append one row per syllable label
# (including the 'Start' and 'End' labels) holding the probability read from
# the 'Start' row and the probability on the matrix diagonal.
for file_idx, tp_file in enumerate(bout_tp_files):
    file = pandas.read_csv(tp_file, header=None)
    #Drop the row and column labels, just keep the numeric values
    trans_prob = np.array(file.iloc[1:, 1:]).astype(float)

    #Labels come from the first row of the file, minus its first cell
    uniq_syl = file.iloc[0].to_numpy()
    uniq_syl = np.delete(uniq_syl, 0)

    syl_list = uniq_syl.tolist()
    # Matrix index of the 'Start' label (ValueError if the file has none).
    s = syl_list.index('Start')

    n_syl = len(syl_list)
    # NOTE(review): presumably rows are the "from" label and columns the "to"
    # label, so trans_prob[s, i] is P(Start -> syllable i) - confirm against
    # the script that produced these files.
    first_syl_data = {
        'Bird name' : [labels[file_idx]] * n_syl,
        'Species' : [sp_label[file_idx]] * n_syl,
        'Syll label' : syl_list,
        'Start transition prob' : [trans_prob[s, i] for i in range(n_syl)],
        'Self-transition prob' : [trans_prob[i, i] for i in range(n_syl)]
    }
    # All rows of one bird are written in a single append instead of
    # reopening the CSV once per syllable; the rows written are the same.
    df = pandas.DataFrame(first_syl_data)
    df.to_csv("07. Start-Self Transition Prob.csv", mode = 'a', index=False, header=False)
