## Rseed Test

This notebook helps test whether the path lengths for a given bird song are consistent across UMAP renditions.

Here, we start with the information in the scripts given by Alam et al., and use it to generate 20 UMAP embeddings of the dataset of 31 tutored birdsongs, by changing only the seed of the random number generator.

The 20 UMAP embeddings of the dataset of 31 tutored birdsongs generated here are used in all further tests.

In [1]:
# Importing required python packages

import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import umap
from tqdm import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Input / output locations used throughout the notebook (relative paths, trailing slash included)

# Folder containing the source data "tut.csv" provided by Alam et al. -- adjust to your setup
data_folder = "../../../AlamDataHandling/Data/"

# Base folder under which the generated embeddings and plots are written
results_folder = "../Results/"

We use the source file "tut.csv", provided by the authors.
It contains the dataset of the songs of the 31 tutored birds.



In [3]:
# Load "tut.csv" (songs of the 31 tutored birds); the first CSV column becomes the row index
Clust = pd.read_csv(
    data_folder + r"tut.csv",
    index_col=0,
)

In [4]:
# Script provided by the authors, condensed: build the numeric matrix that is handed to UMAP.
# The 'file_name' and 'cluster' columns are removed; whatever columns remain
# (presumably the per-syllable features -- confirm against tut.csv) are converted to float32.
Clust2tut = Clust.drop(columns=['file_name', 'cluster'])
Clustnptut = np.float32(Clust2tut.to_numpy())

# Same array under the name used by the embedding loop (an alias, not a copy)
Clustnpcom = Clustnptut

The authors have only included syllable cluster ids in their source csv.

Here, they link syllable ids with the corresponding bird ids.

In [5]:
# Linking syllable ids to bird ids as per the script provided by Alam et al.

# Highest syllable cluster id belonging to each bird; the position in the array is the bird id.
# To be read as: bird id #0 owns every id up to #5, bird id #1 owns syllable ids #6-#10, and so on.
last_cluster_id_of_bird = np.array([
    5, 10, 16, 20, 24, 28, 33, 41, 45, 51,
    56, 61, 64, 71, 75, 81, 84, 88, 94, 97,
    101, 107, 115, 120, 124, 128, 131, 135, 140, 144,
    149,
])
# Lowest id of each bird's range. Bird #0 has no lower bound, matching the original `<= 5` test.
first_cluster_id_of_bird = np.concatenate(([-np.inf], last_cluster_id_of_bird[:-1] + 1))

label1 = list(Clust['cluster'])
cluster_ids = np.asarray(label1)

# For every syllable, find the first bird whose upper bound is >= its cluster id
bird_ids = np.searchsorted(last_cluster_id_of_bird, cluster_ids, side='left')

# A cluster id is mapped only if it lies inside one of the ranges above. Anything else
# (an id above 149, a NaN, a non-integer value falling between two ranges) used to be skipped
# silently, which left `labeltut` shorter than the data and misaligned every following label.
inside_table = bird_ids < len(last_cluster_id_of_bird)
capped_bird_ids = np.minimum(bird_ids, len(last_cluster_id_of_bird) - 1)
has_bird = inside_table & (cluster_ids >= first_cluster_id_of_bird[capped_bird_ids])
if not has_bird.all():
    unmapped = np.unique(cluster_ids[~has_bird])[:10].tolist()
    raise ValueError(
        str(int((~has_bird).sum())) + " rows have a cluster id that belongs to no bird, e.g. " + str(unmapped)
    )

# One bird id (plain Python int) per row of `Clust`, in row order
labeltut = bird_ids.tolist()

sns.set(context="paper", style="white")
   

We generate different states to initialise the random number generator for each UMAP computation

In [6]:
# Fix the global numpy random state so that the list of seeds below is the same on every run
np.random.seed(42)

# One seed per UMAP embedding, drawn from [0, 1e8)
# NOTE(review): 100 seeds are drawn here, while the notebook text speaks of 20 embeddings --
# confirm which number is intended.
rseeds = np.random.randint(low=0, high=1e8, size=100)
rseeds

array([65682867, 56755036, 56882282, 21081788, 13315092, 35788921,
       26735830, 93410762, 96319575, 91090292, 31632483, 76737383,
       88358551, 88409749,  4981505, 13953367, 85652971,  4521373,
        3344769, 98750923, 76893497, 30349564, 99052376, 42860080,
       77751354, 62250665, 66209791, 46792155, 21498555, 97117135,
       60221198, 79757501, 16861870, 52286002, 32049003, 61136438,
       62194931, 65285250, 69537252, 59248434, 61306900, 55831368,
       10959014, 56972561, 48900483, 20193880,  3385357, 44738553,
       68574553, 16845364, 52157313, 95672411, 11392366, 69778859,
       88883975, 13479854, 73506850, 13187277, 68979792, 37709731,
       26939239, 23027075, 81200125, 86191493, 87796277, 66401385,
       22335235,   271836,  3584702, 52631083,  8585377, 86544585,
       85157821, 87655395, 17824013, 36601694, 67105583, 60304654,
       34119117, 36433622, 89482491, 60031992, 55105831,  3366612,
       58743503, 91693393, 36258670, 69739572, 55848857, 57055

### Generating 20 different UMAP embeddings for the same dataset by changing only the initialisation state

In [7]:
# Iterate through each rseed and generate a UMAP embedding for each rseed

# Create the output folder up front so the first (slow) iteration cannot fail on a missing directory
embeddings_folder = os.path.join(results_folder, 'Embeddings')
os.makedirs(embeddings_folder, exist_ok=True)

for rseed in tqdm(rseeds):
    # Initialise the global random state (kept for any code that still draws from np.random)
    np.random.seed(rseed)

    # Generate UMAP embedding with the default parameters as used by the authors.
    # The seed is also handed to UMAP directly: per the UMAP documentation an explicit
    # random_state is what makes a run reproducible (at the price of single-threaded execution).
    embedding = umap.UMAP(
        n_neighbors=15,
        min_dist=.1,
        metric='euclidean',
        random_state=int(rseed),
    ).fit_transform(Clustnpcom)

    # Save the embedding to a csv file
    embedding_df = pd.DataFrame(embedding)
    embedding_df.to_csv(os.path.join(embeddings_folder, 'embedding_gen_' + str(rseed) + '.csv'))

    # Plot the UMAP embedding, colouring every syllable by the id of the bird it belongs to
    fig, ax = plt.subplots(figsize=(12, 10))

    points = ax.scatter(embedding[:, 0], embedding[:, 1], c=labeltut, cmap="rainbow", s=0.1, alpha=.1)

    ax.set(xticks=[], yticks=[])
    cbar = fig.colorbar(points, ax=ax)

    ax.set_title(str(rseed) + " by bird", fontsize=18)

    # Leave a margin of 5 units around the point cloud
    ax.set_xlim(embedding[:, 0].min() - 5, embedding[:, 0].max() + 5)
    ax.set_ylim(embedding[:, 1].min() - 5, embedding[:, 1].max() + 5)

    fig.savefig(os.path.join(embeddings_folder, "UMAP_" + str(rseed) + ".png"), dpi=300)

    # Show the figure in the notebook, then release it: without the close every one of the
    # figures created by this loop would stay open in memory until the cell finishes.
    plt.show()
    plt.close(fig)





  0%|          | 0/100 [00:00<?, ?it/s]
