In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import umap
from tqdm import tqdm


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def generate_UMAP_plot(subset_df, focal_bird, combo_index):
    """Save a scatter plot of a 2-D embedding, colouring the focal bird by syllable.

    Parameters
    ----------
    subset_df : pandas.DataFrame
        Must provide the columns ``x``, ``y``, ``bird_id`` and ``syll_id``.
    focal_bird : int
        ``bird_id`` whose points keep their ``syll_id`` as colour value; every
        other point gets the colour value 0.
    combo_index : int
        Index of the bird combination; used in the title and the file name.

    Returns
    -------
    None
        The figure is written to
        ``<results_folder>/Embeddings/UMAP_gen_<focal_bird>_<combo_index>.png``
        (``results_folder`` is a notebook-level global) and then closed.
    """
    embedding = subset_df[['x', 'y']].values
    bird_labels = subset_df['bird_id'].values
    syll_labels = subset_df['syll_id'].values

    # Multiplying by the boolean mask zeroes the label of every non-focal point,
    # so only the focal bird's syllables receive distinct colours.
    # NOTE(review): a focal-bird syllable whose id is 0 gets the same value as
    # the non-focal points - confirm this is acceptable.
    cluster_labels = syll_labels * (bird_labels == focal_bird)

    # Make sure the target directory exists before doing any drawing work.
    out_dir = os.path.join(results_folder, 'Embeddings')
    os.makedirs(out_dir, exist_ok=True)
    out_path = os.path.join(
        out_dir, 'UMAP_gen_' + str(focal_bird) + '_' + str(combo_index) + '.png'
    )

    fig, ax = plt.subplots(figsize=(12, 10))
    try:
        points = ax.scatter(
            embedding[:, 0], embedding[:, 1], c=cluster_labels, cmap="rainbow", s=0.1, alpha=.1
        )
        fig.colorbar(points, ax=ax)

        ax.set_title(
            "UMAP embedding " + str(combo_index) + " for focal bird " + str(focal_bird),
            fontsize=18,
        )

        # Leave a margin of 5 units around the data on both axes.
        ax.set_xlim(embedding[:, 0].min() - 5, embedding[:, 0].max() + 5)
        ax.set_ylim(embedding[:, 1].min() - 5, embedding[:, 1].max() + 5)

        fig.savefig(out_path)
    finally:
        # Always release the figure: this function is called many times in a loop.
        plt.close(fig)


In [3]:
def generate_UMAP_embedding(subset_df, focal_bird, combo_index, n_features=2352):
    """Fit a 2-D UMAP embedding on the feature columns of ``subset_df`` and save it.

    Parameters
    ----------
    subset_df : pandas.DataFrame
        The first ``n_features`` columns (by position) are used as the input
        matrix; the columns ``filename``, ``bird_name``, ``bird_id`` and
        ``syll_id`` are copied into the result.
    focal_bird : int
        Focal bird id; only used to build the output file name.
    combo_index : int
        Index of the bird combination; only used to build the output file name.
    n_features : int, optional
        Number of leading columns that hold the features. Defaults to 2352,
        the value that was previously hard-coded.

    Returns
    -------
    pandas.DataFrame
        Label columns plus the embedding coordinates ``x`` and ``y``, indexed
        like ``subset_df``. The same frame is written to
        ``<results_folder>/Embeddings/embedding_gen_<focal_bird>_<combo_index>.csv``
        (``results_folder`` and ``rseed`` are notebook-level globals).
    """
    specs_data = subset_df.iloc[:, 0:n_features].values

    # The global NumPy generator is seeded right before the fit, as before
    # (presumably so that UMAP, created without ``random_state``, is repeatable
    # - TODO confirm; ``random_state=rseed`` would be the explicit alternative).
    # The previous state is restored afterwards so that this call no longer
    # resets the random stream of callers that also use ``np.random``.
    saved_rng_state = np.random.get_state()
    np.random.seed(rseed)
    try:
        embedding = umap.UMAP(
            n_neighbors=15,
            min_dist=.1,
            metric='euclidean'
        ).fit_transform(specs_data)
    finally:
        np.random.set_state(saved_rng_state)

    # Copy the label columns (keeps subset_df's index) and append the coordinates.
    subset_embedding_df = subset_df[['filename', 'bird_name', 'bird_id', 'syll_id']].copy()
    subset_embedding_df['x'] = embedding[:, 0]
    subset_embedding_df['y'] = embedding[:, 1]

    # Create the output directory if needed so the fit result is never lost
    # to a missing folder.
    out_dir = os.path.join(results_folder, 'Embeddings')
    os.makedirs(out_dir, exist_ok=True)
    subset_embedding_df.to_csv(
        os.path.join(
            out_dir, 'embedding_gen_' + str(focal_bird) + '_' + str(combo_index) + '.csv'
        )
    )

    return subset_embedding_df



In [4]:
def construct_subset_df(subset_birds, source_df=None):
    """Return the rows whose ``bird_id`` is one of ``subset_birds``.

    Parameters
    ----------
    subset_birds : iterable
        Bird ids to keep.
    source_df : pandas.DataFrame, optional
        Frame to filter; must have a ``bird_id`` column. When omitted, the
        notebook-level ``label_tutored_df`` is used, as before.

    Returns
    -------
    pandas.DataFrame
        The matching rows of the source frame, with their original index.
    """
    if source_df is None:
        # Backward-compatible default: fall back to the notebook global.
        source_df = label_tutored_df

    mask = source_df['bird_id'].isin(subset_birds)
    return source_df[mask]

In [5]:

# Hard-coded absolute locations on the author's machine; adjust before running elsewhere.
# The values end with "/" because other cells build file paths from them by
# plain string concatenation.
# data_folder: where tut.csv is read from.
data_folder = "/home/remya/Work/AlamTest/Alam JC/Oct 2024/Alam tests/Main script/Data/"
# labels_folder: where tut_labels.csv is read from.
labels_folder = "/home/remya/Work/AlamTest/Alam JC/Oct 2024/Alam tests/Aux scripts/Results/"
# results_folder: parent of the "Embeddings" folder that receives the CSV and PNG outputs.
results_folder ="/home/remya/Work/AlamTest/Alam JC/Oct 2024/Alam tests/Aux scripts/Permutations_w_focal_bird/Results/"

In [6]:
# Read tut.csv from data_folder, using the first CSV column as the row index.
tutored_df = pd.read_csv(data_folder + r"tut.csv", index_col=0)


In [7]:
# Preview the loaded table (file_name, cluster and the numbered feature columns).
tutored_df

Unnamed: 0,file_name,cluster,0,1,2,3,4,5,6,7,...,2342,2343,2344,2345,2346,2347,2348,2349,2350,2351
0,B181_43824.21890893_12_25_6_4_50_27.jpg,0,0.000000,0.000000,0.003922,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,B181_43824.21890893_12_25_6_4_50_36.jpg,0,0.000000,0.000000,0.003922,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,B181_43824.21904695_12_25_6_5_4_3.jpg,0,0.000000,0.000000,0.003922,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,B181_43824.21907655_12_25_6_5_7_12.jpg,0,0.000000,0.000000,0.003922,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,B181_43824.21913457_12_25_6_5_13_11.jpg,0,0.003922,0.000000,0.003922,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4401,S343_43085.62887440_12_16_17_28_7_19.jpg,149,0.000000,0.000000,0.000000,0.015686,0.011765,0.074510,0.031373,0.023529,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4402,S343_43085.63082390_12_16_17_31_22_25.jpg,149,0.000000,0.000000,0.003922,0.000000,0.000000,0.003922,0.007843,0.003922,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4403,S343_43085.63082390_12_16_17_31_22_40.jpg,149,0.000000,0.000000,0.007843,0.000000,0.000000,0.003922,0.007843,0.007843,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4404,S343_43085.63100030_12_16_17_31_40_12.jpg,149,0.000000,0.000000,0.003922,0.003922,0.000000,0.015686,0.003922,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [8]:
# Full path of the label table; labels_folder already ends with "/".
label_file = labels_folder + "tut_labels.csv"

In [9]:
# Load the label table (bird_id, bird_name, syll_id, filename) and preview it.
label_df = pd.read_csv(label_file)
label_df

Unnamed: 0,bird_id,bird_name,syll_id,filename
0,0,B181,0,B181_43824.21890893_12_25_6_4_50_27.jpg
1,0,B181,0,B181_43824.21890893_12_25_6_4_50_36.jpg
2,0,B181,0,B181_43824.21904695_12_25_6_5_4_3.jpg
3,0,B181,0,B181_43824.21907655_12_25_6_5_7_12.jpg
4,0,B181,0,B181_43824.21913457_12_25_6_5_13_11.jpg
...,...,...,...,...
365914,30,S343,149,S343_43085.62887440_12_16_17_28_7_19.jpg
365915,30,S343,149,S343_43085.63082390_12_16_17_31_22_25.jpg
365916,30,S343,149,S343_43085.63082390_12_16_17_31_22_40.jpg
365917,30,S343,149,S343_43085.63100030_12_16_17_31_40_12.jpg


In [10]:
# Keep only the numbered feature columns by removing the file name and cluster id.
# The frame is reassigned to the same name, so errors='ignore' is used to make
# the cell safe to re-run: without it a second execution raises KeyError because
# the two columns are already gone.
tutored_df = tutored_df.drop(columns=['file_name', 'cluster'], errors='ignore')
tutored_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,2342,2343,2344,2345,2346,2347,2348,2349,2350,2351
0,0.000000,0.000000,0.003922,0.000000,0.000000,0.000000,0.000000,0.000000,0.003922,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.000000,0.000000,0.003922,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.000000,0.000000,0.003922,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.000000,0.000000,0.003922,0.000000,0.000000,0.000000,0.000000,0.000000,0.007843,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.003922,0.000000,0.003922,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4401,0.000000,0.000000,0.000000,0.015686,0.011765,0.074510,0.031373,0.023529,0.113725,0.007843,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4402,0.000000,0.000000,0.003922,0.000000,0.000000,0.003922,0.007843,0.003922,0.015686,0.003922,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4403,0.000000,0.000000,0.007843,0.000000,0.000000,0.003922,0.007843,0.007843,0.023529,0.007843,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4404,0.000000,0.000000,0.003922,0.003922,0.000000,0.015686,0.003922,0.000000,0.035294,0.003922,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [11]:

# Join tutored_df (feature columns) and label_df (labels) side by side.
# The join is purely positional: both indexes are reset, so row i of one frame
# is paired with row i of the other. A length mismatch would otherwise be
# padded with NaN silently, so it is rejected explicitly.
if len(tutored_df) != len(label_df):
    raise ValueError(
        "tutored_df and label_df must have the same number of rows for a "
        "positional join, got " + str(len(tutored_df)) + " and " + str(len(label_df))
    )
label_tutored_df = pd.concat([tutored_df.reset_index(drop=True), label_df.reset_index(drop=True)], axis=1)
label_tutored_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,2346,2347,2348,2349,2350,2351,bird_id,bird_name,syll_id,filename
0,0.000000,0.000000,0.003922,0.000000,0.000000,0.000000,0.000000,0.000000,0.003922,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0,B181,0,B181_43824.21890893_12_25_6_4_50_27.jpg
1,0.000000,0.000000,0.003922,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0,B181,0,B181_43824.21890893_12_25_6_4_50_36.jpg
2,0.000000,0.000000,0.003922,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0,B181,0,B181_43824.21904695_12_25_6_5_4_3.jpg
3,0.000000,0.000000,0.003922,0.000000,0.000000,0.000000,0.000000,0.000000,0.007843,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0,B181,0,B181_43824.21907655_12_25_6_5_7_12.jpg
4,0.003922,0.000000,0.003922,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0,B181,0,B181_43824.21913457_12_25_6_5_13_11.jpg
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
365914,0.000000,0.000000,0.000000,0.015686,0.011765,0.074510,0.031373,0.023529,0.113725,0.007843,...,0.0,0.0,0.0,0.0,0.0,0.0,30,S343,149,S343_43085.62887440_12_16_17_28_7_19.jpg
365915,0.000000,0.000000,0.003922,0.000000,0.000000,0.003922,0.007843,0.003922,0.015686,0.003922,...,0.0,0.0,0.0,0.0,0.0,0.0,30,S343,149,S343_43085.63082390_12_16_17_31_22_25.jpg
365916,0.000000,0.000000,0.007843,0.000000,0.000000,0.003922,0.007843,0.007843,0.023529,0.007843,...,0.0,0.0,0.0,0.0,0.0,0.0,30,S343,149,S343_43085.63082390_12_16_17_31_22_40.jpg
365917,0.000000,0.000000,0.003922,0.003922,0.000000,0.015686,0.003922,0.000000,0.035294,0.003922,...,0.0,0.0,0.0,0.0,0.0,0.0,30,S343,149,S343_43085.63100030_12_16_17_31_40_12.jpg


In [12]:
# Seed value shared across the notebook (generate_UMAP_embedding reads it too);
# it is applied here to NumPy's global random generator.
rseed = 42
np.random.seed(rseed)

In [13]:
# Parameters of the focal-bird loop.
# n_total_birds: bird ids 0 .. n_total_birds-1 are each used once as focal bird.
n_total_birds = 31
# NOTE(review): n_birds is not referenced by the visible cells - confirm it is still needed.
n_birds = 30
# n_choose: number of non-focal birds drawn (without replacement) per combination.
n_choose = 5
# n_test_combos: number of combinations generated for every focal bird.
n_test_combos = 10


In [14]:
# For every focal bird, build n_test_combos random groups made of the focal bird
# plus n_choose other birds, then embed and plot each group.
#
# The subsets are drawn from a dedicated generator so that the sampling sequence
# is reproducible from rseed alone and does not depend on whatever the helper
# functions do to NumPy's global random state.
subset_rng = np.random.default_rng(rseed)

for focal_bird in tqdm(range(n_total_birds)):
    print("Focal bird is ", focal_bird)
    non_focal_birds = [bird for bird in range(n_total_birds) if bird != focal_bird]

    for n_combo in range(n_test_combos):
        # n_choose distinct non-focal birds, with the focal bird appended last.
        subset = subset_rng.choice(non_focal_birds, n_choose, replace=False).tolist()
        subset.append(focal_bird)

        subset_df = construct_subset_df(subset)

        # Writes embedding_gen_<focal>_<combo>.csv and returns the coordinates.
        subset_emb_df = generate_UMAP_embedding(subset_df, focal_bird, n_combo)

        # Writes UMAP_gen_<focal>_<combo>.png.
        generate_UMAP_plot(subset_emb_df, focal_bird, n_combo)

    






  0%|          | 0/31 [00:00<?, ?it/s]

Focal bird is  0


  3%|▎         | 1/31 [13:37<6:48:41, 817.37s/it]

Focal bird is  1


  6%|▋         | 2/31 [28:00<6:48:12, 844.56s/it]

Focal bird is  2


 10%|▉         | 3/31 [41:10<6:22:28, 819.58s/it]

Focal bird is  3


 13%|█▎        | 4/31 [55:11<6:12:31, 827.84s/it]

Focal bird is  4


 16%|█▌        | 5/31 [1:09:36<6:04:32, 841.26s/it]

Focal bird is  5


 19%|█▉        | 6/31 [1:26:19<6:13:23, 896.15s/it]

Focal bird is  6


 23%|██▎       | 7/31 [1:41:19<5:59:04, 897.70s/it]

Focal bird is  7


 26%|██▌       | 8/31 [1:59:29<6:07:28, 958.63s/it]

Focal bird is  8


 29%|██▉       | 9/31 [2:13:21<5:37:04, 919.31s/it]

Focal bird is  9


 32%|███▏      | 10/31 [2:29:19<5:25:51, 931.02s/it]

Focal bird is  10


 35%|███▌      | 11/31 [2:40:35<4:44:24, 853.20s/it]

Focal bird is  11


 39%|███▊      | 12/31 [2:53:36<4:23:11, 831.14s/it]

Focal bird is  12


 42%|████▏     | 13/31 [3:09:09<4:18:36, 862.05s/it]

Focal bird is  13


 45%|████▌     | 14/31 [3:28:19<4:28:53, 949.04s/it]

Focal bird is  14


 48%|████▊     | 15/31 [3:44:12<4:13:23, 950.20s/it]

Focal bird is  15


 52%|█████▏    | 16/31 [3:56:37<3:42:06, 888.43s/it]

Focal bird is  16


 55%|█████▍    | 17/31 [4:08:05<3:13:12, 828.06s/it]

Focal bird is  17


 58%|█████▊    | 18/31 [4:20:38<2:54:32, 805.58s/it]

Focal bird is  18


 61%|██████▏   | 19/31 [4:35:19<2:45:39, 828.33s/it]

Focal bird is  19


 65%|██████▍   | 20/31 [4:49:45<2:33:55, 839.60s/it]

Focal bird is  20


 68%|██████▊   | 21/31 [5:01:56<2:14:30, 807.02s/it]

Focal bird is  21


 71%|███████   | 22/31 [5:16:12<2:03:14, 821.63s/it]

Focal bird is  22


 74%|███████▍  | 23/31 [5:31:50<1:54:12, 856.56s/it]

Focal bird is  23


 77%|███████▋  | 24/31 [5:44:54<1:37:22, 834.69s/it]

Focal bird is  24


 81%|████████  | 25/31 [5:58:24<1:22:44, 827.45s/it]

Focal bird is  25


 84%|████████▍ | 26/31 [6:09:54<1:05:30, 786.18s/it]

Focal bird is  26


 87%|████████▋ | 27/31 [6:21:28<50:33, 758.48s/it]  

Focal bird is  27


 90%|█████████ | 28/31 [6:33:48<37:39, 753.04s/it]

Focal bird is  28


 94%|█████████▎| 29/31 [6:48:16<26:14, 787.50s/it]

Focal bird is  29


 97%|█████████▋| 30/31 [7:01:49<13:14, 794.93s/it]

Focal bird is  30


100%|██████████| 31/31 [7:14:03<00:00, 840.13s/it]
