In [1]:
!pip install google-cloud-speech --quiet
from pydub import AudioSegment
import os
import random
from tqdm import tqdm
from google.cloud import speech_v1p1beta1 as speech



In [5]:
# Input folders: every file in each folder is loaded as one mono-lingual audio sample.
# NOTE(review): these are absolute paths on one specific machine; the notebook will not
# run elsewhere without editing them — consider a configurable base directory.
hindi_folder = "/Users/yash/Desktop/MTP-2k23-24/Wav2vec-codes/pureAudio/Hindi_female"
english_folder = "/Users/yash/Desktop/MTP-2k23-24/Wav2vec-codes/pureAudio/English_female"
# Output folders: generated code-mixed .wav clips and their matching .rttm annotation files.
output_folder_audio = "/Users/yash/Desktop/MTP-2k23-24/Wav2vec-codes/testDiralisationOutput/HE_codemixed_audio_SingleSpeakerFemale"
output_folder_rttm = "/Users/yash/Desktop/MTP-2k23-24/Wav2vec-codes/testDiralisationOutput/rttm"
# Prefix of every generated clip name; the running counter below is appended to it.
file_name = "HECodemixedFemale"
# Global running index of the next clip to generate (incremented by the generator function).
count = 1
def resample_to_16k(audio_segment):
    """Return `audio_segment` converted to a 16 kHz frame rate.

    Args:
        audio_segment: object exposing `set_frame_rate(rate)` (a pydub
            `AudioSegment` in this notebook).

    Returns:
        Whatever `audio_segment.set_frame_rate(16000)` returns.
    """
    target_rate_hz = 16000
    resampled = audio_segment.set_frame_rate(target_rate_hz)
    return resampled

def _load_folder_16k(folder):
    """Load every regular, non-hidden file in `folder` as a 16 kHz audio segment.

    Entries are visited in sorted order so the resulting list (and therefore any
    seeded random sampling from it) is reproducible; `os.listdir` alone returns
    entries in arbitrary order.

    Args:
        folder: path of a directory containing audio files.

    Returns:
        List of segments, each passed through `resample_to_16k`.
    """
    segments = []
    for entry in sorted(os.listdir(folder)):
        path = os.path.join(folder, entry)
        # Hidden files (e.g. macOS ".DS_Store") and sub-directories are not audio
        # samples; skip them instead of handing them to the decoder.
        if entry.startswith(".") or not os.path.isfile(path):
            continue
        segments.append(resample_to_16k(AudioSegment.from_file(path)))
    return segments


# Sample pools; the generator function removes samples from these lists as it uses them.
hindi_audios = _load_folder_16k(hindi_folder)
english_audios = _load_folder_16k(english_folder)

print(f"Total Hindi samples of the same speaker are: {len(hindi_audios)}")
print(f"Total English samples of the same speaker are: {len(english_audios)}")


Total Hindi samples of the same speaker are: 1661
Total English samples of the same speaker are: 2913


In [8]:
def create_codemixed_audio_and_rttm(output_folder_audio, output_folder_rttm):
    """Build one Hindi/English code-mixed clip and write it with its RTTM file.

    Between 2 and 6 samples, alternating between the two languages (starting
    language chosen at random), are drawn without replacement from the global
    pools `english_audios` / `hindi_audios`, each followed by a short silent
    gap, and concatenated. One RTTM row is written per sample; its duration
    covers the sample plus its trailing gap.

    Side effects: removes the used samples from the global pools, prints the
    number of segments, writes `<name>.wav` and `rttm_<name>.rttm` (where
    `<name>` is the global `file_name` followed by the global `count`), and
    increments `count` once both files are written.

    Args:
        output_folder_audio: directory for the generated .wav file
            (created if missing).
        output_folder_rttm: directory for the generated .rttm file
            (created if missing).

    Returns:
        `(audio_path, rttm_path)` on success, or `(None, None)` when the pools
        cannot supply the samples this clip needs. In that case no sample is
        consumed and nothing is written.
    """
    global count
    name = file_name + str(count)
    # Length of the silent gap appended after every sample.
    overlap_ms = 270  # 0.27 seconds
    # Randomly select how many samples to join (2 to 6, languages alternating).
    num_segments = random.choice([2, 3, 4, 5, 6])
    print("Number of segments:", num_segments)
    # Randomly choose which language starts
    start_with_english = random.choice([True, False])

    # Language of every position: even positions use the starting language,
    # odd positions use the other one.
    english_turns = [(i % 2 == 0) == start_with_english for i in range(num_segments)]
    needed_english = sum(english_turns)
    needed_hindi = num_segments - needed_english
    # Check availability up front so that samples are never removed from the
    # pools for a clip that ends up not being written.
    if len(english_audios) < needed_english or len(hindi_audios) < needed_hindi:
        return None, None

    # Start from a zero-length segment. AudioSegment.silent() without a duration
    # is one full second of silence, which would shift the audio by 1 s relative
    # to the first RTTM row.
    codemixed_audio = AudioSegment.empty()
    rttm_lines = []

    for is_english in english_turns:
        if is_english:
            pool, lang_label = english_audios, "English"
        else:
            pool, lang_label = hindi_audios, "Hindi"
        # Pop by random index: removes exactly the chosen sample, with no
        # equality search through the audio objects.
        segment = pool.pop(random.randrange(len(pool)))
        duration = len(segment)  # milliseconds

        # The sample starts wherever the clip currently ends.
        start_time = len(codemixed_audio) / 1000.0
        codemixed_audio += segment
        # Silent gap after the sample (also after the last one).
        codemixed_audio += AudioSegment.silent(duration=overlap_ms)

        # Update RTTM content
        ## Took reference from here:
        ## https://web.archive.org/web/20100606092041if_/http://www.itl.nist.gov/iad/mig/tests/rt/2009/docs/rt09-meeting-eval-plan-v2.pdf
        rttm_lines.append(
            f"Language {name} 1 {start_time:.3f} {(duration+overlap_ms)/ 1000.0:.3f} <NA> {lang_label} <NA> <NA>\n"
        )

    os.makedirs(output_folder_audio, exist_ok=True)
    os.makedirs(output_folder_rttm, exist_ok=True)

    # Export codemixed audio; export() hands back the open output file, so close it.
    output_audio_filename = f"{output_folder_audio}/{name}.wav"
    codemixed_audio.export(output_audio_filename, format="wav").close()

    # Export RTTM file
    output_rttm_filename = f"{output_folder_rttm}/rttm_{name}.rttm"
    with open(output_rttm_filename, "w") as rttm_file:
        rttm_file.writelines(rttm_lines)

    # Advance the clip index only after both files exist.
    count += 1

    return output_audio_filename, output_rttm_filename


In [9]:
# Generate clips until the sample pools run out (at most 10000 clips).
for _ in tqdm(range(10000)):
    audio_path, rttm_path = create_codemixed_audio_and_rttm(output_folder_audio, output_folder_rttm)
    # The generator returns (None, None) when it cannot build another clip.
    if audio_path is None:
        break
    print(f"Length of samples: eng->{len(english_audios)} | hin-> {len(hindi_audios)}")

  1%|          | 64/10000 [00:00<00:29, 333.58it/s]

Number of segments: 5
Length of samples: eng->2911 | hin-> 1658
Number of segments: 2
Length of samples: eng->2910 | hin-> 1657
Number of segments: 2
Length of samples: eng->2909 | hin-> 1656
Number of segments: 3
Length of samples: eng->2907 | hin-> 1655
Number of segments: 3
Length of samples: eng->2906 | hin-> 1653
Number of segments: 3
Length of samples: eng->2905 | hin-> 1651
Number of segments: 5
Length of samples: eng->2902 | hin-> 1649
Number of segments: 3
Length of samples: eng->2901 | hin-> 1647
Number of segments: 6
Length of samples: eng->2898 | hin-> 1644
Number of segments: 4
Length of samples: eng->2896 | hin-> 1642
Number of segments: 2
Length of samples: eng->2895 | hin-> 1641
Number of segments: 2
Length of samples: eng->2894 | hin-> 1640
Number of segments: 4
Length of samples: eng->2892 | hin-> 1638
Number of segments: 6
Length of samples: eng->2889 | hin-> 1635
Number of segments: 5
Length of samples: eng->2886 | hin-> 1633
Number of segments: 5
Length of samples:

  1%|          | 98/10000 [00:00<00:33, 292.61it/s]

Length of samples: eng->2770 | hin-> 1518
Number of segments: 3
Length of samples: eng->2768 | hin-> 1517
Number of segments: 2
Length of samples: eng->2767 | hin-> 1516
Number of segments: 4
Length of samples: eng->2765 | hin-> 1514
Number of segments: 2
Length of samples: eng->2764 | hin-> 1513
Number of segments: 2
Length of samples: eng->2763 | hin-> 1512
Number of segments: 4
Length of samples: eng->2761 | hin-> 1510
Number of segments: 6
Length of samples: eng->2758 | hin-> 1507
Number of segments: 5
Length of samples: eng->2755 | hin-> 1505
Number of segments: 6
Length of samples: eng->2752 | hin-> 1502
Number of segments: 4
Length of samples: eng->2750 | hin-> 1500
Number of segments: 4
Length of samples: eng->2748 | hin-> 1498
Number of segments: 6
Length of samples: eng->2745 | hin-> 1495
Number of segments: 3
Length of samples: eng->2743 | hin-> 1494
Number of segments: 2
Length of samples: eng->2742 | hin-> 1493
Number of segments: 2
Length of samples: eng->2741 | hin-> 149

  2%|▏         | 178/10000 [00:00<00:28, 349.37it/s]

Length of samples: eng->2644 | hin-> 1396
Number of segments: 2
Length of samples: eng->2643 | hin-> 1395
Number of segments: 6
Length of samples: eng->2640 | hin-> 1392
Number of segments: 4
Length of samples: eng->2638 | hin-> 1390
Number of segments: 2
Length of samples: eng->2637 | hin-> 1389
Number of segments: 4
Length of samples: eng->2635 | hin-> 1387
Number of segments: 5
Length of samples: eng->2632 | hin-> 1385
Number of segments: 3
Length of samples: eng->2631 | hin-> 1383
Number of segments: 5
Length of samples: eng->2628 | hin-> 1381
Number of segments: 2
Length of samples: eng->2627 | hin-> 1380
Number of segments: 6
Length of samples: eng->2624 | hin-> 1377
Number of segments: 6
Length of samples: eng->2621 | hin-> 1374
Number of segments: 3
Length of samples: eng->2620 | hin-> 1372
Number of segments: 4
Length of samples: eng->2618 | hin-> 1370
Number of segments: 4
Length of samples: eng->2616 | hin-> 1368
Number of segments: 5
Length of samples: eng->2613 | hin-> 136

  3%|▎         | 259/10000 [00:00<00:26, 370.76it/s]

Length of samples: eng->2470 | hin-> 1227
Number of segments: 5
Length of samples: eng->2468 | hin-> 1224
Number of segments: 6
Length of samples: eng->2465 | hin-> 1221
Number of segments: 4
Length of samples: eng->2463 | hin-> 1219
Number of segments: 4
Length of samples: eng->2461 | hin-> 1217
Number of segments: 2
Length of samples: eng->2460 | hin-> 1216
Number of segments: 4
Length of samples: eng->2458 | hin-> 1214
Number of segments: 6
Length of samples: eng->2455 | hin-> 1211
Number of segments: 5
Length of samples: eng->2452 | hin-> 1209
Number of segments: 5
Length of samples: eng->2450 | hin-> 1206
Number of segments: 3
Length of samples: eng->2448 | hin-> 1205
Number of segments: 2
Length of samples: eng->2447 | hin-> 1204
Number of segments: 5
Length of samples: eng->2445 | hin-> 1201
Number of segments: 3
Length of samples: eng->2444 | hin-> 1199
Number of segments: 3
Length of samples: eng->2442 | hin-> 1198
Number of segments: 3
Length of samples: eng->2441 | hin-> 119

  3%|▎         | 344/10000 [00:00<00:24, 395.15it/s]

Length of samples: eng->2301 | hin-> 1058
Number of segments: 5
Length of samples: eng->2299 | hin-> 1055
Number of segments: 6
Length of samples: eng->2296 | hin-> 1052
Number of segments: 4
Length of samples: eng->2294 | hin-> 1050
Number of segments: 6
Length of samples: eng->2291 | hin-> 1047
Number of segments: 4
Length of samples: eng->2289 | hin-> 1045
Number of segments: 4
Length of samples: eng->2287 | hin-> 1043
Number of segments: 2
Length of samples: eng->2286 | hin-> 1042
Number of segments: 4
Length of samples: eng->2284 | hin-> 1040
Number of segments: 3
Length of samples: eng->2282 | hin-> 1039
Number of segments: 3
Length of samples: eng->2281 | hin-> 1037
Number of segments: 3
Length of samples: eng->2279 | hin-> 1036
Number of segments: 5
Length of samples: eng->2276 | hin-> 1034
Number of segments: 4
Length of samples: eng->2274 | hin-> 1032
Number of segments: 5
Length of samples: eng->2272 | hin-> 1029
Number of segments: 3
Length of samples: eng->2270 | hin-> 102

  4%|▍         | 425/10000 [00:01<00:24, 388.32it/s]

Length of samples: eng->2137 | hin-> 895
Number of segments: 6
Length of samples: eng->2134 | hin-> 892
Number of segments: 4
Length of samples: eng->2132 | hin-> 890
Number of segments: 5
Length of samples: eng->2130 | hin-> 887
Number of segments: 6
Length of samples: eng->2127 | hin-> 884
Number of segments: 5
Length of samples: eng->2125 | hin-> 881
Number of segments: 5
Length of samples: eng->2122 | hin-> 879
Number of segments: 4
Length of samples: eng->2120 | hin-> 877
Number of segments: 4
Length of samples: eng->2118 | hin-> 875
Number of segments: 6
Length of samples: eng->2115 | hin-> 872
Number of segments: 6
Length of samples: eng->2112 | hin-> 869
Number of segments: 3
Length of samples: eng->2110 | hin-> 868
Number of segments: 3
Length of samples: eng->2108 | hin-> 867
Number of segments: 5
Length of samples: eng->2106 | hin-> 864
Number of segments: 3
Length of samples: eng->2105 | hin-> 862
Number of segments: 5
Length of samples: eng->2103 | hin-> 859
Number of segm

  5%|▌         | 508/10000 [00:01<00:23, 397.70it/s]

Length of samples: eng->1976 | hin-> 736
Number of segments: 5
Length of samples: eng->1974 | hin-> 733
Number of segments: 5
Length of samples: eng->1972 | hin-> 730
Number of segments: 2
Length of samples: eng->1971 | hin-> 729
Number of segments: 3
Length of samples: eng->1969 | hin-> 728
Number of segments: 6
Length of samples: eng->1966 | hin-> 725
Number of segments: 6
Length of samples: eng->1963 | hin-> 722
Number of segments: 4
Length of samples: eng->1961 | hin-> 720
Number of segments: 5
Length of samples: eng->1958 | hin-> 718
Number of segments: 4
Length of samples: eng->1956 | hin-> 716
Number of segments: 6
Length of samples: eng->1953 | hin-> 713
Number of segments: 6
Length of samples: eng->1950 | hin-> 710
Number of segments: 6
Length of samples: eng->1947 | hin-> 707
Number of segments: 5
Length of samples: eng->1944 | hin-> 705
Number of segments: 4
Length of samples: eng->1942 | hin-> 703
Number of segments: 4
Length of samples: eng->1940 | hin-> 701
Number of segm

  6%|▌         | 594/10000 [00:01<00:22, 409.77it/s]

Length of samples: eng->1787 | hin-> 557
Number of segments: 4
Length of samples: eng->1785 | hin-> 555
Number of segments: 3
Length of samples: eng->1783 | hin-> 554
Number of segments: 3
Length of samples: eng->1781 | hin-> 553
Number of segments: 6
Length of samples: eng->1778 | hin-> 550
Number of segments: 3
Length of samples: eng->1776 | hin-> 549
Number of segments: 3
Length of samples: eng->1775 | hin-> 547
Number of segments: 3
Length of samples: eng->1774 | hin-> 545
Number of segments: 4
Length of samples: eng->1772 | hin-> 543
Number of segments: 4
Length of samples: eng->1770 | hin-> 541
Number of segments: 2
Length of samples: eng->1769 | hin-> 540
Number of segments: 5
Length of samples: eng->1767 | hin-> 537
Number of segments: 3
Length of samples: eng->1765 | hin-> 536
Number of segments: 5
Length of samples: eng->1762 | hin-> 534
Number of segments: 5
Length of samples: eng->1760 | hin-> 531
Number of segments: 2
Length of samples: eng->1759 | hin-> 530
Number of segm

  7%|▋         | 678/10000 [00:01<00:23, 390.75it/s]

Length of samples: eng->1615 | hin-> 387
Number of segments: 6
Length of samples: eng->1612 | hin-> 384
Number of segments: 5
Length of samples: eng->1609 | hin-> 382
Number of segments: 4
Length of samples: eng->1607 | hin-> 380
Number of segments: 3
Length of samples: eng->1605 | hin-> 379
Number of segments: 4
Length of samples: eng->1603 | hin-> 377
Number of segments: 4
Length of samples: eng->1601 | hin-> 375
Number of segments: 2
Length of samples: eng->1600 | hin-> 374
Number of segments: 4
Length of samples: eng->1598 | hin-> 372
Number of segments: 3
Length of samples: eng->1596 | hin-> 371
Number of segments: 3
Length of samples: eng->1594 | hin-> 370
Number of segments: 6
Length of samples: eng->1591 | hin-> 367
Number of segments: 3
Length of samples: eng->1589 | hin-> 366
Number of segments: 2
Length of samples: eng->1588 | hin-> 365
Number of segments: 6
Length of samples: eng->1585 | hin-> 362
Number of segments: 5
Length of samples: eng->1583 | hin-> 359
Number of segm

  8%|▊         | 775/10000 [00:01<00:21, 435.28it/s]

Length of samples: eng->1449 | hin-> 223
Number of segments: 4
Length of samples: eng->1447 | hin-> 221
Number of segments: 2
Length of samples: eng->1446 | hin-> 220
Number of segments: 2
Length of samples: eng->1445 | hin-> 219
Number of segments: 2
Length of samples: eng->1444 | hin-> 218
Number of segments: 2
Length of samples: eng->1443 | hin-> 217
Number of segments: 3
Length of samples: eng->1442 | hin-> 215
Number of segments: 4
Length of samples: eng->1440 | hin-> 213
Number of segments: 4
Length of samples: eng->1438 | hin-> 211
Number of segments: 6
Length of samples: eng->1435 | hin-> 208
Number of segments: 5
Length of samples: eng->1433 | hin-> 205
Number of segments: 2
Length of samples: eng->1432 | hin-> 204
Number of segments: 2
Length of samples: eng->1431 | hin-> 203
Number of segments: 6
Length of samples: eng->1428 | hin-> 200
Number of segments: 3
Length of samples: eng->1427 | hin-> 198
Number of segments: 3
Length of samples: eng->1425 | hin-> 197
Number of segm

  8%|▊         | 818/10000 [00:02<00:23, 389.85it/s]

Length of samples: eng->1260 | hin-> 32
Number of segments: 5
Length of samples: eng->1257 | hin-> 30
Number of segments: 2
Length of samples: eng->1256 | hin-> 29
Number of segments: 5
Length of samples: eng->1254 | hin-> 26
Number of segments: 3
Length of samples: eng->1253 | hin-> 24
Number of segments: 6
Length of samples: eng->1250 | hin-> 21
Number of segments: 4
Length of samples: eng->1248 | hin-> 19
Number of segments: 4
Length of samples: eng->1246 | hin-> 17
Number of segments: 6
Length of samples: eng->1243 | hin-> 14
Number of segments: 5
Length of samples: eng->1241 | hin-> 11
Number of segments: 2
Length of samples: eng->1240 | hin-> 10
Number of segments: 2
Length of samples: eng->1239 | hin-> 9
Number of segments: 6
Length of samples: eng->1236 | hin-> 6
Number of segments: 5
Length of samples: eng->1233 | hin-> 4
Number of segments: 3
Length of samples: eng->1231 | hin-> 3
Number of segments: 3
Length of samples: eng->1230 | hin-> 1
Number of segments: 4



