In [3]:
import librosa
import os
import numpy as np
import sys
from dtw import dtw
from numpy.linalg import norm
from numpy import array
import pyaudio
import wave

import heapq

**|Heap| = 80**  
**n_mfcc = 20**  
**DTWSeq = mfcc**  
**Window-MinMax**

In [15]:
def initialCorpus(path, dbpath='beatDatabase_mfcc_20.npy', n_mfcc=20):
    """Build the MFCC feature database for every ``.wav`` file in a folder.

    Parameters
    ----------
    path : str
        Directory that holds the reference recordings.
    dbpath : str, optional
        File the database is saved to with ``np.save``. Defaults to the name
        that used to be hard-coded here.
    n_mfcc : int, optional
        Number of MFCC coefficients extracted per frame (default 20).

    Returns
    -------
    dict
        Maps ``os.path.join(path, file_name)`` to the MFCC matrix returned by
        ``librosa.feature.mfcc`` for that file.
    """
    beat_database = {}

    # Sorted so the database is filled in the same order on every run.
    for tmp in sorted(os.listdir(path)):
        audioName = os.path.join(path, tmp)
        if not audioName.endswith('.wav'):
            continue
        # Load the recording as a 1-D sample sequence.
        y, sr = librosa.load(audioName)
        # Extract the MFCC features and store them under the file path.
        beat_database[audioName] = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)

    # Persist the database (the dict is pickled by np.save).
    np.save(dbpath, beat_database)

    return beat_database

In [16]:
def readCorpus(path):
    """Load a saved feature database and return it as a dict.

    ``path`` is a file written by ``np.save`` from a dict; ``np.load`` returns
    it as a 0-d object array, so ``.item()`` is used to unwrap the dict.

    Note: ``allow_pickle=True`` unpickles the file contents, so only load
    database files you created yourself.
    """
    return np.load(path, allow_pickle=True).item()

In [17]:
def updateCorpus(path, dbpath):
    """Add MFCC features for recordings that are not yet in the database.

    Parameters
    ----------
    path : str
        Directory that holds the reference ``.wav`` recordings.
    dbpath : str
        Database file previously written with ``np.save``; it is read,
        extended and written back to the same path.

    Returns
    -------
    dict
        The updated database (file path -> MFCC matrix).

    Note: ``allow_pickle=True`` unpickles the file contents, so only use
    database files you created yourself.
    """
    # Load the existing database from the *parameter* (the previous code read
    # the global ``dbPath`` and then wrote to an undefined ``beat_database``).
    beat_database = np.load(dbpath, allow_pickle=True).item()

    for tmp in os.listdir(path):
        audioName = os.path.join(path, tmp)
        # Skip non-wav files and recordings that already have features.
        if not audioName.endswith('.wav') or audioName in beat_database:
            continue
        y, sr = librosa.load(audioName)
        # Extract the MFCC features and store them under the file path.
        beat_database[audioName] = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20)

    # Persist the extended database.
    np.save(dbpath, beat_database)

    return beat_database

In [18]:
def normlize(data):
    """Standardise ``data`` along axis 0 to zero mean and unit variance.

    Entries whose standard deviation is exactly zero (constant input) are
    divided by 1 instead, so they come out as 0 rather than NaN/inf.

    Parameters
    ----------
    data : array_like
        Values to standardise; statistics are taken over axis 0.

    Returns
    -------
    numpy.ndarray
        ``(data - mean) / std`` with the zero-deviation guard applied.
    """
    n_mean = np.mean(data, axis=0)
    n_std = np.std(data, axis=0)

    # Guard against division by zero for constant columns / constant vectors.
    safe_std = np.where(n_std == 0, 1.0, n_std)

    return np.divide(np.subtract(data, n_mean), safe_std)

In [19]:
def voiceCompare_quick(dbPath, tPath):
    """Print the database entry whose sequence is closest (by DTW) to a recording.

    The query is the delta of the beat frames that ``librosa.beat.beat_track``
    detects in ``tPath``. Every database value is flattened to a single column
    and compared with the query by ``dtw``; the key with the smallest distance
    is printed. Nothing is returned.

    NOTE(review): databases written by ``initialCorpus`` hold MFCC matrices,
    which are flattened here and compared against beat deltas - confirm this
    function is meant to be used with such a database.
    """
    # Load the corpus database (file path -> stored sequence).
    beat_database = np.load(dbPath, allow_pickle=True).item()

    # Load the recording to identify and derive its beat-delta sequence.
    signal, rate = librosa.load(tPath)
    _, beats = librosa.beat.beat_track(y=signal, sr=rate)
    beat_delta = librosa.feature.delta(beats, mode='nearest')
    query = array(beat_delta).reshape(-1, 1)

    # DTW distance between the query and every stored sequence.
    compare_result = {}
    for songID, stored in beat_database.items():
        candidate = array(stored).reshape(-1, 1)
        compare_result[songID] = dtw(query, candidate).distance

    matched_song = min(compare_result, key=compare_result.get)
    print("最接近的录音是：", matched_song)

In [23]:
from sklearn import preprocessing

def _minmax_scale_rows(features):
    """Return a copy of ``features`` with each row passed through ``minmax_scale``."""
    scaled = np.array(features, copy=True)
    for row in range(scaled.shape[0]):
        scaled[row] = preprocessing.minmax_scale(scaled[row])
    return scaled


def voiceCompare(dbPath, tPath, aimNum=100, verbose=True):
    """Find the corpus windows that are most similar to a query recording.

    The query's MFCC matrix is slid over every stored MFCC matrix one frame at
    a time. A window's score is the sum, over the query's frames, of the
    ``dtw`` distance between the query frame and the aligned corpus frame
    (each frame being its vector of row-wise min-max scaled coefficients).
    The ``aimNum`` lowest-scoring windows are kept.

    Parameters
    ----------
    dbPath : str
        Database file written with ``np.save`` (file path -> MFCC matrix).
        It is unpickled, so only use files you created yourself.
    tPath : str
        Recording to identify.
    aimNum : int, optional
        Maximum number of windows to keep (default 100).
    verbose : bool, optional
        When True (default) print each entry's frame count and every window
        that is pushed onto the heap, as the function always did.

    Returns
    -------
    list
        A ``heapq`` heap of ``(-total_distance, start_frame, songID)`` tuples.
        Distances are negated so the heap root is the *worst* kept window.

    Raises
    ------
    ValueError
        If ``aimNum`` is smaller than 1.
    """
    if aimNum < 1:
        raise ValueError("aimNum must be a positive integer")

    # Load the corpus database.
    beat_database = np.load(dbPath, allow_pickle=True).item()

    # ==== Query recording: MFCC -> row-wise min-max scaling -> (frames, coeffs) ====
    signal, sr = librosa.load(tPath)
    x = _minmax_scale_rows(librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=20)).T
    lenx = len(x)

    # ==== Compare the query against every window of every corpus entry ====
    heap = []

    for songID, stored in beat_database.items():
        # Scale a copy so the loaded database entry itself is left untouched.
        y = _minmax_scale_rows(stored).T
        leny = len(y)
        if verbose:
            print(leny)

        # "+ 1" so the final window, which ends on the last frame, is included.
        for tp in range(0, leny - lenx + 1):
            # Speed-up: once the heap is full, the worst kept distance is an
            # upper bound; a window exceeding it can be abandoned early.
            # (Assumes per-frame distances are non-negative.)
            full = len(heap) >= aimNum
            dist_UB = -heap[0][0] if full else None

            total_dist = 0
            overBound = False
            for i in range(lenx):
                # Only the distance is used, so skip the warping-path backtrack.
                total_dist += dtw(x[i], y[tp + i], distance_only=True).distance
                if full and total_dist > dist_UB:
                    overBound = True
                    break

            if overBound:
                continue

            # Negate the distance so heapq's min-heap acts as a max-heap.
            tupleY = (-total_dist, tp, songID)
            if full:
                # Push the new window and drop the worst one in a single step.
                heapq.heappushpop(heap, tupleY)
            else:
                heapq.heappush(heap, tupleY)

            if verbose:
                print(tupleY)

    return heap

In [10]:
def getTimePoint_dense(dbPath, tPath, vheap):
    """Print the time spans of the best windows in ``vheap`` without de-duplication.

    Parameters
    ----------
    dbPath : str
        Database file written with ``np.save`` (file path -> MFCC matrix);
        used to look up each file's total frame count.
    tPath : str
        The query recording; its duration is used as the span length.
    vheap : list
        ``(-distance, start_frame, file_path)`` tuples as returned by
        ``voiceCompare``.

    Prints one line per window: file path, start second, end second. At most
    20 windows are printed; fewer when ``vheap`` holds fewer entries (the
    previous fixed-count loop raised IndexError in that case).

    NOTE(review): newer librosa releases rename ``get_duration``'s
    ``filename`` argument to ``path`` - confirm against the installed version.
    """
    res_num = 20  # number of top results to report

    # Load the corpus database.
    beat_database = np.load(dbPath, allow_pickle=True).item()

    # Duration of the query recording.
    tTime = librosa.get_duration(filename=tPath)

    # Largest negated distance == smallest distance == most similar.
    similar_n = heapq.nlargest(res_num, vheap)

    print("开始输出相似片段：")

    durations = {}  # file path -> duration, so each file is probed only once
    for entry in similar_n:
        music_name = entry[2]  # file path of the corpus recording
        if music_name not in durations:
            durations[music_name] = librosa.get_duration(filename=music_name)
        music_time = durations[music_name]

        music_pos = entry[1]  # start frame of the window
        music_all = len(beat_database[music_name][0])  # total frame count

        frag_st = music_time / music_all * music_pos  # window start (seconds)
        frag_en = frag_st + tTime  # window end (seconds)

        print(music_name, ",", '%.2f' % frag_st, "秒,", '%.2f' % frag_en, "秒")


In [11]:
def getTimePoint(dbPath, tPath, vheap):
    """Print the time spans of the windows in ``vheap`` after merging near-duplicates.

    Windows of the same file whose start frames are at most 5 frames away from
    the previously kept window are merged, keeping the one with the smaller
    distance. Files are processed in sorted name order so the output order is
    the same on every run.

    Parameters
    ----------
    dbPath : str
        Database file written with ``np.save`` (file path -> MFCC matrix);
        used to look up each file's total frame count.
    tPath : str
        The query recording; its duration is used as the span length.
    vheap : list
        ``(-distance, start_frame, file_path)`` tuples as returned by
        ``voiceCompare``.

    Prints one line per kept window: file path, start second, end second.

    NOTE(review): newer librosa releases rename ``get_duration``'s
    ``filename`` argument to ``path`` - confirm against the installed version.
    """
    # Load the corpus database.
    beat_database = np.load(dbPath, allow_pickle=True).item()

    # Duration of the query recording.
    tTime = librosa.get_duration(filename=tPath)

    # ====== De-duplicate vheap ======
    sheap = []
    for name in sorted({entry[2] for entry in vheap}):
        # Windows of this file, ordered by start frame.
        sortL = sorted((entry for entry in vheap if entry[2] == name),
                       key=lambda t: t[1])

        for tp in sortL:
            starts_new_group = (
                not sheap
                or sheap[-1][2] != name
                or abs(sheap[-1][1] - tp[1]) > 5
            )
            if starts_new_group:
                sheap.append(tp)
            elif sheap[-1][0] < tp[0]:
                # Distances are stored negated: larger value == smaller distance.
                sheap[-1] = tp

    print("开始输出相似片段：")

    durations = {}  # file path -> duration, so each file is probed only once
    for entry in sheap:
        music_name = entry[2]  # file path of the corpus recording
        if music_name not in durations:
            durations[music_name] = librosa.get_duration(filename=music_name)
        music_time = durations[music_name]

        music_pos = entry[1]  # start frame of the window
        music_all = len(beat_database[music_name][0])  # total frame count

        frag_st = music_time / music_all * music_pos  # window start (seconds)
        frag_en = frag_st + tTime  # window end (seconds)

        print(music_name, ",", '%.2f' % frag_st, "秒,", '%.2f' % frag_en, "秒")

In [12]:
# Directory that holds the reference corpus recordings
corpus_path = './corpus'

# Saved feature database file
dbPath = './beatDatabase_mfcc_20.npy'

# Recording to identify (alternative samples kept for reference)
# testPath = './input/00415250-前5s.wav'
# testPath = './input/00429126-53s_60s.wav'
testPath = './input/00430105-hou5s.wav'

In [13]:
# 1 Build the corpus feature database from scratch (run once)
# beatDB = initialCorpus(corpus_path)

# 2 Add features for recordings newly placed in the corpus folder
# updateCorpus(corpus_path, dbPath)

# 3 Load the corpus feature database
# beat_database = readCorpus(dbPath)

# Search the corpus for the windows most similar to the test recording.
vheap = voiceCompare(dbPath, testPath)

3876
(-479.5739887058735, 0, './corpus\\00415250.wav')
(-478.8359127268195, 1, './corpus\\00415250.wav')
(-479.90067633613944, 2, './corpus\\00415250.wav')
(-481.2924239560962, 3, './corpus\\00415250.wav')
(-481.4842509776354, 4, './corpus\\00415250.wav')
(-480.5962525382638, 5, './corpus\\00415250.wav')
(-481.10502410680056, 6, './corpus\\00415250.wav')
(-481.6134977713227, 7, './corpus\\00415250.wav')
(-482.53090477734804, 8, './corpus\\00415250.wav')
(-483.77399249374866, 9, './corpus\\00415250.wav')
(-484.31166733801365, 10, './corpus\\00415250.wav')
(-486.95738607645035, 11, './corpus\\00415250.wav')
(-486.54270680993795, 12, './corpus\\00415250.wav')
(-487.483578376472, 13, './corpus\\00415250.wav')
(-485.9326030090451, 14, './corpus\\00415250.wav')
(-486.74262200295925, 15, './corpus\\00415250.wav')
(-487.95961052924395, 16, './corpus\\00415250.wav')
(-487.9801718592644, 17, './corpus\\00415250.wav')
(-488.25053407996893, 18, './corpus\\00415250.wav')
(-488.891846857965, 19, './

(-426.7879495881498, 231, './corpus\\00415250.wav')
(-428.50468269363046, 232, './corpus\\00415250.wav')
(-428.0144963823259, 233, './corpus\\00415250.wav')
(-430.36644941195846, 234, './corpus\\00415250.wav')
(-431.16326856054366, 235, './corpus\\00415250.wav')
(-433.44248409383, 236, './corpus\\00415250.wav')
(-438.06445511616766, 237, './corpus\\00415250.wav')
(-436.1065330002457, 238, './corpus\\00415250.wav')
(-432.63440509326756, 239, './corpus\\00415250.wav')
(-434.30009694583714, 240, './corpus\\00415250.wav')
(-437.6492672432214, 241, './corpus\\00415250.wav')
(-441.32652161084116, 242, './corpus\\00415250.wav')
(-440.67171574756503, 577, './corpus\\00415250.wav')
(-439.0983931608498, 578, './corpus\\00415250.wav')
(-434.22254530340433, 579, './corpus\\00415250.wav')
(-431.249937415123, 580, './corpus\\00415250.wav')
(-432.2801982611418, 581, './corpus\\00415250.wav')
(-430.8166861310601, 582, './corpus\\00415250.wav')
(-435.48425767198205, 583, './corpus\\00415250.wav')
(-434

(-425.28280286863446, 11015, './corpus\\00429239.wav')
10164
7150
(-425.28826301544905, 538, './corpus\\00430105.wav')
(-421.8456977792084, 539, './corpus\\00430105.wav')
(-419.8633433133364, 540, './corpus\\00430105.wav')
(-419.6242553703487, 541, './corpus\\00430105.wav')
(-421.2429046407342, 542, './corpus\\00430105.wav')
(-420.30602064728737, 543, './corpus\\00430105.wav')
(-419.2217673063278, 544, './corpus\\00430105.wav')
(-418.9880572259426, 545, './corpus\\00430105.wav')
(-419.70400462672114, 546, './corpus\\00430105.wav')
(-423.4374586343765, 547, './corpus\\00430105.wav')
(-421.70921817421913, 548, './corpus\\00430105.wav')
(-423.52491994574666, 549, './corpus\\00430105.wav')
(-424.9459434039891, 550, './corpus\\00430105.wav')
(-424.9347672946751, 1376, './corpus\\00430105.wav')
(-423.2475371938199, 1419, './corpus\\00430105.wav')
(-421.21548597328365, 1422, './corpus\\00430105.wav')
(-417.9064630474895, 1423, './corpus\\00430105.wav')
(-415.8519718144089, 1424, './corpus\\00

(-415.00058743171394, 5287, './corpus\\00430105.wav')
(-413.3677337821573, 5288, './corpus\\00430105.wav')
(-411.265333045274, 5413, './corpus\\00430105.wav')
(-409.4585402496159, 5414, './corpus\\00430105.wav')
(-412.19745526090264, 5415, './corpus\\00430105.wav')
(-414.90161470137537, 5457, './corpus\\00430105.wav')
(-414.65513816289604, 5462, './corpus\\00430105.wav')
(-412.4992214422673, 5464, './corpus\\00430105.wav')
(-411.73135192506015, 5465, './corpus\\00430105.wav')
(-409.94347155466676, 5466, './corpus\\00430105.wav')
(-407.5415839217603, 5467, './corpus\\00430105.wav')
(-409.0249039903283, 5468, './corpus\\00430105.wav')
(-412.8509140815586, 5480, './corpus\\00430105.wav')
(-412.57313495874405, 5481, './corpus\\00430105.wav')
(-414.09017450734973, 5482, './corpus\\00430105.wav')
(-410.7143637612462, 6018, './corpus\\00430105.wav')
(-412.93105640634894, 6019, './corpus\\00430105.wav')
(-414.37562019377947, 6023, './corpus\\00430105.wav')
(-413.026610262692, 6024, './corpus\\

In [14]:
# Print the merged time spans for the windows found in the previous cell.
getTimePoint(dbPath, testPath, vheap)

开始输出相似片段：
./corpus\00430105.wav , 59.34 秒, 61.41 秒
./corpus\00430105.wav , 59.50 秒, 61.57 秒
./corpus\00430105.wav , 59.76 秒, 61.83 秒
./corpus\00430105.wav , 59.95 秒, 62.02 秒
./corpus\00430105.wav , 78.59 秒, 80.66 秒
./corpus\00430105.wav , 81.10 秒, 83.17 秒
./corpus\00430105.wav , 109.82 秒, 111.88 秒
./corpus\00430105.wav , 121.26 秒, 123.33 秒
./corpus\00430105.wav , 122.77 秒, 124.84 秒
./corpus\00430105.wav , 125.70 秒, 127.76 秒
./corpus\00430105.wav , 126.93 秒, 129.00 秒
./corpus\00430105.wav , 127.25 秒, 129.32 秒
./corpus\00430105.wav , 139.72 秒, 141.79 秒
./corpus\00430105.wav , 140.00 秒, 142.07 秒
./corpus\00430105.wav , 140.28 秒, 142.35 秒


In [None]:
# Command-line entry point:
#   argv[1] recording to identify (required)
#   argv[2] corpus directory (optional, overrides corpus_path)
#   argv[3] feature database file (optional, overrides dbPath)
# NOTE(review): inside a Jupyter kernel sys.argv typically carries the
# kernel's own launch arguments - confirm this cell is only meant to run
# after the notebook is exported as a script.
inPara = sys.argv

if len(inPara) >= 2:
    # Corpus directory: the given one if present, otherwise the default.
    if len(inPara) <= 2:
        print("默认语料库路径为：", corpus_path)
    else:
        corpus_path = inPara[2]
        print("给定语料库路径为:", corpus_path)

    # Database file: the given one if present, otherwise the default.
    if len(inPara) <= 3:
        print("默认数据表路径为：", dbPath)
    else:
        dbPath = inPara[3]
        print("给定数据表路径为:", dbPath)

    testPath = inPara[1]
    vheap = voiceCompare(dbPath, testPath)
    getTimePoint(dbPath, testPath, vheap)
else:
    print("请输入待识别录音文件路径！")
  

In [24]:
# Second search run on the same sample recording; the result is kept in vheap2.
testPath2 = './input/00430105-hou5s.wav'
vheap2 = voiceCompare(dbPath, testPath2)

3876
(-479.5739887058735, 0, './corpus\\00415250.wav')
(-478.8359127268195, 1, './corpus\\00415250.wav')
(-479.90067633613944, 2, './corpus\\00415250.wav')
(-481.2924239560962, 3, './corpus\\00415250.wav')
(-481.4842509776354, 4, './corpus\\00415250.wav')
(-480.5962525382638, 5, './corpus\\00415250.wav')
(-481.10502410680056, 6, './corpus\\00415250.wav')
(-481.6134977713227, 7, './corpus\\00415250.wav')
(-482.53090477734804, 8, './corpus\\00415250.wav')
(-483.77399249374866, 9, './corpus\\00415250.wav')
(-484.31166733801365, 10, './corpus\\00415250.wav')
(-486.95738607645035, 11, './corpus\\00415250.wav')
(-486.54270680993795, 12, './corpus\\00415250.wav')
(-487.483578376472, 13, './corpus\\00415250.wav')
(-485.9326030090451, 14, './corpus\\00415250.wav')
(-486.74262200295925, 15, './corpus\\00415250.wav')
(-487.95961052924395, 16, './corpus\\00415250.wav')
(-487.9801718592644, 17, './corpus\\00415250.wav')
(-488.25053407996893, 18, './corpus\\00415250.wav')
(-488.891846857965, 19, './

(-451.4064991828054, 158, './corpus\\00415250.wav')
(-448.49945178441703, 159, './corpus\\00415250.wav')
(-448.44275126978755, 160, './corpus\\00415250.wav')
(-443.6567609645426, 161, './corpus\\00415250.wav')
(-443.14834136143327, 162, './corpus\\00415250.wav')
(-444.71535183116794, 163, './corpus\\00415250.wav')
(-444.0209492109716, 164, './corpus\\00415250.wav')
(-444.7062832452357, 165, './corpus\\00415250.wav')
(-445.0355458520353, 166, './corpus\\00415250.wav')
(-447.7565952949226, 167, './corpus\\00415250.wav')
(-451.2093938998878, 168, './corpus\\00415250.wav')
(-448.65624417737126, 169, './corpus\\00415250.wav')
(-448.8663149140775, 170, './corpus\\00415250.wav')
(-448.9028553105891, 171, './corpus\\00415250.wav')
(-450.49098547920585, 172, './corpus\\00415250.wav')
(-448.2081885561347, 173, './corpus\\00415250.wav')
(-450.55474443361163, 174, './corpus\\00415250.wav')
(-451.2813191600144, 175, './corpus\\00415250.wav')
(-452.22529601305723, 176, './corpus\\00415250.wav')
(-45

(-436.78203891962767, 929, './corpus\\00415250.wav')
(-437.273842882365, 930, './corpus\\00415250.wav')
(-436.86136558279395, 931, './corpus\\00415250.wav')
(-437.055340167135, 932, './corpus\\00415250.wav')
(-437.7768993154168, 933, './corpus\\00415250.wav')
(-439.6146819964051, 934, './corpus\\00415250.wav')
(-437.26491855829954, 1193, './corpus\\00415250.wav')
(-433.98758267238736, 1194, './corpus\\00415250.wav')
(-431.8095784597099, 1195, './corpus\\00415250.wav')
(-428.3650158382952, 1196, './corpus\\00415250.wav')
(-424.0315500460565, 1197, './corpus\\00415250.wav')
(-424.51934215053916, 1198, './corpus\\00415250.wav')
(-421.9846497140825, 1199, './corpus\\00415250.wav')
(-421.02416844293475, 1200, './corpus\\00415250.wav')
(-421.2821301110089, 1201, './corpus\\00415250.wav')
(-421.83471727743745, 1202, './corpus\\00415250.wav')
(-425.03819746896625, 1203, './corpus\\00415250.wav')
(-425.3238441608846, 1204, './corpus\\00415250.wav')
(-426.65026672556996, 1205, './corpus\\0041525

(-429.91104279085994, 3014, './corpus\\00429126.wav')
(-428.59479427337646, 3121, './corpus\\00429126.wav')
(-428.6407672576606, 3122, './corpus\\00429126.wav')
(-429.6307844147086, 3123, './corpus\\00429126.wav')
(-429.78610606491566, 3124, './corpus\\00429126.wav')
(-429.49407724291086, 3126, './corpus\\00429126.wav')
(-428.51086354255676, 3127, './corpus\\00429126.wav')
(-427.9176327511668, 3128, './corpus\\00429126.wav')
(-426.0699055567384, 3129, './corpus\\00429126.wav')
(-425.71992149949074, 3130, './corpus\\00429126.wav')
(-429.3200901374221, 3131, './corpus\\00429126.wav')
(-427.9549486115575, 3279, './corpus\\00429126.wav')
(-428.5742611736059, 3453, './corpus\\00429126.wav')
(-427.3304733969271, 3454, './corpus\\00429126.wav')
(-423.8941284827888, 3455, './corpus\\00429126.wav')
(-428.58808633312583, 3456, './corpus\\00429126.wav')
(-428.5069592036307, 3960, './corpus\\00429126.wav')
(-426.09020706638694, 3961, './corpus\\00429126.wav')
(-423.4887530878186, 3962, './corpus\\

(-426.0242983046919, 1452, './corpus\\00430105.wav')
(-423.12340068630874, 1453, './corpus\\00430105.wav')
(-425.95187243260443, 1458, './corpus\\00430105.wav')
(-422.53533753193915, 1459, './corpus\\00430105.wav')
(-422.1958701927215, 1460, './corpus\\00430105.wav')
(-420.56877858377993, 1461, './corpus\\00430105.wav')
(-420.97135486640036, 1462, './corpus\\00430105.wav')
(-421.257393181324, 1463, './corpus\\00430105.wav')
(-423.3239668942988, 1464, './corpus\\00430105.wav')
(-421.6185422781855, 1465, './corpus\\00430105.wav')
(-420.87232117913663, 1466, './corpus\\00430105.wav')
(-422.9079420398921, 1467, './corpus\\00430105.wav')
(-424.4819390233606, 1468, './corpus\\00430105.wav')
(-424.5040961969644, 1469, './corpus\\00430105.wav')
(-422.4554184842855, 1470, './corpus\\00430105.wav')
(-421.5734483432025, 1471, './corpus\\00430105.wav')
(-421.54718872345984, 1472, './corpus\\00430105.wav')
(-421.5766307655722, 1473, './corpus\\00430105.wav')
(-414.20960601232946, 1474, './corpus\\0

(-419.0712669175118, 3285, './corpus\\00430105.wav')
(-419.48583554290235, 3286, './corpus\\00430105.wav')
(-417.8107894677669, 3383, './corpus\\00430105.wav')
(-416.6697864923626, 3384, './corpus\\00430105.wav')
(-411.29208561964333, 3385, './corpus\\00430105.wav')
(-412.88647556118667, 3386, './corpus\\00430105.wav')
(-413.28819909505546, 3387, './corpus\\00430105.wav')
(-414.91058655641973, 3388, './corpus\\00430105.wav')
(-417.9699545260519, 3389, './corpus\\00430105.wav')
(-419.5374367851764, 3392, './corpus\\00430105.wav')
(-417.6660437975079, 3393, './corpus\\00430105.wav')
(-419.30053533799946, 3394, './corpus\\00430105.wav')
(-419.6753205396235, 3473, './corpus\\00430105.wav')
(-419.5739481616765, 3488, './corpus\\00430105.wav')
(-419.11730446852744, 3489, './corpus\\00430105.wav')
(-417.13295073993504, 3490, './corpus\\00430105.wav')
(-415.0313705597073, 3491, './corpus\\00430105.wav')
(-413.93241647072136, 3492, './corpus\\00430105.wav')
(-411.60524426586926, 3493, './corpus

In [25]:
# vheap2 was computed from testPath2, so report span lengths from that same
# recording rather than from the unrelated testPath variable.
getTimePoint(dbPath, testPath2, vheap2)

开始输出相似片段：
./corpus\00430105.wav , 33.08 秒, 35.15 秒
./corpus\00430105.wav , 34.24 秒, 36.31 秒
./corpus\00430105.wav , 34.38 秒, 36.45 秒
./corpus\00430105.wav , 59.34 秒, 61.41 秒
./corpus\00430105.wav , 59.50 秒, 61.57 秒
./corpus\00430105.wav , 59.76 秒, 61.83 秒
./corpus\00430105.wav , 59.95 秒, 62.02 秒
./corpus\00430105.wav , 66.93 秒, 69.00 秒
./corpus\00430105.wav , 75.64 秒, 77.71 秒
./corpus\00430105.wav , 76.01 秒, 78.08 秒
./corpus\00430105.wav , 78.59 秒, 80.66 秒
./corpus\00430105.wav , 81.10 秒, 83.17 秒
./corpus\00430105.wav , 109.82 秒, 111.88 秒
./corpus\00430105.wav , 119.24 秒, 121.31 秒
./corpus\00430105.wav , 121.08 秒, 123.14 秒
./corpus\00430105.wav , 121.26 秒, 123.33 秒
./corpus\00430105.wav , 122.49 秒, 124.56 秒
./corpus\00430105.wav , 122.77 秒, 124.84 秒
./corpus\00430105.wav , 125.70 秒, 127.76 秒
./corpus\00430105.wav , 126.93 秒, 129.00 秒
./corpus\00430105.wav , 127.25 秒, 129.32 秒
./corpus\00430105.wav , 139.72 秒, 141.79 秒
./corpus\00430105.wav , 140.00 秒, 142.07 秒
./corpus\00430105.wav , 1