In [1]:
import tensorflow as tf
from google.colab import drive

# 使用工具colab的接口挂载google drive目录，这样可以从外部获取数据并且可以把训练好的模型保存在google drive上
drive.mount('/content/gdrive')
tf.test.gpu_device_name()

Mounted at /content/gdrive


'/device:GPU:0'

In [2]:
import scipy.io.wavfile as wav
import numpy as np
from scipy.fftpack import fft
from scipy.fftpack import ifft
from scipy import signal
import scipy
import librosa

def compute_PSD_matrix(audio, window_size):
    """
	First, perform STFT.
	Then, compute the PSD.
	Last, normalize PSD.
    """


    win = np.sqrt(8.0/3.) * librosa.core.stft(audio, center=False)

    real=np.real(win)
    imag=np.imag(win)

    #stft=librosa.core.stft(audio, center=False)
    #magnitude, phase = librosa.magphase(stft)
    z = abs(win / window_size)
    psd_max = np.max(z*z)
    psd = 10 * np.log10(z * z + 0.0000000000000000001)
    PSD = 96 - np.max(psd) + psd
    return win,z,psd, PSD, psd_max, np.max(psd),imag,real

def Bark(f):
    """returns the bark-scale value for input frequency f (in Hz)"""
    return 13*np.arctan(0.00076*f) + 3.5*np.arctan(pow(f/7500.0, 2))

def quiet(f):
     """returns threshold in quiet measured in SPL at frequency f with an offset 12(in Hz)"""
     thresh = 3.64*pow(f*0.001,-0.8) - 6.5*np.exp(-0.6*pow(0.001*f-3.3,2)) + 0.001*pow(0.001*f,4) - 12
     return thresh

def two_slops(bark_psd, delta_TM, bark_maskee):
    """
	returns the masking threshold for each masker using two slopes as the spread function 
    """
    Ts = []
    for tone_mask in range(bark_psd.shape[0]):
        bark_masker = bark_psd[tone_mask, 0]
        dz = bark_maskee - bark_masker
        zero_index = np.argmax(dz > 0)
        sf = np.zeros(len(dz))
        sf[:zero_index] = 27 * dz[:zero_index]
        sf[zero_index:] = (-27 + 0.37 * max(bark_psd[tone_mask, 1] - 40, 0)) * dz[zero_index:] 
        T = bark_psd[tone_mask, 1] + delta_TM[tone_mask] + sf
        Ts.append(T)
    return Ts
    
def compute_th(PSD, barks, ATH, freqs):
    """ returns the global masking threshold
    """
    # Identification of tonal maskers
    # find the index of maskers that are the local maxima
    length = len(PSD)
    masker_index = signal.argrelextrema(PSD, np.greater)[0]
    #print(masker_index.shape)
    
    
    # delete the boundary of maskers for smoothing
    if 0 in masker_index:
        masker_index = np.delete(0)
    if length - 1 in masker_index:
        masker_index = np.delete(length - 1)
    num_local_max = len(masker_index)

    # treat all the maskers as tonal (conservative way)
    # smooth the PSD 
    p_k = pow(10, PSD[masker_index]/10.)    
    p_k_prev = pow(10, PSD[masker_index - 1]/10.)
    p_k_post = pow(10, PSD[masker_index + 1]/10.)
    P_TM = 10 * np.log10(p_k_prev + p_k + p_k_post)
    
    # bark_psd: the first column bark, the second column: P_TM, the third column: the index of points
    _BARK = 0
    _PSD = 1
    _INDEX = 2
    bark_psd = np.zeros([num_local_max, 3])
    bark_psd[:, _BARK] = barks[masker_index]
    bark_psd[:, _PSD] = P_TM
    bark_psd[:, _INDEX] = masker_index
    
    # delete the masker that doesn't have the highest PSD within 0.5 Bark around its frequency 
    for i in range(num_local_max):
        next = i + 1
        if next >= bark_psd.shape[0]:
            break
            
        while bark_psd[next, _BARK] - bark_psd[i, _BARK]  < 0.5:
            # masker must be higher than quiet threshold
            if quiet(freqs[int(bark_psd[i, _INDEX])]) > bark_psd[i, _PSD]:
                bark_psd = np.delete(bark_psd, (i), axis=0)
            if next == bark_psd.shape[0]:
                break
                
            if bark_psd[i, _PSD] < bark_psd[next, _PSD]:
                bark_psd = np.delete(bark_psd, (i), axis=0)
            else:
                bark_psd = np.delete(bark_psd, (next), axis=0)
            if next == bark_psd.shape[0]:
                break        
    
    # compute the individual masking threshold
    delta_TM = 1 * (-6.025  -0.275 * bark_psd[:, 0])
    Ts = two_slops(bark_psd, delta_TM, barks) 
    Ts = np.array(Ts)
    
    # compute the global masking threshold
    theta_x = np.sum(pow(10, Ts/10.), axis=0) + pow(10, ATH/10.) 
 
    return theta_x, masker_index

def generate_th(audio, fs, window_size=2048):
    """
	returns the masking threshold theta_xs and the max psd of the audio
    """
    win,z,psd_unormalized,PSD, psd_max, max_,imag,real= compute_PSD_matrix(audio , window_size)  
    freqs = librosa.core.fft_frequencies(fs, window_size)
    barks = Bark(freqs)

    # compute the quiet threshold 
    ATH = np.zeros(len(barks)) - np.inf
    bark_ind = np.argmax(barks > 1)
    ATH[bark_ind:] = quiet(freqs[bark_ind:])

    # compute the global masking threshold theta_xs 
    theta_xs = []
    masker_index=[]
    # compute the global masking threshold in each window
    for i in range(PSD.shape[1]):
        a,b=compute_th(PSD[:,i], barks, ATH, freqs)
        theta_xs.append(a)
        masker_index.append(b)
    theta_xs = np.array(theta_xs)
    masker_index=np.array(masker_index)
    return win,z,psd_unormalized, PSD,theta_xs, psd_max,max_,imag,real,masker_index

In [3]:
import math
class Transform(object):
    '''
    Return: PSD
    '''    
    def __init__(self, window_size):
        self.scale = 8. / 3.
        self.frame_length = int(window_size)
        self.frame_step = int(window_size//4)
        self.window_size = window_size
    
    def __call__(self, x, psd_max_ori):
        win = tf.contrib.signal.stft(x, self.frame_length, self.frame_step)
        z = self.scale *tf.abs(win / self.window_size)
        psd = tf.square(z)
        PSD = tf.pow(10., 9.6) / tf.reshape(psd_max_ori, [-1, 1, 1]) * psd
        return PSD

def Psd(audio,max_ori):
  #scale = 8. / 3.
  window_size = 2048
  scale = 8. / 3.
  frame_length = int(window_size)
  frame_step = int(window_size//4)
  psd_max_ori=max_ori


  fs, data_true = wavfile.read(audio)
  data_true=data_true.astype(np.float64)
  #win = tf.signal.stft(data_true, frame_length, frame_step)
  win=librosa.core.stft(data_true, center=False).transpose()
  #注意，有transpose
  real=np.real(win)
  imag=np.imag(win)
  #win=librosa.core.stft(audio, center=False)
  z = scale *abs(win / window_size)
  psd = z*z
  psd_max_ori=max_ori
  print(psd.shape)
  #PSD = tf.cast(tf.pow(10., 9.6),dtype=tf.float64) / tf.cast(tf.reshape(psd_max_ori, [-1, 1, 1]),dtype=tf.float64) * psd
  PSD=pow(10., 9.6)/psd_max_ori.reshape([-1, 1, 1]).astype(np.float64)*psd
  return PSD,win*np.sqrt(8.0/3.),real,imag,psd_max_ori



In [4]:
from scipy.io import wavfile
import numpy as np
from scipy.fftpack import fft
from scipy.fftpack import ifft
from scipy import signal
import scipy
import copy

def add_noise(audio):
  audio_path=audio
  scale = 8. / 3.
  window_size=2048
  frame_length = 2048
  frame_step = 512
  fs, data_true = wavfile.read(audio_path)
  print(data_true)
  length=len(data_true)
  data_true=data_true.reshape((length,)).astype(np.float64)
  win_ori,z_ori,psd_unnormalized,PSD, theta_xs, psd_max, max_,imag,real,masker_index=generate_th(data_true, fs)
  PSD_,win_trans,real_add,imag_add,psd_max_add=Psd(audio_path,psd_max)

  a=theta_xs*psd_max.reshape([-1, 1, 1]).astype(np.float64)

  b=pow(10., 9.6)
  psd=a/b

  z_=np.sqrt(psd)/scale*2048.0
  print(z_)
  z=z_.reshape((z_.shape[1],z_.shape[2]))
  print(z)
  #temp=pow(10.,np.log10(z)+np.log10(z))
  #temp_=pow(10.,np.log10(imag_add)+np.log10(imag_add))

  real__=z*z-imag_add*imag_add
  real_=copy.deepcopy(real__)
  real_[real_<0]=0
  real_=np.sqrt(real_)

  #real_=np.sqrt(temp-temp_)
  print(real_.shape)

  for i in range(real_add.shape[0]):
    for j in range(real_add.shape[1]):
      #if(real_[i][j]==inf or real_[i][j]==-inf):
      # real_[i][j]=0
      if real_add[i][j]<0:
        real_[i][j]=real_[i][j]*(-1)

  print(real_)

  win_back=real_+1j*imag_add
  print(win_back)
  win_ori_=win_ori/np.sqrt(scale)
  for i in range(masker_index.shape[0]):
    for j in range(len(masker_index[i])):
      win_back[i][j]=copy.deepcopy(win_ori_.transpose()[i][j])
  win_back=win_back.transpose()
  y_inv=librosa.core.istft(win_back)

  print(y_inv)
  #print(data_true)
  data_true[0:len(y_inv)]=copy.deepcopy(y_inv)
  data_true=data_true.astype(np.int16)
  return data_true
#y_inv=y_inv.astype(np.int16)
#wavfile.write("/content/gdrive/MyDrive/play_83-11691-0038.wav", fs, data_true)

In [8]:
from scipy.io import wavfile
import numpy as np
from scipy.fftpack import fft
from scipy.fftpack import ifft
from scipy import signal
import scipy
import copy

def add_noise_frames(audio,key):
  audio_path=audio
  scale = 8. / 3.
  window_size=2048
  frame_length = 2048
  frame_step = 512
  fs, data_true = wavfile.read(audio_path)
  print(data_true)
  length=len(data_true)
  data_true=data_true.reshape((length,)).astype(np.float64)
  win_ori,z_ori,psd_unnormalized,PSD, theta_xs, psd_max, max_,imag,real,masker_index=generate_th(data_true, fs)
  PSD_,win_trans,real_add,imag_add,psd_max_add=Psd(audio_path,psd_max)

  a=theta_xs*psd_max.reshape([-1, 1, 1]).astype(np.float64)

  b=pow(10., 9.6)
  psd=a/b

  z_=np.sqrt(psd)/scale*2048.0
  print(z_)
  z=z_.reshape((z_.shape[1],z_.shape[2]))
  print(z)
  #temp=pow(10.,np.log10(z)+np.log10(z))
  #temp_=pow(10.,np.log10(imag_add)+np.log10(imag_add))

  real__=z*z-imag_add*imag_add
  real_=copy.deepcopy(real__)
  real_[real_<0]=0
  real_=np.sqrt(real_)

  #real_=np.sqrt(temp-temp_)
  print(real_.shape)

  for i in range(real_add.shape[0]):
    for j in range(real_add.shape[1]):
      #if(real_[i][j]==inf or real_[i][j]==-inf):
      # real_[i][j]=0
      if real_add[i][j]<0:
        real_[i][j]=real_[i][j]*(-1)

  print(real_)

  win_back=real_+1j*imag_add
  print(win_back)
  win_ori_=win_ori/np.sqrt(scale)
  for i in range(masker_index.shape[0]):
    for j in range(len(masker_index[i])):
      win_back[i][j]=copy.deepcopy(win_ori_.transpose()[i][j])
  data_n=(len(data_true)-1536)//512
  print(data_n)
  index_all=[i for i in range(data_n)]
  print(index_all)
  #index_all=range(0,data_n)
  n=data_frame[key]
  print(n)
  index_=dic_index[key]
  num_frames=n*len(index_)
  for i in index_:
    for j in range(n):
      index_all.remove(i*n+j)
    #del index_all[i*n:i*(n+1)]
  if(len(index_all)>=num_frames):
    index_random=random.sample(index_all,num_frames)
  else:
    index_random=index_all
  #index_random=int_random(0,data_n,num_frames)
  #print(index_random)
  #print(data_true[0*n*512:1*n*512])
  print(index_)

  for item in index_:
    for i in range(n):
      #win_back[item*n+i][:]=copy.deepcopy(win_ori_.transpose()[item*n+i][:])
      win_ori_.transpose()[item*n+i][:]=copy.deepcopy(win_back[item*n+i][:])

  '''

  for item in index_random:
    #print(win_back[:][item*n]-win_ori[:][item*n])
    win_back[item*n][:]=copy.deepcopy(win_ori_.transpose()[item*n][:])
  '''
  win_back=copy.deepcopy(win_ori_.transpose().transpose())
  y_inv=librosa.core.istft(win_back)

  print(y_inv)
  #print(data_true)
  data_true[0:len(y_inv)]=copy.deepcopy(y_inv)
  data_true=data_true.astype(np.int16)
  return data_true
#y_inv=y_inv.astype(np.int16)
#wavfile.write("/content/gdrive/MyDrive/play_83-11691-0038.wav", fs, data_true)

In [9]:
from scipy.io import wavfile
import numpy as np
from scipy.fftpack import fft
from scipy.fftpack import ifft
from scipy import signal
import scipy
import copy

def add_noise_random(audio,key):
  audio_path=audio
  scale = 8. / 3.
  window_size=2048
  frame_length = 2048
  frame_step = 512
  fs, data_true = wavfile.read(audio_path)
  print(data_true)
  length=len(data_true)
  data_true=data_true.reshape((length,)).astype(np.float64)
  win_ori,z_ori,psd_unnormalized,PSD, theta_xs, psd_max, max_,imag,real,masker_index=generate_th(data_true, fs)
  PSD_,win_trans,real_add,imag_add,psd_max_add=Psd(audio_path,psd_max)

  a=theta_xs*psd_max.reshape([-1, 1, 1]).astype(np.float64)

  b=pow(10., 9.6)
  psd=a/b

  z_=np.sqrt(psd)/scale*2048.0
  print(z_)
  z=z_.reshape((z_.shape[1],z_.shape[2]))
  print(z)
  #temp=pow(10.,np.log10(z)+np.log10(z))
  #temp_=pow(10.,np.log10(imag_add)+np.log10(imag_add))

  real__=z*z-imag_add*imag_add
  real_=copy.deepcopy(real__)
  real_[real_<0]=0
  real_=np.sqrt(real_)

  #real_=np.sqrt(temp-temp_)
  print(real_.shape)

  for i in range(real_add.shape[0]):
    for j in range(real_add.shape[1]):
      #if(real_[i][j]==inf or real_[i][j]==-inf):
      # real_[i][j]=0
      if real_add[i][j]<0:
        real_[i][j]=real_[i][j]*(-1)

  print(real_)

  win_back=real_+1j*imag_add
  print(win_back)
  win_ori_=win_ori/np.sqrt(scale)
  for i in range(masker_index.shape[0]):
    for j in range(len(masker_index[i])):
      win_back[i][j]=copy.deepcopy(win_ori_.transpose()[i][j])
  data_n=(len(data_true)-1536)//512
  index_all=[i for i in range(data_n)]
  n=data_frame[key]
  print(n)
  index_=dic_index[key]
  num_frames=n*len(index_)
  for i in index_:
    for j in range(n):
      index_all.remove(i*n+j)
    #del index_all[i*n:i*(n+1)]
  if(len(index_all)>=num_frames):
    index_random=random.sample(index_all,num_frames)
  else:
    index_random=index_all
  #index_random=int_random(0,data_n,num_frames)
  #print(index_random)
  #print(data_true[0*n*512:1*n*512])
  '''

  for item in index_:
    for i in range(n):
      win_back[item*n+i][:]=copy.deepcopy(win_ori_.transpose()[item*n+i][:])

  '''

  for item in index_random:
    #print(win_back[:][item*n]-win_ori[:][item*n])
    win_ori_.transpose()[item][:]=copy.deepcopy(win_back[item][:])
  
  win_back=copy.deepcopy(win_ori_.transpose().transpose())
  y_inv=librosa.core.istft(win_back)

  print(y_inv)
  #print(data_true)
  data_true[0:len(y_inv)]=copy.deepcopy(y_inv)
  data_true=data_true.astype(np.int16)
  return data_true
#y_inv=y_inv.astype(np.int16)
#wavfile.write("/content/gdrive/MyDrive/play_83-11691-0038.wav", fs, data_true)

In [10]:
import csv 
import random 
import numpy as np
import numpy as np  
import numpy as np
# save np.load
import csv    
csv_file=open('/content/gdrive/MyDrive/deepspeech/outfile_addn_oritrans_WER.csv')    
csv_reader_lines = csv.reader(csv_file)   
data_frame={} 
data_WER={}
data_trans={}   
num = 0
for one_line in csv_reader_lines:
  if(one_line[1]=='ID'):
    continue
  else:
    data_frame[one_line[1]]=int(one_line[3])  
    data_WER[one_line[1]]=float(one_line[5]) 
    data_trans[one_line[1]]=one_line[2].lower()
    num+=1    
print(data_frame)
print(data_WER)
print(data_trans)
np_load_old = np.load

# modify the default parameters of np.load
np.load = lambda *a,**k: np_load_old(*a, allow_pickle=True, **k)

# call load_data with allow_pickle implicitly set to true
index_list = np.load("/content/gdrive/MyDrive/deepspeech/index.npy") 

# restore np.load for future normal usage
np.load = np_load_old
#error_list = np.load("/content/gdrive/MyDrive/deepspeech/error_all.npy") 
#print(error_list)  

indexlist=index_list.tolist()
dic_index={}
i=0
for key in data_frame:

  dic_index[key]=indexlist[i]
  i+=1
  print(key,dic_index[key]) 

import os
path_ref=r"/content/gdrive/MyDrive/Startagain/librispeech/OP/All"
path = r"/content/gdrive/MyDrive/deepspeech/audios_wav"
path_noise=r"/content/gdrive/MyDrive/Startagain/librispeech/OP/Frames_new"
#okfiles=os.listdir(path_noise)
filenames = os.listdir(path_ref)
num=0
#filenames=['83-11691-00'+i for i in ['32','34','38','39','40','41','42','43','44','45']]
for filename in filenames:
    #if(filename in okfiles):
      #continue
    name_,category=os.path.splitext(filename)
    if(name_=="sample-000000"):
      continue
    if(category!=".wav"):
      continue
    name=path+'/'+filename

    print("add noise File Name is ", name)
    #fs,data = wav.read(name)
    path_noise_final=path_noise +'/'+ filename
    data_noise = add_noise_frames(name,name_)
    fs=16000
    
    wav.write(path_noise_final,fs, data_noise)
    num+=1
    if(num==200):
      break
    print(path_noise_final)
    print("=========================")
 
print('run over！')

import os
path_ref=r"/content/gdrive/MyDrive/Startagain/librispeech/OP/All"
path = r"/content/gdrive/MyDrive/deepspeech/audios_wav"
path_noise=r"/content/gdrive/MyDrive/Startagain/librispeech/OP/Random_new"
#okfiles=os.listdir(path_noise)
filenames = os.listdir(path_ref)
#okfiles=os.listdir(path_noise)
#filenames = os.listdir(path)
num=0
#filenames=['83-11691-00'+i for i in ['32','34','38','39','40','41','42','43','44','45']]
for filename in filenames:
    #if(filename in okfiles):
      #continue
    name_,category=os.path.splitext(filename)
    if(name_=="sample-000000"):
      continue
    if(category!=".wav"):
      continue
    name=path+'/'+filename

    print("add noise File Name is ", name)
    #fs,data = wav.read(name)
    path_noise_final=path_noise +'/'+ filename
    data_noise = add_noise_random(name,name_)
    fs=16000
    
    wav.write(path_noise_final,fs, data_noise)
    num+=1
    if(num==200):
      break
    print(path_noise_final)
    print("=========================")
 
print('run over！')



{'19-198-0000': 1, '19-198-0001': 3, '19-198-0002': 2, '19-198-0003': 3, '19-198-0004': 3, '19-198-0005': 3, '19-198-0006': 3, '19-198-0007': 3, '19-198-0008': 1, '19-198-0009': 3, '19-198-0010': 3, '19-198-0011': 3, '19-198-0012': 3, '19-198-0013': 3, '19-198-0014': 3, '19-198-0015': 3, '19-198-0016': 3, '19-198-0017': 3, '19-198-0018': 3, '19-198-0019': 3, '19-198-0020': 3, '19-198-0021': 3, '19-198-0022': 3, '19-198-0023': 3, '19-198-0024': 3, '19-198-0025': 3, '19-198-0026': 3, '19-198-0027': 3, '19-198-0028': 3, '19-198-0029': 3, '19-198-0030': 3, '19-198-0031': 3, '19-198-0032': 3, '19-198-0033': 3, '19-198-0034': 3, '19-198-0035': 3, '19-198-0036': 3, '19-198-0037': 1, '19-227-0000': 3, '19-227-0001': 3, '19-227-0002': 3, '19-227-0003': 3, '19-227-0004': 3, '19-227-0005': 3, '19-227-0006': 3, '19-227-0007': 3, '19-227-0008': 3, '19-227-0009': 3, '19-227-0010': 3, '19-227-0011': 3, '19-227-0012': 3, '19-227-0013': 2, '19-227-0014': 3, '19-227-0015': 3, '19-227-0016': 3, '19-227-0



(459, 1025)
[[[11803.67748385 15004.59791951 19073.67143083 ...   115.43449566
     113.53691405   111.67460558]
  [11354.22567781 14433.26380409 18347.3981076  ...    56.37307906
      55.50277316    54.64975777]
  [10768.65267889 13688.89516023 17401.16529199 ...   110.47104497
     108.65823677   106.87919193]
  ...
  [ 9868.57646071 12544.73633607 15946.72382053 ...    73.27360753
      72.10311616    70.95507309]
  [ 9996.50548609 12707.35714562 16153.44551382 ...    70.82653594
      69.69900562    68.59317841]
  [ 9730.48010239 12369.19101694 15723.57264005 ...    54.51256453
      53.67595025    52.85605584]]]
[[11803.67748385 15004.59791951 19073.67143083 ...   115.43449566
    113.53691405   111.67460558]
 [11354.22567781 14433.26380409 18347.3981076  ...    56.37307906
     55.50277316    54.64975777]
 [10768.65267889 13688.89516023 17401.16529199 ...   110.47104497
    108.65823677   106.87919193]
 ...
 [ 9868.57646071 12544.73633607 15946.72382053 ...    73.27360753
     7



(426, 1025)
[[[10413.92435542 13237.97163477 16827.95651884 ...    86.63628379
      85.21544094    83.8210786 ]
  [10280.86323046 13068.82700324 16612.94181845 ...   119.9769985
     117.98915691   116.03794008]
  [10666.69156373 13559.28423698 17236.40538458 ...    93.23417501
      91.70064642    90.19560528]
  ...
  [10508.62681963 13358.35550662 16980.98710517 ...   130.11113095
     127.95180655   125.83219156]
  [10984.1836433  13962.87379652 17749.44376741 ...    80.19174079
      78.88130715    77.59539395]
  [10785.76426341 13710.64706304 17428.81605941 ...    78.90335106
      77.61503835    76.35085423]]]
[[10413.92435542 13237.97163477 16827.95651884 ...    86.63628379
     85.21544094    83.8210786 ]
 [10280.86323046 13068.82700324 16612.94181845 ...   119.9769985
    117.98915691   116.03794008]
 [10666.69156373 13559.28423698 17236.40538458 ...    93.23417501
     91.70064642    90.19560528]
 ...
 [10508.62681963 13358.35550662 16980.98710517 ...   130.11113095
    127.



[1;30;43m流式输出内容被截断，只能显示最后 5000 行内容。[0m
  [ 227.84204193  289.62823095  368.17205925 ...  132.84351497
    130.68679016  128.56866884]
  [ 731.8268859   930.2836497  1182.56582196 ...  282.8995919
    278.57512565  274.32342576]
  [  84.15299312  106.97359591  135.98359857 ...  188.41007185
    185.41770607  182.47769823]]]
[[1338.76986505 1701.81738356 2163.33058595 ...  290.1528175
   285.73681185  281.39450527]
 [2249.39338562 2859.38372683 3634.81554073 ...  186.38613203
   183.41170515  180.48921689]
 [  95.12845552  120.92538344  153.71894961 ...  183.94017449
   181.03695326  178.18391126]
 ...
 [ 227.84204193  289.62823095  368.17205925 ...  132.84351497
   130.68679016  128.56866884]
 [ 731.8268859   930.2836497  1182.56582196 ...  282.8995919
   278.57512565  274.32342576]
 [  84.15299312  106.97359591  135.98359857 ...  188.41007185
   185.41770607  182.47769823]]
(428, 1025)
[[-1338.76986505     0.          2038.90878274 ...     0.
     -0.           281.39450527]
 [-2249.

In [11]:
import csv 
import random 
import numpy as np
  
csv_file=open('/content/gdrive/MyDrive/deepspeech/commonvoice_original_data/commonvoice_trans.csv')    
csv_reader_lines = csv.reader(csv_file)   

data_ori={} 
data_trans={}#yuanben经过deepspeech翻译的trans
data_WER={}  
data_frame={} 
dic_index={}
num = 0
for one_line in csv_reader_lines:
  if(one_line[1]=='filename'):
    continue
  else:
    data_ori[one_line[1]]=one_line[2]

dic=np.load('/content/gdrive/MyDrive/deepspeech/commonvoice_original_data/commonvoice_ori_trans.npy',allow_pickle=True)
print(len(dic.item()))
data_trans=dic.item()
print(data_trans)

dic=np.load('/content/gdrive/MyDrive/deepspeech/commonvoice_original_data/data_frame.npy',allow_pickle=True)
print(len(dic.item()))
data_frame=dic.item()
print(data_frame)

dic=np.load('/content/gdrive/MyDrive/deepspeech/commonvoice_original_data/indexlist.npy',allow_pickle=True)
print(len(dic.item()))
dic_index=dic.item()
print(dic_index)

dic=np.load('/content/gdrive/MyDrive/deepspeech/commonvoice_original_data/original_WER.npy',allow_pickle=True)
print(len(dic.item()))
data_WER=dic.item()
print(data_WER)

import os
#path_ref=r"/content/gdrive/MyDrive/Startagain/librispeech/DE/All"
path = r"/content/gdrive/MyDrive/deepspeech/commonvoice_wav"
path_noise=r"/content/gdrive/MyDrive/Startagain/commonvoice/OP/Frames_new"
#okfiles=os.listdir(path_noise)
filenames = os.listdir(path)
num=0
#filenames=['83-11691-00'+i for i in ['32','34','38','39','40','41','42','43','44','45']]
for filename in filenames:
    #if(filename in okfiles):
     # continue
    name_,category=os.path.splitext(filename)
    if(name_=="sample-000000"):
      continue
    if(category!=".wav"):
      continue
    name=path+'/'+filename

    print("add noise File Name is ", name)
    #fs,data = wav.read(name)
    path_noise_final=path_noise +'/'+ filename
    data_noise = add_noise_frames(name,name_)
    fs=16000
    
    wav.write(path_noise_final,fs, data_noise)
    num+=1
    if(num==200):
      break
    print(path_noise_final)
    print("=========================")
 
print('run over！')

import os
#path_ref=r"/content/gdrive/MyDrive/Startagain/librispeech/DE/All"
path = r"/content/gdrive/MyDrive/deepspeech/commonvoice_wav"
path_noise=r"/content/gdrive/MyDrive/Startagain/commonvoice/OP/Random_new"
#okfiles=os.listdir(path_noise)
filenames = os.listdir(path)
num=0
#filenames=['83-11691-00'+i for i in ['32','34','38','39','40','41','42','43','44','45']]
for filename in filenames:
    #if(filename in okfiles):
     # continue
    name_,category=os.path.splitext(filename)
    if(name_=="sample-000000"):
      continue
    if(category!=".wav"):
      continue
    name=path+'/'+filename

    print("add noise File Name is ", name)
    #fs,data = wav.read(name)
    path_noise_final=path_noise +'/'+ filename
    data_noise = add_noise_random(name,name_)
    fs=16000
    
    wav.write(path_noise_final,fs, data_noise)
    num+=1
    if(num==200):
      break
    print(path_noise_final)
    print("=========================")
 
print('run over！')

202
{'sample-000000': 'learn direcognized omends and follow them the old king had said', 'sample-000001': 'averything in the universe evolved he said', 'sample-000002': 'you came so that she could learn a bude cream said the old woman a', 'sample-000003': 'soono i fea nothing bepos it was tosomans that brot you to me', 'sample-000004': 'it o startuer indians to greedings nec ne be the first melcommitwar', 'sample-000005': 'a shepherd may like to travel but he should never forget about a sheep', 'sample-000006': 'nightfell and ten e sougtment of fighting men and merchants and pat and exitet the tent', 'sample-000007': 'iherd a faint movement under my fiev a', 'sample-000008': 'bujackie rin on the staff', 'sample-000009': 'we is big niarelise aca te prthlat his dehissad', 'sample-000010': 'you must not let up even after having come so far be continued', 'sample-000011': 'if you have to wait until the war is over then weit', 'sample-000012': 'my wife pointed out to me the brightness of th



[1;30;43m流式输出内容被截断，只能显示最后 5000 行内容。[0m
 [ 2500.62668071 -3132.19393808    -0.         ...   578.44676245
    503.48462732  -569.83165365]]
[[  -22.23592502   +0.j             0.         -373.64545628j
     11.45897879  +34.0550373j  ...  -353.33452418  +27.40044261j
    338.05330551  -86.09340201j   343.39278311   +0.j        ]
 [  -60.7033346    +0.j             0.         +666.30032966j
     -0.         -193.23524264j ...  -150.1585347  +130.13693446j
    -20.22446843 -194.63541527j   192.72224594   +0.j        ]
 [   95.66960442   +0.j             0.         +126.54039226j
    139.75303214  -66.09242346j ...     0.         -200.02190177j
    -42.99177909 +159.7704838j    163.00590974   +0.j        ]
 ...
 [  119.41046298   +0.j             0.        +3526.82555645j
   -161.94771584 -104.90536934j ...   332.53117246  +40.98976007j
   -294.66731955 +147.65725677j  -324.24036424   +0.j        ]
 [ 1957.70976253   +0.j         -2387.75615254 +701.25407737j
   2552.64141244+1868.591885

In [None]:
import os
path_ref=r"/content/gdrive/MyDrive/Startagain/librispeech/DE/All"
path = r"/content/gdrive/MyDrive/deepspeech/audios_wav"
path_noise=r"/content/gdrive/MyDrive/Startagain/librispeech/OP/All"
#okfiles=os.listdir(path_noise)
filenames = os.listdir(path_ref)
num=0
#filenames=['83-11691-00'+i for i in ['32','34','38','39','40','41','42','43','44','45']]
for filename in filenames:
    #if(filename in okfiles):
     # continue
    name_,category=os.path.splitext(filename)
    if(category!=".wav"):
      continue
    name=path+'/'+filename

    print("add noise File Name is ", name)
    #fs,data = wav.read(name)
    path_noise_final=path_noise +'/'+ filename
    data_noise = add_noise(name)
    fs=16000
    
    wav.write(path_noise_final,fs, data_noise)
    num+=1
    if(num==200):
      break
    print(path_noise_final)
    print("=========================")
 
print('run over！')

add noise File Name is  /content/gdrive/MyDrive/deepspeech/audios_wav/19-227-0019.wav
[-112  -79  -73 ...  -64  -59  -47]




[1;30;43m流式输出内容被截断，只能显示最后 5000 行内容。[0m
    9281.37467644  9195.4920704 ]]]
[[  692.64252263   880.47327354  1119.24744757 ...   777.13277178
    765.93689181   754.9229977 ]
 [  707.20481511   898.98456748  1142.77879044 ...   447.92415479
    441.69402126   435.55988273]
 [  404.44294207   514.11975085   653.54308436 ...   475.1976936
    468.07577629   461.07318345]
 ...
 [ 5177.32598588  6581.31288513  8366.08886341 ...  6850.32240327
   6775.94323971  6702.48092442]
 [ 4666.22967116  5931.61750748  7540.20360945 ... 14593.07324499
  14455.60329743 14319.63144098]
 [ 2416.43928237  3071.72911818  3904.74654763 ...  9368.18833039
   9281.37467644  9195.4920704 ]]
(466, 1025)
[[   692.64252263     -0.             -0.         ...      0.
      -0.            754.9229977 ]
 [   707.20481511     -0.              0.         ...     -0.
    -226.12717772    435.55988273]
 [  -404.44294207    448.37415767     -0.         ...      0.
      -0.            461.07318345]
 ...
 [ -5177.3259858

In [None]:
import numpy as np  
import numpy as np
# save np.load
import csv    
csv_file=open('/content/gdrive/MyDrive/deepspeech/outfile_addn_oritrans_WER.csv')    
csv_reader_lines = csv.reader(csv_file)   
data_frame={} 
data_WER={}
data_trans={}   
num = 0
for one_line in csv_reader_lines:
  if(one_line[1]=='ID'):
    continue
  else:
    data_frame[one_line[1]]=int(one_line[3])  
    data_WER[one_line[1]]=float(one_line[5]) 
    data_trans[one_line[1]]=one_line[2].lower()
    num+=1    
print(data_frame)
print(data_WER)
print(data_trans)
np_load_old = np.load

# modify the default parameters of np.load
np.load = lambda *a,**k: np_load_old(*a, allow_pickle=True, **k)

# call load_data with allow_pickle implicitly set to true
index_list = np.load("/content/gdrive/MyDrive/deepspeech/index.npy") 

# restore np.load for future normal usage
np.load = np_load_old
#error_list = np.load("/content/gdrive/MyDrive/deepspeech/error_all.npy") 
#print(error_list)  

indexlist=index_list.tolist()
dic_index={}
i=0
for key in data_frame:

  dic_index[key]=indexlist[i]
  i+=1
  print(key,dic_index[key])

{'19-198-0000': 1, '19-198-0001': 3, '19-198-0002': 2, '19-198-0003': 3, '19-198-0004': 3, '19-198-0005': 3, '19-198-0006': 3, '19-198-0007': 3, '19-198-0008': 1, '19-198-0009': 3, '19-198-0010': 3, '19-198-0011': 3, '19-198-0012': 3, '19-198-0013': 3, '19-198-0014': 3, '19-198-0015': 3, '19-198-0016': 3, '19-198-0017': 3, '19-198-0018': 3, '19-198-0019': 3, '19-198-0020': 3, '19-198-0021': 3, '19-198-0022': 3, '19-198-0023': 3, '19-198-0024': 3, '19-198-0025': 3, '19-198-0026': 3, '19-198-0027': 3, '19-198-0028': 3, '19-198-0029': 3, '19-198-0030': 3, '19-198-0031': 3, '19-198-0032': 3, '19-198-0033': 3, '19-198-0034': 3, '19-198-0035': 3, '19-198-0036': 3, '19-198-0037': 1, '19-227-0000': 3, '19-227-0001': 3, '19-227-0002': 3, '19-227-0003': 3, '19-227-0004': 3, '19-227-0005': 3, '19-227-0006': 3, '19-227-0007': 3, '19-227-0008': 3, '19-227-0009': 3, '19-227-0010': 3, '19-227-0011': 3, '19-227-0012': 3, '19-227-0013': 2, '19-227-0014': 3, '19-227-0015': 3, '19-227-0016': 3, '19-227-0

In [None]:
from scipy.io import wavfile
import numpy as np
from scipy.fftpack import fft
from scipy.fftpack import ifft
from scipy import signal
import random
import scipy
import copy

url_already_audio='/content/gdrive/MyDrive/Startagain/librispeech/OP/All'
import os
allreadyfile=[]
txt_files=os.listdir(url_already_audio)
print(txt_files)
if len(txt_files)==0:
  pass
else:
  for files in txt_files:
    name,category=os.path.splitext(files)
    allreadyfile.append(name)
print(len(allreadyfile))
url_original_audio='/content/gdrive/MyDrive/deepspeech/audios_wav/'

url = '/content/gdrive/MyDrive/Startagain/librispeech/OP/Random/'

for key in dic_index:
  if key not in allreadyfile:
    continue
  original_url=url_original_audio+key+'.wav'
  all_url=url_already_audio+'/'+key+'.wav'
  fs, data_true = wav.read(original_url)
  print(original_url)
  fs, data_all = wav.read(all_url)
  data_final=copy.deepcopy(data_true)
  data_n=(len(data_true)-1536)//512
  print(data_n)
  index_all=[i for i in range(data_n)]
  print(index_all)
  #index_all=range(0,data_n)
  n=data_frame[key]
  print(n)
  index_=dic_index[key]
  num_frames=n*len(index_)
  for i in index_:
    for j in range(n):
      index_all.remove(i*n+j)
    #del index_all[i*n:i*(n+1)]
  if(len(index_all)>=num_frames):
    index_random=random.sample(index_all,num_frames)
  else:
    index_random=index_all
  #index_random=int_random(0,data_n,num_frames)
  print(index_random)
  #print(data_true[0*n*512:1*n*512])

  for item in index_random:
    
    data_final[item*512:(item+1)*512]=copy.deepcopy(data_all[item*512:(item+1)*512])

  wavfile.write(url+key+'.wav', fs, data_final)
  print("============================================================================")

['19-227-0043.wav', '19-227-0045.wav', '19-227-0046.wav', '19-227-0042.wav', '19-227-0041.wav', '19-227-0040.wav', '19-227-0044.wav', '19-227-0039.wav', '19-227-0047.wav', '19-227-0052.wav', '19-227-0048.wav', '19-227-0056.wav', '19-227-0053.wav', '19-227-0051.wav', '19-227-0059.wav', '19-227-0057.wav', '19-227-0054.wav', '19-227-0049.wav', '19-227-0058.wav', '19-227-0050.wav', '19-227-0055.wav', '19-227-0070.wav', '19-227-0069.wav', '19-227-0060.wav', '19-227-0067.wav', '19-227-0062.wav', '19-227-0061.wav', '19-227-0063.wav', '19-227-0064.wav', '19-227-0068.wav', '19-227-0066.wav', '19-227-0065.wav', '19-227-0072.wav', '19-227-0071.wav', '26-495-0003.wav', '26-495-0001.wav', '26-495-0002.wav', '26-495-0004.wav', '26-495-0008.wav', '26-495-0005.wav', '26-495-0006.wav', '26-495-0007.wav', '26-495-0000.wav', '26-495-0014.wav', '26-495-0017.wav', '26-495-0015.wav', '26-495-0019.wav', '26-495-0009.wav', '26-495-0012.wav', '26-495-0013.wav', '26-495-0018.wav', '26-495-0011.wav', '26-495-001

In [None]:
from scipy.io import wavfile
import numpy as np
from scipy.fftpack import fft
from scipy.fftpack import ifft
from scipy import signal
import random
import scipy
import copy

url_already_audio='/content/gdrive/MyDrive/Startagain/librispeech/OP/All'
import os
allreadyfile=[]
txt_files=os.listdir(url_already_audio)
print(txt_files)
if len(txt_files)==0:
  pass
else:
  for files in txt_files:
    name,category=os.path.splitext(files)
    allreadyfile.append(name)
print(len(allreadyfile))
url_original_audio='/content/gdrive/MyDrive/deepspeech/audios_wav/'

url = '/content/gdrive/MyDrive/Startagain/librispeech/OP/Frames/'

for key in dic_index:
  if key not in allreadyfile:
    continue
  original_url=url_original_audio+key+'.wav'
  all_url=url_already_audio+'/'+key+'.wav'
  fs, data_true = wavfile.read(original_url)
  fs, data_all = wavfile.read(all_url)
  data_final=copy.deepcopy(data_true)
  data_n=(len(data_true)-1536)//512
  print(data_n)
  index_all=[i for i in range(data_n)]
  print(index_all)
  #index_all=range(0,data_n)
  n=data_frame[key]
  print(n)
  index_=dic_index[key]
  num_frames=n*len(index_)
  for i in index_:
    for j in range(n):
      index_all.remove(i*n+j)
    #del index_all[i*n:i*(n+1)]
  if(len(index_all)>=num_frames):
    index_random=random.sample(index_all,num_frames)
  else:
    index_random=index_all
  #index_random=int_random(0,data_n,num_frames)
  #print(index_random)
  #print(data_true[0*n*512:1*n*512])
  print(index_)

  for item in index_:
    
    data_final[item*512*n:(item+1)*512*n]=copy.deepcopy(data_all[item*512*n:(item+1)*512*n])
  wavfile.write(url+key+'.wav', fs, data_final)
  print("============================================================================")

['19-227-0043.wav', '19-227-0045.wav', '19-227-0046.wav', '19-227-0042.wav', '19-227-0041.wav', '19-227-0040.wav', '19-227-0044.wav', '19-227-0039.wav', '19-227-0047.wav', '19-227-0052.wav', '19-227-0048.wav', '19-227-0056.wav', '19-227-0053.wav', '19-227-0051.wav', '19-227-0059.wav', '19-227-0057.wav', '19-227-0054.wav', '19-227-0049.wav', '19-227-0058.wav', '19-227-0050.wav', '19-227-0055.wav', '19-227-0070.wav', '19-227-0069.wav', '19-227-0060.wav', '19-227-0067.wav', '19-227-0062.wav', '19-227-0061.wav', '19-227-0063.wav', '19-227-0064.wav', '19-227-0068.wav', '19-227-0066.wav', '19-227-0065.wav', '19-227-0072.wav', '19-227-0071.wav', '26-495-0003.wav', '26-495-0001.wav', '26-495-0002.wav', '26-495-0004.wav', '26-495-0008.wav', '26-495-0005.wav', '26-495-0006.wav', '26-495-0007.wav', '26-495-0000.wav', '26-495-0014.wav', '26-495-0017.wav', '26-495-0015.wav', '26-495-0019.wav', '26-495-0009.wav', '26-495-0012.wav', '26-495-0013.wav', '26-495-0018.wav', '26-495-0011.wav', '26-495-001