In [1]:
from pydub import AudioSegment
from pydub.silence import split_on_silence
import csv

def db_to_float(db, using_amplitude=True):
    """
    Convert a decibel value into the equivalent linear ratio.

    db              -- value in dB (anything accepted by float()).
    using_amplitude -- when truthy the value is treated as an amplitude
                       level (10 ** (db / 20)); otherwise as a power
                       level (10 ** (db / 10)).

    Returns the ratio as a float.
    """
    # Amplitude ratios use a divisor of 20, power ratios a divisor of 10.
    divisor = 20 if using_amplitude else 10
    return 10 ** (float(db) / divisor)

def _merge_silence_starts(silence_starts, slice_len, step_size):
    """Merge ascending silent-window start offsets (ms) into [start, end] ranges."""
    if not silence_starts:
        return []

    merged_ranges = []
    current_range_start = prev_start = silence_starts[0]

    for start in silence_starts[1:]:
        # Windows are sampled every step_size ms, so two silent windows are
        # adjacent when their starts differ by exactly one step.
        continuous = (start == prev_start + step_size)

        # sometimes two small blips are enough for one particular slice to be
        # non-silent, despite the silence all running together. Just combine
        # the two overlapping silent ranges.
        silence_has_gap = start > (prev_start + slice_len)

        if not continuous and silence_has_gap:
            merged_ranges.append([current_range_start, prev_start + slice_len])
            current_range_start = start
        prev_start = start

    merged_ranges.append([current_range_start, prev_start + slice_len])
    return merged_ranges


def getSubjSilentRanges(wavFilePath, silence_threshold):
    """
    Return the silent ranges whose length is longer than the average
    silent range in the audio file.

    The detection parameters are read from 'silence_detection_parameters.csv'
    in the current working directory. The first cell of each of its first
    three rows must look like '<name>:<int>' and supplies, in order, the
    slice length (ms), the step size (ms) and the silence threshold.

    NOTE: the ``silence_threshold`` argument is overwritten by the value read
    from the parameter file, so the value passed by the caller is ignored.
    This long-standing behaviour is kept so existing callers are unaffected.

    wavFilePath       -- path of the wav file to analyse.
    silence_threshold -- ignored (see note above).

    Returns a list of [start_ms, end_ms] lists; the list is empty when no
    silent window is found (including audio shorter than one slice).

    Side effect: writes the returned ranges to
    './../models/audio_optimization/results_<slice>_<step>_<threshold>.csv'
    with the columns num, start_time, end_time.
    """
    with open('silence_detection_parameters.csv') as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        SLICE_LEN = int(next(csv_reader)[0].split(':')[1])
        STEP_SIZE = int(next(csv_reader)[0].split(':')[1])
        silence_threshold = int(next(csv_reader)[0].split(':')[1])

    audio = AudioSegment.from_wav(wavFilePath)
    audio_len = len(audio)  # result is in ms

    # convert silence threshold to a float value (so we can compare it to rms)
    silence_thresh = db_to_float(silence_threshold) * audio.max_possible_amplitude

    # Slide a SLICE_LEN window over the audio in STEP_SIZE increments and keep
    # the start offset of every window whose rms is at or below the threshold.
    last_slice_start_point = audio_len - SLICE_LEN
    silence_starts = [
        i for i in range(0, last_slice_start_point + 1, STEP_SIZE)
        if audio[i:i + SLICE_LEN].rms <= silence_thresh
    ]

    # combine the silence we detected into ranges (start ms - end ms)
    silent_ranges = _merge_silence_starts(silence_starts, SLICE_LEN, STEP_SIZE)

    # Keep only the ranges strictly longer than the average range length.
    # With no silent range at all there is no average to compute.
    subject_switch_silent_ranges = []
    if silent_ranges:
        silence_ranges_len = [rng[1] - rng[0] for rng in silent_ranges]
        avg_silence_time = sum(silence_ranges_len) / len(silence_ranges_len)
        subject_switch_silent_ranges = [
            rng for rng, rng_len in zip(silent_ranges, silence_ranges_len)
            if rng_len > avg_silence_time
        ]

    results_path = ('./../models/audio_optimization/results_' + str(SLICE_LEN)
                    + '_' + str(STEP_SIZE) + '_' + str(silence_threshold) + '.csv')
    with open(results_path, 'w', newline='') as csvfile:
        header = ['num', 'start_time', 'end_time']
        writer = csv.DictWriter(csvfile, fieldnames=header)
        writer.writeheader()

        for idx, silence_range in enumerate(subject_switch_silent_ranges):
            writer.writerow({'num': str(idx + 1),
                             'start_time': str(silence_range[0]),
                             'end_time': str(silence_range[1])})

    return subject_switch_silent_ranges
    

In [2]:
# Test 

# Download audio from youtube
from __future__ import unicode_literals

!pip install --upgrade youtube-dl
!pip install --upgrade pygame
!pip install --upgrade pydub
!pip install --upgrade ffprobe
!pip install --upgrade ffmpeg

Collecting youtube-dl
  Downloading youtube_dl-2020.5.3-py2.py3-none-any.whl (1.8 MB)
Installing collected packages: youtube-dl
  Attempting uninstall: youtube-dl
    Found existing installation: youtube-dl 2020.3.24
    Uninstalling youtube-dl-2020.3.24:
      Successfully uninstalled youtube-dl-2020.3.24
Successfully installed youtube-dl-2020.5.3
Requirement already up-to-date: pygame in c:\users\ruti kevesh\anaconda3\lib\site-packages (1.9.6)
Requirement already up-to-date: pydub in c:\users\ruti kevesh\anaconda3\lib\site-packages (0.23.1)
Requirement already up-to-date: ffprobe in c:\users\ruti kevesh\anaconda3\lib\site-packages (0.5)
Requirement already up-to-date: ffmpeg in c:\users\ruti kevesh\anaconda3\lib\site-packages (1.4)


In [1]:
import youtube_dl

# Download options: pick the best available audio stream ...
ydl_opts = {'format': 'bestaudio/best'}
# ... and hand it to the FFmpegExtractAudio post-processor with wav as the
# preferred codec.
ydl_opts['postprocessors'] = [
    {
        'key': 'FFmpegExtractAudio',
        'preferredcodec': 'wav',
        'preferredquality': '192',
    },
]

# The downloader is used as a context manager for the duration of the download.
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
    ydl.download(['https://www.youtube.com/watch?v=MkiUBJcgdUY'])
    

[youtube] MkiUBJcgdUY: Downloading webpage
[download] Destination: Mod-01 Lec-01 Foundation of Scientific Computing-01-MkiUBJcgdUY.m4a
[download]   4.0% of 59.32MiB at 152.57KiB/s ETA 06:22

KeyboardInterrupt: 

In [4]:
import time
start_time = time.time()

# Run the detection and collect the above-average silent ranges.
# The second argument is overwritten inside getSubjSilentRanges by the value
# read from silence_detection_parameters.csv, so the 0 here has no effect.
# NOTE(review): this file name carries the id SCvtxjpVQms, while the download
# cell fetches the video MkiUBJcgdUY — confirm which recording is intended.
silent_ranges = getSubjSilentRanges("Introduction to Number Theory-SCvtxjpVQms.wav",0)

# Duration (end - start) of every returned range.
silence_ranges_len = [rng[1] - rng[0] for rng in silent_ranges]

print("the silent range are (ms): ")
print(silent_ranges)
print("the silent range's lengths are (ms):")
print(silence_ranges_len)
print("amount of silent parts: " + str(len(silence_ranges_len)))

# Report the wall-clock run time in seconds.
print("--- %s seconds ---" % (time.time() - start_time))

FileNotFoundError: [Errno 2] No such file or directory: 'Introduction to Number Theory-SCvtxjpVQms.wav'

In [None]:
# plot the ranges

import matplotlib.pyplot as plt
# np is used below for the y-axis limit; without this import the cell raises
# NameError on a fresh kernel.
import numpy as np

# Histogram of the silent-range lengths computed in the previous cell.
n, bins, patches = plt.hist(x=silence_ranges_len, bins='auto', color='#0504aa',
                            alpha=0.7, rwidth=0.85)
plt.grid(axis='y', alpha=0.75)
plt.xlabel('ms')
plt.ylabel('count')
# NOTE(review): the title and the annotation below look like placeholders
# left over from a template — confirm or replace them with text that
# describes this data.
plt.title('My Very Own Histogram')
plt.text(23, 45, r'$\mu=15, b=3$')
maxfreq = n.max()
# Set a clean upper y-axis limit.
plt.ylim(ymax=np.ceil(maxfreq / 10) * 10 if maxfreq % 10 else maxfreq + 10)