In [73]:
from pydub import AudioSegment
from pydub.silence import detect_silence, detect_nonsilent
from pydub.playback import play

In [74]:
def process_lecture(input_filepath, output_filepath, min_silence_len=3000, silence_thresh=-20, seek_step=3000):
    """Strip silent stretches from an mp4 lecture and export the rest as a mono wav.

    The audio track is loaded with pydub, every silent range reported by
    `detect_silence` is dropped (by concatenating the complementary ranges
    from `detect_nonsilent`), and the result is written to `output_filepath`.

    Args:
        input_filepath: Path to the source video; must end with '.mp4'.
        output_filepath: Destination path of the exported wav file.
        min_silence_len: Forwarded to pydub's silence detection (reported
            ranges are in milliseconds).
        silence_thresh: Forwarded to pydub's silence detection as the
            loudness threshold below which audio counts as silent.
        seek_step: Forwarded to pydub's silence detection as the scan step.

    Raises:
        AssertionError: If `input_filepath` does not end with '.mp4', or if
            silence is still detected in the concatenated result.
    """
    # Validate the actual parameter. The previous check referenced an undefined
    # name (`file_path`), which raised NameError on a fresh kernel.
    assert input_filepath.endswith('.mp4'), "Only processes mp4 videos!"

    # Import mp4
    segment = AudioSegment.from_file(input_filepath, "mp4")

    # Shared detection settings so that both detectors and the final check agree.
    detection_args = dict(
        min_silence_len=min_silence_len,
        silence_thresh=silence_thresh,
        seek_step=seek_step,
    )

    silent_ranges = detect_silence(segment, **detection_args)
    print("Silent ranges in ms... {}".format(silent_ranges))

    # Trim silence
    # AudioSegments do not support `remove` functionality, so instead of removing
    # silence this code concatenates the non-silent ranges. Internally
    # `detect_nonsilent` calls `detect_silence` and takes the complement of the result.
    print("Concatenating nonsilent ranges...")
    nonsilent_ranges = detect_nonsilent(segment, **detection_args)
    result = AudioSegment.empty()
    for start, end in nonsilent_ranges:
        result += segment[start:end]

    # Double check that nothing silent survived the concatenation.
    remaining_silence = detect_silence(result, **detection_args)
    assert remaining_silence == [], "There are still silent ranges. Double check parameters of detect_nonsilent"

    # Downmix to a single channel and resample to 123000 Hz.
    # NOTE(review): 123000 Hz is an unusual sample rate; confirm it is intended.
    result = result.set_channels(1)
    result = result.set_frame_rate(123000)

    # Output as mono wav
    result.export(output_filepath, format='wav')
    print("Audio output in {}".format(output_filepath))

In [75]:
# Source lecture video and the destination for its silence-trimmed audio track.
input_filepath = "../raw_data/bryan_lectures/bayes_inference.mp4"
output_filepath = "../raw_data/bryan_lectures/bayes_inference_processed.wav"

# Run the trimming pipeline with its default silence-detection settings.
process_lecture(
    input_filepath=input_filepath,
    output_filepath=output_filepath,
)

Silent ranges in ms... [[0, 27000], [30000, 33000], [36000, 63000], [102000, 105000], [138000, 144000], [153000, 156000], [159000, 162000], [246000, 249000], [282000, 285000], [327000, 330000], [522000, 525000], [582000, 585000], [789000, 792000], [816000, 819000], [831000, 837000], [882000, 885000], [912000, 915000], [1053000, 1056000], [1092000, 1095000], [1122000, 1125000], [1131000, 1134000], [1287000, 1293000], [1365000, 1371000], [1377000, 1380000], [1431000, 1434000], [1521000, 1524000], [1539000, 1545000], [1575000, 1578000], [1644000, 1647000], [1740000, 1743000], [1785000, 1788000], [1803000, 1806000], [1842000, 1845000], [1881000, 1884000], [1905000, 1911000], [1920000, 1923000], [1986000, 1989000], [1995000, 1998000], [2013000, 2016000], [2070000, 2073000], [2082000, 2085000], [2088000, 2091000], [2124000, 2127000], [2145000, 2151000], [2217000, 2223000], [2253000, 2256000], [2265000, 2271000], [2286000, 2289000], [2292000, 2301000], [2316000, 2325000], [2334000, 2337000], 