# Audio Analysis using Python
The default audio file format used in this Jupyter notebook is WAV.  
**This Jupyter notebook contains useful functions and templates for:**  
1) Playing audio files  
2) Extracting features from audio files  
3) Visualizing audio signals and features  
 
### Some packages used
1) pyaudio: Play audio file [Documentation](https://people.csail.mit.edu/hubert/pyaudio/docs/)  
2) wave: Load audio file [Documentation](https://docs.python.org/3/library/wave.html)  
3) librosa: Audio analysis library [Documentation](https://librosa.github.io/librosa/index.html)  



In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pyaudio
import wave
import librosa.display
import librosa
import scipy

In [None]:
# Stores an audio file's path and basic metadata for easy playback and analysis
class Audio():
    """Handle to a WAV file on disk, with playback and sample-count helpers.

    Attributes:
        filename: Path of the WAV file, as passed to the constructor.
        chunk: Number of frames read and written per step during playback.
        rate: WAV frame rate floor-divided by the channel count.
        total_sample_size: WAV frame count floor-divided by the channel count.
    """

    def __init__(self, filename):
        """Read the frame rate and frame count from the WAV header of `filename`."""
        self.filename = filename
        self.chunk = 1024

        with wave.open(filename, 'rb') as wf:
            # NOTE(review): the division by the channel count is kept from the
            # original so existing results do not change. `wave` reports the
            # rate and length in frames (one frame holds one sample for every
            # channel), so this presumably approximates the length of the mono
            # array librosa.load() returns at its default sample rate rather
            # than the file's true rate -- confirm against the input files.
            channels = wf.getnchannels()
            self.rate = wf.getframerate() // channels
            self.total_sample_size = wf.getnframes() // channels

    def get_sample_size(self, duration):
        """Return the number of samples covering `duration` seconds.

        The result is capped at `total_sample_size`, never negative, and
        always an int, so it can be used directly as a slice bound even when
        `duration` is a float.
        """
        return max(0, min(int(self.rate * duration), self.total_sample_size))

    def play(self, duration=0):
        """Play the file through a PyAudio output stream.

        Args:
            duration: Seconds to play from the start of the file. Zero (the
                default), None, or a negative value plays the whole file.
        """
        # The context manager and the nested try/finally blocks make sure the
        # file, the stream and the PyAudio instance are released even if
        # opening the stream or writing to it fails.
        with wave.open(self.filename, 'rb') as wf:
            p = pyaudio.PyAudio()
            try:
                stream = p.open(
                    format=p.get_format_from_width(wf.getsampwidth()),
                    channels=wf.getnchannels(),
                    rate=wf.getframerate(),
                    output=True,
                )
                try:
                    if duration and duration > 0:
                        # int() so fractional durations are accepted.
                        frames_left = int(duration * wf.getframerate())
                    else:
                        frames_left = wf.getnframes()

                    # Stream in `chunk`-sized pieces instead of loading the
                    # whole requested span into memory at once.
                    while frames_left > 0:
                        data = wf.readframes(min(self.chunk, frames_left))
                        if not data:
                            break
                        stream.write(data)
                        frames_left -= self.chunk
                finally:
                    stream.close()
            finally:
                p.terminate()

In [None]:
# Load the six "wayne" recordings: three `test` clips followed by three
# `sing` clips (per the file names).
wayne_1, wayne_2, wayne_3 = (
    Audio("./working_data/wayne_test_1.wav"),
    Audio("./working_data/wayne_test_2.wav"),
    Audio("./working_data/wayne_test_3.wav"),
)
wayne_4, wayne_5, wayne_6 = (
    Audio("./working_data/wayne_sing_1.wav"),
    Audio("./working_data/wayne_sing_2.wav"),
    Audio("./working_data/wayne_sing_3.wav"),
)

In [None]:
def display_linear_frequency_spectogram(audio, num_samples=5000):
    """Show a linear-frequency dB spectrogram of the start of an audio file.

    Args:
        audio: Object with a `filename` attribute giving the path to load.
        num_samples: Maximum number of leading samples to analyse.
    """
    signal, rate = librosa.load(audio.filename)
    signal = signal[:num_samples]

    # stft() returns complex values; take the magnitude explicitly before the
    # dB conversion instead of letting amplitude_to_db warn that it is
    # discarding the phase.
    magnitude = np.abs(librosa.stft(signal))
    D = librosa.amplitude_to_db(magnitude, ref=np.max)

    # Pass the loaded sample rate so the frequency axis is scaled for this
    # signal rather than for specshow's default rate.
    librosa.display.specshow(D, sr=rate, y_axis='linear')
    plt.colorbar(format='%+2.0f dB')
    plt.title('Linear-frequency power spectogram')
    plt.show()

In [None]:
def display_wave(audio, num_samples=5000):
    """Plot the first `num_samples` samples of an audio file as a waveform.

    Args:
        audio: Object with a `filename` attribute giving the path to load.
        num_samples: Maximum number of leading samples to plot.
    """
    samples, _ = librosa.load(audio.filename)
    samples = samples[:num_samples]

    # Size the x-axis from the samples actually available: the loaded signal
    # may hold fewer than `num_samples` values, and plt.plot requires x and y
    # to have the same length.
    positions = np.arange(len(samples))

    plt.plot(positions, samples)
    plt.show()

In [None]:
# Waveform, then spectrogram, for wayne_1, limited to the sample count that
# get_sample_size reports for a duration of 3.
clip_len = wayne_1.get_sample_size(3)
display_wave(wayne_1, clip_len)
display_linear_frequency_spectogram(wayne_1, clip_len)

In [None]:
# Waveform, then spectrogram, for wayne_4, limited to the sample count that
# get_sample_size reports for a duration of 3.
clip_len = wayne_4.get_sample_size(3)
display_wave(wayne_4, clip_len)
display_linear_frequency_spectogram(wayne_4, clip_len)

In [None]:
# Load three further example recordings from the working data directory.
ex_1, ex_2, ex_3 = (
    Audio("./working_data/abbott_10.wav"),
    Audio("./working_data/optic_10.wav"),
    Audio("./working_data/aeschylus_10.wav"),
)

In [None]:
# Waveform, then spectrogram, for ex_1, limited to the sample count that
# get_sample_size reports for a duration of 3.
clip_len = ex_1.get_sample_size(3)
display_wave(ex_1, clip_len)
display_linear_frequency_spectogram(ex_1, clip_len)