<a href="https://colab.research.google.com/github/tuomaseerola/music_and_science_seminar/blob/master/corpus_analysis_tutorial_key.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Music and Science – Audio Corpus Analysis Tutorial 

[Tuomas Eerola](https://www.durham.ac.uk/staff/tuomas-eerola/), Durham University, Music Department, 2023.


In [None]:
#PROMPT: Press the play button to set up the technical system (import libraries etc.)
import os
import numpy as np
import librosa
import librosa.display
import IPython.display as ipd
from matplotlib import pyplot as plt 
%matplotlib inline
# Show which librosa version is installed in this runtime.
print(librosa.__version__)

# 1. Get an audio corpus using mirdata

https://colab.research.google.com/drive/1QhvbtoWlDeBC6EGJA6y0--WxWZYM6euK#scrollTo=GpOi9EGvWHNZ


In [None]:
import sys
# Install mirdata with pip, run through the same Python interpreter as this kernel.
!{sys.executable} -m pip install mirdata

---
### Learning Task X
---
Find **tempo** of the extract in Beats Per Minute (BPM). 
*Tip. There is a command called* `beat.tempo` *that can be used to calculate the tempo.*

In [None]:
import mirdata
# Initialise the 'beatport_key' dataset loader and download the dataset.
beatport_key = mirdata.initialize('beatport_key')
beatport_key.download()

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import IPython.display as ipd
import librosa
import librosa.display

class Tonal_Fragment(object):
    """Estimate the musical key of an audio excerpt from its pitch-class content.

    The chromagram of the excerpt is summed over time into a 12-value
    pitch-class distribution. That distribution is correlated with a major and
    a minor key profile rotated to each of the 12 tonics, and the key with the
    highest correlation is reported.

    Arguments:
        waveform: audio samples loaded by librosa, ideally separated out from
            any percussive sources.
        sr: sampling rate of ``waveform``, as returned when the file is read
            with librosa.
        tstart, tend: range in seconds of the waveform to be analysed; default
            to the beginning and end of the waveform if not specified. After
            construction these attributes hold sample indices (or None).
        profile: name of the key-profile pair in ``KEY_PROFILES`` to correlate
            against: 'temperley' (default) or 'krumhansl'.

    Attributes set by the constructor:
        y_segment: the analysed slice of ``waveform``.
        chromograph: chromagram of ``y_segment``.
        chroma_vals: summed chroma per pitch class (C, C#, ..., B).
        keyfreqs: dict pitch-class name -> summed chroma.
        maj_key_corrs, min_key_corrs: correlation per tonic, rounded to 3 decimals.
        key_dict: dict key name (e.g. 'G minor') -> correlation.
        key, bestcorr: best-matching key and its correlation.
        altkey, altbestcorr: strongest other key whose correlation exceeds
            90% of ``bestcorr``, or None if there is no such key.

    Raises:
        ValueError: if ``profile`` is not a key of ``KEY_PROFILES``.
    """

    PITCHES = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']

    # (major profile, minor profile) pairs, each listed from the tonic upwards.
    KEY_PROFILES = {
        'krumhansl': (
            [6.35, 2.23, 3.48, 2.33, 4.38, 4.09, 2.52, 5.19, 2.39, 3.66, 2.29, 2.88],
            [6.33, 2.68, 3.52, 5.38, 2.60, 3.53, 2.54, 4.75, 3.98, 2.69, 3.34, 3.17],
        ),
        'temperley': (
            [0.748, 0.060, 0.488, 0.082, 0.670, 0.460, 0.096, 0.715, 0.104, 0.366, 0.057, 0.400],
            [0.712, 0.084, 0.474, 0.618, 0.049, 0.460, 0.105, 0.747, 0.404, 0.067, 0.133, 0.330],
        ),
    }

    def __init__(self, waveform, sr, tstart=None, tend=None, profile='temperley'):
        self.waveform = waveform
        self.sr = sr
        self.tstart = tstart
        self.tend = tend

        # Convert the requested time range from seconds to sample indices;
        # None keeps the corresponding end of the slice open.
        if self.tstart is not None:
            self.tstart = librosa.time_to_samples(self.tstart, sr=self.sr)
        if self.tend is not None:
            self.tend = librosa.time_to_samples(self.tend, sr=self.sr)
        self.y_segment = self.waveform[self.tstart:self.tend]
        self.chromograph = librosa.feature.chroma_cqt(y=self.y_segment, sr=self.sr, n_octaves=5, threshold=0.07, fmin=65.4, bins_per_octave=36, hop_length=8192)

        # chroma_vals is the amount of each pitch class present in this time interval
        self.chroma_vals = [np.sum(self.chromograph[i]) for i in range(12)]

        self._estimate_key(profile)

    def _estimate_key(self, profile='temperley'):
        """Correlate ``self.chroma_vals`` with the key profiles and store the result.

        Uses only numpy, so it can be run on any 12-value pitch-class
        distribution assigned to ``self.chroma_vals``.
        """
        if profile not in self.KEY_PROFILES:
            raise ValueError(
                f"unknown key profile {profile!r}; expected one of {sorted(self.KEY_PROFILES)}"
            )
        maj_profile, min_profile = self.KEY_PROFILES[profile]
        pitches = self.PITCHES
        self.profile = profile

        # dictionary relating pitch names to the associated intensity in the excerpt
        self.keyfreqs = {pitches[i]: self.chroma_vals[i] for i in range(12)}

        # Rotate the pitch-class distribution so that each candidate tonic in
        # turn comes first, then correlate it with both profiles.
        self.min_key_corrs = []
        self.maj_key_corrs = []
        for i in range(12):
            key_test = [self.chroma_vals[(i + m) % 12] for m in range(12)]
            self.maj_key_corrs.append(round(np.corrcoef(maj_profile, key_test)[1, 0], 3))
            self.min_key_corrs.append(round(np.corrcoef(min_profile, key_test)[1, 0], 3))

        # names of all major and minor keys, mapped to their correlations
        keys = [p + ' major' for p in pitches] + [p + ' minor' for p in pitches]
        self.key_dict = dict(zip(keys, self.maj_key_corrs + self.min_key_corrs))

        # the key determined by the algorithm
        self.key = max(self.key_dict, key=self.key_dict.get)
        self.bestcorr = self.key_dict[self.key]

        # the runner-up key, if its correlation is close to the best one
        self.altkey, self.altbestcorr = self._closest_alternative(self.key_dict, self.key)

    @staticmethod
    def _closest_alternative(key_dict, best_key, ratio=0.9):
        """Return ``(key, corr)`` of the strongest key other than ``best_key``.

        Only keys whose correlation exceeds ``ratio`` times the best
        correlation qualify; ``(None, None)`` is returned when none does.
        Among several qualifying keys the highest correlation wins, with ties
        resolved by the order of ``key_dict``.
        """
        threshold = key_dict[best_key] * ratio
        candidates = [
            (key, corr)
            for key, corr in key_dict.items()
            if key != best_key and corr > threshold
        ]
        if not candidates:
            return None, None
        return max(candidates, key=lambda item: item[1])

    def _relative_chroma(self):
        """Return (pitch name, weight / largest weight) pairs; also sets ``chroma_max``."""
        self.chroma_max = max(self.chroma_vals)
        return [(name, chrom / self.chroma_max) for name, chrom in self.keyfreqs.items()]

    # prints the relative prominence of each pitch class
    def print_chroma(self):
        for name, weight in self._relative_chroma():
            print(name, '\t', f'{weight:5.3f}')

    # plots the relative prominence of each pitch class
    def plot_chroma(self, title=None):
        relative = self._relative_chroma()
        pckey = [name for name, _ in relative]
        pc = [weight for _, weight in relative]
        plt.figure(figsize=(10, 4))
        plt.bar(pckey, pc, color='darkred')
        plt.xlabel("Pitch classes")
        plt.ylabel("Relative weight")
        if title is None:
            plt.title('Pitch Class Distribution')
        else:
            plt.title(title)
        plt.tight_layout()
        plt.show()

    # prints the correlation coefficients associated with each major/minor key
    def corr_table(self):
        for key, corr in self.key_dict.items():
            print(key, '\t', f'{corr:6.3f}')

    # printout of the key determined by the algorithm; if another key is close, that key is mentioned
    def print_key(self):
        print("likely key: ", self.key, ", correlation: ", self.bestcorr, sep='')
        if self.altkey is not None:
            print("also possible: ", self.altkey, ", correlation: ", self.altbestcorr, sep='')

    # plots a chromagram of the whole waveform (not only the analysed segment),
    # showing the intensity of each pitch class over time
    def chromagram(self, title=None):
        # Use the instance's own sampling rate rather than a notebook-level
        # variable, so the plot is correct whatever `sr` currently holds globally.
        C = librosa.feature.chroma_cqt(y=self.waveform, sr=self.sr, bins_per_octave=24)
        plt.figure(figsize=(12, 4))
        librosa.display.specshow(C, sr=self.sr, x_axis='time', y_axis='chroma', vmin=0, vmax=1)
        if title is None:
            plt.title('Chromagram')
        else:
            plt.title(title)
        plt.colorbar()
        plt.tight_layout()
        plt.show()

## Example track

In [None]:
# Position, within the list of track ids, of the track used as the worked example.
ID = 25
# The dataset's track ids and its loaded tracks (looked up by track id below).
beatport_key_ids = beatport_key.track_ids
beatport_key_data = beatport_key.load_tracks()
example_track = beatport_key_data[beatport_key_ids[ID]]
# Show the example's id, title, key and tempo fields.
print(example_track.track_id, example_track.title, example_track.key, example_track.tempo)

In [None]:
from librosa.core import audio
import numpy as np
import librosa
import librosa.display
import matplotlib.pyplot as plt
from tqdm.auto import tqdm
import warnings
warnings.filterwarnings('ignore')

# Load a 30-second excerpt starting 5 seconds into the track. librosa.load
# returns the samples together with the sampling rate they are actually at,
# so that rate is the one to use for plotting, playback and analysis.
# (Overwriting it with the rate of a separately loaded copy of the file can
# mismatch `y` and forces the whole file to be decoded just to read one number.)
y, sr = librosa.load(example_track.audio_path, offset=5, duration=30)

plt.figure(figsize=(12, 3))
librosa.display.waveshow(y=y, sr=sr)
ipd.display(ipd.Audio(data=y, rate=sr))

This track is annotated as being in G minor (and alternatively in Bb major, but we only look at the first key). Let's try the algorithm. We first filter out the percussive components and focus only on the harmonic components using librosa's own harmonic–percussive source separation filter, `librosa.effects.hpss`. We then apply our custom class, `Tonal_Fragment`, which extracts the chromagram, sums the energy in each pitch class, correlates this distribution with the major and minor key profiles, and infers the key. Let's look at the elements in turn.

In [None]:
# Separate harmonic from percussive content and analyse only the harmonic part,
# so that drum hits do not contribute to the pitch-class profile.
y_harmonic, y_percussive = librosa.effects.hpss(y)
ton = Tonal_Fragment(y_harmonic, sr)                  # key-finding
ton.chromagram(example_track.title)                   # plot chromagram

In [None]:
# Bar chart of the relative weight of each pitch class, titled with the track title.
ton.plot_chroma(example_track.title)

In [None]:
# Print the best-matching key (plus a close alternative, if any), then the correlation for every key.
ton.print_key()
ton.corr_table()


# Let's see how this works on a corpus

In [None]:
#pip install mir_eval
import mir_eval

In [None]:
# Excerpt analysed from every track: start offset and length, in seconds.
EXCERPT_OFFSET = 15
EXCERPT_DURATION = 2
# Value recorded for tracks whose reference annotation cannot be scored.
UNSCORABLE = -1.0

# One numeric entry per scored or unscorable track: the mir_eval weighted key
# score, or UNSCORABLE.
results = []
for track_id in tqdm(beatport_key_ids):
    # Loop-local names are used on purpose so that the example variables from
    # the cells above (example_track, y, sr, ton) are not overwritten.
    track = beatport_key_data[track_id]

    # Only the first annotated key is used as the reference; a track without
    # any annotation is treated like an unknown ('X') key.
    r_key = track.key[0] if track.key else 'X'

    # Decide once whether the reference can be scored, so that each track
    # contributes exactly one entry and invalid keys never reach the scorer.
    if r_key.lower() == 'x':
        is_scorable = False
    else:
        try:
            mir_eval.key.validate_key(r_key)
            is_scorable = True
        except ValueError:
            is_scorable = False

    if not is_scorable:
        print(f' {r_key} is not a valid key.')
        results.append(UNSCORABLE)
        continue  # no need to load audio for a track that cannot be scored

    reference_key = ' '.join(r_key.split()[:2])
    # Annotations containing '^' or '_' are left out of the results entirely.
    if '^' in reference_key or '_' in reference_key:
        continue

    audio, audio_sr = librosa.load(track.audio_path, offset=EXCERPT_OFFSET, duration=EXCERPT_DURATION)
    estimated_key = Tonal_Fragment(audio, audio_sr).key
    results.append(float(mir_eval.key.weighted_score(reference_key, estimated_key)))


## Compile results

In [None]:
# Histogram of the per-track values collected in `results`.
plt.hist(results)
plt.show()

In [None]:
from matplotlib import pyplot as plt 
import numpy as np  

# Distinct result values and how many tracks received each of them.
unique, counts = np.unique(results, return_counts=True)
print(unique)
print(counts)

# Compute the proportions from the values themselves rather than from fixed
# positions in `counts`: positional slices silently shift when a score
# category does not occur in a given run.
scores = np.asarray(results, dtype=float)
scored = scores[scores >= 0]          # drop the -1 "not a valid key" entries
if scored.size == 0:
    print('No tracks could be scored.')
else:
    print(round(float(np.mean(scored >= 0.3)), 2))   # share scoring 0.3, 0.5 or 1.0 (at least partially correct)
    print(round(float(np.mean(scored == 1.0)), 2))   # share scoring 1.0 (fully correct)


## References

* Krumhansl, C. L. (1990). _Cognitive foundations of musical pitch_. Oxford, UK: Oxford University Press.
* Faraldo, Á. (2017). Tonality Estimation in Electronic Dance Music: A Computational and Musically Informed Examination. PhD Thesis. Universitat Pompeu Fabra, Barcelona.