#### Importing the processed data

In [1]:
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
df_meta = pd.read_csv('./data/processed/metadata.csv')
df_meta

Unnamed: 0,id,composer,composition,movement,ensemble,seconds,key,tonic,tonic_encoded
0,1727,Schubert,Piano Quintet in A major,2. Andante,Piano Quintet,447,A major,A,9.0
1,1728,Schubert,Piano Quintet in A major,3. Scherzo: Presto,Piano Quintet,251,A major,A,9.0
2,1729,Schubert,Piano Quintet in A major,4. Andantino - Allegretto,Piano Quintet,444,A major,A,9.0
3,1730,Schubert,Piano Quintet in A major,5. Allegro giusto,Piano Quintet,368,A major,A,9.0
4,1733,Schubert,Piano Sonata in A major,2. Andantino,Solo Piano,546,A major,A,9.0
...,...,...,...,...,...,...,...,...,...
325,2632,Beethoven,Piano Sonata No 10 in G major,2. Andante,Solo Piano,341,G major,G,7.0
326,2633,Beethoven,Piano Sonata No 10 in G major,3. Scherzo: Allegro assai,Solo Piano,227,G major,G,7.0
327,2659,Bach,Violin Partita No 1 in B minor,6. Double,Solo Violin,108,B minor,B,11.0
328,2677,Beethoven,Piano Sonata No 9 in E major,1. Allegro,Solo Piano,445,E major,E,4.0


In [3]:
df_meta.dtypes

id                 int64
composer          object
composition       object
movement          object
ensemble          object
seconds            int64
key               object
tonic             object
tonic_encoded    float64
dtype: object

In [16]:
df_meta.shape

(330, 9)

In [4]:
# Who are the composers and how many pieces they have in this dataset?
df_meta.composer.value_counts()

Beethoven    157
Bach          67
Schubert      30
Mozart        24
Brahms        24
Cambini        9
Dvorak         8
Faure          4
Ravel          4
Haydn          3
Name: composer, dtype: int64

In [5]:
# What are the most common ensembles?
df_meta.ensemble.value_counts()

Solo Piano                     156
String Quartet                  57
Accompanied Violin              22
Solo Cello                      12
Solo Violin                      9
Wind Quintet                     9
Piano Quartet                    8
Accompanied Cello                7
Piano Trio                       7
Pairs Clarinet-Horn-Bassoon      6
String Sextet                    5
Violin and Harpsichord           4
Horn Piano Trio                  4
Piano Quintet                    4
Wind Octet                       4
Accompanied Clarinet             4
Clarinet Quintet                 3
Solo Flute                       3
Clarinet-Cello-Piano Trio        3
Wind and Strings Octet           2
Viola Quintet                    1
Name: ensemble, dtype: int64

In [6]:
# Let's dive a bit in the "Solo Piano" pieces
df_solopiano = df_meta[df_meta['ensemble']=='Solo Piano']
df_solopiano

Unnamed: 0,id,composer,composition,movement,ensemble,seconds,key,tonic,tonic_encoded
4,1733,Schubert,Piano Sonata in A major,2. Andantino,Solo Piano,546,A major,A,9.0
5,1734,Schubert,Piano Sonata in A major,3. Scherzo. Allegro vivace,Solo Piano,325,A major,A,9.0
6,1735,Schubert,Piano Sonata in A major,4. Rondo. Allegretto,Solo Piano,714,A major,A,9.0
9,1749,Schubert,Piano Sonata in A minor,1. Moderato,Solo Piano,696,A minor,A,9.0
10,1750,Schubert,Piano Sonata in A minor,2. Andante poco mosso,Solo Piano,784,A minor,A,9.0
...,...,...,...,...,...,...,...,...,...
318,2620,Beethoven,Piano Sonata No 6 in F major,3. Presto,Solo Piano,159,F major,F,5.0
325,2632,Beethoven,Piano Sonata No 10 in G major,2. Andante,Solo Piano,341,G major,G,7.0
326,2633,Beethoven,Piano Sonata No 10 in G major,3. Scherzo: Allegro assai,Solo Piano,227,G major,G,7.0
328,2677,Beethoven,Piano Sonata No 9 in E major,1. Allegro,Solo Piano,445,E major,E,4.0


### Exploratory Data Analysis

Let's check Beethoven's data

In [7]:
# Let's check Beethoven
df_bet = df_meta[df_meta['composer']=='Beethoven']
df_bet

Unnamed: 0,id,composer,composition,movement,ensemble,seconds,key,tonic,tonic_encoded
172,2313,Beethoven,String Quartet No 15 in A minor,1. Allegro,String Quartet,593,A minor,A,9.0
173,2314,Beethoven,String Quartet No 15 in A minor,2. Allegro ma non tanto,String Quartet,568,A minor,A,9.0
174,2315,Beethoven,String Quartet No 15 in A minor,3. Molto Adagio; Andante,String Quartet,1069,A minor,A,9.0
175,2318,Beethoven,Trio in E-flat major,1. Allegro moderato,Clarinet-Cello-Piano Trio,641,E-flat major,E-flat,3.0
176,2319,Beethoven,Trio in E-flat major,2. Scherzo: Allegro ma non troppo,Clarinet-Cello-Piano Trio,620,E-flat major,E-flat,3.0
...,...,...,...,...,...,...,...,...,...
324,2629,Beethoven,Violin Sonata No 10 in G major,4. Poco allegretto,Accompanied Violin,517,G major,G,7.0
325,2632,Beethoven,Piano Sonata No 10 in G major,2. Andante,Solo Piano,341,G major,G,7.0
326,2633,Beethoven,Piano Sonata No 10 in G major,3. Scherzo: Allegro assai,Solo Piano,227,G major,G,7.0
328,2677,Beethoven,Piano Sonata No 9 in E major,1. Allegro,Solo Piano,445,E major,E,4.0


In [8]:
df_bet.tonic.value_counts()

E-flat     28
A          23
C          21
G          20
F          18
D          13
B-flat     11
E          11
A-flat      6
C-sharp     5
F-sharp     1
Name: tonic, dtype: int64

In [9]:
df_bet.tonic_encoded.value_counts()

3.0     28
9.0     23
0.0     21
7.0     20
5.0     18
2.0     13
10.0    11
4.0     11
8.0      6
1.0      5
6.0      1
Name: tonic_encoded, dtype: int64

In [10]:
df_piano = df_meta[df_meta['ensemble'] == 'Solo Piano']
df_piiano

NameError: name 'df_piiano' is not defined

### After merged

In [None]:
# I'll add a column indicating the difference between each note played and the tonic of the composition
# This way, we'll know which notes on a chromatic scale are being used
# Thus, we may determine which scales are used

#df_scores['chromatic_degree'] = df_scores.note - df_scores.tonic_encoded

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=177ea176-1f94-4265-9666-0cca06278d8e' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>