In [1]:
# Dependencies
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import json
import re, glob
import os, sys
from scipy import stats

In [2]:
# Load the per-country track list (chart position, streams, Year/Month and
# audio features per track) from the CSV sitting next to this notebook.
country_tracks_data_df = pd.read_csv(
    filepath_or_buffer="New Zealand_FIX_tracks_with_audio_features.csv"
)
country_tracks_data_df.head(n=5)

Unnamed: 0,Position,Track Name,Artist,Streams,URL,Year,Month,Track ID,Danceability,Valence,Energy
0,1,7 rings,Ariana Grande,562985,https://open.spotify.com/track/14msK75pk3pA33p...,2019,1,6ocbgoVGwYJhOv1GgI9NsF,0.778,0.317,0.327
1,2,Sunflower - Spider-Man: Into the Spider-Verse,Post Malone,389293,https://open.spotify.com/track/3KkXRkHbMCARz0a...,2019,1,3KkXRkHbMCARz0aVfEt68P,0.76,0.479,0.913
2,3,Wow.,Post Malone,345858,https://open.spotify.com/track/6MWtB6iiXyIwun0...,2019,1,7xQAfvXzm3AkraOtGPWIZg,0.829,0.539,0.388
3,4,Without Me,Halsey,280760,https://open.spotify.com/track/5p7ujcrUXASCNwR...,2019,1,7lQ8MOhq6IN2w8EYcFNSUk,0.908,0.669,0.662
4,5,Sweet but Psycho,Ava Max,276126,https://open.spotify.com/track/25sgk305KZfyuqV...,2019,1,7DnAm9FOTWE3cUvso43HhI,0.72,0.706,0.62


In [3]:
# Total streams for each (Year, Month) period.
# The result is a Series indexed by a (Year, Month) MultiIndex.
total_sum = country_tracks_data_df.groupby(['Year', 'Month'])['Streams'].sum()
# Same totals as a one-column DataFrame, still indexed by (Year, Month).
data = total_sum.to_frame()
# Move Year and Month out of the index into ordinary columns so they can be
# used as merge keys in the next step. reset_index() is the direct way to do
# this; it replaces the earlier to_records() round trip and yields the same
# Year / Month / Streams frame with a fresh 0..n-1 index.
my_data = data.reset_index()

my_data.head()

Unnamed: 0,Year,Month,Streams
0,2019,1,17016137
1,2019,2,17065715
2,2019,3,15840837
3,2019,4,15354252
4,2019,5,15997780


In [4]:
# Attach each month's total streams to every track row of that month.
# Both frames carry a 'Streams' column, so pandas' default suffixes apply:
# 'Streams_x' is the track's own streams, 'Streams_y' is the monthly total.
next_step = pd.merge(
    country_tracks_data_df,
    my_data,
    on=['Year', 'Month'],
    how='inner',
)
next_step.head()

Unnamed: 0,Position,Track Name,Artist,Streams_x,URL,Year,Month,Track ID,Danceability,Valence,Energy,Streams_y
0,1,7 rings,Ariana Grande,562985,https://open.spotify.com/track/14msK75pk3pA33p...,2019,1,6ocbgoVGwYJhOv1GgI9NsF,0.778,0.317,0.327,17016137
1,2,Sunflower - Spider-Man: Into the Spider-Verse,Post Malone,389293,https://open.spotify.com/track/3KkXRkHbMCARz0a...,2019,1,3KkXRkHbMCARz0aVfEt68P,0.76,0.479,0.913,17016137
2,3,Wow.,Post Malone,345858,https://open.spotify.com/track/6MWtB6iiXyIwun0...,2019,1,7xQAfvXzm3AkraOtGPWIZg,0.829,0.539,0.388,17016137
3,4,Without Me,Halsey,280760,https://open.spotify.com/track/5p7ujcrUXASCNwR...,2019,1,7lQ8MOhq6IN2w8EYcFNSUk,0.908,0.669,0.662,17016137
4,5,Sweet but Psycho,Ava Max,276126,https://open.spotify.com/track/25sgk305KZfyuqV...,2019,1,7DnAm9FOTWE3cUvso43HhI,0.72,0.706,0.62,17016137


In [5]:
# Add one stream-weighted column per audio feature:
#   feature value * track streams (Streams_x) / monthly total streams (Streams_y)
# The multiply-then-divide order is kept as-is so the floating-point results
# do not change.
feature_pairs = (
    ('Danceability %', 'Danceability'),
    ('Energy %', 'Energy'),
    ('Valence %', 'Valence'),
)
for weighted_name, feature_name in feature_pairs:
    next_step[weighted_name] = (
        next_step[feature_name] * next_step['Streams_x'] / next_step['Streams_y']
    )
next_step.head()

Unnamed: 0,Position,Track Name,Artist,Streams_x,URL,Year,Month,Track ID,Danceability,Valence,Energy,Streams_y,Danceability %,Energy %,Valence %
0,1,7 rings,Ariana Grande,562985,https://open.spotify.com/track/14msK75pk3pA33p...,2019,1,6ocbgoVGwYJhOv1GgI9NsF,0.778,0.317,0.327,17016137,0.02574,0.010819,0.010488
1,2,Sunflower - Spider-Man: Into the Spider-Verse,Post Malone,389293,https://open.spotify.com/track/3KkXRkHbMCARz0a...,2019,1,3KkXRkHbMCARz0aVfEt68P,0.76,0.479,0.913,17016137,0.017387,0.020887,0.010959
2,3,Wow.,Post Malone,345858,https://open.spotify.com/track/6MWtB6iiXyIwun0...,2019,1,7xQAfvXzm3AkraOtGPWIZg,0.829,0.539,0.388,17016137,0.01685,0.007886,0.010955
3,4,Without Me,Halsey,280760,https://open.spotify.com/track/5p7ujcrUXASCNwR...,2019,1,7lQ8MOhq6IN2w8EYcFNSUk,0.908,0.669,0.662,17016137,0.014982,0.010923,0.011038
4,5,Sweet but Psycho,Ava Max,276126,https://open.spotify.com/track/25sgk305KZfyuqV...,2019,1,7DnAm9FOTWE3cUvso43HhI,0.72,0.706,0.62,17016137,0.011684,0.010061,0.011456


In [6]:
# Build the final table: one row per (Year, Month), summing the per-track
# weighted contributions. as_index=False keeps Year and Month as regular
# columns rather than moving them into the index.
weighted_cols = ['Danceability %', 'Energy %', 'Valence %']
last_step = (
    next_step
    .groupby(['Year', 'Month'], as_index=False)[weighted_cols]
    .sum()
)

last_step.head()

Unnamed: 0,Year,Month,Danceability %,Energy %,Valence %
0,2019,1,0.690073,0.464874,0.600466
1,2019,2,0.685002,0.441264,0.588278
2,2019,3,0.686947,0.457023,0.588093
3,2019,4,0.694459,0.481932,0.566785
4,2019,5,0.694453,0.491478,0.587496


In [7]:
# Write the monthly weighted features to disk. index=True is pandas' default
# and is spelled out here on purpose: the row index is written as an extra,
# unnamed first column of the CSV.
last_step.to_csv(
    path_or_buf="New Zealand_Weighted_tracks_with_audio_features.csv",
    index=True,
)