In [39]:
#%pip install pandas

In [6]:
import pandas as pd
import glob

In [41]:
# Load the data

In [11]:
data = glob.glob('./data/*.json')

In [12]:
# collect data
temp = []
for file in data:
    df = pd.read_json(file, date_unit="ms", convert_axes=True)
    # Store new frame in an array (Performance: https://stackoverflow.com/questions/36489576/why-does-concatenation-of-dataframes-get-exponentially-slower)
    temp.append(df)
spotifyData = pd.concat(temp)

In [44]:
# What is our current date range?

In [14]:
f'Currently analyzing data between: {min(spotifyData.endTime)}, and {max(spotifyData.endTime)}'

'Currently analyzing data between: 2018-10-12 14:47, and 2022-12-03 20:30'

In [46]:
# Limit data to this year
#spotifyData = spotifyData.query("endTime > '2023-01-01'").sort_values(by='endTime', ascending=True)

In [77]:
spotifyData = spotifyData.query("endTime < '2021-09-16'").sort_values(by='endTime', ascending=True)

In [78]:
spotifyData

Unnamed: 0,endTime,artistName,trackName,msPlayed
0,2018-10-12 14:47,Corey Layzell,Oxygen,190276
1,2018-10-12 14:48,Jacob Lee,Black Sheep,84898
2,2018-10-12 14:51,THE DLX,Idk,191829
3,2018-10-12 14:52,Streex,You Mean the World to Me (feat. Nick Thompson),25834
4,2018-10-12 15:03,Secret Nation,Home,62855
...,...,...,...,...
6917,2021-03-22 17:28,JLow,Collateral,204507
6918,2021-03-22 17:32,Kuren,Lose My Mind,237120
6919,2021-03-22 17:35,Brandt,I Miss You,179638
6920,2021-03-22 17:39,CADE,Better Off Alone - TENZO Remix,199846


In [47]:
# How many unique values do we have? / Do we have duplicates?

In [10]:
f'There is a total number of {spotifyData.count().endTime} rows. {spotifyData.drop_duplicates().count().endTime} of which are unique' 

'There is a total number of 182528 rows. 108345 of which are unique'

In [49]:
spotifyData = spotifyData.drop_duplicates()

In [50]:
# Filter out any songs that don't meet the 30s requirement
spotifyData = spotifyData.query("msPlayed >= 30000")

# Remove my podcasts
spotifyData = spotifyData.query("artistName != 'FOOTBALL BROMANCE'").query("artistName != 'Billy Yang Podcast'").query("artistName != 'Der beVegt-Podcast | vegan leben und laufen'").query("artistName != 'Trail Society'")

In [51]:
# Whats the most played songs by ms played? How long did I play them?

In [52]:
spotifyData.groupby(["trackName", "artistName"]).sum().sort_values("msPlayed", ascending=False).head(20)

Unnamed: 0_level_0,Unnamed: 1_level_0,endTime,msPlayed
trackName,artistName,Unnamed: 2_level_1,Unnamed: 3_level_1
Nevers Road,Witt Lowry,2020-06-12 15:472020-06-13 14:082020-07-28 13:...,34924375
Fingerprints,Dylan Owen,2020-07-05 16:112019-09-23 05:322019-10-06 13:...,27282057
Ghosts Revisited (feat. Kiah Victoria),Dylan Owen,2020-06-01 18:402020-07-29 09:522020-08-28 17:...,21929218
Kindest Regards,Witt Lowry,2020-06-23 10:442020-08-02 11:512020-08-10 08:...,21785297
Ghosts,Dylan Owen,2019-11-06 20:302019-08-14 08:012022-04-01 09:...,20016741
Time,NF,2020-06-05 12:382020-06-15 16:052020-07-17 07:...,18965277
We’ll Always Have Paris,Capstan,2020-06-12 02:372020-06-13 12:442020-06-13 14:...,18776460
Zuhause,Fynn Kliemann,2020-05-31 10:332020-06-23 10:042020-07-07 05:...,18148392
Unknown Track,Unknown Artist,2020-08-07 08:382020-08-07 09:102020-08-14 09:...,18136504
Stories,Rowlan,2020-05-31 15:232020-06-05 12:422020-06-15 16:...,17944581


In [53]:
# Lets run this again, but instead make our data more readable by adding seconds/minutes/hours

In [16]:
spotifyData["secondsPlayed"] = spotifyData.msPlayed/1000
spotifyData["minutesPlayed"] = spotifyData.secondsPlayed/60
spotifyData["hoursPlayed"] = spotifyData.minutesPlayed/60

In [17]:
spotifyData.groupby(["trackName", "artistName"]).agg(
    totalTimePlayedMs=('msPlayed', 'sum'),
    totalTimePlayedMinutes=('minutesPlayed', 'sum'),
    totalPlayCounts=('msPlayed', 'count')).sort_values("totalTimePlayedMs", ascending=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,totalTimePlayedMs,totalTimePlayedMinutes,totalPlayCounts
trackName,artistName,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Nevers Road,Witt Lowry,68732092,1145.534867,288
I Don't Care,Our Last Night,43849000,730.816667,231
Make A Promise (feat. Elle Vee),Culture Code,40559419,675.990317,193
The New American Religion,Signals,37695732,628.262200,218
Silhouettes,FYKE,37272131,621.202183,194
...,...,...,...,...
The Other Girl,First to Eleven,0,0.000000,2
Blue and Yellow,The Used,0,0.000000,1
Bonfire,The Hunna,0,0.000000,3
Die Trying,New Medicine,0,0.000000,1


In [18]:
spotifyData.groupby(["trackName", "artistName"]).agg(
    totalTimePlayedMs=('msPlayed', 'sum'),
    totalTimePlayedMinutes=('minutesPlayed', 'sum'),
    totalPlayCounts=('msPlayed', 'count')).sort_values("totalPlayCounts", ascending=False).head(20)

Unnamed: 0_level_0,Unnamed: 1_level_0,totalTimePlayedMs,totalTimePlayedMinutes,totalPlayCounts
trackName,artistName,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Nevers Road,Witt Lowry,68732092,1145.534867,288
I Don't Care,Our Last Night,43849000,730.816667,231
hot girl bummer,Our Last Night,33087957,551.46595,223
We’ll Always Have Paris,Capstan,34827160,580.452667,219
The New American Religion,Signals,37695732,628.2622,218
Zuhause,Fynn Kliemann,32372031,539.53385,206
Silhouettes,FYKE,37272131,621.202183,194
Make A Promise (feat. Elle Vee),Culture Code,40559419,675.990317,193
Unknown Track,Unknown Artist,23816979,396.94965,192
All That I Know,Cian Ducrot,28704541,478.409017,186


In [57]:
# What are my most played artists by play time?

In [19]:
spotifyData.groupby("artistName").sum().sort_values("msPlayed", ascending=False).head(20)

Unnamed: 0_level_0,endTime,trackName,msPlayed,secondsPlayed,minutesPlayed,hoursPlayed
artistName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Dylan Owen,2020-06-11 10:082020-06-12 12:042020-06-15 16:...,Break Some IceBones and RibsBruisesGarden of t...,522024692,522024.692,8700.411533,145.006859
Witt Lowry,2020-06-11 12:522020-06-12 12:042020-06-12 15:...,Care Too MuchLast LetterNevers RoadOxyginNever...,460520404,460520.404,7675.340067,127.922334
Our Last Night,2020-06-11 15:512020-06-13 10:402020-06-13 12:...,Losthot girl bummerLostHomehot girl bummerSame...,420282549,420282.549,7004.70915,116.745153
EDEN,2020-06-12 02:232020-06-12 10:502020-06-16 10:...,catch me if you can - Bonus TrackEnd Creditslo...,257991409,257991.409,4299.856817,71.66428
Fynn Kliemann,2020-06-13 09:492020-06-13 09:582020-06-14 12:...,Die HookWartenDie HookWartenSchmeiß mein Leben...,253176739,253176.739,4219.612317,70.326872
Mayday Parade,2020-06-11 11:282020-06-12 12:042020-06-13 11:...,When You See My FriendsI'd Rather Make Mistake...,185237729,185237.729,3087.295483,51.454925
NF,2020-06-15 16:052020-07-17 06:552020-07-17 06:...,TimeLet You DownThe SearchLeave Me AloneNateCh...,169223358,169223.358,2820.3893,47.006488
Sleeping At Last,2020-06-16 15:002020-06-17 07:392020-07-20 10:...,SaturnAtlas: TwoChasing CarsEverywhere I GoSat...,151205771,151205.771,2520.096183,42.001603
The Chainsmokers,2020-06-12 11:082020-06-12 11:302020-06-13 15:...,P.S. I Hope You're HappyP.S. I Hope You're Hap...,149821311,149821.311,2497.02185,41.617031
William Ryan Key,2020-06-15 08:472020-06-16 13:292020-06-24 07:...,"VulturesNo More, No LessThirty DaysThirty Days...",135280721,135280.721,2254.678683,37.577978


In [59]:
# By number of plays?

In [20]:
spotifyData.groupby("artistName").agg(totalPlayCounts=('msPlayed', 'count')).sort_values("totalPlayCounts", ascending=False).head(20)

Unnamed: 0_level_0,totalPlayCounts
artistName,Unnamed: 1_level_1
Dylan Owen,3106
Our Last Night,2712
Witt Lowry,2605
EDEN,1670
Fynn Kliemann,1572
Mayday Parade,1302
Linkin Park,1203
NF,1105
The Chainsmokers,965
Sleeping At Last,960


In [61]:
# Whats the total number of minutes that I listed to music?

In [21]:
totalTime = spotifyData.minutesPlayed.sum()

In [22]:
totalTime

np.float64(520464.6474000001)

In [23]:
totalTime/60, totalTime/60/24

(np.float64(8674.410790000002), np.float64(361.43378291666676))

In [33]:
spotifyData.query('artistName.str.contains("Only")')

Unnamed: 0,endTime,artistName,trackName,msPlayed,secondsPlayed,minutesPlayed,hoursPlayed
8827,2020-09-05 16:44,Only Emily,Amethysts,4950,4.95,0.0825,0.001375
8828,2020-09-05 16:44,Only Emily,Amethysts,7333,7.333,0.122217,0.002037
2208,2022-05-20 06:36,Only Liars,Kerosene,210250,210.25,3.504167,0.058403
2252,2022-05-22 09:57,Only Liars,Kerosene,210250,210.25,3.504167,0.058403
4184,2022-06-20 06:11,Only Twin,St. Mark's Place,269562,269.562,4.4927,0.074878
4216,2022-06-20 08:49,Only Twin,St. Mark's Place,269562,269.562,4.4927,0.074878
4246,2022-06-20 13:07,Only Twin,St. Mark's Place,269562,269.562,4.4927,0.074878
7383,2022-08-13 11:29,Only Liars,Kerosene,210250,210.25,3.504167,0.058403
7396,2022-08-13 11:34,Only Liars,Kerosene,1300,1.3,0.021667,0.000361
807,2020-09-05 16:44,Only Emily,Amethysts,4950,4.95,0.0825,0.001375
