# Import libraries

In [184]:
import os
import datetime
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Find Most Played Track

### Exploration

In [89]:
streaming_hist0 = pd.read_json("./data/yesh/StreamingHistory0.json")
streaming_hist0

Unnamed: 0,endTime,artistName,trackName,msPlayed
0,2020-04-29 05:59,Frank Ocean,Lost,4118
1,2020-04-29 06:02,Frank Ocean,Forrest Gump,194840
2,2020-04-29 06:05,Frank Ocean,Swim Good,159520
3,2020-04-29 09:21,Frank Ocean,Forrest Gump,194840
4,2020-04-29 09:25,Frank Ocean,Crack Rock,224146
...,...,...,...,...
9995,2020-09-20 10:16,The Weeknd,In Your Eyes,180940
9996,2020-09-20 10:19,Drake,One Dance,173986
9997,2020-09-20 10:20,Lil Nas X,Old Town Road - Remix,1060
9998,2020-09-20 10:20,Khalid,Therapy,0


In [122]:
sorted_most_played_tracks = streaming_hist0.groupby('trackName').sum().sort_values('msPlayed', ascending=False).reset_index()
sorted_most_played_tracks

Unnamed: 0,trackName,msPlayed
0,Saint Pablo,11181698
1,The Hills,10422645
2,Laugh Now Cry Later (feat. Lil Durk),10261616
3,Pink Matter,9857501
4,GREECE (feat. Drake),9044331
...,...,...
2746,No Option,0
2747,Jump Out The Window,0
2748,Banded Up (feat. XXXTENTACION),0
2749,KOALA,0


In [124]:
sorted_most_played_tracks.head(1)

Unnamed: 0,trackName,msPlayed
0,Saint Pablo,11181698


### Extract into function

In [125]:
def find_most_played_track(df):
    sorted_most_played_tracks = df.groupby('trackName').sum().sort_values(
        'msPlayed', ascending=False).reset_index()
    return sorted_most_played_tracks.iloc[0]

In [126]:
find_most_played_track(streaming_hist0)

trackName    Saint Pablo
msPlayed        11181698
Name: 0, dtype: object

### Find all streaming history files in a user folder

In [127]:
test_path = './data/yesh'
for root, dirs, files in os.walk(test_path):
    for filename in files:
        if 'StreamingHistory' in filename:
            print(filename)

StreamingHistory2.json
StreamingHistory0.json
StreamingHistory1.json


In [169]:
def get_all_user_streaming_history(user_path):
    all_streaming_history = []
    for root, dirs, files in os.walk(user_path):
        for filename in files:
            if 'StreamingHistory' in filename:
                all_streaming_history.append(filename)
    return all_streaming_history

In [170]:
get_all_user_streaming_history(yesh_path)

['StreamingHistory2.json', 'StreamingHistory0.json', 'StreamingHistory1.json']

### Find most listened track for one user

In [171]:
yesh_path = './data/yesh/'
most_listened_tracks = []

for streaming_history_filename in get_all_user_streaming_history(yesh_path):
    df_streaming_history = pd.read_json(os.path.join(yesh_path, streaming_history_filename))
    most_listened_tracks.append(find_most_played_track(df_streaming_history))
sorted_df_most_listened_tracks = pd.DataFrame(most_listened_tracks).sort_values('msPlayed', ascending=False)
sorted_df_most_listened_tracks.head(1)

Unnamed: 0,trackName,msPlayed
0,Chicago Freestyle (feat. Giveon),15632381


In [172]:
def get_most_listened_track_for_user(user_path):
    most_listened_tracks = []

    for streaming_history_filename in get_all_user_streaming_history(user_path):
        df_streaming_history = pd.read_json(os.path.join(user_path, streaming_history_filename))
        most_listened_tracks.append(find_most_played_track(df_streaming_history))
        
    sorted_df_most_listened_tracks = pd.DataFrame(most_listened_tracks).sort_values('msPlayed', ascending=False)
    return sorted_df_most_listened_tracks.iloc[0]
    
get_most_listened_track_for_user(yesh_path)

trackName    Chicago Freestyle (feat. Giveon)
msPlayed                             15632381
Name: 0, dtype: object

### Find most listened track for all users

In [198]:
def get_most_listened_track_for_all_users(data_path):
    def convert_ms_to_hours(ms):
        seconds = (ms/1000) % 60
        minutes = (ms/(1000 * 60)) % 60
        hours   = (ms/(1000 * 60 * 60)) % 24
        return datetime.time(int(hours), int(minutes), int(seconds))
    
    all_users_most_listened_tracks = []
    
    for user in os.listdir(data_path):
        # format: [user, track name, time]
        data = [user.capitalize()]
        user_most_listened_track = get_most_listened_track_for_user(os.path.join(data_path, user))
        data.append(user_most_listened_track['trackName'])
        data.append(convert_ms_to_hours(user_most_listened_track['msPlayed']))
        
        all_users_most_listened_tracks.append(data)
        
        df = pd.DataFrame(all_users_most_listened_tracks, columns=['Name', 'Track Name', 'Time'])
    
    return df.sort_values('Time', ascending=False)
    
df_most_listened_tracks = get_most_listened_track_for_all_users("./data/")
df_most_listened_tracks

Unnamed: 0,Name,Track Name,Time
6,Yash,Let Her Go,17:26:00
10,Jly,Unknown Track,15:48:06
8,Yusra,After Hours,13:38:42
7,Jxu,Rockstar Knights (with Trippie Redd),09:11:38
4,Andres,Chicago Freestyle (feat. Giveon),06:43:19
13,Noosh,505,05:27:38
11,Aaron,Lemon Pepper Freestyle (feat. Rick Ross),04:42:12
5,Yesh,Chicago Freestyle (feat. Giveon),04:20:32
14,Shrav,Over Now (with The Weeknd),04:10:05
12,Hursh,L$D,03:46:20


In [199]:
df_most_listened_tracks.head(1)

Unnamed: 0,Name,Track Name,Time
6,Yash,Let Her Go,17:26:00


### Yash has listened to Let Her Go for about 17 hours and 26 minutes, that is the most listened track in Pi Chi Chapter