<a href="https://colab.research.google.com/github/yuki-tamaribuchi/ml_with_spotify_api/blob/master/spotify_api_dataprocess.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install spotipy pydub tqdm



In [None]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import spotipy.util as util
import json
import sys
import os
import io
import numpy as np
import pandas as pd
import IPython.display as ipd
from pydub import AudioSegment
import missingno as msno

In [None]:
# Read the Spotify client credentials from a JSON file.
# NOTE(review): the path lives under /content/drive — presumably a Google Drive
# mount that must already exist when this cell runs; confirm.
with open('/content/drive/MyDrive/api_keys/spotify_for_ml.json') as key_file:
  api_key=json.load(key_file)

# NOTE(review): 'client_cecret' is the key exactly as this cell looks it up;
# it presumably matches the spelling inside the JSON file — verify before
# "correcting" it.
auth_manager=SpotifyClientCredentials(
  client_id=api_key['client_id'],
  client_secret=api_key['client_cecret'],
)

# Client handle used by the rest of the notebook.
sp=spotipy.Spotify(auth_manager=auth_manager)

In [None]:
def create_df_from_playlist(spotify,playlist_ids):
  """Collect single-artist tracks from the given playlists into a DataFrame.

  Args:
    spotify: client object exposing ``playlist_tracks(playlist_id)`` and
      ``next(page)`` (a ``spotipy.Spotify`` instance in this notebook).
    playlist_ids: iterable of playlist ids, processed in order.

  Returns:
    pandas.DataFrame with columns 'Track_Name', 'Artist' and 'Preview_Url',
    one row per kept track. 'Preview_Url' is copied as-is and may be None.

  Only tracks with exactly one entry in their 'artists' list are kept.
  """
  track_names=[]
  artists=[]
  preview_urls=[]

  for playlist_id in playlist_ids:
    page=spotify.playlist_tracks(playlist_id)

    # playlist_tracks returns a single page of items; keep following the
    # 'next' link so playlists longer than one page are not truncated.
    while page:
      for item in page['items']:
        track=item.get('track')

        # A playlist entry can carry no track object (e.g. an unavailable
        # track); indexing into None would raise, so skip those entries.
        if not track:
          continue

        if len(track['artists'])==1:
          track_names.append(track['name'])
          artists.append(track['artists'][0]['name'])
          preview_urls.append(track['preview_url'])

      page=spotify.next(page) if page.get('next') else None

  return pd.DataFrame(
    {
        'Track_Name':track_names,
        'Artist':artists,
        'Preview_Url':preview_urls
    }
  )

In [None]:
# Spotify playlist ids whose tracks are gathered into the working DataFrame.
playlist_ids=[
  '37i9dQZF1DX7J3LlfnX9oG',
  '37i9dQZF1DZ06evO1QM05u',
  '37i9dQZF1DWSthRe0OXm8b',
]

# One row per single-artist track across all listed playlists.
df=create_df_from_playlist(spotify=sp,playlist_ids=playlist_ids)


In [None]:
def create_audio_array(df):
  """Download each preview MP3 and store its decoded samples in df.

  For every URL in ``df['Preview_Url']`` the file is fetched, decoded with
  pydub, divided by the segment's maximum possible amplitude (32768 for
  16-bit audio) and reshaped to ``(channels, frames)``. The resulting arrays
  are written to a new 'Audio_array' column.

  Args:
    df: DataFrame with a 'Preview_Url' column. It is modified in place.

  Returns:
    The same ``df`` object, now carrying the 'Audio_array' column.

  Raises:
    ValueError: if any 'Preview_Url' entry is missing, since there is
      nothing to download for that row.
  """
  from urllib.request import urlopen
  import io
  from pydub import AudioSegment
  import numpy as np
  from tqdm import tqdm

  # Fail before any download starts instead of crashing inside urlopen.
  missing=int(df['Preview_Url'].isna().sum())
  if missing:
    raise ValueError(
      '{} track(s) have no preview URL; drop them before calling create_audio_array'.format(missing)
    )

  # Tracks are kept in a plain list and handled one at a time: decoded
  # previews can differ in length, and stacking arrays of different lengths
  # with np.array() is an error on current NumPy.
  samples=[]
  for url in tqdm(df['Preview_Url']):
    # Close the HTTP response as soon as the bytes have been read.
    with urlopen(url) as response:
      mp3_bytes=io.BytesIO(response.read())

    audio=AudioSegment.from_file(mp3_bytes,format='mp3')
    pcm=np.array(audio.get_array_of_samples())

    # Scale by the segment's own full-scale value rather than assuming 16 bit.
    pcm=pcm/audio.max_possible_amplitude

    # Split the flat sample sequence into one row per channel, using the
    # segment's channel count instead of assuming stereo.
    samples.append(np.reshape(pcm,(-1,audio.channels)).T)

  df['Audio_array']=samples
  return df

In [None]:
# Fetch and decode every preview; adds the 'Audio_array' column to df.
df=create_audio_array(df)

100%|██████████| 158/158 [00:17<00:00,  9.29it/s]
100%|██████████| 158/158 [00:48<00:00,  3.22it/s]
100%|██████████| 158/158 [00:01<00:00, 129.63it/s]
  
100%|██████████| 158/158 [00:02<00:00, 65.70it/s]
100%|██████████| 158/158 [00:00<00:00, 60736.87it/s]


In [None]:
def create_cens_array(df,sr=44100):
  """Compute a CENS chromagram for each entry of df['Audio_array'].

  Every entry is passed through ``librosa.to_mono`` and the result is fed to
  ``librosa.feature.chroma_cens``. The outputs are stored in a new 'Cens'
  column.

  Args:
    df: DataFrame with an 'Audio_array' column. It is modified in place.
    sr: sample rate handed to ``chroma_cens``. Defaults to 44100, the value
      this function previously hard-coded.
      NOTE(review): assumes the decoded previews really are sampled at this
      rate — confirm against the audio source.

  Returns:
    The same ``df`` object, now carrying the 'Cens' column.
  """
  from librosa.feature import chroma_cens
  from librosa import to_mono
  from tqdm import tqdm

  # Down-mix and analyse one track at a time so no second full-size list of
  # mono signals has to be kept around.
  cens_result=[chroma_cens(y=to_mono(y),sr=sr) for y in tqdm(df['Audio_array'])]

  df['Cens']=cens_result
  return df

In [None]:
# Compute the chroma features; adds the 'Cens' column to df.
df=create_cens_array(df)

100%|██████████| 158/158 [00:05<00:00, 29.77it/s]
100%|██████████| 158/158 [03:01<00:00,  1.15s/it]


In [None]:
# Show the frame with its Track_Name, Artist, Preview_Url, Audio_array and Cens columns.
df

Unnamed: 0,Track_Name,Artist,Preview_Url,Audio_array,Cens
0,ROCKET DIVE,Dragon Ash,https://p.scdn.co/mp3-preview/d05a5ed60f7e743d...,"[[0.087554931640625, -0.085174560546875, -0.31...","[[0.22353559050696337, 0.2245500409016965, 0.2..."
1,Fantasista,Dragon Ash,https://p.scdn.co/mp3-preview/b73d80aac3208bb2...,"[[-0.287750244140625, -0.485687255859375, -0.6...","[[0.2452071422662361, 0.24578546422317596, 0.2..."
2,Revolater,Dragon Ash,https://p.scdn.co/mp3-preview/e3b4a7d4e555b22a...,"[[-0.135955810546875, -0.06500244140625, 0.055...","[[0.24667090252192406, 0.2459724200237336, 0.2..."
3,Viva la revolution,Dragon Ash,https://p.scdn.co/mp3-preview/730ca1e227ead853...,"[[0.182220458984375, 0.221221923828125, 0.2017...","[[0.23306837947403192, 0.23507425933085432, 0...."
4,"Let yourself go, Let myself go",Dragon Ash,https://p.scdn.co/mp3-preview/e78d36236e4b132b...,"[[0.1226806640625, 0.139739990234375, 0.199798...","[[0.0192052584685959, 0.023587659305045833, 0...."
...,...,...,...,...,...
153,また逢える日まで,WANIMA,https://p.scdn.co/mp3-preview/2f0e8097ffcbe1b0...,"[[0.031402587890625, 0.04010009765625, 0.05020...","[[0.41661251574338853, 0.41061265061950325, 0...."
154,花火,WANIMA,https://p.scdn.co/mp3-preview/42d0e5d1462a2526...,"[[0.639434814453125, 0.8055419921875, 0.836303...","[[0.22543977218564235, 0.22646919524449535, 0...."
155,SNOW,WANIMA,https://p.scdn.co/mp3-preview/2aadef61ec7cefdc...,"[[-0.197357177734375, -0.275970458984375, -0.2...","[[0.2532939008009282, 0.2526479697420406, 0.25..."
156,1106,WANIMA,https://p.scdn.co/mp3-preview/36467e84a257959e...,"[[0.22528076171875, 0.2340087890625, 0.1240844...","[[0.2564126554574407, 0.2604965909669341, 0.26..."


In [None]:
# File name for the pickle written by the (currently disabled) line below;
# left empty here and meant to be filled in before saving.
name=''
# Uncomment to pickle df into /content/drive/MyDrive/spotify_df/ under that name.
#df.to_pickle('/content/drive/MyDrive/spotify_df/{}'.format(name))