# Part 1 - Data Ingestion and Transformation

## Imports

In [65]:
import pandas as pd
import numpy as np
import spotipy as sp
import spotipy
import sys
from spotipy.oauth2 import SpotifyClientCredentials
import pprint
import sqlite3

import jupyter_black
jupyter_black.load()

# Load environment variables from .env files which is named in the .gitignore file to prevent accidental upload to Github
import os
from dotenv import load_dotenv

load_dotenv()

spotify = spotipy.Spotify(auth_manager=SpotifyClientCredentials())

## API call

### Artists

In [2]:
def build_artist_table(artists: list):
    """Returns a dataframe of artist information from the list of artists"""
    artist_tables = []
    for artist in artists:
        artist_tables.append(return_artist_info(artist))
    return pd.DataFrame(artist_tables)

In [3]:
def return_artist_info(artist: str):
    """Returns a dictionary of artist information from Spotify API.
    If there are multiple genres or images, it chooses the first option."""
    name = artist.lower()
    results = spotify.search(q="artist:" + name, type="artist")
    items = results["artists"]["items"]
    try:
        artist = items[0]
    except:
        raise Exception("No artist found")
    artist_table = {
        "artist_id": artist["id"],
        "artist_name": artist["name"],
        "external_url": artist["external_urls"]["spotify"],
        "genre": artist["genres"][0],
        "image_url": artist["images"][0]["url"],
        "followers": artist["followers"]["total"],
        "popularity": artist["popularity"],
        "type": artist["type"],
        "artist_uri": artist["uri"],
    }
    return artist_table

### Album

In [4]:
def build_album_table(artists_table: pd.DataFrame):
    """Returns a dataframe of all albums for the artists in the given artists_table, which was generate using build_artist_table."""
    for i in artists_table.index:
        artist_uri = artists_table.loc[i, "artist_uri"]
        artist_id = artists_table.loc[i, "artist_id"]
        artist_album_table = return_artist_album_table(artist_uri, artist_id)
        if i == 0:
            album_table = artist_album_table
        else:
            album_table = pd.concat([album_table, artist_album_table])
    return album_table.reset_index(drop=True)

In [5]:
def return_artist_album_table(artist_uri: str, artist_id: str):
    """Returns a dataframe of album(s) information for a given artist from Spotify API"""
    results = spotify.artist_albums(
        artist_id=artist_uri, album_type="album", country="US"
    )
    items = results["items"]
    if len(items) == 0:
        raise Exception("No albums found")
    albums_table = []
    for album in items:
        albums_table.append(return_album_info(album, artist_id))
    return pd.DataFrame(albums_table)

In [6]:
def return_album_info(album: dict, artist_id: str):
    """Returns a dictionary of album information from the provided artist item from the Spotify API file"""
    album_table = {
        "album_id": album["id"],
        "album_name": album["name"],
        "external_url": album["external_urls"]["spotify"],
        "image_url": album["images"][0]["url"],
        "release_date": album["release_date"],
        "total_tracks": album["total_tracks"],
        "type": album["type"],
        "album_uri": album["uri"],
        "artist_id": artist_id,
    }
    return album_table

### Tracks

In [7]:
def build_track_table(album_table: pd.DataFrame):
    """Returns a dataframe of all tracks for the albums in the given album_table, which was generate using build_album_table."""
    for i in album_table.index:
        album_id = album_table.loc[i, "album_id"]
        album_tracks_table = return_album_tracks(album_id)
        if i == 0:
            tracks_table = album_tracks_table
        else:
            tracks_table = pd.concat([tracks_table, album_tracks_table])
    return tracks_table.reset_index(drop=True)

In [8]:
def return_album_tracks(album_id: str):
    """Returns a dataframe of album tracks for a given album_id"""
    results = spotify.album_tracks(album_id=album_id, limit=50, offset=0)
    items = results["items"]
    if len(items) == 0:
        raise Exception("No tracks found")
    tracks_table = []
    for track in items:
        tracks_table.append(return_track_info(track, album_id))
    return pd.DataFrame(tracks_table)

In [9]:
def return_track_info(track: dict, album_id: str):
    """Returns a dictionary of track information from the provided track item from the Spotify API file"""
    track_table = {
        "track_id": track["id"],
        "song_name": track["name"],
        "external_url": track["external_urls"]["spotify"],
        "duration_ms": track["duration_ms"],
        "explicit": track["explicit"],
        "disc_number": track["disc_number"],
        "type": track["type"],
        "song_uri": track["uri"],
        "album_id": album_id,
    }
    return track_table

### Track Feature

In [10]:
def build_track_feature_table(track_table: pd.DataFrame):
    """Returns a dataframe of all tracks features for the track in the given track_table, which was generated using build_track_table."""
    # Can only call 100 ids at a time
    hundreds_of_tracks = track_table.shape[0] % 100 + 1
    for i in range(0, hundreds_of_tracks):
        track_ids = track_table.loc[(i * 100) : ((i + 1) * 100) - 1, "track_id"]
        new_track_feature_table = return_track_feature_table(track_ids)
        if i == 0:
            track_feature_table = new_track_feature_table
        else:
            track_feature_table = pd.concat(
                [track_feature_table, new_track_feature_table]
            )
    return track_feature_table.reset_index(drop=True)

In [11]:
def return_track_feature_table(track_ids: list):
    """Returns a dataframe of track features from a given list of track_ids"""
    results = spotify.audio_features(track_ids)
    if len(results) == 0:
        raise Exception("No tracks found")
    tracks_table = []
    for track_id, track in zip(track_ids, results):
        tracks_table.append(return_track_feature_info(track, track_id))
    return pd.DataFrame(tracks_table)

In [12]:
def return_track_feature_info(track: dict, track_id: str):
    """Returns a dictionary of track feature information for a given track_id from results of a Spotipy API call"""
    if track is None:
        keys = [
            "track_id",
            "danceability",
            "energy",
            "instrumentalness",
            "liveness",
            "loudness",
            "speechiness",
            "tempo",
            "type",
            "valence",
            "song_uri",
        ]
        track_feature_dict = dict(zip(keys, [np.nan] * 11))
        track_feature_dict["song_uri"] = track_id
    track_feature_dict = {
        "track_id": track["id"],
        "danceability": track["danceability"],
        "energy": track["energy"],
        "instrumentalness": track["instrumentalness"],
        "liveness": track["liveness"],
        "loudness": track["loudness"],
        "speechiness": track["speechiness"],
        "tempo": track["tempo"],
        "type": track["type"],
        "valence": track["valence"],
        "song_uri": track["uri"],
    }
    return track_feature_dict

# Example API Call

In [13]:
artists = [
    "Foo Fighters",
    "Taylor Swift",
    "Shakira",
    "Phil Collins",
    "Billie Eilish",
    "Camilo",
    "Bad Bunny",
    "Harry Styles",
    "Sublime",
    "Red Hot Chili Peppers",
    "Lizzo",
    "Adele",
    "Maroon 5",
    "Ed Sheeran",
    "Enrique Iglesias",
    "Coldplay",
    "Lady Gaga",
    "Beyonce",
    "Britney Spears",
    "Queen",
    "Eagles",
]

artist_table = build_artist_table(artists)
album_table = build_album_table(artists_table)
track_table = build_track_table(album_table)
track_feature_table = build_track_feature_table(track_table)

In [29]:
artist_table

Unnamed: 0,artist_id,artist_name,external_url,genre,image_url,followers,popularity,type,artist_uri
0,7jy3rLJdDQY21OgRLCZ9sD,Foo Fighters,https://open.spotify.com/artist/7jy3rLJdDQY21O...,alternative metal,https://i.scdn.co/image/ab6761610000e5eb9a43b8...,10203747,78,artist,spotify:artist:7jy3rLJdDQY21OgRLCZ9sD
1,06HL4z0CvFAxyc27GXpf02,Taylor Swift,https://open.spotify.com/artist/06HL4z0CvFAxyc...,pop,https://i.scdn.co/image/ab6761610000e5ebfcf7c3...,58840286,94,artist,spotify:artist:06HL4z0CvFAxyc27GXpf02
2,0EmeFodog0BfCgMzAIvKQp,Shakira,https://open.spotify.com/artist/0EmeFodog0BfCg...,colombian pop,https://i.scdn.co/image/ab6761610000e5eb284894...,24972709,85,artist,spotify:artist:0EmeFodog0BfCgMzAIvKQp
3,4lxfqrEsLX6N1N4OCSkILp,Phil Collins,https://open.spotify.com/artist/4lxfqrEsLX6N1N...,mellow gold,https://i.scdn.co/image/31fbe64783eb5d49316164...,4735839,75,artist,spotify:artist:4lxfqrEsLX6N1N4OCSkILp
4,6qqNVTkY8uBg9cP3Jd7DAH,Billie Eilish,https://open.spotify.com/artist/6qqNVTkY8uBg9c...,art pop,https://i.scdn.co/image/ab6761610000e5ebd8b998...,68743016,88,artist,spotify:artist:6qqNVTkY8uBg9cP3Jd7DAH
5,28gNT5KBp7IjEOQoevXf9N,Camilo,https://open.spotify.com/artist/28gNT5KBp7IjEO...,colombian pop,https://i.scdn.co/image/ab6761610000e5ebc85ae7...,16394686,81,artist,spotify:artist:28gNT5KBp7IjEOQoevXf9N
6,4q3ewBCX7sLwd24euuV69X,Bad Bunny,https://open.spotify.com/artist/4q3ewBCX7sLwd2...,reggaeton,https://i.scdn.co/image/ab6761610000e5eb8ee9a6...,56038789,100,artist,spotify:artist:4q3ewBCX7sLwd24euuV69X
7,6KImCVD70vtIoJWnq6nGn3,Harry Styles,https://open.spotify.com/artist/6KImCVD70vtIoJ...,pop,https://i.scdn.co/image/ab6761610000e5ebf7db7c...,23394701,91,artist,spotify:artist:6KImCVD70vtIoJWnq6nGn3
8,0EdvGhlC1FkGItLOWQzG4J,Sublime,https://open.spotify.com/artist/0EdvGhlC1FkGIt...,reggae fusion,https://i.scdn.co/image/ab6761610000e5ebe72628...,2329786,70,artist,spotify:artist:0EdvGhlC1FkGItLOWQzG4J
9,0L8ExT028jH3ddEcZwqJJ5,Red Hot Chili Peppers,https://open.spotify.com/artist/0L8ExT028jH3dd...,alternative rock,https://i.scdn.co/image/ab6761610000e5ebc33cc1...,18361778,82,artist,spotify:artist:0L8ExT028jH3ddEcZwqJJ5


In [30]:
artist_table.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21 entries, 0 to 20
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   artist_id     21 non-null     object
 1   artist_name   21 non-null     object
 2   external_url  21 non-null     object
 3   genre         21 non-null     object
 4   image_url     21 non-null     object
 5   followers     21 non-null     int64 
 6   popularity    21 non-null     int64 
 7   type          21 non-null     object
 8   artist_uri    21 non-null     object
dtypes: int64(2), object(7)
memory usage: 1.6+ KB


In [17]:
album_table

Unnamed: 0,album_id,album_name,external_url,image_url,release_date,total_tracks,type,album_uri,artist_id
0,2FfewmvnA0wctMD64KjOxP,Dream Widow,https://open.spotify.com/album/2FfewmvnA0wctMD...,https://i.scdn.co/image/ab67616d0000b273a57aba...,2022-03-25,8,album,spotify:album:2FfewmvnA0wctMD64KjOxP,7jy3rLJdDQY21OgRLCZ9sD
1,50QMS2zosvUxhucf6zMRUy,Dee Gees / Hail Satin - Foo Fighters / Live,https://open.spotify.com/album/50QMS2zosvUxhuc...,https://i.scdn.co/image/ab67616d0000b27349bd2e...,2021-07-19,10,album,spotify:album:50QMS2zosvUxhucf6zMRUy,7jy3rLJdDQY21OgRLCZ9sD
2,1FyNZvJ6MHO01kl3ySMPdc,Medicine At Midnight,https://open.spotify.com/album/1FyNZvJ6MHO01kl...,https://i.scdn.co/image/ab67616d0000b273593dc7...,2021-02-05,9,album,spotify:album:1FyNZvJ6MHO01kl3ySMPdc,7jy3rLJdDQY21OgRLCZ9sD
3,4Sb67WEPsYgQxd7GItaAtI,Medicine At Midnight (Track Commentary),https://open.spotify.com/album/4Sb67WEPsYgQxd7...,https://i.scdn.co/image/ab67616d0000b2732546fe...,2021-02-03,9,album,spotify:album:4Sb67WEPsYgQxd7GItaAtI,7jy3rLJdDQY21OgRLCZ9sD
4,6KMkuqIwKkwUhUYRPL6dUc,Concrete and Gold,https://open.spotify.com/album/6KMkuqIwKkwUhUY...,https://i.scdn.co/image/ab67616d0000b273bc02d6...,2017-09-15,11,album,spotify:album:6KMkuqIwKkwUhUYRPL6dUc,7jy3rLJdDQY21OgRLCZ9sD
...,...,...,...,...,...,...,...,...,...
271,5NMAdQPrKw5nutWnGEzfpn,Hotel California (40th Anniversary Expanded Ed...,https://open.spotify.com/album/5NMAdQPrKw5nutW...,https://i.scdn.co/image/ab67616d0000b273d66618...,1976-12-08,19,album,spotify:album:5NMAdQPrKw5nutWnGEzfpn,0ECwFtbIWEVNwjlrfc6xoL
272,0F77QekrNe8vVAjU2sepja,One of These Nights (2013 Remaster),https://open.spotify.com/album/0F77QekrNe8vVAj...,https://i.scdn.co/image/ab67616d0000b2735d0a8e...,1975,9,album,spotify:album:0F77QekrNe8vVAjU2sepja,0ECwFtbIWEVNwjlrfc6xoL
273,2iCHyD9XHtA3vJFJIuXzqu,On the Border (2013 Remaster),https://open.spotify.com/album/2iCHyD9XHtA3vJF...,https://i.scdn.co/image/ab67616d0000b273a7606c...,1974,10,album,spotify:album:2iCHyD9XHtA3vJFJIuXzqu,0ECwFtbIWEVNwjlrfc6xoL
274,09WBxbis5Sixt01FVMs8UM,Desperado (2013 Remaster),https://open.spotify.com/album/09WBxbis5Sixt01...,https://i.scdn.co/image/ab67616d0000b2732d73b1...,1973,11,album,spotify:album:09WBxbis5Sixt01FVMs8UM,0ECwFtbIWEVNwjlrfc6xoL


In [18]:
album_table.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 276 entries, 0 to 275
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   album_id      276 non-null    object
 1   album_name    276 non-null    object
 2   external_url  276 non-null    object
 3   image_url     276 non-null    object
 4   release_date  276 non-null    object
 5   total_tracks  276 non-null    int64 
 6   type          276 non-null    object
 7   album_uri     276 non-null    object
 8   artist_id     276 non-null    object
dtypes: int64(1), object(8)
memory usage: 19.5+ KB


In [20]:
track_table

Unnamed: 0,track_id,song_name,external_url,duration_ms,explicit,disc_number,type,song_uri,album_id
0,5k8kaD41vSP6l0Jhe9HzmY,Encino,https://open.spotify.com/track/5k8kaD41vSP6l0J...,98293,True,1,track,spotify:track:5k8kaD41vSP6l0Jhe9HzmY,2FfewmvnA0wctMD64KjOxP
1,1QivlVy4J2yWaCZ6RnWl70,Cold,https://open.spotify.com/track/1QivlVy4J2yWaCZ...,313373,False,1,track,spotify:track:1QivlVy4J2yWaCZ6RnWl70,2FfewmvnA0wctMD64KjOxP
2,58bqM9Eees0hIoem8ed3GF,March of the Insane,https://open.spotify.com/track/58bqM9Eees0hIoe...,211000,False,1,track,spotify:track:58bqM9Eees0hIoem8ed3GF,2FfewmvnA0wctMD64KjOxP
3,58bkJBSouJt9mMdZkYj4ND,The Sweet Abyss,https://open.spotify.com/track/58bkJBSouJt9mMd...,259812,False,1,track,spotify:track:58bkJBSouJt9mMdZkYj4ND,2FfewmvnA0wctMD64KjOxP
4,7ezrc9VG9RlDvRaEOt0K6D,Angel With Severed Wings,https://open.spotify.com/track/7ezrc9VG9RlDvRa...,272880,False,1,track,spotify:track:7ezrc9VG9RlDvRaEOt0K6D,2FfewmvnA0wctMD64KjOxP
...,...,...,...,...,...,...,...,...,...
4442,5ro7xAxDVbtabTl8t3MzHz,Train Leaves Here This Morning - 2013 Remaster,https://open.spotify.com/track/5ro7xAxDVbtabTl...,250456,False,1,track,spotify:track:5ro7xAxDVbtabTl8t3MzHz,51B7LbLWgYLKBVSpkan8Z7
4443,0J8Q2BOEzphO2tTUlfCUln,Take the Devil - 2013 Remaster,https://open.spotify.com/track/0J8Q2BOEzphO2tT...,240827,False,1,track,spotify:track:0J8Q2BOEzphO2tTUlfCUln,51B7LbLWgYLKBVSpkan8Z7
4444,0cuiu7deGyY5kSKZgMEyaJ,Earlybird - 2013 Remaster,https://open.spotify.com/track/0cuiu7deGyY5kSK...,179943,False,1,track,spotify:track:0cuiu7deGyY5kSKZgMEyaJ,51B7LbLWgYLKBVSpkan8Z7
4445,40h65HAR8COEoqkMwUUQHu,Peaceful Easy Feeling - 2013 Remaster,https://open.spotify.com/track/40h65HAR8COEoqk...,257962,False,1,track,spotify:track:40h65HAR8COEoqkMwUUQHu,51B7LbLWgYLKBVSpkan8Z7


In [21]:
track_table.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4447 entries, 0 to 4446
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   track_id      4447 non-null   object
 1   song_name     4447 non-null   object
 2   external_url  4447 non-null   object
 3   duration_ms   4447 non-null   int64 
 4   explicit      4447 non-null   bool  
 5   disc_number   4447 non-null   int64 
 6   type          4447 non-null   object
 7   song_uri      4447 non-null   object
 8   album_id      4447 non-null   object
dtypes: bool(1), int64(2), object(6)
memory usage: 282.4+ KB


In [22]:
track_feature_table

Unnamed: 0,track_id,danceability,energy,instrumentalness,liveness,loudness,speechiness,tempo,type,valence,song_uri
0,5k8kaD41vSP6l0Jhe9HzmY,0.277,0.992,0.836000,0.2720,-6.237,0.0856,103.494,audio_features,0.148,spotify:track:5k8kaD41vSP6l0Jhe9HzmY
1,1QivlVy4J2yWaCZ6RnWl70,0.380,0.969,0.024100,0.3040,-6.147,0.0649,132.869,audio_features,0.352,spotify:track:1QivlVy4J2yWaCZ6RnWl70
2,58bqM9Eees0hIoem8ed3GF,0.170,0.998,0.352000,0.7570,-4.585,0.2320,162.402,audio_features,0.060,spotify:track:58bqM9Eees0hIoem8ed3GF
3,58bkJBSouJt9mMdZkYj4ND,0.111,0.978,0.760000,0.2570,-5.330,0.0737,165.373,audio_features,0.303,spotify:track:58bkJBSouJt9mMdZkYj4ND
4,7ezrc9VG9RlDvRaEOt0K6D,0.253,0.949,0.008090,0.1350,-6.126,0.1110,161.611,audio_features,0.160,spotify:track:7ezrc9VG9RlDvRaEOt0K6D
...,...,...,...,...,...,...,...,...,...,...,...
4442,5ro7xAxDVbtabTl8t3MzHz,0.628,0.320,0.000351,0.2490,-16.010,0.0301,114.526,audio_features,0.545,spotify:track:5ro7xAxDVbtabTl8t3MzHz
4443,0J8Q2BOEzphO2tTUlfCUln,0.600,0.449,0.027400,0.0775,-13.313,0.0374,75.103,audio_features,0.470,spotify:track:0J8Q2BOEzphO2tTUlfCUln
4444,0cuiu7deGyY5kSKZgMEyaJ,0.595,0.784,0.001500,0.3500,-11.435,0.0847,115.273,audio_features,0.460,spotify:track:0cuiu7deGyY5kSKZgMEyaJ
4445,40h65HAR8COEoqkMwUUQHu,0.568,0.634,0.002810,0.2520,-12.336,0.0282,142.686,audio_features,0.866,spotify:track:40h65HAR8COEoqkMwUUQHu


In [23]:
track_feature_table.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4447 entries, 0 to 4446
Data columns (total 11 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   track_id          4447 non-null   object 
 1   danceability      4447 non-null   float64
 2   energy            4447 non-null   float64
 3   instrumentalness  4447 non-null   float64
 4   liveness          4447 non-null   float64
 5   loudness          4447 non-null   float64
 6   speechiness       4447 non-null   float64
 7   tempo             4447 non-null   float64
 8   type              4447 non-null   object 
 9   valence           4447 non-null   float64
 10  song_uri          4447 non-null   object 
dtypes: float64(8), object(3)
memory usage: 382.3+ KB


In [25]:
songs = pd.merge(track_feature_table, track_table, on="track_id", how="outer")

In [27]:
songs = pd.merge(songs, album_table, on="album_id", how="outer")

In [33]:
songs = pd.merge(songs, artist_table, on="artist_id", how="outer")

  songs = pd.merge(songs, artist_table, on="artist_id", how="outer")


In [35]:
songs.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4447 entries, 0 to 4446
Data columns (total 35 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   track_id          4447 non-null   object 
 1   danceability      4447 non-null   float64
 2   energy            4447 non-null   float64
 3   instrumentalness  4447 non-null   float64
 4   liveness          4447 non-null   float64
 5   loudness          4447 non-null   float64
 6   speechiness       4447 non-null   float64
 7   tempo             4447 non-null   float64
 8   type_x            4447 non-null   object 
 9   valence           4447 non-null   float64
 10  song_uri_x        4447 non-null   object 
 11  song_name         4447 non-null   object 
 12  external_url_x    4447 non-null   object 
 13  duration_ms       4447 non-null   int64  
 14  explicit          4447 non-null   bool   
 15  disc_number       4447 non-null   int64  
 16  type_y            4447 non-null   object 


In [37]:
songs.to_csv("songs.csv", index=False)

In [39]:
songs.groupby("artist_name").nunique().iloc[:, 0:10]

Unnamed: 0_level_0,track_id,danceability,energy,instrumentalness,liveness,loudness,speechiness,tempo,type_x,valence
artist_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Adele,46,45,46,20,40,46,45,46,1,46
Bad Bunny,92,84,80,60,78,92,87,92,1,83
Beyoncé,375,257,258,218,262,325,278,332,1,275
Billie Eilish,46,45,41,43,39,44,44,46,1,46
Britney Spears,196,125,136,90,139,158,147,158,1,145
Camilo,32,32,32,5,32,32,31,32,1,32
Coldplay,209,154,169,178,155,184,139,185,1,166
Eagles,161,132,139,140,132,151,123,152,1,140
Ed Sheeran,232,117,131,45,119,145,132,150,1,135
Enrique Iglesias,223,151,154,105,147,185,141,185,1,164


## Load to Database

In [143]:
def store_tables_in_db(table_names: [str], tables: [pd.DataFrame], db: str):
    """Saves the tables as the provided table_names into the given db"""
    conn = sqlite3.connect(f"./{db}.db")
    c = conn.cursor()
    for table_name, table in zip(table_names, tables):
        insert_table(table_name, table, c, conn)

In [144]:
def insert_table(
    table_name: str, table: pd.DataFrame, c: sqlite3.Cursor, conn: sqlite3.Connection
):
    columns = ", ".join(table.columns)
    c.execute(f"""CREATE TABLE IF NOT EXISTS {table_name} ({columns})""")
    table.to_sql(table_name, conn, if_exists="replace", index=False)

In [145]:
def retrieve_table(table_name: str, db: str):
    conn = sqlite3.connect(f"./{db}.db")
    c = conn.cursor()
    data = c.execute(f"""SELECT * FROM {table_name}""").fetchall()
    columns = c.execute(f"""PRAGMA table_info({table_name})""").fetchall()
    columns = [columns[i][1] for i in range(0, len(columns))]
    return pd.DataFrame(data, columns=columns)

In [146]:
table_names = ["artist", "album", "track", "track_feature"]
tables = [artist_table, album_table, track_table, track_feature_table]

store_tables_in_db(table_names, tables, "spotify")

In [147]:
sql_tables = {}
for table_name in table_names:
    sql_tables[f"{table_name}_sql"] = retrieve_table(table_name, "spotify")

In [148]:
(sql_tables["track_feature_sql"] == track_feature_table)

Unnamed: 0,track_id,danceability,energy,instrumentalness,liveness,loudness,speechiness,tempo,type,valence,song_uri
0,True,True,True,True,True,True,True,True,True,True,True
1,True,True,True,True,True,True,True,True,True,True,True
2,True,True,True,True,True,True,True,True,True,True,True
3,True,True,True,True,True,True,True,True,True,True,True
4,True,True,True,True,True,True,True,True,True,True,True
...,...,...,...,...,...,...,...,...,...,...,...
4442,True,True,True,True,True,True,True,True,True,True,True
4443,True,True,True,True,True,True,True,True,True,True,True
4444,True,True,True,True,True,True,True,True,True,True,True
4445,True,True,True,True,True,True,True,True,True,True,True
