# Import Dependencies

In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
from sqlalchemy import create_engine, inspect
from config import *

# Create Function to WebScrape Spotify

* Accepts chart type (e.g. top_200, viral_50)
* Defaults to Top 200 charts
* Accepts date argument in format 2020-06-14
* Defaults to latest date
* Accepts regional chart argument (e.g. global, us, uk)
* Defaults to global charts

In [2]:
def webScrapeSpotify(region='global', chart_type='top_200', date='latest'):

    #Set Source ID
    s_id =1
    
    #Set Chart Type
    if (chart_type=='top_200'):
        c_type = 'regional'
        c_id = 2
    elif (chart_type=='viral_50'):
        c_type = 'viral'
        c_id = 1
    else:
        print(f"{chart_type} is an invalid chart type")

    #Set Region
    if (region=='global'):
        reg = 'global'
        r_id = 1
    elif (region=='us'):
        reg = 'us'
        r_id = 2
    elif (region=='uk'):
        reg = 'gb'
        r_id = 3
    else:
        print(f"{region} is an invalid region")
    
    #Set URL
    base_url = "https://spotifycharts.com/"
    url = f"{base_url}{c_type}/{reg}/daily/"   
    
    #Submit Request
    html = requests.get(url+date)

    #Create BeautifulSoup Object
    soup = BeautifulSoup(html.text, 'lxml')
    table = soup.findAll('table',{"class":"chart-table"})[0]

    #Import table to dataframe 
    df = pd.read_html(str(table), header=0)[0]
    df[['title','artist']] = df['Track'].str.split(pat = " by ", expand=True)
    df = df.drop(columns=['Unnamed: 0', 'Unnamed: 2', 'Track'])
    df.rename(columns={"Unnamed: 1":"position", "Streams":"streams"}, inplace=True)
    column_names = ["position", "title", "artist", "streams"]
    df = df.reindex(columns=column_names)
    df.insert(loc=4, column='source_id', value=s_id)
    df.insert(loc=5, column='chart_id', value=c_id)
    df.insert(loc=6, column='region_id', value=r_id)
    
    
    return df

### Validate: Global Top 200 for Different Chart Types

In [3]:
df = webScrapeSpotify()
df.head()

Unnamed: 0,position,title,artist,streams,source_id,chart_id,region_id
0,1,ROCKSTAR (feat. Roddy Ricch),DaBaby,5379475,1,2,1
1,2,Blinding Lights,The Weeknd,4614819,1,2,1
2,3,Roses - Imanbek Remix,SAINt JHN,3973534,1,2,1
3,4,Savage Love (Laxed - Siren Beat),Jawsh 685,3370880,1,2,1
4,5,death bed (coffee for your head) (feat. beabad...,Powfu,3267236,1,2,1


In [4]:
df_us = webScrapeSpotify('us')
df_us.head()

Unnamed: 0,position,title,artist,streams,source_id,chart_id,region_id
0,1,ROCKSTAR (feat. Roddy Ricch),DaBaby,1521205,1,2,2
1,2,Party Girl,StaySolidRocky,1054184,1,2,2
2,3,Blueberry Faygo,Lil Mosey,941424,1,2,2
3,4,Blinding Lights,The Weeknd,915398,1,2,2
4,5,THE SCOTTS,THE SCOTTS,871482,1,2,2


In [5]:
df_uk = webScrapeSpotify('uk')
df_uk.head()

Unnamed: 0,position,title,artist,streams,source_id,chart_id,region_id
0,1,ROCKSTAR (feat. Roddy Ricch),DaBaby,500863,1,2,3
1,2,Rain On Me (with Ariana Grande),Lady Gaga,346139,1,2,3
2,3,Breaking Me,Topic,328470,1,2,3
3,4,Rover (feat. DTG),S1mba,320647,1,2,3
4,5,Blinding Lights,The Weeknd,318384,1,2,3


In [6]:
df_v50 = webScrapeSpotify('global', 'viral_50')
df_v50df_uk = webScrapeSpotify('uk').head()

In [7]:
df_us_v50 = webScrapeSpotify('us', 'viral_50')
df_us_v50.head()

Unnamed: 0,position,title,artist,streams,source_id,chart_id,region_id
0,1,I See Red,Everybody Loves an Outlaw,,1,1,2
1,2,Stunnin' (feat. Harm Franklin),Curtis Waters,,1,1,2
2,3,Hard For Me,Michele Morrone,,1,1,2
3,4,Then Leave (feat. Queendome Come),Beatking,,1,1,2
4,5,Hood Baby,KBFR,,1,1,2


In [8]:
df_uk_v50 = webScrapeSpotify('uk', 'viral_50')
df_uk_v50.head()

Unnamed: 0,position,title,artist,streams,source_id,chart_id,region_id
0,1,I See Red,Everybody Loves an Outlaw,,1,1,3
1,2,Stunnin' (feat. Harm Franklin),Curtis Waters,,1,1,3
2,3,Who's That What's That,Niko B,,1,1,3
3,4,Hard For Me,Michele Morrone,,1,1,3
4,5,Feel It,Michele Morrone,,1,1,3


# Create database connection

In [9]:
connection_string = f"postgresql+psycopg2://{SQL_USERNAME}:{SQL_PASSWORD}@{SQL_IP}:{PORT}/{DATABASE}"
engine = create_engine(connection_string)

In [10]:
engine.table_names()

[]

In [11]:
#conn.close()
#engine.dispose

# Create Function to Load DataFrame into Database

In [12]:
def loadDFintoDB(df, table='tracks'):
    
    #Connect to Database
    connection_string = f"postgresql+psycopg2://{SQL_USERNAME}:{SQL_PASSWORD}@{SQL_IP}:{PORT}/{DATABASE}"
    engine = create_engine(connection_string)
    conn =  engine.connect()
    
    #Load DataFrame into Database 
    df.to_sql(name=table, con=conn, if_exists='append',index=False)
    
    #Disconnect from Database
    conn.close()
    engine.dispose

### Load UK Chart into Database

In [13]:
loadDFintoDB(df)

In [14]:
loadDFintoDB(df_us)
loadDFintoDB(df_uk)
loadDFintoDB(df_v50)
loadDFintoDB(df_us_v50)
loadDFintoDB(df_uk_v50)

# Verify Database

In [15]:
#Connect to Database
connection_string = f"postgresql+psycopg2://{SQL_USERNAME}:{SQL_PASSWORD}@{SQL_IP}:{PORT}/{DATABASE}"
engine = create_engine(connection_string)
conn =  engine.connect()

In [16]:
query = """
            SELECT 
                *
            FROM
                tracks
            LIMIT 5;
        """

In [17]:
test = pd.read_sql(query, con=conn)
test.head()

Unnamed: 0,position,title,artist,streams,source_id,chart_id,region_id
0,1,ROCKSTAR (feat. Roddy Ricch),DaBaby,5379475,1,2,1
1,2,Blinding Lights,The Weeknd,4614819,1,2,1
2,3,Roses - Imanbek Remix,SAINt JHN,3973534,1,2,1
3,4,Savage Love (Laxed - Siren Beat),Jawsh 685,3370880,1,2,1
4,5,death bed (coffee for your head) (feat. beabad...,Powfu,3267236,1,2,1


In [18]:
conn.close()
engine.dispose

<bound method Engine.dispose of Engine(postgresql+psycopg2://postgres:***@35.224.117.170:5432/postgres)>