# Song Stream Predictions

### Connecting to Snowflake

This script sets up a connection to Snowflake using SQLAlchemy, runs queries through a context-managed connection, and provides two helper functions (`get_sodatone_query_dfs` and `get_sodatone_query_df`) for querying Snowflake with the Sodatone role and warehouse. Sodatone is a specialized data analytics platform tailored for the music industry. It provides comprehensive insights into streaming data from major platforms like Spotify and Apple Music. With Sodatone, we can track performance metrics, analyze audience demographics, and leverage predictive analytics to drive data-driven signings.

In [1]:
# imports
import sys
import pandas as pd
from io import BytesIO
import numpy as np
# Put the parent directory on the import path so modules located one level
# above this notebook can be imported.
sys.path.append('..')
import snowflake
import contextlib
from sqlalchemy import create_engine
from typing import List
from IPython.display import display
# import okta1

import os
from dotenv import load_dotenv
# Load variables from a local .env file (if any) into the process environment;
# the Okta credentials are read from the environment with os.getenv below.
load_dotenv()



# import dodb
# Do not truncate columns when a DataFrame is displayed.
pd.options.display.max_columns = None
# WMG Snowflake general
# NOTE(review): in this part of the notebook SNOWFLAKE_AUTHENTICATOR is only the
# default of _get_engine's `authenticator` argument, and _get_engine does not
# pass that argument on (it hard-codes 'externalbrowser') - confirm whether this
# constant is still needed.
SNOWFLAKE_AUTHENTICATOR="https://wmg.okta.com"
SNOWFLAKE_ACCOUNT="wmg-datalab"

# Sodatone-specific
# Role and warehouse used by get_sodatone_query_df / get_sodatone_query_dfs.
SNOWFLAKE_SODATONE_ROLE="ENT_OKTA_SNOWFLAKE_DATALAB_ATLANTIC"
SNOWFLAKE_SODATONE_WAREHOUSE="ATLANTIC_SANDBOX_WH_XS"



# --- CONFIG ---
# Credentials come from the environment so they are never hard-coded in the
# notebook. os.getenv returns None when a variable is not set.
# your okta email
SNOWFLAKE_OKTA_USER = os.getenv("OKTA_EMAIL")
# your okta password
SNOWFLAKE_OKTA_PASSWORD = os.getenv("OKTA_PASSWORD")
def _get_engine(
        role: str,
        warehouse: str,
        user: str = SNOWFLAKE_OKTA_USER,
        password: str = SNOWFLAKE_OKTA_PASSWORD,
        account: str = SNOWFLAKE_ACCOUNT,
        authenticator: str = SNOWFLAKE_AUTHENTICATOR):
    """Create a SQLAlchemy engine for Snowflake that authenticates via the browser.

    Args:
        role: Snowflake role to run queries as.
        warehouse: Snowflake warehouse to run queries in.
        user: Okta user (defaults to the OKTA_EMAIL environment value).
        password: Okta password (defaults to the OKTA_PASSWORD environment value).
        account: Snowflake account identifier.
        authenticator: NOTE(review): accepted but currently unused - the engine
            is always created with the 'externalbrowser' authenticator.

    Returns:
        The engine returned by ``create_engine``.
    """
    engine_url = _get_engine_url(role, warehouse, user, password, account)
    # 'externalbrowser' opens the machine's default web browser for the Okta
    # login, which then redirects to localhost with a token that the Snowflake
    # engine consumes to authenticate queries.
    # The engine is recreated for every connection context, so group SQL
    # queries into one iterable of statements to avoid repeated login pop-ups.
    browser_auth_args = {'authenticator': 'externalbrowser'}
    return create_engine(engine_url, connect_args=browser_auth_args)

def _get_engine_url(role: str, warehouse: str, user: str, password: str, account: str) -> str:
    """Build the ``snowflake://`` SQLAlchemy URL for the given credentials.

    User, password, warehouse and role are percent-encoded so that characters
    with a meaning inside a URL (``@``, ``:``, ``/``, ``&``, ``%``, spaces, ...)
    cannot corrupt it. Values made only of letters, digits, ``_``, ``-``, ``.``
    and ``~`` are emitted unchanged.

    Args:
        role: Snowflake role, sent as the ``role`` query parameter.
        warehouse: Snowflake warehouse, sent as the ``warehouse`` query parameter.
        user: Login name (an e-mail address is fine).
        password: Password; when ``None`` or empty the ``:password`` part is
            left out instead of embedding the text ``None`` in the URL.
        account: Snowflake account identifier, used as the URL host.

    Returns:
        The connection URL as a string.
    """
    # Function-scope import keeps this helper self-contained.
    from urllib.parse import quote

    def _encode(value) -> str:
        # safe='' also escapes '/', and spaces become %20 (never '+'), so the
        # value survives both plain and plus-aware URL decoding.
        return quote(str(value), safe='')

    credentials = _encode(user)
    if password:
        credentials = f'{credentials}:{_encode(password)}'

    return (
        f'snowflake://{credentials}@{account}/'
        f'?warehouse={_encode(warehouse)}&role={_encode(role)}'
    )

@contextlib.contextmanager
def _get_conn(engine):
    """Yield an open connection from ``engine`` and always clean up afterwards.

    On exit the connection is closed and the engine is disposed. The engine is
    disposed even when ``engine.connect()`` or ``conn.close()`` raises, so a
    failed login or a failing close does not leave the engine's resources behind.

    Args:
        engine: Object providing ``connect()`` and ``dispose()`` (a SQLAlchemy
            engine in this notebook).

    Yields:
        The connection returned by ``engine.connect()``.
    """
    try:
        conn = engine.connect()
        try:
            yield conn
        finally:
            conn.close()
    finally:
        # Outer finally: runs whether connect(), the caller's body, or close() failed.
        engine.dispose()

def _get_role_conn(role: str, warehouse: str):
    """Return a connection context manager for the given role and warehouse.

    A fresh engine is built with ``_get_engine`` on every call and handed to
    ``_get_conn``; use the result in a ``with`` statement.
    """
    role_engine = _get_engine(role, warehouse)
    return _get_conn(role_engine)

def get_results_as_dfs(role: str, warehouse: str, sql_queries: List[str]) -> List[pd.DataFrame]:
    """Run several SQL statements over one connection.

    Args:
        role: Snowflake role to run the statements as.
        warehouse: Snowflake warehouse to run the statements in.
        sql_queries: SQL statements, executed in the given order.

    Returns:
        One DataFrame per statement, in the same order as ``sql_queries``.
    """
    with _get_role_conn(role, warehouse) as conn:
        print(f"Running sql as {role} in warehouse: {warehouse}")
        frames = []
        for statement in sql_queries:
            frames.append(pd.read_sql(statement, conn))
        return frames

def get_sodatone_query_dfs(sql: List[str]):
    """Run several SQL statements with the Sodatone role and warehouse.

    Returns the list of DataFrames produced by ``get_results_as_dfs``.
    """
    return get_results_as_dfs(
        role=SNOWFLAKE_SODATONE_ROLE,
        warehouse=SNOWFLAKE_SODATONE_WAREHOUSE,
        sql_queries=sql,
    )
def get_results_as_df(role: str, warehouse: str, sql_query: str) -> pd.DataFrame:
    """Run a single SQL statement and return its result.

    Args:
        role: Snowflake role to run the statement as.
        warehouse: Snowflake warehouse to run the statement in.
        sql_query: The SQL statement to execute.

    Returns:
        The query result as a DataFrame.
    """
    with _get_role_conn(role, warehouse) as conn:
        print(f"Running sql as {role} in warehouse: {warehouse}")
        frame = pd.read_sql(sql_query, conn)
    return frame
    
def get_sodatone_query_df(sql: str):
    """Run one SQL statement with the Sodatone role and warehouse.

    Returns the DataFrame produced by ``get_results_as_df``.
    """
    return get_results_as_df(
        role=SNOWFLAKE_SODATONE_ROLE,
        warehouse=SNOWFLAKE_SODATONE_WAREHOUSE,
        sql_query=sql,
    )

In [2]:
# SQL that builds the dataset pulled from Snowflake. CTE by CTE:
#   joined           - rows of LUMINATE_DAILY_SONG_METRICS joined to FINAL_TRACKS on
#                      UNIFIED_SONG_ID, restricted to METRIC_TYPE
#                      'Streaming On-Demand Audio', REGION 'us', DATE later than
#                      2024-01-01 and DAYS_SINCE_RELEASE >= 0. rn numbers rows that
#                      share (song, date, release date) in arbitrary order.
#   EarliestReleases - earliest RELEASE_DATE per song among the rows in `joined`.
#   filtered_joined  - keeps only rows for that earliest release date with rn = 1.
#   lag              - day_0 is THIS_DAY of the current row, day_1..day_6 are THIS_DAY
#                      of the 1..6 preceding rows of the same song (by DATE), and
#                      target is THIS_WEEK 8 rows ahead.
#                      NOTE(review): LAG/LEAD offsets count rows, not calendar days,
#                      so they only equal day offsets when a song has one row per
#                      consecutive date - confirm against the data.
# The final SELECT keeps END_OF_WEEK rows that have a target, ordered by song and
# then by DATE, newest first.
query = '''WITH joined AS (
  SELECT
    m.ARTIST,
    m.TITLE,
    m.UNIFIED_SONG_ID,
    m.DATE,
    t.RELEASE_DATE,
    m.THIS_DAY,
    m.THIS_WEEK,
    m.DATE - CAST(t.RELEASE_DATE AS DATE) AS DAYS_SINCE_RELEASE,
    (DAYOFWEEK(m.DATE) + 2) % 7 AS day_of_week,
    ROW_NUMBER() OVER(PARTITION BY m.UNIFIED_SONG_ID, m.DATE, t.RELEASE_DATE ORDER BY (SELECT NULL)) AS rn,
    END_OF_WEEK,
    POPULARITY
  FROM
    "SODATONE"."SODATONE"."LUMINATE_DAILY_SONG_METRICS" AS m
  JOIN
    "APP_REACT"."JOHN_S"."FINAL_TRACKS" AS t ON m.UNIFIED_SONG_ID = t.UNIFIED_SONG_ID
  WHERE
    m.METRIC_TYPE = 'Streaming On-Demand Audio'
    AND m.REGION = 'us'
    AND DAYS_SINCE_RELEASE >= 0
    AND DATE > '2024-01-01'
),
EarliestReleases AS (
    SELECT
        UNIFIED_SONG_ID,
        MIN(RELEASE_DATE) AS earliest_release_date
    FROM
        joined
    GROUP BY
        UNIFIED_SONG_ID
),
filtered_joined AS (
    SELECT
        j.*
    FROM
        joined AS j
    JOIN
        EarliestReleases e
    ON
        j.UNIFIED_SONG_ID = e.UNIFIED_SONG_ID
        AND j.RELEASE_DATE = e.earliest_release_date
    WHERE
        rn = 1
),
lag AS (
  SELECT *,
     THIS_DAY AS day_0,
     LAG(THIS_DAY, 1, NULL) OVER(PARTITION BY UNIFIED_SONG_ID ORDER BY DATE ASC) AS day_1,
     LAG(THIS_DAY, 2, NULL) OVER(PARTITION BY UNIFIED_SONG_ID ORDER BY DATE ASC) AS day_2,
     LAG(THIS_DAY, 3, NULL) OVER(PARTITION BY UNIFIED_SONG_ID ORDER BY DATE ASC) AS day_3,
     LAG(THIS_DAY, 4, NULL) OVER(PARTITION BY UNIFIED_SONG_ID ORDER BY DATE ASC) AS day_4,
     LAG(THIS_DAY, 5, NULL) OVER(PARTITION BY UNIFIED_SONG_ID ORDER BY DATE ASC) AS day_5,
     LAG(THIS_DAY, 6, NULL) OVER(PARTITION BY UNIFIED_SONG_ID ORDER BY DATE ASC) AS day_6,
     LEAD(THIS_WEEK, 8, NULL) OVER(PARTITION BY UNIFIED_SONG_ID ORDER BY DATE ASC) AS target
  FROM filtered_joined
)
SELECT 
    * 
FROM 
    lag
WHERE 
    target IS NOT NULL
    AND END_OF_WEEK = TRUE
ORDER BY 
    UNIFIED_SONG_ID, DATE DESC;'''

In [3]:
# Run the query with the Sodatone role and warehouse. The connection uses the
# 'externalbrowser' authenticator, so a browser window opens for the Okta login.
df = get_sodatone_query_df(query)
# Bare last expression so the notebook renders the resulting DataFrame.
df

Initiating login request with your identity provider. A browser window should have opened for you to complete the login. If you can't see it, check existing browser windows, or your OS settings. Press CTRL+C to abort and try again...
Going to open: https://wmg.okta.com/app/snowflake/exkab0aaa1IEBMlNX2p7/sso/saml?SAMLRequest=jZJBc9owEIX%2Fikc927IdA7EGyDhAWjfB0EA6mdyELUCDLDlaOSb%2FvrIJHXpIpjeN9N7ut3o7vDmWwnljGriSIxR4PnKYzFXB5W6EntZ37jVywFBZUKEkG6F3BuhmPARaiooktdnLR%2FZaMzCOLSSBdA8jVGtJFAUORNKSATE5WSXzBxJ6Pqm0MipXAl1YvnZQAKaNJTxbCuAWb29MRTBumsZrrjyldzj0fR%2F7MbaqVvLtrD%2FamT7RB9iPWr1VWPnyg%2B2Wy9MXfIW1OYmA%2FFivl%2B5ysVojJzmjTpSEumR6xfQbz9nT48MJAFqCcucW1FBBNx5I1WwFPbBclVVtbD3PnvCWFVioHbcjp9MRqg68iAf3TfGLzr%2F3fr5k2STIkt5ilkyuZ%2F34efJax1kVLpN9vEg3UY6c3%2BdMwzbTFKBmqWyTNPbKDyPX77vhYB2GJIpIz%2Ff6V9ELcqY2SS6p6ZwXuJ46GNqB0arCf5kxOx7oxqeUBunsdi6y57AaYACF21DRaU9I11yP%2F3%2F6Ib70fSxbZv8%2FnS6V4Pm7c6d0Sc3n8QRe0N3wwt12UsJKykVSFJoB2JiEUM1EM2rsThtdM4THp67%2FbvX4Dw%3D%3D&RelayState=54939 to authentic

Unnamed: 0,artist,title,unified_song_id,date,release_date,this_day,this_week,days_since_release,day_of_week,rn,end_of_week,popularity,day_0,day_1,day_2,day_3,day_4,day_5,day_6,target
0,Young Thug,Hercules,100070,2024-06-13,2016-02-05,15999,101589.0,3051,6,1,True,48,15999,15412.0,15053.0,13938.0,12305.0,14530.0,16076.0,100850
1,Young Thug,Hercules,100070,2024-06-06,2016-02-05,15357,100364.0,3044,6,1,True,48,15357,15191.0,14927.0,14039.0,12311.0,14205.0,15559.0,103313
2,Young Thug,Hercules,100070,2024-05-30,2016-02-05,15457,103225.0,3037,6,1,True,48,15457,15049.0,14144.0,12212.0,12667.0,14507.0,16328.0,101589
3,Young Thug,Hercules,100070,2024-05-23,2016-02-05,15598,103165.0,3030,6,1,True,48,15598,15460.0,15692.0,14715.0,12363.0,13894.0,15503.0,100364
4,Young Thug,Hercules,100070,2024-05-16,2016-02-05,15682,103814.0,3023,6,1,True,48,15682,15269.0,14937.0,14127.0,12459.0,14773.0,15918.0,102812
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195607,Don Toliver,New Drop,243502812,2024-06-13,2024-06-13,702,,0,6,1,True,0,702,,,,,,,3726859
195608,Don Toliver,5 To 10,243502816,2024-06-13,2024-06-13,600,,0,6,1,True,0,600,,,,,,,2756571
195609,Don Toliver,Last Laugh,243502819,2024-06-13,2024-06-13,552,,0,6,1,True,0,552,,,,,,,2231508
195610,Don Toliver,Hardstone National Anthem,243502822,2024-06-13,2024-06-13,522,,0,6,1,True,0,522,,,,,,,1757961


In [4]:
# Cache the query result on disk so later cells can reload it without querying
# Snowflake (and going through the browser login) again.
SNOWFLAKE_DATA_CSV = 'csv_files/snowflake_data.csv'

# to_csv does not create missing directories, so make sure the target folder
# exists; exist_ok=True makes re-running this cell harmless.
os.makedirs(os.path.dirname(SNOWFLAKE_DATA_CSV), exist_ok=True)
df.to_csv(SNOWFLAKE_DATA_CSV, index=False)

# Load the DataFrame from the CSV file
# NOTE: read_csv re-infers column types, so date columns come back as plain
# strings unless they are parsed explicitly.
data = pd.read_csv(SNOWFLAKE_DATA_CSV)