Goal:
- Determine which country-network pairs do not have a flat number of organic conversions over time
- Find out how we can flatten out these country-network pairs

### 1.1. Fetching data from BigQuery (using bigquery api)

In [None]:
from google.cloud import bigquery
import pandas as pd
import matplotlib.pyplot as plt

def run_query(client: bigquery.Client, query: str):
    """Run ``query`` through ``client`` and return the result of ``to_dataframe()``."""
    query_job = client.query(query)
    return query_job.to_dataframe()

In [None]:
# BigQuery client passed to run_query() below; no project or credentials are
# given here, so the client library's defaults apply.
client = bigquery.Client()

In [None]:
# Earliest date (inclusive) selected by the BigQuery queries in this notebook.
MIN_DATE_THRESHOLD = '2025-06-01'

In [None]:
# Monthly organics totals per network, platform and country bucket
# ("US", or "ROW" for every other country), starting at MIN_DATE_THRESHOLD.
#
# GROUP BY uses SELECT-list ordinals: `country` names both the source column
# and the US/ROW alias, so grouping by that name is ambiguous and may not
# group on the bucket.
monthly_organics_query = f"""
SELECT
    EXTRACT(YEAR FROM date)  AS year,
    EXTRACT(MONTH FROM date) AS month,
    network,
    platform,
    CASE
      WHEN country = "US" THEN "US"
      ELSE "ROW"
    END AS country,
    SUM(installs)  AS installs,
    SUM(trials)    AS trials,
    SUM(paid)      AS paid,
    SUM(revenues)  AS revenue
FROM `relax-melodies-android.ua_organics_prod.organics_ensemble`
WHERE date >= "{MIN_DATE_THRESHOLD}"
GROUP BY 1, 2, 3, 4, 5
"""

In [None]:
# Run the monthly organics query through the BigQuery client and keep the
# result returned by run_query().
df_organics = run_query(client, monthly_organics_query)

In [None]:
# Monthly totals for the 'Appsflyer Aggregate' network per platform and
# country bucket ('US', or 'ROW' for every other country), starting at
# MIN_DATE_THRESHOLD.
#
# GROUP BY uses SELECT-list ordinals: `country` names both the source column
# and the US/ROW alias, so grouping by that name is ambiguous and may not
# group on the bucket.
monthly_appsflyer_aggregate = f"""
select 
    EXTRACT(YEAR FROM date)  AS year,
    EXTRACT(MONTH FROM date) AS month,
    network,
    platform,
    CASE
      WHEN country = 'US' THEN 'US'
      ELSE 'ROW'
    END AS country,
    SUM(installs)  AS installs,
    SUM(trials)    AS trials,
    SUM(paid)      AS paid,
    SUM(revenues)  AS revenue
from `relax-melodies-android.ua_dashboard_prod.pre_final_view`
WHERE 
network = 'Appsflyer Aggregate'
and date >= '{MIN_DATE_THRESHOLD}'
GROUP BY 1, 2, 3, 4, 5
"""

In [None]:
# Run the 'Appsflyer Aggregate' monthly query through the BigQuery client and
# keep the result returned by run_query().
df_appsflyer = run_query(client, monthly_appsflyer_aggregate)

### 1.2. Fetching data from BigQuery (using duckdb)

In [None]:
import duckdb

In [None]:
# Connect to DuckDB using an in-memory database (the ':memory:' target)
conn = duckdb.connect(':memory:')

# Install the `bigquery` extension from the community repository, then load it
# into this connection so the BigQuery statements below are available
conn.sql("INSTALL bigquery FROM community")
conn.sql("LOAD bigquery")

In [None]:
# GCP project ID interpolated into the DuckDB ATTACH and bigquery_query() calls.
BQ_PROJECT_ID = 'relax-melodies-android'

In [None]:
# Attach the BigQuery project named by BQ_PROJECT_ID under the alias `bq`,
# in read-only mode (change BQ_PROJECT_ID to point at another GCP project)
conn.sql(f"ATTACH 'project={BQ_PROJECT_ID}' AS bq (TYPE bigquery, READ_ONLY)")

# Optional: Enable the experimental scan for improved performance
conn.sql("SET bq_experimental_use_incubating_scan=TRUE")

In [None]:
# bigquery_query() receives the BigQuery SQL as a single-quoted DuckDB string
# literal. Double any single quote inside the SQL so it cannot terminate that
# literal early (monthly_organics_query has none today, but
# monthly_appsflyer_aggregate does).
escaped_organics_query = monthly_organics_query.replace("'", "''")

# Run the monthly organics query on BigQuery through the DuckDB extension.
dd_organics_results = conn.sql(f"""
    SELECT *
    FROM bigquery_query(
      '{BQ_PROJECT_ID}',
      '{escaped_organics_query}'
    )
""")

In [None]:
# Convert the DuckDB result to a DataFrame.
# NOTE: this rebinds `df_organics`, replacing the frame fetched through the
# BigQuery API in section 1.1.
df_organics = dd_organics_results.df()

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import plotly.express as px

def plot_organics_by_network_platform(df: pd.DataFrame, platform: str, country: str):
    """Plot monthly installs for one platform/country, one line per network.

    Args:
        df: Monthly data with ``year``, ``month``, ``platform``, ``country``
            and ``installs`` columns. When a ``network`` column is present,
            each network is drawn as its own labelled line; otherwise a
            single line is drawn.
        platform: Value of the ``platform`` column to keep.
        country: Value of the ``country`` column to keep.

    Returns:
        The matplotlib figure that was drawn. It is also displayed with
        ``plt.show()``.
    """
    keep = (df['platform'] == platform) & (df['country'] == country)
    selected = df.loc[keep].copy()
    # Use the first day of each month as the x-axis timestamp.
    selected['date'] = pd.to_datetime(selected[['year', 'month']].assign(day=1))

    if 'network' in selected.columns:
        # dropna=False keeps rows whose network is missing instead of
        # silently dropping them from the chart.
        groups = selected.groupby('network', dropna=False)
    else:
        groups = [(None, selected)]

    fig, ax = plt.subplots()
    for network, rows in groups:
        # Sum per month so each line has exactly one point per date, in
        # chronological order. Drawing the rows of several networks as one
        # line made the previous version zigzag between them.
        monthly_installs = rows.groupby('date')['installs'].sum(min_count=1)
        line_kwargs = {} if network is None else {'label': str(network)}
        ax.plot(monthly_installs.index, monthly_installs, **line_kwargs)

    # Only add a legend when at least one labelled line exists.
    if ax.get_legend_handles_labels()[0]:
        ax.legend(title='network')
    ax.set_title(f"{platform} {country}")
    ax.set_xlabel('month')
    ax.set_ylabel('installs')
    plt.show()
    return fig
    


In [None]:
# Draw one chart per platform / country bucket, in the same order as before;
# `fig` ends up bound to the result of the last call.
for target_platform, target_country in (
    ('android', 'US'),
    ('android', 'ROW'),
    ('ios', 'US'),
    ('ios', 'ROW'),
):
    fig = plot_organics_by_network_platform(df_organics, target_platform, target_country)


In [None]:
# platform = 'android'
# country = 'US'
# df = df_organics.copy()

# platform_mask = df['platform'] == platform
# country_mask = df['country'] == country
# dff = df.loc[platform_mask & country_mask].copy()
# dff['date'] = pd.to_datetime(dff[['year', 'month']].assign(day=1))
# dff = dff.sort_values(by=['date'])

In [None]:
# NOTE(review): in the visible part of this notebook `dff` is only assigned
# inside plot_organics_by_network_platform() and in the commented-out cell
# above, so this line raises NameError unless that cell is restored and run
# first — confirm whether this scratch cell is still needed.
plt.plot(dff['date'], dff['installs'])

### 2. Plot Monthly Organics Conversions (Total)