# SQL Analysis – Day 1 Quick Checks 

In [8]:
from pathlib import Path
import duckdb
import pandas as pd

# Project layout. The parent of the current working directory is treated as
# the project root, and every other location is derived from it.
PROJECT_ROOT = Path("..").resolve()
DATA_PATH = PROJECT_ROOT.joinpath("data", "processed")
SQL_DIR = PROJECT_ROOT.joinpath("references", "sql")
FIG_DIR = PROJECT_ROOT.joinpath("reports", "figures")
TAB_DIR = PROJECT_ROOT.joinpath("reports", "tables")

# Create both report output folders up front; existing folders are left alone.
for report_dir in (FIG_DIR, TAB_DIR):
    report_dir.mkdir(parents=True, exist_ok=True)

# DuckDB connection opened with default arguments; reused by the queries below.
con = duckdb.connect()

# Fail early if the cleaned dataset is not where the notebook expects it.
clean_csv = DATA_PATH.joinpath("clean_data.csv")
if not clean_csv.exists():
    raise FileNotFoundError("data/processed/clean_data.csv bulunamadı")

# Build the `users` view on top of the cleaned CSV.
#
# 1) Every column is first read as VARCHAR (ALL_VARCHAR=TRUE).
# 2) Each column is then cast explicitly with TRIM + NULLIF + TRY_CAST, so
#    blank or malformed values become NULL instead of raising.
#
# Defaults applied by the COALESCE wrappers when a value is blank/uncastable:
#   retention_1 / retention_7 -> FALSE
#   purchase                  -> 0
#   cac / revenue / roi_row   -> 0.0
# userid and session_count have no COALESCE, so they stay NULL in that case.
#
# The path is spliced into the statement as a single-quoted SQL literal, so
# any single quote inside it must be doubled; otherwise a project folder such
# as "o'brien" would terminate the literal early and break the statement.
clean_csv_literal = clean_csv.as_posix().replace("'", "''")

con.execute(f"""
    CREATE OR REPLACE VIEW users AS
    SELECT
      TRY_CAST(TRIM(userid) AS BIGINT)                              AS userid,
      TRIM(version)                                                 AS version,
      TRY_CAST(TRIM(session_count) AS INTEGER)                      AS session_count,

      COALESCE(TRY_CAST(NULLIF(TRIM(retention_1), '') AS BOOLEAN), FALSE) AS retention_1,
      COALESCE(TRY_CAST(NULLIF(TRIM(retention_7), '') AS BOOLEAN), FALSE) AS retention_7,

      TRIM(acquisition_channel)                                     AS acquisition_channel,
      TRIM(country)                                                 AS country,
      TRIM(platform)                                                AS platform,

      COALESCE(TRY_CAST(NULLIF(TRIM(purchase), '') AS INTEGER), 0)  AS purchase,
      COALESCE(TRY_CAST(NULLIF(TRIM("CAC"), '') AS DOUBLE), 0.0)    AS cac,
      COALESCE(TRY_CAST(NULLIF(TRIM(revenue), '') AS DOUBLE), 0.0)  AS revenue,
      COALESCE(TRY_CAST(NULLIF(TRIM("ROI"), '') AS DOUBLE), 0.0)    AS roi_row
    FROM read_csv_auto(
      '{clean_csv_literal}',
      HEADER=TRUE,
      SAMPLE_SIZE=-1,
      ALL_VARCHAR=TRUE
    );
""")

# Quick check 1: look at the first five rows of the `users` view.
preview_sql = """
  SELECT *
  FROM users
  LIMIT 5
"""
preview = con.execute(preview_sql).df()
display(preview)

# Quick check 2: report the DuckDB type of each casted column.
# The query reads a single row (LIMIT 1), so the result has at most one row.
types_sql = """
  SELECT
    typeof(userid)            AS t_userid,
    typeof(session_count)     AS t_session_count,
    typeof(retention_1)       AS t_ret1,
    typeof(retention_7)       AS t_ret7,
    typeof(purchase)          AS t_purchase,
    typeof(cac)               AS t_cac,
    typeof(revenue)           AS t_revenue,
    typeof(roi_row)           AS t_roi_row
  FROM users
  LIMIT 1
"""
types = con.execute(types_sql).df()
display(types)


Unnamed: 0,userid,version,session_count,retention_1,retention_7,acquisition_channel,country,platform,purchase,cac,revenue,roi_row
0,116,gate_30,3,False,False,Facebook,USA,Google Play,0,2.8,0.038024,-0.98642
1,337,gate_30,38,True,False,TikTok,USA,Google Play,0,1.7,0.100486,-0.94089
2,377,gate_40,165,True,False,Facebook,USA,Google Play,0,2.8,0.140215,-0.949923
3,483,gate_40,1,False,False,Facebook,Mexico,Google Play,0,2.8,0.019012,-0.99321
4,488,gate_40,179,True,True,TikTok,USA,App Store,0,1.7,1.23444,-0.273859


Unnamed: 0,t_userid,t_session_count,t_ret1,t_ret7,t_purchase,t_cac,t_revenue,t_roi_row
0,BIGINT,INTEGER,BOOLEAN,BOOLEAN,INTEGER,DOUBLE,DOUBLE,DOUBLE


In [9]:
# Stage 1: read the three reference queries from SQL_DIR (UTF-8 text).
q_installs, q_funnel, q_roi = (
    (SQL_DIR / sql_name).read_text(encoding="utf-8")
    for sql_name in (
        "installs_by_channel.sql",
        "funnel_step_rates.sql",
        "roi_by_channel.sql",
    )
)

# Stage 2: run each query on the shared connection and fetch a DataFrame.
df_installs, df_funnel, df_roi = (
    con.execute(query_text).df()
    for query_text in (q_installs, q_funnel, q_roi)
)

# Stage 3: export every result to TAB_DIR as CSV, without the index column.
# Note that the funnel export is named "funnel.csv", not after its SQL file.
for result_frame, csv_name in (
    (df_installs, "installs_by_channel.csv"),
    (df_funnel, "funnel.csv"),
    (df_roi, "roi_by_channel.csv"),
):
    result_frame.to_csv(TAB_DIR / csv_name, index=False)

# Stage 4: show the results; the funnel frame is displayed in full.
display(df_installs.head(), df_funnel, df_roi.head())

Unnamed: 0,acquisition_channel,users
0,Instagram,36281
1,Facebook,26776
2,TikTok,17979
3,Organic,9153


Unnamed: 0,n_users,rate_install,rate_d1_from_install,rate_d7_from_d1,rate_purchase_overall,rate_purchase_from_d7
0,90189,1.0,0.44521,0.417926,0.055772,0.299744


Unnamed: 0,acquisition_channel,users,revenue,ad_spend,roi,roas
0,Organic,9153,11075.584689,2745.9,3.033499,4.033499
1,TikTok,17979,23171.149021,30564.3,-0.241888,0.758112
2,Instagram,36281,42801.457825,83446.3,-0.487078,0.512922
3,Facebook,26776,32186.397167,74972.8,-0.570692,0.429308
