In [None]:
%load_ext autoreload
%autoreload 2

import env

import pandas as pd
import numpy as np
import json
from datetime import datetime, timedelta

# --- 1. Load configuration ---
service = env.get_gservice()

# Without a Google API client the alert settings cannot be read at all.
if not service:
    raise ConnectionError("Не удалось подключиться к Google API")

df_sheet = env.read_df_from_spreadsheet(service, env.SHEET_ID, env.SHEET_NAME)
print("Данные из Google Sheets загружены")

# Target Redshift table/schema and the name of this alert's row in the sheet.
RS_TABLE = 'incent_opex_check_universal'
RS_SCHEMA = 'ma_data'
ALERT_NAME = "01-incent.cr"

# Take the first sheet row whose 'name' matches this alert.
try:
    config_row = df_sheet.loc[df_sheet['name'] == ALERT_NAME].iloc[0]
except IndexError:
    raise ValueError(f"Алерт '{ALERT_NAME}' не найден в Google Sheet")

# NOTE: this branch only prints the status; it does not stop execution.
if config_row['active_flag'] == 'Enabled':
    print(f"Запуск алерта '{ALERT_NAME}'...")
else:
    print(f"Алерт '{ALERT_NAME}' отключен. Пропуск.")

# --- 2. Parse parameters ---
ALERT_ACTIVE_FLAG = config_row['active_flag']
N_SIGMAS = abs(float(config_row['n_sigmas']))
MIN_INSTALLS = int(config_row['threshold_installs'])
MIN_USERS = int(config_row['threshold_conv'])
ALERT_CATEGORY = config_row['metric_crit_category']

# Alerting criterion: 'ci' or 'change'. A missing/NaN cell falls back to 'ci'.
_criteria = config_row.get('criteria', 'ci')
if pd.notna(_criteria):
    CRITERIA = str(_criteria).strip().lower()
else:
    CRITERIA = 'ci'

if CRITERIA != 'change':
    # Any value other than 'change' is normalised to 'ci'; thresholds are unused.
    CRITERIA = 'ci'
    THRESHOLD_WARNING_PCT = 0
    THRESHOLD_CRIT_PCT = 0
else:
    # Thresholds are stored as absolute values; a missing/NaN cell becomes 0.
    _tw = config_row.get('threshold_warning', 0)
    _tc = config_row.get('threshold_crit', 0)
    if pd.notna(_tw):
        THRESHOLD_WARNING_PCT = abs(float(_tw))
    else:
        THRESHOLD_WARNING_PCT = 0
    if pd.notna(_tc):
        THRESHOLD_CRIT_PCT = abs(float(_tc))
    else:
        THRESHOLD_CRIT_PCT = 0

# Helper for rendering SQL lists
def to_sql_list(items):
    """Render a value or a collection of values as a SQL ``IN (...)`` list.

    Args:
        items: A list, tuple, set or frozenset of values, or a single value
            (which is treated as a one-element collection).

    Returns:
        A string such as ``"('US', 'DE')"`` or ``"(1, 2)"``. Strings are
        wrapped in single quotes, with embedded single quotes doubled so the
        value cannot terminate the literal early. Other values are rendered
        with ``str()``. An empty collection yields ``"()"``.
    """
    if isinstance(items, (list, tuple, set, frozenset)):
        values = list(items)
    else:
        values = [items]

    if not values:
        return "()"

    rendered = []
    for value in values:
        if isinstance(value, str):
            # Standard SQL escaping: a quote inside a literal is written twice.
            escaped = value.replace("'", "''")
            rendered.append(f"'{escaped}'")
        else:
            rendered.append(str(value))

    return f"({', '.join(rendered)})"

# Parse the per-alert JSON settings stored in the 'config_json' cell.
try:
    params = json.loads(config_row['config_json'])

    CONFIG_COUNTRIES = to_sql_list(params['countries'])
    CONFIG_PARTNER_ID = int(params['partner_id'])  # Written to the DB as-is
    CONFIG_PARTNER = f"'{CONFIG_PARTNER_ID}'"  # Quoted form used in the SQL query
    CONFIG_RULES = params['cw']

    # Flag: whether per-country rows are checked in addition to the 'ALL' rollup
    check_countries_val = params.get('check_countries', 'TRUE')
    CHECK_COUNTRIES = str(check_countries_val).upper() == 'TRUE'

except (json.JSONDecodeError, TypeError) as e:
    # TypeError covers a non-string cell (e.g. an empty cell read as NaN),
    # which json.loads rejects before it even attempts to decode.
    raise ValueError(f"Ошибка JSON в ячейке config_json: {e}") from e
except KeyError as e:
    raise ValueError(f"В JSON отсутствует обязательный ключ: {e}") from e

print(f"Настройки: Sigma={N_SIGMAS}")
print(f"Thresholds: MinInstalls={MIN_INSTALLS}, MinUsers={MIN_USERS}")
print(f"Check Countries: {CHECK_COUNTRIES}")
if CRITERIA == 'change':
    print(f"Критерий: CHANGE (warning={THRESHOLD_WARNING_PCT:.1%}, crit={THRESHOLD_CRIT_PCT:.1%})")
else:
    print(f"Критерий: CI (n_sigmas={N_SIGMAS})")


# --- 3. Statistics helpers ---

def calc_std_error(cr, n):
    """Return the binomial standard error ``sqrt(cr * (1 - cr) / n)``.

    Args:
        cr: Conversion rate(s) as a scalar or array-like (list, ndarray,
            pandas Series). Integer input is accepted.
        n: Sample size(s); a scalar or array-like broadcastable against ``cr``.

    Returns:
        A NumPy float array (or NumPy float scalar for scalar input) with the
        standard error. Positions where ``n == 0`` yield ``0.0`` instead of a
        division by zero.
    """
    rate = np.asarray(cr, dtype=float)
    size = np.asarray(n, dtype=float)

    # The output buffer is always float and has the broadcast shape, so the
    # result does not depend on the dtype or shape of ``cr``.
    variance = np.divide(
        rate * (1.0 - rate),
        size,
        out=np.zeros(np.broadcast(rate, size).shape, dtype=float),
        where=size != 0,
    )
    return np.sqrt(variance)

def calc_reference_ci(cr, n, z):
    """Return the confidence-interval half-width for a reference value.

    The width is ``z`` multiplied by the standard error that
    ``calc_std_error`` computes for rate ``cr`` and sample size ``n``.
    """
    return z * calc_std_error(cr, n)


# --- 4. Main check function ---

def _build_level_filter_sql(level_rules_dict):
    """Build the ``AND (...)`` SQL fragment restricting rows to configured levels."""
    exceptions = level_rules_dict.get('exceptions') or {}

    # Apps listed under 'exceptions' get their own level list ...
    conditions = [
        f"(app = '{app_name}' AND level IN {to_sql_list(levels)})"
        for app_name, levels in exceptions.items()
    ]
    excluded_apps = list(exceptions)

    # ... every other app uses the 'default' level list.
    default_levels_sql = to_sql_list(level_rules_dict['default'])
    if excluded_apps:
        excl_apps_sql = to_sql_list(excluded_apps)
        conditions.append(f"(app NOT IN {excl_apps_sql} AND level IN {default_levels_sql})")
    else:
        conditions.append(f"(level IN {default_levels_sql})")

    return " AND (" + " OR ".join(conditions) + ")"


def _window_dates(lag_weeks):
    """Return (history_start, history_end, prev_start, prev_end, current_start, current_end)."""
    today = datetime.now().date()
    # Most recent Sunday strictly before today (a full week back when today is Sunday).
    last_full_sunday = today - timedelta(days=today.weekday() + 1)

    current_end = last_full_sunday - timedelta(weeks=lag_weeks - 1)
    current_start = current_end - timedelta(days=6)

    prev_end = current_start - timedelta(days=1)
    prev_start = prev_end - timedelta(days=6)

    # The 28-day history window ends the day before the current week,
    # so it includes the "previous" week.
    history_end = current_start - timedelta(days=1)
    history_start = history_end - timedelta(weeks=4) + timedelta(days=1)

    return history_start, history_end, prev_start, prev_end, current_start, current_end


def run_check_for_window(target_cw, lag_weeks, level_rules_dict):
    """Compare the current week's CR with the previous week and a 4-week history.

    Reads the module-level settings CONFIG_PARTNER, CONFIG_COUNTRIES,
    CHECK_COUNTRIES, MIN_INSTALLS, MIN_USERS, N_SIGMAS, CRITERIA,
    THRESHOLD_WARNING_PCT and THRESHOLD_CRIT_PCT, and queries the data through
    ``env.execute_sql``.

    Args:
        target_cw: Value of the ``cw`` column to check.
        lag_weeks: How many weeks back the "current" week ends, counted from
            the last full Sunday (1 means that Sunday itself).
        level_rules_dict: Dict with a required ``'default'`` list of levels and
            an optional ``'exceptions'`` mapping of app name to its own levels.

    Returns:
        A DataFrame with one row per app/store/country/level (plus ``'ALL'``
        country rollups), the CR values, CI widths, relative changes and the
        ``is_alert_*`` / ``is_critical_*`` flags. When the query returns no
        rows or nothing passes the thresholds, an empty DataFrame is returned
        (without the computed columns).
    """
    # A. SQL conditions
    level_filter_sql = _build_level_filter_sql(level_rules_dict)

    # B. Date windows
    (history_start, history_end,
     prev_start, prev_end,
     current_start, current_end) = _window_dates(lag_weeks)

    print(f"\n--- Checking CW={target_cw} (Lag: {lag_weeks} weeks) ---")

    # C. SQL query
    sql_query = f"""
    WITH raw_data AS (
        SELECT 
            app, store, country, level, cw,
            cohort_date::DATE as cohort_date_clean, 
            unique_user_count, installs
        FROM ma_data.vinokurov_cr_data
        WHERE 
            partner_id = {CONFIG_PARTNER}
            AND country IN {CONFIG_COUNTRIES}
            AND cw = {target_cw}
            {level_filter_sql} 
            AND cohort_date::DATE >= '{history_start}' 
            AND cohort_date::DATE <= '{current_end}'
    ),
    historical_stats AS (
        SELECT app, store, country, level,
            SUM(unique_user_count) as hist_users, SUM(installs) as hist_installs
        FROM raw_data
        WHERE cohort_date_clean BETWEEN '{history_start}' AND '{history_end}'
        GROUP BY app, store, country, level
    ),
    previous_stats AS (
        SELECT app, store, country, level,
            SUM(unique_user_count) as prev_users, SUM(installs) as prev_installs
        FROM raw_data
        WHERE cohort_date_clean BETWEEN '{prev_start}' AND '{prev_end}'
        GROUP BY app, store, country, level
    ),
    current_stats AS (
        SELECT app, store, country, level,
            SUM(unique_user_count) as curr_users, SUM(installs) as curr_installs,
            MIN(cohort_date_clean) as cohort_date
        FROM raw_data
        WHERE cohort_date_clean BETWEEN '{current_start}' AND '{current_end}'
        GROUP BY app, store, country, level
    )
    SELECT 
        c.app, c.store, c.country, c.level, {target_cw} as cw, c.cohort_date,
        c.curr_installs, c.curr_users,
        p.prev_installs, p.prev_users,
        h.hist_installs, h.hist_users,
        (c.curr_users::float / NULLIF(c.curr_installs, 0)) as current_cr,
        (p.prev_users::float / NULLIF(p.prev_installs, 0)) as previous_cr,
        (h.hist_users::float / NULLIF(h.hist_installs, 0)) as historical_cr
    FROM current_stats c
    JOIN previous_stats p USING (app, store, country, level)
    JOIN historical_stats h USING (app, store, country, level)
    """

    df = env.execute_sql(sql_query)

    # No data at all for this window
    if df.empty:
        print(f"  >> No data found for CW={target_cw}. Skipping.")
        return df

    df = df.fillna(0)
    print(f"  >> Data fetched: {len(df)} rows")

    # --- Data preparation ---

    sum_cols = ['curr_installs', 'curr_users', 'prev_installs', 'prev_users', 'hist_installs', 'hist_users']
    for col in sum_cols:
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0)

    # 'ALL' rollup across countries. cohort_date is a per-country MIN() from
    # the query and may differ between countries, so it must not be a grouping
    # key (that would split the rollup into partial rows); the earliest date
    # is kept instead.
    group_cols = ['app', 'store', 'level', 'cw']
    agg_spec = {col: 'sum' for col in sum_cols}
    agg_spec['cohort_date'] = 'min'
    df_all = df.groupby(group_cols, as_index=False).agg(agg_spec)
    df_all['country'] = 'ALL'

    if CHECK_COUNTRIES:
        df = pd.concat([df, df_all], ignore_index=True)
    else:
        df = df_all

    # Recompute CR from the (possibly aggregated) counts
    df['current_cr'] = np.where(df['curr_installs'] > 0, df['curr_users'] / df['curr_installs'], 0.0)
    df['previous_cr'] = np.where(df['prev_installs'] > 0, df['prev_users'] / df['prev_installs'], 0.0)
    df['historical_cr'] = np.where(df['hist_installs'] > 0, df['hist_users'] / df['hist_installs'], 0.0)

    calc_cols = ['current_cr', 'previous_cr', 'historical_cr', 'curr_installs', 'prev_installs', 'hist_installs']
    for col in calc_cols:
        df[col] = df[col].astype(float)

    # --- Filtering (thresholds) ---
    # Both the current and the previous week must have enough volume.
    enough_volume = (
        (df['curr_installs'] >= MIN_INSTALLS)
        & (df['prev_installs'] >= MIN_INSTALLS)
        & (df['curr_users'] >= MIN_USERS)
        & (df['prev_users'] >= MIN_USERS)
    )
    df = df[enough_volume].copy()

    if df.empty:
        return df

    # --- CI width of the reference values (always computed, stored in the DB) ---
    df['prev_ci'] = calc_reference_ci(df['previous_cr'], df['prev_installs'], N_SIGMAS)
    df['hist_ci'] = calc_reference_ci(df['historical_cr'], df['hist_installs'], N_SIGMAS)

    # --- Relative change vs. each reference (0 when the reference is 0) ---
    df['change_perc_prev'] = np.where(
        df['previous_cr'] > 0,
        (df['current_cr'] - df['previous_cr']) / df['previous_cr'],
        0.0
    )
    df['change_perc_hist'] = np.where(
        df['historical_cr'] > 0,
        (df['current_cr'] - df['historical_cr']) / df['historical_cr'],
        0.0
    )

    # --- Alert logic ---
    if CRITERIA == 'change':
        # Alert on the absolute relative change. A threshold of 0 means
        # "not configured" and never fires on its own; otherwise an unset
        # warning threshold would flag every row. A critical row is always
        # an alert as well.
        warn_enabled = THRESHOLD_WARNING_PCT > 0
        crit_enabled = THRESHOLD_CRIT_PCT > 0
        for suffix in ('prev', 'hist'):
            abs_change = np.abs(df[f'change_perc_{suffix}'].values)
            is_critical = crit_enabled & (abs_change >= THRESHOLD_CRIT_PCT)
            is_warning = warn_enabled & (abs_change >= THRESHOLD_WARNING_PCT)
            df[f'is_alert_{suffix}'] = is_warning | is_critical
            df[f'is_critical_{suffix}'] = is_critical
    else:
        # CI-based: alert when current_cr falls outside reference ± ci
        for suffix, ref_col in (('prev', 'previous_cr'), ('hist', 'historical_cr')):
            lower = df[ref_col] - df[f'{suffix}_ci']
            upper = df[ref_col] + df[f'{suffix}_ci']
            df[f'is_alert_{suffix}'] = (df['current_cr'] < lower) | (df['current_cr'] > upper)
            df[f'is_critical_{suffix}'] = False

    df['is_alert_any'] = df['is_alert_hist'] | df['is_alert_prev']

    return df


# --- 5. Run the checks ---

result_frames = []

# Maps a cw value to the lag (in weeks) passed to run_check_for_window;
# cw values not listed here use a lag of 5.
LAG_MAP = {7: 2, 30: 5, 90: 14}

# A disabled alert must not query data or produce results: the status message
# printed during configuration says it is skipped, so honour that here.
if ALERT_ACTIVE_FLAG == 'Enabled':
    for cw_key_str, rules in CONFIG_RULES.items():
        cw = int(cw_key_str)  # JSON object keys are strings
        lag = LAG_MAP.get(cw, 5)

        df_res = run_check_for_window(cw, lag, rules)
        if not df_res.empty:
            result_frames.append(df_res)

# --- 6. Report ---

def _build_metric_rows(report, metric, ref_col, ci_col, change_col,
                       alert_col, crit_col, ci_category):
    """Reshape one reference comparison of ``report`` into the universal format.

    ``ci_category`` is the alert category assigned to alerting rows when the
    criterion is not 'change'; in 'change' mode rows are 'CRITICAL' when both
    the alert and critical flags are set, 'WARNING' when only the alert flag
    is set. Non-alerting rows get ``None``.
    """
    rows = report[
        ['app', 'store', 'country', 'level', 'cw', 'cohort_date',
         'current_cr', ref_col, ci_col, change_col, alert_col, crit_col]
    ].copy()
    rows['metric'] = metric

    if CRITERIA == 'change':
        rows['alert_category'] = [
            ('CRITICAL' if crit else 'WARNING') if alert else None
            for alert, crit in zip(rows[alert_col], rows[crit_col])
        ]
    else:
        rows['alert_category'] = [
            ci_category if alert else None for alert in rows[alert_col]
        ]

    return rows.drop(columns=[crit_col]).rename(columns={
        'level': 'slice1',
        'cw': 'slice2',
        'current_cr': 'current_value',
        ref_col: 'reference_value',
        ci_col: 'reference_value_ci',
        change_col: 'change_perc',
        alert_col: 'is_alert',
    })


if result_frames:
    full_report = pd.concat(result_frames, ignore_index=True)

    if not full_report.empty:
        # --- Convert to the universal format ---
        # One record per reference (previous_cr and historical_cr) for EVERY
        # row, not only for alerts.
        df_prev = _build_metric_rows(
            full_report, 'previous_cr', 'previous_cr', 'prev_ci',
            'change_perc_prev', 'is_alert_prev', 'is_critical_prev',
            ci_category='WARNING',
        )
        df_hist = _build_metric_rows(
            full_report, 'historical_cr', 'historical_cr', 'hist_ci',
            'change_perc_hist', 'is_alert_hist', 'is_critical_hist',
            ci_category='CRITICAL',
        )

        # Combine both record sets
        all_results = pd.concat([df_prev, df_hist], ignore_index=True)

        # Common fields
        all_results['date'] = datetime.now()
        all_results['check_name'] = ALERT_NAME
        all_results['partner_id'] = CONFIG_PARTNER_ID  # partner_id from the config
        all_results['metric_crit_category'] = ALERT_CATEGORY
        all_results['segment'] = None  # Not used by this check
        all_results['slice3'] = None
        all_results['slice4'] = None

        # Build app_short with a store suffix: googleplay -> _gp, ios -> _as
        # (other stores get no suffix).
        store_suffix_map = {'googleplay': '_gp', 'ios': '_as'}
        all_results['app_short'] = [
            app + store_suffix_map.get(store, '')
            for app, store in zip(all_results['app'], all_results['store'])
        ]

        # Sorting
        all_results = all_results.sort_values(by=['cohort_date', 'app_short', 'metric'], ascending=[False, True, True])

        # Alert count
        alerts_count = all_results['is_alert'].sum()
        print(f"\n[{ALERT_CATEGORY.upper()}] Всего записей: {len(all_results)}, из них алертов: {alerts_count}")

        # Columns written to the DB (schema with partner_id and app_short instead of app)
        db_cols = [
            'date', 'check_name', 'metric',
            'partner_id', 'app_short', 'country', 'segment',
            'slice1', 'slice2', 'slice3', 'slice4',
            'cohort_date', 'metric_crit_category',
            'current_value', 'reference_value', 'reference_value_ci',
            'change_perc', 'is_alert', 'alert_category'
        ]

        df_to_write = all_results[db_cols].copy()

        if not df_to_write.empty:
            print(f"Запись {len(df_to_write)} строк в Redshift...")
            env.insert_table_into_rs(df_to_write, RS_TABLE, RS_SCHEMA, 10000)
            print("Успешно записано.")

        # Keep only alerts for the on-screen report
        alerts_final = all_results[all_results['is_alert'] == True].copy()

        # Render the table in the Jupyter output (alerts only, for readability)
        if not alerts_final.empty:
            display_cols = [
                'date', 'check_name', 'metric',
                'partner_id', 'app_short', 'country', 'slice1', 'slice2',
                'metric_crit_category', 'alert_category',
                'current_value', 'reference_value', 'reference_value_ci',
                'change_perc', 'is_alert'
            ]
            # The CI width is a fraction like the CR values, so it uses the
            # same percent format ('{:.2f%}' is not a valid format spec and
            # fails when the table is rendered).
            styled_df = alerts_final[display_cols].style.hide(axis='index').format({
                'current_value': '{:.2%}',
                'reference_value': '{:.2%}',
                'reference_value_ci': '{:.2%}',
                'change_perc': '{:+.1%}'
            })
            display(styled_df)
        else:
            print("Значимых изменений не найдено.")
    else:
        print("Нет данных после фильтрации.")
else:
    print("Нет данных.")