In [23]:
%load_ext autoreload
%autoreload 2

import env

import pandas as pd
import numpy as np
import json
from datetime import datetime, timedelta

# Load the alert configuration table from Google Sheets.
service = env.get_gservice()

# Without a service handle nothing below can run, so fail fast.
if not service:
    raise ConnectionError("Не удалось подключиться к Google API")

df_sheet = env.read_df_from_spreadsheet(service, env.SHEET_ID, env.SHEET_NAME)
print("Данные из Google Sheets загружены")

# Locate the settings row for this alert by its name.
ALERT_NAME = "01-incent.cr"

matching_rows = df_sheet[df_sheet['name'] == ALERT_NAME]
try:
    # The first matching row wins; an empty selection raises IndexError.
    config_row = matching_rows.iloc[0]
except IndexError:
    raise ValueError(f"Алерт '{ALERT_NAME}' не найден в Google Sheet")

# Scalar settings taken from the sheet row.
ALERT_ACTIVE_FLAG = config_row['active_flag']
ALERT_CATEGORY = config_row['metric_crit_category']
# The threshold is forced negative regardless of the sign entered in the sheet.
Z_THRESHOLD = -abs(float(config_row['n_sigmas']))
MIN_INSTALLS = int(config_row['threshold_installs'])

# SQL formatting helper
def to_sql_list(items):
    """Render *items* as a parenthesised SQL list literal for an ``IN`` clause.

    A single scalar is treated as a one-element collection. Strings are
    wrapped in single quotes with embedded single quotes doubled, so a value
    such as ``O'Neil`` cannot terminate the literal early. Every other value
    is rendered with ``str()``.

    Args:
        items: A list, tuple, set or frozenset of values, or a single scalar.

    Returns:
        A string such as ``"('US', 'DE')"`` or ``"(1, 2)"``. An empty
        collection yields ``"()"``.
        NOTE(review): many SQL dialects reject an empty ``IN ()``; callers
        should avoid passing empty collections.
    """
    if not isinstance(items, (list, tuple, set, frozenset)):
        items = [items]
    if not items:
        return "()"

    formatted = [
        "'" + x.replace("'", "''") + "'" if isinstance(x, str) else str(x)
        for x in items
    ]
    return f"({', '.join(formatted)})"

try:
    # Parse the per-alert JSON settings stored in the sheet cell.
    raw_config_json = config_row['config_json']
    params = json.loads(raw_config_json)

    # Required keys: a missing one is reported by the KeyError handler below.
    CONFIG_COUNTRIES = to_sql_list(params['countries'])
    CONFIG_PARTNER = "'{}'".format(params['partner_id'])
    CONFIG_RULES = params['cw']

    # Optional flag, defaulting to 'TRUE'; compared case-insensitively as text.
    check_countries_val = params.get('check_countries', 'TRUE')
    CHECK_COUNTRIES = 'TRUE' == str(check_countries_val).upper()

except KeyError as missing_key:
    raise ValueError(f"В JSON отсутствует обязательный ключ: {missing_key}")
except json.JSONDecodeError as decode_err:
    raise ValueError(f"Ошибка JSON в ячейке config_json: {decode_err}")

# Echo the effective settings for the run log.
print(f"Настройки: Z={Z_THRESHOLD}, MinInstalls={MIN_INSTALLS}")
print(f"Check Countries: {CHECK_COUNTRIES}")
print(f"Countries SQL: {CONFIG_COUNTRIES}")


# Statistics helpers
def calc_std_error(cr, n):
    """Return the standard error of a proportion, ``sqrt(cr * (1 - cr) / n)``.

    Args:
        cr: Conversion rate (proportion); scalar or array-like.
        n: Sample size; scalar or array-like.

    Returns:
        The standard error, computed element-wise via ``np.sqrt``.
    """
    variance_numerator = cr * (1 - cr)
    return np.sqrt(variance_numerator / n)

def calc_ci(cr, n, z=1.96):
    """Return the ``(lower, upper)`` bounds of a normal-approximation interval.

    The interval is ``cr ± z * SE`` where SE comes from ``calc_std_error``;
    both bounds are clipped to the range [0, 1].

    Args:
        cr: Conversion rate (proportion); scalar or array-like.
        n: Sample size; scalar or array-like.
        z: Multiplier applied to the standard error (default 1.96).

    Returns:
        A ``(lower, upper)`` tuple.
    """
    margin = z * calc_std_error(cr, n)
    return np.clip(cr - margin, 0, 1), np.clip(cr + margin, 0, 1)

def calc_z_score(p1, p2, n1):
    """Return the z-score of *p1* against the baseline proportion *p2*.

    The standard error is computed from the baseline *p2* with sample size
    *n1*. Where that standard error is not positive, the score is 0.

    Args:
        p1: Observed proportion; scalar or array-like.
        p2: Baseline proportion; scalar or array-like.
        n1: Sample size used for the standard error.

    Returns:
        The result of ``np.where``: ``(p1 - p2) / SE`` where ``SE > 0``,
        otherwise 0.
    """
    baseline_se = calc_std_error(p2, n1)
    difference = p1 - p2
    return np.where(baseline_se > 0, difference / baseline_se, 0)


# Main check
def _build_level_filter_sql(level_rules_dict):
    """Build the ``AND (...)`` SQL fragment restricting rows to configured levels."""
    conditions = []
    escaped_apps = []

    if 'exceptions' in level_rules_dict:
        for app_name, levels in level_rules_dict['exceptions'].items():
            # Double embedded single quotes so an app name cannot break out
            # of its SQL string literal.
            safe_app = str(app_name).replace("'", "''")
            escaped_apps.append(safe_app)
            conditions.append(
                f"(app = '{safe_app}' AND level IN {to_sql_list(levels)})"
            )

    default_levels_sql = to_sql_list(level_rules_dict['default'])

    if escaped_apps:
        # Apps with their own level list are excluded from the default rule.
        excl_apps_sql = "(" + ", ".join(f"'{name}'" for name in escaped_apps) + ")"
        default_cond = f"(app NOT IN {excl_apps_sql} AND level IN {default_levels_sql})"
    else:
        default_cond = f"(level IN {default_levels_sql})"

    conditions.append(default_cond)
    return " AND (" + " OR ".join(conditions) + ")"


def _window_bounds(lag_weeks, today=None):
    """Return the current, previous and history date windows as six dates.

    The tuple is ``(current_start, current_end, prev_start, prev_end,
    history_start, history_end)``; all bounds are inclusive.
    """
    if today is None:
        today = datetime.now().date()

    # Most recent Sunday strictly before today.
    last_full_sunday = today - timedelta(days=today.weekday() + 1)

    # Current week: 7 days ending (lag_weeks - 1) weeks before that Sunday.
    current_end = last_full_sunday - timedelta(weeks=lag_weeks - 1)
    current_start = current_end - timedelta(days=6)

    # Previous week: the 7 days immediately before the current week.
    prev_end = current_start - timedelta(days=1)
    prev_start = prev_end - timedelta(days=6)

    # History: the 28 days immediately before the current week
    # (this span includes the previous week).
    history_end = current_start - timedelta(days=1)
    history_start = history_end - timedelta(weeks=4) + timedelta(days=1)

    return current_start, current_end, prev_start, prev_end, history_start, history_end


def run_check_for_window(target_cw, lag_weeks, level_rules_dict):
    """Query CR data for one ``cw`` value and flag statistically significant drops.

    Reads the module-level settings ``CONFIG_PARTNER``, ``CONFIG_COUNTRIES``,
    ``CHECK_COUNTRIES``, ``MIN_INSTALLS`` and ``Z_THRESHOLD``, and runs the
    query through ``env.execute_sql``.

    Args:
        target_cw: Value of the ``cw`` column to check.
        lag_weeks: How many weeks back the "current" week lies; 1 means the
            week ending on the most recent Sunday before today.
        level_rules_dict: Level rules with a required ``'default'`` list of
            levels and an optional ``'exceptions'`` mapping of app name to
            its own list of levels.

    Returns:
        A DataFrame with one row per checked slice. Depending on
        ``CHECK_COUNTRIES`` these are per-country rows plus a cross-country
        ``country == 'ALL'`` row, or only the ``'ALL'`` rows. Columns include
        recomputed CRs, z-scores, confidence bounds and the ``is_alert_*``
        flags. If the query returns nothing, or no slice passes the
        ``MIN_INSTALLS`` filter, the empty frame is returned without the
        statistic columns.

    Raises:
        KeyError: If ``level_rules_dict`` has no ``'default'`` entry.
    """
    # Build the SQL pieces
    level_filter_sql = _build_level_filter_sql(level_rules_dict)
    (current_start, current_end,
     prev_start, prev_end,
     history_start, history_end) = _window_bounds(lag_weeks)

    print(f"\n--- Checking CW={target_cw} ---")

    # SQL query
    sql_query = f"""
    WITH raw_data AS (
        SELECT 
            app, store, country, level, cw,
            cohort_date::DATE as cohort_date_clean, 
            unique_user_count, installs
        FROM ma_data.vinokurov_cr_data
        WHERE 
            partner_id = {CONFIG_PARTNER}
            AND country IN {CONFIG_COUNTRIES}
            AND cw = {target_cw}
            {level_filter_sql} 
            AND cohort_date::DATE >= '{history_start}' 
            AND cohort_date::DATE <= '{current_end}'
    ),
    historical_stats AS (
        SELECT app, store, country, level,
            SUM(unique_user_count) as hist_users, SUM(installs) as hist_installs
        FROM raw_data
        WHERE cohort_date_clean BETWEEN '{history_start}' AND '{history_end}'
        GROUP BY app, store, country, level
    ),
    previous_stats AS (
        SELECT app, store, country, level,
            SUM(unique_user_count) as prev_users, SUM(installs) as prev_installs
        FROM raw_data
        WHERE cohort_date_clean BETWEEN '{prev_start}' AND '{prev_end}'
        GROUP BY app, store, country, level
    ),
    current_stats AS (
        SELECT app, store, country, level,
            SUM(unique_user_count) as curr_users, SUM(installs) as curr_installs,
            MIN(cohort_date_clean) as cohort_date
        FROM raw_data
        WHERE cohort_date_clean BETWEEN '{current_start}' AND '{current_end}'
        GROUP BY app, store, country, level
    )
    SELECT 
        c.app, c.store, c.country, c.level, {target_cw} as cw, c.cohort_date,
        c.curr_installs, c.curr_users,
        p.prev_installs, p.prev_users,
        h.hist_installs, h.hist_users,
        -- CR считаем пока предварительно, но пересчитаем в Python после группировки
        (c.curr_users::float / NULLIF(c.curr_installs, 0)) as current_cr,
        (p.prev_users::float / NULLIF(p.prev_installs, 0)) as previous_cr,
        (h.hist_users::float / NULLIF(h.hist_installs, 0)) as historical_cr
    FROM current_stats c
    JOIN previous_stats p USING (app, store, country, level)
    JOIN historical_stats h USING (app, store, country, level)
    """

    # Execute and post-process
    df = env.execute_sql(sql_query)
    df = df.fillna(0)

    if df.empty:
        return df

    # Make the raw counters numeric so they can be summed.
    count_cols = ['curr_installs', 'curr_users', 'prev_installs', 'prev_users', 'hist_installs', 'hist_users']
    for col in count_cols:
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0)

    # Cross-country aggregate ("ALL"), i.e. the app-store slice.
    # cohort_date is a per-country MIN, so it must not be a grouping key:
    # countries whose first cohort day differs would otherwise split the
    # aggregate into several partial rows. Sum the counters and keep the
    # earliest cohort_date instead.
    group_cols = ['app', 'store', 'level', 'cw']
    aggregations = {col: 'sum' for col in count_cols}
    aggregations['cohort_date'] = 'min'
    df_all = df.groupby(group_cols, as_index=False).agg(aggregations)
    df_all['country'] = 'ALL'

    if CHECK_COUNTRIES:
        # Check both the individual countries and the combined slice.
        df = pd.concat([df, df_all], ignore_index=True)
    else:
        # Check ONLY the combined slice.
        df = df_all

    # Recompute the CRs from the (possibly aggregated) absolute counts.
    df['current_cr'] = np.where(df['curr_installs'] > 0, df['curr_users'] / df['curr_installs'], 0.0)
    df['previous_cr'] = np.where(df['prev_installs'] > 0, df['prev_users'] / df['prev_installs'], 0.0)
    df['historical_cr'] = np.where(df['hist_installs'] > 0, df['hist_users'] / df['hist_installs'], 0.0)

    # Cast again so np.sqrt operates on float64 rather than object columns.
    calc_cols = ['current_cr', 'previous_cr', 'historical_cr', 'curr_installs', 'prev_installs', 'hist_installs']
    for col in calc_cols:
        df[col] = df[col].astype(float)

    # Keep only slices with enough installs in both the current and previous week.
    df = df[(df['curr_installs'] >= MIN_INSTALLS) & (df['prev_installs'] >= MIN_INSTALLS)].copy()

    if df.empty:
        return df

    # Z-scores of the current CR against each baseline, using the current sample size.
    df['z_score_hist'] = calc_z_score(df['current_cr'], df['historical_cr'], df['curr_installs'])
    df['z_score_prev'] = calc_z_score(df['current_cr'], df['previous_cr'], df['curr_installs'])

    # Confidence intervals
    df['curr_ci_low'], df['curr_ci_high'] = calc_ci(df['current_cr'], df['curr_installs'])
    df['prev_ci_low'], df['prev_ci_high'] = calc_ci(df['previous_cr'], df['prev_installs'])
    df['hist_ci_low'], df['hist_ci_high'] = calc_ci(df['historical_cr'], df['hist_installs'])

    # Alerts: only drops are flagged (z-score below the negative threshold).
    df['is_alert_hist'] = (df['z_score_hist'] < Z_THRESHOLD) & (df['current_cr'] < df['historical_cr'])
    df['is_alert_prev'] = (df['z_score_prev'] < Z_THRESHOLD) & (df['current_cr'] < df['previous_cr'])
    df['is_alert_any'] = df['is_alert_hist'] | df['is_alert_prev']

    return df

# Run
# Lag (in weeks) per cw value; cw values not listed here fall back to 5.
LAG_MAP = {7: 2, 30: 5}
result_frames = []

# Iterate over the per-cw rules from the JSON config
for cw_key_str, rules in CONFIG_RULES.items():
    cw = int(cw_key_str)
    lag = LAG_MAP.get(cw, 5)

    df_res = run_check_for_window(cw, lag, rules)
    if df_res.empty:
        # Nothing to report for this window.
        continue
    result_frames.append(df_res)

# Report

if not result_frames:
    print("Нет данных.")
else:
    full_report = pd.concat(result_frames, ignore_index=True)
    alerts_final = full_report[full_report['is_alert_any'] == True].copy()

    if alerts_final.empty:
        print("Аномалий не найдено.")
    else:
        # Attach report metadata to every alert row.
        alerts_final['metric_crit_category'] = ALERT_CATEGORY
        alerts_final['date'] = pd.to_datetime(datetime.now())
        alerts_final['incent_opex_check'] = ALERT_NAME
        # Newest cohorts first; within a cohort, lowest z-score first.
        alerts_final = alerts_final.sort_values(
            by=['cohort_date', 'z_score_hist'],
            ascending=[False, True],
        )

        print(f"\n[{ALERT_CATEGORY.upper()}] Аномалии найдены: {len(alerts_final)}")

        display_cols = [
            'date', 'incent_opex_check', 'app', 'store', 'country', 'level',
            'cw', 'metric_crit_category',
            'current_cr', 'curr_ci_low', 'curr_ci_high',
            'is_alert_prev', 'prev_ci_low', 'prev_ci_high',
            'is_alert_hist', 'hist_ci_low', 'hist_ci_high', 'z_score_hist',
        ]

        if ALERT_ACTIVE_FLAG == 'Enabled':
            # ----------------------------
            #
            # notification-sending code goes here
            #
            # ----------------------------
            styled_df = alerts_final[display_cols].style.hide(axis='index')
            display(styled_df)
        else:
            # The alert is switched off in the sheet: report that and show nothing.
            print(f"Нотификация '{ALERT_NAME}' отключена.")

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Данные из Google Sheets загружены
Настройки: Z=-3.0, MinInstalls=200
Check Countries: True
Countries SQL: ('US', 'DE', 'JP')

--- Checking CW=7 ---

--- Checking CW=30 ---

[INFO] Аномалии найдены: 34
Нотификация '01-incent.cr' отключена.
