In [1]:
#!pip install scikit-learn --force
#!pip install catboost

In [2]:
import sys
sys.path.append('..')
import pandas as pd
import numpy as np
from pandas.tseries.offsets import MonthEnd
from datetime import datetime
import os
import importlib

# Show up to 300 columns when displaying wide DataFrames.
# Uses the canonical option key; the previous spelling 'display.max.columns'
# only worked because pandas treats option names as regex patterns.
pd.set_option('display.max_columns', 300)

from core.calculator.storage import ModelDB
from core.calculator.core import ForecastConfig
from core.calculator.core import TrainingManager
from core.calculator.core import ForecastConfig
from core.calculator.core import ForecastEngine

from core.calculator.deposits import DepositsCalculationType
from core.calculator.deposits import DepositIterativeCalculator

from core.definitions import *
from core.project_update import load_portfolio

from core.models import DepositModels
from warnings import filterwarnings
filterwarnings('ignore')

In [3]:
# Date from which the forecast is made (not changed for now).
train_end = datetime(2023, 8, 31)

# Forecast horizon, in months.
horizon = 3

In [4]:
# Forecast input data.
# Every series is sized by `horizon`, so all lists stay the same length when
# the dictionary is later turned into a DataFrame.
scenario_data = {
    # Expected balance at the first forecast date; optional (left as NaN here).
    'expected_amount':      [np.nan] * horizon,
    # SSV
    'SSV':                  [0.48] * horizon,
    # FOR
    'FOR':                  [4.5] * horizon,
    # Transfer (FTP) rates by term
    'VTB_ftp_rate_[90d]':   [12.3] * horizon,
    'VTB_ftp_rate_[180d]':  [12] * horizon,
    'VTB_ftp_rate_[365d]':  [12] * horizon,
    'VTB_ftp_rate_[548d]':  [12] * horizon,
    'VTB_ftp_rate_[730d]':  [12] * horizon,
    'VTB_ftp_rate_[1095d]': [12] * horizon,

    # Business margin by term
    'margin_[90d]':         [0.1] * horizon,
    'margin_[180d]':        [0.1] * horizon,
    'margin_[365d]':        [0.1] * horizon,
    'margin_[548d]':        [0.1] * horizon,
    'margin_[730d]':        [0.2] * horizon,
    'margin_[1095d]':       [0.2] * horizon,

    # Privilege-vs-Mass spread (how much, on average, Privilege-segment rates
    # exceed Mass-segment rates)
    'priv_spread':          [0.4] * horizon,
    # VIP-vs-Mass spread (how much, on average, VIP-segment rates exceed
    # Mass-segment rates)
    'vip_spread':           [0.8] * horizon,

    # Below are three spreads for the different optionality types, relative to
    # deposits without options (they are expected to be negative as a rule).
    # r - replenishment allowed, s - withdrawal operations allowed

    # How much the rate on withdrawable deposits exceeds the rate on
    # no-option deposits (negative if it is lower)
    'r0s1_spread':          [-1] * horizon,

    # How much the rate on replenishable deposits exceeds the rate on
    # no-option deposits (negative if it is lower)
    'r1s0_spread':          [-1] * horizon,

    # How much the rate on replenishable-and-withdrawable deposits exceeds the
    # rate on no-option deposits (negative if it is lower)
    'r1s1_spread':          [-1.2] * horizon,

    # Rate of Sber's best offer.
    # Sized by `horizon` like every other series (it used to be a fixed
    # three-element list, which broke as soon as `horizon` was changed).
    'SBER_max_rate':        [11.2] * horizon,

    # Base rate for savings accounts
    'SA_rate':              [5] * horizon,
}


## Пример создания JSON

In [5]:
# scenario_data is our dictionary with the scenario inputs.
# The JSON payload is a list with one {"currency", "params"} entry per currency;
# the CNY entry only carries a placeholder string.
d1 = {
    "forecast_params": [
        {"currency": "RUB", "params": scenario_data},
        {"currency": "CNY", "params": 'test'},
    ]
}

In [6]:
import json

In [7]:
# Write the payload to disk.
# NOTE: with the default json settings NaN values are written as the bare
# token NaN, which Python reads back but strict JSON parsers may reject.
with open('sc_example.json', 'w') as outfile:
    outfile.write(json.dumps(d1))

In [8]:
# Read the payload back from disk.
with open('sc_example.json', 'r') as openfile:
    d1 = json.loads(openfile.read())

In [9]:
# Show the parameters of the first entry when it is the RUB one.
first_entry = d1["forecast_params"][0]
if first_entry['currency'] == 'RUB':
    print(first_entry['params'])

{'expected_amount': [nan, nan, nan], 'SSV': [0.48, 0.48, 0.48], 'FOR': [4.5, 4.5, 4.5], 'VTB_ftp_rate_[90d]': [12.3, 12.3, 12.3], 'VTB_ftp_rate_[180d]': [12, 12, 12], 'VTB_ftp_rate_[365d]': [12, 12, 12], 'VTB_ftp_rate_[548d]': [12, 12, 12], 'VTB_ftp_rate_[730d]': [12, 12, 12], 'VTB_ftp_rate_[1095d]': [12, 12, 12], 'margin_[90d]': [0.1, 0.1, 0.1], 'margin_[180d]': [0.1, 0.1, 0.1], 'margin_[365d]': [0.1, 0.1, 0.1], 'margin_[548d]': [0.1, 0.1, 0.1], 'margin_[730d]': [0.2, 0.2, 0.2], 'margin_[1095d]': [0.2, 0.2, 0.2], 'priv_spread': [0.4, 0.4, 0.4], 'vip_spread': [0.8, 0.8, 0.8], 'r0s1_spread': [-1, -1, -1], 'r1s0_spread': [-1, -1, -1], 'r1s1_spread': [-1.2, -1.2, -1.2], 'SBER_max_rate': [11.2, 11.2, 11.2], 'SA_rate': [5, 5, 5]}


### Возможная альтернатива (более удобный словарь)

In [10]:
# Alternative layout: parameters are keyed by currency code, so no positional
# index is needed to find a currency. The CNY value is a placeholder string.
d2 = {
    "forecast_params": {
        "currency": {
            'RUB': {"params": scenario_data},
            'CNY': {"params": 'scenario_data_CNY_example'},
        }
    }
}

In [11]:
# Direct lookup of the RUB parameters by currency code.
d2["forecast_params"]["currency"]["RUB"]["params"]

{'expected_amount': [nan, nan, nan],
 'SSV': [0.48, 0.48, 0.48],
 'FOR': [4.5, 4.5, 4.5],
 'VTB_ftp_rate_[90d]': [12.3, 12.3, 12.3],
 'VTB_ftp_rate_[180d]': [12, 12, 12],
 'VTB_ftp_rate_[365d]': [12, 12, 12],
 'VTB_ftp_rate_[548d]': [12, 12, 12],
 'VTB_ftp_rate_[730d]': [12, 12, 12],
 'VTB_ftp_rate_[1095d]': [12, 12, 12],
 'margin_[90d]': [0.1, 0.1, 0.1],
 'margin_[180d]': [0.1, 0.1, 0.1],
 'margin_[365d]': [0.1, 0.1, 0.1],
 'margin_[548d]': [0.1, 0.1, 0.1],
 'margin_[730d]': [0.2, 0.2, 0.2],
 'margin_[1095d]': [0.2, 0.2, 0.2],
 'priv_spread': [0.4, 0.4, 0.4],
 'vip_spread': [0.8, 0.8, 0.8],
 'r0s1_spread': [-1, -1, -1],
 'r1s0_spread': [-1, -1, -1],
 'r1s1_spread': [-1.2, -1.2, -1.2],
 'SBER_max_rate': [11.2, 11.2, 11.2],
 'SA_rate': [5, 5, 5]}

### Прогноз модели

In [12]:
# Take the scenario from the JSON payload read back above
# (first entry of the list, which is the RUB one in this example).
scenario_data = d1["forecast_params"][0]["params"]

In [13]:
# One column per scenario driver, one row per forecast step.
scenario_df_user = pd.DataFrame(data=scenario_data)

In [14]:
# Convert the user-provided scenario table into the form passed to ForecastConfig.
# NOTE(review): preprocess_scenario is not imported by name; presumably it comes
# from `from core.definitions import *` — confirm.
scenario_df = preprocess_scenario(scenario_df_user, train_end, horizon)

In [15]:
# Folder with portfolio data, relative to the notebook location.
port_folder = '../data/portfolio_data'
# Load the portfolio for the forecast start date; it is passed to ForecastConfig below.
portfolio = load_portfolio(train_end, port_folder)

In [16]:
# Use this if we want to train the models: uncomment the run_spark_session line
# below to start a Spark session.

from core.models.utils import run_spark_session
#spark = run_spark_session('check_calc')

# `spark` is handed to TrainingManager and ForecastEngine further down.
spark = None # when running without training

In [17]:
# Folder with the trained models; also passed to TrainingManager further down.
folder = '../data/trained_models'

# Database file located inside the models folder.
sqlite_filepath = os.path.join(folder, 'modeldb_test.bin')

# Connection URL of the form sqlite:///<path>, used to create the ModelDB instance.
DB_URL = f"sqlite:///{sqlite_filepath}"
model_db = ModelDB(DB_URL)


In [18]:
# Name of the environment under /tmp/envs whose bin directory is put on PATH.
# NOTE(review): this is a user-specific name — adjust it for your own setup.
ENV_NAME = 'hmelevskoi_env'

# Compiler executables exported through the CC / CXX environment variables.
os.environ['CC'] = 'x86_64-conda-linux-gnu-gcc'
os.environ['CXX'] = 'x86_64-conda-linux-gnu-g++'

# Put the environment's bin directory at the front of PATH.
# The prepend is skipped when the directory is already the first entry, so
# re-running this cell does not keep growing PATH; a missing PATH variable is
# treated as empty instead of raising KeyError.
_env_bin_dir = os.path.abspath(f'/tmp/envs/{ENV_NAME}/bin')
_current_path = os.environ.get('PATH', '')
if _current_path.split(os.pathsep)[0] != _env_bin_dir:
    os.environ['PATH'] = _env_bin_dir + (os.pathsep + _current_path if _current_path else '')

In [19]:
# Forecast configuration: start date, horizon, the deposit model components,
# and the scenario / portfolio inputs prepared above.
config: ForecastConfig = ForecastConfig(
    first_train_end_dt=train_end,
    horizon=horizon,
    trainers=DepositModels.trainers,
    data_loaders=DepositModels.dataloaders,
    calculator_type=DepositIterativeCalculator,
    calc_type=DepositsCalculationType,
    adapter_types=DepositModels.adapter_types,
    scenario_data=scenario_df,
    portfolio=portfolio,
)

# The training manager gets the Spark session (None here), the trainers from
# the config, the models folder and the model database.
training_manager = TrainingManager(spark, config.trainers, folder, model_db)

# The engine is created with overwrite_models=False.
engine: ForecastEngine = ForecastEngine(
    spark,
    config,
    training_manager,
    overwrite_models=False,
)

In [20]:
%%time
# Run the engine; the next cell reads its output from `engine.calc_results`.
# The %%time cell magic reports how long the run took.
engine.run_all()

INFO:core:missing models: []
INFO:core:add_models_from_bytes
INFO:core:plan_close_201402_202308 - adapter <class 'core.models.plan_close.plan_close_model.PlanCloseModelAdapter'>
INFO:core:renewal_model_201401_202308 - adapter <class 'core.models.renewal.renewal_model.RenewalModelAdapter'>
INFO:core:maturity_structure_mass_r0s0_201401_202308 - adapter <class 'core.models.newbusiness.maturity_structure.maturity_structure_mass_model.MaturityStructureMassR0S0ModelAdapter'>
INFO:core:maturity_structure_mass_r0s1_202001_202308 - adapter <class 'core.models.newbusiness.maturity_structure.maturity_structure_mass_model.MaturityStructureMassR0S1ModelAdapter'>
INFO:core:maturity_structure_mass_r1s0_201401_202308 - adapter <class 'core.models.newbusiness.maturity_structure.maturity_structure_mass_model.MaturityStructureMassR1S0ModelAdapter'>
INFO:core:maturity_structure_mass_r1s1_201401_202308 - adapter <class 'core.models.newbusiness.maturity_structure.maturity_structure_mass_model.MaturityStruct

CPU times: user 37.6 s, sys: 4.42 s, total: 42.1 s
Wall time: 22.1 s


In [21]:
# Output: pull the individual result tables out of the engine.
_calc_results = engine.calc_results
_deposits_results = _calc_results['Deposits']

# Deposit results
portfolio_res = _deposits_results['portfolio']
agg_res = _deposits_results['agg_data']
maturity = _deposits_results['maturity']

# Results stored under the other top-level keys
CurrentAccounts = _calc_results['CurrentAccounts']
SavingAccounts = _calc_results['SavingAccounts']
volumes = _calc_results['Volumes']

In [22]:
# Totals of the aggregated deposit results per report date.
# NOTE(review): the saved output also contains sums of flag / id-like columns
# (replenishable_flg, subtraction_flg, universal_weight_id, ...); those totals
# are probably not meaningful — consider selecting the balance columns explicitly.
agg_res.groupby('report_dt').sum()

Unnamed: 0_level_0,replenishable_flg,subtraction_flg,month_maturity,target_maturity_days,balance_start,balance_gain,balance,newbusiness,contract_close,early_withdrawal,operations,interests,renewal,universal_weight_id
report_dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2023-09-30,32,27,1065,32373,3295017000000.0,336456500000.0,3631474000000.0,760840300000.0,-377793000000.0,-45118360000.0,-21869020000.0,20396560000.0,65508540000.0,304
2023-10-31,31,26,1041,31643,3631474000000.0,207536700000.0,3839010000000.0,514291200000.0,-288033600000.0,-24111350000.0,-19864570000.0,25254950000.0,42617240000.0,294
2023-11-30,30,25,1017,30913,3839010000000.0,39273190000.0,3878284000000.0,807508600000.0,-764271400000.0,-21144750000.0,-4718331000.0,21899020000.0,264458600000.0,284


In [23]:
# для записи и чтения экселя
# import pip
# pip.main(['install', 'openpyxl'])

In [24]:
# import openpyxl

In [25]:
# with pd.ExcelWriter("august_res_v2.xlsx") as writer:
#     portfolio_res.to_excel(writer, sheet_name='portfolio_res', index=False)
#     agg_res.to_excel(writer, sheet_name='agg_res', index=False)
#     maturity.to_excel(writer, sheet_name='maturity', index=False)
#     CurrentAccounts.to_excel(writer, sheet_name='CurrentAccounts', index=False)
#     SavingAccounts.to_excel(writer, sheet_name='SavingAccounts', index=False)
#     volumes.to_excel(writer, sheet_name='volumes', index=False)
#     pd.DataFrame(scenario_data).to_excel(writer, sheet_name='scenario', index=False)