### Упражнение 1

* Скачайте с портала Открытых Данных производственный календарь России: https://data.gov.ru/opendata/7708660670-proizvcalendar
* На его основе определите соответствующий календарь в Pandas: `RussianBusinessCalendar(AbstractHolidayCalendar)`
* С помощью полученного в пункте выше календаря и модуля `pd.offsets` создайте DataFrame one-hot календарных признаков:
    * День недели
    * Месяц
    * Выходной день
    * Праздничный день
    * Предпраздничный рабочий день
    * Последний день месяца
    * Последний рабочий день месяца
    * Предновогодний выходной день

In [None]:
!pip install pymorphy2

In [None]:
import datetime # для работы со временем

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sys
import locale
import calendar
import pymorphy2
from pandas.tseries.holiday import AbstractHolidayCalendar, Holiday
from pandas.tseries.offsets import DateOffset, Day

%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

def set_rus_locale():
    """Switch every locale category of the process to Russian.

    The locale name depends on the platform: ``'rus_rus'`` on win32 and
    ``'ru_RU.UTF-8'`` everywhere else.
    """
    if sys.platform == 'win32':
        locale_name = 'rus_rus'
    else:
        locale_name = 'ru_RU.UTF-8'
    locale.setlocale(locale.LC_ALL, locale_name)

# Day and month names produced by strftime/calendar below follow this locale.
set_rus_locale()
# Do not truncate wide frames when they are displayed.
pd.set_option('display.max_columns', None)
# Morphological analyzer; used further down to take the normal form of month names.
morph = pymorphy2.MorphAnalyzer()

# Production calendar table; the rows are parsed by RussianBusinessCalendar.
work_calendar = pd.read_csv('./data/calendar.csv')
work_calendar

In [None]:
# One row per calendar day of the studied period; feature columns are added later.
events = pd.DataFrame(
    index=pd.date_range('2013-03-25', '2020-01-10', freq='D'),
)

class RussianBusinessCalendar(AbstractHolidayCalendar):
    """Holiday calendar built from the rows of ``work_calendar``.

    Each row is read as: first cell = year, cells 1..12 = a comma-separated
    list of day numbers for that month. Every listed day becomes a one-off
    ``Holiday`` rule named ``'Holiday <row label>'``.
    """
    start_date = datetime.datetime(2013, 3, 25)
    end_date = datetime.datetime(2020, 1, 10)

    # Built with a comprehension so that the loop variables do not end up as
    # class attributes, and with .iloc because the cells are addressed by
    # position, not by column label.
    # NOTE(review): the '*' and '+' markers are stripped and the day is kept
    # as a holiday either way. In the source dataset '*' presumably marks a
    # shortened pre-holiday *working* day; if so, those days should be
    # excluded here. Confirm against the dataset description.
    rules = [
        Holiday('Holiday ' + str(i),
                year=int(row.iloc[0]), month=month, day=int(day))
        for i, row in work_calendar.iterrows()
        for month in range(1, 13)
        for day in row.iloc[month].replace('*', '').replace('+', '').split(',')
    ]


# Working-day offset: Monday-Friday weeks, skipping the dates listed in the calendar.
russian_busday = pd.offsets.CustomBusinessDay(
    calendar=RussianBusinessCalendar(),
    weekmask='Mon Tue Wed Thu Fri',
)

# Separate calendar instance used to list the dates themselves.
cal = RussianBusinessCalendar()

# Public holidays as (month, day) pairs, filled in from the ConsultantPlus
# production calendar: http://www.consultant.ru/law/ref/calendar/proizvodstvennye/2021/
feasts = [(1, january_day) for january_day in range(1, 9)]  # 1-8 January
feasts += [
    (2, 23),   # 23 February
    (3, 8),    # 8 March
    (5, 1),    # 1 May
    (5, 9),    # 9 May
    (6, 12),   # 12 June (Russia Day); was entered as (12, 6), i.e. 6 December
    (11, 4),   # 4 November
]

# Dates returned by the calendar for its default start/end range.
holidays = cal.holidays()

# Flag columns start at zero; the per-day loop switches individual cells to 1.
for flag_column in ('выходной',
                    'праздник',
                    'предпраздничный рабочий день',
                    'последний день месяца',
                    'последний рабочий день месяца'):
    events[flag_column] = 0

# One indicator column per weekday, named by the locale's abbreviated day name.
for weekday_abbr in calendar.day_abbr[0:7]:
    events[weekday_abbr] = 0

# One indicator column per month, named by the normal form of the locale's month name.
for month_title in calendar.month_name[1:13]:
    events[morph.parse(month_title)[0].normal_form] = 0

events['Предновогодний выходной день'] = 0
# Fill the one-hot calendar features day by day.
holiday_dates = holidays.to_series()
# year -> day-of-month of the last December date present in `holidays`;
# looked up once per year instead of rebuilding the series for every December row.
last_december_day_off = {}

for i in events.index:
    year = i.year
    month = i.month
    day = i.day

    # Weekday / month indicators: the column names are the locale-formatted names.
    day_name = i.strftime("%a")
    month_name = morph.parse(i.strftime("%B"))[0].normal_form
    events.at[i, day_name] = 1
    events.at[i, month_name] = 1

    is_day_off = i.normalize() in holidays
    if is_day_off:
        events.at[i, 'выходной'] = 1

    if month == 12:
        if year not in last_december_day_off:
            december = holiday_dates.loc[str(year) + '-' + str(month)]
            last_december_day_off[year] = december.index[-1].day
        if last_december_day_off[year] == day:
            events.at[i, 'Предновогодний выходной день'] = 1

    if (month, day) in feasts:
        events.at[i, 'праздник'] = 1
        before_feast = i - russian_busday
        # A label outside the index would make .at append a new row; skip it.
        if before_feast in events.index:
            events.at[before_feast, 'предпраздничный рабочий день'] = 1

    if i.is_month_end:
        events.at[i, 'последний день месяца'] = 1
        # If the month ends on a day off, step back to the previous business day.
        last_working_day = i - russian_busday if is_day_off else i
        if last_working_day in events.index:
            events.at[last_working_day, 'последний рабочий день месяца'] = 1

events

### Упражнение 2

* Для временного ряда курса акций Сбербанка на основе производственного календаря России сформируйте DataFrame признаков:
    * Лаговые переменные рабочих дней с глубинами: 1, 3, 5
    * Скользящие статистики `['mean', 'median', 'max', 'min', 'std']` с окнами 1, 3, 5, сгруппированные по дням недели
    * Статистики `['mean', 'median', 'max', 'min', 'std']` в расширяющемся окне, сгруппированные по рабочим, праздничным и предпраздничным дням

In [None]:
# Closing prices as a Series indexed by the parsed TRADEDATE column.
ts = pd.read_csv(
    './data/sber_stocks.csv',
    usecols=['CLOSE', 'TRADEDATE'],
    index_col='TRADEDATE',
    parse_dates=True,
)['CLOSE']

ts.plot(figsize=(20, 10), grid=True, linewidth=3)

In [None]:
# Display the closing-price series.
ts

##### Задание 1

In [None]:
# Daily grid; days without a quote take the last known value.
# (.ffill() replaces the deprecated fillna(method='ffill').)
ts_by_day = ts.asfreq('1D').ffill()

# Lagged copies: the index of each copy is moved forward by 1, 3 and 5 business days.
# NOTE(review): the daily grid includes non-working days, so several dates can be
# moved onto the same business day and the shifted index may contain duplicates.
# Confirm this is the intended lag definition.
ts_shift_1 = ts_by_day.shift(1, freq=russian_busday)
ts_shift_3 = ts_by_day.shift(3, freq=russian_busday)
ts_shift_5 = ts_by_day.shift(5, freq=russian_busday)

# .loc selects the 2019 rows whether the object is a Series or a DataFrame.
for lagged, colour in ((ts_shift_1, 'red'), (ts_shift_3, 'green'), (ts_shift_5, 'orange')):
    lagged.loc['2019'].plot(figsize=(20, 10), color=colour)
ts.loc['2019'].plot(grid=True)

##### Задание 2

In [None]:
# Work with a DataFrame so the weekday can be stored next to the price.
ts = pd.DataFrame(ts)
# Daily grid with gaps forward-filled (.ffill() replaces the deprecated fillna(method='ffill')).
ts_by_day = ts.asfreq('1D').ffill()
ts_by_day['weekday'] = ts_by_day.index.weekday
# CLOSE prices grouped by weekday number; reused by the rolling-statistic cells.
groupped = ts_by_day.groupby('weekday')['CLOSE']

In [None]:
# Rolling mean over 1 observation of the same weekday; shift(1) moves each value
# to the next observation of that weekday, so a day's own price is not used.
rolling_mean_window_1 = groupped.transform(lambda x: x.rolling(window=1).mean().shift(1))
rolling_mean_window_1.loc['2019'].plot(figsize=(20, 10), color='red')
# DataFrame rows are selected by date with .loc; ts_by_day['2019'] is a column lookup in pandas 2.x.
ts_by_day.loc['2019', 'CLOSE'].plot(grid=True)

In [None]:
# Rolling mean over 3 observations of the same weekday, lagged by one observation of that weekday.
rolling_mean_window_3 = groupped.transform(lambda x: x.rolling(window=3).mean().shift(1))
rolling_mean_window_3.loc['2019'].plot(figsize=(20, 10), color='red')
# DataFrame rows are selected by date with .loc; ts_by_day['2019'] is a column lookup in pandas 2.x.
ts_by_day.loc['2019', 'CLOSE'].plot(grid=True)

In [None]:
# Rolling mean over 5 observations of the same weekday, lagged by one observation of that weekday.
rolling_mean_window_5 = groupped.transform(lambda x: x.rolling(window=5).mean().shift(1))
rolling_mean_window_5.loc['2019'].plot(figsize=(20, 10), color='red')
# DataFrame rows are selected by date with .loc; ts_by_day['2019'] is a column lookup in pandas 2.x.
ts_by_day.loc['2019', 'CLOSE'].plot(grid=True)

In [None]:
# Rolling median over 1 observation of the same weekday, lagged by one observation of that weekday.
rolling_median_window_1 = groupped.transform(lambda x: x.rolling(window=1).median().shift(1))
rolling_median_window_1.loc['2019'].plot(figsize=(20, 10), color='red')
# DataFrame rows are selected by date with .loc; ts_by_day['2019'] is a column lookup in pandas 2.x.
ts_by_day.loc['2019', 'CLOSE'].plot(grid=True)

In [None]:
# Rolling median over 3 observations of the same weekday, lagged by one observation of that weekday.
rolling_median_window_3 = groupped.transform(lambda x: x.rolling(window=3).median().shift(1))
rolling_median_window_3.loc['2019'].plot(figsize=(20, 10), color='red')
# DataFrame rows are selected by date with .loc; ts_by_day['2019'] is a column lookup in pandas 2.x.
ts_by_day.loc['2019', 'CLOSE'].plot(grid=True)

In [None]:
# Rolling median over 5 observations of the same weekday, lagged by one observation of that weekday.
rolling_median_window_5 = groupped.transform(lambda x: x.rolling(window=5).median().shift(1))
rolling_median_window_5.loc['2019'].plot(figsize=(20, 10), color='red')
# DataFrame rows are selected by date with .loc; ts_by_day['2019'] is a column lookup in pandas 2.x.
ts_by_day.loc['2019', 'CLOSE'].plot(grid=True)

In [None]:
# Rolling maximum over 1 observation of the same weekday, lagged by one observation of that weekday.
rolling_max_window_1 = groupped.transform(lambda x: x.rolling(window=1).max().shift(1))
rolling_max_window_1.loc['2019'].plot(figsize=(20, 10), color='red')
# DataFrame rows are selected by date with .loc; ts_by_day['2019'] is a column lookup in pandas 2.x.
ts_by_day.loc['2019', 'CLOSE'].plot(grid=True)

In [None]:
# Rolling maximum over 3 observations of the same weekday, lagged by one observation of that weekday.
rolling_max_window_3 = groupped.transform(lambda x: x.rolling(window=3).max().shift(1))
rolling_max_window_3.loc['2019'].plot(figsize=(20, 10), color='red')
# DataFrame rows are selected by date with .loc; ts_by_day['2019'] is a column lookup in pandas 2.x.
ts_by_day.loc['2019', 'CLOSE'].plot(grid=True)

In [None]:
# Rolling maximum over 5 observations of the same weekday, lagged by one observation of that weekday.
rolling_max_window_5 = groupped.transform(lambda x: x.rolling(window=5).max().shift(1))
rolling_max_window_5.loc['2019'].plot(figsize=(20, 10), color='red')
# DataFrame rows are selected by date with .loc; ts_by_day['2019'] is a column lookup in pandas 2.x.
ts_by_day.loc['2019', 'CLOSE'].plot(grid=True)

In [None]:
# Rolling minimum over 1 observation of the same weekday, lagged by one observation of that weekday.
rolling_min_window_1 = groupped.transform(lambda x: x.rolling(window=1).min().shift(1))
rolling_min_window_1.loc['2019'].plot(figsize=(20, 10), color='red')
# DataFrame rows are selected by date with .loc; ts_by_day['2019'] is a column lookup in pandas 2.x.
ts_by_day.loc['2019', 'CLOSE'].plot(grid=True)

In [None]:
# Rolling minimum over 3 observations of the same weekday, lagged by one observation of that weekday.
rolling_min_window_3 = groupped.transform(lambda x: x.rolling(window=3).min().shift(1))
rolling_min_window_3.loc['2019'].plot(figsize=(20, 10), color='red')
# DataFrame rows are selected by date with .loc; ts_by_day['2019'] is a column lookup in pandas 2.x.
ts_by_day.loc['2019', 'CLOSE'].plot(grid=True)

In [None]:
# Rolling minimum over 5 observations of the same weekday, lagged by one observation of that weekday.
rolling_min_window_5 = groupped.transform(lambda x: x.rolling(window=5).min().shift(1))
rolling_min_window_5.loc['2019'].plot(figsize=(20, 10), color='red')
# DataFrame rows are selected by date with .loc; ts_by_day['2019'] is a column lookup in pandas 2.x.
ts_by_day.loc['2019', 'CLOSE'].plot(grid=True)

In [None]:
# Rolling standard deviation over 1 observation of the same weekday, lagged by one observation.
# NOTE(review): with a single observation per window the sample standard deviation
# is presumably NaN everywhere; confirm that an all-NaN feature is acceptable.
rolling_std_window_1 = groupped.transform(lambda x: x.rolling(window=1).std().shift(1))
rolling_std_window_1.loc['2019'].plot(figsize=(20, 10), color='red')
# DataFrame rows are selected by date with .loc; ts_by_day['2019'] is a column lookup in pandas 2.x.
ts_by_day.loc['2019', 'CLOSE'].plot(grid=True)

In [None]:
# Rolling standard deviation over 3 observations of the same weekday, lagged by one observation of that weekday.
rolling_std_window_3 = groupped.transform(lambda x: x.rolling(window=3).std().shift(1))
rolling_std_window_3.loc['2019'].plot(figsize=(20, 10), color='red')
# DataFrame rows are selected by date with .loc; ts_by_day['2019'] is a column lookup in pandas 2.x.
ts_by_day.loc['2019', 'CLOSE'].plot(grid=True)

In [None]:
# Price plus the lagged 3-observation weekday standard deviation, drawn over the price itself.
rolling_std_window_3 = groupped.transform(lambda x: x.rolling(window=3).std().shift(1))
# DataFrame rows are selected by date with .loc; ts_by_day['2019'] is a column lookup in pandas 2.x.
close_2019 = ts_by_day.loc['2019', 'CLOSE']
result = close_2019 + rolling_std_window_3.loc['2019']
close_2019.plot(figsize=(20, 10), color='red')
result.plot(grid=True)

In [None]:
# Rolling standard deviation over 5 observations of the same weekday, lagged by one observation of that weekday.
rolling_std_window_5 = groupped.transform(lambda x: x.rolling(window=5).std().shift(1))
rolling_std_window_5.loc['2019'].plot(figsize=(20, 10), color='red')
# DataFrame rows are selected by date with .loc; ts_by_day['2019'] is a column lookup in pandas 2.x.
ts_by_day.loc['2019', 'CLOSE'].plot(grid=True)

In [None]:
# Price plus the lagged 5-observation weekday standard deviation, drawn over the price itself.
rolling_std_window_5 = groupped.transform(lambda x: x.rolling(window=5).std().shift(1))
# DataFrame rows are selected by date with .loc; ts_by_day['2019'] is a column lookup in pandas 2.x.
close_2019 = ts_by_day.loc['2019', 'CLOSE']
result = close_2019 + rolling_std_window_5.loc['2019']
close_2019.plot(figsize=(20, 10), color='red')
result.plot(grid=True)

In [None]:
##### Задание 3

In [None]:
# Calendar features and daily prices side by side; only dates present in both are kept.
events_with_prices = pd.concat(
    [events, ts_by_day],
    join="inner",
    axis=1,
)
events_with_prices

In [None]:
# Expanding mean of CLOSE, lagged by one row, for rows with 'выходной' == 0,
# 'праздник' == 1 and 'предпраздничный рабочий день' == 1.
def lagged_expanding_mean(prices):
    return prices.expanding().mean().shift(1)

working_rows = events_with_prices.loc[events_with_prices['выходной'] == 0]
working_rows.groupby('выходной')['CLOSE'].transform(lagged_expanding_mean).plot(figsize=(20, 10))

feast_rows = events_with_prices.loc[events_with_prices['праздник'] == 1]
feast_rows.groupby('праздник')['CLOSE'].transform(lagged_expanding_mean).plot(figsize=(20, 10), color='red')

pre_feast_rows = events_with_prices.loc[events_with_prices['предпраздничный рабочий день'] == 1]
pre_feast_rows.groupby('предпраздничный рабочий день')['CLOSE'].transform(lagged_expanding_mean).plot(figsize=(20, 10), color='green')

In [None]:
# Expanding median of CLOSE, lagged by one row, for rows with 'выходной' == 0,
# 'праздник' == 1 and 'предпраздничный рабочий день' == 1.
def lagged_expanding_median(prices):
    return prices.expanding().median().shift(1)

working_rows = events_with_prices.loc[events_with_prices['выходной'] == 0]
working_rows.groupby('выходной')['CLOSE'].transform(lagged_expanding_median).plot(figsize=(20, 10))

feast_rows = events_with_prices.loc[events_with_prices['праздник'] == 1]
feast_rows.groupby('праздник')['CLOSE'].transform(lagged_expanding_median).plot(figsize=(20, 10), color='red')

pre_feast_rows = events_with_prices.loc[events_with_prices['предпраздничный рабочий день'] == 1]
pre_feast_rows.groupby('предпраздничный рабочий день')['CLOSE'].transform(lagged_expanding_median).plot(figsize=(20, 10), color='green')

In [None]:
# Expanding maximum of CLOSE, lagged by one row, for rows with 'выходной' == 0,
# 'праздник' == 1 and 'предпраздничный рабочий день' == 1.
def lagged_expanding_max(prices):
    return prices.expanding().max().shift(1)

working_rows = events_with_prices.loc[events_with_prices['выходной'] == 0]
working_rows.groupby('выходной')['CLOSE'].transform(lagged_expanding_max).plot(figsize=(20, 10))

feast_rows = events_with_prices.loc[events_with_prices['праздник'] == 1]
feast_rows.groupby('праздник')['CLOSE'].transform(lagged_expanding_max).plot(figsize=(20, 10), color='red')

pre_feast_rows = events_with_prices.loc[events_with_prices['предпраздничный рабочий день'] == 1]
pre_feast_rows.groupby('предпраздничный рабочий день')['CLOSE'].transform(lagged_expanding_max).plot(figsize=(20, 10), color='green')

In [None]:
# Expanding minimum of CLOSE, lagged by one row, for rows with 'выходной' == 0,
# 'праздник' == 1 and 'предпраздничный рабочий день' == 1.
def lagged_expanding_min(prices):
    return prices.expanding().min().shift(1)

working_rows = events_with_prices.loc[events_with_prices['выходной'] == 0]
working_rows.groupby('выходной')['CLOSE'].transform(lagged_expanding_min).plot(figsize=(20, 10))

feast_rows = events_with_prices.loc[events_with_prices['праздник'] == 1]
feast_rows.groupby('праздник')['CLOSE'].transform(lagged_expanding_min).plot(figsize=(20, 10), color='red')

pre_feast_rows = events_with_prices.loc[events_with_prices['предпраздничный рабочий день'] == 1]
pre_feast_rows.groupby('предпраздничный рабочий день')['CLOSE'].transform(lagged_expanding_min).plot(figsize=(20, 10), color='green')

In [None]:
# Expanding standard deviation of CLOSE, lagged by one row, for rows with 'выходной' == 0,
# 'праздник' == 1 and 'предпраздничный рабочий день' == 1.
def lagged_expanding_std(prices):
    return prices.expanding().std().shift(1)

working_rows = events_with_prices.loc[events_with_prices['выходной'] == 0]
working_rows.groupby('выходной')['CLOSE'].transform(lagged_expanding_std).plot(figsize=(20, 10))

feast_rows = events_with_prices.loc[events_with_prices['праздник'] == 1]
feast_rows.groupby('праздник')['CLOSE'].transform(lagged_expanding_std).plot(figsize=(20, 10), color='red')

pre_feast_rows = events_with_prices.loc[events_with_prices['предпраздничный рабочий день'] == 1]
pre_feast_rows.groupby('предпраздничный рабочий день')['CLOSE'].transform(lagged_expanding_std).plot(figsize=(20, 10), color='green')