# Time Series Pipeline - Анализ временных рядов

Пайплайны для:
- Forecasting (ARIMA, Prophet, LSTM)
- Anomaly Detection
- Сезонная декомпозиция
- Feature engineering для временных рядов

In [None]:
# Install the third-party packages this notebook imports; `-q` keeps pip's output quiet.
# The leading `!` is IPython/Jupyter shell syntax, so this line only runs inside a notebook kernel.
!pip install pandas numpy scikit-learn prophet statsmodels torch matplotlib seaborn -q

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from prophet import Prophet
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.seasonal import seasonal_decompose
from sklearn.metrics import mean_squared_error, mean_absolute_error
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import warnings
# Suppress every warning category for the rest of the session.
# NOTE(review): this also hides deprecation/convergence warnings raised by the
# modelling libraries — confirm that blanket suppression is intended.
warnings.filterwarnings('ignore')

# Apply a dark-grid plotting style to all figures created afterwards.
# NOTE(review): the 'seaborn-v0_8-*' style names presumably require a recent
# matplotlib release — confirm against the installed version.
plt.style.use('seaborn-v0_8-darkgrid')
print("✓ Библиотеки загружены!")

## 1. Загрузка данных

In [None]:
# === YOUR DATA ===
# Expected layout: a `date` column plus a `value` column
# (additional feature columns are optional).
df = pd.read_csv('timeseries.csv')

# Synthetic example data (uncomment to use instead of the CSV):
# dates = pd.date_range(start='2020-01-01', end='2023-12-31', freq='D')
# values = np.cumsum(np.random.randn(len(dates))) + 100
# df = pd.DataFrame({'date': dates, 'value': values})

# Parse the dates, order the rows chronologically and rebuild a 0..n-1 index
# so that later positional slicing follows time order.
df = (
    df.assign(date=lambda frame: pd.to_datetime(frame['date']))
    .sort_values('date')
    .reset_index(drop=True)
)

# Quick sanity report: row count, covered period, first rows.
print(f"Данные: {len(df)} записей")
print(f"Период: {df['date'].min()} - {df['date'].max()}")
print(f"\nПервые строки:\n{df.head()}")

In [None]:
# Line plot of the raw series over time.
fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(df['date'], df['value'])
ax.set_title('Временной ряд')
ax.set_xlabel('Дата')
ax.set_ylabel('Значение')
# Tilt the date labels so neighbouring ticks do not overlap.
plt.xticks(rotation=45)
fig.tight_layout()
plt.show()

## 2. Feature Engineering для временных рядов

In [None]:
def create_time_features(df, date_col='date'):
    """Return a copy of ``df`` extended with calendar and cyclical features.

    Args:
        df: Frame containing a datetime-typed column named ``date_col``
            (the ``.dt`` accessor is used on it).
        date_col: Name of the datetime column to derive features from.

    Returns:
        A new DataFrame (the input is not modified) with these columns
        appended, in order: ``year``, ``month``, ``day``, ``dayofweek``,
        ``dayofyear``, ``quarter``, ``week`` (ISO week number),
        ``is_weekend`` (1 when ``dayofweek >= 5``, else 0), ``month_sin``,
        ``month_cos``, ``day_sin``, ``day_cos``.
    """
    out = df.copy()
    stamps = out[date_col].dt

    # Plain calendar fields whose column name equals the accessor attribute.
    for field in ('year', 'month', 'day', 'dayofweek', 'dayofyear', 'quarter'):
        out[field] = getattr(stamps, field)
    out['week'] = stamps.isocalendar().week

    # Weekend flag: dayofweek counts from Monday=0, so 5 and 6 are Sat/Sun.
    out['is_weekend'] = (out['dayofweek'] >= 5).astype(int)

    # Cyclical encodings: map month (period 12) and day-of-month (period 31)
    # onto the unit circle so the end of a cycle sits next to its start.
    month_angle = 2 * np.pi * out['month'] / 12
    out['month_sin'] = np.sin(month_angle)
    out['month_cos'] = np.cos(month_angle)

    day_angle = 2 * np.pi * out['day'] / 31
    out['day_sin'] = np.sin(day_angle)
    out['day_cos'] = np.cos(day_angle)

    return out

def create_lag_features(df, value_col='value', lags=(1, 7, 14, 30)):
    """Return a copy of ``df`` with lagged copies of ``value_col`` appended.

    Args:
        df: Input frame; it is copied, never modified.
        value_col: Name of the column to lag.
        lags: Iterable of row offsets. For each ``lag`` a column
            ``lag_<lag>`` holding ``value_col`` shifted down by ``lag`` rows
            is added; the first ``lag`` rows of that column are NaN.
            The default is an immutable tuple so the default object cannot
            be mutated and shared between calls.

    Returns:
        A new DataFrame with one ``lag_<lag>`` column per requested lag,
        in the order given.
    """
    out = df.copy()
    # Read the source column once, before any lag column is added.
    source = out[value_col]

    for lag in lags:
        # Shifts are by row position, so rows are assumed to be in time order.
        out[f'lag_{lag}'] = source.shift(lag)

    return out

def create_rolling_features(df, value_col='value', windows=(7, 14, 30)):
    """Return a copy of ``df`` with rolling-window statistics appended.

    Args:
        df: Input frame; it is copied, never modified.
        value_col: Name of the column the statistics are computed on.
        windows: Iterable of window sizes (in rows). For each ``window`` the
            columns ``rolling_mean_<window>``, ``rolling_std_<window>``,
            ``rolling_min_<window>`` and ``rolling_max_<window>`` are added.
            The default is an immutable tuple so the default object cannot
            be mutated and shared between calls.

    Returns:
        A new DataFrame with four statistic columns per window. The first
        ``window - 1`` rows of each are NaN because the window is not full.
    """
    out = df.copy()
    source = out[value_col]

    for window in windows:
        # Build the rolling view once and reuse it for all four statistics.
        # NOTE(review): the window ends at (and includes) the current row, so
        # these columns contain the current value — shift them before using
        # them as predictors of that same row.
        rolling = source.rolling(window=window)
        out[f'rolling_mean_{window}'] = rolling.mean()
        out[f'rolling_std_{window}'] = rolling.std()
        out[f'rolling_min_{window}'] = rolling.min()
        out[f'rolling_max_{window}'] = rolling.max()

    return out

# Build the feature table by chaining the three feature builders:
# calendar features, then lags, then rolling statistics.
df_fe = (
    df.pipe(create_time_features)
    .pipe(create_lag_features)
    .pipe(create_rolling_features)
)

# Report how many columns the resulting frame has and list them.
print(f"Признаков создано: {len(df_fe.columns)}")
print(f"\nПризнаки: {df_fe.columns.tolist()}")

## 3. Сезонная декомпозиция

In [None]:
# Split the series into trend, seasonal and residual components
# using an additive model on the date-indexed values.
df_temp = df.set_index('date')
decomposition = seasonal_decompose(
    df_temp['value'],
    model='additive',
    period=30,  # season length in observations; depends on the data
)

# One stacked panel per component, top to bottom.
fig, axes = plt.subplots(4, 1, figsize=(15, 10))
panels = (
    (decomposition.observed, 'Наблюдаемые данные'),
    (decomposition.trend, 'Тренд'),
    (decomposition.seasonal, 'Сезонность'),
    (decomposition.resid, 'Остатки'),
)
for ax, (component, title) in zip(axes, panels):
    component.plot(ax=ax, title=title)
plt.tight_layout()
plt.show()

## 4. Prophet - Facebook forecasting

In [None]:
# Prophet expects the timestamp column to be called `ds` and the target `y`.
prophet_df = df.rename(columns={'date': 'ds', 'value': 'y'})

# Chronological 80/20 split: the first 80% of rows train the model,
# the remaining rows are held out for evaluation.
train_size = int(len(prophet_df) * 0.8)
train_prophet = prophet_df[:train_size]
test_prophet = prophet_df[train_size:]

# Model configuration
model_prophet = Prophet(
    yearly_seasonality=True,
    weekly_seasonality=True,
    daily_seasonality=False,
    seasonality_mode='additive',  # or 'multiplicative'
    changepoint_prior_scale=0.05,
)

# Fit on the training slice only.
model_prophet.fit(train_prophet)

# Predict on the timestamps that actually occur in the data (train rows
# followed by hold-out rows) instead of synthesising a daily range with
# make_future_dataframe(freq='D'). For a gap-free daily series the two are
# identical; for any other frequency, or a series with gaps or duplicate
# dates, a synthetic daily horizon would not line up row-for-row with
# `prophet_df`, and positional slicing of `forecast` at `train_size` would
# compare predictions against the wrong observations.
future = prophet_df[['ds']].copy()
forecast = model_prophet.predict(future)

# Forecast plot and per-component (trend / seasonality) plot.
fig1 = model_prophet.plot(forecast)
plt.title('Prophet Forecast')
plt.show()

fig2 = model_prophet.plot_components(forecast)
plt.show()

print("✓ Prophet модель обучена!")

In [None]:
# Hold-out accuracy of the Prophet model.
# Forecast rows from position `train_size` onward are taken as the hold-out
# predictions — this assumes `forecast` is row-aligned with the full dataset.
test_actual = test_prophet['y'].to_numpy()
test_predictions = forecast['yhat'].to_numpy()[train_size:]

mae = mean_absolute_error(test_actual, test_predictions)
mse = mean_squared_error(test_actual, test_predictions)
rmse = np.sqrt(mse)

print(f"Prophet MAE: {mae:.4f}")
print(f"Prophet RMSE: {rmse:.4f}")

## 5. ARIMA / SARIMAX

In [None]:
# ARIMA model
# The (p, d, q) order has to be tuned (auto_arima from pmdarima can help):
#   p - autoregressive order
#   d - differencing order
#   q - moving-average order

# Same chronological split point as the Prophet section.
train_values = df['value'].iloc[:train_size]
test_values = df['value'].iloc[train_size:]

# Fit on the training slice, then forecast one step per hold-out row.
arima_model = ARIMA(train_values, order=(1, 1, 1))
arima_fitted = arima_model.fit()
arima_forecast = arima_fitted.forecast(steps=len(test_values))

# Hold-out accuracy.
arima_mae = mean_absolute_error(test_values, arima_forecast)
arima_mse = mean_squared_error(test_values, arima_forecast)
arima_rmse = np.sqrt(arima_mse)

print(f"ARIMA MAE: {arima_mae:.4f}")
print(f"ARIMA RMSE: {arima_rmse:.4f}")

# Train history, actual hold-out values and the forecast on one axis.
train_dates = df['date'].iloc[:train_size]
test_dates = df['date'].iloc[train_size:]

fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(train_dates, train_values, label='Train')
ax.plot(test_dates, test_values, label='Test', color='orange')
ax.plot(test_dates, arima_forecast, label='ARIMA Forecast', color='red', linestyle='--')
ax.legend()
ax.set_title('ARIMA Forecast')
plt.xticks(rotation=45)
fig.tight_layout()
plt.show()

In [None]:
# SARIMAX (с сезонностью)
# Параметры: (p,d,q) x (P,D,Q,s)
# s - период сезонности (например, 7 для недельной, 12 для месячной)

# sarima_model = SARIMAX(
#     train_values,
#     order=(1, 1, 1),
#     seasonal_order=(1, 1, 1, 7)  # 7 дней для недельной сезонности
# )
# sarima_fitted = sarima_model.fit()
# sarima_forecast = sarima_fitted.forecast(steps=len(test_values))

## 6. LSTM для временных рядов

In [None]:
class TimeSeriesDataset(Dataset):
    """Sliding-window dataset for one-step-ahead forecasting.

    Sample ``i`` is the pair ``(data[i : i + seq_length], data[i + seq_length])``:
    a window of ``seq_length`` consecutive values and the value that
    immediately follows it.

    Args:
        data: 1-D sequence of numbers supporting ``len()``, integer indexing
            and slicing (for example a NumPy array or a list).
        seq_length: Number of consecutive values in each input window.
    """

    def __init__(self, data, seq_length):
        self.data = data
        self.seq_length = seq_length

    def __len__(self):
        """Return the number of (window, target) pairs; 0 if the series is too short."""
        # Clamp at zero: a negative value would make ``len()`` itself raise.
        return max(0, len(self.data) - self.seq_length)

    def __getitem__(self, idx):
        """Return ``(x, y)`` for sample ``idx``.

        Returns:
            ``x``: float32 tensor of shape ``(seq_length,)``.
            ``y``: float32 tensor of shape ``(1,)`` holding the next value.

        Raises:
            IndexError: If ``idx`` is outside ``[-len(self), len(self))``.
        """
        n_samples = len(self)
        # Support Python-style negative indices instead of silently slicing
        # an empty or misaligned window.
        if idx < 0:
            idx += n_samples
        if not 0 <= idx < n_samples:
            raise IndexError(
                f"index out of range for dataset with {n_samples} samples"
            )

        target_pos = idx + self.seq_length
        # torch.tensor copies the data and casts to float32 explicitly.
        x = torch.tensor(self.data[idx:target_pos], dtype=torch.float32)
        y = torch.tensor([self.data[target_pos]], dtype=torch.float32)
        return x, y

class LSTMModel(nn.Module):
    """Stacked LSTM followed by a linear layer producing one value per sequence.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_size=1, hidden_size=64, num_layers=2):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # batch_first=True: inputs are laid out as (batch, seq_length, features).
        self.lstm = nn.LSTM(
            input_size,
            hidden_size,
            num_layers,
            batch_first=True,
        )
        # Maps the final hidden representation to a single output value.
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Run the network on ``x`` of shape (batch, seq_length, features).

        Returns:
            Tensor of shape (batch, 1) computed from the LSTM output at the
            last time step.
        """
        sequence_out, _state = self.lstm(x)
        last_step = sequence_out[:, -1, :]
        return self.fc(last_step)

In [None]:
# Data normalisation and training setup for the LSTM
from sklearn.preprocessing import MinMaxScaler

# Hyper-parameters
SEQ_LENGTH = 30  # number of previous values fed to the model per sample
BATCH_SIZE = 32
EPOCHS = 50

# Fit the scaler on the TRAINING slice only, then apply it to the whole
# series. Fitting on all rows would leak the hold-out period's min/max into
# training and make the evaluation optimistic. Consequently hold-out values
# may fall slightly outside [0, 1]; inverse_transform still maps them back
# correctly.
values_2d = df['value'].values.reshape(-1, 1)
scaler = MinMaxScaler()
scaler.fit(values_2d[:train_size])
scaled_data = scaler.transform(values_2d).flatten()

# Train/test split at the same position used for the other models.
train_data = scaled_data[:train_size]
test_data = scaled_data[train_size:]

# Each sample needs SEQ_LENGTH inputs plus one target; fail early with a
# clear message instead of an obscure error further down.
if len(train_data) <= SEQ_LENGTH:
    raise ValueError(
        f"Need more than SEQ_LENGTH={SEQ_LENGTH} training points, "
        f"got {len(train_data)}"
    )

# Sliding-window dataset; windows are shuffled between epochs.
train_dataset = TimeSeriesDataset(train_data, SEQ_LENGTH)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)

# Model, loss and optimiser (GPU when available, otherwise CPU).
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_lstm = LSTMModel().to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model_lstm.parameters(), lr=0.001)

print(f"Device: {device}")
print(f"Train sequences: {len(train_dataset)}")

In [None]:
# Train the LSTM: one optimiser step per mini-batch, tracking the mean
# batch loss of every epoch.
model_lstm.train()
losses = []

for epoch in range(EPOCHS):
    batch_losses = []
    for x_batch, y_batch in train_loader:
        # Add the trailing feature axis: (batch, seq) -> (batch, seq, 1).
        inputs = x_batch.unsqueeze(-1).to(device)
        targets = y_batch.to(device)

        optimizer.zero_grad()
        loss = criterion(model_lstm(inputs), targets)
        loss.backward()
        optimizer.step()

        batch_losses.append(loss.item())

    avg_loss = sum(batch_losses) / len(train_loader)
    losses.append(avg_loss)

    # Progress line every 10th epoch.
    if (epoch + 1) % 10 == 0:
        print(f"Epoch {epoch+1}/{EPOCHS}, Loss: {avg_loss:.6f}")

print("✓ LSTM обучена!")

# Loss curve over epochs.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(losses)
ax.set_title('Training Loss')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
plt.show()

In [None]:
# Recursive multi-step forecast with the LSTM: every prediction is appended
# to the input window and used to predict the following step.
model_lstm.eval()

# Seed the window with the last SEQ_LENGTH (scaled) training values.
window = train_data[-SEQ_LENGTH:].tolist()
scaled_forecast = []

with torch.no_grad():
    for _step in range(len(test_data)):
        # Shape (1, seq, 1): a batch of one sequence with one feature.
        x = torch.FloatTensor(window).unsqueeze(0).unsqueeze(-1).to(device)
        next_value = model_lstm(x).item()
        scaled_forecast.append(next_value)
        # Slide the window: drop the oldest value, append the new prediction.
        window = [*window[1:], next_value]

# Map the predictions back to the original value scale.
lstm_predictions = scaler.inverse_transform(
    np.array(scaled_forecast).reshape(-1, 1)
).flatten()

# Hold-out accuracy.
lstm_mae = mean_absolute_error(test_values, lstm_predictions)
lstm_mse = mean_squared_error(test_values, lstm_predictions)
lstm_rmse = np.sqrt(lstm_mse)

print(f"LSTM MAE: {lstm_mae:.4f}")
print(f"LSTM RMSE: {lstm_rmse:.4f}")

# Train history, actual hold-out values and the forecast on one axis.
train_dates = df['date'].iloc[:train_size]
test_dates = df['date'].iloc[train_size:]

fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(train_dates, train_values, label='Train')
ax.plot(test_dates, test_values, label='Test', color='orange')
ax.plot(test_dates, lstm_predictions, label='LSTM Forecast', color='green', linestyle='--')
ax.legend()
ax.set_title('LSTM Forecast')
plt.xticks(rotation=45)
fig.tight_layout()
plt.show()

## 7. Сравнение моделей

In [None]:
# Side-by-side comparison of the three models on the hold-out metrics.
results = pd.DataFrame(
    [
        ('Prophet', mae, rmse),
        ('ARIMA', arima_mae, arima_rmse),
        ('LSTM', lstm_mae, lstm_rmse),
    ],
    columns=['Model', 'MAE', 'RMSE'],
)

print("\nСравнение моделей:")
print(results.to_string(index=False))

# One bar chart per metric; the RMSE chart is drawn in orange.
fig, axes = plt.subplots(1, 2, figsize=(15, 5))
charts = (
    ('MAE', 'MAE Comparison', {}),
    ('RMSE', 'RMSE Comparison', {'color': 'orange'}),
)
for ax, (metric, title, bar_kwargs) in zip(axes, charts):
    results.plot(x='Model', y=metric, kind='bar', ax=ax, legend=False, **bar_kwargs)
    ax.set_title(title)
    ax.set_ylabel(metric)

plt.tight_layout()
plt.show()

## 8. Создание submission

In [None]:
# === FINAL FORECAST ===
# Use whichever model performed best; the example below uses Prophet
# to forecast a future period.
FORECAST_PERIODS = 30  # how many days ahead to forecast

# Refit on the complete dataset (train + hold-out).
final_model = Prophet()
final_model.fit(prophet_df)

# Extend the history by FORECAST_PERIODS daily steps and predict all rows.
future_dates = final_model.make_future_dataframe(
    periods=FORECAST_PERIODS,
    freq='D',
)
final_forecast = final_model.predict(future_dates)

# Keep only the trailing rows, i.e. the dates beyond the observed history.
future_predictions = final_forecast.tail(FORECAST_PERIODS)

# Submission file: one row per future date with its point forecast.
submission = future_predictions[['ds', 'yhat']].rename(
    columns={'ds': 'date', 'yhat': 'prediction'}
)

submission.to_csv('timeseries_submission.csv', index=False)
print("✓ Submission сохранен!")
print(submission.head())