In [None]:
import pandas as pd
from main import AiForecast
import numpy as np

# Paths to the source data
train_candles_path = "data/candles.csv"
train_news_path = "data/news.csv"

# Load the data; the timestamp columns are parsed so they can be compared
# against the pd.Timestamp boundaries below.
candles = pd.read_csv(train_candles_path, parse_dates=['begin'])
news = pd.read_csv(train_news_path, parse_dates=['publish_date'])

# --- Time boundaries ---
# train_end and predict_start are the same instant, and so are predict_end and
# real_start. The train and predict windows are therefore half-open
# [start, end): a row stamped exactly on a shared boundary belongs to the later
# window only and is never duplicated across two sets.
train_start = pd.Timestamp("2020-06-19")
train_end   = pd.Timestamp("2025-01-01")   # exclusive

predict_start = pd.Timestamp("2025-01-01")
predict_end   = pd.Timestamp("2025-05-01")   # exclusive

real_start = pd.Timestamp("2025-05-01")
real_end   = pd.Timestamp("2025-05-21")   # inclusive: no window follows this one

# --- Candle subsets ---
candles_train = candles[(candles['begin'] >= train_start) & (candles['begin'] < train_end)].copy()
candles_for_predict = candles[(candles['begin'] >= predict_start) & (candles['begin'] < predict_end)].copy()
candles_real_results = candles[(candles['begin'] >= real_start) & (candles['begin'] <= real_end)].copy()

# --- Same windows for the news ---
news_train = news[(news['publish_date'] >= train_start) & (news['publish_date'] < train_end)].copy()
news_for_predict = news[(news['publish_date'] >= predict_start) & (news['publish_date'] < predict_end)].copy()
news_real_results = news[(news['publish_date'] >= real_start) & (news['publish_date'] <= real_end)].copy()



In [None]:
def evaluate_submission(submission_df, test_candles_df):
    """
    Compute the RMSE between predicted and realised returns for horizons 1..20.

    For every ticker listed in the submission, the ticker's rows of
    ``test_candles_df`` are ordered by ``begin``; the realised return for
    horizon ``h`` is ``close[h] / close[0] - 1`` on that ordered sequence.
    Tickers with ``h`` or fewer rows are skipped for that horizon.

    NOTE(review): the baseline ``close[0]`` is the first row of
    ``test_candles_df`` for the ticker, not the last candle seen by the
    model -- confirm this matches how ``p1..p20`` are defined.

    Parameters
    ----------
    submission_df : DataFrame with columns ``ticker``, ``p1`` .. ``p20``.
        If a ticker occurs more than once, the prediction of its first row is
        used for every occurrence.
    test_candles_df : DataFrame with columns ``ticker``, ``begin``, ``close``
        holding the real closing prices.

    Returns
    -------
    dict
        Maps ``"p1"`` .. ``"p20"`` to the RMSE over the tickers that have
        enough rows. A horizon with no usable ticker maps to NaN.
    """
    tickers = list(submission_df["ticker"])

    # The ordered closes of a ticker do not depend on the horizon, so build
    # them once instead of re-filtering and re-sorting for each of the 20
    # horizons. Also remember the first submission row of every ticker.
    closes_by_ticker = {}
    first_row_of = {}
    for position, ticker in enumerate(tickers):
        if ticker in first_row_of:
            continue
        first_row_of[ticker] = position
        closes_by_ticker[ticker] = (
            test_candles_df[test_candles_df["ticker"] == ticker]
            .sort_values("begin")["close"]
            .to_numpy()
        )

    metrics = {}
    for horizon in range(1, 21):
        col = f"p{horizon}"
        true_returns = []
        pred_returns = []
        for ticker in tickers:
            closes = closes_by_ticker[ticker]
            if len(closes) <= horizon:
                # Not enough candles to observe this horizon for the ticker.
                continue
            true_returns.append(closes[horizon] / closes[0] - 1)
            pred_returns.append(submission_df[col].values[first_row_of[ticker]])

        if not true_returns:
            # np.mean of an empty array yields NaN together with
            # RuntimeWarnings; report the undefined metric explicitly instead.
            metrics[col] = np.nan
            continue

        errors = np.array(true_returns) - np.array(pred_returns)
        metrics[col] = np.sqrt(np.mean(errors ** 2))
    return metrics

In [None]:
# Literal arguments used by the calls below, named in one place.
artifact_dir = './test_artifact'
submission_path = "test_submission.csv"

ai_forecast = AiForecast(artifact_dir=artifact_dir)

# Train on the training window, then predict from the prediction window.
ai_forecast.train(candles_train, news_train)
submission = ai_forecast.predict(candles_for_predict, news_for_predict)

# Save the predictions without the index column.
submission.to_csv(submission_path, index=False)

In [None]:
import matplotlib.pyplot as plt

# Score the submission against the real candles of the evaluation window.
metrics = evaluate_submission(submission, candles_real_results)

print(metrics)

# One point per forecast horizon, in the insertion order of the metrics dict.
horizons = range(1, 21)
rmse_values = list(metrics.values())

fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(horizons, rmse_values, marker='o')
ax.set_title('RMSE по горизонту прогноза')
ax.set_xlabel('Горизонт (дней вперёд)')
ax.set_ylabel('RMSE ошибки')
ax.grid(True)
plt.show()
