# In [None]:
import os

import joblib
import numpy as np
import pandas as pd
from sklearn.linear_model import SGDRegressor
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import StandardScaler

def load_and_prepare_data(train_df_dir, seeds_number):
    """Load per-seed SEIR CSV files and combine them into one training frame.

    Files named ``seir_seed_<i>.csv`` for ``i`` in ``range(seeds_number)`` are
    read from ``train_df_dir``. For every file, rows with a missing ``Beta``
    are dropped and three columns are added:

    * ``prev_I`` -- the ``I`` column lagged by two rows (of the filtered
      frame), with the first two entries filled with 0.
    * ``seed``   -- the index ``i`` of the file the row came from.
    * ``day``    -- a 0-based row counter within the filtered file.

    The per-seed frames are concatenated, rows with ``Beta <= 0`` are removed
    (the logarithm is undefined there) and ``log_Beta = ln(Beta)`` is added.
    The index is not reset after that last filter, so it may contain gaps.

    Args:
        train_df_dir: Directory containing the CSV files. A trailing path
            separator is optional.
        seeds_number: Number of seed files to read, starting at seed 0.

    Returns:
        A ``pandas.DataFrame`` holding the original CSV columns plus
        ``prev_I``, ``seed``, ``day`` and ``log_Beta``.

    Raises:
        FileNotFoundError: If one of the expected CSV files does not exist.
        ValueError: If ``seeds_number`` is 0 (pandas refuses to concatenate
            an empty list of frames).
    """
    seed_frames = []
    for seed in range(seeds_number):
        csv_path = os.path.join(train_df_dir, f'seir_seed_{seed}.csv')
        raw_df = pd.read_csv(csv_path)

        # Take an explicit copy of the filtered rows so the column
        # assignments below write to an independent frame instead of a
        # slice of raw_df (avoids pandas' SettingWithCopyWarning).
        seed_df = raw_df[raw_df['Beta'].notna()].copy()

        # NOTE(review): the column is called prev_I but the lag is two rows,
        # not one -- confirm that a lag of 2 is intended.
        seed_df['prev_I'] = seed_df['I'].shift(2).fillna(0)
        seed_df['seed'] = seed
        # Days are numbered after the NaN filter, so they count kept rows.
        seed_df['day'] = np.arange(len(seed_df))
        seed_frames.append(seed_df)

    train_df = pd.concat(seed_frames, ignore_index=True)
    # Only strictly positive Beta values have a finite logarithm.
    train_df = train_df[train_df['Beta'] > 0].copy()
    train_df['log_Beta'] = np.log(train_df['Beta'])
    return train_df

def train_model(train_df, degree, model_file):
    """Fit a polynomial SGD regression for ``log_Beta`` and save it to disk.

    The model is a scikit-learn pipeline: ``StandardScaler`` ->
    ``PolynomialFeatures(degree=degree, include_bias=False)`` ->
    ``SGDRegressor`` with an L2 penalty (``alpha=0.1``, ``max_iter=10000``).
    It is fitted on the columns ``day, S, E, I, R, prev_I`` of ``train_df``
    with ``log_Beta`` as the target; all samples are weighted equally.

    Args:
        train_df: DataFrame containing the six feature columns listed above
            and the ``log_Beta`` target column.
        degree: Degree of the polynomial feature expansion.
        model_file: Path the fitted pipeline is written to with
            ``joblib.dump``.

    Returns:
        The fitted pipeline (the same object that was written to
        ``model_file``).
    """
    feature_columns = ['day', 'S', 'E', 'I', 'R', 'prev_I']
    X = train_df[feature_columns].values
    y = train_df['log_Beta'].values

    # NOTE(review): no sample weights are used. If per-sample weighting is
    # wanted, pass it to fit() as ``sgdregressor__sample_weight=...``.
    model = make_pipeline(
        StandardScaler(),
        PolynomialFeatures(include_bias=False, degree=degree),
        SGDRegressor(max_iter=10000, penalty='l2', alpha=0.1,
                     warm_start=False))
    model.fit(X, y)

    joblib.dump(model, model_file)
    return model

# Training configuration.
train_df_dir = 'train_data/'  # directory holding the seir_seed_<i>.csv files
seeds_number = 1500  # seir_seed_0.csv .. seir_seed_1499.csv are read
model_file = 'regression_day_SEIR_prev_I_for_seir.joblib'
degree = 3  # polynomial degree handed to train_model

# Build the training table, then fit the regression; train_model also writes
# the fitted pipeline to model_file.
train_df = load_and_prepare_data(train_df_dir, seeds_number)
model = train_model(train_df, degree, model_file)


