In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import SGDRegressor
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import GridSearchCV
import joblib

In [2]:
def load_and_prepare_data(train_dir, val_dir, seeds_number_train, seeds_number_val):
    train_df = []
    for i in range(seeds_number_train):
        seed_df = pd.read_csv(train_dir + f'seir_seed_{i}.csv')
        seed_df['prev_I'] = seed_df['I'].shift(-1).fillna(0)
        seed_df['day'] = np.arange(len(seed_df))
        train_df.append(seed_df)
    for i in range(seeds_number_val):
        seed_df = pd.read_csv(val_dir + f'seir_seed_{i}.csv')
        seed_df['prev_I'] = seed_df['I'].shift(-1).fillna(0)
        seed_df['day'] = np.arange(len(seed_df))
        train_df.append(seed_df)
    
    train_df = pd.concat(train_df, ignore_index=True)
    train_df['Beta'].replace(0, 1e-7, inplace=True)
    train_df = train_df[(train_df['E'] > 0)|(train_df['I'] > 0)].copy()
    train_df['log_Beta'] = np.log(train_df['Beta'])
    return train_df

def train_model(train_df, model_file,features):
    X = train_df[features].values
    y = train_df['log_Beta'].values

    model = make_pipeline(
        StandardScaler(),
        PolynomialFeatures(include_bias=False, degree=3),
        SGDRegressor(alpha=0.1, max_iter=5000, penalty='l2',warm_start=False)
    )
    model.fit(X, y)

    # Сохраняем лучшую модель
    joblib.dump(model, model_file)


train_dir = f'train/'
val_dir = f'val/'
seeds_number_train = 960
seeds_number_val = 240
model_file = 'regression_day_SEIR_prev_I_for_seir.joblib'
features = ['day', 'prev_I', 'S', 'E', 'I', 'R']

train_df = load_and_prepare_data(train_dir, val_dir, seeds_number_train, seeds_number_val)
train_model(train_df, model_file, features)


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  train_df['Beta'].replace(0, 1e-7, inplace=True)
