# In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import ruptures as rpt
import tensorflow as tf
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Model


# Change-point detection settings consumed by the ruptures PELT call in this script.
model = "l2"  # cost model name passed as rpt.Pelt(model=...)
penalty_value = 2  # penalty passed as algo.predict(pen=...)
min_size = 7  # passed as rpt.Pelt(min_size=...); presumably the minimum segment length in samples -- confirm against ruptures docs
jump = 5  # passed as rpt.Pelt(jump=...); presumably the step of the candidate-breakpoint grid -- confirm against ruptures docs


class StageAttention(tf.keras.layers.Layer):
    """Hold one trainable logit per stage and expose them as softmax weights.

    The layer owns a float32 vector of ``num_stages`` logits, all initialised
    to 1.0. Calling the layer returns the softmax of that vector, so the
    returned weights are non-negative and sum to 1. Because every logit
    starts at the same value, the weights are uniform (``1 / num_stages``)
    until the variable is changed, e.g. by training.

    Args:
        num_stages: Number of stages, i.e. the length of the weight vector.
        **kwargs: Standard Keras layer arguments (``name``, ``dtype``, ...)
            forwarded to ``tf.keras.layers.Layer``.
    """

    def __init__(self, num_stages, **kwargs):
        super().__init__(**kwargs)
        # Kept so that get_config() can rebuild an identical layer.
        self.num_stages = num_stages
        self.attention_weights = tf.Variable(
            initial_value=[1.0] * num_stages,
            trainable=True,
            dtype=tf.float32,
        )
        self.softmax = tf.keras.layers.Softmax()

    def call(self, inputs):
        """Return the softmax-normalised stage weights.

        Args:
            inputs: Ignored. The output depends only on the layer's own
                ``attention_weights`` variable; the argument exists to
                satisfy the Keras ``call`` signature.

        Returns:
            A 1-D float32 tensor of length ``num_stages``.
        """
        return self.softmax(self.attention_weights)

    def get_config(self):
        """Return the layer configuration, including ``num_stages``."""
        config = super().get_config()
        config.update({"num_stages": self.num_stages})
        return config


# --- Load the input series ---------------------------------------------------
file_path = "Filtered_data_for_stages/Russia_filtered_covid_data.csv"
df = pd.read_csv(file_path, parse_dates=["date"], index_col=[0])

# Standardise total_cases; the same fitted scaler is reused further down to
# map the weighted values back to the original scale.
scaler = StandardScaler()
scaled_data = scaler.fit_transform(df[['total_cases']])

# --- Detect change points with PELT ------------------------------------------
algo = rpt.Pelt(model=model, min_size=min_size, jump=jump)
algo.fit(scaled_data.flatten())
breakpoints = algo.predict(pen=penalty_value)

# Turn the detected breakpoints into half-open [start, end) intervals that
# cover the whole series. The closing boundary must be len(scaled_data), not
# len(scaled_data) - 1: the intervals are used as slices, so an end of
# len - 1 would silently drop the last observation. The set removes any
# duplicated boundary so that no empty interval is produced.
n_samples = len(scaled_data)
inner_breakpoints = (bp for bp in breakpoints if 0 < bp < n_samples)
breakpoints = sorted({0, n_samples, *inner_breakpoints})
intervals = list(zip(breakpoints[:-1], breakpoints[1:]))

# One 2-D slice of the scaled data per stage.
stages = [scaled_data[start:end, :] for start, end in intervals]

# --- Stage weights -------------------------------------------------------------
# The layer ignores its input and returns the softmax of its own logits; the
# logits are never trained in this script, so the weights are uniform.
attention_layer = StageAttention(num_stages=len(stages))
stage_weights = attention_layer(tf.ones(len(stages)))
stage_weight_values = stage_weights.numpy()  # convert once, reuse below
print("Stage weights:", stage_weight_values)

# Scale every stage by its weight and stitch the stages back together.
weighted_stages = [
    stage * weight for stage, weight in zip(stages, stage_weight_values)
]
weighted_data = np.concatenate(weighted_stages, axis=0)

# Back to the original units of total_cases.
weighted_data_rescaled = scaler.inverse_transform(weighted_data)

# --- Assemble the result tables -------------------------------------------------
n_rows = len(weighted_data_rescaled)
result_df = pd.DataFrame({
    'date': df.index[:n_rows],
    'original_total_cases': df['total_cases'].iloc[:n_rows],
    'weighted_total_cases': weighted_data_rescaled.flatten(),
    'stage': np.repeat(range(len(stages)), [len(stage) for stage in stages]),
})

stage_info = pd.DataFrame({
    'stage': range(len(stages)),
    'start_date': [df.index[start] for start, _ in intervals],
    # end is exclusive, so the last date inside the stage is at end - 1.
    'end_date': [df.index[end - 1] for _, end in intervals],
    'weight': stage_weight_values,
})

# --- Persist the results ---------------------------------------------------------
output_dir = "results/"
os.makedirs(output_dir, exist_ok=True)

weighted_cases_path = f"{output_dir}weighted_covid_cases_with_stages.csv"
stage_info_path = f"{output_dir}covid_stages_info.csv"

result_df.to_csv(weighted_cases_path, index=False)
stage_info.to_csv(stage_info_path, index=False)

print("\nРезультаты сохранены в:")
print(f"- {weighted_cases_path}")
print(f"- {stage_info_path}")