In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import ruptures as rpt
from sklearn.preprocessing import StandardScaler

# Stage intervals found for each successfully analysed country:
# {country: [(start_row, end_row), ...]}. Rows are positional indices into the
# frame left after dropping rows without 'total_cases'; the end_row of the
# final interval is the index of the last row.
country_stages = {}

countries = ['France', 'Japan', 'Germany', 'South Korea', 'Russia']

# Settings forwarded to ruptures' PELT search.
model = "l2"        # `model` argument of rpt.Pelt
penalty_value = 2   # `pen` argument of predict(); also shown in the plot title
min_size = 7        # `min_size` argument of rpt.Pelt
jump = 5            # `jump` argument of rpt.Pelt


def stage_boundaries(raw_breakpoints, n_samples):
    """Return the sorted, de-duplicated stage boundaries for a signal.

    Keeps only the breakpoints strictly inside ``(0, n_samples)`` and adds
    the first row (0) and the last row (``n_samples - 1``), so every
    boundary is a valid positional index. Returns ``[]`` for an empty signal.
    """
    if n_samples <= 0:
        return []
    interior = {bp for bp in raw_breakpoints if 0 < bp < n_samples}
    return sorted(interior | {0, n_samples - 1})


def phase_stop(end, n_samples):
    """Return the exclusive slice stop for a stage that ends at boundary `end`.

    Interior boundaries are the first row of the next stage, so they are
    already exclusive. The final boundary is the index of the last row and
    must be included, otherwise the last observation is silently dropped.
    """
    return n_samples if end == n_samples - 1 else end


def summarize_phases(df, boundaries, country):
    """Build one summary record (dict) per stage.

    `df` must have 'date' (datetime) and 'total_cases' columns; `boundaries`
    is the list produced by `stage_boundaries` for that frame.
    """
    n_samples = len(df)
    records = []
    for number, (start, end) in enumerate(zip(boundaries[:-1], boundaries[1:]), start=1):
        phase_data = df.iloc[start:phase_stop(end, n_samples)]
        first_day = phase_data['date'].iloc[0]
        last_day = phase_data['date'].iloc[-1]
        duration = (last_day - first_day).days
        cases_diff = phase_data['total_cases'].iloc[-1] - phase_data['total_cases'].iloc[0]
        # A stage that starts and ends on the same day has no defined rate.
        growth_rate = cases_diff / duration if duration > 0 else 0

        records.append({
            'Страна': country,
            'Фаза': number,
            'Начало': first_day.strftime('%Y-%m-%d'),
            'Конец': last_day.strftime('%Y-%m-%d'),
            'Длительность (дни)': duration,
            'Рост случаев': f"{cases_diff:,}",
            'Скорость роста': f"{growth_rate:,.2f} случаев/день",
            'Среднее': f"{phase_data['total_cases'].mean():,.0f}"
        })
    return records


def plot_stages(df, boundaries, country):
    """Plot total cases with one shaded span and one mean line per stage."""
    n_samples = len(df)
    fig = plt.figure(figsize=(16, 8))
    try:
        plt.plot(df['date'], df['total_cases'], label='Все случаи', color='blue', linewidth=1.5)

        phase_colors = plt.cm.tab20(np.linspace(0, 1, max(len(boundaries) - 1, 0)))

        for i, (start, end) in enumerate(zip(boundaries[:-1], boundaries[1:])):
            span_from = df['date'].iloc[start]
            span_to = df['date'].iloc[end]
            plt.axvspan(span_from, span_to,
                        alpha=0.2, color=phase_colors[i], label=f'Фаза {i+1}')

            phase_mean = df['total_cases'].iloc[start:phase_stop(end, n_samples)].mean()
            plt.hlines(phase_mean, span_from, span_to,
                       colors=phase_colors[i], linestyles='dashed', linewidth=1)

        plt.title(f'COVID-19 в {country}: автоматическое выделение стадий\n(Алгоритм PELT, чувствительность={penalty_value})', pad=20)
        plt.xlabel('Дата', fontsize=12)
        plt.ylabel('Общее число случаев', fontsize=12)
        plt.grid(True, alpha=0.2)

        # Cap the legend at 10 entries (the case curve plus the first 9 stages).
        handles, labels = plt.gca().get_legend_handles_labels()
        plt.legend(handles[:10], labels[:10], bbox_to_anchor=(1.05, 1), loc='upper left')

        plt.tight_layout()
        plt.show()
    except Exception:
        # Do not leave a half-drawn figure open when plotting fails.
        plt.close(fig)
        raise


for country in countries:
    try:
        # Load and prepare the data.
        # NOTE(review): assumes the CSV rows are already in chronological
        # order — TODO confirm against the filtering step.
        filename = f"Filtered_data_for_stages/{country.replace(' ', '_')}_filtered_covid_data.csv"
        df = pd.read_csv(filename)
        df['date'] = pd.to_datetime(df['date'])
        df = df.dropna(subset=['total_cases'])

        # Standardise the series before the change-point search.
        signal = df['total_cases'].values.reshape(-1, 1)
        signal = StandardScaler().fit_transform(signal).flatten()

        # PELT change-point detection.
        algo = rpt.Pelt(model=model, min_size=min_size, jump=jump)
        algo.fit(signal)
        boundaries = stage_boundaries(algo.predict(pen=penalty_value), len(signal))

        # Store the stage intervals before any reporting, so a reporting or
        # plotting failure does not lose them.
        country_stages[country] = list(zip(boundaries[:-1], boundaries[1:]))

        # Per-stage table (printed before the figure is created).
        phases_df = pd.DataFrame(summarize_phases(df, boundaries, country))
        print(f"\nДетализация стадий для {country}:")
        print(phases_df.to_string(index=False))

        # Visualisation.
        plot_stages(df, boundaries, country)

    except Exception as e:
        # Best-effort batch: report the failure and move on to the next country.
        print(f"\nОшибка при анализе {country}: {e}")
