In [4]:
import re
import sqlite3
from datetime import datetime
from typing import Any, Dict, List, Optional

import pandas as pd

class BlackoutsDateChecker:
    """Validate the ``start_date`` / ``end_date`` columns of the ``blackouts`` table.

    Every row is checked for empty dates, a wrong date format, an end date
    that precedes the start date, and dates lying in the future.  Per-row
    findings are stored in ``self.results['details']`` and the per-category
    counters in the remaining ``self.results`` keys.
    """

    # Expected textual representation, e.g. ``2018-01-01 00:08:00``.
    _DATE_FORMAT = '%Y-%m-%d %H:%M:%S'
    # strptime alone is lenient (it accepts "2018-1-1 0:0:0"), so the exact
    # zero-padded shape is enforced separately; ASCII keeps \d to 0-9 only.
    _DATE_PATTERN = re.compile(r'\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}', re.ASCII)
    # Textual stand-ins for a missing value (compared case-insensitively).
    _EMPTY_MARKERS = frozenset({'', 'none', 'nan', 'null'})
    # Counter keys of ``self.results``; also used as machine-readable issue codes.
    _COUNTER_KEYS = (
        'empty_start_date',
        'empty_end_date',
        'invalid_start_date',
        'invalid_end_date',
        'end_before_start',
        'future_dates',
    )
    # Maximum number of problem records printed by the detailed report.
    _MAX_REPORTED = 20

    def __init__(self, db_path: str = "../databases/dataset.db"):
        """Remember the database path; no connection is opened here."""
        self.db_path = db_path
        self.conn = None
        self.results = self._empty_results()

    @classmethod
    def _empty_results(cls) -> Dict[str, Any]:
        """Return a fresh results mapping with all counters at zero."""
        fresh: Dict[str, Any] = {'total_records': 0}
        fresh.update((key, 0) for key in cls._COUNTER_KEYS)
        fresh['details'] = []
        return fresh

    @classmethod
    def _is_empty(cls, value: Optional[str]) -> bool:
        """Return True for ``None``, blank strings and textual null markers."""
        return value is None or value.strip().lower() in cls._EMPTY_MARKERS

    def connect(self):
        """Open the SQLite connection.

        Returns:
            True on success, False if the connection could not be opened.
        """
        try:
            self.conn = sqlite3.connect(self.db_path)
            print(f"‚úÖ –ü–æ–¥–∫–ª—é—á–µ–Ω–∏–µ –∫ {self.db_path} —É—Å–ø–µ—à–Ω–æ")
            return True
        except Exception as e:
            # Deliberate best-effort boundary: report the failure and let the
            # caller decide what to do based on the boolean result.
            print(f"‚ùå –û—à–∏–±–∫–∞ –ø–æ–¥–∫–ª—é—á–µ–Ω–∏—è: {e}")
            return False

    def disconnect(self):
        """Close the connection if one is open."""
        if self.conn:
            self.conn.close()
            # Drop the handle so a second call is a silent no-op.
            self.conn = None
            print("üîå –°–æ–µ–¥–∏–Ω–µ–Ω–∏–µ –∑–∞–∫—Ä—ã—Ç–æ")

    def get_all_blackouts(self) -> pd.DataFrame:
        """Load every row of the ``blackouts`` table, ordered by ``start_date``.

        Returns:
            The loaded rows, or an empty DataFrame if the query failed.
        """
        query = """
        SELECT id, start_date, end_date, description, type, initiator_name, source
        FROM blackouts
        ORDER BY start_date
        """
        try:
            df = pd.read_sql_query(query, self.conn)
            print(f"üìä –ó–∞–≥—Ä—É–∂–µ–Ω–æ {len(df)} –∑–∞–ø–∏—Å–µ–π")
            return df
        except Exception as e:
            # Best-effort: a failed load is reported and treated as "no data".
            print(f"‚ùå –û—à–∏–±–∫–∞ –∑–∞–≥—Ä—É–∑–∫–∏ –¥–∞–Ω–Ω—ã—Ö: {e}")
            return pd.DataFrame()

    def validate_date_format(self, date_str: str) -> bool:
        """Check that *date_str* is a real ``YYYY-MM-DD HH:MM:SS`` timestamp.

        Both the exact zero-padded shape and the calendar validity are
        verified, so ``2018-13-45 99:99:99`` is rejected.
        """
        if not date_str or pd.isna(date_str):
            return False

        text = str(date_str)
        if self._DATE_PATTERN.fullmatch(text) is None:
            return False
        return self.parse_date(text) is not None

    def parse_date(self, date_str: str) -> Optional[datetime]:
        """Parse *date_str* into a ``datetime``; return None if it cannot be parsed."""
        try:
            return datetime.strptime(str(date_str), self._DATE_FORMAT)
        except ValueError:
            return None

    def is_future_date(self, date_obj: datetime) -> bool:
        """Return True if *date_obj* is later than the current local time."""
        return date_obj is not None and date_obj > datetime.now()

    def check_single_record(self, row: pd.Series) -> Dict[str, Any]:
        """Run all date checks on one record.

        Args:
            row: A record exposing ``id``, ``start_date`` and ``end_date``.

        Returns:
            A dict with ``id``, ``start_date``, ``end_date`` (as strings or
            None), ``issues`` (human-readable messages), ``issue_codes``
            (matching ``self.results`` counter keys, one per message) and
            ``is_valid``.
        """
        issues: List[str] = []
        issue_codes: List[str] = []

        def report(code: str, message: str) -> None:
            issue_codes.append(code)
            issues.append(message)

        start_date_str = str(row['start_date']) if not pd.isna(row['start_date']) else None
        end_date_str = str(row['end_date']) if not pd.isna(row['end_date']) else None

        start_empty = self._is_empty(start_date_str)
        end_empty = self._is_empty(end_date_str)

        # Empty dates
        if start_empty:
            report('empty_start_date', "–ü—É—Å—Ç–∞—è start_date")
        if end_empty:
            report('empty_end_date', "–ü—É—Å—Ç–∞—è end_date")

        # Format; an empty value is reported as empty only, not as malformed too
        if not start_empty and not self.validate_date_format(start_date_str):
            report('invalid_start_date', f"–ù–µ–≤–µ—Ä–Ω—ã–π —Ñ–æ—Ä–º–∞—Ç start_date: '{start_date_str}'")
        if not end_empty and not self.validate_date_format(end_date_str):
            report('invalid_end_date', f"–ù–µ–≤–µ—Ä–Ω—ã–π —Ñ–æ—Ä–º–∞—Ç end_date: '{end_date_str}'")

        # Parse for the remaining checks
        start_date = None if start_empty else self.parse_date(start_date_str)
        end_date = None if end_empty else self.parse_date(end_date_str)

        # Logical ordering
        if start_date and end_date and end_date < start_date:
            report('end_before_start', f"end_date ({end_date_str}) —Ä–∞–Ω—å—à–µ start_date ({start_date_str})")

        # Future dates
        if start_date and self.is_future_date(start_date):
            report('future_dates', f"start_date –≤ –±—É–¥—É—â–µ–º: {start_date_str}")
        if end_date and self.is_future_date(end_date):
            report('future_dates', f"end_date –≤ –±—É–¥—É—â–µ–º: {end_date_str}")

        return {
            'id': row['id'],
            'start_date': start_date_str,
            'end_date': end_date_str,
            'issues': issues,
            'issue_codes': issue_codes,
            'is_valid': not issues
        }

    def run_full_check(self):
        """Connect, check every record, print the reports and disconnect.

        Results from any previous run are discarded first, and the
        connection is closed even if a check or a report raises.
        """
        print("\nüöÄ –ù–ê–ß–ò–ù–ê–ï–ú –ü–†–û–í–ï–†–ö–£ –î–ê–¢ –í –¢–ê–ë–õ–ò–¶–ï BLACKOUTS\n" + "="*60)

        # Reset in place so external references to ``results`` stay valid.
        self.results.clear()
        self.results.update(self._empty_results())

        if not self.connect():
            return

        try:
            df = self.get_all_blackouts()
            if df.empty:
                print("‚ùå –ù–µ—Ç –¥–∞–Ω–Ω—ã—Ö –¥–ª—è –ø—Ä–æ–≤–µ—Ä–∫–∏")
                return

            self.results['total_records'] = len(df)
            print(f"üìà –í—Å–µ–≥–æ –∑–∞–ø–∏—Å–µ–π: {len(df)}")

            # Check every record
            for _, row in df.iterrows():
                check = self.check_single_record(row)
                self.results['details'].append(check)

                # Tally by code rather than by message text: the messages
                # embed the offending values and cannot be matched exactly.
                # Each category counts at most once per record.
                for code in set(check.get('issue_codes', ())):
                    if code in self._COUNTER_KEYS:
                        self.results[code] += 1

            self.print_summary()
            self.print_detailed_report()
        finally:
            self.disconnect()

    def print_summary(self):
        """Print the totals and the per-category counters."""
        valid_count = sum(1 for d in self.results['details'] if d['is_valid'])
        problem_count = len(self.results['details']) - valid_count

        print("\nüìã –°–í–û–î–ö–ê –ü–†–û–í–ï–†–ö–ò" + "="*40)
        print(f"‚úÖ –í—Å–µ–≥–æ –∑–∞–ø–∏—Å–µ–π:          {self.results['total_records']}")
        print(f"‚úÖ –í–∞–ª–∏–¥–Ω—ã—Ö –∑–∞–ø–∏—Å–µ–π:       {valid_count}")
        print(f"‚ùå –ü—Ä–æ–±–ª–µ–º–Ω—ã—Ö –∑–∞–ø–∏—Å–µ–π:     {problem_count}")
        print(f"   ‚Ä¢ –ü—É—Å—Ç—ã–µ start_date:    {self.results['empty_start_date']}")
        print(f"   ‚Ä¢ –ü—É—Å—Ç—ã–µ end_date:      {self.results['empty_end_date']}")
        print(f"   ‚Ä¢ –ù–µ–≤–µ—Ä–Ω—ã–π —Ñ–æ—Ä–º–∞—Ç start: {self.results['invalid_start_date']}")
        print(f"   ‚Ä¢ –ù–µ–≤–µ—Ä–Ω—ã–π —Ñ–æ—Ä–º–∞—Ç end:  {self.results['invalid_end_date']}")
        print(f"   ‚Ä¢ end < start:          {self.results['end_before_start']}")
        print(f"   ‚Ä¢ –ë—É–¥—É—â–∏–µ –¥–∞—Ç—ã:         {self.results['future_dates']}")

    def print_detailed_report(self):
        """Print the first 20 problem records with their issues."""
        problems = [d for d in self.results['details'] if not d['is_valid']]
        if problems:
            print(f"\n‚ö†Ô∏è  –ü–†–û–ë–õ–ï–ú–ù–´–ï –ó–ê–ü–ò–°–ò ({len(problems)})" + "="*50)
            for i, problem in enumerate(problems[:self._MAX_REPORTED], 1):
                # ids may be integers, so coerce to text before truncating
                print(f"\n{i}. ID: {str(problem['id'])[:8]}...")
                print(f"   Start: {problem['start_date']}")
                print(f"   End:   {problem['end_date']}")
                for issue in problem['issues']:
                    print(f"   ‚ùå {issue}")
            if len(problems) > self._MAX_REPORTED:
                print(f"\n... –∏ –µ—â–µ {len(problems) - self._MAX_REPORTED} –ø—Ä–æ–±–ª–µ–º–Ω—ã—Ö –∑–∞–ø–∏—Å–µ–π")
        else:
            print("\nüéâ –û–¢–õ–ò–ß–ù–û! –í—Å–µ –¥–∞—Ç—ã –∫–æ—Ä—Ä–µ–∫—Ç–Ω—ã!")

# Main entry point: build a checker with the default database path and run the full check.
if __name__ == "__main__":
    checker = BlackoutsDateChecker()
    checker.run_full_check()


🚀 НАЧИНАЕМ ПРОВЕРКУ ДАТ В ТАБЛИЦЕ BLACKOUTS
✅ Подключение к ../databases/dataset.db успешно
📊 Загружено 25264 записей
📈 Всего записей: 25264

✅ Всего записей:          25264
✅ Валидных записей:       25264
❌ Проблемных записей:     0
   • Пустые start_date:    0
   • Пустые end_date:      0
   • Неверный формат start: 0
   • Неверный формат end:  0
   • end < start:          0
   • Будущие даты:         0

🎉 ОТЛИЧНО! Все даты корректны!
🔌 Соединение закрыто
