# LectureRadar: Source Management System

This notebook demonstrates the initial implementation of the source management system for LectureRadar.

## Features
1. URL Processing and Validation
2. Source Type Detection
3. Basic Data Structures for Sources

In [None]:
# Required imports
from dataclasses import dataclass
from datetime import datetime
from typing import Optional, List
from urllib.parse import urlparse, parse_qs
import requests

## Data Structures

First, let's define our base data structures for different types of sources.

In [None]:
@dataclass
class EventSource:
    """Base record describing a single source that is monitored for events.

    Only ``url``, ``platform`` and ``source_type`` are required; the remaining
    fields have defaults.
    """

    url: str  # source address, stored exactly as supplied
    platform: str  # platform label: 'VK', 'Telegram', 'Website'
    source_type: str  # kind of source: 'Group', 'Channel', 'Page'
    update_freq: int = 60  # update frequency, in minutes
    last_checked: Optional[datetime] = None  # presumably the time of the last check; nothing in this file sets it
    is_active: bool = True  # presumably an enable/disable flag; nothing in this file reads it

@dataclass
class VKSource(EventSource):
    """EventSource extended with VK-specific optional fields."""

    group_id: Optional[str] = None  # VK group identifier; None when not known
    access_token: Optional[str] = None  # presumably a VK API token; nothing in this file sets or reads it

@dataclass
class TelegramSource(EventSource):
    """EventSource extended with a Telegram-specific optional field."""

    channel_id: Optional[str] = None  # Telegram channel identifier; None when not known

## URL Processing

Let's implement the URL processing system to detect and validate different types of sources.

In [None]:
class URLProcessor:
    """Classify source URLs by platform and check that they are reachable.

    Every method is static; the class only groups the related helpers.
    """

    # Domains recognised for each supported platform.
    _VK_DOMAIN = "vk.com"
    _TELEGRAM_DOMAIN = "t.me"

    @staticmethod
    def _host_matches(host: str, domain: str) -> bool:
        """Return True if *host* is *domain* itself or one of its subdomains."""
        return host == domain or host.endswith("." + domain)

    @staticmethod
    def _first_path_segment(path: str) -> Optional[str]:
        """Return the first segment of *path*, or None when the path is empty."""
        # ''.split('/') yields [''], so an emptiness check on the list never
        # fires; normalise the empty string to None explicitly instead.
        return path.strip('/').split('/')[0] or None

    @staticmethod
    def process_url(url: str) -> dict:
        """Detect the platform of *url* and extract its basic properties.

        Args:
            url: The source URL to classify.

        Returns:
            A dict that always contains 'platform', 'source_type' and 'url',
            plus one platform-specific key: 'group_name' for VK,
            'channel_name' for Telegram, or 'domain' for any other website.
            'group_name' / 'channel_name' are None when the URL has no path.
        """
        parsed = urlparse(url)
        # Match on the parsed hostname rather than searching netloc for a
        # substring: a substring test treats hosts such as "smart.media"
        # (contains "t.me") or "vk.com@evil.example" as platform URLs.
        # hostname is lower-cased and excludes userinfo and port.
        host = (parsed.hostname or "").rstrip(".")

        # VK URL processing
        if URLProcessor._host_matches(host, URLProcessor._VK_DOMAIN):
            return {
                'platform': 'VK',
                'source_type': 'Group',
                'group_name': URLProcessor._first_path_segment(parsed.path),
                'url': url,
            }

        # Telegram URL processing
        if URLProcessor._host_matches(host, URLProcessor._TELEGRAM_DOMAIN):
            return {
                'platform': 'Telegram',
                'source_type': 'Channel',
                'channel_name': URLProcessor._first_path_segment(parsed.path),
                'url': url,
            }

        # Generic website processing
        return {
            'platform': 'Website',
            'source_type': 'Page',
            'domain': parsed.netloc,
            'url': url,
        }

    @staticmethod
    def validate_url(url: str) -> bool:
        """Check that *url* is reachable with an HTTP HEAD request.

        Args:
            url: The URL to probe.

        Returns:
            True if the request (after following redirects) ends with status
            200; False on any other status, on a malformed URL, or on a
            network error or timeout.
        """
        try:
            # requests.head() does not follow redirects by default, so a
            # reachable page behind a 301/302 would otherwise be rejected.
            response = requests.head(url, timeout=5, allow_redirects=True)
        except (requests.RequestException, ValueError):
            # Stay best-effort for request and URL errors, but no longer
            # swallow KeyboardInterrupt/SystemExit as the bare except did.
            return False
        return response.status_code == 200

## Source Manager

Now let's implement the main source management system.

In [None]:
class SourceManager:
    """In-memory registry of the sources being monitored."""

    def __init__(self):
        """Start with an empty source list and a URL processor."""
        self.sources = []
        self.url_processor = URLProcessor()

    def add_source(self, url: str) -> Optional[EventSource]:
        """Validate *url*, classify it, and register the matching source.

        Returns the newly stored source object, or None (after printing an
        error message) when the URL fails validation.
        """
        reachable = self.url_processor.validate_url(url)
        if not reachable:
            print(f"Error: Unable to access URL: {url}")
            return None

        details = self.url_processor.process_url(url)
        new_source = self._build_source(url, details)
        self.sources.append(new_source)
        return new_source

    @staticmethod
    def _build_source(url: str, details: dict) -> EventSource:
        """Create the source object that matches the detected platform."""
        platform = details['platform']

        if platform == 'VK':
            return VKSource(
                url=url,
                platform='VK',
                source_type='Group',
                group_id=details.get('group_name'),
            )

        if platform == 'Telegram':
            return TelegramSource(
                url=url,
                platform='Telegram',
                source_type='Channel',
                channel_id=details.get('channel_name'),
            )

        # Anything that is neither VK nor Telegram is a plain website page.
        return EventSource(
            url=url,
            platform='Website',
            source_type='Page',
        )

    def list_sources(self) -> List[EventSource]:
        """Return the list of registered sources."""
        return self.sources

    def remove_source(self, url: str) -> bool:
        """Drop every source whose URL equals *url*.

        Returns True if at least one source was removed, False otherwise.
        """
        count_before = len(self.sources)
        kept = [entry for entry in self.sources if entry.url != url]
        self.sources = kept
        return len(kept) < count_before

## Testing

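Let's test our implementation with some example URLs. Note that `add_source` issues a live HTTP request for each URL, so this cell needs network access and its output depends on which sites are reachable.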

In [None]:
# Create the manager that will hold the sources registered below
manager = SourceManager()

# One example URL per supported platform
test_urls = [
    "https://vk.com/science_msu",  # VK group
    "https://t.me/scientific_events",  # Telegram channel
    "https://www.msu.ru/science/",  # Website
]

# Try to register each URL; add_source returns None (and prints an error)
# when the URL fails validation, so only successful additions are echoed.
for url in test_urls:
    source = manager.add_source(url)
    if source:
        print(f"Added source: {source}")

# Print a platform/URL summary of everything that was registered
print("\nAll registered sources:")
for source in manager.list_sources():
    print(f"- {source.platform}: {source.url}")