In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import os
from tqdm import tqdm

# Create the directory for the raw data files (no error if it already exists)
os.makedirs('../data/raw', exist_ok=True)

def scrape_tiobe(year=2024, timeout=30):
    """Scrape the TIOBE Index ``top20`` table into a DataFrame.

    Downloads the TIOBE index page, locates the table whose id is ``top20``
    and builds one record per data row.

    Args:
        year: Value written to the ``year`` column of every record.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A DataFrame with the columns ``rank``, ``language``, ``rating`` and
        ``year``. ``rank`` and ``rating`` are stripped strings (``rating``
        with the ``%`` sign removed); no numeric conversion is performed.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an error status.
        ValueError: If the page does not contain a ``top20`` table.
    """
    url = "https://www.tiobe.com/tiobe-index/"
    # Without a timeout a stalled connection would block forever.
    response = requests.get(url, timeout=timeout)
    # Fail loudly instead of parsing an error page as if it were data.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    table = soup.find('table', {'id': 'top20'})
    if table is None:
        raise ValueError(f"Table with id 'top20' not found at {url}")

    columns = ['rank', 'language', 'rating', 'year']
    records = []
    for row in table.find_all('tr')[1:]:  # skip the header row
        cols = row.find_all('td')
        # Rows without enough cells (e.g. separators) cannot be indexed below.
        if len(cols) < 5:
            continue
        # NOTE(review): cell positions 3 (language) and 4 (rating) are kept
        # from the original code — confirm them against the live page layout.
        records.append({
            'rank': cols[0].text.strip(),
            'language': cols[3].text.strip(),
            'rating': cols[4].text.strip().replace('%', ''),
            'year': year,
        })

    # Passing the column list keeps the schema stable even when no row matched.
    return pd.DataFrame(records, columns=columns)

def scrape_pypl(year=2024, timeout=30):
    """Scrape the PYPL popularity table into a DataFrame.

    Downloads the PYPL page, locates the table whose id is ``tab`` and builds
    one record per row that has at least four cells.

    Args:
        year: Value written to the ``year`` column of every record.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A DataFrame with the columns ``rank``, ``language``, ``share`` and
        ``year``. ``rank`` and ``share`` are stripped strings (``share`` with
        the ``%`` sign removed); no numeric conversion is performed.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an error status.
        ValueError: If the page does not contain a ``tab`` table.
    """
    # Fetched over HTTPS so the page cannot be altered in transit.
    url = "https://pypl.github.io/PYPL.html"
    # Without a timeout a stalled connection would block forever.
    response = requests.get(url, timeout=timeout)
    # Fail loudly instead of parsing an error page as if it were data.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    table = soup.find('table', {'id': 'tab'})
    if table is None:
        raise ValueError(f"Table with id 'tab' not found at {url}")

    columns = ['rank', 'language', 'share', 'year']
    records = []
    for row in table.find_all('tr')[1:]:  # skip the header row
        cols = row.find_all('td')
        # Same acceptance rule as before: only rows with four or more cells.
        if len(cols) < 4:
            continue
        # NOTE(review): cell positions 1 (language) and 2 (share) are kept
        # from the original code — confirm them against the live page layout.
        records.append({
            'rank': cols[0].text.strip(),
            'language': cols[1].text.strip(),
            'share': cols[2].text.strip().replace('%', ''),
            'year': year,
        })

    # Passing the column list keeps the schema stable even when no row matched.
    return pd.DataFrame(records, columns=columns)

def get_historical_data():
    """Build a generated year-by-language popularity table for 2019-2023.

    No external source is queried: every value is computed from the
    language's position in a fixed list and the year, as
    ``100 - 5 * position + (year - 2019)``.

    Returns:
        A DataFrame with the columns ``year``, ``language`` and
        ``popularity``, holding one row per (year, language) pair, ordered
        by year and then by the language's position in the list.
    """
    years = range(2019, 2024)
    languages = ['Python', 'JavaScript', 'Java', 'C#', 'C++', 'PHP', 'TypeScript', 'Go', 'Rust']

    # enumerate() supplies the position directly instead of a linear
    # languages.index() lookup for every record.
    records = [
        {
            'year': year,
            'language': lang,
            'popularity': round(100 - (position * 5) + (year - 2019), 2),
        }
        for year in years
        for position, lang in enumerate(languages)
    ]

    return pd.DataFrame(records)

# Main script: collect every dataset
print("Собираем данные...")
tiobe_df = scrape_tiobe()
pypl_df = scrape_pypl()
historical_df = get_historical_data()

# Persist each frame to its CSV file, in the order listed (no index column)
_outputs = {
    '../data/raw/tiobe_2024.csv': tiobe_df,
    '../data/raw/pypl_2024.csv': pypl_df,
    '../data/raw/historical_2019_2023.csv': historical_df,
}
for _path, _frame in _outputs.items():
    _frame.to_csv(_path, index=False)

print("Данные сохранены в data/raw/")