# In [None]:
import aiohttp
import asyncio
import pandas as pd
from bs4 import BeautifulSoup
from datetime import datetime
import time

# Issuer codes to scrape. main() starts one scraping task per entry; each code is
# substituted into BASE_URL and also sent as the "Code" form field by fetch_data.
# Replace or extend this list with the full set of codes you need.
symbols = [
    'ADIN', 'ALK', 'ALKB', 'AMEH', 'APTK', 'ATPP', 'AUMK', 'BANA', 'BGOR', 'BIKF', 'BIM', 'BLTU',
    'CBNG', 'CDHV', 'CEVI', 'CKB', 'CKBKO', 'DEBA', 'DIMI', 'EDST', 'ELMA', 'ELNC', 'ENER', 'ENSA', 
    'EUHA', 'EUMK', 'EVRO', 'FAKM', 'FERS', 'FKTL', 'FROT', 'FUBT', 'GALE', 'GDKM', 'GECK', 'GECT', 
    'GIMS', 'GRDN', 'GRNT', 'GRSN', 'GRZD', 'GTC', 'GTRG', 'IJUG', 'INB', 'INHO', 'INOV', 'INPR', 
    'INTP', 'JAKO', 'JUSK', 'KARO', 'KDFO', 'KJUBI', 'KKST', 'KLST', 'KMB', 'KMPR', 'KOMU', 'KONF', 
    'KONZ', 'KORZ', 'KPSS', 'KULT', 'KVAS', 'LAJO', 'LHND', 'LOTO', 'LOZP', 'MAGP', 'MAKP', 'MAKS', 
    'MB', 'MERM', 'MKSD', 'MLKR', 'MODA', 'MPOL', 'MPT', 'MPTE', 'MTUR', 'MZHE', 'MZPU', 'NEME', 
    'NOSK', 'OBPP', 'OILK', 'OKTA', 'OMOS', 'OPFO', 'OPTK', 'ORAN', 'OSPO', 'OTEK', 'PELK', 'PGGV', 
    'PKB', 'POPK', 'PPIV', 'PROD', 'PROT', 'PTRS', 'RADE', 'REPL', 'RIMI', 'RINS', 'RZEK', 'RZIT', 
    'RZIZ', 'RZLE', 'RZLV', 'RZTK', 'RZUG', 'RZUS', 'SBT', 'SDOM', 'SIL', 'SKON', 'SKP', 'SLAV', 
    'SNBT', 'SNBTO', 'SOLN', 'SPAZ', 'SPAZP', 'SPOL', 'SSPR', 'STB', 'STBP', 'STIL', 'STOK', 'TAJM', 
    'TBKO', 'TEAL', 'TEHN', 'TEL', 'TETE', 'TIKV', 'TKPR', 'TKVS', 'TNB', 'TRDB', 'TRPS', 'TRUB', 
    'TSMP', 'TSZS', 'TTK', 'TTKO', 'UNI', 'USJE', 'VARG', 'VFPM', 'VITA', 'VROS', 'VSC', 'VTKS', 
    'ZAS', 'ZILU', 'ZILUP', 'ZIMS', 'ZKAR', 'ZPKO', 'ZPOG', 'ZUAS'
]

# URL template for one symbol's history page; {symbol} is filled in by fetch_data.
BASE_URL = "https://www.mse.mk/mk/stats/symbolhistory/{symbol}"
# Default headers passed to the shared aiohttp.ClientSession created in main().
HEADERS = {
    "User-Agent": "Mozilla/5.0",
    "Content-Type": "application/x-www-form-urlencoded",
}

# Total per-request timeout (8 seconds) and the number of attempts per request.
TIMEOUT = aiohttp.ClientTimeout(total=8)  # kept short so slow requests fail fast
MAX_RETRIES = 2  # total attempts made by fetch_data, not extra retries after the first

# Upper bound on POST requests in flight at once; the semaphore is shared by all tasks.
MAX_CONCURRENT_REQUESTS = 100
# NOTE(review): this is created at import time, before asyncio.run() starts an
# event loop. On Python < 3.10 asyncio primitives bind to a loop at construction,
# so confirm the target Python version (or create the semaphore inside main()).
semaphore = asyncio.Semaphore(MAX_CONCURRENT_REQUESTS)

async def fetch_data(session, symbol, start_date, end_date):
    """POST a history query for *symbol* and return the response body.

    Args:
        session: Open ``aiohttp.ClientSession`` used to send the request.
        symbol: Issuer code; substituted into ``BASE_URL`` and sent as ``Code``.
        start_date: Value sent as the ``FromDate`` form field.
        end_date: Value sent as the ``ToDate`` form field.

    Returns:
        The response text of the first attempt that answers with HTTP 200, or
        ``None`` when all ``MAX_RETRIES`` attempts fail (non-200 status,
        client error, or timeout).
    """
    url = BASE_URL.format(symbol=symbol)
    payload = {
        "FromDate": start_date,
        "ToDate": end_date,
        "Code": symbol,
    }

    for attempt in range(1, MAX_RETRIES + 1):
        try:
            # The semaphore is held only while the request is in flight, so the
            # back-off sleep below never blocks other tasks from sending.
            async with semaphore:
                async with session.post(url, data=payload, timeout=TIMEOUT) as response:
                    if response.status == 200:
                        return await response.text()
                    print(f"Attempt {attempt}: Received status {response.status} for {symbol}")
        except (aiohttp.ClientError, asyncio.TimeoutError) as e:
            # repr() is used because str(asyncio.TimeoutError()) is empty, which
            # would otherwise produce a message with no error information.
            print(f"Attempt {attempt} failed for {symbol} with error: {e!r}")
        # Back off only when another attempt follows; sleeping after the final
        # failure would just delay returning None.
        if attempt < MAX_RETRIES:
            await asyncio.sleep(0.5)
    return None

def parse_table(html_content):
    """Extract the ``resultsTable`` HTML table into a DataFrame.

    Args:
        html_content: HTML document text to search.

    Returns:
        A ``pandas.DataFrame`` whose columns are the stripped text of every
        ``<th>`` in the table and whose rows are the stripped ``<td>`` texts of
        every ``<tr>`` after the first, or ``None`` when the document has no
        table with id ``resultsTable``.
    """
    document = BeautifulSoup(html_content, 'html.parser')
    results = document.find("table", {"id": "resultsTable"})
    if results is None:
        return None

    column_names = []
    for th in results.find_all("th"):
        column_names.append(th.get_text().strip())

    # The first <tr> is treated as the header row and is not turned into data.
    body_rows = results.find_all("tr")[1:]
    records = []
    for tr in body_rows:
        cells = tr.find_all("td")
        records.append([td.get_text().strip() for td in cells])

    return pd.DataFrame(records, columns=column_names)

async def save_data(df, symbol):
    """Append *df* to ``<symbol>_data.csv`` in the current working directory.

    All years for a symbol go into one file. The header row is written only
    when the file does not exist yet or is empty, so repeated appends do not
    repeat the header.

    Args:
        df: ``pandas.DataFrame`` to write (the index is not written).
        symbol: Issuer code used to build the file name.
    """
    # Local import keeps this block self-contained; os.path replaces the
    # undocumented pandas helper pd.io.common.file_exists used previously.
    import os

    filename = f"{symbol}_data.csv"
    # An existing but empty file still needs a header row.
    needs_header = not os.path.isfile(filename) or os.path.getsize(filename) == 0
    df.to_csv(filename, index=False, mode='a', header=needs_header)
    print(f"Data saved to {filename}")

async def scrape_symbol_data(session, symbol):
    """Fetch every year of history for *symbol* and save it in one write.

    Years from 2014 through the current calendar year are requested one at a
    time. Years whose fetch returns nothing, or whose page has no results
    table, are skipped. Nothing is saved when no year produced a table.

    Args:
        session: Open ``aiohttp.ClientSession`` passed through to ``fetch_data``.
        symbol: Issuer code to scrape.
    """
    first_year = 2014
    last_year = datetime.now().year

    # Tables are accumulated in memory so the CSV is written once per symbol.
    yearly_tables = []
    for year in range(first_year, last_year + 1):
        page = await fetch_data(session, symbol, f"01/01/{year}", f"31/12/{year}")
        if not page:
            continue
        table = parse_table(page)
        if table is None:
            continue
        yearly_tables.append(table)

    if not yearly_tables:
        return
    await save_data(pd.concat(yearly_tables, ignore_index=True), symbol)

async def main():
    """Scrape every symbol concurrently and report the elapsed time.

    One task per entry in ``symbols`` is started on a shared
    ``aiohttp.ClientSession``. A failure in one symbol is reported and does
    not stop the others.
    """
    # perf_counter is monotonic, so the measurement is unaffected by clock changes.
    start_time = time.perf_counter()

    async with aiohttp.ClientSession(headers=HEADERS) as session:
        tasks = [asyncio.create_task(scrape_symbol_data(session, symbol)) for symbol in symbols]
        # return_exceptions=True keeps one failing symbol from propagating out
        # of gather, which would close the session while other tasks still use it.
        results = await asyncio.gather(*tasks, return_exceptions=True)

    # gather preserves input order, so results line up with symbols.
    for symbol, result in zip(symbols, results):
        if isinstance(result, BaseException):
            print(f"Scraping failed for {symbol}: {result!r}")

    elapsed_time = time.perf_counter() - start_time
    print(f"Scraping completed in {elapsed_time:.2f} seconds.")

# Run the scraper only when executed as a script, not when imported.
# NOTE(review): asyncio.run() raises RuntimeError if an event loop is already
# running (e.g. presumably inside a notebook kernel) — confirm how this is launched.
if __name__ == "__main__":
    asyncio.run(main())
