# Get all the URLs for each car dealer in Berlin

In [8]:
import asyncio
from playwright.async_api import async_playwright

async def visit_autoscout24_dealers():
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=False)
        page = await browser.new_page()
        all_dealer_urls = []
        
        # Handle cookie consent on first page
        url = "https://www.autoscout24.de/haendler/#?city=Berlin&pageIndex=1&sortBy=distanceAscending"
        await page.goto(url)
        await page.wait_for_selector("h1", timeout=10000)
        
        # Accept cookies
        accept_button = await page.wait_for_selector('button[data-testid="as24-cmp-accept-all-button"]')
        await accept_button.click()
        
        # Iterate through all pages
        for page_index in range(1, 83):  # Pages 1 to 82
            url = f"https://www.autoscout24.de/haendler/#?city=Berlin&pageIndex={page_index}&sortBy=distanceAscending"
            await page.goto(url)
            
            # Wait for dealer list items to load
            await page.wait_for_selector('div[data-react-component="DealerListItem"]')
            
            # Extract dealer URLs from current page
            dealer_urls = await page.evaluate('''
                () => {
                    const dealers = document.querySelectorAll('div[data-react-component="DealerListItem"] .name a');
                    return Array.from(dealers).map(a => a.href);
                }
            ''')
            
            all_dealer_urls.extend(dealer_urls)
            print(f"Page {page_index}: Found {len(dealer_urls)} dealers")
            
            # Optional: Add a small delay between pages to avoid overwhelming the server
            await page.wait_for_timeout(1000)
        
        await browser.close()
        print(f"Total dealers found: {len(all_dealer_urls)}")
        return all_dealer_urls

# For Jupyter notebook
dealer_urls = await visit_autoscout24_dealers()

Page 1: Found 10 dealers
Page 2: Found 10 dealers
Page 3: Found 10 dealers
Page 4: Found 10 dealers
Page 5: Found 10 dealers
Page 6: Found 10 dealers
Page 7: Found 10 dealers
Page 8: Found 10 dealers
Page 9: Found 10 dealers
Page 10: Found 10 dealers
Page 11: Found 10 dealers
Page 12: Found 10 dealers
Page 13: Found 10 dealers
Page 14: Found 10 dealers
Page 15: Found 10 dealers
Page 16: Found 10 dealers
Page 17: Found 10 dealers
Page 18: Found 10 dealers
Page 19: Found 10 dealers
Page 20: Found 10 dealers
Page 21: Found 10 dealers
Page 22: Found 10 dealers
Page 23: Found 10 dealers
Page 24: Found 10 dealers
Page 25: Found 10 dealers
Page 26: Found 10 dealers
Page 27: Found 10 dealers
Page 28: Found 10 dealers
Page 29: Found 10 dealers
Page 30: Found 10 dealers
Page 31: Found 10 dealers
Page 32: Found 10 dealers
Page 33: Found 10 dealers
Page 34: Found 10 dealers
Page 35: Found 10 dealers
Page 36: Found 10 dealers
Page 37: Found 10 dealers
Page 38: Found 10 dealers
Page 39: Found 10 dea

In [9]:
print(dealer_urls)

['https://www.autoscout24.de/haendler/carpoint-gmbh-berlin', 'https://www.autoscout24.de/haendler/pkw-online-de-neuwagen', 'https://www.autoscout24.de/haendler/taheri-auto-kassel', 'https://www.autoscout24.de/haendler/motorrad-lukas', 'https://www.autoscout24.de/haendler/suv4you-gmbh', 'https://www.autoscout24.de/haendler/g-m-automobile-berlin', 'https://www.autoscout24.de/haendler/duerkop-gmbh-berlin-prenzlauer-berg-berlin', 'https://www.autoscout24.de/haendler/amore-automobile-moresco-e-k', 'https://www.autoscout24.de/haendler/duerkop-gmbh-berlin-10409', 'https://www.autoscout24.de/haendler/red-white-corporation-gmbh', 'https://www.autoscout24.de/haendler/carpoint-gmbh-berlin', 'https://www.autoscout24.de/haendler/pkw-online-de-neuwagen', 'https://www.autoscout24.de/haendler/taheri-auto-kassel', 'https://www.autoscout24.de/haendler/motorrad-lukas', 'https://www.autoscout24.de/haendler/suv4you-gmbh', 'https://www.autoscout24.de/haendler/g-m-automobile-berlin', 'https://www.autoscout24

In [10]:
# Write URLs to a file, one URL per line
with open('dealer_urls.txt', 'w') as f:
    for url in dealer_urls:
        f.write(f"{url}\n")

print(f"Saved {len(dealer_urls)} URLs to dealer_urls.txt")

Saved 820 URLs to dealer_urls.txt
