# eBay Kleinanzeigen web scraping

In [1]:
import pandas as pd
import requests    # for making standart html requests
from bs4 import BeautifulSoup # magical tool for parsing html data

In [17]:
import requests
from bs4 import BeautifulSoup

class ApartmentScraper:
    """Scrape the details of a single apartment listing page.

    Usage: construct with the listing URL, call ``fetch_page()`` to download
    and parse the HTML, then ``extract_details()`` to obtain a dict.

    Args:
        url: Address of the listing page to download.
        timeout: Seconds to wait for the server before giving up
            (passed to ``requests.get``).
    """

    # Row labels from the details list that are copied into the result.
    _DETAIL_KEYS = (
        'Wohnfläche', 'Zimmer', 'Schlafzimmer', 'Badezimmer',
        'Nebenkosten', 'Heizkosten', 'Warmmiete',
    )
    # Feature tags reported as 1 (present) / 0 (absent) flags.
    _FEATURE_TAGS = (
        'Balkon', 'Einbauküche', 'Garage', 'Keller',
        'Möbliert', 'Neubau', 'Altbau',
    )

    def __init__(self, url, timeout=10):
        self.url = url
        self.timeout = timeout
        self.soup = None  # set by fetch_page()

    def fetch_page(self):
        """Download the page and parse it with BeautifulSoup.

        Raises whatever ``requests`` raises on connection problems or
        timeouts, and on an HTTP error status (via ``raise_for_status()``).
        """
        # Without a timeout requests may wait indefinitely on a stalled server.
        response = requests.get(self.url, timeout=self.timeout)
        response.raise_for_status()
        self.soup = BeautifulSoup(response.content, 'html.parser')

    @staticmethod
    def _split_title(full_text, value_text):
        """Return the row text with the value text removed.

        The value is normally the trailing part of the row text, so only that
        suffix is cut off; a blanket ``replace`` would also delete the same
        characters if they happen to occur inside the title itself.
        """
        if value_text and full_text.endswith(value_text):
            return full_text[:-len(value_text)].strip()
        # Value is not at the end: drop its first occurrence only.
        return full_text.replace(value_text, '', 1).strip()

    def get_basic_details(self):
        """Extract the basic listing details.

        Returns:
            A dict holding every known detail label; labels not found on the
            page keep the default value ``'0'``.
        """
        details_dict = {key: '0' for key in self._DETAIL_KEYS}
        for detail in self.soup.find_all(class_='addetailslist--detail'):
            value_element = detail.find('span', class_='addetailslist--detail--value')
            if value_element is None:
                continue
            value_text = value_element.get_text(strip=True)
            title_text = self._split_title(detail.get_text(strip=True), value_text)
            if title_text in details_dict:
                details_dict[title_text] = value_text
        return details_dict

    def get_feature_tags(self, details_dict):
        """Add a 1/0 flag for every known feature tag to ``details_dict``.

        The dict is updated in place; nothing is returned.
        """
        # Start with "absent" everywhere, then flag the tags found on the page.
        for feature in self._FEATURE_TAGS:
            details_dict[feature] = 0

        checktag_list = self.soup.find('ul', class_='checktaglist')
        if checktag_list is None:
            return
        for checktag in checktag_list.find_all('li', class_='checktag'):
            feature = checktag.get_text(strip=True)
            if feature in self._FEATURE_TAGS:
                details_dict[feature] = 1

    def get_kaltmiete(self, details_dict):
        """Store the price text under ``'kaltmiete'`` in ``details_dict``.

        The dict is updated in place; ``'0'`` is stored when the price element
        is not present on the page.
        """
        price_element = self.soup.find('h2', class_='boxedarticle--price', id='viewad-price')
        if price_element is None:
            details_dict['kaltmiete'] = '0'
        else:
            details_dict['kaltmiete'] = price_element.get_text(strip=True)

    def extract_details(self):
        """Collect all details from the fetched page.

        Returns:
            A plain dict with its keys in sorted order.

        Raises:
            ValueError: If ``fetch_page()`` has not populated ``self.soup``.
        """
        if self.soup is None:
            raise ValueError("Soup nesnesi boş. Önce fetch_page() unu cagirign.")

        details_dict = self.get_basic_details()
        self.get_feature_tags(details_dict)
        self.get_kaltmiete(details_dict)

        # Sort by key and hand back a standard dict.
        return dict(sorted(details_dict.items()))

# Usage: scrape a single listing page and show the extracted details.
url = 'https://www.kleinanzeigen.de/s-anzeige/charmantes-voll-moebliertes-2-zimmer-apartment-in-muenchen-lehel-mit-flexibler-mindestmietzeit/2732040224-203-6418'
scraper = ApartmentScraper(url)
scraper.fetch_page()
details = scraper.extract_details()
print(details)  # Prints the result for inspection; this line can be removed.


{'Altbau': 0, 'Badezimmer': '1', 'Balkon': 1, 'Einbauküche': 1, 'Garage': 0, 'Heizkosten': '0', 'Keller': 0, 'Möbliert': 0, 'Nebenkosten': '420 €', 'Neubau': 0, 'Schlafzimmer': '1', 'Warmmiete': '0', 'Wohnfläche': '68 m²', 'Zimmer': '2', 'kaltmiete': '2.890 €'}


# ChatGPT ile

In [43]:
import requests
from bs4 import BeautifulSoup
import csv

def scrape_page(page_url, timeout=10):
    """Download ``page_url`` and return it parsed as a BeautifulSoup tree.

    Args:
        page_url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up.

    Raises whatever ``requests`` raises on connection problems or timeouts,
    and on an HTTP error status (via ``raise_for_status()``), so that an
    error page is never parsed as if it were real content.
    """
    # Without a timeout requests may wait indefinitely on a stalled server.
    response = requests.get(page_url, timeout=timeout)
    response.raise_for_status()
    return BeautifulSoup(response.content, 'html.parser')

def get_listing_urls(base_url, soup):
    """Return the absolute URLs of all listings found on a result page.

    Args:
        base_url: URL that the listings' ``data-href`` values are resolved
            against.
        soup: Parsed result page; every ``.ad-listitem`` element is inspected.

    Returns:
        A list of absolute URLs, in page order. Items without a ``data-href``
        attribute are skipped.
    """
    from urllib.parse import urljoin

    hrefs = (listing.get('data-href') for listing in soup.select('.ad-listitem'))
    # urljoin copes with absolute hrefs, hrefs lacking a leading slash and a
    # base_url that carries a path; plain string concatenation does not.
    return [urljoin(base_url, href) for href in hrefs if href]

def get_listing_details(listing_url):
    """Download one listing page and return its details as a dict.

    Args:
        listing_url: Address of the listing page.

    Returns:
        A dict with ``'price'`` (when a price element exists) plus one entry
        per details row, keyed by the row's label text.
    """
    soup = scrape_page(listing_url)
    details = {}

    price_tag = soup.find(class_='boxedarticle--price')
    if price_tag is not None:
        details['price'] = price_tag.text.strip()

    for feature in soup.find_all(class_='addetailslist--detail'):
        value_tag = feature.find(class_='addetailslist--detail--value')
        if value_tag is None:
            # A row without a value element carries nothing to record.
            continue
        value = value_tag.text.strip()

        label_tag = feature.find(class_='addetailslist--detail--label')
        if label_tag is not None:
            key = label_tag.text.strip()
        else:
            # No dedicated label element: the label is whatever remains of the
            # row text once the trailing value text is cut off.
            row_text = feature.text.strip()
            if value and row_text.endswith(value):
                key = row_text[:-len(value)].strip()
            else:
                key = row_text

        if key:
            details[key] = value
    return details

def scrape_all_pages(base_url, start_page, end_page, output_path='rent_listings.csv'):
    """Scrape a range of result pages and write all listings to a CSV file.

    Args:
        base_url: URL the page addresses (``<base_url>/seite:<n>``) and the
            listing links are built from.
        start_page: First page number to scrape (inclusive).
        end_page: Last page number to scrape (inclusive).
        output_path: Destination of the CSV file.

    Nothing is written when no listing could be scraped; a message is printed
    instead.
    """
    all_details = []
    page_root = base_url.rstrip('/')  # avoid a double slash in the page URL
    for page_num in range(start_page, end_page + 1):
        page_url = f"{page_root}/seite:{page_num}"
        soup = scrape_page(page_url)
        for url in get_listing_urls(base_url, soup):
            details = get_listing_details(url)
            all_details.append(details)
            print(f"Detaylar çekildi: {details}")  # progress/debug output

    if not all_details:
        print("Detaylar listesi boş. Hiçbir veri çekilemedi.")
        return

    # Listings do not all share the same keys. Use the union of all keys (in
    # first-seen order) as the header; DictWriter rejects a row that has a key
    # missing from fieldnames, and fills absent keys with an empty string.
    fieldnames = list(dict.fromkeys(key for details in all_details for key in details))
    with open(output_path, 'w', newline='', encoding='utf-8') as file:
        writer = csv.DictWriter(file, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(all_details)

# Base URL and page-range settings.
# NOTE(review): scrape_all_pages requests "<BASE_URL>/seite:<n>", and the
# output recorded for this cell reports that no data was scraped; confirm that
# the bare site root is the intended search URL.
BASE_URL = 'https://www.kleinanzeigen.de'
START_PAGE = 1
END_PAGE = 5  # Example: scrape 5 pages of results

scrape_all_pages(BASE_URL, START_PAGE, END_PAGE)

Detaylar listesi boş. Hiçbir veri çekilemedi.
