In [1]:
import html
from datetime import datetime, timedelta

import bottle
import pandas as pd
from bs4 import BeautifulSoup

# from utils import bs_get, convert_date
from ebkscrapper.utils import get_request, convert_date

# API: https://api.ebay-kleinanzeigen.de/docs/pages/home
# Site root; prepended to the relative ad links found in search results.
ebk_url = "https://www.ebay-kleinanzeigen.de"
# Search endpoint that receives the query parameters below.
base_url = "http://kleinanzeigen.ebay.de/anzeigen/s-suchanfrage.html"
# Default query parameters shared by every search request.
base_payload = dict(
    sortingField="SORTING_DATE",
    adType="",
    posterType="",
    pageNum="1",
    action="find",
)

In [21]:
# @app.route('/')
# @app.route('/<query>')
# @checkParams(maxentries=int, radius=int, minprice=int,
#             maxprice=int, categoryid=int, where=str)

def make_clickable(val):
    """Render ``val`` as an HTML link that opens in a new window.

    The value is used both as the ``href`` and as the visible link text. It
    is HTML-escaped first so that characters such as ``&``, ``<`` or ``"`` in
    a scraped URL cannot break out of the attribute or inject markup.

    Args:
        val: URL to link to; converted with ``str`` before escaping.

    Returns:
        str: An ``<a target="_blank" href="...">...</a>`` element.
    """
    safe = html.escape(str(val), quote=True)
    # target _blank to open new window
    return '<a target="_blank" href="{}">{}</a>'.format(safe, safe)

def onefeed(query="", where="", maxentries=1000,
            radius=0, minprice=0, maxprice=1000, categoryid=0):
    """Run one search request and return the parsed ads as a styled table.

    A single request is sent to ``base_url`` with ``base_payload`` plus the
    filters given here. Every ``<li>`` of the result list is parsed into one
    row; items that cannot be parsed are skipped.

    Args:
        query: Search keywords (sent as ``keywords``); also stored in the
            ``Query`` column of every row.
        where: Location string (sent as ``locationStr``).
        maxentries: Accepted for interface compatibility; it is not applied
            to the result.
        radius: Search radius (sent as ``radius``).
        minprice: Lower price bound (sent as ``minPrice``).
        maxprice: Upper price bound (sent as ``maxPrice``).
        categoryid: Category filter (sent as ``categoryId``).

    Returns:
        The ``Styler`` produced by ``df.style.format`` with the ``URL``
        column rendered through ``make_clickable``. Columns are ``Query``,
        ``Title``, ``Type``, ``Date``, ``Price``, ``Postcode``, ``City``,
        ``Distance``, ``Currency`` and ``URL``. The table is empty when the
        page reports no ads or nothing could be parsed.
    """
    # Work on a copy so a search never leaks its filters into the shared
    # module-level defaults.
    payload = dict(base_payload)
    payload.update({
        'keywords': query,
        'locationStr': where,
        'minPrice': str(minprice),
        'maxPrice': str(maxprice),
        'categoryId': str(categoryid),
        'radius': str(radius)})

    soup = get_request(base_url, payload)
    category = _leaf_category(soup)

    # Bound before the branch so the "no ads" page yields an empty table
    # instead of a NameError.
    rows = []
    if 'Es wurden leider keine Anzeigen' not in soup.text:
        main_content = soup.find('div', {"id": "srchrslt-content"})
        listing = None
        if main_content is not None:
            listing = main_content.find('ul', {"id": "srchrslt-adtable"})
        items = listing.findAll('li') if listing is not None else []
        for item in items:
            try:
                rows.append(_parse_ad(item, query, category))
            except Exception:
                # Best effort: list items that are not complete ads are
                # skipped. Unlike a bare ``except`` this lets
                # KeyboardInterrupt / SystemExit propagate.
                continue

    df = pd.DataFrame(rows, columns=['Query', 'Title', 'Type', 'Date', 'Price', 'Postcode',
                                     'City', 'Distance', 'Currency', 'URL'])

    return df.style.format({'URL': make_clickable})  # Make URL href.


def _leaf_category(soup):
    """Return the text of the last category link in the browse box, or ''."""
    # The category usually has multiple levels; only the last one is kept.
    # TODO: a better way to do it is to refer category id from the api.
    box = soup.find('div', {'class': 'browsebox-section-body'})
    if box is None:
        return ""
    entries = box.findAll('li')
    if not entries:
        return ""
    link = entries[-1].find('a')
    return link.text if link is not None else ""


def _parse_ad(item, query, category):
    """Turn one result ``<li>`` into a row list; raises if a field is missing."""
    title = item.h2.a.text
    href = ebk_url + item.find('div', {'class': 'aditem-main'}).a['href']
    raw_date = item.find('div', {'class': 'aditem-addon'}).text
    date = convert_date(raw_date.replace(' ', '').replace('\n', ''))

    details = item.find('div', {"class": "aditem-details"})
    price_parts = details.strong.text.split(" ")
    currency = price_parts[-1]
    # German number format: '.' groups thousands, ',' is the decimal mark.
    price = float(price_parts[0].replace('.', '').replace(',', '.'))

    # Split the details text once; postcode, city and distance sit on
    # fixed lines of it.
    fields = details.text.replace(' ', '').split('\n')
    postcode = fields[3]
    city = fields[4]
    distance_km = float(fields[5].replace('km', ''))

    return [query, title, category, date, price, postcode, city,
            distance_km, currency, href]
    

 

In [22]:
# Sample search: "Leica M240" around Bielefeld (radius 500), priced 1000-200000.
data = onefeed(
    query="Leica M240",
    where="Bielefeld",
    radius=500,
    minprice=1000,
    maxprice=200000,
)
data

Unnamed: 0,Query,Title,Type,Date,Price,Postcode,City,Distance,Currency,URL
0,Leica M240,"Leica M 240, schwarz, inkl. Zubehör",Foto,2020-02-06 00:00:00,2500,6862,Dessau-Roßlau,297,€,https://www.ebay-kleinanzeigen.de/s-anzeige/leica-m-240-schwarz-inkl-zubehoer/1287920741-245-16984
1,Leica M240,Leica M 240 + Summilux 50 + Summicron 90 + Zubeh/SET/Service´19,Foto,2020-02-04 00:00:00,5200,83024,Rosenheim,437,VB,https://www.ebay-kleinanzeigen.de/s-anzeige/leica-m-240-summilux-50-summicron-90-zubeh-set-service-19/1317053302-245-7582
2,Leica M240,Leica M-P (Typ M240) in schwarz - sehr gepflegt (Full Set),Foto,2020-02-04 00:00:00,2950,81925,Bogenhausen,394,VB,https://www.ebay-kleinanzeigen.de/s-anzeige/leica-m-p-typ-m240-in-schwarz-sehr-gepflegt-full-set-/1316831069-245-6528
3,Leica M240,Leica M240,Foto,2020-02-04 00:00:00,2400,12437,Treptow,396,VB,https://www.ebay-kleinanzeigen.de/s-anzeige/leica-m240/1316719771-245-3481
4,Leica M240,Leica M Type 240 M240 schwarz,Foto,2020-02-01 00:00:00,2499,26389,Wilhelmshaven,125,€,https://www.ebay-kleinanzeigen.de/s-anzeige/leica-m-type-240-m240-schwarz/1314189444-245-3101
5,Leica M240,Leica M Type 240 M240 Schwarz,Foto,2020-01-29 00:00:00,2499,60385,Bornheim,152,€,https://www.ebay-kleinanzeigen.de/s-anzeige/leica-m-type-240-m240-schwarz/1311609437-245-4293
6,Leica M240,Leica Summicron-M E39 50mm f2 | Germany | M9 M10 M240 MP M-D,Foto,2020-01-03 00:00:00,1149,34323,Malsfeld,109,€,https://www.ebay-kleinanzeigen.de/s-anzeige/leica-summicron-m-e39-50mm-f2-germany-m9-m10-m240-mp-m-d/1290393603-245-4422
7,Leica M240,LEICA M-P (M240),Foto,2019-12-19 00:00:00,3199,10435,PrenzlauerBerg,391,VB,https://www.ebay-kleinanzeigen.de/s-anzeige/leica-m-p-m240-/1281977941-245-3489


In [16]:
# Inspect the runtime type of the search result held in ``data``.
# NOTE(review): the recorded output shows a DataFrame, while ``onefeed`` as
# defined above returns ``df.style.format(...)`` - re-run to refresh.
type(data)

pandas.core.frame.DataFrame

In [17]:
def make_clickable(val):
    """Render ``val`` as an HTML link that opens in a new window.

    The value is HTML-escaped before it is placed in the ``href`` attribute
    and the link text, so special characters in a URL cannot inject markup.

    NOTE(review): this re-defines the ``make_clickable`` from an earlier
    cell and shadows it for later calls; consider keeping a single copy.

    Args:
        val: URL to link to; converted with ``str`` before escaping.

    Returns:
        str: An ``<a target="_blank" href="...">...</a>`` element.
    """
    safe = html.escape(str(val), quote=True)
    # target _blank to open new window
    return '<a target="_blank" href="{}">{}</a>'.format(safe, safe)

# ``onefeed`` returns the Styler produced by ``df.style.format``, and a Styler
# has no ``.style`` accessor; only go through ``.style`` for a plain DataFrame.
(data.style if isinstance(data, pd.DataFrame) else data).format({'URL': make_clickable})

Unnamed: 0,Query,Title,Type,Date,Price,Postcode,City,Distance,Currency,URL
0,Leica M240,"Leica M 240, schwarz, inkl. Zubehör",Foto,2020-02-06 00:00:00,2500,6862,Dessau-Roßlau,297,€,https://www.ebay-kleinanzeigen.de/s-anzeige/leica-m-240-schwarz-inkl-zubehoer/1287920741-245-16984
1,Leica M240,Leica M 240 + Summilux 50 + Summicron 90 + Zubeh/SET/Service´19,Foto,2020-02-04 00:00:00,5200,83024,Rosenheim,437,VB,https://www.ebay-kleinanzeigen.de/s-anzeige/leica-m-240-summilux-50-summicron-90-zubeh-set-service-19/1317053302-245-7582
2,Leica M240,Leica M-P (Typ M240) in schwarz - sehr gepflegt (Full Set),Foto,2020-02-04 00:00:00,2950,81925,Bogenhausen,394,VB,https://www.ebay-kleinanzeigen.de/s-anzeige/leica-m-p-typ-m240-in-schwarz-sehr-gepflegt-full-set-/1316831069-245-6528
3,Leica M240,Leica M240,Foto,2020-02-04 00:00:00,2400,12437,Treptow,396,VB,https://www.ebay-kleinanzeigen.de/s-anzeige/leica-m240/1316719771-245-3481
4,Leica M240,Leica M Type 240 M240 schwarz,Foto,2020-02-01 00:00:00,2499,26389,Wilhelmshaven,125,€,https://www.ebay-kleinanzeigen.de/s-anzeige/leica-m-type-240-m240-schwarz/1314189444-245-3101
5,Leica M240,Leica M Type 240 M240 Schwarz,Foto,2020-01-29 00:00:00,2499,60385,Bornheim,152,€,https://www.ebay-kleinanzeigen.de/s-anzeige/leica-m-type-240-m240-schwarz/1311609437-245-4293
6,Leica M240,Leica Summicron-M E39 50mm f2 | Germany | M9 M10 M240 MP M-D,Foto,2020-01-03 00:00:00,1149,34323,Malsfeld,109,€,https://www.ebay-kleinanzeigen.de/s-anzeige/leica-summicron-m-e39-50mm-f2-germany-m9-m10-m240-mp-m-d/1290393603-245-4422
7,Leica M240,LEICA M-P (M240),Foto,2019-12-19 00:00:00,3199,10435,PrenzlauerBerg,391,VB,https://www.ebay-kleinanzeigen.de/s-anzeige/leica-m-p-m240-/1281977941-245-3489


In [7]:
# Sample search: "Iphone X" around Bielefeld (radius 500), priced 10-200000.
data = onefeed(
    query="Iphone X",
    where="Bielefeld",
    radius=500,
    minprice=10,
    maxprice=200000,
)
data

Unnamed: 0,Query,Title,Type,Date,Price,Postcode,City,Distance,Currency,URL
0,Iphone X,iPhone X 64 GB - Neuzustand mit Folie (auch Zu...,Handy & Telefon,2020-02-13,700.0,74074,Heilbronn,238.0,€,https://www.ebay-kleinanzeigen.de/s-anzeige/ip...
1,Iphone X,Suche iPhone 11 im Tausch gegen iPhone X +€,Handy & Telefon,2020-02-13,100.0,9125,Chemnitz,365.0,€,https://www.ebay-kleinanzeigen.de/s-anzeige/su...
2,Iphone X,iDeal of sweden Hülle für IPhone 11 Pro/XS/X,Handy & Telefon,2020-02-13,25.0,29633,Munster,148.0,VB,https://www.ebay-kleinanzeigen.de/s-anzeige/id...
3,Iphone X,iPhone X 256 GB Top Zustand,Handy & Telefon,2020-02-13,450.0,71067,Sindelfingen,266.0,€,https://www.ebay-kleinanzeigen.de/s-anzeige/ip...
4,Iphone X,iPhone X - 256GB - Space Grey,Handy & Telefon,2020-02-13,510.0,28259,Huchting,84.0,VB,https://www.ebay-kleinanzeigen.de/s-anzeige/ip...
5,Iphone X,IPhone X - 64 GB Schwarz / Kameraglas hinten g...,Handy & Telefon,2020-02-13,350.0,76131,Karlsruhe,240.0,VB,https://www.ebay-kleinanzeigen.de/s-anzeige/ip...
6,Iphone X,"iPhone 8 Plus Gold, 64 GB (kein X, Xr, 11, Max...",Handy & Telefon,2020-02-13,450.0,24118,Ravensberg-Brunswik-Düsternbrook,224.0,VB,https://www.ebay-kleinanzeigen.de/s-anzeige/ip...
7,Iphone X,iPhone X Neuwertig,Handy & Telefon,2020-02-13,600.0,30161,Mitte,101.0,VB,https://www.ebay-kleinanzeigen.de/s-anzeige/ip...
8,Iphone X,iPhone X 64GB Space grau,Handy & Telefon,2020-02-13,350.0,58454,Witten,102.0,VB,https://www.ebay-kleinanzeigen.de/s-anzeige/ip...
9,Iphone X,iPhone X silber 64gb,Handy & Telefon,2020-02-13,499.0,41063,Mönchengladbach,181.0,VB,https://www.ebay-kleinanzeigen.de/s-anzeige/ip...


In [33]:
# # Save file 
# with open("soup.html", "w") as file:
#     file.write(soup.prettify())

In [70]:


class EBKScrapper(object):
    """Container for the search endpoint, default payload and a Bottle app.

    Attributes set by the constructor:
        base_url: Search endpoint URL.
        base_payload: Default query parameters for a search request.
        app: A fresh ``bottle.Bottle`` application instance.
    """

    def __init__(self):
        # API: https://api.ebay-kleinanzeigen.de/docs/pages/home
        self.base_url = "http://kleinanzeigen.ebay.de/anzeigen/s-suchanfrage.html"
        self.base_payload = {"sortingField": "SORTING_DATE",
                             "adType": "", "posterType": "",
                             "pageNum": "1", "action": "find"}
        self.app = bottle.Bottle()
        
#     @self.app.route('/')
#     @self.app.route('/<query>')
#     @self.checkParams(maxentries=int, radius=int, minprice=int,
#                 maxprice=int, categoryid=int, where=str)



        

In [73]:
import requests
from bs4 import BeautifulSoup

# Default search parameters followed by the filters for this sample query.
base_payload = {
    "sortingField": "SORTING_DATE",
    "adType": "",
    "posterType": "",
    "pageNum": "1",
    "action": "find",
    'keywords': 'Leica M240',
    'locationStr': 'Bielefeld',
    'minPrice': '1000',
    'maxPrice': '200000',
    'categoryId': '0',
    'radius': '500',
}
# Bound the wait: without a timeout a stalled connection blocks the kernel
# indefinitely.
requests_get = requests.get(base_url, params=base_payload, timeout=30)
requests_get.encoding = 'utf-8'
# Strip the "&#8203" (zero-width space) character references before parsing.
soup = BeautifulSoup(requests_get.text.replace("&#8203",""), features="lxml")

In [12]:
# Two sample rows used to demonstrate rendering a URL column as links.
data = [
    {'name': 'Google', 'url': 'http://www.google.com'},
    {'name': 'Stackoverflow', 'url': 'http://stackoverflow.com'},
]
df = pd.DataFrame(data)

def make_clickable(val):
    """Render ``val`` as an HTML link that opens in a new window.

    The value is HTML-escaped before it is placed in the ``href`` attribute
    and the link text, so special characters in a URL cannot inject markup.

    NOTE(review): ``make_clickable`` is already defined in earlier cells;
    this copy shadows them. Consider keeping a single definition.

    Args:
        val: URL to link to; converted with ``str`` before escaping.

    Returns:
        str: An ``<a target="_blank" href="...">...</a>`` element.
    """
    safe = html.escape(str(val), quote=True)
    # target _blank to open new window
    return '<a target="_blank" href="{}">{}</a>'.format(safe, safe)

# Display the demo frame with its ``url`` column rendered through make_clickable.
df.style.format(formatter={'url': make_clickable})

Unnamed: 0,name,url
0,Google,http://www.google.com
1,Stackoverflow,http://stackoverflow.com


In [23]:
# Scratch value holding ``None``.
a = None

In [25]:
# Identity check against None; prints only when ``a`` holds no value.
if a is None:
    print("haha")