## events_ch_handler

In [1]:
from html.parser import HTMLParser
import urllib.request
import unidecode
from datetime import datetime

In [2]:
# Building blocks of the events.ch search URL assembled in the next cell.
# Python 3 string literals are already unicode, so no ``u`` prefix is needed.
base_url = "https://events.ch"
language, action, search_type = "en", "search", "concerts"
date = "2015-07-23"      # first day to search, formatted YYYY-MM-DD
end_search = "6/cs"      # fixed path suffix (meaning not documented here -- TODO confirm)
page_number = 1          # result page index; the first page is 1

In [3]:
# Assemble the search URL by joining the configured path pieces with '/'.
querry_url = '/'.join(
    [base_url, language, action, search_type, date, end_search, str(page_number)]
)
print(querry_url)

https://events.ch/en/search/concerts/2015-07-23/6/cs/1


In [4]:
# Download the search-result page. The ``with`` block closes the HTTP
# response as soon as the body has been read instead of leaving it open.
with urllib.request.urlopen(querry_url) as response:
    page = response.read().decode('utf-8')

In [5]:
class MainHTMLParser(HTMLParser):
    """Collect events from an events.ch search-result page.

    Every ``<a>`` tag with exactly three attributes, one of which is an
    ``href`` whose path has at least six '/'-separated pieces, is treated as
    a link to an event page. That page is downloaded and parsed with
    ``EventHTMLParser``; events that list at least one artist are stored as
    ``(location, festival, 'YYYY-MM-DD', artists, genre)`` tuples.

    Parameters
    ----------
    base_url : str
        Scheme and host prepended to every event link. Defaults to
        ``"https://events.ch"``.
    """

    # An event path looks like '/en/<genre>/<venue>/<location>/<event>/<id>/',
    # so splitting on '/' must yield at least indices 0..5.
    _MIN_PATH_PARTS = 6

    def __init__(self, base_url="https://events.ch"):
        super().__init__()
        self.base_url = base_url
        self.data = []

    @staticmethod
    def _event_day(raw_date):
        """Return the 'YYYY-MM-DD' day of a 'Thu Jul 23 2015 20:00:00 GMT+0200 (CEST)'
        style string, or None when it cannot be parsed.

        Only the leading weekday/month/day/year tokens are parsed. The
        trailing zone name is ignored on purpose: ``strptime``'s ``%Z`` only
        accepts UTC, GMT and the local machine's zone names, so parsing
        "(CEST)" would fail on machines configured for another time zone.
        """
        tokens = (raw_date or "").split()
        if len(tokens) < 4:
            return None
        try:
            parsed = datetime.strptime(' '.join(tokens[:4]), "%a %b %d %Y")
        except ValueError:
            return None
        return parsed.strftime("%Y-%m-%d")

    def handle_starttag(self, tag, attrs):
        """Follow event links and record the events they describe."""
        if tag != 'a' or len(attrs) != 3:
            return
        # Look the link up by name rather than by position in the tag.
        href = dict(attrs).get('href')
        if not href:
            return
        split = href.split('/')
        if len(split) < self._MIN_PATH_PARTS:
            # Not an event link; skip it before spending a network request.
            return

        subquerry_url = self.base_url + urllib.parse.quote(href)
        print(subquerry_url)
        with urllib.request.urlopen(subquerry_url) as response:
            subpage = response.read().decode('utf-8')
        subparser = EventHTMLParser()
        subparser.feed(subpage)
        (artists, date) = subparser.getData()
        if len(artists) == 0:
            # Events without artists are never recorded, so a missing or
            # malformed date on such a page must not matter.
            return

        day = self._event_day(date)
        if day is None:
            print("skipping event with unparseable date: " + repr(date))
            return

        # A genre segment containing '-' (e.g. 'Jazz-Blues-Soul') is stored
        # as None; only single-word genres are kept.
        genre = split[2] if '-' not in split[2] else None
        location = split[4]
        festival = split[5]
        self.data.append((location, festival, day, artists, genre))

    def getData(self):
        """Return the list of event tuples collected so far."""
        return self.data
    
class EventHTMLParser(HTMLParser):
    """Extract the artist list and start date from one event page.

    The artists are read from the text of an ``<h2>`` tag that has exactly
    two attributes, the second with value ``"event-subtitle"``. The date is
    the value of the first ``<time>`` tag whose first attribute is a
    non-empty ``datetime``.
    """

    def __init__(self):
        super().__init__()
        self.artist_string = ""
        self.date = ""
        self.grab_artists = False
        self.artists = []

    def handle_starttag(self, tag, attrs):
        """Arm artist capture on the subtitle heading; record the first date."""
        if tag == 'h2' and len(attrs) == 2 and attrs[1][1] == "event-subtitle":
            self.grab_artists = True
        # ``attrs`` may be empty (a bare <time> tag), so check it before indexing.
        if (tag == 'time' and self.date == "" and attrs
                and attrs[0][0] == 'datetime' and attrs[0][1]):
            self.date = attrs[0][1]

    def handle_endtag(self, tag):
        """Stop capturing text as soon as any tag closes."""
        self.grab_artists = False

    def handle_data(self, data):
        """Keep the most recent text seen while capture is armed."""
        if self.grab_artists:
            self.artist_string = data

    def cleanArtists(self):
        """Rebuild ``self.artists`` from the captured subtitle text.

        The text is split on commas; only entries containing '(' are kept,
        cut at the first '(' and stripped of surrounding whitespace. The list
        is rebuilt from scratch so repeated calls do not duplicate artists.
        """
        names = []
        for chunk in self.artist_string.split(','):
            if '(' in chunk:
                name = chunk.split('(')[0].strip()
                if name:
                    names.append(name)
        self.artists = names

    def getData(self):
        """Return ``(artists, date)``; safe to call more than once."""
        self.cleanArtists()
        return (self.artists, self.date)

In [6]:
# Parse the downloaded search page. Feeding it makes MainHTMLParser download
# every event page it finds (each URL is printed as it is fetched), then the
# collected (location, festival, date, artists, genre) tuples are printed.
parser = MainHTMLParser()
parser.feed(page)
print(parser.getData())

https://events.ch/en/Classic/Eglise/Verbier/Verbier-Festival/e-484526/
https://events.ch/en/Rock-Indie-Punk-Heavy-Metal-Gothic/Bierhalle-Wolf/Z%C3%BCrich/Happy-Sound/e-489253/
https://events.ch/en/Pop/Pal%C3%A9o/Nyon/Pal%C3%A9o-Festival/e-492482/
https://events.ch/en/Rock-Indie-Punk-Heavy-Metal-Gothic/Pal%C3%A9o/Nyon/Pal%C3%A9o-Festival/e-492484/
https://events.ch/en/Hip-Hop-R%27n%27B/Pal%C3%A9o/Nyon/Pal%C3%A9o-Festival/e-492485/
https://events.ch/en/traditional/Kasernenareal/Basel/Basel-Tattoo/e-484477/
https://events.ch/en/Rock-Indie-Punk-Heavy-Metal-Gothic/Usine/null/Colaris/e-493757/
https://events.ch/en/Jazz-Blues-Soul/KKL/Luzern/Blue-Balls-Festival/e-484818/
https://events.ch/en/Jazz-Blues-Soul/KKL/Luzern/Blue-Balls-Festival/e-484819/
https://events.ch/en/Hip-Hop-R%27n%27B/Pavillon/Luzern/Blue-Balls-Festival/e-484823/
[('Verbier', 'Verbier-Festival', '2015-07-23', ['Yekwon Sunwoo '], 'Classic'), ('Nyon', 'Paléo-Festival', '2015-07-23', ['Husbands '], 'Pop'), ('Nyon', 'Paléo-Festi

In [29]:
def getEventsForDates(eventType, startDate, maxPage=10):
    """Scrape events.ch search results page by page.

    Parameters
    ----------
    eventType : str
        Search category used in the URL path, e.g. ``"concerts"``.
    startDate : str
        First day to search, as three '-'-separated parts (``"YYYY-MM-DD"``).
    maxPage : int
        Number of the last result page to fetch; pages ``1..maxPage`` are
        requested. A value below 1 fetches nothing.

    Returns
    -------
    list of tuple
        ``(location, festival, date_string, artists, genre)`` tuples as
        produced by ``MainHTMLParser``, in page order.

    Raises
    ------
    ValueError
        If ``startDate`` does not consist of exactly three '-'-separated parts.
    """
    if len(startDate.split('-')) != 3:
        raise ValueError("startDate must look like 'YYYY-MM-DD', got " + repr(startDate))

    base_url = "https://events.ch/en/search"
    end_search = "6/cs"
    data = []
    page_number = 1
    while page_number <= maxPage:
        querry_url = '/'.join([base_url, eventType, startDate, end_search, str(page_number)])
        # Close the HTTP response as soon as the body has been read.
        with urllib.request.urlopen(querry_url) as response:
            page = response.read().decode('utf-8')
        parser = MainHTMLParser()
        parser.feed(page)
        # extend() appends in place, avoiding a full list copy per event.
        data.extend(parser.getData())
        print("page_number:" + str(page_number))
        page_number += 1
    return data
    
    

In [28]:
# Fetch only the first result page of concert listings starting 2012-01-01.
# NOTE(review): this cell's execution count (28) is lower than that of the
# cell defining getEventsForDates (29), and the DataFrame output further down
# shows 2016 dates -- the saved outputs look stale; re-run top to bottom.
data = getEventsForDates("concerts", u"2012-01-01", 1)

https://events.ch/en/world-music/Ono-Das-Kulturlokal/Bern/Einat-Betzalel-%26-Hakim-Boukhit/e-153066/
https://events.ch/en/traditional/Bierhalle-Wolf/Z%C3%BCrich/Austria-Band/e-124424/
https://events.ch/en/Classic/Kongress-und-Kulturzentrum-Rondo/Pontresina/Neujahrskonzert/e-123457/
https://events.ch/en/Rock-Indie-Punk-Heavy-Metal-Gothic/Ice-Rock-Openair/Wasen-im-Emmental/Ice-Rock/e-128184/
https://events.ch/en/Classic/Tonhalle/Z%C3%BCrich/Brillante-Neujahrs-Konzertgala/e-121681/
https://events.ch/en/Jazz-Blues-Soul/Madeleine/Luzern/Salty-Dog-Blues-Band/e-133063/
https://events.ch/en/Jazz-Blues-Soul/Madeleine/Luzern/Dewaser-%26-Pisotelero-Pepe/e-133065/
https://events.ch/en/Jazz-Blues-Soul/Restaurant-Gr%C3%BCnwald/Z%C3%BCrich/Jazz-Happening/e-113561/
https://events.ch/en/Ragga-Reggae-African-Music-Dancehall/Gare-de-Lion/Wil/Gare-Tango/e-125899/
https://events.ch/en/traditional/Bierhalle-Wolf/Z%C3%BCrich/Austria-Band/e-124328/


In [9]:
import numpy as np
import pandas as pd

In [26]:
# One row per scraped event. Each tuple in ``data`` is
# (location, festival, date, artists, genre); the festival/event path segment
# is stored in the 'event' column and 'artists' holds a list per row.
df = pd.DataFrame(data, columns=['location', 'event', 'date', 'artists', 'genre'])

In [27]:
# Display the scraped events table.
df

Unnamed: 0,location,event,date,artists,genre
0,Thun,Neujahrskonzert,2016-01-01,"[ Leticia Kahraman , Marysol Schalit , Mikhe...",Classic
1,Luzern,Tanti-affeti-Neujahrskonzert,2016-01-01,"[Julia Lezhneva , Mikhail Anonenko ]",Classic
2,Bern,Neujahrskonzert,2016-01-01,"[ Svetlana Ignatovich , Marie-Claude Chappuis...",Classic
