# Resident Advisor Extractor

This class aim to extract the dataframe of all events in Resident Advisor and create DataFrame such as : Club Location, Event Per Clubs, Club Attendance...

## Retrieving the data

In [2]:
import numpy as np
import pandas as pd
import requests
import bs4

In [3]:
baseURL = "https://www.residentadvisor.net/"
clubListURL = "https://www.residentadvisor.net/clubs.aspx"
content = requests.get(clubListURL).text

soup = bs4.BeautifulSoup(content, "html5lib")

clubDF = pd.DataFrame(columns = ["ClubID","Name","Address"])

for tdTag in soup.findAll('li'):
    for clubs in tdTag.findAll('li'):
        if('class' in clubs.attrs and len(clubs)==2):
            #Parsing infos
            clubID = "null"
            address = "null"
            name = "null"
            for line in clubs:
                if(line.a == None):
                    address = line.text
                else:
                    name = line.text
                    clubID = line.a['href'].split("=")[1]
            #print(str(clubID)+",,"+str(adress)+",,"+str(name))
            df = pd.Series([clubID,name,address],['ClubID','Name','Address'])
            clubDF = clubDF.append(df, ignore_index=True)
            #print(df) 
clubDF.head(200)

Unnamed: 0,ClubID,Name,Address
0,55816,2. Akt Restaurant & Bar,Selnaustrasse 2
1,84182,25 Hours Hotel Zürich,"Pfingstweidstrasse 102, 8005 Zürich"
2,24800,2B Lounge,"Nüschelerstrasse 31, 8001 Zürich"
3,85693,3monkeys,"Alexander-Schönistrasse 17, 2502, Biel/Bienne"
4,82433,4. Akt,"Heinrichstrasse 262, 8005 Zürich"
5,34422,5ème Etage,"Mühlenplatz 11, Bern"
6,31409,Abraxas,"Chemin du Stand 5; Pully, 1009"
7,39607,Acanto,"Pfingstweidstrasse 6, 8005 Zürich"
8,102059,Acapella Bar,"Place de la Gare, 1957 Ardon, Valais, Suisse"
9,18481,Acqua Lounge Basilea,"Binningerstrasse 14; 4051, Basel"


Once we have the list of clubs, we need to get all the events for each club. For this, we create a parser that take the ClubID as argument and retrieve all events dates and line-up.

In [4]:
def getEventsFrom(clubIndex):
    '''Gets dataframe of events for a club'''
    ClubURL = "https://www.residentadvisor.net/club.aspx?id="+str(clubIndex)+""
    content = requests.get(ClubURL).text
    soup = bs4.BeautifulSoup(content, "html5lib")
    
    eventIDList = list() #List of eventsID to be analysed
    
    #Parsing page to get eventIDs
    for sections in soup.findAll('section'):
        for links in sections.findAll('a'):
            if(len(links.attrs)==2 and 'itemprop' in links.attrs):
                link = links['href']
                eventID = link.split('?')[1]
                eventIDList.append(eventID) #Add eventID to list
    
    DFEvents = pd.DataFrame(columns = ['ClubID','EventID','EventName','Date','LineUp'])
    
    for eid in eventIDList:
        series = getSerieFromEvent(clubIndex,eid)
        DFEvents = DFEvents.append(series,ignore_index=True)

    return DFEvents


In [9]:
def getSerieFromEvent(clubInd,eventID):
    '''Returns a Serie [EventID,EventName,Date,LineUp] for the eventID passed as argument'''
    EventURL = 'https://www.residentadvisor.net/event.aspx?'+str(eventID)
    content = requests.get(EventURL).text
    soup = bs4.BeautifulSoup(content, "html5lib")
    
    day = ''
    date = ''
    lineup = set()
    title = ''
    
    #Get LineUp
    for ul in soup.findAll('ul'):
        for div in ul.findAll('div'):
            if('Line' in div.text):
                for p in div.findAll('p'):
                    if('class' in p.attrs and p['class']!=None):
                        artists = p.text.split(',')
                        for a in artists:
                            lineup.add(a)
            
            #Deprecated#
            '''#Get Event date
            if('class' in div.attrs and div['class']!= ['clearfix'] and len(div.findAll('li'))>0):
                    for line in div.findAll('li'):
                        if('Date' in line.text):
                            for a in line:
                                if('-' in a): #Get time
                                     hr = a
                                if('y' in a): #Get weekday
                                    day = a
                                if(len(a)==1 and " /" not in a.text): #Get Date
                                    date = a
            '''
    
    #Get title and Date of the event
    for meta in soup.findAll('meta'):
        if('property' in meta.attrs ):
            if(meta['property']=='og:title'):
                title = meta['content']
            if(meta['property']=='og:description'):
                datestr = meta['content']
                val = datestr.split(",")
                date = val[1].split("-")[0]
                day = val[0]
    
    for a in lineup:
        a = a.replace("\n","")
    data =[clubInd, eventID, title,date,lineup]

    S = pd.Series(data,['ClubID','EventID','EventName','Date','LineUp'])
    return S

In [10]:
ClubID = 31409
eventID =292693
df = getSerieFromEvent(ClubID,eventID)
getEventsFrom(31409)

Unnamed: 0,ClubID,EventID,EventName,Date,LineUp
0,31409.0,292693,The Cruzaders,12 November 2011,{The Cruzaders}
1,31409.0,224466,The Cruzaders - Release Party The Cruzaders,04 February 2011,{The Cruzaders}
2,31409.0,172824,The Cruzaders,03 September 2010,{The Cruzaders}


## Creating CSV files for all dataframes of clubs

In [11]:
d = clubDF.values[0]
index = d[0]
getEventsFrom(index)

Unnamed: 0,ClubID,EventID,EventName,Date,LineUp
0,55816,707003,Deeptown Music Showcase,19 September 2015,{Affani\nMark Faermont}
1,55816,753528,Mucho Stylez,12 September 2015,{Mucho Stylez}
2,55816,730155,Mucho Stylez,03 July 2015,{Mucho Stylez}
3,55816,713648,Zweiter Akt,13 June 2015,{Mucho Stylez}
4,55816,696650,Deeptown Night,21 March 2015,"{ Sonny Dima, Mark Faermont}"
5,55816,671332,Deeptown Night,17 January 2015,"{ Mark Faermont, Carlos Russo}"
6,55816,648424,Tonka UND 3333 Tage Saxer,29 November 2014,"{ Danny Coleman, Mark Faermont, Tonka}"
7,55816,642266,Deeptown Night,18 October 2014,"{ David Eye, Carlos Russo}"
8,55816,619558,Deeptown Parade,02 August 2014,"{Elektroschneider, Mark Faermont, Danny Cole..."
9,55816,557604,Deeptown Music Night,18 January 2014,"{ DJ Le Baron, Mark Faermont}"
