In [35]:
import requests
from bs4 import BeautifulSoup as soup
import pandas as pd

# Seconds to wait for a server before giving up on a URL; without a timeout a
# single stalled connection would block the whole run indefinitely.
_REQUEST_TIMEOUT = 30


def _first_nonempty_texts(page, selectors):
    """Return the stripped texts for the first selector that yields any text.

    Selectors are tried in order. A selector whose matches are all blank is
    skipped so that later candidates still get a chance. Returns an empty list
    when no selector produces text.
    """
    for selector in selectors:
        texts = [node.text.strip() for node in page.select(selector)]
        texts = [text for text in texts if text]
        if texts:
            return texts
    return []


def _build_event_frame(page, url, name_cls, date_cls, location_cls, description_cls, pricing_cls):
    """Build the per-URL DataFrame from a parsed page and its candidate CSS classes."""
    names = _first_nonempty_texts(page, [f'.{cls}' for cls in name_cls])
    if not names:
        # Fallback to meta tag content if names are not found.
        # NOTE(review): this collects the content of every <meta> tag, so the
        # values may be unrelated to the event title - confirm this is intended.
        names = [
            meta['content']
            for meta in page.select('meta[content]')
            if 'content' in meta.attrs and meta['content']
        ]

    dates = _first_nonempty_texts(page, [f'.{cls}' for cls in date_cls])
    locations = _first_nonempty_texts(page, [f'.{cls}' for cls in location_cls])
    descriptions = _first_nonempty_texts(page, [f'.{cls}' for cls in description_cls])

    # Pricing may be exposed either as a class or as a data-testid attribute.
    pricing_selectors = [f'.{cls}, [data-testid="{cls}"]' for cls in pricing_cls]
    if pricing_cls:
        # Nested-pricing fallbacks are keyed on the last candidate class; they
        # are skipped for an empty candidate list, where [-1] would raise.
        last_cls = pricing_cls[-1]
        pricing_selectors += [
            f'[style="--TypographyColor: #585163;"] .{last_cls}',
            f'[data-testid="ticket-card-compact-size-display-price"] .{last_cls}',
            f'[class="eds-text-bm eds-text-weight--heavy"] .{last_cls}',
        ]
    prices = _first_nonempty_texts(page, pricing_selectors)

    # Every column needs at least one value so the row is not dropped.
    names = names or ["N/A"]
    dates = dates or ["N/A"]
    locations = locations or ["N/A"]
    descriptions = descriptions or ["N/A"]
    prices = prices or ["N/A"]

    # Truncate all columns to the shortest one so the DataFrame is rectangular.
    row_count = min(len(names), len(dates), len(locations), len(descriptions), len(prices))

    return pd.DataFrame({
        'Event Name': names[:row_count],
        'Event Date(s)': dates[:row_count],
        'Location (if applicable)': locations[:row_count],
        'Website URL': [url] * row_count,
        'Description': descriptions[:row_count],
        'Pricing': prices[:row_count],
    })


def scrape_data(urls, name_classes, date_classes, location_classes, description_classes, pricing_classes, referers):
    """Scrape event details from each URL and save them to ``event_data.csv``.

    Args:
        urls: Page URLs to fetch.
        name_classes, date_classes, location_classes, description_classes,
        pricing_classes: One entry per URL (same index as ``urls``); each entry
            is a list of candidate CSS class names tried in order for that field.
        referers: One Referer value per URL, sent as the ``Referer`` request
            header. Missing or empty entries mean no Referer header is sent.

    Returns:
        A DataFrame with the rows from every successfully fetched URL, or
        ``None`` when nothing could be fetched. When data is returned it is
        also written to ``event_data.csv`` (relative path) and a confirmation
        message is printed.

    URLs whose request fails (including timeouts) are reported with ``print``
    and skipped.
    """
    base_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36',
    }

    all_data = []  # One DataFrame per successfully scraped URL

    for i, url in enumerate(urls):
        headers = dict(base_headers)
        if referers and i < len(referers) and referers[i]:
            headers['Referer'] = referers[i]

        # Only the HTTP call can raise RequestException, so keep the try narrow.
        try:
            response = requests.get(url=url, headers=headers, timeout=_REQUEST_TIMEOUT)
            response.raise_for_status()  # Raise an HTTPError for bad responses
        except requests.RequestException as e:
            print(f"Failed to fetch data from {url}: {str(e)}")
            continue

        page = soup(response.content, 'html.parser')
        all_data.append(_build_event_frame(
            page,
            url,
            name_classes[i],
            date_classes[i],
            location_classes[i],
            description_classes[i],
            pricing_classes[i],
        ))

    if not all_data:
        return None

    # Concatenate all per-URL DataFrames into a single DataFrame
    result_df = pd.concat(all_data, ignore_index=True)

    # Save the result DataFrame to a CSV file
    csv_filename = 'event_data.csv'
    result_df.to_csv(csv_filename, index=False)
    print(f"Data saved to {csv_filename}")

    return result_df

# Example run: six Eventbrite event pages. Each per-field list below holds one
# entry per URL (same order as `urls`), and each entry is the list of candidate
# CSS classes to try for that field on that page.
urls = [
    'https://www.eventbrite.com/e/eco-consciousness-and-wellness-expo-tickets-878457408757?aff=ebdssbdestsearch&keep_tld=1',
    'https://www.eventbrite.com/e/founders-conclave-startup-mixer-investor-and-d2c-tickets-924950771657?aff=ebdssbdestsearch&_gl=1*1lo6wnn*_up*MQ..*_ga*NzQ1MzA1MzYzLjE3MTgzNzAwOTM.*_ga_TQVES5V6SH*MTcxODM3MDA5My4xLjAuMTcxODM3MDA5My4wLjAuMA..',
    'https://www.eventbrite.com/e/international-d2c-conclave24-tickets-884043807827?aff=ebdssbdestsearch&_gl=1*19n6qkz*_up*MQ..*_ga*MzQ2MzYwMDk0LjE3MTgzODk3MjE.*_ga_TQVES5V6SH*MTcxODM4OTcyMS4xLjAuMTcxODM4OTcyMS4wLjAuMA..',
    'https://www.eventbrite.com/e/eco-consciousness-wellness-networking-expo-tickets-814520571957?aff=erelpanelorg&keep_tld=1',
    'https://www.eventbrite.com/e/live-event-future-pilot-orientation-day-in-lucknow-tickets-917202175397?aff=ebdssbdestsearch&keep_tld=1',
    'https://www.eventbrite.com/e/india-tech-summit-tickets-97765875365?aff=ebdssbdestsearch&_gl=1*k1mzls*_up*MQ..*_ga*MTgzMjkyOTEwOS4xNzE4MzkwMTI2*_ga_TQVES5V6SH*MTcxODM5MDEyNi4xLjAuMTcxODM5MDEyNi4wLjAuMA..',
]

# These four fields use the same single candidate class on every page.
name_classes = [['event-title'] for _ in urls]
date_classes = [['date-info__full-datetime'] for _ in urls]
location_classes = [['location-info__address-text'] for _ in urls]
description_classes = [['summary'] for _ in urls]

# Pricing uses the conversion-bar class everywhere except the third page,
# which needs the ticket-card / typography classes instead.
pricing_classes = [['conversion-bar__panel-info'] for _ in urls]
pricing_classes[2] = [
    'TicketCard-module__pricing___38cNv',
    'Typography_root__487rx',
    'Typography_align-match-parent__487rx',
]

# One referer per URL, in the same order as `urls`.
referers = [
    'https://www.eventbrite.com/',
    'https://www.eventbrite.com/d/india/b2b/',
    'https://www.eventbrite.com/d/india/business--events--next-week/b2b/?page=1',
    'https://www.eventbrite.com/?_gl=1*p9rap4*_up*MQ..*_ga*MzM3NDE0MzUuMTcxODM4NjgyMg..*_ga_TQVES5V6SH*MTcxODM4NjgyMi4xLjAuMTcxODM4NjgyMi4wLjAuMA..',
    'https://www.eventbrite.com/d/india/b2b/',
    'https://www.eventbrite.com/d/india/b2b/?page=1&hash=112d19cd9fc',
]

scrape_data(
    urls,
    name_classes,
    date_classes,
    location_classes,
    description_classes,
    pricing_classes,
    referers,
)

Data saved to event_data.csv


Unnamed: 0,Event Name,Event Date(s),Location (if applicable),Website URL,Description,Pricing
0,Eco-Consciousness and Wellness Expo,"Sat, 27 Jul 2024 12:00 - 18:00 IST",The Bombay Presidency Radio Club Limited,https://www.eventbrite.com/e/eco-consciousness...,Discover eco-friendly practices and sustainabl...,£0 – £109.05
1,Founder's Conclave Startup Mixer Investor and D2C,"Saturday, June 22 · 5 - 7pm IST",CoKarma - Coworking Space,https://www.eventbrite.com/e/founders-conclave...,Founder's Conclave D2C and Investment Meetup. ...,On Sale Jun 22 at 5:00 PM
2,International D2C Conclave'24,"Saturday, July 13 · 8:30am - 6pm IST",Radisson Hotel Gurugram Udyog Vihar,https://www.eventbrite.com/e/international-d2c...,The International D2C Conclave 2024 in Mumbai ...,$25.00+$2.72 Fee
3,Eco-Consciousness & Wellness Networking Expo,"Mon, 15 Jul 2024 12:00 - 18:00 GMT+1",4th Floor Studios,https://www.eventbrite.com/e/eco-consciousness...,Explore holistic wellness on May 9th. Engage w...,From £0
4,Live Event: Future Pilot Orientation Day in Lu...,"Saturday, June 29 · 10am - 5pm IST",Radisson Lucknow City Center,https://www.eventbrite.com/e/live-event-future...,Ready to take your passion for flying to the n...,Free
5,India Tech Summit,"Friday, November 15 · 10am - 1pm IST",New Delhi,https://www.eventbrite.com/e/india-tech-summit...,India Tech Summit is an international program ...,$0 – $107.72
