In [2]:
import pandas as pd
import numpy as np
from faker import Faker
from datetime import datetime, timedelta
import random

# Initialize faker generator
fake = Faker()

# Set random seed for reproducibility
random.seed(42)
np.random.seed(42)
fake.seed_instance(42)

# Define possible values for event data
event_types = ['Conference', 'Summit', 'Symposium', 'Workshop', 'Meetup', 'Hackathon', 'Forum', 'Expo']
domains = ['Tech', 'AI', 'Data Science', 'Blockchain', 'Cybersecurity', 'Healthcare', 'Finance', 
           'Education', 'Sustainability', 'Marketing', 'Design', 'Innovation']
event_qualifiers = ['Global', 'Annual', 'International', 'Future of', 'Advanced', 'Next-Gen', 
                   'Modern', 'Digital', 'Emerging', '', '']

topics = [
    'Machine Learning', 'Deep Learning', 'Computer Vision', 'Natural Language Processing',
    'Robotics', 'IoT', 'Cloud Computing', 'Edge Computing', 'Big Data', 'Data Analytics',
    'Blockchain', 'Cryptocurrency', 'NFTs', 'Web3', 'Metaverse', 'Augmented Reality', 
    'Virtual Reality', 'UX Design', 'UI Design', 'DevOps', 'MLOps', 'Cybersecurity',
    'Quantum Computing', 'Digital Transformation', 'Sustainability', 'Green Tech',
    'Fintech', 'Healthtech', 'Edtech', 'Remote Work', 'Future of Work', 'Entrepreneurship',
    'Product Management', 'Digital Marketing', '5G Technology', 'Ethical AI'
]

locations = [
    'Toronto, Canada', 'Vancouver, Canada', 'Montreal, Canada', 'Calgary, Canada', 'Ottawa, Canada', 'Edmonton, Canada', 'Quebec City, Canada', 'Winnipeg, Canada', 'Halifax, Canada', 'Saskatoon, Canada']# Function to generate event name

def generate_event_name():
    qualifier = random.choice(event_qualifiers)
    domain = random.choice(domains)
    event_type = random.choice(event_types)
    year = 2025
    
    # Different patterns for event names
    patterns = [
        f"{qualifier} {domain} {event_type} {year}",
        f"{domain} {event_type} {year}",
        f"{qualifier} {domain} {event_type}",
        f"{domain} {year}",
        f"{domain} {event_type}"
    ]
    
    event_name = random.choice(patterns).strip()
    # Handle double spaces from empty qualifiers
    return ' '.join(event_name.split())

# Function to generate event summary
def generate_summary(event_name):
    summary_templates = [
        f"A premier gathering focused on the latest advancements in {random.choice(domains).lower()} technologies and innovations.",
        f"An exclusive event bringing together industry leaders to discuss the future of {random.choice(domains).lower()}.",
        f"Join experts and thought leaders to explore cutting-edge developments in {random.choice(domains).lower()}.",
        f"A collaborative platform for professionals to share insights on transforming {random.choice(domains).lower()}.",
        f"The definitive event for anyone looking to stay ahead in the rapidly evolving {random.choice(domains).lower()} landscape."
    ]
    return random.choice(summary_templates)

# Generate synthetic data
num_rows = 100
data = []

# Start date range for events (all in 2025)
start_date = datetime(2025, 1, 1)
end_date = datetime(2025, 12, 31)
date_range = (end_date - start_date).days

for i in range(num_rows):
    # Generate event ID (e1, e2, e3, etc.)
    event_id = f'e{100+i+1}'
    
    # Generate random date in 2025
    random_days = random.randint(0, date_range)
    event_date = start_date + timedelta(days=random_days)
    
    # Generate event name
    event_name = generate_event_name()
    
    # Generate event summary
    event_summary = generate_summary(event_name)
    
    # Generate key topics (ensure they're unique)
    selected_topics = random.sample(topics, 3)
    
    # Generate location
    location = random.choice(locations)
    
    # Add to data list
    data.append({
        'event_id': event_id,
        'event name': event_name,
        'date': event_date.strftime('%Y-%m-%d'),
        'event summary': event_summary,
        'Key topic 1': selected_topics[0],
        'Key topic 2': selected_topics[1],
        'Key topic 3': selected_topics[2],
        'location': location
    })

# Create DataFrame
df = pd.DataFrame(data)

# Display the DataFrame
print(df)

# Optionally, save to CSV
df.to_csv('synthetic_event_data_2025.csv', index=False)
print("\nData has been saved to 'synthetic_event_data_2024.csv'")


   event_id                          event name        date  \
0      e101                    Tech Meetup 2025  2025-11-24   
1      e102               Blockchain Conference  2025-02-17   
2      e103              Healthcare Meetup 2025  2025-03-23   
3      e104                      Finance Summit  2025-10-02   
4      e105                  Future of AI Forum  2025-02-10   
..      ...                                 ...         ...   
95     e196           Advanced Marketing Meetup  2025-01-27   
96     e197                  Cybersecurity 2025  2025-07-08   
97     e198  Advanced Healthcare Hackathon 2025  2025-12-26   
98     e199                 Modern Finance Expo  2025-04-21   
99     e200                Global Design Summit  2025-11-13   

                                        event summary        Key topic 1  \
0   The definitive event for anyone looking to sta...                IoT   
1   A collaborative platform for professionals to ...          Metaverse   
2   A premier g

In [3]:
# Sort DataFrame by date column
df_sorted = df.sort_values(by='date')


display(df)

Unnamed: 0,event_id,event name,date,event summary,Key topic 1,Key topic 2,Key topic 3,location
0,e101,Tech Meetup 2025,2025-11-24,The definitive event for anyone looking to sta...,IoT,Healthtech,Computer Vision,"Toronto, Canada"
1,e102,Blockchain Conference,2025-02-17,A collaborative platform for professionals to ...,Metaverse,Edtech,UX Design,"Toronto, Canada"
2,e103,Healthcare Meetup 2025,2025-03-23,A premier gathering focused on the latest adva...,Quantum Computing,Virtual Reality,Computer Vision,"Winnipeg, Canada"
3,e104,Finance Summit,2025-10-02,An exclusive event bringing together industry ...,Robotics,Computer Vision,Metaverse,"Ottawa, Canada"
4,e105,Future of AI Forum,2025-02-10,Join experts and thought leaders to explore cu...,Web3,UX Design,Robotics,"Saskatoon, Canada"
...,...,...,...,...,...,...,...,...
95,e196,Advanced Marketing Meetup,2025-01-27,A collaborative platform for professionals to ...,Metaverse,Cloud Computing,Quantum Computing,"Halifax, Canada"
96,e197,Cybersecurity 2025,2025-07-08,The definitive event for anyone looking to sta...,Deep Learning,UX Design,Cryptocurrency,"Ottawa, Canada"
97,e198,Advanced Healthcare Hackathon 2025,2025-12-26,A premier gathering focused on the latest adva...,Data Analytics,Deep Learning,IoT,"Halifax, Canada"
98,e199,Modern Finance Expo,2025-04-21,The definitive event for anyone looking to sta...,IoT,Natural Language Processing,Data Analytics,"Montreal, Canada"
