### Mongo

In [None]:
import json
import random
import string
import os
import logging

# Setup logging
# logging.basicConfig() silently does nothing when the root logger already has
# handlers (for example when another cell of this notebook configured logging
# earlier in the same kernel, or when this cell is re-run).  Drop any existing
# root handlers first so that errors raised by the MongoDB generators really
# end up in data_generation_mongodb.log.
for _handler in logging.root.handlers[:]:
    logging.root.removeHandler(_handler)
    _handler.close()
logging.basicConfig(filename='data_generation_mongodb.log', level=logging.ERROR,
                    format='%(asctime)s:%(levelname)s:%(message)s')

# Directory to save generated data (created on demand; reused if it exists)
output_dir = 'generated_data_mongodb'
os.makedirs(output_dir, exist_ok=True)

def random_string(length):
    """Return ``length`` randomly chosen ASCII letters (upper or lower case).

    A ``length`` of zero or less yields an empty string.
    """
    alphabet = string.ascii_letters
    # One random.choice() call per character, in order.
    picked = [random.choice(alphabet) for _ in range(length)]
    return ''.join(picked)

def generate_customer_profiles(num_records):
    """Write ``num_records`` synthetic customer profiles to a JSON file.

    Every profile carries a random 10-letter ``customer_id`` and ``name``, an
    ``@example.com`` e-mail address with a random 5-letter local part, an
    ``age`` between 18 and 70 (inclusive) and a random 20-letter ``address``.
    The profiles are dumped as a single JSON array to
    ``customer_profiles.json`` inside ``output_dir``.  A profile whose
    construction raises is logged and skipped.
    """
    def build_profile():
        # Fields are drawn in a fixed order: id, name, email, age, address.
        return {
            "customer_id": random_string(10),
            "name": random_string(10),
            "email": f"{random_string(5)}@example.com",
            "age": random.randint(18, 70),
            "address": random_string(20)
        }

    profiles = []
    for _ in range(num_records):
        try:
            profiles.append(build_profile())
        except Exception as exc:
            logging.error(f"Error generating customer profile: {exc}")

    target_path = os.path.join(output_dir, 'customer_profiles.json')
    with open(target_path, 'w') as out_file:
        json.dump(profiles, out_file)

def generate_product_catalogs(num_records):
    """Write ``num_records`` synthetic product catalog entries to a JSON file.

    Every entry carries a random 10-letter ``product_id``, a random 15-letter
    ``name``, a ``category`` picked from four fixed values, a ``price``
    drawn uniformly from 10 to 1000 and rounded to two decimals, and a random
    50-letter ``description``.  The entries are dumped as a single JSON array
    to ``product_catalogs.json`` inside ``output_dir``.  An entry whose
    construction raises is logged and skipped.
    """
    categories = ["Electronics", "Clothing", "Home", "Books"]
    products = []
    for _ in range(num_records):
        try:
            # Values are drawn in the same order as the keys are listed.
            product = {
                "product_id": random_string(10),
                "name": random_string(15),
                "category": random.choice(categories),
                "price": round(random.uniform(10, 1000), 2),
                "description": random_string(50)
            }
            products.append(product)
        except Exception as exc:
            logging.error(f"Error generating product catalog: {exc}")

    target_path = os.path.join(output_dir, 'product_catalogs.json')
    with open(target_path, 'w') as out_file:
        json.dump(products, out_file)

def generate_campaign_metadata(num_records):
    """Generate campaign metadata data in JSON format.

    Writes ``num_records`` records as a single JSON array to
    ``campaign_metadata.json`` inside ``output_dir``.  Each record has a random
    10-letter ``campaign_id``, a random 15-letter ``name``, a ``budget`` drawn
    uniformly from 1000 to 10000 and rounded to two decimals, and a
    ``start_date`` / ``end_date`` pair in 2024 formatted as ``YYYY-MM-DD``.

    The two dates are drawn independently and then ordered, so ``end_date`` is
    never earlier than ``start_date`` (they may be equal).  A record whose
    construction raises is logged and skipped.
    """
    def random_date():
        # Day is capped at 28 so the date is valid in every month.
        return f"2024-{random.randint(1, 12):02d}-{random.randint(1, 28):02d}"

    data = []
    for _ in range(num_records):
        try:
            campaign_id = random_string(10)
            name = random_string(15)
            budget = round(random.uniform(1000, 10000), 2)
            # Zero-padded ISO dates sort chronologically as plain strings, so
            # sorting the pair guarantees a campaign never ends before it starts.
            start_date, end_date = sorted((random_date(), random_date()))
            record = {
                "campaign_id": campaign_id,
                "name": name,
                "budget": budget,
                "start_date": start_date,
                "end_date": end_date
            }
            data.append(record)
        except Exception as e:
            logging.error(f"Error generating campaign metadata: {e}")
    with open(os.path.join(output_dir, 'campaign_metadata.json'), 'w') as f:
        json.dump(data, f)

# Generate MongoDB data
# Example volumes: 10000 customer profiles, 5000 product catalog entries and
# 20000 campaign metadata records.
generate_customer_profiles(num_records=10000)
generate_product_catalogs(num_records=5000)
generate_campaign_metadata(num_records=20000)

### SQL Server

In [None]:
import csv
import random
import string
import os
import logging

# Setup logging
# logging.basicConfig() silently does nothing when the root logger already has
# handlers (for example when another cell of this notebook configured logging
# earlier in the same kernel, or when this cell is re-run).  Drop any existing
# root handlers first so that errors raised by the SQL Server generator really
# end up in data_generation_sqlserver.log.
for _handler in logging.root.handlers[:]:
    logging.root.removeHandler(_handler)
    _handler.close()
logging.basicConfig(filename='data_generation_sqlserver.log', level=logging.ERROR,
                    format='%(asctime)s:%(levelname)s:%(message)s')

# Directory to save generated data (created on demand; reused if it exists)
output_dir = 'generated_data_sqlserver'
os.makedirs(output_dir, exist_ok=True)

def random_string(length):
    """Return ``length`` randomly chosen ASCII letters (upper or lower case).

    A ``length`` of zero or less yields an empty string.
    """
    alphabet = string.ascii_letters
    # One random.choice() call per character, in order.
    picked = [random.choice(alphabet) for _ in range(length)]
    return ''.join(picked)

def generate_marketing_attribution_data(file_name, num_records):
    """Write ``num_records`` synthetic marketing attribution rows as CSV.

    The file ``file_name`` is created inside ``output_dir`` with a header row
    followed by one row per record.  Each row has random 10-letter
    ``attribution_id``, ``campaign_id`` and ``customer_id`` values, an
    ``interaction_date`` in 2024 (``YYYY-MM-DD``, day capped at 28), a
    ``channel`` picked from four fixed values and a random boolean
    ``conversion``.  A row that fails to generate or write is logged and
    skipped.
    """
    columns = ['attribution_id', 'campaign_id', 'customer_id', 'interaction_date', 'channel', 'conversion']
    channels = ["Email", "Social Media", "Paid Search", "Organic Search"]
    csv_path = os.path.join(output_dir, file_name)

    def build_row():
        # Values are drawn in the same order as the columns are listed.
        # NOTE(review): the csv module writes the boolean as the text
        # "True"/"False" -- confirm the target column/import accepts that.
        return {
            'attribution_id': random_string(10),
            'campaign_id': random_string(10),
            'customer_id': random_string(10),
            'interaction_date': f"2024-{random.randint(1, 12):02d}-{random.randint(1, 28):02d}",
            'channel': random.choice(channels),
            'conversion': random.choice([True, False])
        }

    with open(csv_path, 'w', newline='') as handle:
        row_writer = csv.DictWriter(handle, fieldnames=columns)
        row_writer.writeheader()
        for _ in range(num_records):
            try:
                row_writer.writerow(build_row())
            except Exception as exc:
                logging.error(f"Error generating marketing attribution data: {exc}")

# Generate CSV data for SQL Server bulk import
# Example volume: 20000 records.
generate_marketing_attribution_data(
    file_name='marketing_attribution_data.csv',
    num_records=20000,
)

### Streaming

In [None]:
import csv
import random
import string
import os
import logging

# Setup logging
# logging.basicConfig() silently does nothing when the root logger already has
# handlers (for example when another cell of this notebook configured logging
# earlier in the same kernel, or when this cell is re-run).  Drop any existing
# root handlers first so that errors raised by the streaming CSV generator
# really end up in data_generation_csv.log.
for _handler in logging.root.handlers[:]:
    logging.root.removeHandler(_handler)
    _handler.close()
logging.basicConfig(filename='data_generation_csv.log', level=logging.ERROR,
                    format='%(asctime)s:%(levelname)s:%(message)s')

# Directory to save generated data (created on demand; reused if it exists)
output_dir = 'generated_data_csv'
os.makedirs(output_dir, exist_ok=True)

def random_string(length):
    """Return ``length`` randomly chosen ASCII letters (upper or lower case).

    A ``length`` of zero or less yields an empty string.
    """
    alphabet = string.ascii_letters
    # One random.choice() call per character, in order.
    picked = [random.choice(alphabet) for _ in range(length)]
    return ''.join(picked)

def generate_ad_impressions_and_clicks(file_name, num_records):
    """Generate ad impressions and clicks data in CSV format.

    Creates ``file_name`` inside ``output_dir`` with a header row followed by
    ``num_records`` rows.  Each row has a random 10-letter ``ad_id``, an
    ``impression_time`` and a ``click_time`` in July 2024 formatted as
    ``YYYY-MM-DD HH:MM:SS``, and a random 5-letter ``user_id``.

    The two timestamps are drawn independently and then ordered, so
    ``click_time`` is never earlier than ``impression_time`` (they may be
    equal).  A row that fails to generate or write is logged and skipped.
    """
    def random_timestamp():
        # July has 31 days, so every drawn day is valid.
        return (f"2024-07-{random.randint(1, 31):02d} "
                f"{random.randint(0, 23):02d}:{random.randint(0, 59):02d}:{random.randint(0, 59):02d}")

    with open(os.path.join(output_dir, file_name), 'w', newline='') as csvfile:
        fieldnames = ['ad_id', 'impression_time', 'click_time', 'user_id']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for _ in range(num_records):
            try:
                ad_id = random_string(10)
                # Zero-padded timestamps sort chronologically as plain strings,
                # so sorting the pair guarantees a click never precedes its
                # impression.
                impression_time, click_time = sorted((random_timestamp(), random_timestamp()))
                writer.writerow({
                    'ad_id': ad_id,
                    'impression_time': impression_time,
                    'click_time': click_time,
                    'user_id': random_string(5)
                })
            except Exception as e:
                logging.error(f"Error generating ad impressions and clicks data: {e}")

# Generate CSV data for streaming (Kafka)
# Example volume: 50000 records.
generate_ad_impressions_and_clicks(
    file_name='ad_impressions_and_clicks.csv',
    num_records=50000,
)