In [None]:
import numpy as np
import pandas as pd
import clickhouse_connect
import uuid
from faker import Faker
from datetime import datetime

# Connect to the ClickHouse server on localhost as the `default` user.
# The port is given as an integer, which is the type the client API documents.
client = clickhouse_connect.get_client(host='localhost', port=8123, user='default')

# DDL for the `events` table that the generated rows are inserted into.
create_table = """
CREATE TABLE events (
    event_id UUID,
    user_id UUID,
    event_type String,
    event_timestamp DateTime,
    product_id UUID,
    revenue Float
)
ENGINE = MergeTree
ORDER BY (event_timestamp)
;
"""
drop_table_if_exist = 'DROP TABLE IF EXISTS events'

# Drop and recreate the table so every run starts from an empty `events` table.
client.command(drop_table_if_exist)
client.command(create_table)

# Fixed pools that the synthetic events are sampled from.
users = [str(uuid.uuid4()) for _ in range(1000)]
products = [str(uuid.uuid4()) for _ in range(10)]
events = ['click', 'view', 'purchase']
count_rows = 100000
fake = Faker()

# Insert 10 batches of `count_rows` synthetic rows each.
for _ in range(10):
    # Draw each categorical column in a single vectorized call rather than
    # one np.random.choice call per row; .tolist() yields plain Python strings.
    event_types = np.random.choice(events, size=count_rows)
    data = {
        'event_id': [str(uuid.uuid4()) for _ in range(count_rows)],
        'user_id': np.random.choice(users, size=count_rows).tolist(),
        'event_type': event_types.tolist(),
        # NOTE(review): date_between returns calendar dates, so after the
        # to_datetime conversion below every timestamp falls on midnight.
        # Use fake.date_time_between if a time-of-day spread is wanted.
        'event_timestamp': [fake.date_between(start_date='-2y') for _ in range(count_rows)],
        'product_id': np.random.choice(products, size=count_rows).tolist(),
    }

    # Only purchases carry revenue: a uniform amount in [0.1, 400.0) rounded
    # to 2 decimals; every other event type gets 0.0.
    purchase_amounts = np.round(
        np.random.uniform(low=0.1, high=400.0, size=count_rows), decimals=2
    )
    data['revenue'] = np.where(event_types == 'purchase', purchase_amounts, 0.0)

    df = pd.DataFrame(data)
    df['event_timestamp'] = pd.to_datetime(df['event_timestamp'])
    client.insert_df('events', df)

    # Progress line: timestamp followed by the number of rows just inserted.
    now = datetime.now()
    print(f"{now:%Y-%m-%d %H:%M:%S} вставка {count_rows} строк")
