# Сравнение хранилищ ClickHouse и Vertica

In [106]:
import uuid
import random 
import timeit
import time

### Подготовка данных для теста

In [107]:
file = open('insert_file.csv','w')

In [108]:
n = 1
while n <= 1000000:
    file.write(f'{n},{uuid.uuid1()},{random.randint(1,999999999)}\n')
    n+=1

# Clickhouse

In [123]:
from clickhouse_driver import Client

client = Client(host='127.0.0.1') 

### Создание базы test_base

In [124]:
client.execute('CREATE DATABASE IF NOT EXISTS test_base ON CLUSTER company_cluster')

[('clickhouse-node1', 9000, 0, '', 3, 0),
 ('clickhouse-node3', 9000, 0, '', 2, 0),
 ('clickhouse-node2', 9000, 0, '', 1, 0),
 ('clickhouse-node4', 9000, 0, '', 0, 0)]

### Создание таблицы test_table

In [125]:
client.execute('CREATE TABLE IF NOT EXISTS test_base.test_table ON CLUSTER company_cluster (id Int64, movie_id UUID, frame Int64) Engine=MergeTree() ORDER BY id')

[('clickhouse-node1', 9000, 0, '', 3, 0),
 ('clickhouse-node3', 9000, 0, '', 2, 0),
 ('clickhouse-node4', 9000, 0, '', 1, 0),
 ('clickhouse-node2', 9000, 0, '', 0, 0)]

### Вставка из файла

In [126]:
file = open('insert_file.csv','r').read()

In [127]:
start_time = time.time()
client.execute(f"INSERT INTO test_base.test_table (id, movie_id, frame) FORMAT CSV {file}")
print(f"Time insert {(time.time() - start_time)}")

Time insert 1.7360010147094727


In [129]:
start_time = time.time()
result = client.execute('SELECT * FROM test_base.test_table')
print(f"Time select {(time.time() - start_time)}")

Time select 3.4653730392456055


In [137]:
result[0:10]

[]

In [136]:
result.clear()

### Удаление таблицы test_base.test_table

In [115]:
client.execute('DROP TABLE IF EXISTS test_base.test_table ON CLUSTER company_cluster')

[('clickhouse-node3', 9000, 0, '', 3, 1),
 ('clickhouse-node2', 9000, 0, '', 2, 1),
 ('clickhouse-node1', 9000, 0, '', 1, 0),
 ('clickhouse-node4', 9000, 0, '', 0, 0)]

# Vertica

In [145]:
import vertica_python
connection_info = {
    'host': '127.0.0.1',
    'port': 5433,
    'user': 'dbadmin',
    'password': '',
    'database': 'docker',
    'autocommit': True,
}

In [146]:
connection = vertica_python.connect(**connection_info)
cursor = connection.cursor()

In [153]:
cursor.execute("""CREATE TABLE test (
        id IDENTITY,
        id_test INTEGER,
        movie_id UUID,
        frame INTEGER NOT NULL);
    """)

<vertica_python.vertica.cursor.Cursor at 0x7f8ce5685100>

In [154]:
start_time = time.time()
cursor.execute("COPY test FROM LOCAL 'insert_file.csv' PARSER fcsvparser();")
print(f"Time insert {(time.time() - start_time)}")

Time insert 1.4937548637390137


In [155]:
start_time = time.time()
result = cursor.execute("SELECT id_test, movie_id, frame FROM test;").fetchall()
print(f"Time select {(time.time() - start_time)}")

Time select 18.131824016571045


In [156]:
result[0:10]

[[2, UUID('231ef10e-052a-11ed-b78b-acde48001122'), 807747120],
 [3, UUID('231ef19a-052a-11ed-b78b-acde48001122'), 698313072],
 [4, UUID('231ef208-052a-11ed-b78b-acde48001122'), 714299305],
 [5, UUID('231ef26c-052a-11ed-b78b-acde48001122'), 785159487],
 [6, UUID('231ef2d0-052a-11ed-b78b-acde48001122'), 702708126],
 [7, UUID('231ef320-052a-11ed-b78b-acde48001122'), 366161360],
 [8, UUID('231ef37a-052a-11ed-b78b-acde48001122'), 701103507],
 [9, UUID('231ef3ca-052a-11ed-b78b-acde48001122'), 614277100],
 [10, UUID('231ef41a-052a-11ed-b78b-acde48001122'), 592189220],
 [11, UUID('231ef474-052a-11ed-b78b-acde48001122'), 648620470]]

In [157]:
result.clear()

In [158]:
cursor.execute("DROP TABLE test;")

<vertica_python.vertica.cursor.Cursor at 0x7f8ce5685100>