# Сравнение хранилищ ClickHouse и Vertica

In [1]:
import uuid
import random 
import timeit
import time

### Подготовка данных для теста

In [2]:
# Open the CSV file that will receive the generated test rows (write mode).
file = open('insert_file.csv', mode='w')

In [3]:
# Write 1,000,000 CSV rows: sequential id, time-based UUID, random frame number.
for n in range(1, 1000001):
    file.write(f'{n},{uuid.uuid1()},{random.randint(1,999999999)}\n')

# Close the handle so every buffered row is flushed to disk before the file
# is read back for the insert tests.
file.close()

# ClickHouse

In [4]:
from clickhouse_driver import Client

# Connect to the ClickHouse server on localhost; no port is passed, so the
# driver's default is used.
client = Client(
    host='127.0.0.1',
)

### Создание базы test_base

In [5]:
# Create the benchmark database across the cluster unless it already exists.
client.execute(
    'CREATE DATABASE IF NOT EXISTS test_base ON CLUSTER company_cluster'
)

[('clickhouse-node5', 9000, 0, '', 5, 0),
 ('clickhouse-node1', 9000, 0, '', 4, 0),
 ('clickhouse-node3', 9000, 0, '', 3, 0),
 ('clickhouse-node4', 9000, 0, '', 2, 0),
 ('clickhouse-node6', 9000, 0, '', 1, 0),
 ('clickhouse-node2', 9000, 0, '', 0, 0)]

### Создание таблицы test_table

In [6]:
# Create the benchmark table across the cluster: a MergeTree table ordered by id
# with three columns (id, movie_id, frame).
client.execute(
    'CREATE TABLE IF NOT EXISTS test_base.test_table ON CLUSTER company_cluster '
    '(id Int64, movie_id UUID, frame Int64) '
    'Engine=MergeTree() ORDER BY id'
)

[('clickhouse-node5', 9000, 0, '', 5, 0),
 ('clickhouse-node1', 9000, 0, '', 4, 0),
 ('clickhouse-node3', 9000, 0, '', 3, 0),
 ('clickhouse-node6', 9000, 0, '', 2, 0),
 ('clickhouse-node4', 9000, 0, '', 1, 0),
 ('clickhouse-node2', 9000, 0, '', 0, 0)]

### Вставка из файла

In [7]:
# Read the whole CSV into memory as a single string; the context manager
# closes the handle as soon as the contents have been read.
with open('insert_file.csv','r') as csv_file:
    file = csv_file.read()

In [8]:
# Time a single bulk INSERT that carries the entire CSV payload inline in the
# query text. perf_counter() is monotonic, so the measurement cannot be skewed
# by system clock adjustments the way time.time() can.
start_time = time.perf_counter()
client.execute(f"INSERT INTO test_base.test_table (id, movie_id, frame) FORMAT CSV {file}")
elapsed = time.perf_counter() - start_time
print(f"Time insert {elapsed}")

Time insert 2.5905041694641113


In [9]:
# Time a full-table SELECT, including transferring every row to the client.
# perf_counter() is monotonic, so the measurement cannot be skewed by system
# clock adjustments the way time.time() can.
start_time = time.perf_counter()
result = client.execute('SELECT * FROM test_base.test_table')
elapsed = time.perf_counter() - start_time
print(f"Time select {elapsed}")

Time select 3.2951719760894775


In [10]:
# Show the first ten fetched rows as a sanity check.
result[:10]

[(335873, UUID('68e47f84-0aa6-11ed-b4ce-acde48001122'), 709720398),
 (335874, UUID('68e47fd4-0aa6-11ed-b4ce-acde48001122'), 711591539),
 (335875, UUID('68e4802e-0aa6-11ed-b4ce-acde48001122'), 237136998),
 (335876, UUID('68e4807e-0aa6-11ed-b4ce-acde48001122'), 194143495),
 (335877, UUID('68e480ce-0aa6-11ed-b4ce-acde48001122'), 800013257),
 (335878, UUID('68e4811e-0aa6-11ed-b4ce-acde48001122'), 726943775),
 (335879, UUID('68e48178-0aa6-11ed-b4ce-acde48001122'), 661911352),
 (335880, UUID('68e481c8-0aa6-11ed-b4ce-acde48001122'), 895735693),
 (335881, UUID('68e48218-0aa6-11ed-b4ce-acde48001122'), 317827658),
 (335882, UUID('68e48268-0aa6-11ed-b4ce-acde48001122'), 27987895)]

In [11]:
# Empty the result list in place so it no longer references the fetched rows.
del result[:]

### Удаление таблицы test_base.test_table

In [12]:
# Drop the benchmark table across the cluster (no error if it is already gone).
client.execute(
    'DROP TABLE IF EXISTS test_base.test_table ON CLUSTER company_cluster'
)

[('clickhouse-node5', 9000, 0, '', 5, 0),
 ('clickhouse-node1', 9000, 0, '', 4, 0),
 ('clickhouse-node3', 9000, 0, '', 3, 0),
 ('clickhouse-node4', 9000, 0, '', 2, 0),
 ('clickhouse-node2', 9000, 0, '', 1, 0),
 ('clickhouse-node6', 9000, 0, '', 0, 0)]

# Vertica

In [15]:
import vertica_python
# Keyword arguments later unpacked into vertica_python.connect().
connection_info = dict(
    host='127.0.0.1',
    port=5433,
    user='dbadmin',
    password='',
    database='docker',
    autocommit=True,
)

In [16]:
# Open the Vertica connection with the settings from connection_info and
# obtain the cursor used by the cells that follow.
connection = vertica_python.connect(
    **connection_info,
)
cursor = connection.cursor()

In [17]:
# Create the Vertica benchmark table: an IDENTITY key column plus the three
# columns loaded from the CSV (id_test, movie_id, frame).
create_test_table_sql = """CREATE TABLE test (
        id IDENTITY,
        id_test INTEGER,
        movie_id UUID,
        frame INTEGER NOT NULL);
    """
cursor.execute(create_test_table_sql)

<vertica_python.vertica.cursor.Cursor at 0x7f8abeb685e0>

In [18]:
# Time a bulk load of the CSV into Vertica via COPY ... FROM LOCAL.
# perf_counter() is monotonic, so the measurement cannot be skewed by system
# clock adjustments the way time.time() can.
# NOTE(review): the Vertica docs list fcsvparser's `header` parameter as
# defaulting to true, which would consume the first CSV line as a header; the
# generated file has no header row - confirm whether the first record is
# being skipped by this load.
start_time = time.perf_counter()
cursor.execute("COPY test FROM LOCAL 'insert_file.csv' PARSER fcsvparser();")
elapsed = time.perf_counter() - start_time
print(f"Time insert {elapsed}")

Time insert 2.484109878540039


In [19]:
# Time a SELECT of the three loaded columns, including fetching every row to
# the client. perf_counter() is monotonic, so the measurement cannot be skewed
# by system clock adjustments the way time.time() can.
start_time = time.perf_counter()
result = cursor.execute("SELECT id_test, movie_id, frame FROM test;").fetchall()
elapsed = time.perf_counter() - start_time
print(f"Time select {elapsed}")

Time select 19.18446111679077


In [20]:
# Show the first ten fetched rows as a sanity check.
result[:10]

[[2, UUID('672ef1ba-0aa6-11ed-b4ce-acde48001122'), 157359031],
 [3, UUID('672ef264-0aa6-11ed-b4ce-acde48001122'), 707116490],
 [4, UUID('672ef2dc-0aa6-11ed-b4ce-acde48001122'), 917573493],
 [5, UUID('672ef340-0aa6-11ed-b4ce-acde48001122'), 387062592],
 [6, UUID('672ef39a-0aa6-11ed-b4ce-acde48001122'), 206309199],
 [7, UUID('672ef3f4-0aa6-11ed-b4ce-acde48001122'), 829203619],
 [8, UUID('672ef44e-0aa6-11ed-b4ce-acde48001122'), 950647470],
 [9, UUID('672ef4a8-0aa6-11ed-b4ce-acde48001122'), 216531751],
 [10, UUID('672ef502-0aa6-11ed-b4ce-acde48001122'), 286194518],
 [11, UUID('672ef552-0aa6-11ed-b4ce-acde48001122'), 850731386]]

In [21]:
# Empty the result list in place so it no longer references the fetched rows.
del result[:]

In [22]:
# Remove the Vertica benchmark table now that the measurements are done.
cursor.execute(
    "DROP TABLE test;"
)

<vertica_python.vertica.cursor.Cursor at 0x7f8abeb685e0>