In [None]:
import csv
import getpass
import logging
from datetime import datetime

import pandas as pd
from clickhouse_driver import Client

In [None]:
# Interactive credentials for the ClickHouse connection.
# Nothing is stored in the notebook: the login and the session identifier are
# read with input(), and the password is read with getpass so that it is not
# echoed into the cell output.
username = input("Мой логин: ")
password = getpass.getpass(prompt="Мой пароль: ")
session_id = input("Уникальный session_id: ")

In [None]:
# Logging setup: records at INFO level and above are appended to
# clickhouse_extraction.log in the current working directory.
#
# The file handler is created with an explicit UTF-8 encoding because the
# messages logged by this notebook contain Cyrillic text and emoji. With the
# platform default encoding (for example cp1251/cp1252 on Windows) such records
# cannot be encoded and are dropped with a "--- Logging error ---" on stderr.
#
# delay=True postpones opening the file until the first record is written, so
# re-running this cell (where basicConfig is a no-op because the root logger is
# already configured) does not leave an extra open file handle behind.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler(
            'clickhouse_extraction.log',
            encoding='utf-8',
            delay=True
        )
    ]
)

In [None]:
# Connect to ClickHouse with the credentials entered above.
#
# On success `client` is ready for queries; on any failure the error is logged,
# printed, and re-raised so that the following cells do not run against a
# broken connection.
#
# NOTE(review): session_id / session_timeout are passed as query settings;
# confirm that the server honours them over the native protocol (port 9000).
try:
    client = Client(
        host='prod.host',
        database="clickhouse",
        port=9000,
        user=username,
        password=password,
        connect_timeout=60,
        send_receive_timeout=3000,
        settings={
            "session_id": session_id,
            "session_timeout": 3600
        }
    )
    # Client() only stores the parameters; the socket is opened lazily on the
    # first query. Run a trivial query so that wrong host/credentials are
    # detected here rather than in the middle of the export cell.
    client.execute("SELECT 1")
    logging.info("✅ Подключение к ClickHouse успешно")
    print(f"✅ Успешное подключение к ClickHouse (сессия: {session_id})")
except Exception as e:
    logging.error(f"Ошибка подключения: {e}")
    print(f"❌ Ошибка подключения: {e}")
    raise

In [None]:
# Export of per-product availability flags to a CSV file.
#
# The cell builds one SQL query, streams its result from ClickHouse and writes
# it row by row to `file_name`, so the whole result never has to fit in memory.

# Output file for the export (the name mentions the default 30-day window).
file_name = "доступность за 30 дней.csv"

# Length of the availability window, in days. The window ends yesterday:
# day_1 is WINDOW_DAYS days ago and day_<WINDOW_DAYS> is yesterday.
WINDOW_DAYS = 30

# One 0/1 indicator column per day of the window, generated instead of being
# written out by hand.
day_columns = ",\n".join(
    f"        MAX(CASE WHEN Date = today() - INTERVAL {WINDOW_DAYS - offset} DAY "
    f"THEN 1 ELSE 0 END) AS day_{offset + 1}"
    for offset in range(WINDOW_DAYS)
)

# SQL text. It must NOT end with ';' — the statement is sent as a single query.
# NOTE(review): ReadyForSale = 1 is applied only when computing
# first_day_on_web; the day_N flags are set by any row for that date —
# confirm this is intended.
base_query = f"""
    WITH first_day AS (
        SELECT
            CONCAT(seller_id, '-', product_id) AS product_key,
            MIN(Date) AS first_day_on_web
        FROM Analytics.Assortment
        WHERE 1 = 1
            AND ReadyForSale = 1
        GROUP BY product_key
    )
    SELECT
        f.product_key,
        f.first_day_on_web,
        -- Генерация доступности для последних 30 дней
{day_columns}
    FROM Analytics.Assortment a
    JOIN first_day f ON f.product_key = CONCAT(a.seller_id, '-', a.product_id)
    WHERE 1 = 1
        AND a.Date BETWEEN today() - INTERVAL {WINDOW_DAYS} DAY AND today() - INTERVAL 1 DAY
    GROUP BY f.product_key, f.first_day_on_web
    ORDER BY f.product_key DESC
"""

try:
    print("🔄 Начало загрузки данных из ClickHouse...")
    logging.info("Старт выгрузки")

    # Run the query once, as a stream. With with_column_types=True the first
    # item produced by the iterator is the list of (column name, column type)
    # pairs; every following item is a data row.
    rows = client.execute_iter(base_query, with_column_types=True)
    meta = next(rows)
    column_names = [col[0] for col in meta]

    # The file is opened in default text mode and the writer terminates lines
    # with "\n", so line endings are the same as with plain f.write(... + "\n").
    with open(file_name, 'w', encoding='windows-1251') as f:
        # csv.writer quotes values that contain the delimiter or a quote
        # character, which a plain ";".join() would silently corrupt.
        writer = csv.writer(f, delimiter=";", lineterminator="\n")

        # Header row
        writer.writerow(column_names)

        row_count = 0
        for row in rows:
            # NULLs become empty cells; embedded newlines are flattened to
            # spaces so that every record stays on a single line.
            writer.writerow(
                ["" if x is None else str(x).replace("\n", " ") for x in row]
            )
            row_count += 1

            if row_count % 10000 == 0:
                print(f"📦 Загружено {row_count} строк...")

    logging.info(f"✅ Завершено. Всего строк: {row_count}")
    print(f"✅ Данные успешно сохранены в {file_name}. Строк: {row_count}")

except Exception as e:
    # Record the full traceback and re-raise, as the connection cell does:
    # a failed export leaves a partial file and must not look like a success.
    logging.exception(f"❌ Ошибка при выгрузке: {e}")
    print(f"❌ Произошла ошибка: {e}")
    raise

finally:
    print("🔌 Обработка завершена.")
    logging.info("🔌 Обработка завершена.")